diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..b930c24c --- /dev/null +++ b/.clang-format @@ -0,0 +1,108 @@ +# To run clang tools: +# cd to root directory +# To update format only: +# find . -name "*.cpp" -or -name "*.cc" -or -name "*.h" -or -name "*.hpp" -or -name "*.I" | xargs -I{} clang-format -i {} +# git status -s . | sed s/^...// | grep -E "(\.cpp|\.h|\.cc|\.hpp|\.I)" | xargs -I{} clang-format -i {} + +# To run modernize +# export CLANG_PATH=/packages/llvm/build/llvm-60 +# export PATH=${CLANG_PATH}/bin:${CLANG_PATH}/share/clang:$PATH +# find src -name "*.cpp" -or -name "*.cc" | xargs -I{} clang-tidy -checks=modernize* -p=/projects/AtomicModel/build/debug -fix {} +# find src -name "*.cpp" -or -name "*.cc" -or -name "*.h" -or -name "*.hpp" -or -name "*.I" | xargs -I{} clang-format -i {} + + + + + +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -4 +AlignAfterOpenBracket: DontAlign +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: true + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterNamespace: false + AfterObjCDeclaration: true + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +#BreakBeforeBraces: Stroustrup +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: false +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 100 +CommentPragmas: 
'^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: true +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: true +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 4 +UseTab: Never +... 
+ diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d479391..8f500927 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,7 +150,7 @@ IF ( NOT ONLY_BUILD_DOCS ) CONFIGURE_NETCDF() CONFIGURE_SILO() CONFIGURE_LBPM() - CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" ) + CONFIGURE_TIMER( 0 "${${PROJ}_INSTALL_DIR}/null_timer" FALSE ) CONFIGURE_LINE_COVERAGE() # Set the external library link list SET( EXTERNAL_LIBS ${EXTERNAL_LIBS} ${TIMER_LIBS} ) diff --git a/IO/IOHelpers.h b/IO/IOHelpers.h index 2e9b06e0..4389c619 100644 --- a/IO/IOHelpers.h +++ b/IO/IOHelpers.h @@ -10,9 +10,9 @@ namespace IO { // Find a character in a line inline size_t find( const char *line, char token ) { - size_t i=0; + size_t i = 0; while ( 1 ) { - if ( line[i]==token || line[i]<32 || line[i]==0 ) + if ( line[i] == token || line[i] < 32 || line[i] == 0 ) break; ++i; } @@ -21,17 +21,17 @@ inline size_t find( const char *line, char token ) // Remove preceeding/trailing whitespace -inline std::string deblank( const std::string& str ) +inline std::string deblank( const std::string &str ) { size_t i1 = str.size(); size_t i2 = 0; - for (size_t i=0; i=32 ) { - i1 = std::min(i1,i); - i2 = std::max(i2,i); + for ( size_t i = 0; i < str.size(); i++ ) { + if ( str[i] != ' ' && str[i] >= 32 ) { + i1 = std::min( i1, i ); + i2 = std::max( i2, i ); } } - return str.substr(i1,i2-i1+1); + return str.substr( i1, i2 - i1 + 1 ); } @@ -42,14 +42,14 @@ inline std::vector splitList( const char *line, const char token ) size_t i1 = 0; size_t i2 = 0; while ( 1 ) { - if ( line[i2]==token || line[i2]<32 ) { - std::string tmp(&line[i1],i2-i1); - tmp = deblank(tmp); + if ( line[i2] == token || line[i2] < 32 ) { + std::string tmp( &line[i1], i2 - i1 ); + tmp = deblank( tmp ); if ( !tmp.empty() ) - list.push_back(tmp); - i1 = i2+1; + list.push_back( tmp ); + i1 = i2 + 1; } - if ( line[i2]==0 ) + if ( line[i2] == 0 ) break; i2++; } @@ -57,8 +57,6 @@ inline std::vector splitList( const char *line, const char token ) 
} - -}; +}; // namespace IO #endif - diff --git a/IO/Mesh.cpp b/IO/Mesh.cpp index eb712296..9966bf52 100644 --- a/IO/Mesh.cpp +++ b/IO/Mesh.cpp @@ -1,4 +1,5 @@ #include "Mesh.h" +#include "IO/IOHelpers.h" #include "common/Utilities.h" #include @@ -19,104 +20,110 @@ inline Point nullPoint() /**************************************************** -* Mesh * -****************************************************/ -Mesh::Mesh( ) -{ -} -Mesh::~Mesh( ) -{ -} + * Mesh * + ****************************************************/ +Mesh::Mesh() {} +Mesh::~Mesh() {} /**************************************************** -* MeshDataStruct * -****************************************************/ -bool MeshDataStruct::check() const + * MeshDataStruct * + ****************************************************/ +#define checkResult( pass, msg ) \ + do { \ + if ( !( pass ) ) { \ + if ( abort ) \ + ERROR( msg ); \ + return false; \ + } \ + } while ( 0 ) +bool MeshDataStruct::check( bool abort ) const { - enum VariableType { NodeVariable=1, EdgeVariable=2, SurfaceVariable=2, VolumeVariable=3, NullVariable=0 }; - bool pass = mesh != nullptr; - for ( const auto& var : vars ) { - pass = pass && static_cast(var->type)>=1 && static_cast(var->type)<=3; - pass = pass && !var->data.empty(); + for ( const auto &var : vars ) { + checkResult( var->type == VariableType::NodeVariable || + var->type == VariableType::EdgeVariable || + var->type == VariableType::SurfaceVariable || + var->type == VariableType::VolumeVariable, + "Invalid data type" ); + checkResult( !var->data.empty(), "Variable data is empty" ); } - if ( !pass ) - return false; - const std::string& meshClass = mesh->className(); + const std::string &meshClass = mesh->className(); if ( meshClass == "PointList" ) { - const auto mesh2 = dynamic_cast( mesh.get() ); - if ( mesh2 == nullptr ) - return false; - for ( const auto& var : vars ) { + auto mesh2 = dynamic_cast( mesh.get() ); + ASSERT( mesh2 ); + for ( const auto &var : vars ) { if ( 
var->type == IO::VariableType::NodeVariable ) { - pass = pass && var->data.size(0)==mesh2->points.size() && var->data.size(1)==var->dim; + size_t N_points = mesh2->points.size(); + checkResult( var->data.size( 0 ) == N_points, "sizeof NodeVariable" ); + checkResult( var->data.size( 1 ) == var->dim, "sizeof NodeVariable" ); } else if ( var->type == IO::VariableType::EdgeVariable ) { - ERROR("Invalid type for PointList"); + ERROR( "Invalid type for PointList" ); } else if ( var->type == IO::VariableType::SurfaceVariable ) { - ERROR("Invalid type for PointList"); + ERROR( "Invalid type for PointList" ); } else if ( var->type == IO::VariableType::VolumeVariable ) { - ERROR("Invalid type for PointList"); + ERROR( "Invalid type for PointList" ); } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } } } else if ( meshClass == "TriMesh" || meshClass == "TriList" ) { - const auto mesh2 = getTriMesh( mesh ); - if ( mesh2 == nullptr ) - return false; - for ( const auto& var : vars ) { + auto mesh2 = getTriMesh( mesh ); + ASSERT( mesh2 ); + for ( const auto &var : vars ) { if ( var->type == IO::VariableType::NodeVariable ) { - pass = pass && var->data.size(0)==mesh2->vertices->points.size() && var->data.size(1)==var->dim; + size_t N_points = mesh2->vertices->points.size(); + checkResult( var->data.size( 0 ) == N_points, "sizeof NodeVariable" ); + checkResult( var->data.size( 1 ) == var->dim, "sizeof NodeVariable" ); } else if ( var->type == IO::VariableType::EdgeVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::SurfaceVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::VolumeVariable ) { - pass = pass && var->data.size(0)==mesh2->A.size() && var->data.size(1)==var->dim; + checkResult( var->data.size( 0 ) == mesh2->A.size(), "sizeof VolumeVariable" ); + checkResult( var->data.size( 1 ) == var->dim, "sizeof VolumeVariable" ); } else { - 
ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } } } else if ( meshClass == "DomainMesh" ) { - const auto mesh2 = dynamic_cast( mesh.get() ); - if ( mesh2 == nullptr ) - return false; - for ( const auto& var : vars ) { + auto mesh2 = dynamic_cast( mesh.get() ); + ASSERT( mesh2 ); + for ( const auto &var : vars ) { + ArraySize varSize; if ( var->type == IO::VariableType::NodeVariable ) { - pass = pass && (int) var->data.size(0)==(mesh2->nx+1) && (int) var->data.size(1)==(mesh2->ny+1) - && (int) var->data.size(2)==(mesh2->nz+1) && var->data.size(3)==var->dim; + varSize = ArraySize( mesh2->nx + 1, mesh2->ny + 1, mesh2->nz + 1, var->dim ); } else if ( var->type == IO::VariableType::EdgeVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::SurfaceVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var->type == IO::VariableType::VolumeVariable ) { - pass = pass && (int) var->data.size(0)==mesh2->nx && (int) var->data.size(1)==mesh2->ny - && (int) var->data.size(2)==mesh2->nz && var->data.size(3)==var->dim; + varSize = ArraySize( mesh2->nx, mesh2->ny, mesh2->nz, var->dim ); } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } + if ( var->data.size( 0 ) == varSize[0] * varSize[1] * varSize[2] && + var->data.size( 1 ) == varSize[3] ) + var->data.resize( varSize ); + for ( int d = 0; d < 4; d++ ) + checkResult( var->data.size( d ) == varSize[d], "DomainMesh Variable" ); } } else { - ERROR("Unknown mesh class: "+mesh->className()); + ERROR( "Unknown mesh class: " + mesh->className() ); } - return pass; + return true; } /**************************************************** -* PointList * -****************************************************/ -PointList::PointList( ) -{ -} + * PointList * + ****************************************************/ +PointList::PointList() {} PointList::PointList( size_t N ) { Point tmp = nullPoint(); - points.resize(N,tmp); 
-} -PointList::~PointList( ) -{ + points.resize( N, tmp ); } +PointList::~PointList() {} size_t PointList::numberPointsVar( VariableType type ) const { size_t N = 0; @@ -124,174 +131,168 @@ size_t PointList::numberPointsVar( VariableType type ) const N = points.size(); return N; } -std::pair PointList::pack( int level ) const +std::pair PointList::pack( int level ) const { - std::pair data_out(0,NULL); - if ( level==0 ) { - data_out.first = (2+3*points.size())*sizeof(double); - double *data_ptr = new double[2+3*points.size()]; - data_out.second = data_ptr; - uint64_t *data_int = reinterpret_cast(data_ptr); - data_int[0] = level; - data_int[1] = points.size(); - double *data = &data_ptr[2]; - for (size_t i=0; i data_out( 0, NULL ); + if ( level == 0 ) { + data_out.first = ( 2 + 3 * points.size() ) * sizeof( double ); + double *data_ptr = new double[2 + 3 * points.size()]; + data_out.second = data_ptr; + uint64_t *data_int = reinterpret_cast( data_ptr ); + data_int[0] = level; + data_int[1] = points.size(); + double *data = &data_ptr[2]; + for ( size_t i = 0; i < points.size(); i++ ) { + data[3 * i + 0] = points[i].x; + data[3 * i + 1] = points[i].y; + data[3 * i + 2] = points[i].z; } } return data_out; } -void PointList::unpack( const std::pair& data_in ) +void PointList::unpack( const std::pair &data_in ) { - uint64_t *data_int = reinterpret_cast(data_in.second); - const double *data = reinterpret_cast(data_in.second); - int level = data_int[0]; - uint64_t N = data_int[1]; - data = &data[2]; - if ( level==0 ) { - ASSERT((2+3*N)*sizeof(double)==data_in.first); - points.resize(N); - for (size_t i=0; i( data_in.second ); + const double *data = reinterpret_cast( data_in.second ); + int level = data_int[0]; + uint64_t N = data_int[1]; + data = &data[2]; + if ( level == 0 ) { + ASSERT( ( 2 + 3 * N ) * sizeof( double ) == data_in.first ); + points.resize( N ); + for ( size_t i = 0; i < points.size(); i++ ) { + points[i].x = data[3 * i + 0]; + points[i].y = data[3 * i + 
1]; + points[i].z = data[3 * i + 2]; } } } /**************************************************** -* TriList * -****************************************************/ -TriList::TriList( ) -{ -} + * TriList * + ****************************************************/ +TriList::TriList() {} TriList::TriList( size_t N_tri ) { Point tmp = nullPoint(); - A.resize(N_tri,tmp); - B.resize(N_tri,tmp); - C.resize(N_tri,tmp); + A.resize( N_tri, tmp ); + B.resize( N_tri, tmp ); + C.resize( N_tri, tmp ); } -TriList::TriList( const TriMesh& mesh ) +TriList::TriList( const TriMesh &mesh ) { Point tmp = nullPoint(); - A.resize(mesh.A.size(),tmp); - B.resize(mesh.B.size(),tmp); - C.resize(mesh.C.size(),tmp); - ASSERT(mesh.vertices.get()!=NULL); - const std::vector& P = mesh.vertices->points; - for (size_t i=0; i &P = mesh.vertices->points; + for ( size_t i = 0; i < A.size(); i++ ) A[i] = P[mesh.A[i]]; - for (size_t i=0; i TriList::pack( int level ) const +std::pair TriList::pack( int level ) const { - std::pair data_out(0,NULL); - if ( level==0 ) { - data_out.first = (2+9*A.size())*sizeof(double); - double *data_ptr = new double[2+9*A.size()]; - data_out.second = data_ptr; - uint64_t *data_int = reinterpret_cast(data_ptr); - data_int[0] = level; - data_int[1] = A.size(); - double *data = &data_ptr[2]; - for (size_t i=0; i data_out( 0, NULL ); + if ( level == 0 ) { + data_out.first = ( 2 + 9 * A.size() ) * sizeof( double ); + double *data_ptr = new double[2 + 9 * A.size()]; + data_out.second = data_ptr; + uint64_t *data_int = reinterpret_cast( data_ptr ); + data_int[0] = level; + data_int[1] = A.size(); + double *data = &data_ptr[2]; + for ( size_t i = 0; i < A.size(); i++ ) { + data[9 * i + 0] = A[i].x; + data[9 * i + 1] = A[i].y; + data[9 * i + 2] = A[i].z; + data[9 * i + 3] = B[i].x; + data[9 * i + 4] = B[i].y; + data[9 * i + 5] = B[i].z; + data[9 * i + 6] = C[i].x; + data[9 * i + 7] = C[i].y; + data[9 * i + 8] = C[i].z; } } return data_out; } -void TriList::unpack( const std::pair& 
data_in ) +void TriList::unpack( const std::pair &data_in ) { - uint64_t *data_int = reinterpret_cast(data_in.second); - const double *data = reinterpret_cast(data_in.second); - int level = data_int[0]; - uint64_t N = data_int[1]; - data = &data[2]; - if ( level==0 ) { - ASSERT((2+9*N)*sizeof(double)==data_in.first); - A.resize(N); - B.resize(N); - C.resize(N); - for (size_t i=0; i( data_in.second ); + const double *data = reinterpret_cast( data_in.second ); + int level = data_int[0]; + uint64_t N = data_int[1]; + data = &data[2]; + if ( level == 0 ) { + ASSERT( ( 2 + 9 * N ) * sizeof( double ) == data_in.first ); + A.resize( N ); + B.resize( N ); + C.resize( N ); + for ( size_t i = 0; i < A.size(); i++ ) { + A[i].x = data[9 * i + 0]; + A[i].y = data[9 * i + 1]; + A[i].z = data[9 * i + 2]; + B[i].x = data[9 * i + 3]; + B[i].y = data[9 * i + 4]; + B[i].z = data[9 * i + 5]; + C[i].x = data[9 * i + 6]; + C[i].y = data[9 * i + 7]; + C[i].z = data[9 * i + 8]; } } } /**************************************************** -* TriMesh * -****************************************************/ -TriMesh::TriMesh( ) -{ -} + * TriMesh * + ****************************************************/ +TriMesh::TriMesh() {} TriMesh::TriMesh( size_t N_tri, size_t N_point ) { - vertices.reset( new PointList(N_point) ); - A.resize(N_tri,-1); - B.resize(N_tri,-1); - C.resize(N_tri,-1); + vertices.reset( new PointList( N_point ) ); + A.resize( N_tri, -1 ); + B.resize( N_tri, -1 ); + C.resize( N_tri, -1 ); } TriMesh::TriMesh( size_t N_tri, std::shared_ptr points ) { vertices = points; - A.resize(N_tri,-1); - B.resize(N_tri,-1); - C.resize(N_tri,-1); + A.resize( N_tri, -1 ); + B.resize( N_tri, -1 ); + C.resize( N_tri, -1 ); } -TriMesh::TriMesh( const TriList& mesh ) +TriMesh::TriMesh( const TriList &mesh ) { // For simlicity we will just create a mesh with ~3x the verticies for now - ASSERT(mesh.A.size()==mesh.B.size()&&mesh.A.size()==mesh.C.size()); - A.resize(mesh.A.size()); - 
B.resize(mesh.B.size()); - C.resize(mesh.C.size()); - vertices.reset( new PointList(3*mesh.A.size()) ); - for (size_t i=0; ipoints[A[i]] = mesh.A[i]; vertices->points[B[i]] = mesh.B[i]; vertices->points[C[i]] = mesh.C[i]; } } -TriMesh::~TriMesh( ) +TriMesh::~TriMesh() { vertices.reset(); A.clear(); @@ -301,181 +302,319 @@ TriMesh::~TriMesh( ) size_t TriMesh::numberPointsVar( VariableType type ) const { size_t N = 0; - if ( type==VariableType::NodeVariable ) + if ( type == VariableType::NodeVariable ) N = vertices->points.size(); - else if ( type==VariableType::SurfaceVariable || type==VariableType::VolumeVariable ) + else if ( type == VariableType::SurfaceVariable || type == VariableType::VolumeVariable ) N = A.size(); return N; } -std::pair TriMesh::pack( int level ) const +std::pair TriMesh::pack( int level ) const { - std::pair data_out(0,NULL); - if ( level==0 ) { - const std::vector& points = vertices->points; - data_out.first = (3+3*points.size())*sizeof(double) + 3*A.size()*sizeof(int); - double *data_ptr = new double[4+3*points.size()+(3*A.size()*sizeof(int))/sizeof(double)]; - data_out.second = data_ptr; - uint64_t *data_int64 = reinterpret_cast(data_ptr); - data_int64[0] = level; - data_int64[1] = points.size(); - data_int64[2] = A.size(); - double *data = &data_ptr[3]; - for (size_t i=0; i data_out( 0, NULL ); + if ( level == 0 ) { + const std::vector &points = vertices->points; + data_out.first = + ( 3 + 3 * points.size() ) * sizeof( double ) + 3 * A.size() * sizeof( int ); + double *data_ptr = + new double[4 + 3 * points.size() + ( 3 * A.size() * sizeof( int ) ) / sizeof( double )]; + data_out.second = data_ptr; + uint64_t *data_int64 = reinterpret_cast( data_ptr ); + data_int64[0] = level; + data_int64[1] = points.size(); + data_int64[2] = A.size(); + double *data = &data_ptr[3]; + for ( size_t i = 0; i < points.size(); i++ ) { + data[3 * i + 0] = points[i].x; + data[3 * i + 1] = points[i].y; + data[3 * i + 2] = points[i].z; } - int *data_int = 
reinterpret_cast(&data[3*points.size()]); - for (size_t i=0; i( &data[3 * points.size()] ); + for ( size_t i = 0; i < A.size(); i++ ) { + data_int[3 * i + 0] = A[i]; + data_int[3 * i + 1] = B[i]; + data_int[3 * i + 2] = C[i]; } } return data_out; } -void TriMesh::unpack( const std::pair& data_in ) +void TriMesh::unpack( const std::pair &data_in ) { - uint64_t *data_int64 = reinterpret_cast(data_in.second); - const double *data = reinterpret_cast(data_in.second); - int level = data_int64[0]; - uint64_t N_P = data_int64[1]; - uint64_t N_A = data_int64[2]; - data = &data[3]; - if ( level==0 ) { - size_t size = (3+3*N_P)*sizeof(double)+3*N_A*sizeof(int); - ASSERT(size==data_in.first); - vertices.reset( new PointList(N_P) ); - std::vector& points = vertices->points; - for (size_t i=0; i( data_in.second ); + const double *data = reinterpret_cast( data_in.second ); + int level = data_int64[0]; + uint64_t N_P = data_int64[1]; + uint64_t N_A = data_int64[2]; + data = &data[3]; + if ( level == 0 ) { + size_t size = ( 3 + 3 * N_P ) * sizeof( double ) + 3 * N_A * sizeof( int ); + ASSERT( size == data_in.first ); + vertices.reset( new PointList( N_P ) ); + std::vector &points = vertices->points; + for ( size_t i = 0; i < points.size(); i++ ) { + points[i].x = data[3 * i + 0]; + points[i].y = data[3 * i + 1]; + points[i].z = data[3 * i + 2]; } - const int *data_int = reinterpret_cast(&data[3*N_P]); - A.resize(N_A); - B.resize(N_A); - C.resize(N_A); - for (size_t i=0; i( &data[3 * N_P] ); + A.resize( N_A ); + B.resize( N_A ); + C.resize( N_A ); + for ( size_t i = 0; i < A.size(); i++ ) { + A[i] = data_int[3 * i + 0]; + B[i] = data_int[3 * i + 1]; + C[i] = data_int[3 * i + 2]; } } } /**************************************************** -* Domain mesh * -****************************************************/ -DomainMesh::DomainMesh(): - nprocx(0), nprocy(0), nprocz(0), rank(0), - nx(0), ny(0), nz(0), - Lx(0), Ly(0), Lz(0) + * Domain mesh * + 
****************************************************/ +DomainMesh::DomainMesh() + : nprocx( 0 ), + nprocy( 0 ), + nprocz( 0 ), + rank( 0 ), + nx( 0 ), + ny( 0 ), + nz( 0 ), + Lx( 0 ), + Ly( 0 ), + Lz( 0 ) { } -DomainMesh::DomainMesh( RankInfoStruct data, - int nx2, int ny2, int nz2, double Lx2, double Ly2, double Lz2 ): - nprocx(data.nx), nprocy(data.ny), nprocz(data.nz), rank(data.rank[1][1][1]), - nx(nx2), ny(ny2), nz(nz2), - Lx(Lx2), Ly(Ly2), Lz(Lz2) -{ -} -DomainMesh::~DomainMesh() +DomainMesh::DomainMesh( + RankInfoStruct data, int nx2, int ny2, int nz2, double Lx2, double Ly2, double Lz2 ) + : nprocx( data.nx ), + nprocy( data.ny ), + nprocz( data.nz ), + rank( data.rank[1][1][1] ), + nx( nx2 ), + ny( ny2 ), + nz( nz2 ), + Lx( Lx2 ), + Ly( Ly2 ), + Lz( Lz2 ) { } +DomainMesh::~DomainMesh() {} size_t DomainMesh::numberPointsVar( VariableType type ) const { size_t N = 0; - if ( type==VariableType::NodeVariable ) - N = (nx+1)*(ny+1)*(nz+1); - else if ( type==VariableType::SurfaceVariable ) - N = (nx+1)*ny*nz + nx*(ny+1)*nz + nx*ny*(nz+1); - else if ( type==VariableType::VolumeVariable ) - N = nx*ny*nz; + if ( type == VariableType::NodeVariable ) + N = ( nx + 1 ) * ( ny + 1 ) * ( nz + 1 ); + else if ( type == VariableType::SurfaceVariable ) + N = ( nx + 1 ) * ny * nz + nx * ( ny + 1 ) * nz + nx * ny * ( nz + 1 ); + else if ( type == VariableType::VolumeVariable ) + N = nx * ny * nz; return N; } -std::pair DomainMesh::pack( int level ) const +std::pair DomainMesh::pack( int level ) const { - std::pair data(0,NULL); - data.first = 7*sizeof(double); + std::pair data( 0, NULL ); + data.first = 7 * sizeof( double ); data.second = new double[7]; - memset(data.second,0,7*sizeof(double)); - int *data_int = reinterpret_cast(data.second); - double *data_double = &reinterpret_cast(data.second)[4]; - data_int[0] = nprocx; - data_int[1] = nprocy; - data_int[2] = nprocz; - data_int[3] = rank; - data_int[4] = nx; - data_int[5] = ny; - data_int[6] = nz; - data_double[0] = Lx; - 
data_double[1] = Ly; - data_double[2] = Lz; + memset( data.second, 0, 7 * sizeof( double ) ); + int *data_int = reinterpret_cast( data.second ); + double *data_double = &reinterpret_cast( data.second )[4]; + data_int[0] = nprocx; + data_int[1] = nprocy; + data_int[2] = nprocz; + data_int[3] = rank; + data_int[4] = nx; + data_int[5] = ny; + data_int[6] = nz; + data_double[0] = Lx; + data_double[1] = Ly; + data_double[2] = Lz; return data; } -void DomainMesh::unpack( const std::pair& data ) +void DomainMesh::unpack( const std::pair &data ) { - const int *data_int = reinterpret_cast(data.second); - const double *data_double = &reinterpret_cast(data.second)[4]; - nprocx = data_int[0]; - nprocy = data_int[1]; - nprocz = data_int[2]; - rank = data_int[3]; - nx = data_int[4]; - ny = data_int[5]; - nz = data_int[6]; - Lx = data_double[0]; - Ly = data_double[1]; - Lz = data_double[2]; + const int *data_int = reinterpret_cast( data.second ); + const double *data_double = &reinterpret_cast( data.second )[4]; + nprocx = data_int[0]; + nprocy = data_int[1]; + nprocz = data_int[2]; + rank = data_int[3]; + nx = data_int[4]; + ny = data_int[5]; + nz = data_int[6]; + Lx = data_double[0]; + Ly = data_double[1]; + Lz = data_double[2]; } /**************************************************** -* Converters * -****************************************************/ + * Converters * + ****************************************************/ std::shared_ptr getPointList( std::shared_ptr mesh ) { - return std::dynamic_pointer_cast(mesh); + return std::dynamic_pointer_cast( mesh ); } std::shared_ptr getTriMesh( std::shared_ptr mesh ) { std::shared_ptr mesh2; - if ( std::dynamic_pointer_cast(mesh).get() != NULL ) { - mesh2 = std::dynamic_pointer_cast(mesh); - } else if ( std::dynamic_pointer_cast(mesh).get() != NULL ) { - std::shared_ptr trilist = std::dynamic_pointer_cast(mesh); - ASSERT(trilist.get()!=NULL); - mesh2.reset( new TriMesh(*trilist) ); + if ( std::dynamic_pointer_cast( mesh ).get() 
!= NULL ) { + mesh2 = std::dynamic_pointer_cast( mesh ); + } else if ( std::dynamic_pointer_cast( mesh ).get() != NULL ) { + std::shared_ptr trilist = std::dynamic_pointer_cast( mesh ); + ASSERT( trilist.get() != NULL ); + mesh2.reset( new TriMesh( *trilist ) ); } return mesh2; } std::shared_ptr getTriList( std::shared_ptr mesh ) { std::shared_ptr mesh2; - if ( std::dynamic_pointer_cast(mesh).get() != NULL ) { - mesh2 = std::dynamic_pointer_cast(mesh); - } else if ( std::dynamic_pointer_cast(mesh).get() != NULL ) { - std::shared_ptr trimesh = std::dynamic_pointer_cast(mesh); - ASSERT(trimesh.get()!=NULL); - mesh2.reset( new TriList(*trimesh) ); + if ( std::dynamic_pointer_cast( mesh ).get() != NULL ) { + mesh2 = std::dynamic_pointer_cast( mesh ); + } else if ( std::dynamic_pointer_cast( mesh ).get() != NULL ) { + std::shared_ptr trimesh = std::dynamic_pointer_cast( mesh ); + ASSERT( trimesh.get() != NULL ); + mesh2.reset( new TriList( *trimesh ) ); } return mesh2; } std::shared_ptr getPointList( std::shared_ptr mesh ) { - return getPointList( std::const_pointer_cast(mesh) ); + return getPointList( std::const_pointer_cast( mesh ) ); } std::shared_ptr getTriMesh( std::shared_ptr mesh ) { - return getTriMesh( std::const_pointer_cast(mesh) ); + return getTriMesh( std::const_pointer_cast( mesh ) ); } std::shared_ptr getTriList( std::shared_ptr mesh ) { - return getTriList( std::const_pointer_cast(mesh) ); + return getTriList( std::const_pointer_cast( mesh ) ); } -} // IO namespace +/**************************************************** + * Convert enum values * + ****************************************************/ +std::string getString( VariableType type ) +{ + if ( type == VariableType::NodeVariable ) + return "node"; + else if ( type == VariableType::EdgeVariable ) + return "edge"; + else if ( type == VariableType::SurfaceVariable ) + return "face"; + else if ( type == VariableType::VolumeVariable ) + return "cell"; + else if ( type == VariableType::NullVariable ) + 
return "null"; + else + ERROR( "Invalid type" ); + return ""; +} +VariableType getVariableType( const std::string &type_in ) +{ + auto type = deblank( type_in ); + if ( type == "node" ) + return VariableType::NodeVariable; + else if ( type == "edge" || type == "1" ) + return VariableType::EdgeVariable; + else if ( type == "face" ) + return VariableType::SurfaceVariable; + else if ( type == "cell" || type == "3" ) + return VariableType::VolumeVariable; + else if ( type == "null" ) + return VariableType::NullVariable; + else + ERROR( "Invalid type: " + type ); + return VariableType::NullVariable; +} +std::string getString( DataType type ) +{ + if ( type == DataType::Double ) + return "double"; + else if ( type == DataType::Float ) + return "float"; + else if ( type == DataType::Int ) + return "int"; + else if ( type == DataType::Null ) + return "null"; + else + ERROR( "Invalid type" ); + return ""; +} +DataType getDataType( const std::string &type_in ) +{ + auto type = deblank( type_in ); + if ( type == "double" ) + return DataType::Double; + else if ( type == "float" ) + return DataType::Float; + else if ( type == "int" ) + return DataType::Int; + else if ( type == "null" ) + return DataType::Null; + else + ERROR( "Invalid type: " + type ); + return DataType::Null; +} +std::string getString( MeshType type ) +{ + if ( type == MeshType::PointMesh ) + return "PointMesh"; + else if ( type == MeshType::SurfaceMesh ) + return "SurfaceMesh"; + else if ( type == MeshType::VolumeMesh ) + return "VolumeMesh"; + else if ( type == MeshType::Unknown ) + return "unknown"; + else + ERROR( "Invalid type" ); + return ""; +} +MeshType getMeshType( const std::string &type_in ) +{ + auto type = deblank( type_in ); + if ( type == "PointMesh" || type == "1" ) + return MeshType::PointMesh; + else if ( type == "SurfaceMesh" || type == "2" ) + return MeshType::SurfaceMesh; + else if ( type == "VolumeMesh" || type == "3" ) + return MeshType::VolumeMesh; + else if ( type == "unknown" || type 
== "-1" ) + return MeshType::Unknown; + else + ERROR( "Invalid type: " + type ); + return MeshType::Unknown; +} +std::string getString( FileFormat type ) +{ + if ( type == FileFormat::OLD ) + return "old"; + else if ( type == FileFormat::NEW ) + return "new"; + else if ( type == FileFormat::NEW_SINGLE ) + return "new(single)"; + else if ( type == FileFormat::SILO ) + return "silo"; + else + ERROR( "Invalid type" ); + return ""; +} +FileFormat getFileFormat( const std::string &type_in ) +{ + auto type = deblank( type_in ); + if ( type == "old" || type == "1" ) + return FileFormat::OLD; + else if ( type == "new" || type == "2" ) + return FileFormat::NEW; + else if ( type == "new(single)" || type == "3" ) + return FileFormat::NEW_SINGLE; + else if ( type == "silo" || type == "4" ) + return FileFormat::SILO; + else + ERROR( "Invalid type: " + type ); + return FileFormat::SILO; +} + +} // namespace IO diff --git a/IO/Mesh.h b/IO/Mesh.h index b204675a..a420f95d 100644 --- a/IO/Mesh.h +++ b/IO/Mesh.h @@ -6,17 +6,36 @@ #include #include +#include "analysis/PointList.h" #include "common/Array.h" #include "common/Communication.h" -#include "analysis/PointList.h" namespace IO { -//! Possible variable types -enum class VariableType: unsigned char { NodeVariable=1, EdgeVariable=2, SurfaceVariable=2, VolumeVariable=3, NullVariable=0 }; -enum class DataType: unsigned char { Double=1, Float=2, Int=2, Null=0 }; +//! Enums to define types +enum class VariableType { + NodeVariable, + EdgeVariable, + SurfaceVariable, + VolumeVariable, + NullVariable +}; +enum class DataType { Double, Float, Int, Null }; +enum class MeshType { PointMesh, SurfaceMesh, VolumeMesh, Unknown }; +enum class FileFormat { OLD, NEW, NEW_SINGLE, SILO }; + + +//! 
Convert enums to/from strings (more future-proof than static_cast) +std::string getString( VariableType ); +std::string getString( DataType ); +std::string getString( MeshType ); +std::string getString( FileFormat ); +VariableType getVariableType( const std::string & ); +DataType getDataType( const std::string & ); +MeshType getMeshType( const std::string & ); +FileFormat getFileFormat( const std::string & ); /*! \class Mesh @@ -32,21 +51,22 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const = 0; //! Pack the data - virtual std::pair pack( int level ) const = 0; + virtual std::pair pack( int level ) const = 0; //! Unpack the data - virtual void unpack( const std::pair& data ) = 0; + virtual void unpack( const std::pair &data ) = 0; + protected: //! Empty constructor Mesh(); - Mesh(const Mesh&); - Mesh& operator=(const Mesh&); + Mesh( const Mesh & ); + Mesh &operator=( const Mesh & ); }; /*! \class PointList \brief A class used to hold a list of verticies */ -class PointList: public Mesh +class PointList : public Mesh { public: //! Empty constructor @@ -60,13 +80,14 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const; //! Pack the data - virtual std::pair pack( int level ) const; + virtual std::pair pack( int level ) const; //! Unpack the data - virtual void unpack( const std::pair& data ); + virtual void unpack( const std::pair &data ); //! Access the points - const std::vector& getPoints() const { return points; } + const std::vector &getPoints() const { return points; } + public: - std::vector points; //!< List of points vertex + std::vector points; //!< List of points vertex }; @@ -74,7 +95,7 @@ public: \brief A class used to hold a list of triangles specified by their vertex coordinates */ class TriMesh; -class TriList: public Mesh +class TriList : public Mesh { public: //! Empty constructor @@ -82,7 +103,7 @@ public: //! 
Constructor for N triangles TriList( size_t N_tri ); //! Constructor from TriMesh - TriList( const TriMesh& ); + TriList( const TriMesh & ); //! Destructor virtual ~TriList(); //! Mesh class name @@ -90,20 +111,22 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const; //! Pack the data - virtual std::pair pack( int level ) const; + virtual std::pair pack( int level ) const; //! Unpack the data - virtual void unpack( const std::pair& data ); + virtual void unpack( const std::pair &data ); + public: - std::vector A; //!< First vertex - std::vector B; //!< Second vertex - std::vector C; //!< Third vertex + std::vector A; //!< First vertex + std::vector B; //!< Second vertex + std::vector C; //!< Third vertex }; /*! \class TriMesh - \brief A class used to hold a list of trianges specified by their vertex number and list of coordiantes + \brief A class used to hold a list of trianges specified by their vertex number and list of + coordiantes */ -class TriMesh: public Mesh +class TriMesh : public Mesh { public: //! TriMesh constructor @@ -113,7 +136,7 @@ public: //! Constructor for Nt triangles and the given points TriMesh( size_t N_tri, std::shared_ptr points ); //! Constructor from TriList - TriMesh( const TriList& ); + TriMesh( const TriList & ); //! Destructor virtual ~TriMesh(); //! Mesh class name @@ -121,21 +144,22 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const; //! Pack the data - virtual std::pair pack( int level ) const; + virtual std::pair pack( int level ) const; //! 
Unpack the data - virtual void unpack( const std::pair& data ); + virtual void unpack( const std::pair &data ); + public: - std::shared_ptr vertices; //!< List of verticies - std::vector A; //!< First vertex - std::vector B; //!< Second vertex - std::vector C; //!< Third vertex + std::shared_ptr vertices; //!< List of verticies + std::vector A; //!< First vertex + std::vector B; //!< Second vertex + std::vector C; //!< Third vertex }; /*! \class Domain \brief A class used to hold the domain */ -class DomainMesh: public Mesh +class DomainMesh : public Mesh { public: //! Empty constructor @@ -149,9 +173,10 @@ public: //! Number of points for the given variable type virtual size_t numberPointsVar( VariableType type ) const; //! Pack the data - virtual std::pair pack( int level ) const; + virtual std::pair pack( int level ) const; //! Unpack the data - virtual void unpack( const std::pair& data ); + virtual void unpack( const std::pair &data ); + public: int nprocx, nprocy, nprocz, rank; int nx, ny, nz; @@ -159,37 +184,40 @@ public: }; - /*! \class Variable \brief A base class for variables */ -struct Variable -{ +struct Variable { public: // Internal variables - unsigned char dim; //!< Number of points per grid point (1: scalar, 3: vector, ...) - VariableType type; //!< Variable type - DataType precision; //!< Variable precision to use for IO - std::string name; //!< Variable name - Array data; //!< Variable data + unsigned char dim; //!< Number of points per grid point (1: scalar, 3: vector, ...) + VariableType type; //!< Variable type + DataType precision; //!< Variable precision to use for IO + std::string name; //!< Variable name + Array data; //!< Variable data //! Empty constructor - Variable(): dim(0), type(VariableType::NullVariable), precision(DataType::Double) {} + Variable() : dim( 0 ), type( VariableType::NullVariable ), precision( DataType::Double ) {} //! 
Constructor - Variable( int dim_, IO::VariableType type_, const std::string& name_ ): - dim(dim_), type(type_), precision(DataType::Double), name(name_) {} + Variable( int dim_, IO::VariableType type_, const std::string &name_ ) + : dim( dim_ ), type( type_ ), precision( DataType::Double ), name( name_ ) + { + } //! Constructor - Variable( int dim_, IO::VariableType type_, const std::string& name_, const Array& data_ ): - dim(dim_), type(type_), precision(DataType::Double), name(name_), data(data_) {} + Variable( + int dim_, IO::VariableType type_, const std::string &name_, const Array &data_ ) + : dim( dim_ ), type( type_ ), precision( DataType::Double ), name( name_ ), data( data_ ) + { + } //! Destructor virtual ~Variable() {} + protected: //! Empty constructor - Variable(const Variable&); - Variable& operator=(const Variable&); + Variable( const Variable & ); + Variable &operator=( const Variable & ); }; - /*! \class MeshDataStruct \brief A class used to hold database info for saving a mesh */ @@ -197,11 +225,11 @@ struct MeshDataStruct { DataType precision; //!< Precision to use for IO (mesh) std::string meshName; //!< Mesh name std::shared_ptr mesh; //!< Mesh data - std::vector > vars; + std::vector> vars; //! Empty constructor - MeshDataStruct(): precision(DataType::Double) {} + MeshDataStruct() : precision( DataType::Double ) {} //! 
Check the data - bool check() const; + bool check( bool abort = true ) const; }; @@ -214,7 +242,6 @@ std::shared_ptr getTriMesh( std::shared_ptr mesh ); std::shared_ptr getTriList( std::shared_ptr mesh ); -} // IO namespace +} // namespace IO #endif - diff --git a/IO/MeshDatabase.cpp b/IO/MeshDatabase.cpp index 2c03ddde..63702c7b 100644 --- a/IO/MeshDatabase.cpp +++ b/IO/MeshDatabase.cpp @@ -1,130 +1,144 @@ #include "IO/MeshDatabase.h" +#include "IO/IOHelpers.h" #include "IO/Mesh.h" #include "IO/PackData.h" -#include "IO/IOHelpers.h" #include "common/MPI.h" #include "common/Utilities.h" -#include +#include #include #include -#include +#include #include +// Default pack/unpack +// clang-format off +#define INSTANTIATE_PACK( TYPE ) \ + template<> \ + size_t packsize( const TYPE &rhs ) \ + { \ + return sizeof( TYPE ); \ + } \ + template<> \ + void pack( const TYPE &rhs, char *buffer ) \ + { \ + memcpy( buffer, &rhs, sizeof( IO::MeshType ) ); \ + } \ + template<> \ + void unpack( TYPE &data, const char *buffer ) \ + { \ + memcpy( &data, buffer, sizeof( IO::MeshType ) ); \ + } +INSTANTIATE_PACK( IO::VariableType ) +INSTANTIATE_PACK( IO::DataType ) +INSTANTIATE_PACK( IO::MeshType ) +INSTANTIATE_PACK( IO::FileFormat ) +// clang-format on + -// MeshType -template<> -size_t packsize( const IO::MeshType& rhs ) -{ - return sizeof(IO::MeshType); -} -template<> -void pack( const IO::MeshType& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(IO::MeshType)); -} -template<> -void unpack( IO::MeshType& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(IO::MeshType)); -} -// Variable::VariableType -template<> -size_t packsize( const IO::VariableType& rhs ) -{ - return sizeof(IO::VariableType); -} -template<> -void pack( const IO::VariableType& rhs, char *buffer ) -{ - memcpy(buffer,&rhs,sizeof(IO::VariableType)); -} -template<> -void unpack( IO::VariableType& data, const char *buffer ) -{ - memcpy(&data,buffer,sizeof(IO::VariableType)); -} // DatabaseEntry template<> 
-size_t packsize( const IO::DatabaseEntry& rhs ) +size_t packsize( const IO::DatabaseEntry &rhs ) { - return packsize(rhs.name)+packsize(rhs.file)+packsize(rhs.offset); + return packsize( rhs.name ) + packsize( rhs.file ) + packsize( rhs.offset ); } template<> -void pack( const IO::DatabaseEntry& rhs, char *buffer ) +void pack( const IO::DatabaseEntry &rhs, char *buffer ) { - size_t i=0; - pack(rhs.name,&buffer[i]); i+=packsize(rhs.name); - pack(rhs.file,&buffer[i]); i+=packsize(rhs.file); - pack(rhs.offset,&buffer[i]); i+=packsize(rhs.offset); + size_t i = 0; + pack( rhs.name, &buffer[i] ); + i += packsize( rhs.name ); + pack( rhs.file, &buffer[i] ); + i += packsize( rhs.file ); + pack( rhs.offset, &buffer[i] ); + i += packsize( rhs.offset ); } template<> -void unpack( IO::DatabaseEntry& data, const char *buffer ) +void unpack( IO::DatabaseEntry &data, const char *buffer ) { - size_t i=0; - unpack(data.name,&buffer[i]); i+=packsize(data.name); - unpack(data.file,&buffer[i]); i+=packsize(data.file); - unpack(data.offset,&buffer[i]); i+=packsize(data.offset); + size_t i = 0; + unpack( data.name, &buffer[i] ); + i += packsize( data.name ); + unpack( data.file, &buffer[i] ); + i += packsize( data.file ); + unpack( data.offset, &buffer[i] ); + i += packsize( data.offset ); } // VariableDatabase template<> -size_t packsize( const IO::VariableDatabase& rhs ) +size_t packsize( const IO::VariableDatabase &rhs ) { - return packsize(rhs.name)+packsize(rhs.type)+packsize(rhs.dim); + return packsize( rhs.name ) + packsize( rhs.type ) + packsize( rhs.dim ); } template<> -void pack( const IO::VariableDatabase& rhs, char *buffer ) +void pack( const IO::VariableDatabase &rhs, char *buffer ) { - size_t i=0; - pack(rhs.name,&buffer[i]); i+=packsize(rhs.name); - pack(rhs.type,&buffer[i]); i+=packsize(rhs.type); - pack(rhs.dim,&buffer[i]); i+=packsize(rhs.dim); + size_t i = 0; + pack( rhs.name, &buffer[i] ); + i += packsize( rhs.name ); + pack( rhs.type, &buffer[i] ); + i += packsize( 
rhs.type ); + pack( rhs.dim, &buffer[i] ); + i += packsize( rhs.dim ); } template<> -void unpack( IO::VariableDatabase& data, const char *buffer ) +void unpack( IO::VariableDatabase &data, const char *buffer ) { - size_t i=0; - unpack(data.name,&buffer[i]); i+=packsize(data.name); - unpack(data.type,&buffer[i]); i+=packsize(data.type); - unpack(data.dim,&buffer[i]); i+=packsize(data.dim); + size_t i = 0; + unpack( data.name, &buffer[i] ); + i += packsize( data.name ); + unpack( data.type, &buffer[i] ); + i += packsize( data.type ); + unpack( data.dim, &buffer[i] ); + i += packsize( data.dim ); } // MeshDatabase template<> -size_t packsize( const IO::MeshDatabase& data ) +size_t packsize( const IO::MeshDatabase &data ) { - return packsize(data.name) - + packsize(data.type) - + packsize(data.meshClass) - + packsize(data.format) - + packsize(data.domains) - + packsize(data.variables) - + packsize(data.variable_data); + return packsize( data.name ) + packsize( data.type ) + packsize( data.meshClass ) + + packsize( data.format ) + packsize( data.domains ) + packsize( data.variables ) + + packsize( data.variable_data ); } template<> -void pack( const IO::MeshDatabase& rhs, char *buffer ) +void pack( const IO::MeshDatabase &rhs, char *buffer ) { size_t i = 0; - pack(rhs.name,&buffer[i]); i+=packsize(rhs.name); - pack(rhs.type,&buffer[i]); i+=packsize(rhs.type); - pack(rhs.meshClass,&buffer[i]); i+=packsize(rhs.meshClass); - pack(rhs.format,&buffer[i]); i+=packsize(rhs.format); - pack(rhs.domains,&buffer[i]); i+=packsize(rhs.domains); - pack(rhs.variables,&buffer[i]); i+=packsize(rhs.variables); - pack(rhs.variable_data,&buffer[i]); i+=packsize(rhs.variable_data); + pack( rhs.name, &buffer[i] ); + i += packsize( rhs.name ); + pack( rhs.type, &buffer[i] ); + i += packsize( rhs.type ); + pack( rhs.meshClass, &buffer[i] ); + i += packsize( rhs.meshClass ); + pack( rhs.format, &buffer[i] ); + i += packsize( rhs.format ); + pack( rhs.domains, &buffer[i] ); + i += packsize( 
rhs.domains ); + pack( rhs.variables, &buffer[i] ); + i += packsize( rhs.variables ); + pack( rhs.variable_data, &buffer[i] ); + i += packsize( rhs.variable_data ); } template<> -void unpack( IO::MeshDatabase& data, const char *buffer ) +void unpack( IO::MeshDatabase &data, const char *buffer ) { - size_t i=0; - unpack(data.name,&buffer[i]); i+=packsize(data.name); - unpack(data.type,&buffer[i]); i+=packsize(data.type); - unpack(data.meshClass,&buffer[i]); i+=packsize(data.meshClass); - unpack(data.format,&buffer[i]); i+=packsize(data.format); - unpack(data.domains,&buffer[i]); i+=packsize(data.domains); - unpack(data.variables,&buffer[i]); i+=packsize(data.variables); - unpack(data.variable_data,&buffer[i]); i+=packsize(data.variable_data); + size_t i = 0; + unpack( data.name, &buffer[i] ); + i += packsize( data.name ); + unpack( data.type, &buffer[i] ); + i += packsize( data.type ); + unpack( data.meshClass, &buffer[i] ); + i += packsize( data.meshClass ); + unpack( data.format, &buffer[i] ); + i += packsize( data.format ); + unpack( data.domains, &buffer[i] ); + i += packsize( data.domains ); + unpack( data.variables, &buffer[i] ); + i += packsize( data.variables ); + unpack( data.variable_data, &buffer[i] ); + i += packsize( data.variable_data ); } @@ -132,79 +146,72 @@ namespace IO { /**************************************************** -* VariableDatabase * -****************************************************/ -bool VariableDatabase::operator==(const VariableDatabase& rhs ) const + * VariableDatabase * + ****************************************************/ +bool VariableDatabase::operator==( const VariableDatabase &rhs ) const { - return type==rhs.type && dim==rhs.dim && name==rhs.name; + return type == rhs.type && dim == rhs.dim && name == rhs.name; } -bool VariableDatabase::operator!=(const VariableDatabase& rhs ) const +bool VariableDatabase::operator!=( const VariableDatabase &rhs ) const { - return type!=rhs.type || dim!=rhs.dim || name!=rhs.name; + 
return type != rhs.type || dim != rhs.dim || name != rhs.name; } -bool VariableDatabase::operator>=(const VariableDatabase& rhs ) const +bool VariableDatabase::operator>=( const VariableDatabase &rhs ) const { - return operator>(rhs) || operator==(rhs); + return operator>( rhs ) || operator==( rhs ); } -bool VariableDatabase::operator<=(const VariableDatabase& rhs ) const +bool VariableDatabase::operator<=( const VariableDatabase &rhs ) const { return !operator>( rhs ); } +bool VariableDatabase::operator>( const VariableDatabase &rhs ) const { - return !operator>(rhs); -} -bool VariableDatabase::operator>(const VariableDatabase& rhs ) const -{ - if ( name>rhs.name ) + if ( name > rhs.name ) return true; - else if ( namerhs.type ) + if ( type > rhs.type ) return true; - else if ( typerhs.dim ) + if ( dim > rhs.dim ) return true; - else if ( dim(rhs) && operator!=(rhs); + return !operator>( rhs ) && operator!=( rhs ); } /**************************************************** -* MeshDatabase * -****************************************************/ -MeshDatabase::MeshDatabase() + * MeshDatabase * + ****************************************************/ +MeshDatabase::MeshDatabase() {} +MeshDatabase::~MeshDatabase() {} +MeshDatabase::MeshDatabase( const MeshDatabase &rhs ) { -} -MeshDatabase::~MeshDatabase() -{ -} -MeshDatabase::MeshDatabase(const MeshDatabase& rhs) -{ - name = rhs.name; - type = rhs.type; - meshClass = rhs.meshClass; - format = rhs.format; - domains = rhs.domains; - variables = rhs.variables; + name = rhs.name; + type = rhs.type; + meshClass = rhs.meshClass; + format = rhs.format; + domains = rhs.domains; + variables = rhs.variables; variable_data = rhs.variable_data; } -MeshDatabase& MeshDatabase::operator=(const MeshDatabase& rhs) +MeshDatabase &MeshDatabase::operator=( const MeshDatabase &rhs ) { - this->name = rhs.name; - this->type = rhs.type; - this->meshClass = rhs.meshClass; - this->format = rhs.format; - this->domains = rhs.domains; - 
this->variables = rhs.variables; + this->name = rhs.name; + this->type = rhs.type; + this->meshClass = rhs.meshClass; + this->format = rhs.format; + this->domains = rhs.domains; + this->variables = rhs.variables; this->variable_data = rhs.variable_data; return *this; } -VariableDatabase MeshDatabase::getVariableDatabase( const std::string& varname ) const +VariableDatabase MeshDatabase::getVariableDatabase( const std::string &varname ) const { - for (size_t i=0; i list = splitList(line,';'); - name = list[0]; - file = list[1]; - offset = atol(list[2].c_str()); + auto list = splitList( line, ';' ); + name = list[0]; + file = list[1]; + offset = atol( list[2].c_str() ); } -void DatabaseEntry::read( const char* line ) +void DatabaseEntry::read( const char *line ) { - std::vector list = splitList(line,';'); - name = list[0]; - file = list[1]; - offset = atol(list[2].c_str()); + auto list = splitList( line, ';' ); + name = list[0]; + file = list[1]; + offset = atol( list[2].c_str() ); } -void DatabaseEntry::read( const std::string& line ) +void DatabaseEntry::read( const std::string &line ) { - std::vector list = splitList(line.c_str(),';'); - name = list[0]; - file = list[1]; - offset = atol(list[2].c_str()); + auto list = splitList( line.c_str(), ';' ); + name = list[0]; + file = list[1]; + offset = atol( list[2].c_str() ); } // Gather the mesh databases from all processors -inline int tod( int N ) { return (N+7)/sizeof(double); } -std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ) +inline int tod( int N ) { return ( N + 7 ) / sizeof( double ); } +std::vector gatherAll( + const std::vector &meshes, const Utilities::MPI &comm ) { if ( comm.getSize() == 1 ) return meshes; - PROFILE_START("gatherAll"); - PROFILE_START("gatherAll-pack",2); + PROFILE_START( "gatherAll" ); + PROFILE_START( "gatherAll-pack", 2 ); int size = comm.getSize(); // First pack the mesh data to local buffers int localsize = 0; - for (size_t i=0; i data; + PROFILE_START( 
"gatherAll-unpack", 2 ); + std::map data; pos = 0; while ( pos < globalsize ) { MeshDatabase tmp; - unpack(tmp,(char*)&globalbuf[pos]); - pos += tod(packsize(tmp)); - std::map::iterator it = data.find(tmp.name); - if ( it==data.end() ) { + unpack( tmp, (char *) &globalbuf[pos] ); + pos += tod( packsize( tmp ) ); + std::map::iterator it = data.find( tmp.name ); + if ( it == data.end() ) { data[tmp.name] = tmp; } else { - for (size_t i=0; isecond.domains.push_back(tmp.domains[i]); - for (size_t i=0; isecond.variables.push_back(tmp.variables[i]); - it->second.variable_data.insert(tmp.variable_data.begin(),tmp.variable_data.end()); + for ( size_t i = 0; i < tmp.domains.size(); i++ ) + it->second.domains.push_back( tmp.domains[i] ); + for ( size_t i = 0; i < tmp.variables.size(); i++ ) + it->second.variables.push_back( tmp.variables[i] ); + it->second.variable_data.insert( tmp.variable_data.begin(), tmp.variable_data.end() ); } } - for (auto it=data.begin(); it!=data.end(); ++it) { + for ( auto it = data.begin(); it != data.end(); ++it ) { // Get the unique variables - std::set data2(it->second.variables.begin(),it->second.variables.end()); - it->second.variables = std::vector(data2.begin(),data2.end()); + std::set data2( + it->second.variables.begin(), it->second.variables.end() ); + it->second.variables = std::vector( data2.begin(), data2.end() ); } // Free temporary memory - delete [] localbuf; - delete [] disp; - delete [] globalbuf; + delete[] localbuf; + delete[] disp; + delete[] globalbuf; // Return the results - std::vector data2(data.size()); - size_t i=0; - for (std::map::iterator it=data.begin(); it!=data.end(); ++it, ++i) + std::vector data2( data.size() ); + size_t i = 0; + for ( auto it = data.begin(); it != data.end(); ++it, ++i ) data2[i] = it->second; - PROFILE_STOP("gatherAll-unpack",2); - PROFILE_STOP("gatherAll"); + PROFILE_STOP( "gatherAll-unpack", 2 ); + PROFILE_STOP( "gatherAll" ); return data2; } //! 
Write the mesh databases to a file -void write( const std::vector& meshes, const std::string& filename ) +void write( const std::vector &meshes, const std::string &filename ) { - PROFILE_START("write"); - FILE *fid = fopen(filename.c_str(),"wb"); - for (size_t i=0; i(meshes[i].type)); - fprintf(fid," meshClass: %s\n",meshes[i].meshClass.c_str()); - fprintf(fid," format: %i\n",static_cast(meshes[i].format)); - for (size_t j=0; j(var.type),var.dim); + PROFILE_START( "write" ); + FILE *fid = fopen( filename.c_str(), "wb" ); + for ( size_t i = 0; i < meshes.size(); i++ ) { + fprintf( fid, "%s\n", meshes[i].name.c_str() ); + fprintf( fid, " type: %s\n", getString( meshes[i].type ).data() ); + fprintf( fid, " meshClass: %s\n", meshes[i].meshClass.c_str() ); + fprintf( fid, " format: %s\n", getString( meshes[i].format ).data() ); + for ( size_t j = 0; j < meshes[i].domains.size(); j++ ) + fprintf( fid, " domain: %s\n", meshes[i].domains[j].write().c_str() ); + fprintf( fid, " variables: " ); + for ( size_t j = 0; j < meshes[i].variables.size(); j++ ) { + const VariableDatabase &var = meshes[i].variables[j]; + fprintf( fid, "%s|%s|%i; ", var.name.data(), getString( var.type ).data(), var.dim ); } - fprintf(fid,"\n"); - std::map,DatabaseEntry>::const_iterator it; - for (it=meshes[i].variable_data.begin(); it!=meshes[i].variable_data.end(); ++it) { - const char* domain = it->first.first.c_str(); - const char* variable = it->first.second.c_str(); - fprintf(fid," variable(%s,%s): %s\n",domain,variable,it->second.write().c_str()); + fprintf( fid, "\n" ); + for ( auto it = meshes[i].variable_data.begin(); it != meshes[i].variable_data.end(); + ++it ) { + const char *domain = it->first.first.c_str(); + const char *variable = it->first.second.c_str(); + fprintf( + fid, " variable(%s,%s): %s\n", domain, variable, it->second.write().c_str() ); } } - fclose(fid); - PROFILE_STOP("write"); + fclose( fid ); + PROFILE_STOP( "write" ); } //! 
Read the mesh databases from a file -std::vector read( const std::string& filename ) +std::vector read( const std::string &filename ) { std::vector meshes; - PROFILE_START("read"); - FILE *fid = fopen(filename.c_str(),"rb"); - if ( fid==NULL ) - ERROR("Error opening file"); + PROFILE_START( "read" ); + FILE *fid = fopen( filename.c_str(), "rb" ); + if ( fid == NULL ) + ERROR( "Error opening file" ); char *line = new char[10000]; - while ( std::fgets(line,1000,fid) != NULL ) { - if ( line[0]<32 ) { + while ( std::fgets( line, 1000, fid ) != NULL ) { + if ( line[0] < 32 ) { // Empty line continue; } else if ( line[0] != ' ' ) { - meshes.resize(meshes.size()+1); - std::string name(line); - name.resize(name.size()-1); + meshes.resize( meshes.size() + 1 ); + std::string name( line ); + name.resize( name.size() - 1 ); meshes.back().name = name; - } else if ( strncmp(line," format:",10)==0 ) { - meshes.back().format = static_cast(atoi(&line[10])); - } else if ( strncmp(line," type:",8)==0 ) { - meshes.back().type = static_cast(atoi(&line[8])); - } else if ( strncmp(line," meshClass:",13)==0 ) { - meshes.back().meshClass = deblank(std::string(&line[13])); - } else if ( strncmp(line," domain:",10)==0 ) { - DatabaseEntry data(&line[10]); - meshes.back().domains.push_back(data); - } else if ( strncmp(line," variables:",13)==0 ) { - MeshDatabase& mesh = meshes.back(); - std::vector variables = splitList(&line[13],';'); - mesh.variables.resize(variables.size()); - for (size_t i=0; i tmp = splitList(variables[i].c_str(),'|'); - ASSERT(tmp.size()==3); + } else if ( strncmp( line, " format:", 10 ) == 0 ) { + meshes.back().format = getFileFormat( &line[10] ); + } else if ( strncmp( line, " type:", 8 ) == 0 ) { + meshes.back().type = getMeshType( &line[8] ); + } else if ( strncmp( line, " meshClass:", 13 ) == 0 ) { + meshes.back().meshClass = deblank( std::string( &line[13] ) ); + } else if ( strncmp( line, " domain:", 10 ) == 0 ) { + DatabaseEntry data( &line[10] ); + 
meshes.back().domains.push_back( data ); + } else if ( strncmp( line, " variables:", 13 ) == 0 ) { + MeshDatabase &mesh = meshes.back(); + std::vector variables = splitList( &line[13], ';' ); + mesh.variables.resize( variables.size() ); + for ( size_t i = 0; i < variables.size(); i++ ) { + std::vector tmp = splitList( variables[i].c_str(), '|' ); + ASSERT( tmp.size() == 3 ); mesh.variables[i].name = tmp[0]; - mesh.variables[i].type = static_cast(atoi(tmp[1].c_str())); - mesh.variables[i].dim = atoi(tmp[2].c_str()); + mesh.variables[i].type = getVariableType( tmp[1] ); + mesh.variables[i].dim = atoi( tmp[2].c_str() ); } - } else if ( strncmp(line," variable(",12)==0 ) { - size_t i1 = find(line,','); - size_t i2 = find(line,':'); - std::string domain = deblank(std::string(line,12,i1-12)); - std::string variable = deblank(std::string(line,i1+1,i2-i1-2)); - std::pair key(domain,variable); - DatabaseEntry data(&line[i2+1]); - meshes.back().variable_data.insert( - std::pair,DatabaseEntry>(key,data) ); + } else if ( strncmp( line, " variable(", 12 ) == 0 ) { + size_t i1 = find( line, ',' ); + size_t i2 = find( line, ':' ); + std::string domain = deblank( std::string( line, 12, i1 - 12 ) ); + std::string variable = deblank( std::string( line, i1 + 1, i2 - i1 - 2 ) ); + std::pair key( domain, variable ); + DatabaseEntry data( &line[i2 + 1] ); + meshes.back().variable_data.insert( + std::pair, DatabaseEntry>( key, data ) ); } else { - ERROR("Error reading line"); + ERROR( "Error reading line" ); } } - fclose(fid); - delete [] line; - PROFILE_STOP("read"); + fclose( fid ); + delete[] line; + PROFILE_STOP( "read" ); return meshes; } // Return the mesh type -IO::MeshType meshType( const IO::Mesh& mesh ) +IO::MeshType meshType( const IO::Mesh &mesh ) { - IO::MeshType type = IO::Unknown; + IO::MeshType type = IO::MeshType::Unknown; const std::string meshClass = mesh.className(); - if ( meshClass=="PointList" ) { - type = IO::PointMesh; - } else if ( meshClass=="TriList" || 
meshClass=="TriMesh" ) { - type = IO::SurfaceMesh; - } else if ( meshClass=="DomainMesh" ) { - type = IO::VolumeMesh; + if ( meshClass == "PointList" ) { + type = IO::MeshType::PointMesh; + } else if ( meshClass == "TriList" || meshClass == "TriMesh" ) { + type = IO::MeshType::SurfaceMesh; + } else if ( meshClass == "DomainMesh" ) { + type = IO::MeshType::VolumeMesh; } else { - ERROR("Unknown mesh"); + ERROR( "Unknown mesh" ); } return type; } -} // IO namespace - +} // namespace IO diff --git a/IO/MeshDatabase.h b/IO/MeshDatabase.h index 8e501624..508f85d8 100644 --- a/IO/MeshDatabase.h +++ b/IO/MeshDatabase.h @@ -1,90 +1,85 @@ #ifndef MeshDatabase_INC #define MeshDatabase_INC -#include "IO/Mesh.h" +#include "IO/Mesh.h" #include "common/MPI.h" #include +#include #include #include #include -#include namespace IO { -class Mesh; - - -//! Enum to identify mesh type -//enum class MeshType : char { PointMesh=1, SurfaceMesh=2, VolumeMesh=3, Unknown=-1 }; -enum MeshType { PointMesh=1, SurfaceMesh=2, VolumeMesh=3, Unknown=-1 }; - //! 
Helper struct for containing offsets for the mesh info struct DatabaseEntry { - std::string name; //!< Name of the entry - std::string file; //!< Name of the file containing the entry - size_t offset; //!< Offset in the file to start reading - std::string write( ) const; //!< Convert the data to a string - void read( const char* line ); //!< Convert the string to data - void read( const std::string& line ); //!< Convert the string to data - DatabaseEntry( ) {} //!< Empty constructor - DatabaseEntry( const char* line ); //!< Convert the string to data - ~DatabaseEntry() {} //!< Destructor + std::string name; //!< Name of the entry + std::string file; //!< Name of the file containing the entry + size_t offset; //!< Offset in the file to start reading + std::string write() const; //!< Convert the data to a string + void read( const char *line ); //!< Convert the string to data + void read( const std::string &line ); //!< Convert the string to data + DatabaseEntry() {} //!< Empty constructor + DatabaseEntry( const char *line ); //!< Convert the string to data + ~DatabaseEntry() {} //!< Destructor }; //! Structure to hold the info about the variables struct VariableDatabase { - std::string name; //!< Name of the variable - IO::VariableType type; //!< Variable - unsigned int dim; //!< Number of points per grid point (1: scalar, 3: vector, ...) + std::string name; //!< Name of the variable + IO::VariableType type; //!< Variable + unsigned int dim; //!< Number of points per grid point (1: scalar, 3: vector, ...) 
// Overload key operators - bool operator==(const VariableDatabase& rhs ) const; - bool operator!=(const VariableDatabase& rhs ) const; - bool operator>=(const VariableDatabase& rhs ) const; - bool operator<=(const VariableDatabase& rhs ) const; - bool operator> (const VariableDatabase& rhs ) const; - bool operator< (const VariableDatabase& rhs ) const; + bool operator==( const VariableDatabase &rhs ) const; + bool operator!=( const VariableDatabase &rhs ) const; + bool operator>=( const VariableDatabase &rhs ) const; + bool operator<=( const VariableDatabase &rhs ) const; + bool operator>( const VariableDatabase &rhs ) const; + bool operator<( const VariableDatabase &rhs ) const; }; //! Structure to hold the info about the meshes struct MeshDatabase { - typedef std::pair variable_id; + typedef std::pair variable_id; std::string name; //!< Name of the mesh MeshType type; //!< Mesh type std::string meshClass; //!< Mesh class - unsigned char format; //!< Data format (1: old, 2: new, 3: new (single), 4: silo) + FileFormat format; //!< Data format (1: old, 2: new, 3: new (single), 4: silo) std::vector domains; //!< List of the domains - std::vector variables; //!< List of the variables - std::map variable_data; //!< Data for the variables - VariableDatabase getVariableDatabase( const std::string& varname ) const; + std::vector variables; //!< List of the variables + std::map variable_data; //!< Data for the variables + VariableDatabase getVariableDatabase( const std::string &varname ) const; + public: MeshDatabase(); ~MeshDatabase(); - MeshDatabase(const MeshDatabase&); - MeshDatabase& operator=(const MeshDatabase&); + MeshDatabase( const MeshDatabase & ); + MeshDatabase &operator=( const MeshDatabase & ); }; //! Gather the mesh databases from all processors -std::vector gatherAll( const std::vector& meshes, const Utilities::MPI& comm ); +std::vector gatherAll( + const std::vector &meshes, const Utilities::MPI &comm ); //! 
Write the mesh databases to a file -void write( const std::vector& meshes, const std::string& filename ); +void write( const std::vector &meshes, const std::string &filename ); //! Read the mesh databases from a file -std::vector read( const std::string& filename ); +std::vector read( const std::string &filename ); //! Return the mesh type -IO::MeshType meshType( const IO::Mesh& mesh ); +IO::MeshType meshType( const IO::Mesh &mesh ); -} // IO namespace +} // namespace IO #endif diff --git a/IO/PIO.cpp b/IO/PIO.cpp index fe0f7db4..f959cb49 100644 --- a/IO/PIO.cpp +++ b/IO/PIO.cpp @@ -1,10 +1,10 @@ #include "IO/PIO.h" -#include "common/Utilities.h" #include "common/MPI.h" +#include "common/Utilities.h" +#include #include #include -#include namespace IO { @@ -15,19 +15,18 @@ static ParallelStreamBuffer perr_buffer; static ParallelStreamBuffer plog_buffer; -std::ostream pout(&pout_buffer); -std::ostream perr(&perr_buffer); -std::ostream plog(&plog_buffer); - +std::ostream pout( &pout_buffer ); +std::ostream perr( &perr_buffer ); +std::ostream plog( &plog_buffer ); /**************************************************************************** -* Functions to control logging * -****************************************************************************/ -std::ofstream *global_filestream=NULL; -static void shutdownFilestream( ) + * Functions to control logging * + ****************************************************************************/ +std::ofstream *global_filestream = NULL; +static void shutdownFilestream() { - if ( global_filestream!=NULL ) { + if ( global_filestream != NULL ) { global_filestream->flush(); global_filestream->close(); delete global_filestream; @@ -37,16 +36,16 @@ static void shutdownFilestream( ) void Utilities::logOnlyNodeZero( const std::string &filename ) { int rank = 0; - #ifdef USE_MPI - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - #endif +#ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +#endif if ( rank == 0 ) - 
logAllNodes(filename,true); + logAllNodes( filename, true ); } void Utilities::logAllNodes( const std::string &filename, bool singleStream ) { if ( singleStream ) - ERROR("Not implimented yet"); + ERROR( "Not implimented yet" ); // If the filestream was open, then close it and reset streams shutdownFilestream(); @@ -55,33 +54,33 @@ void Utilities::logAllNodes( const std::string &filename, bool singleStream ) std::string full_filename = filename; if ( !singleStream ) { int rank = 0; - #ifdef USE_MPI - MPI_Comm_rank( MPI_COMM_WORLD, &rank ); - #endif +#ifdef USE_MPI + MPI_Comm_rank( MPI_COMM_WORLD, &rank ); +#endif char tmp[100]; - sprintf(tmp,".%04i",rank); - full_filename += std::string(tmp); + sprintf( tmp, ".%04i", rank ); + full_filename += std::string( tmp ); } - global_filestream = new std::ofstream(full_filename.c_str()); + global_filestream = new std::ofstream( full_filename.c_str() ); - if ( !(*global_filestream) ) { + if ( !( *global_filestream ) ) { delete global_filestream; global_filestream = NULL; perr << "PIO: Could not open log file ``" << full_filename << "''\n"; } else { - pout_buffer.setOutputStream(global_filestream); - pout_buffer.setOutputStream(&std::cout); - perr_buffer.setOutputStream(global_filestream); - perr_buffer.setOutputStream(&std::cerr); - plog_buffer.setOutputStream(global_filestream); + pout_buffer.setOutputStream( global_filestream ); + pout_buffer.setOutputStream( &std::cout ); + perr_buffer.setOutputStream( global_filestream ); + perr_buffer.setOutputStream( &std::cerr ); + plog_buffer.setOutputStream( global_filestream ); } } /**************************************************************************** -* ParallelStreamBuffer class * -****************************************************************************/ -void Utilities::stopLogging( ) + * ParallelStreamBuffer class * + ****************************************************************************/ +void Utilities::stopLogging() { pout_buffer.reset(); perr_buffer.reset(); 
@@ -93,77 +92,71 @@ void Utilities::stopLogging( ) /**************************************************************************** -* ParallelStreamBuffer class * -****************************************************************************/ -ParallelStreamBuffer::ParallelStreamBuffer( ): - d_rank(0), d_size(0), d_buffer_size(0), d_buffer(NULL) + * ParallelStreamBuffer class * + ****************************************************************************/ +ParallelStreamBuffer::ParallelStreamBuffer() + : d_rank( 0 ), d_size( 0 ), d_buffer_size( 0 ), d_buffer( NULL ) { } -ParallelStreamBuffer:: ~ParallelStreamBuffer() -{ - delete [] d_buffer; -} -void ParallelStreamBuffer::setOutputStream( std::ostream *stream ) -{ - d_stream.push_back( stream ); -} +ParallelStreamBuffer::~ParallelStreamBuffer() { delete[] d_buffer; } +void ParallelStreamBuffer::setOutputStream( std::ostream *stream ) { d_stream.push_back( stream ); } int ParallelStreamBuffer::sync() { - for (size_t i=0; i d_buffer_size ) { - if ( d_buffer_size==0 ) { + if ( d_buffer_size == 0 ) { d_buffer_size = 1024; - d_buffer = new char[d_buffer_size]; - memset(d_buffer,0,d_buffer_size); + d_buffer = new char[d_buffer_size]; + memset( d_buffer, 0, d_buffer_size ); } while ( size > d_buffer_size ) { char *tmp = d_buffer; d_buffer_size *= 2; d_buffer = new char[d_buffer_size]; - memset(d_buffer,0,d_buffer_size); - memcpy(d_buffer,tmp,d_size); - delete [] tmp; + memset( d_buffer, 0, d_buffer_size ); + memcpy( d_buffer, tmp, d_size ); + delete[] tmp; } } } -std::streamsize ParallelStreamBuffer::xsputn( const char* text, std::streamsize n ) +std::streamsize ParallelStreamBuffer::xsputn( const char *text, std::streamsize n ) { - reserve(d_size+n); - memcpy(&d_buffer[d_size],text,n); + reserve( d_size + n ); + memcpy( &d_buffer[d_size], text, n ); d_size += n; - if ( text[n-1]==0 || text[n-1]==10 ) { sync(); } + if ( text[n - 1] == 0 || text[n - 1] == 10 ) { + sync(); + } return n; } -int 
ParallelStreamBuffer::overflow(int ch) +int ParallelStreamBuffer::overflow( int ch ) { - reserve(d_size+1); + reserve( d_size + 1 ); d_buffer[d_size] = ch; d_size++; - if ( ch==0 || ch==10 ) { sync(); } - return std::char_traits::to_int_type(ch); + if ( ch == 0 || ch == 10 ) { + sync(); + } + return std::char_traits::to_int_type( ch ); } -int ParallelStreamBuffer::underflow() -{ - return -1; -} -void ParallelStreamBuffer::reset() +int ParallelStreamBuffer::underflow() { return -1; } +void ParallelStreamBuffer::reset() { sync(); d_stream.clear(); - delete [] d_buffer; - d_buffer = NULL; + delete[] d_buffer; + d_buffer = NULL; d_buffer_size = 0; } -} // IO namespace - +} // namespace IO diff --git a/IO/PIO.h b/IO/PIO.h index b6d8b103..9b8aeb89 100644 --- a/IO/PIO.h +++ b/IO/PIO.h @@ -17,7 +17,7 @@ extern std::ostream pout; /*! * Parallel output stream perr writes to the standard error from all nodes. - * Output is prepended with the processor number. + * Output is prepended with the processor number. */ extern std::ostream perr; @@ -45,12 +45,11 @@ inline int printp( const char *format, ... ); class ParallelStreamBuffer : public std::streambuf { public: - /*! * Create a parallel buffer class. The object will require further * initialization to set up the I/O streams and prefix string. */ - ParallelStreamBuffer( ); + ParallelStreamBuffer(); /*! * Set the output file stream (multiple output streams are supported) @@ -60,26 +59,26 @@ public: /*! * The destructor simply deallocates any internal data - * buffers. It does not modify the output streams. + * buffers. It does not modify the output streams. */ virtual ~ParallelStreamBuffer(); /*! * Synchronize the parallel buffer (called from streambuf). */ - virtual int sync(); + virtual int sync(); /** * Write the specified number of characters into the output stream (called * from streambuf). 
- */ - virtual std::streamsize xsputn(const char* text, std::streamsize n); + */ + virtual std::streamsize xsputn( const char *text, std::streamsize n ); /*! * Write an overflow character into the parallel buffer (called from * streambuf). */ - virtual int overflow(int ch); + virtual int overflow( int ch ); /*! * Read an overflow character from the parallel buffer (called from @@ -98,30 +97,30 @@ private: size_t d_size; size_t d_buffer_size; char *d_buffer; - std::vector d_stream; + std::vector d_stream; inline void reserve( size_t size ); }; namespace Utilities { - /*! - * Log messages for node zero only to the specified filename. All output - * to pout, perr, and plog on node zero will go to the log file. - */ - void logOnlyNodeZero( const std::string &filename ); +/*! + * Log messages for node zero only to the specified filename. All output + * to pout, perr, and plog on node zero will go to the log file. + */ +void logOnlyNodeZero( const std::string &filename ); - /*! - * Log messages from all nodes. The diagnostic data for processor XXXXX - * will be sent to a file with the name filename.XXXXX, where filename is - * the function argument. - */ - void logAllNodes( const std::string &filename, bool singleStream=false ); +/*! + * Log messages from all nodes. The diagnostic data for processor XXXXX + * will be sent to a file with the name filename.XXXXX, where filename is + * the function argument. + */ +void logAllNodes( const std::string &filename, bool singleStream = false ); - /*! - * Stop logging messages, flush buffers, and reset memory. - */ - void stopLogging( ); +/*! + * Stop logging messages, flush buffers, and reset memory. + */ +void stopLogging(); } // namespace Utilities diff --git a/IO/PIO.hpp b/IO/PIO.hpp index 67b32cdb..748bf32b 100644 --- a/IO/PIO.hpp +++ b/IO/PIO.hpp @@ -3,9 +3,9 @@ #include "IO/PIO.h" +#include #include #include -#include namespace IO { @@ -13,17 +13,17 @@ namespace IO { inline int printp( const char *format, ... 
) { - va_list ap; - va_start(ap,format); + va_list ap; + va_start( ap, format ); char tmp[1024]; - int n = vsprintf(tmp,format,ap); - va_end(ap); + int n = vsprintf( tmp, format, ap ); + va_end( ap ); pout << tmp; pout.flush(); return n; } -} // IO namespace +} // namespace IO #endif diff --git a/IO/PackData.cpp b/IO/PackData.cpp index f10d9ca7..3782914c 100644 --- a/IO/PackData.cpp +++ b/IO/PackData.cpp @@ -4,102 +4,101 @@ /******************************************************** -* Concrete implimentations for packing/unpacking * -********************************************************/ + * Concrete implimentations for packing/unpacking * + ********************************************************/ // unsigned char template<> -size_t packsize( const unsigned char& rhs ) +size_t packsize( const unsigned char &rhs ) { - return sizeof(unsigned char); + return sizeof( unsigned char ); } template<> -void pack( const unsigned char& rhs, char *buffer ) +void pack( const unsigned char &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(unsigned char)); + memcpy( buffer, &rhs, sizeof( unsigned char ) ); } template<> -void unpack( unsigned char& data, const char *buffer ) +void unpack( unsigned char &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(unsigned char)); + memcpy( &data, buffer, sizeof( unsigned char ) ); } // char template<> -size_t packsize( const char& rhs ) +size_t packsize( const char &rhs ) { - return sizeof(char); + return sizeof( char ); } template<> -void pack( const char& rhs, char *buffer ) +void pack( const char &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(char)); + memcpy( buffer, &rhs, sizeof( char ) ); } template<> -void unpack( char& data, const char *buffer ) +void unpack( char &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(char)); + memcpy( &data, buffer, sizeof( char ) ); } // int template<> -size_t packsize( const int& rhs ) +size_t packsize( const int &rhs ) { - return sizeof(int); + return sizeof( int ); } 
template<> -void pack( const int& rhs, char *buffer ) +void pack( const int &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(int)); + memcpy( buffer, &rhs, sizeof( int ) ); } template<> -void unpack( int& data, const char *buffer ) +void unpack( int &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(int)); + memcpy( &data, buffer, sizeof( int ) ); } // unsigned int template<> -size_t packsize( const unsigned int& rhs ) +size_t packsize( const unsigned int &rhs ) { - return sizeof(unsigned int); + return sizeof( unsigned int ); } template<> -void pack( const unsigned int& rhs, char *buffer ) +void pack( const unsigned int &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(int)); + memcpy( buffer, &rhs, sizeof( int ) ); } template<> -void unpack( unsigned int& data, const char *buffer ) +void unpack( unsigned int &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(int)); + memcpy( &data, buffer, sizeof( int ) ); } // size_t template<> -size_t packsize( const size_t& rhs ) +size_t packsize( const size_t &rhs ) { - return sizeof(size_t); + return sizeof( size_t ); } template<> -void pack( const size_t& rhs, char *buffer ) +void pack( const size_t &rhs, char *buffer ) { - memcpy(buffer,&rhs,sizeof(size_t)); + memcpy( buffer, &rhs, sizeof( size_t ) ); } template<> -void unpack( size_t& data, const char *buffer ) +void unpack( size_t &data, const char *buffer ) { - memcpy(&data,buffer,sizeof(size_t)); + memcpy( &data, buffer, sizeof( size_t ) ); } // std::string template<> -size_t packsize( const std::string& rhs ) +size_t packsize( const std::string &rhs ) { - return rhs.size()+1; + return rhs.size() + 1; } template<> -void pack( const std::string& rhs, char *buffer ) +void pack( const std::string &rhs, char *buffer ) { - memcpy(buffer,rhs.c_str(),rhs.size()+1); + memcpy( buffer, rhs.c_str(), rhs.size() + 1 ); } template<> -void unpack( std::string& data, const char *buffer ) +void unpack( std::string &data, const char *buffer ) { - data = 
std::string(buffer); + data = std::string( buffer ); } - diff --git a/IO/PackData.h b/IO/PackData.h index 85326c0b..f7c1d748 100644 --- a/IO/PackData.h +++ b/IO/PackData.h @@ -2,77 +2,76 @@ #ifndef included_PackData #define included_PackData -#include -#include #include +#include +#include //! Template function to return the buffer size required to pack a class template -size_t packsize( const TYPE& rhs ); +size_t packsize( const TYPE &rhs ); //! Template function to pack a class to a buffer template -void pack( const TYPE& rhs, char *buffer ); +void pack( const TYPE &rhs, char *buffer ); //! Template function to unpack a class from a buffer template -void unpack( TYPE& data, const char *buffer ); +void unpack( TYPE &data, const char *buffer ); //! Template function to return the buffer size required to pack a std::vector template -size_t packsize( const std::vector& rhs ); +size_t packsize( const std::vector &rhs ); //! Template function to pack a class to a buffer template -void pack( const std::vector& rhs, char *buffer ); +void pack( const std::vector &rhs, char *buffer ); //! Template function to pack a class to a buffer template -void unpack( std::vector& data, const char *buffer ); +void unpack( std::vector &data, const char *buffer ); //! Template function to return the buffer size required to pack a std::pair template -size_t packsize( const std::pair& rhs ); +size_t packsize( const std::pair &rhs ); //! Template function to pack a class to a buffer template -void pack( const std::pair& rhs, char *buffer ); +void pack( const std::pair &rhs, char *buffer ); //! Template function to pack a class to a buffer template -void unpack( std::pair& data, const char *buffer ); +void unpack( std::pair &data, const char *buffer ); //! Template function to return the buffer size required to pack a std::map template -size_t packsize( const std::map& rhs ); +size_t packsize( const std::map &rhs ); //! 
Template function to pack a class to a buffer template -void pack( const std::map& rhs, char *buffer ); +void pack( const std::map &rhs, char *buffer ); //! Template function to pack a class to a buffer template -void unpack( std::map& data, const char *buffer ); +void unpack( std::map &data, const char *buffer ); //! Template function to return the buffer size required to pack a std::set template -size_t packsize( const std::set& rhs ); +size_t packsize( const std::set &rhs ); //! Template function to pack a class to a buffer template -void pack( const std::set& rhs, char *buffer ); +void pack( const std::set &rhs, char *buffer ); //! Template function to pack a class to a buffer template -void unpack( std::set& data, const char *buffer ); +void unpack( std::set &data, const char *buffer ); #include "IO/PackData.hpp" #endif - diff --git a/IO/PackData.hpp b/IO/PackData.hpp index 006cdf73..fd74aa64 100644 --- a/IO/PackData.hpp +++ b/IO/PackData.hpp @@ -4,152 +4,156 @@ #include "IO/PackData.h" +#include +#include #include #include -#include -#include - /******************************************************** -* Default instantiations for std::vector * -********************************************************/ + * Default instantiations for std::vector * + ********************************************************/ template -size_t packsize( const std::vector& rhs ) +size_t packsize( const std::vector &rhs ) { - size_t bytes = sizeof(size_t); - for (size_t i=0; i -void pack( const std::vector& rhs, char *buffer ) +void pack( const std::vector &rhs, char *buffer ) { size_t size = rhs.size(); - memcpy(buffer,&size,sizeof(size_t)); - size_t pos = sizeof(size_t); - for (size_t i=0; i -void unpack( std::vector& data, const char *buffer ) +void unpack( std::vector &data, const char *buffer ) { size_t size; - memcpy(&size,buffer,sizeof(size_t)); + memcpy( &size, buffer, sizeof( size_t ) ); data.clear(); - data.resize(size); - size_t pos = sizeof(size_t); - for (size_t i=0; i 
-size_t packsize( const std::pair& rhs ) +size_t packsize( const std::pair &rhs ) { - return packsize(rhs.first)+packsize(rhs.second); + return packsize( rhs.first ) + packsize( rhs.second ); } template -void pack( const std::pair& rhs, char *buffer ) +void pack( const std::pair &rhs, char *buffer ) { - pack(rhs.first,buffer); - pack(rhs.second,&buffer[packsize(rhs.first)]); + pack( rhs.first, buffer ); + pack( rhs.second, &buffer[packsize( rhs.first )] ); } template -void unpack( std::pair& data, const char *buffer ) +void unpack( std::pair &data, const char *buffer ) { - unpack(data.first,buffer); - unpack(data.second,&buffer[packsize(data.first)]); + unpack( data.first, buffer ); + unpack( data.second, &buffer[packsize( data.first )] ); } /******************************************************** -* Default instantiations for std::map * -********************************************************/ + * Default instantiations for std::map * + ********************************************************/ template -size_t packsize( const std::map& rhs ) +size_t packsize( const std::map &rhs ) { - size_t bytes = sizeof(size_t); - typename std::map::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - bytes += packsize(it->first); - bytes += packsize(it->second); + size_t bytes = sizeof( size_t ); + typename std::map::const_iterator it; + for ( it = rhs.begin(); it != rhs.end(); ++it ) { + bytes += packsize( it->first ); + bytes += packsize( it->second ); } return bytes; } template -void pack( const std::map& rhs, char *buffer ) +void pack( const std::map &rhs, char *buffer ) { size_t N = rhs.size(); - pack(N,buffer); - size_t pos = sizeof(size_t); - typename std::map::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - pack(it->first,&buffer[pos]); pos+=packsize(it->first); - pack(it->second,&buffer[pos]); pos+=packsize(it->second); + pack( N, buffer ); + size_t pos = sizeof( size_t ); + typename std::map::const_iterator it; + for ( it = 
rhs.begin(); it != rhs.end(); ++it ) { + pack( it->first, &buffer[pos] ); + pos += packsize( it->first ); + pack( it->second, &buffer[pos] ); + pos += packsize( it->second ); } } template -void unpack( std::map& data, const char *buffer ) +void unpack( std::map &data, const char *buffer ) { size_t N = 0; - unpack(N,buffer); - size_t pos = sizeof(size_t); + unpack( N, buffer ); + size_t pos = sizeof( size_t ); data.clear(); - for (size_t i=0; i tmp; - unpack(tmp.first,&buffer[pos]); pos+=packsize(tmp.first); - unpack(tmp.second,&buffer[pos]); pos+=packsize(tmp.second); - data.insert(tmp); + for ( size_t i = 0; i < N; i++ ) { + std::pair tmp; + unpack( tmp.first, &buffer[pos] ); + pos += packsize( tmp.first ); + unpack( tmp.second, &buffer[pos] ); + pos += packsize( tmp.second ); + data.insert( tmp ); } } /******************************************************** -* Default instantiations for std::set * -********************************************************/ + * Default instantiations for std::set * + ********************************************************/ template -size_t packsize( const std::set& rhs ) +size_t packsize( const std::set &rhs ) { - size_t bytes = sizeof(size_t); + size_t bytes = sizeof( size_t ); typename std::set::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - bytes += packsize(*it); + for ( it = rhs.begin(); it != rhs.end(); ++it ) { + bytes += packsize( *it ); } return bytes; } template -void pack( const std::set& rhs, char *buffer ) +void pack( const std::set &rhs, char *buffer ) { size_t N = rhs.size(); - pack(N,buffer); - size_t pos = sizeof(size_t); + pack( N, buffer ); + size_t pos = sizeof( size_t ); typename std::set::const_iterator it; - for (it=rhs.begin(); it!=rhs.end(); ++it) { - pack(*it); pos+=packsize(*it); + for ( it = rhs.begin(); it != rhs.end(); ++it ) { + pack( *it ); + pos += packsize( *it ); } } template -void unpack( std::set& data, const char *buffer ) +void unpack( std::set &data, const char *buffer ) 
{ size_t N = 0; - unpack(N,buffer); - size_t pos = sizeof(size_t); + unpack( N, buffer ); + size_t pos = sizeof( size_t ); data.clear(); - for (size_t i=0; i +#include +#include #include +#include #include #include -#include -#include // Inline function to read line without a return argument -static inline void fgetl( char * str, int num, FILE * stream ) +static inline void fgetl( char *str, int num, FILE *stream ) { - char* ptr = fgets( str, num, stream ); - if ( 0 ) {char *temp = (char *)&ptr; temp++;} + char *ptr = fgets( str, num, stream ); + if ( 0 ) { + char *temp = (char *) &ptr; + temp++; + } +} + + +// Check if the file exists +bool fileExists( const std::string &filename ) +{ + std::ifstream ifile( filename.c_str() ); + return ifile.good(); } // Get the path to a file -std::string IO::getPath( const std::string& filename ) +std::string IO::getPath( const std::string &filename ) { - std::string file(filename); - size_t k1 = file.rfind(47); - size_t k2 = file.rfind(92); - if ( k1==std::string::npos ) { k1=0; } - if ( k2==std::string::npos ) { k2=0; } - return file.substr(0,std::max(k1,k2)); + std::string file( filename ); + size_t k1 = file.rfind( 47 ); + size_t k2 = file.rfind( 92 ); + if ( k1 == std::string::npos ) { + k1 = 0; + } + if ( k2 == std::string::npos ) { + k2 = 0; + } + return file.substr( 0, std::max( k1, k2 ) ); } -// List the timesteps in the given directors (dumps.LBPM) -std::vector IO::readTimesteps( const std::string& filename ) +// List the timesteps in the given directory (dumps.LBPM) +std::vector IO::readTimesteps( const std::string &path, const std::string &format ) { - PROFILE_START("readTimesteps"); - FILE *fid= fopen(filename.c_str(),"rb"); - if ( fid==NULL ) - ERROR("Error opening file"); + // Get the name of the summary filename + std::string filename = path + "/"; + if ( format == "old" || format == "new" ) { + filename += "summary.LBM"; + } else if ( format == "silo" ) { + filename += "LBM.visit"; + } else if ( format == "auto" 
) { + bool test_old = fileExists( path + "/summary.LBM" ); + bool test_silo = fileExists( path + "/LBM.visit" ); + if ( test_old && test_silo ) { + ERROR( "Unable to determine format (both summary.LBM and LBM.visit exist)" ); + } else if ( test_old ) { + filename += "summary.LBM"; + } else if ( test_silo ) { + filename += "LBM.visit"; + } else { + ERROR( "Unable to determine format (neither summary.LBM or LBM.visit exist)" ); + } + } else { + ERROR( "Unknown format: " + format ); + } + PROFILE_START( "readTimesteps" ); + // Read the data + FILE *fid = fopen( filename.c_str(), "rb" ); + if ( fid == NULL ) + ERROR( "Error opening file" ); std::vector timesteps; char buf[1000]; - while (fgets(buf,sizeof(buf),fid) != NULL) { - std::string line(buf); - line.resize(line.size()-1); + while ( fgets( buf, sizeof( buf ), fid ) != NULL ) { + std::string line( buf ); + line.resize( line.size() - 1 ); auto pos = line.find( "summary.silo" ); if ( pos != std::string::npos ) - line.resize(pos); + line.resize( pos ); if ( line.empty() ) continue; - timesteps.push_back(line); + timesteps.push_back( line ); } - fclose(fid); - PROFILE_STOP("readTimesteps"); + fclose( fid ); + PROFILE_STOP( "readTimesteps" ); + return timesteps; return timesteps; } +// Get the maximum number of domains +int IO::maxDomains( const std::string &path, const std::string &format, const Utilities::MPI &comm ) +{ + int rank = comm.getRank(); + int n_domains = 0; + if ( rank == 0 ) { + // Get the timesteps + auto timesteps = IO::readTimesteps( path, format ); + ASSERT( !timesteps.empty() ); + // Get the database for the first domain + auto db = IO::getMeshList( path, timesteps[0] ); + for ( size_t i = 0; i < db.size(); i++ ) + n_domains = std::max( n_domains, db[i].domains.size() ); + } + return comm.bcast( n_domains, 0 ); +} + + +// Read the data for the given timestep +std::vector IO::readData( + const std::string &path, const std::string ×tep, int rank ) +{ + // Get the mesh databases + auto db = 
IO::getMeshList( path, timestep ); + // Create the data + std::vector data( db.size() ); + for ( size_t i = 0; i < data.size(); i++ ) { + data[i].precision = IO::DataType::Double; + data[i].meshName = db[i].name; + data[i].mesh = getMesh( path, timestep, db[i], rank ); + data[i].vars.resize( db[i].variables.size() ); + for ( size_t j = 0; j < db[i].variables.size(); j++ ) + data[i].vars[j] = getVariable( path, timestep, db[i], rank, db[i].variables[j].name ); + INSIST( data[i].check(), "Failed check of " + data[i].meshName ); + } + return data; +} + + // Read the list of variables for the given timestep -std::vector IO::getMeshList( const std::string& path, const std::string& timestep ) +std::vector IO::getMeshList( + const std::string &path, const std::string ×tep ) { std::string filename = path + "/" + timestep + "/LBM.summary"; return IO::read( filename ); @@ -71,270 +150,271 @@ std::vector IO::getMeshList( const std::string& path, const st // Read the given mesh domain -std::shared_ptr IO::getMesh( const std::string& path, const std::string& timestep, - const IO::MeshDatabase& meshDatabase, int domain ) +std::shared_ptr IO::getMesh( const std::string &path, const std::string ×tep, + const IO::MeshDatabase &meshDatabase, int domain ) { - PROFILE_START("getMesh"); + PROFILE_START( "getMesh" ); std::shared_ptr mesh; - if ( meshDatabase.format==1 ) { + if ( meshDatabase.format == FileFormat::OLD ) { // Old format (binary doubles) std::string filename = path + "/" + timestep + "/" + meshDatabase.domains[domain].file; - FILE *fid = fopen(filename.c_str(),"rb"); - INSIST(fid!=NULL,"Error opening file"); + FILE *fid = fopen( filename.c_str(), "rb" ); + INSIST( fid != NULL, "Error opening file" ); fseek( fid, 0, SEEK_END ); - size_t bytes = ftell(fid); - size_t N_max = bytes/sizeof(double)+1000; + size_t bytes = ftell( fid ); + size_t N_max = bytes / sizeof( double ) + 1000; double *data = new double[N_max]; - fseek(fid,0,SEEK_SET); - size_t count = 
fread(data,sizeof(double),N_max,fid); - fclose(fid); - if ( count%3 != 0 ) - ERROR("Error reading file"); - if ( meshDatabase.type==IO::PointMesh ) { - size_t N = count/3; - std::shared_ptr pointlist( new PointList(N) ); - std::vector& P = pointlist->points; - for (size_t i=0; i pointlist( new PointList( N ) ); + std::vector &P = pointlist->points; + for ( size_t i = 0; i < N; i++ ) { + P[i].x = data[3 * i + 0]; + P[i].y = data[3 * i + 1]; + P[i].z = data[3 * i + 2]; } mesh = pointlist; - } else if ( meshDatabase.type==IO::SurfaceMesh ) { - if ( count%9 != 0 ) - ERROR("Error reading file (2)"); - size_t N_tri = count/9; - std::shared_ptr trilist( new TriList(N_tri) ); - std::vector& A = trilist->A; - std::vector& B = trilist->B; - std::vector& C = trilist->C; - for (size_t i=0; i trilist( new TriList( N_tri ) ); + std::vector &A = trilist->A; + std::vector &B = trilist->B; + std::vector &C = trilist->C; + for ( size_t i = 0; i < N_tri; i++ ) { + A[i].x = data[9 * i + 0]; + A[i].y = data[9 * i + 1]; + A[i].z = data[9 * i + 2]; + B[i].x = data[9 * i + 3]; + B[i].y = data[9 * i + 4]; + B[i].z = data[9 * i + 5]; + C[i].x = data[9 * i + 6]; + C[i].y = data[9 * i + 7]; + C[i].z = data[9 * i + 8]; } mesh = trilist; - } else if ( meshDatabase.type==IO::VolumeMesh ) { + } else if ( meshDatabase.type == IO::MeshType::VolumeMesh ) { // this was never supported in the old format mesh = std::shared_ptr( new DomainMesh() ); } else { - ERROR("Unknown mesh type"); + ERROR( "Unknown mesh type" ); } - delete [] data; - } else if ( meshDatabase.format==2 ) { - const DatabaseEntry& database = meshDatabase.domains[domain]; - std::string filename = path + "/" + timestep + "/" + database.file; - FILE *fid = fopen(filename.c_str(),"rb"); - fseek(fid,database.offset,SEEK_SET); + delete[] data; + } else if ( meshDatabase.format == FileFormat::NEW || + meshDatabase.format == FileFormat::NEW_SINGLE ) { + const DatabaseEntry &database = meshDatabase.domains[domain]; + std::string filename = 
path + "/" + timestep + "/" + database.file; + FILE *fid = fopen( filename.c_str(), "rb" ); + fseek( fid, database.offset, SEEK_SET ); char line[1000]; - fgetl(line,1000,fid); - size_t i1 = find(line,':'); - size_t i2 = find(&line[i1+1],':')+i1+1; - size_t bytes = atol(&line[i2+1]); - char *data = new char[bytes]; - size_t count = fread(data,1,bytes,fid); - fclose(fid); - ASSERT(count==bytes); - if ( meshDatabase.meshClass=="PointList" ) { + fgetl( line, 1000, fid ); + size_t i1 = find( line, ':' ); + size_t i2 = find( &line[i1 + 1], ':' ) + i1 + 1; + size_t bytes = atol( &line[i2 + 1] ); + char *data = new char[bytes]; + size_t count = fread( data, 1, bytes, fid ); + fclose( fid ); + ASSERT( count == bytes ); + if ( meshDatabase.meshClass == "PointList" ) { mesh.reset( new IO::PointList() ); - } else if ( meshDatabase.meshClass=="TriMesh" ) { + } else if ( meshDatabase.meshClass == "TriMesh" ) { mesh.reset( new IO::TriMesh() ); - } else if ( meshDatabase.meshClass=="TriList" ) { + } else if ( meshDatabase.meshClass == "TriList" ) { mesh.reset( new IO::TriList() ); - } else if ( meshDatabase.meshClass=="DomainMesh" ) { + } else if ( meshDatabase.meshClass == "DomainMesh" ) { mesh.reset( new IO::DomainMesh() ); } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } - mesh->unpack( std::pair(bytes,data) ); - delete [] data; - } else if ( meshDatabase.format==4 ) { + mesh->unpack( std::pair( bytes, data ) ); + delete[] data; + } else if ( meshDatabase.format == FileFormat::SILO ) { // Reading a silo file #ifdef USE_SILO - const DatabaseEntry& database = meshDatabase.domains[domain]; - std::string filename = path + "/" + timestep + "/" + database.file; - auto fid = silo::open( filename, silo::READ ); - if ( meshDatabase.meshClass=="PointList" ) { + const DatabaseEntry &database = meshDatabase.domains[domain]; + std::string filename = path + "/" + timestep + "/" + database.file; + auto fid = silo::open( filename, silo::READ ); + if ( 
meshDatabase.meshClass == "PointList" ) { Array coords = silo::readPointMesh( fid, database.name ); - ASSERT(coords.size(1)==3); - std::shared_ptr mesh2( new IO::PointList( coords.size(0) ) ); - for (size_t i=0; ipoints[i].x = coords(i,0); - mesh2->points[i].y = coords(i,1); - mesh2->points[i].z = coords(i,2); + ASSERT( coords.size( 1 ) == 3 ); + std::shared_ptr mesh2( new IO::PointList( coords.size( 0 ) ) ); + for ( size_t i = 0; i < coords.size( 1 ); i++ ) { + mesh2->points[i].x = coords( i, 0 ); + mesh2->points[i].y = coords( i, 1 ); + mesh2->points[i].z = coords( i, 2 ); } mesh = mesh2; - } else if ( meshDatabase.meshClass=="TriMesh" || meshDatabase.meshClass=="TriList" ) { + } else if ( meshDatabase.meshClass == "TriMesh" || meshDatabase.meshClass == "TriList" ) { Array coords; Array tri; silo::readTriMesh( fid, database.name, coords, tri ); - ASSERT( tri.size(1)==3 && coords.size(1)==3 ); - int N_tri = tri.size(0); - int N_point = coords.size(0); + ASSERT( tri.size( 1 ) == 3 && coords.size( 1 ) == 3 ); + int N_tri = tri.size( 0 ); + int N_point = coords.size( 0 ); std::shared_ptr mesh2( new IO::TriMesh( N_tri, N_point ) ); - for (int i=0; ivertices->points[i].x = coords(i,0); - mesh2->vertices->points[i].y = coords(i,1); - mesh2->vertices->points[i].z = coords(i,2); + for ( int i = 0; i < N_point; i++ ) { + mesh2->vertices->points[i].x = coords( i, 0 ); + mesh2->vertices->points[i].y = coords( i, 1 ); + mesh2->vertices->points[i].z = coords( i, 2 ); } - for (int i=0; iA[i] = tri(i,0); - mesh2->B[i] = tri(i,1); - mesh2->C[i] = tri(i,2); + for ( int i = 0; i < N_tri; i++ ) { + mesh2->A[i] = tri( i, 0 ); + mesh2->B[i] = tri( i, 1 ); + mesh2->C[i] = tri( i, 2 ); } - if ( meshDatabase.meshClass=="TriMesh" ) { + if ( meshDatabase.meshClass == "TriMesh" ) { mesh = mesh2; - } else if ( meshDatabase.meshClass=="TriList" ) { + } else if ( meshDatabase.meshClass == "TriList" ) { auto trilist = IO::getTriList( std::dynamic_pointer_cast( mesh2 ) ); - mesh = trilist; + 
mesh = trilist; } - } else if ( meshDatabase.meshClass=="DomainMesh" ) { + } else if ( meshDatabase.meshClass == "DomainMesh" ) { std::vector range; std::vector N; silo::readUniformMesh( fid, database.name, range, N ); - auto rankinfo = silo::read( fid, database.name+"_rankinfo" ); + auto rankinfo = silo::read( fid, database.name + "_rankinfo" ); RankInfoStruct rank_data( rankinfo[0], rankinfo[1], rankinfo[2], rankinfo[3] ); - mesh.reset( new IO::DomainMesh( rank_data, N[0], N[1], N[2], range[1]-range[0], range[3]-range[2], range[5]-range[4] ) ); + mesh.reset( new IO::DomainMesh( rank_data, N[0], N[1], N[2], range[1] - range[0], + range[3] - range[2], range[5] - range[4] ) ); } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } silo::close( fid ); #else - ERROR("Build without silo support"); + ERROR( "Build without silo support" ); #endif } else { - ERROR("Unknown format"); + ERROR( "Unknown format" ); } - PROFILE_STOP("getMesh"); + PROFILE_STOP( "getMesh" ); return mesh; } // Read the given variable for the given mesh domain -std::shared_ptr IO::getVariable( const std::string& path, const std::string& timestep, - const MeshDatabase& meshDatabase, int domain, const std::string& variable ) +std::shared_ptr IO::getVariable( const std::string &path, const std::string ×tep, + const MeshDatabase &meshDatabase, int domain, const std::string &variable ) { - std::pair key(meshDatabase.domains[domain].name,variable); - std::map,DatabaseEntry>::const_iterator it; - it = meshDatabase.variable_data.find(key); - if ( it==meshDatabase.variable_data.end() ) + std::pair key( meshDatabase.domains[domain].name, variable ); + auto it = meshDatabase.variable_data.find( key ); + if ( it == meshDatabase.variable_data.end() ) return std::shared_ptr(); std::shared_ptr var; - if ( meshDatabase.format == 2 ) { - const DatabaseEntry& database = it->second; - std::string filename = path + "/" + timestep + "/" + database.file; - FILE *fid = fopen(filename.c_str(),"rb"); - 
fseek(fid,database.offset,SEEK_SET); + if ( meshDatabase.format == FileFormat::NEW || meshDatabase.format == FileFormat::NEW_SINGLE ) { + const DatabaseEntry &database = it->second; + std::string filename = path + "/" + timestep + "/" + database.file; + FILE *fid = fopen( filename.c_str(), "rb" ); + fseek( fid, database.offset, SEEK_SET ); char line[1000]; - fgetl(line,1000,fid); - size_t i1 = find(line,':'); - size_t i2 = find(&line[i1+1],':')+i1+1; - std::vector values = splitList(&line[i2+1],','); - ASSERT(values.size()==5); - int dim = atoi(values[0].c_str()); - int type = atoi(values[1].c_str()); - size_t N = atol(values[2].c_str()); - size_t bytes = atol(values[3].c_str()); - std::string precision = values[4]; - var = std::shared_ptr( new IO::Variable() ); - var->dim = dim; - var->type = static_cast(type); - var->name = variable; - var->data.resize(N*dim); - if ( precision=="double" ) { - size_t count = fread(var->data.data(),sizeof(double),N*dim,fid); - ASSERT(count*sizeof(double)==bytes); + fgetl( line, 1000, fid ); + size_t i1 = find( line, ':' ); + size_t i2 = find( &line[i1 + 1], ':' ) + i1 + 1; + std::vector values = splitList( &line[i2 + 1], ',' ); + ASSERT( values.size() == 5 ); + int dim = atoi( values[0].c_str() ); + auto type = values[1]; + size_t N = atol( values[2].c_str() ); + size_t bytes = atol( values[3].c_str() ); + std::string precision = values[4]; + var = std::shared_ptr( new IO::Variable() ); + var->dim = dim; + var->type = getVariableType( type ); + var->name = variable; + var->data.resize( N, dim ); + if ( precision == "double" ) { + size_t count = fread( var->data.data(), sizeof( double ), N * dim, fid ); + ASSERT( count * sizeof( double ) == bytes ); } else { - ERROR("Format not implimented"); + ERROR( "Format not implimented" ); } - fclose(fid); - } else if ( meshDatabase.format == 4 ) { + fclose( fid ); + } else if ( meshDatabase.format == FileFormat::SILO ) { // Reading a silo file #ifdef USE_SILO - const auto& database = 
meshDatabase.domains[domain]; + const auto &database = meshDatabase.domains[domain]; auto variableDatabase = meshDatabase.getVariableDatabase( variable ); - std::string filename = path + "/" + timestep + "/" + database.file; - auto fid = silo::open( filename, silo::READ ); + std::string filename = path + "/" + timestep + "/" + database.file; + auto fid = silo::open( filename, silo::READ ); var.reset( new Variable( variableDatabase.dim, variableDatabase.type, variable ) ); - if ( meshDatabase.meshClass=="PointList" ) { + if ( meshDatabase.meshClass == "PointList" ) { var->data = silo::readPointMeshVariable( fid, variable ); - } else if ( meshDatabase.meshClass=="TriMesh" || meshDatabase.meshClass=="TriList" ) { + } else if ( meshDatabase.meshClass == "TriMesh" || meshDatabase.meshClass == "TriList" ) { var->data = silo::readTriMeshVariable( fid, variable ); - } else if ( meshDatabase.meshClass=="DomainMesh" ) { + } else if ( meshDatabase.meshClass == "DomainMesh" ) { var->data = silo::readUniformMeshVariable( fid, variable ); } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } silo::close( fid ); #else - ERROR("Build without silo support"); + ERROR( "Build without silo support" ); #endif } else { - ERROR("Unknown format"); + ERROR( "Unknown format" ); } return var; } /**************************************************** -* Reformat the variable to match the mesh * -****************************************************/ -void IO::reformatVariable( const IO::Mesh& mesh, IO::Variable& var ) + * Reformat the variable to match the mesh * + ****************************************************/ +void IO::reformatVariable( const IO::Mesh &mesh, IO::Variable &var ) { if ( mesh.className() == "DomainMesh" ) { - const IO::DomainMesh& mesh2 = dynamic_cast( mesh ); + const IO::DomainMesh &mesh2 = dynamic_cast( mesh ); if ( var.type == VariableType::NodeVariable ) { - size_t N2 = var.data.length() / ((mesh2.nx+1)*(mesh2.ny+1)*(mesh2.nz+1)); - ASSERT( 
(mesh2.nx+1)*(mesh2.ny+1)*(mesh2.nz+1)*N2 == var.data.length() ); - var.data.reshape( { (size_t) mesh2.nx+1, (size_t) mesh2.ny+1, (size_t) mesh2.nz+1, N2 } ); + size_t N2 = + var.data.length() / ( ( mesh2.nx + 1 ) * ( mesh2.ny + 1 ) * ( mesh2.nz + 1 ) ); + ASSERT( + ( mesh2.nx + 1 ) * ( mesh2.ny + 1 ) * ( mesh2.nz + 1 ) * N2 == var.data.length() ); + var.data.reshape( + { (size_t) mesh2.nx + 1, (size_t) mesh2.ny + 1, (size_t) mesh2.nz + 1, N2 } ); } else if ( var.type == VariableType::EdgeVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var.type == VariableType::SurfaceVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var.type == VariableType::VolumeVariable ) { - size_t N2 = var.data.length() / (mesh2.nx*mesh2.ny*mesh2.nz); - ASSERT( mesh2.nx*mesh2.ny*mesh2.nz*N2 == var.data.length() ); + size_t N2 = var.data.length() / ( mesh2.nx * mesh2.ny * mesh2.nz ); + ASSERT( mesh2.nx * mesh2.ny * mesh2.nz * N2 == var.data.length() ); var.data.reshape( { (size_t) mesh2.nx, (size_t) mesh2.ny, (size_t) mesh2.nz, N2 } ); } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } } else if ( mesh.className() == "PointList" ) { - const IO::PointList& mesh2 = dynamic_cast( mesh ); - size_t N = mesh2.points.size(); - size_t N_var = var.data.length()/N; - ASSERT( N*N_var == var.data.length() ); + const IO::PointList &mesh2 = dynamic_cast( mesh ); + size_t N = mesh2.points.size(); + size_t N_var = var.data.length() / N; + ASSERT( N * N_var == var.data.length() ); var.data.reshape( { N, N_var } ); - } else if ( mesh.className()=="TriMesh" || mesh.className() == "TriList" ) { - std::shared_ptr mesh_ptr( const_cast(&mesh), []( void* ) {} ); + } else if ( mesh.className() == "TriMesh" || mesh.className() == "TriList" ) { + std::shared_ptr mesh_ptr( const_cast( &mesh ), []( void * ) {} ); std::shared_ptr mesh2 = getTriMesh( mesh_ptr ); if ( var.type == VariableType::NodeVariable ) { - size_t N = 
mesh2->vertices->points.size(); - size_t N_var = var.data.length()/N; - ASSERT( N*N_var == var.data.length() ); + size_t N = mesh2->vertices->points.size(); + size_t N_var = var.data.length() / N; + ASSERT( N * N_var == var.data.length() ); var.data.reshape( { N, N_var } ); } else if ( var.type == VariableType::EdgeVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var.type == VariableType::SurfaceVariable ) { - ERROR("Not finished"); + ERROR( "Not finished" ); } else if ( var.type == VariableType::VolumeVariable ) { - size_t N = mesh2->A.size(); - size_t N_var = var.data.length()/N; - ASSERT( N*N_var == var.data.length() ); + size_t N = mesh2->A.size(); + size_t N_var = var.data.length() / N; + ASSERT( N * N_var == var.data.length() ); var.data.reshape( { N, N_var } ); } else { - ERROR("Invalid variable type"); + ERROR( "Invalid variable type" ); } } else { - ERROR("Unknown mesh type"); + ERROR( "Unknown mesh type" ); } } - - - diff --git a/IO/Reader.h b/IO/Reader.h index 4230ff8f..6542a2ea 100644 --- a/IO/Reader.h +++ b/IO/Reader.h @@ -14,20 +14,59 @@ namespace IO { //! Get the path to a file -std::string getPath( const std::string& filename ); +std::string getPath( const std::string &filename ); -//! List the timesteps in the given directors (dumps.LBPM) -std::vector readTimesteps( const std::string& filename ); +/*! + * @brief Get the maximum number of domains written + * @details This function reads the summary files to determine the maximum + * number of domains in the output. 
+ * @param[in] path The path to use for reading + * @param[in] format The data format to use: + * old - Old mesh format (provided for backward compatibility) + * new - New format, 1 file/process + * silo - Silo + * auto - Auto-determine the format + * @param[in] comm Optional comm to use to reduce IO load by + * reading on rank 0 and then communicating the result + */ +int maxDomains( const std::string &path, const std::string &format = "auto", + const Utilities::MPI &comm = MPI_COMM_SELF ); + + +/*! + * @brief Read the timestep list + * @details This function reads the timestep list from the summary file. + * @param[in] path The path to use for reading + * @param[in] format The data format to use: + * old - Old mesh format (provided for backward compatibility) + * new - New format, 1 file/process + * silo - Silo + * auto - Auto-determine the format + * @return Returns the list of timesteps read from the summary file + */ +std::vector readTimesteps( + const std::string &path, const std::string &format = "auto" ); + + +/*! + * @brief Read the data for the timestep + * @details This function reads the mesh and variable data provided for the given timestep. + * @param[in] path The path to use for reading + * @param[in] timestep The timestep iteration + * @param[in] domain The desired domain to read + */ +std::vector readData( + const std::string &path, const std::string &timestep, int domain ); //! Read the list of mesh databases for the given timestep -std::vector getMeshList( const std::string& path, const std::string& timestep ); +std::vector getMeshList( const std::string &path, const std::string &timestep ); //! Read the given mesh domain -std::shared_ptr getMesh( const std::string& path, const std::string& timestep, - const MeshDatabase& meshDatabase, int domain ); +std::shared_ptr getMesh( const std::string &path, const std::string &timestep, + const MeshDatabase &meshDatabase, int domain ); /*!
@@ -40,8 +79,8 @@ std::shared_ptr getMesh( const std::string& path, const std::string& t * @param[in] variable The variable name to read * @return Returns the variable data as a linear array */ -std::shared_ptr getVariable( const std::string& path, const std::string& timestep, - const MeshDatabase& meshDatabase, int domain, const std::string& variable ); +std::shared_ptr getVariable( const std::string &path, const std::string &timestep, + const MeshDatabase &meshDatabase, int domain, const std::string &variable ); /*! @@ -50,9 +89,9 @@ std::shared_ptr getVariable( const std::string& path, const std::s * @param[in] mesh The underlying mesh * @param[in/out] variable The variable name to read */ -void reformatVariable( const IO::Mesh& mesh, IO::Variable& var ); +void reformatVariable( const IO::Mesh &mesh, IO::Variable &var ); -} // IO namespace +} // namespace IO #endif diff --git a/IO/Writer.cpp b/IO/Writer.cpp index 61c333af..051db47d 100644 --- a/IO/Writer.cpp +++ b/IO/Writer.cpp @@ -1,28 +1,69 @@ #include "IO/Writer.h" -#include "IO/MeshDatabase.h" #include "IO/IOHelpers.h" +#include "IO/MeshDatabase.h" #include "IO/silo.h" #include "common/MPI.h" #include "common/Utilities.h" -#include #include -#include -#include #include +#include +#include +#include - -enum class Format { OLD, NEW, SILO, UNKNOWN }; - +enum class Format { OLD, NEW, SILO, UNKNOWN }; /**************************************************** -* Initialize the writer * -****************************************************/ + * Recursively create the subdirectory * + ****************************************************/ +static void recursiveMkdir( const std::string &path, mode_t mode ) +{ + // Iterate through the root directories until we create the desired path + for ( size_t pos = 0; pos < path.size(); ) { + // slide forward in string until next slash or backslash found + pos++; + for ( ; pos < path.size(); pos++ ) { + if ( path[pos] == '/' || path[pos] == 92 ) + break; + } + // Create the temporary path + auto
path2 = path.substr( 0, pos ); + // Check if the temporary path exists + struct stat status; + int result = stat( path2.data(), &status ); + if ( result == 0 ) { + // if there is a part of the path that already exists make sure it is really a directory + if ( !S_ISDIR( status.st_mode ) ) { + ERROR( + "Error in recursiveMkdir...\n" + " Cannot create directories in path = " + + path + + "\n because some intermediate item in path exists and is NOT a directory" ); + } + continue; + } + // Create the directory and test the result + result = mkdir( path2.data(), mode ); + if ( result != 0 ) { + // Maybe another rank created the directory, check + int result = stat( path2.data(), &status ); + if ( result != 0 && !S_ISDIR( status.st_mode ) ) + ERROR( "Error in Utilities::recursiveMkdir...\n" + " Cannot create directory = " + + path2 ); + } + } +} + + +/**************************************************** + * Initialize the writer * + ****************************************************/ static std::string global_IO_path; static Format global_IO_format = Format::UNKNOWN; -void IO::initialize( const std::string& path, const std::string& format, bool append ) +void IO::initialize( const std::string &path, const std::string &format, bool append ) { if ( path.empty() ) global_IO_path = "."; @@ -35,161 +76,168 @@ void IO::initialize( const std::string& path, const std::string& format, bool ap else if ( format == "silo" ) global_IO_format = Format::SILO; else - ERROR("Unknown format"); - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); - if ( !append && rank==0 ) { - mkdir(path.c_str(),S_IRWXU|S_IRGRP); + ERROR( "Unknown format" ); + int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); + if ( !append && rank == 0 ) { + recursiveMkdir( path, S_IRWXU | S_IRGRP ); std::string filename; - if ( global_IO_format==Format::OLD || global_IO_format==Format::NEW ) + if ( global_IO_format == Format::OLD || global_IO_format == Format::NEW ) filename = global_IO_path + "/summary.LBM"; - 
else if ( global_IO_format==Format::SILO ) + else if ( global_IO_format == Format::SILO ) filename = global_IO_path + "/LBM.visit"; else - ERROR("Unknown format"); - auto fid = fopen(filename.c_str(),"wb"); - fclose(fid); + ERROR( "Unknown format" ); + auto fid = fopen( filename.c_str(), "wb" ); + fclose( fid ); } } // Write the mesh data in the original format -static std::vector writeMeshesOrigFormat( const std::vector& meshData, const std::string& path ) +static std::vector writeMeshesOrigFormat( + const std::vector &meshData, const std::string &path, int rank ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; - for (size_t i=0; i mesh = meshData[i].mesh; IO::MeshDatabase mesh_entry; - mesh_entry.name = meshData[i].meshName; - mesh_entry.type = meshType(*mesh); + mesh_entry.name = meshData[i].meshName; + mesh_entry.type = meshType( *mesh ); mesh_entry.meshClass = meshData[i].mesh->className(); - mesh_entry.format = 1; + mesh_entry.format = IO::FileFormat::OLD; IO::DatabaseEntry domain; - domain.name = domainname; - domain.file = filename; + domain.name = domainname; + domain.file = filename; domain.offset = 0; - mesh_entry.domains.push_back(domain); + mesh_entry.domains.push_back( domain ); if ( !meshData[i].vars.empty() ) { - printf("Warning: variables are not supported with this format\n"); - //for (size_t j=0; jname ); } const std::string meshClass = mesh->className(); - if ( meshClass=="PointList" ) { + if ( meshClass == "PointList" ) { // List of points - std::shared_ptr pointlist = std::dynamic_pointer_cast(mesh); - const std::vector& P = pointlist->points; - for (size_t i=0; i pointlist = + std::dynamic_pointer_cast( mesh ); + const std::vector &P = pointlist->points; + for ( size_t i = 0; i < P.size(); i++ ) { double x[3]; - x[0] = P[i].x; x[1] = P[i].y; x[2] = P[i].z; - fwrite(x,sizeof(double),3,fid); + x[0] = P[i].x; + x[1] = P[i].y; + x[2] = P[i].z; + fwrite( x, sizeof( double ), 3, fid ); } - } else if ( 
meshClass=="TriList" || meshClass=="TriMesh" ) { + } else if ( meshClass == "TriList" || meshClass == "TriMesh" ) { // Triangle mesh - std::shared_ptr trilist = IO::getTriList(mesh); - const std::vector& A = trilist->A; - const std::vector& B = trilist->B; - const std::vector& C = trilist->C; - for (size_t i=0; i trilist = IO::getTriList( mesh ); + const std::vector &A = trilist->A; + const std::vector &B = trilist->B; + const std::vector &C = trilist->C; + for ( size_t i = 0; i < A.size(); i++ ) { double tri[9]; - tri[0] = A[i].x; tri[1] = A[i].y; tri[2] = A[i].z; - tri[3] = B[i].x; tri[4] = B[i].y; tri[5] = B[i].z; - tri[6] = C[i].x; tri[7] = C[i].y; tri[8] = C[i].z; - fwrite(tri,sizeof(double),9,fid); + tri[0] = A[i].x; + tri[1] = A[i].y; + tri[2] = A[i].z; + tri[3] = B[i].x; + tri[4] = B[i].y; + tri[5] = B[i].z; + tri[6] = C[i].x; + tri[7] = C[i].y; + tri[8] = C[i].z; + fwrite( tri, sizeof( double ), 9, fid ); } - } else if ( meshClass=="DomainMesh" ) { + } else if ( meshClass == "DomainMesh" ) { // This format was never supported with the old format } else { - ERROR("Unknown mesh"); + ERROR( "Unknown mesh" ); } - fclose(fid); + fclose( fid ); std::sort( mesh_entry.variables.begin(), mesh_entry.variables.end() ); - mesh_entry.variables.erase( std::unique( mesh_entry.variables.begin(), mesh_entry.variables.end() ), mesh_entry.variables.end() ); - meshes_written.push_back(mesh_entry); + mesh_entry.variables.erase( + std::unique( mesh_entry.variables.begin(), mesh_entry.variables.end() ), + mesh_entry.variables.end() ); + meshes_written.push_back( mesh_entry ); } return meshes_written; } // Create the database entry for the mesh data -static IO::MeshDatabase getDatabase( const std::string& filename, const IO::MeshDataStruct& mesh, int format ) +static IO::MeshDatabase getDatabase( + const std::string &filename, const IO::MeshDataStruct &mesh, IO::FileFormat format, int rank ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); char domainname[100]; - 
sprintf(domainname,"%s_%05i",mesh.meshName.c_str(),rank); + sprintf( domainname, "%s_%05i", mesh.meshName.c_str(), rank ); // Create the MeshDatabase IO::MeshDatabase database; - database.name = mesh.meshName; - database.type = meshType(*(mesh.mesh)); + database.name = mesh.meshName; + database.type = meshType( *( mesh.mesh ) ); database.meshClass = mesh.mesh->className(); - database.format = format; + database.format = format; // Write the mesh IO::DatabaseEntry domain; - domain.name = domainname; - domain.file = filename; + domain.name = domainname; + domain.file = filename; domain.offset = -1; - database.domains.push_back(domain); + database.domains.push_back( domain ); // Write the variables - for (size_t i=0; iname; info.type = mesh.vars[i]->type; - info.dim = mesh.vars[i]->dim; - database.variables.push_back(info); + info.dim = mesh.vars[i]->dim; + database.variables.push_back( info ); // Add domain variable info IO::DatabaseEntry variable; - variable.name = mesh.vars[i]->name; - variable.file = filename; + variable.name = mesh.vars[i]->name; + variable.file = filename; variable.offset = -1; - std::pair key(domain.name,mesh.vars[i]->name); - database.variable_data.insert( - std::pair,IO::DatabaseEntry>(key,variable) ); + std::pair key( domain.name, mesh.vars[i]->name ); + database.variable_data.insert( + std::pair, IO::DatabaseEntry>( key, variable ) ); } return database; } // Write a mesh (and variables) to a file -static IO::MeshDatabase write_domain( FILE *fid, const std::string& filename, - const IO::MeshDataStruct& mesh, int format ) +static IO::MeshDatabase write_domain( FILE *fid, const std::string &filename, + const IO::MeshDataStruct &mesh, IO::FileFormat format, int rank ) { const int level = 0; - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); // Create the MeshDatabase - IO::MeshDatabase database = getDatabase( filename, mesh, format ); + IO::MeshDatabase database = getDatabase( filename, mesh, format, rank ); // Write the mesh - 
IO::DatabaseEntry& domain = database.domains[0]; - domain.offset = ftell(fid); - std::pair data = mesh.mesh->pack(level); - fprintf(fid,"Mesh: %s-%05i: %lu\n",mesh.meshName.c_str(),rank,data.first); - fwrite(data.second,1,data.first,fid); - fprintf(fid,"\n"); - delete [] (char*) data.second; + IO::DatabaseEntry &domain = database.domains[0]; + domain.offset = ftell( fid ); + std::pair data = mesh.mesh->pack( level ); + fprintf( fid, "Mesh: %s-%05i: %lu\n", mesh.meshName.c_str(), rank, data.first ); + fwrite( data.second, 1, data.first, fid ); + fprintf( fid, "\n" ); + delete[]( char * ) data.second; // Write the variables - for (size_t i=0; i key(domain.name,mesh.vars[i]->name); - IO::DatabaseEntry& variable = database.variable_data[key]; - variable.offset = ftell(fid); - int dim = mesh.vars[i]->dim; - int type = static_cast(mesh.vars[i]->type); - size_t N = mesh.vars[i]->data.length(); - if ( type == static_cast(IO::VariableType::NullVariable) ) { - ERROR("Variable type not set"); - } - size_t N_mesh = mesh.mesh->numberPointsVar(mesh.vars[i]->type); - ASSERT(N==dim*N_mesh); - fprintf(fid,"Var: %s-%05i-%s: %i, %i, %lu, %lu, double\n", - database.name.c_str(), rank, variable.name.c_str(), - dim, type, N_mesh, N*sizeof(double) ); - fwrite(mesh.vars[i]->data.data(),sizeof(double),N,fid); - fprintf(fid,"\n"); + for ( size_t i = 0; i < mesh.vars.size(); i++ ) { + ASSERT( mesh.vars[i]->type != IO::VariableType::NullVariable ); + std::pair key( domain.name, mesh.vars[i]->name ); + auto &variable = database.variable_data[key]; + variable.offset = ftell( fid ); + int dim = mesh.vars[i]->dim; + auto type = getString( mesh.vars[i]->type ); + size_t N = mesh.vars[i]->data.length(); + size_t N_mesh = mesh.mesh->numberPointsVar( mesh.vars[i]->type ); + ASSERT( N == dim * N_mesh ); + fprintf( fid, "Var: %s-%05i-%s: %i, %s, %lu, %lu, double\n", database.name.c_str(), rank, + variable.name.c_str(), dim, type.data(), N_mesh, N * sizeof( double ) ); + fwrite( 
mesh.vars[i]->data.data(), sizeof( double ), N, fid ); + fprintf( fid, "\n" ); } return database; } @@ -198,72 +246,74 @@ static IO::MeshDatabase write_domain( FILE *fid, const std::string& filename, #ifdef USE_SILO // Write a PointList mesh (and variables) to a file template -static void writeSiloPointMesh( DBfile *fid, const IO::PointList& mesh, const std::string& meshname ) +static void writeSiloPointMesh( + DBfile *fid, const IO::PointList &mesh, const std::string &meshname ) { - const auto& points = mesh.getPoints(); - std::vector x(points.size()), y(points.size()), z(points.size()); - for (size_t i=0; i x( points.size() ), y( points.size() ), z( points.size() ); + for ( size_t i = 0; i < x.size(); i++ ) { x[i] = points[i].x; y[i] = points[i].y; z[i] = points[i].z; } const TYPE *coords[] = { x.data(), y.data(), z.data() }; - silo::writePointMesh( fid, meshname, 3, points.size(), coords ); + IO::silo::writePointMesh( fid, meshname, 3, points.size(), coords ); } -static void writeSiloPointList( DBfile *fid, const IO::MeshDataStruct& meshData, IO::MeshDatabase database ) +static void writeSiloPointList( + DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) { - const IO::PointList& mesh = dynamic_cast( *meshData.mesh ); + const IO::PointList &mesh = dynamic_cast( *meshData.mesh ); const std::string meshname = database.domains[0].name; if ( meshData.precision == IO::DataType::Double ) { writeSiloPointMesh( fid, mesh, meshname ); } else if ( meshData.precision == IO::DataType::Float ) { writeSiloPointMesh( fid, mesh, meshname ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } - const auto& points = mesh.getPoints(); - std::vector x(points.size()), y(points.size()), z(points.size()); - for (size_t i=0; i x( points.size() ), y( points.size() ), z( points.size() ); + for ( size_t i = 0; i < x.size(); i++ ) { x[i] = points[i].x; y[i] = points[i].y; z[i] = points[i].z; } const double *coords[] = { x.data(), y.data(), 
z.data() }; - silo::writePointMesh( fid, meshname, 3, points.size(), coords ); - for (size_t i=0; i data2( var.data.size() ); data2.copy( var.data ); - silo::writePointMeshVariable( fid, meshname, var.name, data2 ); + IO::silo::writePointMeshVariable( fid, meshname, var.name, data2 ); } else if ( var.precision == IO::DataType::Int ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writePointMeshVariable( fid, meshname, var.name, data2 ); + IO::silo::writePointMeshVariable( fid, meshname, var.name, data2 ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } } } // Write a TriMesh mesh (and variables) to a file template -static void writeSiloTriMesh( DBfile *fid, const IO::TriMesh& mesh, const std::string& meshname ) +static void writeSiloTriMesh( DBfile *fid, const IO::TriMesh &mesh, const std::string &meshname ) { - const auto& points = mesh.vertices->getPoints(); - std::vector x(points.size()), y(points.size()), z(points.size()); - for (size_t i=0; igetPoints(); + std::vector x( points.size() ), y( points.size() ), z( points.size() ); + for ( size_t i = 0; i < x.size(); i++ ) { x[i] = points[i].x; y[i] = points[i].y; z[i] = points[i].z; } const TYPE *coords[] = { x.data(), y.data(), z.data() }; - const int *tri[] = { mesh.A.data(), mesh.B.data(), mesh.C.data() }; - silo::writeTriMesh( fid, meshname, 3, 2, points.size(), coords, mesh.A.size(), tri ); + const int *tri[] = { mesh.A.data(), mesh.B.data(), mesh.C.data() }; + IO::silo::writeTriMesh( fid, meshname, 3, 2, points.size(), coords, mesh.A.size(), tri ); } -static void writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct& meshData, - const IO::TriMesh& mesh, IO::MeshDatabase database ) +static void writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct &meshData, + const IO::TriMesh &mesh, IO::MeshDatabase database ) { const std::string meshname = database.domains[0].name; if ( meshData.precision == IO::DataType::Double ) { @@ -271,238 +321,240 @@ static void 
writeSiloTriMesh2( DBfile *fid, const IO::MeshDataStruct& meshData, } else if ( meshData.precision == IO::DataType::Float ) { writeSiloTriMesh( fid, mesh, meshname ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } - for (size_t i=0; i( var.type ); + for ( size_t i = 0; i < meshData.vars.size(); i++ ) { + const IO::Variable &var = *meshData.vars[i]; if ( var.precision == IO::DataType::Double ) { - silo::writeTriMeshVariable( fid, 3, meshname, var.name, var.data, type ); + IO::silo::writeTriMeshVariable( fid, 3, meshname, var.name, var.data, var.type ); } else if ( var.precision == IO::DataType::Float ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, type ); + IO::silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, var.type ); } else if ( var.precision == IO::DataType::Int ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, type ); + IO::silo::writeTriMeshVariable( fid, 3, meshname, var.name, data2, var.type ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } } } -static void writeSiloTriMesh( DBfile *fid, const IO::MeshDataStruct& meshData, IO::MeshDatabase database ) +static void writeSiloTriMesh( + DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) { - const IO::TriMesh& mesh = dynamic_cast( *meshData.mesh ); + const IO::TriMesh &mesh = dynamic_cast( *meshData.mesh ); writeSiloTriMesh2( fid, meshData, mesh, database ); } -static void writeSiloTriList( DBfile *fid, const IO::MeshDataStruct& meshData, IO::MeshDatabase database ) +static void writeSiloTriList( + DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) { auto mesh = getTriMesh( meshData.mesh ); writeSiloTriMesh2( fid, meshData, *mesh, database ); } // Write a DomainMesh mesh (and variables) to a file -static void writeSiloDomainMesh( DBfile *fid, 
const IO::MeshDataStruct& meshData, IO::MeshDatabase database ) +static void writeSiloDomainMesh( + DBfile *fid, const IO::MeshDataStruct &meshData, IO::MeshDatabase database ) { - const IO::DomainMesh& mesh = dynamic_cast( *meshData.mesh ); + const IO::DomainMesh &mesh = dynamic_cast( *meshData.mesh ); RankInfoStruct info( mesh.rank, mesh.nprocx, mesh.nprocy, mesh.nprocz ); - std::array range = { info.ix*mesh.Lx/info.nx, (info.ix+1)*mesh.Lx/info.nx, - info.jy*mesh.Ly/info.ny, (info.jy+1)*mesh.Ly/info.ny, - info.kz*mesh.Lz/info.nz, (info.kz+1)*mesh.Lz/info.nz }; - std::array N = { mesh.nx, mesh.ny, mesh.nz }; - auto meshname = database.domains[0].name; - silo::writeUniformMesh<3>( fid, meshname, range, N ); - silo::write( fid, meshname+"_rankinfo", { mesh.rank, mesh.nprocx, mesh.nprocy, mesh.nprocz } ); - for (size_t i=0; i( var.type ); + std::array range = { info.ix * mesh.Lx / info.nx, + ( info.ix + 1 ) * mesh.Lx / info.nx, info.jy * mesh.Ly / info.ny, + ( info.jy + 1 ) * mesh.Ly / info.ny, info.kz * mesh.Lz / info.nz, + ( info.kz + 1 ) * mesh.Lz / info.nz }; + std::array N = { mesh.nx, mesh.ny, mesh.nz }; + auto meshname = database.domains[0].name; + IO::silo::writeUniformMesh<3>( fid, meshname, range, N ); + IO::silo::write( + fid, meshname + "_rankinfo", { mesh.rank, mesh.nprocx, mesh.nprocy, mesh.nprocz } ); + for ( size_t i = 0; i < meshData.vars.size(); i++ ) { + const auto &var = *meshData.vars[i]; if ( var.precision == IO::DataType::Double ) { - silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, var.data, type ); + IO::silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, var.data, var.type ); } else if ( var.precision == IO::DataType::Float ) { Array data2( var.data.size() ); data2.copy( var.data ); - silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, type ); + IO::silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, var.type ); } else if ( var.precision == IO::DataType::Int ) { Array data2( 
var.data.size() ); data2.copy( var.data ); - silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, type ); + IO::silo::writeUniformMeshVariable<3>( fid, meshname, N, var.name, data2, var.type ); } else { - ERROR("Unsupported format"); + ERROR( "Unsupported format" ); } } } // Write a mesh (and variables) to a file -static IO::MeshDatabase write_domain_silo( DBfile *fid, const std::string& filename, - const IO::MeshDataStruct& mesh, int format ) +static IO::MeshDatabase write_domain_silo( DBfile *fid, const std::string &filename, + const IO::MeshDataStruct &mesh, IO::FileFormat format, int rank ) { // Create the MeshDatabase - auto database = getDatabase( filename, mesh, format ); - if ( database.meshClass=="PointList" ) { + auto database = getDatabase( filename, mesh, format, rank ); + if ( database.meshClass == "PointList" ) { writeSiloPointList( fid, mesh, database ); - } else if ( database.meshClass=="TriMesh" ) { + } else if ( database.meshClass == "TriMesh" ) { writeSiloTriMesh( fid, mesh, database ); - } else if ( database.meshClass=="TriList" ) { + } else if ( database.meshClass == "TriList" ) { writeSiloTriList( fid, mesh, database ); - } else if ( database.meshClass=="DomainMesh" ) { + } else if ( database.meshClass == "DomainMesh" ) { writeSiloDomainMesh( fid, mesh, database ); } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } return database; } // Write the summary file for silo -std::pair getSiloMeshType( const std::string& meshClass ) +std::pair getSiloMeshType( const std::string &meshClass ) { int meshType = 0; - int varType = 0; - if ( meshClass=="PointList" ) { + int varType = 0; + if ( meshClass == "PointList" ) { meshType = DB_POINTMESH; varType = DB_POINTVAR; - } else if ( meshClass=="TriMesh" ) { + } else if ( meshClass == "TriMesh" ) { meshType = DB_UCDMESH; varType = DB_UCDVAR; - } else if ( meshClass=="TriList" ) { + } else if ( meshClass == "TriList" ) { meshType = DB_UCDMESH; varType = DB_UCDVAR; - } 
else if ( meshClass=="DomainMesh" ) { + } else if ( meshClass == "DomainMesh" ) { meshType = DB_QUAD_RECT; varType = DB_QUADVAR; } else { - ERROR("Unknown mesh class"); + ERROR( "Unknown mesh class" ); } return std::make_pair( meshType, varType ); } -void writeSiloSummary( const std::vector& meshes_written, const std::string& filename ) +void writeSiloSummary( + const std::vector &meshes_written, const std::string &filename ) { - auto fid = silo::open( filename, silo::CREATE ); - for ( const auto& data : meshes_written ) { + auto fid = IO::silo::open( filename, IO::silo::CREATE ); + for ( const auto &data : meshes_written ) { auto type = getSiloMeshType( data.meshClass ); std::vector meshTypes( data.domains.size(), type.first ); std::vector varTypes( data.domains.size(), type.second ); std::vector meshNames; - for ( const auto& tmp : data.domains ) + for ( const auto &tmp : data.domains ) meshNames.push_back( tmp.file + ":" + tmp.name ); - silo::writeMultiMesh( fid, data.name, meshNames, meshTypes ); - for (const auto& variable : data.variables ) { + IO::silo::writeMultiMesh( fid, data.name, meshNames, meshTypes ); + for ( const auto &variable : data.variables ) { std::vector varnames; - for ( const auto& tmp : data.domains ) + for ( const auto &tmp : data.domains ) varnames.push_back( tmp.file + ":" + variable.name ); - silo::writeMultiVar( fid, variable.name, varnames, varTypes ); + IO::silo::writeMultiVar( fid, variable.name, varnames, varTypes ); } } - silo::close( fid ); + IO::silo::close( fid ); } #endif // Write the mesh data in the new format -static std::vector writeMeshesNewFormat( - const std::vector& meshData, const std::string& path, int format ) +static std::vector writeMeshesNewFormat( + const std::vector &meshData, const std::string &path, IO::FileFormat format, + int rank ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; char filename[100], fullpath[200]; - sprintf(filename,"%05i",rank); - 
sprintf(fullpath,"%s/%s",path.c_str(),filename); - FILE *fid = fopen(fullpath,"wb"); - for (size_t i=0; i mesh = meshData[i].mesh; - meshes_written.push_back( write_domain(fid,filename,meshData[i],format) ); + meshes_written.push_back( write_domain( fid, filename, meshData[i], format, rank ) ); } - fclose(fid); + fclose( fid ); return meshes_written; } // Write the mesh data to silo -static std::vector writeMeshesSilo( - const std::vector& meshData, const std::string& path, int format ) +static std::vector writeMeshesSilo( + const std::vector &meshData, const std::string &path, IO::FileFormat format, + int rank ) { #ifdef USE_SILO - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); std::vector meshes_written; char filename[100], fullpath[200]; - sprintf(filename,"%05i.silo",rank); - sprintf(fullpath,"%s/%s",path.c_str(),filename); - auto fid = silo::open( fullpath, silo::CREATE ); - for (size_t i=0; i(); #endif -} +} /**************************************************** -* Write the mesh data * -****************************************************/ -void IO::writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ) + * Write the mesh data * + ****************************************************/ +void IO::writeData( const std::string &subdir, const std::vector &meshData, + const Utilities::MPI &comm ) { if ( global_IO_path.empty() ) - IO::initialize( ); - PROFILE_START("writeData"); - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + IO::initialize(); + PROFILE_START( "writeData" ); + int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); // Check the meshData before writing - for ( const auto& data : meshData ) { - if ( !data.check() ) - ERROR("Error in meshData"); - } + for ( const auto &data : meshData ) + ASSERT( data.check() ); // Create the output directory std::string path = global_IO_path + "/" + subdir; - if ( rank == 0 ) { - mkdir(path.c_str(),S_IRWXU|S_IRGRP); - } - comm.barrier(); + recursiveMkdir( path, 
S_IRWXU | S_IRGRP ); // Write the mesh files std::vector meshes_written; if ( global_IO_format == Format::OLD ) { // Write the original triangle format - meshes_written = writeMeshesOrigFormat( meshData, path ); + meshes_written = writeMeshesOrigFormat( meshData, path, rank ); } else if ( global_IO_format == Format::NEW ) { // Write the new format (double precision) - meshes_written = writeMeshesNewFormat( meshData, path, 2 ); + meshes_written = writeMeshesNewFormat( meshData, path, IO::FileFormat::NEW, rank ); } else if ( global_IO_format == Format::SILO ) { // Write silo - meshes_written = writeMeshesSilo( meshData, path, 4 ); + meshes_written = writeMeshesSilo( meshData, path, IO::FileFormat::SILO, rank ); } else { - ERROR("Unknown format"); + ERROR( "Unknown format" ); } // Gather a complete list of files on rank 0 - meshes_written = gatherAll(meshes_written,comm); + meshes_written = gatherAll( meshes_written, comm ); // Write the summary files if ( rank == 0 ) { // Write the summary file for the current timestep char filename[200]; - sprintf(filename,"%s/LBM.summary",path.c_str()); - write(meshes_written,filename); - // Write summary silo file if needed - #ifdef USE_SILO + sprintf( filename, "%s/LBM.summary", path.c_str() ); + write( meshes_written, filename ); +// Write summary silo file if needed +#ifdef USE_SILO if ( global_IO_format == Format::SILO ) { - sprintf(filename,"%s/summary.silo",path.c_str()); - writeSiloSummary(meshes_written,filename); + sprintf( filename, "%s/summary.silo", path.c_str() ); + writeSiloSummary( meshes_written, filename ); } - #endif +#endif // Add the timestep to the global summary file if ( global_IO_format == Format::OLD || global_IO_format == Format::NEW ) { - auto filename = global_IO_path+"/summary.LBM"; - FILE *fid = fopen(filename.c_str(),"ab"); - fprintf(fid,"%s/\n",subdir.c_str()); - fclose(fid); + auto filename = global_IO_path + "/summary.LBM"; + FILE *fid = fopen( filename.c_str(), "ab" ); + fprintf( fid, "%s/\n", 
subdir.c_str() ); + fclose( fid ); } else if ( global_IO_format == Format::SILO ) { - auto filename = global_IO_path+"/LBM.visit"; - FILE *fid = fopen(filename.c_str(),"ab"); - fprintf(fid,"%s/summary.silo\n",subdir.c_str()); - fclose(fid); + auto filename = global_IO_path + "/LBM.visit"; + FILE *fid = fopen( filename.c_str(), "ab" ); + fprintf( fid, "%s/summary.silo\n", subdir.c_str() ); + fclose( fid ); } else { - ERROR("Unknown format"); + ERROR( "Unknown format" ); } } - PROFILE_STOP("writeData"); + PROFILE_STOP( "writeData" ); } - - diff --git a/IO/Writer.h b/IO/Writer.h index dfc22db8..c3d9d5bb 100644 --- a/IO/Writer.h +++ b/IO/Writer.h @@ -14,17 +14,18 @@ namespace IO { /*! * @brief Initialize the writer - * @details This function initializes the writer to the given path. All subsequent - * writes will occur in this directory. If this is not called, then it will default - * to the current path. + * @details This function initializes the writer to the given path. + * All subsequent writes will occur in this directory. + * If this is not called, then it will default to the current path. * @param[in] path The path to use for writes * @param[in] format The data format to use: - * old - Old mesh format (provided for backward compatibility, cannot write variables) - * new - New format, 1 file/process - * silo - Silo + * old - Old mesh format + * (provided for backward compatibility, cannot write variables) + * new - New format, 1 file/process silo - Silo * @param[in] append Append any existing data (default is false) */ -void initialize( const std::string& path="", const std::string& format="silo", bool append=false ); +void initialize( + const std::string &path = "", const std::string &format = "silo", bool append = false ); /*! 
@@ -34,7 +35,8 @@ void initialize( const std::string& path="", const std::string& format="silo", b * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -void writeData( const std::string& subdir, const std::vector& meshData, const Utilities::MPI& comm ); +void writeData( const std::string &subdir, const std::vector &meshData, + const Utilities::MPI &comm ); /*! @@ -44,14 +46,15 @@ void writeData( const std::string& subdir, const std::vector * @param[in] meshData The data to write * @param[in] comm The comm to use for writing (usually MPI_COMM_WORLD or a dup thereof) */ -inline void writeData( int timestep, const std::vector& meshData, const Utilities::MPI& comm ) +inline void writeData( + int timestep, const std::vector &meshData, const Utilities::MPI &comm ) { char subdir[100]; - sprintf(subdir,"vis%03i",timestep); + sprintf( subdir, "vis%03i", timestep ); writeData( subdir, meshData, comm ); } -} // IO namespace +} // namespace IO #endif diff --git a/IO/netcdf.cpp b/IO/netcdf.cpp index 6c3773e3..06f41dba 100644 --- a/IO/netcdf.cpp +++ b/IO/netcdf.cpp @@ -1,6 +1,6 @@ #include "IO/netcdf.h" -#include "common/Utilities.h" #include "common/MPI.h" +#include "common/Utilities.h" #include "ProfilerApp.h" @@ -12,14 +12,14 @@ #include -#define CHECK_NC_ERR( ERR ) \ - do { \ - if ( ERR != NC_NOERR ) { \ +#define CHECK_NC_ERR( ERR ) \ + do { \ + if ( ERR != NC_NOERR ) { \ std::string msg = "Error calling netcdf routine: "; \ - msg += nc_strerror( ERR ); \ - ERROR( msg ); \ - } \ - } while (0) + msg += nc_strerror( ERR ); \ + ERROR( msg ); \ + } \ + } while ( 0 ) namespace netcdf { @@ -50,43 +50,64 @@ static inline VariableType convertType( nc_type type ) else if ( type == NC_DOUBLE ) type2 = DOUBLE; else - ERROR("Unknown type"); + ERROR( "Unknown type" ); return type2; } // Get nc_type from the template -template inline nc_type getType(); -template<> inline nc_type getType() { return NC_CHAR; } 
-template<> inline nc_type getType() { return NC_SHORT; } -template<> inline nc_type getType() { return NC_INT; } -template<> inline nc_type getType() { return NC_FLOAT; } -template<> inline nc_type getType() { return NC_DOUBLE; } +template +inline nc_type getType(); +template<> +inline nc_type getType() +{ + return NC_CHAR; +} +template<> +inline nc_type getType() +{ + return NC_SHORT; +} +template<> +inline nc_type getType() +{ + return NC_INT; +} +template<> +inline nc_type getType() +{ + return NC_FLOAT; +} +template<> +inline nc_type getType() +{ + return NC_DOUBLE; +} // Function to reverse an array template -inline std::vector reverse( const std::vector& x ) +inline std::vector reverse( const std::vector &x ) { - std::vector y(x.size()); - for (size_t i=0; i y( x.size() ); + for ( size_t i = 0; i < x.size(); i++ ) + y[i] = x[x.size() - i - 1]; return y; } // Function to reverse an array template -inline std::vector convert( const std::vector& x ) +inline std::vector convert( const std::vector &x ) { - std::vector y(x.size()); - for (size_t i=0; i(x[i]); + std::vector y( x.size() ); + for ( size_t i = 0; i < x.size(); i++ ) + y[i] = static_cast( x[i] ); return y; } /**************************************************** -* Convert the VariableType to a string * -****************************************************/ + * Convert the VariableType to a string * + ****************************************************/ std::string VariableTypeName( VariableType type ) { if ( type == BYTE ) @@ -114,9 +135,9 @@ std::string VariableTypeName( VariableType type ) /**************************************************** -* Open/close a file * -****************************************************/ -int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm ) + * Open/close a file * + ****************************************************/ +int open( const std::string &filename, FileMode mode, const Utilities::MPI &comm ) { int fid = 0; if ( comm.isNull() ) { 
@@ -127,23 +148,26 @@ int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm int err = nc_open( filename.c_str(), NC_WRITE, &fid ); CHECK_NC_ERR( err ); } else if ( mode == CREATE ) { - int err = nc_create( filename.c_str(), NC_SHARE|NC_64BIT_OFFSET, &fid ); + int err = nc_create( filename.c_str(), NC_SHARE | NC_64BIT_OFFSET, &fid ); CHECK_NC_ERR( err ); } else { - ERROR("Unknown file mode"); + ERROR( "Unknown file mode" ); } } else { if ( mode == READ ) { - int err = nc_open_par( filename.c_str(), NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_open_par( + filename.c_str(), NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else if ( mode == WRITE ) { - int err = nc_open_par( filename.c_str(), NC_WRITE|NC_MPIPOSIX, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_open_par( filename.c_str(), NC_WRITE | NC_MPIPOSIX, comm.getCommunicator(), + MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else if ( mode == CREATE ) { - int err = nc_create_par( filename.c_str(), NC_NETCDF4|NC_MPIIO, comm.getCommunicator(), MPI_INFO_NULL, &fid ); + int err = nc_create_par( filename.c_str(), NC_NETCDF4 | NC_MPIIO, + comm.getCommunicator(), MPI_INFO_NULL, &fid ); CHECK_NC_ERR( err ); } else { - ERROR("Unknown file mode"); + ERROR( "Unknown file mode" ); } } return fid; @@ -152,42 +176,42 @@ void close( int fid ) { int err = nc_close( fid ); if ( err != NC_NOERR ) - ERROR("Error closing file"); + ERROR( "Error closing file" ); } /**************************************************** -* Query basic properties * -****************************************************/ + * Query basic properties * + ****************************************************/ static std::vector getDimVar( int fid, int varid ) { int ndim = 0; - int err = nc_inq_varndims( fid, varid, &ndim ); + int err = nc_inq_varndims( fid, varid, &ndim ); CHECK_NC_ERR( err ); - std::vector dims(ndim,0); - int dimid[64] = {-1}; - err = 
nc_inq_vardimid( fid, varid, dimid ); + std::vector dims( ndim, 0 ); + int dimid[64] = { -1 }; + err = nc_inq_vardimid( fid, varid, dimid ); CHECK_NC_ERR( err ); - for (int i=0; i getVarDim( int fid, const std::string& var ) +std::vector getVarDim( int fid, const std::string &var ) { return getDimVar( fid, getVarID( fid, var ) ); } -std::vector getAttDim( int fid, const std::string& att ) +std::vector getAttDim( int fid, const std::string &att ) { - std::vector dim(1,0); + std::vector dim( 1, 0 ); int err = nc_inq_attlen( fid, NC_GLOBAL, att.c_str(), dim.data() ); CHECK_NC_ERR( err ); return dim; @@ -197,9 +221,9 @@ std::vector getVarNames( int fid ) int nvar; int err = nc_inq( fid, NULL, &nvar, NULL, NULL ); CHECK_NC_ERR( err ); - std::vector vars(nvar); - for (int i=0; i vars( nvar ); + for ( int i = 0; i < nvar; i++ ) { + char name[NC_MAX_NAME + 1]; err = nc_inq_varname( fid, i, name ); CHECK_NC_ERR( err ); vars[i] = name; @@ -211,262 +235,269 @@ std::vector getAttNames( int fid ) int natt; int err = nc_inq( fid, NULL, NULL, &natt, NULL ); CHECK_NC_ERR( err ); - std::vector att(natt); - for (int i=0; i att( natt ); + for ( int i = 0; i < natt; i++ ) { + char name[NC_MAX_NAME + 1]; + err = nc_inq_attname( fid, NC_GLOBAL, i, name ); CHECK_NC_ERR( err ); att[i] = name; } return att; } -VariableType getVarType( int fid, const std::string& var ) +VariableType getVarType( int fid, const std::string &var ) { int varid = -1; - int err = nc_inq_varid( fid, var.c_str(), &varid ); + int err = nc_inq_varid( fid, var.c_str(), &varid ); CHECK_NC_ERR( err ); - nc_type type=0; - err = nc_inq_vartype( fid, varid, &type ); + nc_type type = 0; + err = nc_inq_vartype( fid, varid, &type ); CHECK_NC_ERR( err ); - return convertType(type); + return convertType( type ); } -VariableType getAttType( int fid, const std::string& att ) +VariableType getAttType( int fid, const std::string &att ) { - nc_type type=0; - int err = nc_inq_atttype( fid, NC_GLOBAL, att.c_str(), &type ); + nc_type 
type = 0; + int err = nc_inq_atttype( fid, NC_GLOBAL, att.c_str(), &type ); CHECK_NC_ERR( err ); - return convertType(type); + return convertType( type ); } - /**************************************************** -* Read a variable * -****************************************************/ + * Read a variable * + ****************************************************/ template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_ushort( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_ushort( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_short( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_short( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_uint( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_uint( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( 
reverse(getVarDim(fid,var)) ); - int err = nc_get_var_int( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_int( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_float( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_float( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_double( fid, getVarID(fid,var), x.data() ); + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_double( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) -{ - PROFILE_START("getVar"); - Array x( reverse(getVarDim(fid,var)) ); - int err = nc_get_var_text( fid, getVarID(fid,var), x.data() ); +Array getVar( int fid, const std::string &var ) +{ + PROFILE_START( "getVar" ); + Array x( reverse( getVarDim( fid, var ) ) ); + int err = nc_get_var_text( fid, getVarID( fid, var ), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar"); + PROFILE_STOP( "getVar" ); return x.reverseDim(); } template<> -Array getVar( int fid, const std::string& var ) +Array getVar( int fid, const std::string &var ) { - PROFILE_START("getVar"); - Array tmp = getVar( fid, var ); - 
std::vector dim = {tmp.size(0), tmp.size(1), tmp.size(2) }; + PROFILE_START( "getVar" ); + Array tmp = getVar( fid, var ); + std::vector dim = { tmp.size( 0 ), tmp.size( 1 ), tmp.size( 2 ) }; if ( dim.size() == 1 ) dim[0] = 1; else dim.erase( dim.begin() ); - Array text(dim); - for (size_t i=0; i"); + Array text( dim ); + for ( size_t i = 0; i < text.length(); i++ ) + text( i ) = &( tmp( 0, i ) ); + PROFILE_STOP( "getVar" ); return text; } -static inline void get_stride_args( const std::vector& start, - const std::vector& count, const std::vector& stride, - size_t *startp, size_t *countp, ptrdiff_t *stridep ) +static inline void get_stride_args( const std::vector &start, const std::vector &count, + const std::vector &stride, size_t *startp, size_t *countp, ptrdiff_t *stridep ) { - for (size_t i=0; i -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], TYPE *ptr ); +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], TYPE *ptr ); template<> -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], short *ptr ) +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], short *ptr ) { return nc_get_vars_short( fid, varid, start, count, stride, ptr ); } template<> -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], int *ptr ) +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], int *ptr ) { return nc_get_vars_int( fid, varid, start, count, stride, ptr ); } template<> -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], float *ptr ) +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], float 
*ptr ) { return nc_get_vars_float( fid, varid, start, count, stride, ptr ); } template<> -int nc_get_vars_TYPE( int fid, int varid, const size_t start[], - const size_t count[], const ptrdiff_t stride[], double *ptr ) +int nc_get_vars_TYPE( int fid, int varid, const size_t start[], const size_t count[], + const ptrdiff_t stride[], double *ptr ) { return nc_get_vars_double( fid, varid, start, count, stride, ptr ); } template -Array getVar( int fid, const std::string& var, const std::vector& start, - const std::vector& count, const std::vector& stride ) +Array getVar( int fid, const std::string &var, const std::vector &start, + const std::vector &count, const std::vector &stride ) { - PROFILE_START("getVar<> (strided)"); + PROFILE_START( "getVar<> (strided)" ); std::vector var_size = getVarDim( fid, var ); - for (int d=0; d<(int)var_size.size(); d++) { - if ( start[d]<0 || start[d]+stride[d]*(count[d]-1)>(int)var_size[d] ) { - int rank = Utilities::MPI(MPI_COMM_WORLD).getRank(); + for ( int d = 0; d < (int) var_size.size(); d++ ) { + if ( start[d] < 0 || start[d] + stride[d] * ( count[d] - 1 ) > (int) var_size[d] ) { + int rank = Utilities::MPI( MPI_COMM_WORLD ).getRank(); char tmp[1000]; - sprintf(tmp,"%i: Range exceeded array dimension:\n" + sprintf( tmp, + "%i: Range exceeded array dimension:\n" " start[%i]=%i, count[%i]=%i, stride[%i]=%i, var_size[%i]=%i", - rank,d,start[d],d,count[d],d,stride[d],d,(int)var_size[d]); - ERROR(tmp); + rank, d, start[d], d, count[d], d, stride[d], d, (int) var_size[d] ); + ERROR( tmp ); } } - Array x( reverse(convert(count)) ); + Array x( reverse( convert( count ) ) ); size_t startp[10], countp[10]; ptrdiff_t stridep[10]; get_stride_args( start, count, stride, startp, countp, stridep ); - int err = nc_get_vars_TYPE( fid, getVarID(fid,var), startp, countp, stridep, x.data() ); + int err = + nc_get_vars_TYPE( fid, getVarID( fid, var ), startp, countp, stridep, x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getVar<> (strided)"); + 
PROFILE_STOP( "getVar<> (strided)" ); return x.reverseDim(); } -template Array getVar( int, const std::string&, const std::vector&, const std::vector&, const std::vector& ); -template Array getVar( int, const std::string&, const std::vector&, const std::vector&, const std::vector& ); -template Array getVar( int, const std::string&, const std::vector&, const std::vector&, const std::vector& ); -template Array getVar( int, const std::string&, const std::vector&, const std::vector&, const std::vector& ); +template Array getVar( int, const std::string &, const std::vector &, + const std::vector &, const std::vector & ); +template Array getVar( int, const std::string &, const std::vector &, + const std::vector &, const std::vector & ); +template Array getVar( int, const std::string &, const std::vector &, + const std::vector &, const std::vector & ); +template Array getVar( int, const std::string &, const std::vector &, + const std::vector &, const std::vector & ); /**************************************************** -* Read an attribute * -****************************************************/ + * Read an attribute * + ****************************************************/ template<> -Array getAtt( int fid, const std::string& att ) +Array getAtt( int fid, const std::string &att ) { - PROFILE_START("getAtt"); - Array x( getAttDim(fid,att) ); + PROFILE_START( "getAtt" ); + Array x( getAttDim( fid, att ) ); int err = nc_get_att_double( fid, NC_GLOBAL, att.c_str(), x.data() ); CHECK_NC_ERR( err ); - PROFILE_STOP("getAtt"); + PROFILE_STOP( "getAtt" ); return x; } template<> -Array getAtt( int fid, const std::string& att ) +Array getAtt( int fid, const std::string &att ) { - PROFILE_START("getAtt"); - char *tmp = new char[getAttDim(fid,att)[0]]; - Array x(1); - x(0) = tmp; - delete [] tmp; - PROFILE_STOP("getAtt"); + PROFILE_START( "getAtt" ); + char *tmp = new char[getAttDim( fid, att )[0]]; + Array x( 1 ); + x( 0 ) = tmp; + delete[] tmp; + PROFILE_STOP( "getAtt" ); return 
x; } /**************************************************** -* Write an array to a file * -****************************************************/ -std::vector defDim( int fid, const std::vector& names, const std::vector& dims ) + * Write an array to a file * + ****************************************************/ +std::vector defDim( + int fid, const std::vector &names, const std::vector &dims ) { - std::vector dimid(names.size(),0); - for (size_t i=0; i dimid( names.size(), 0 ); + for ( size_t i = 0; i < names.size(); i++ ) { + int err = nc_def_dim( fid, names[i].c_str(), dims[i], &dimid[i] ); CHECK_NC_ERR( err ); } return dimid; } template -void write( int fid, const std::string& var, const std::vector& dimids, - const Array& data, const RankInfoStruct& info ) +void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ) { // Define the variable int varid = 0; - int err = nc_def_var( fid, var.c_str(), getType(), data.ndim(), dimids.data(), &varid ); + int err = nc_def_var( fid, var.c_str(), getType(), data.ndim(), dimids.data(), &varid ); CHECK_NC_ERR( err ); - // exit define mode + // exit define mode err = nc_enddef( fid ); CHECK_NC_ERR( err ); - // set the access method to use MPI/PnetCDF collective I/O + // set the access method to use MPI/PnetCDF collective I/O err = nc_var_par_access( fid, varid, NC_INDEPENDENT ); CHECK_NC_ERR( err ); // parallel write: each process writes its subarray to the file - auto x = data.reverseDim(); - std::vector count = { data.size(0), data.size(1), data.size(2) }; - std::vector start = { info.ix*data.size(0), info.jy*data.size(1), info.kz*data.size(2) }; + auto x = data.reverseDim(); + std::vector count = { data.size( 0 ), data.size( 1 ), data.size( 2 ) }; + std::vector start = { info.ix * data.size( 0 ), info.jy * data.size( 1 ), + info.kz * data.size( 2 ) }; nc_put_vara( fid, varid, start.data(), count.data(), x.data() ); } -template void write( int fid, const 
std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& info ); -template void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& info ); -template void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& info ); -template void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& info ); +template void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ); +template void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ); +template void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ); +template void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &info ); - -}; // netcdf namespace +}; // namespace netcdf #else #endif - - diff --git a/IO/netcdf.h b/IO/netcdf.h index e1f65e61..eb77784d 100644 --- a/IO/netcdf.h +++ b/IO/netcdf.h @@ -5,9 +5,8 @@ #include #include "common/Array.h" -#include "common/MPI.h" #include "common/Communication.h" - +#include "common/MPI.h" namespace netcdf { @@ -31,15 +30,15 @@ std::string VariableTypeName( VariableType type ); * @param filename File to open * @param mode Open the file for reading or writing * @param comm MPI communicator to use (MPI_COMM_WORLD: don't use parallel netcdf) -*/ -int open( const std::string& filename, FileMode mode, const Utilities::MPI& comm=MPI_COMM_NULL ); + */ +int open( const std::string &filename, FileMode mode, const Utilities::MPI &comm = MPI_COMM_NULL ); /*! 
* @brief Close netcdf file * @details This function closes a netcdf file * @param fid Handle to the open file -*/ + */ void close( int fid ); @@ -47,7 +46,7 @@ void close( int fid ); * @brief Read the variable names * @details This function reads a list of the variable names in the file * @param fid Handle to the open file -*/ + */ std::vector getVarNames( int fid ); @@ -55,7 +54,7 @@ std::vector getVarNames( int fid ); * @brief Read the attribute names * @details This function reads a list of the attribute names in the file * @param fid Handle to the open file -*/ + */ std::vector getAttNames( int fid ); @@ -64,8 +63,8 @@ std::vector getAttNames( int fid ); * @details This function returns the type for a variable * @param fid Handle to the open file * @param var Variable to read -*/ -VariableType getVarType( int fid, const std::string& var ); + */ +VariableType getVarType( int fid, const std::string &var ); /*! @@ -73,8 +72,8 @@ VariableType getVarType( int fid, const std::string& var ); * @details This function returns the type for an attribute * @param fid Handle to the open file * @param att Attribute to read -*/ -VariableType getAttType( int fid, const std::string& att ); + */ +VariableType getAttType( int fid, const std::string &att ); /*! @@ -82,8 +81,8 @@ VariableType getAttType( int fid, const std::string& att ); * @details This function returns the die for a variable * @param fid Handle to the open file * @param var Variable to read -*/ -std::vector getVarDim( int fid, const std::string& var ); + */ +std::vector getVarDim( int fid, const std::string &var ); /*! @@ -91,9 +90,9 @@ std::vector getVarDim( int fid, const std::string& var ); * @details This function reads a variable with the given name from the file * @param fid Handle to the open file * @param var Variable to read -*/ + */ template -Array getVar( int fid, const std::string& var ); +Array getVar( int fid, const std::string &var ); /*! 
@@ -104,10 +103,10 @@ Array getVar( int fid, const std::string& var ); * @param start Starting corner for the read * @param count Number of elements to read * @param stride Stride size for the read -*/ + */ template -Array getVar( int fid, const std::string& var, const std::vector& start, - const std::vector& count, const std::vector& stride ); +Array getVar( int fid, const std::string &var, const std::vector &start, + const std::vector &count, const std::vector &stride ); /*! @@ -115,27 +114,29 @@ Array getVar( int fid, const std::string& var, const std::vector& sta * @details This function reads an attribute with the given name from the file * @param fid Handle to the open file * @param att Attribute to read -*/ + */ template -Array getAtt( int fid, const std::string& att ); +Array getAtt( int fid, const std::string &att ); /*! * @brief Write the dimensions - * @details This function writes the grid dimensions to netcdf. + * @details This function writes the grid dimensions to netcdf. * @param fid Handle to the open file -*/ -std::vector defDim( int fid, const std::vector& names, const std::vector& dims ); + */ +std::vector defDim( + int fid, const std::vector &names, const std::vector &dims ); /*! * @brief Write a variable - * @details This function writes a variable to netcdf. + * @details This function writes a variable to netcdf. 
* @param fid Handle to the open file -*/ + */ template -void write( int fid, const std::string& var, const std::vector& dimids, const Array& data, const RankInfoStruct& rank_info ); +void write( int fid, const std::string &var, const std::vector &dimids, + const Array &data, const RankInfoStruct &rank_info ); -}; // netcdf namespace +}; // namespace netcdf #endif diff --git a/IO/silo.cpp b/IO/silo.cpp index ddf3646a..a9041e44 100644 --- a/IO/silo.cpp +++ b/IO/silo.cpp @@ -1,6 +1,6 @@ #include "IO/silo.h" -#include "common/Utilities.h" #include "common/MPI.h" +#include "common/Utilities.h" #include "ProfilerApp.h" @@ -10,14 +10,13 @@ #include - -namespace silo { +namespace IO::silo { /**************************************************** -* Open/close a file * -****************************************************/ -DBfile* open( const std::string& filename, FileMode mode ) + * Open/close a file * + ****************************************************/ +DBfile *open( const std::string &filename, FileMode mode ) { DBfile *fid = nullptr; if ( mode == CREATE ) { @@ -29,82 +28,78 @@ DBfile* open( const std::string& filename, FileMode mode ) } return fid; } -void close( DBfile* fid ) -{ - DBClose( fid ); -} +void close( DBfile *fid ) { DBClose( fid ); } /**************************************************** -* Helper functions * -****************************************************/ -VariableDataType varDataType( DBfile *fid, const std::string& name ) + * Helper functions * + ****************************************************/ +DataType varDataType( DBfile *fid, const std::string &name ) { - auto type = DBGetVarType( fid, name.c_str() ); - VariableDataType type2 = VariableDataType::UNKNOWN; + auto type = DBGetVarType( fid, name.c_str() ); + DataType type2 = DataType::Null; if ( type == DB_DOUBLE ) - type2 = VariableDataType::DOUBLE; + type2 = DataType::Double; else if ( type == DB_FLOAT ) - type2 = VariableDataType::FLOAT; + type2 = DataType::Float; else if ( type == 
DB_INT ) - type2 = VariableDataType::INT; + type2 = DataType::Int; return type2; } /**************************************************** -* Write/read a uniform mesh to silo * -****************************************************/ -void readUniformMesh( DBfile* fid, const std::string& meshname, - std::vector& range, std::vector& N ) + * Write/read a uniform mesh to silo * + ****************************************************/ +void readUniformMesh( + DBfile *fid, const std::string &meshname, std::vector &range, std::vector &N ) { - DBquadmesh* mesh = DBGetQuadmesh( fid, meshname.c_str() ); - int ndim = mesh->ndims; - range.resize(2*ndim); - N.resize(ndim); - for (int d=0; ddims[d]-1; - range[2*d+0] = mesh->min_extents[d]; - range[2*d+1] = mesh->max_extents[d]; + DBquadmesh *mesh = DBGetQuadmesh( fid, meshname.c_str() ); + int ndim = mesh->ndims; + range.resize( 2 * ndim ); + N.resize( ndim ); + for ( int d = 0; d < ndim; d++ ) { + N[d] = mesh->dims[d] - 1; + range[2 * d + 0] = mesh->min_extents[d]; + range[2 * d + 1] = mesh->max_extents[d]; } DBFreeQuadmesh( mesh ); } /**************************************************** -* Write a multimesh * -****************************************************/ -void writeMultiMesh( DBfile* fid, const std::string& meshname, - const std::vector& meshNames, - const std::vector& meshTypes ) + * Write a multimesh * + ****************************************************/ +void writeMultiMesh( DBfile *fid, const std::string &meshname, + const std::vector &meshNames, const std::vector &meshTypes ) { - std::vector meshnames(meshNames.size()); + std::vector meshnames( meshNames.size() ); for ( size_t i = 0; i < meshNames.size(); ++i ) meshnames[i] = (char *) meshNames[i].c_str(); std::string tree_name = meshname + "_tree"; DBoptlist *optList = DBMakeOptlist( 1 ); DBAddOption( optList, DBOPT_MRGTREE_NAME, (char *) tree_name.c_str() ); - DBPutMultimesh( fid, meshname.c_str(), meshNames.size(), meshnames.data(), (int*) meshTypes.data(), 
nullptr ); + DBPutMultimesh( fid, meshname.c_str(), meshNames.size(), meshnames.data(), + (int *) meshTypes.data(), nullptr ); DBFreeOptlist( optList ); } /**************************************************** -* Write a multivariable * -****************************************************/ -void writeMultiVar( DBfile* fid, const std::string& varname, - const std::vector& varNames, - const std::vector& varTypes ) + * Write a multivariable * + ****************************************************/ +void writeMultiVar( DBfile *fid, const std::string &varname, + const std::vector &varNames, const std::vector &varTypes ) { - std::vector varnames(varNames.size(),nullptr); - for (size_t j=0; j(varNames[j].c_str()); - DBPutMultivar( fid, varname.c_str(), varNames.size(), varnames.data(), (int*) varTypes.data(), nullptr ); + std::vector varnames( varNames.size(), nullptr ); + for ( size_t j = 0; j < varNames.size(); j++ ) + varnames[j] = const_cast( varNames[j].c_str() ); + DBPutMultivar( + fid, varname.c_str(), varNames.size(), varnames.data(), (int *) varTypes.data(), nullptr ); } - -}; // silo namespace +}; // namespace IO::silo #else diff --git a/IO/silo.h b/IO/silo.h index 40a023d7..5e1068fe 100644 --- a/IO/silo.h +++ b/IO/silo.h @@ -1,32 +1,28 @@ #ifndef SILO_INTERFACE #define SILO_INTERFACE +#include #include #include -#include +#include "IO/Mesh.h" #include "common/Array.h" -#include "common/MPI.h" #include "common/Communication.h" +#include "common/MPI.h" #ifdef USE_SILO - #include +#include #else - typedef int DBfile; +typedef int DBfile; #endif - -namespace silo { +namespace IO::silo { enum FileMode { READ, WRITE, CREATE }; -enum class VariableType : int { NodeVariable=1, EdgeVariable=2, SurfaceVariable=2, VolumeVariable=3, NullVariable=0 }; - -enum class VariableDataType { DOUBLE, FLOAT, INT, UNKNOWN }; - /*! 
* @brief Open silo file @@ -34,16 +30,16 @@ enum class VariableDataType { DOUBLE, FLOAT, INT, UNKNOWN }; * @param[in] filename File to open * @param[in] mode Open the file for reading or writing * @return This function returns a handle to the file -*/ -DBfile* open( const std::string& filename, FileMode mode ); + */ +DBfile *open( const std::string &filename, FileMode mode ); /*! * @brief Close silo file * @details This function closes a silo file * @param[in] fid Handle to the open file -*/ -void close( DBfile* fid ); + */ +void close( DBfile *fid ); /*! @@ -51,8 +47,8 @@ void close( DBfile* fid ); * @details This function returns the type of variable data * @param[in] fid Handle to the open file * @param[in] name Name of variable -*/ -VariableDataType varDataType( DBfile *dbfile, const std::string& name ); + */ +DataType varDataType( DBfile *dbfile, const std::string &name ); /*! @@ -61,9 +57,9 @@ VariableDataType varDataType( DBfile *dbfile, const std::string& name ); * @param[in] fid Handle to the open file * @param[in] varname Variable name * @param[in] data Data to write -*/ + */ template -void write( DBfile* fid, const std::string& varname, const std::vector& data ); +void write( DBfile *fid, const std::string &varname, const std::vector &data ); /*! @@ -72,9 +68,9 @@ void write( DBfile* fid, const std::string& varname, const std::vector& da * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Data read -*/ + */ template -std::vector read( DBfile* fid, const std::string& varname ); +std::vector read( DBfile *fid, const std::string &varname ); /*! 
@@ -84,10 +80,10 @@ std::vector read( DBfile* fid, const std::string& varname ); * @param[in] meshname Mesh name * @param[in] range Range of mesh { xmin, xmax, ymin, ymax, zmin, zmax } * @param[in] N Number of cells in each direction -*/ + */ template -void writeUniformMesh( DBfile* fid, const std::string& meshname, - const std::array& range, const std::array& N ); +void writeUniformMesh( DBfile *fid, const std::string &meshname, + const std::array &range, const std::array &N ); /*! @@ -97,9 +93,9 @@ void writeUniformMesh( DBfile* fid, const std::string& meshname, * @param[in] meshname Mesh name * @param[out] range Range of mesh { xmin, xmax, ymin, ymax, zmin, zmax } * @param[out] N Number of cells in each direction -*/ -void readUniformMesh( DBfile* fid, const std::string& meshname, - std::vector& range, std::vector& N ); + */ +void readUniformMesh( + DBfile *fid, const std::string &meshname, std::vector &range, std::vector &N ); /*! @@ -111,10 +107,11 @@ void readUniformMesh( DBfile* fid, const std::string& meshname, * @param[in] varname Variable name * @param[in] data Variable data * @param[in] type Variable type -*/ -template< int NDIM, class TYPE > -void writeUniformMeshVariable( DBfile* fid, const std::string& meshname, const std::array& N, - const std::string& varname, const Array& data, VariableType type ); + */ +template +void writeUniformMeshVariable( DBfile *fid, const std::string &meshname, + const std::array &N, const std::string &varname, const Array &data, + VariableType type ); /*! @@ -123,9 +120,9 @@ void writeUniformMeshVariable( DBfile* fid, const std::string& meshname, const s * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data -*/ + */ template -Array readUniformMeshVariable( DBfile* fid, const std::string& varname ); +Array readUniformMeshVariable( DBfile *fid, const std::string &varname ); /*! 
@@ -136,10 +133,10 @@ Array readUniformMeshVariable( DBfile* fid, const std::string& varname ); * @param[in] ndim Number of dimensions * @param[in] N Number of points * @param[in] coords Coordinates of the points -*/ + */ template -void writePointMesh( DBfile* fid, const std::string& meshname, - int ndim, int N, const TYPE *coords[] ); +void writePointMesh( + DBfile *fid, const std::string &meshname, int ndim, int N, const TYPE *coords[] ); /*! @@ -147,10 +144,10 @@ void writePointMesh( DBfile* fid, const std::string& meshname, * @details This function reads a pointmesh from silo * @param[in] fid Handle to the open file * @param[in] meshname Mesh name - * @return Returns the coordinates as a N x ndim array -*/ + * @return Returns the coordinates as a N x ndim array + */ template -Array readPointMesh( DBfile* fid, const std::string& meshname ); +Array readPointMesh( DBfile *fid, const std::string &meshname ); /*! @@ -160,10 +157,10 @@ Array readPointMesh( DBfile* fid, const std::string& meshname ); * @param[in] meshname Mesh name * @param[in] varname Variable name * @param[in] data Variable data -*/ + */ template -void writePointMeshVariable( DBfile* fid, const std::string& meshname, - const std::string& varname, const Array& data ); +void writePointMeshVariable( + DBfile *fid, const std::string &meshname, const std::string &varname, const Array &data ); /*! @@ -172,9 +169,9 @@ void writePointMeshVariable( DBfile* fid, const std::string& meshname, * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data -*/ + */ template -Array readPointMeshVariable( DBfile* fid, const std::string& varname ); +Array readPointMeshVariable( DBfile *fid, const std::string &varname ); /*! 
@@ -188,10 +185,10 @@ Array readPointMeshVariable( DBfile* fid, const std::string& varname ); * @param[in] coords Coordinates of the points * @param[in] N_tri Number of triangles * @param[in] tri Coordinates of the points -*/ + */ template -void writeTriMesh( DBfile* fid, const std::string& meshname, - int ndim, int ndim_tri, int N, const TYPE *coords[], int N_tri, const int *tri[] ); +void writeTriMesh( DBfile *fid, const std::string &meshname, int ndim, int ndim_tri, int N, + const TYPE *coords[], int N_tri, const int *tri[] ); /*! @@ -201,9 +198,9 @@ void writeTriMesh( DBfile* fid, const std::string& meshname, * @param[in] meshname Mesh name * @param[in] coords Coordinates of the points * @param[in] tri Coordinates of the points -*/ + */ template -void readTriMesh( DBfile* fid, const std::string& meshname, Array& coords, Array& tri ); +void readTriMesh( DBfile *fid, const std::string &meshname, Array &coords, Array &tri ); /*! @@ -215,10 +212,10 @@ void readTriMesh( DBfile* fid, const std::string& meshname, Array& coords, * @param[in] varname Variable name * @param[in] data Variable data * @param[in] type Variable type -*/ + */ template -void writeTriMeshVariable( DBfile* fid, int ndim, const std::string& meshname, - const std::string& varname, const Array& data, VariableType type ); +void writeTriMeshVariable( DBfile *fid, int ndim, const std::string &meshname, + const std::string &varname, const Array &data, VariableType type ); /*! @@ -227,9 +224,9 @@ void writeTriMeshVariable( DBfile* fid, int ndim, const std::string& meshname, * @param[in] fid Handle to the open file * @param[in] varname Variable name * @return Variable data -*/ + */ template -Array readTriMeshVariable( DBfile* fid, const std::string& varname ); +Array readTriMeshVariable( DBfile *fid, const std::string &varname ); /*! 
@@ -239,10 +236,9 @@ Array readTriMeshVariable( DBfile* fid, const std::string& varname ); * @param[in] meshname Mesh name * @param[in] subMeshNames Names of the sub meshes in the form "filename:meshname" * @param[in] subMeshTypes Type of each submesh -*/ -void writeMultiMesh( DBfile* fid, const std::string& meshname, - const std::vector& subMeshNames, - const std::vector& subMeshTypes ); + */ +void writeMultiMesh( DBfile *fid, const std::string &meshname, + const std::vector &subMeshNames, const std::vector &subMeshTypes ); /*! @@ -255,14 +251,12 @@ void writeMultiMesh( DBfile* fid, const std::string& meshname, * @param[in] subVarTypes Type of each submesh * @param[in] ndim Dimension of variable (used to determine suffix) * @param[in] nvar Number of subvariables (used to determine suffix) -*/ -void writeMultiVar( DBfile* fid, const std::string& varname, - const std::vector& subVarNames, - const std::vector& subVarTypes ); + */ +void writeMultiVar( DBfile *fid, const std::string &varname, + const std::vector &subVarNames, const std::vector &subVarTypes ); -}; // silo namespace +}; // namespace IO::silo #endif #include "IO/silo.hpp" - diff --git a/IO/silo.hpp b/IO/silo.hpp index 35852004..b76ebd28 100644 --- a/IO/silo.hpp +++ b/IO/silo.hpp @@ -2,8 +2,8 @@ #define SILO_INTERFACE_HPP #include "IO/silo.h" -#include "common/Utilities.h" #include "common/MPI.h" +#include "common/Utilities.h" #include "ProfilerApp.h" @@ -13,52 +13,77 @@ #include - -namespace silo { +namespace IO::silo { /**************************************************** -* Helper functions * -****************************************************/ -template static constexpr int getType(); -template<> constexpr int getType() { return DB_DOUBLE; } -template<> constexpr int getType() { return DB_FLOAT; } -template<> constexpr int getType() { return DB_INT; } + * Helper functions * + ****************************************************/ template -inline void copyData( Array& data, int type, const void *src ) 
+static constexpr int getType(); +template<> +constexpr int getType() +{ + return DB_DOUBLE; +} +template<> +constexpr int getType() +{ + return DB_FLOAT; +} +template<> +constexpr int getType() +{ + return DB_INT; +} +template +inline void copyData( Array &data, int type, const void *src ) { if ( type == getType() ) - memcpy( data.data(), src, data.length()*sizeof(TYPE) ); + memcpy( data.data(), src, data.length() * sizeof( TYPE ) ); else if ( type == DB_DOUBLE ) - data.copy( static_cast(src) ); + data.copy( static_cast( src ) ); else if ( type == DB_FLOAT ) - data.copy( static_cast(src) ); + data.copy( static_cast( src ) ); else if ( type == DB_INT ) - data.copy( static_cast(src) ); + data.copy( static_cast( src ) ); else - ERROR("Unknown type"); + ERROR( "Unknown type" ); } /**************************************************** -* Write/read an arbitrary vector * -****************************************************/ -template constexpr int getSiloType(); -template<> constexpr int getSiloType() { return DB_INT; } -template<> constexpr int getSiloType() { return DB_FLOAT; } -template<> constexpr int getSiloType() { return DB_DOUBLE; } + * Write/read an arbitrary vector * + ****************************************************/ template -void write( DBfile* fid, const std::string& varname, const std::vector& data ) +constexpr int getSiloType(); +template<> +constexpr int getSiloType() +{ + return DB_INT; +} +template<> +constexpr int getSiloType() +{ + return DB_FLOAT; +} +template<> +constexpr int getSiloType() +{ + return DB_DOUBLE; +} +template +void write( DBfile *fid, const std::string &varname, const std::vector &data ) { int dims = data.size(); - int err = DBWrite( fid, varname.c_str(), (void*) data.data(), &dims, 1, getSiloType() ); + int err = DBWrite( fid, varname.c_str(), (void *) data.data(), &dims, 1, getSiloType() ); ASSERT( err == 0 ); } template -std::vector read( DBfile* fid, const std::string& varname ) +std::vector read( DBfile *fid, const 
std::string &varname ) { int N = DBGetVarLength( fid, varname.c_str() ); - std::vector data(N); + std::vector data( N ); int err = DBReadVar( fid, varname.c_str(), data.data() ); ASSERT( err == 0 ); return data; @@ -66,31 +91,31 @@ std::vector read( DBfile* fid, const std::string& varname ) /**************************************************** -* Helper function to get variable suffixes * -****************************************************/ + * Helper function to get variable suffixes * + ****************************************************/ inline std::vector getVarSuffix( int ndim, int nvars ) { - std::vector suffix(nvars); + std::vector suffix( nvars ); if ( nvars == 1 ) { suffix[0] = ""; } else if ( nvars == ndim ) { - if ( ndim==2 ) { + if ( ndim == 2 ) { suffix[0] = "_x"; suffix[1] = "_y"; - } else if ( ndim==3 ) { + } else if ( ndim == 3 ) { suffix[0] = "_x"; suffix[1] = "_y"; suffix[2] = "_z"; } else { - ERROR("Not finished"); + ERROR( "Not finished" ); } - } else if ( nvars == ndim*ndim ) { - if ( ndim==2 ) { + } else if ( nvars == ndim * ndim ) { + if ( ndim == 2 ) { suffix[0] = "_xx"; suffix[1] = "_xy"; suffix[2] = "_yx"; suffix[3] = "_yy"; - } else if ( ndim==3 ) { + } else if ( ndim == 3 ) { suffix[0] = "_xx"; suffix[1] = "_xy"; suffix[2] = "_xz"; @@ -101,122 +126,127 @@ inline std::vector getVarSuffix( int ndim, int nvars ) suffix[7] = "_zy"; suffix[8] = "_zz"; } else { - ERROR("Not finished"); + ERROR( "Not finished" ); } } else { - for (int i=0; i -void writeUniformMesh( DBfile* fid, const std::string& meshname, - const std::array& range, const std::array& N ) +void writeUniformMesh( DBfile *fid, const std::string &meshname, + const std::array &range, const std::array &N ) { - PROFILE_START("writeUniformMesh",2); + PROFILE_START( "writeUniformMesh", 2 ); int dims[NDIM]; - for (size_t d=0; d= 1 ) { x = new float[dims[0]]; - for (int i=0; i= 2 ) { y = new float[dims[1]]; - for (int i=0; i= 3 ) { z = new float[dims[2]]; - for (int i=0; i -void 
writeUniformMeshVariable( DBfile* fid, const std::string& meshname, const std::array& N, - const std::string& varname, const Array& data, VariableType type ) + * Write a vector/tensor quad variable * + ****************************************************/ +template +void writeUniformMeshVariable( DBfile *fid, const std::string &meshname, + const std::array &N, const std::string &varname, const Array &data, + VariableType type ) { - PROFILE_START("writeUniformMeshVariable",2); - int nvars=1, dims[NDIM]={1}; + PROFILE_START( "writeUniformMeshVariable", 2 ); + int nvars = 1, dims[NDIM] = { 1 }; const TYPE *vars[NDIM] = { nullptr }; - int vartype = 0; + int vartype = 0; if ( type == VariableType::NodeVariable ) { - ASSERT( data.ndim()==NDIM || data.ndim()==NDIM+1 ); - for (int d=0; d var_names(nvars); - for (int i=0; i var_names( nvars ); + for ( int i = 0; i < nvars; i++ ) var_names[i] = varname + suffix[i]; - std::vector varnames(nvars,nullptr); - for (int i=0; i(var_names[i].c_str()); - int err = DBPutQuadvar( fid, varname.c_str(), meshname.c_str(), nvars, - varnames.data(), vars, dims, NDIM, nullptr, 0, getType(), vartype, nullptr ); + std::vector varnames( nvars, nullptr ); + for ( int i = 0; i < nvars; i++ ) + varnames[i] = const_cast( var_names[i].c_str() ); + int err = DBPutQuadvar( fid, varname.c_str(), meshname.c_str(), nvars, varnames.data(), vars, + dims, NDIM, nullptr, 0, getType(), vartype, nullptr ); ASSERT( err == 0 ); - PROFILE_STOP("writeUniformMeshVariable",2); + PROFILE_STOP( "writeUniformMeshVariable", 2 ); } -template -Array readUniformMeshVariable( DBfile* fid, const std::string& varname ) +template +Array readUniformMeshVariable( DBfile *fid, const std::string &varname ) { auto var = DBGetQuadvar( fid, varname.c_str() ); ASSERT( var != nullptr ); Array data( var->nels, var->nvals ); int type = var->datatype; - for (int i=0; invals; i++) { + for ( int i = 0; i < var->nvals; i++ ) { Array data2( var->nels ); copyData( data2, type, var->vals[i] ); 
- memcpy( &data(0,i), data2.data(), var->nels*sizeof(TYPE) ); + memcpy( &data( 0, i ), data2.data(), var->nels * sizeof( TYPE ) ); } DBFreeQuadvar( var ); - std::vector dims( var->ndims+1, var->nvals ); - for (int d=0; dndims; d++) + std::vector dims( var->ndims + 1, var->nvals ); + for ( int d = 0; d < var->ndims; d++ ) dims[d] = var->dims[d]; data.reshape( dims ); return data; @@ -224,54 +254,55 @@ Array readUniformMeshVariable( DBfile* fid, const std::string& varname ) /**************************************************** -* Read/write a point mesh/variable to silo * -****************************************************/ + * Read/write a point mesh/variable to silo * + ****************************************************/ template -void writePointMesh( DBfile* fid, const std::string& meshname, - int ndim, int N, const TYPE *coords[] ) +void writePointMesh( + DBfile *fid, const std::string &meshname, int ndim, int N, const TYPE *coords[] ) { int err = DBPutPointmesh( fid, meshname.c_str(), ndim, coords, N, getType(), nullptr ); ASSERT( err == 0 ); } -template -Array readPointMesh( DBfile* fid, const std::string& meshname ) +template +Array readPointMesh( DBfile *fid, const std::string &meshname ) { auto mesh = DBGetPointmesh( fid, meshname.c_str() ); - int N = mesh->nels; - int ndim = mesh->ndims; - Array coords(N,ndim); + int N = mesh->nels; + int ndim = mesh->ndims; + Array coords( N, ndim ); int type = mesh->datatype; - for (int d=0; d data2( N ); copyData( data2, type, mesh->coords[d] ); - memcpy( &coords(0,d), data2.data(), N*sizeof(TYPE) ); + memcpy( &coords( 0, d ), data2.data(), N * sizeof( TYPE ) ); } DBFreePointmesh( mesh ); return coords; } template -void writePointMeshVariable( DBfile* fid, const std::string& meshname, - const std::string& varname, const Array& data ) +void writePointMeshVariable( + DBfile *fid, const std::string &meshname, const std::string &varname, const Array &data ) { - int N = data.size(0); - int nvars = data.size(1); - 
std::vector vars(nvars); - for (int i=0; i(), nullptr ); + int N = data.size( 0 ); + int nvars = data.size( 1 ); + std::vector vars( nvars ); + for ( int i = 0; i < nvars; i++ ) + vars[i] = &data( 0, i ); + int err = DBPutPointvar( + fid, varname.c_str(), meshname.c_str(), nvars, vars.data(), N, getType(), nullptr ); ASSERT( err == 0 ); } -template -Array readPointMeshVariable( DBfile* fid, const std::string& varname ) +template +Array readPointMeshVariable( DBfile *fid, const std::string &varname ) { auto var = DBGetPointvar( fid, varname.c_str() ); ASSERT( var != nullptr ); Array data( var->nels, var->nvals ); int type = var->datatype; - for (int i=0; invals; i++) { + for ( int i = 0; i < var->nvals; i++ ) { Array data2( var->nels ); copyData( data2, type, var->vals[i] ); - memcpy( &data(0,i), data2.data(), var->nels*sizeof(TYPE) ); + memcpy( &data( 0, i ), data2.data(), var->nels * sizeof( TYPE ) ); } DBFreeMeshvar( var ); return data; @@ -279,110 +310,110 @@ Array readPointMeshVariable( DBfile* fid, const std::string& varname ) /**************************************************** -* Read/write a triangle mesh * -****************************************************/ + * Read/write a triangle mesh * + ****************************************************/ template -void writeTriMesh( DBfile* fid, const std::string& meshName, - int ndim, int ndim_tri, int N, const TYPE *coords[], int N_tri, const int *tri[] ) +void writeTriMesh( DBfile *fid, const std::string &meshName, int ndim, int ndim_tri, int N, + const TYPE *coords[], int N_tri, const int *tri[] ) { auto zoneName = meshName + "_zones"; - std::vector nodelist( (ndim_tri+1)*N_tri ); - for (int i=0, j=0; i nodelist( ( ndim_tri + 1 ) * N_tri ); + for ( int i = 0, j = 0; i < N_tri; i++ ) { + for ( int d = 0; d < ndim_tri + 1; d++, j++ ) nodelist[j] = tri[d][i]; } int shapetype = 0; - if ( ndim_tri==1 ) + if ( ndim_tri == 1 ) shapetype = DB_ZONETYPE_BEAM; - else if ( ndim_tri==2 ) + else if ( ndim_tri == 2 ) 
shapetype = DB_ZONETYPE_TRIANGLE; - else if ( ndim_tri==3 ) + else if ( ndim_tri == 3 ) shapetype = DB_ZONETYPE_PYRAMID; else - ERROR("Unknown shapetype"); - int shapesize = ndim_tri+1; - int shapecnt = N_tri; - DBPutZonelist2( fid, zoneName.c_str(), N_tri, ndim_tri, nodelist.data(), - nodelist.size(), 0, 0, 0, &shapetype, &shapesize, &shapecnt, 1, nullptr ); - DBPutUcdmesh( fid, meshName.c_str(), ndim, nullptr, coords, N, - nodelist.size(), zoneName.c_str(), nullptr, getType(), nullptr ); + ERROR( "Unknown shapetype" ); + int shapesize = ndim_tri + 1; + int shapecnt = N_tri; + DBPutZonelist2( fid, zoneName.c_str(), N_tri, ndim_tri, nodelist.data(), nodelist.size(), 0, 0, + 0, &shapetype, &shapesize, &shapecnt, 1, nullptr ); + DBPutUcdmesh( fid, meshName.c_str(), ndim, nullptr, coords, N, nodelist.size(), + zoneName.c_str(), nullptr, getType(), nullptr ); } template -void readTriMesh( DBfile* fid, const std::string& meshname, Array& coords, Array& tri ) +void readTriMesh( DBfile *fid, const std::string &meshname, Array &coords, Array &tri ) { - auto mesh = DBGetUcdmesh( fid, meshname.c_str() ); - int ndim = mesh->ndims; + auto mesh = DBGetUcdmesh( fid, meshname.c_str() ); + int ndim = mesh->ndims; int N_nodes = mesh->nnodes; - coords.resize(N_nodes,ndim); + coords.resize( N_nodes, ndim ); int mesh_type = mesh->datatype; - for (int d=0; d data2( N_nodes ); copyData( data2, mesh_type, mesh->coords[d] ); - memcpy( &coords(0,d), data2.data(), N_nodes*sizeof(TYPE) ); + memcpy( &coords( 0, d ), data2.data(), N_nodes * sizeof( TYPE ) ); } - auto zones = mesh->zones; + auto zones = mesh->zones; int N_zones = zones->nzones; - ASSERT( zones->nshapes==1 ); + ASSERT( zones->nshapes == 1 ); int shapesize = zones->shapesize[0]; - tri.resize(N_zones,shapesize); - for (int i=0; inodelist[i*shapesize+j]; + tri.resize( N_zones, shapesize ); + for ( int i = 0; i < N_zones; i++ ) { + for ( int j = 0; j < shapesize; j++ ) + tri( i, j ) = zones->nodelist[i * shapesize + j]; } 
DBFreeUcdmesh( mesh ); } template -void writeTriMeshVariable( DBfile* fid, int ndim, const std::string& meshname, - const std::string& varname, const Array& data, VariableType type ) +void writeTriMeshVariable( DBfile *fid, int ndim, const std::string &meshname, + const std::string &varname, const Array &data, VariableType type ) { - int nvars = 0; - int vartype = 0; + int nvars = 0; + int vartype = 0; const TYPE *vars[10] = { nullptr }; if ( type == VariableType::NodeVariable ) { vartype = DB_NODECENT; - nvars = data.size(1); - for (int i=0; i var_names(nvars); - for (int i=0; i var_names( nvars ); + for ( int i = 0; i < nvars; i++ ) var_names[i] = varname + suffix[i]; - std::vector varnames(nvars,nullptr); - for (int i=0; i(var_names[i].c_str()); - DBPutUcdvar( fid, varname.c_str(), meshname.c_str(), nvars, - varnames.data(), vars, data.size(0), nullptr, 0, getType(), vartype, nullptr ); + std::vector varnames( nvars, nullptr ); + for ( int i = 0; i < nvars; i++ ) + varnames[i] = const_cast( var_names[i].c_str() ); + DBPutUcdvar( fid, varname.c_str(), meshname.c_str(), nvars, varnames.data(), vars, + data.size( 0 ), nullptr, 0, getType(), vartype, nullptr ); } template -Array readTriMeshVariable( DBfile* fid, const std::string& varname ) +Array readTriMeshVariable( DBfile *fid, const std::string &varname ) { auto var = DBGetUcdvar( fid, varname.c_str() ); ASSERT( var != nullptr ); Array data( var->nels, var->nvals ); int type = var->datatype; - for (int i=0; invals; i++) { + for ( int i = 0; i < var->nvals; i++ ) { Array data2( var->nels ); copyData( data2, type, var->vals[i] ); - memcpy( &data(0,i), data2.data(), var->nels*sizeof(TYPE) ); + memcpy( &data( 0, i ), data2.data(), var->nels * sizeof( TYPE ) ); } DBFreeUcdvar( var ); return data; } -}; // silo namespace +}; // namespace IO::silo #endif diff --git a/README.titan b/README.titan index 8d087700..a3178875 100644 --- a/README.titan +++ b/README.titan @@ -26,7 +26,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=CC 
\ -D CFLAGS="-DCBUB" \ -D CXXFLAGS="-DCBUB" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Debug \ diff --git a/StackTrace/ErrorHandlers.h b/StackTrace/ErrorHandlers.h index 12b8d7de..6dd961d7 100644 --- a/StackTrace/ErrorHandlers.h +++ b/StackTrace/ErrorHandlers.h @@ -3,11 +3,10 @@ #include "StackTrace/StackTrace.h" +#include "common/MPI.h" #include -#include "mpi.h" - namespace StackTrace { diff --git a/analysis/FreeEnergy.cpp b/analysis/FreeEnergy.cpp new file mode 100644 index 00000000..6a641a95 --- /dev/null +++ b/analysis/FreeEnergy.cpp @@ -0,0 +1,181 @@ +#include "analysis/FreeEnergy.h" + +FreeEnergyAnalyzer::FreeEnergyAnalyzer(std::shared_ptr dm): + Dm(dm) +{ + + Nx=dm->Nx; Ny=dm->Ny; Nz=dm->Nz; + Volume=(Nx-2)*(Ny-2)*(Nz-2)*Dm->nprocx()*Dm->nprocy()*Dm->nprocz()*1.0; + + ChemicalPotential.resize(Nx,Ny,Nz); ChemicalPotential.fill(0); + Phi.resize(Nx,Ny,Nz); Phi.fill(0); + Pressure.resize(Nx,Ny,Nz); Pressure.fill(0); + Rho.resize(Nx,Ny,Nz); Rho.fill(0); + Vel_x.resize(Nx,Ny,Nz); Vel_x.fill(0); // Gradient of the phase indicator field + Vel_y.resize(Nx,Ny,Nz); Vel_y.fill(0); + Vel_z.resize(Nx,Ny,Nz); Vel_z.fill(0); + SDs.resize(Nx,Ny,Nz); SDs.fill(0); + + if (Dm->rank()==0){ + bool WriteHeader=false; + TIMELOG = fopen("free.csv","r"); + if (TIMELOG != NULL) + fclose(TIMELOG); + else + WriteHeader=true; + + TIMELOG = fopen("free.csv","a+"); + if (WriteHeader) + { + // If timelog is empty, write a short header to list the averages + //fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); + fprintf(TIMELOG,"timestep\n"); + } + } + +} + +FreeEnergyAnalyzer::~FreeEnergyAnalyzer(){ + if (Dm->rank()==0){ + fclose(TIMELOG); + } +} + +void FreeEnergyAnalyzer::SetParams(){ + +} + +void FreeEnergyAnalyzer::Basic(ScaLBL_FreeLeeModel &LeeModel, int timestep){ + + int i,j,k; + + if (Dm->rank()==0){ + fprintf(TIMELOG,"%i ",timestep); + /*for (int 
ion=0; ion input_db, int timestep){ + + auto vis_db = input_db->getDatabase( "Visualization" ); + char VisName[40]; + + std::vector visData; + fillHalo fillData(Dm->Comm,Dm->rank_info,{Dm->Nx-2,Dm->Ny-2,Dm->Nz-2},{1,1,1},0,1); + + IO::initialize("","silo","false"); + // Create the MeshDataStruct + visData.resize(1); + + visData[0].meshName = "domain"; + visData[0].mesh = std::make_shared( Dm->rank_info,Dm->Nx-2,Dm->Ny-2,Dm->Nz-2,Dm->Lx,Dm->Ly,Dm->Lz ); + auto VisPhase = std::make_shared(); + auto VisPressure = std::make_shared(); + auto VisChemicalPotential = std::make_shared(); + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); + + + if (vis_db->getWithDefault( "save_phase_field", true )){ + VisPhase->name = "Phase"; + VisPhase->type = IO::VariableType::VolumeVariable; + VisPhase->dim = 1; + VisPhase->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VisPhase); + } + + if (vis_db->getWithDefault( "save_potential", true )){ + + VisPressure->name = "Pressure"; + VisPressure->type = IO::VariableType::VolumeVariable; + VisPressure->dim = 1; + VisPressure->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VisPressure); + + VisChemicalPotential->name = "ChemicalPotential"; + VisChemicalPotential->type = IO::VariableType::VolumeVariable; + VisChemicalPotential->dim = 1; + VisChemicalPotential->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VisChemicalPotential); + } + + if (vis_db->getWithDefault( "save_velocity", false )){ + VxVar->name = "Velocity_x"; + VxVar->type = IO::VariableType::VolumeVariable; + VxVar->dim = 1; + VxVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VxVar); + VyVar->name = "Velocity_y"; + VyVar->type = IO::VariableType::VolumeVariable; + VyVar->dim = 1; + VyVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VyVar); + VzVar->name = "Velocity_z"; + VzVar->type = 
IO::VariableType::VolumeVariable; + VzVar->dim = 1; + VzVar->data.resize(Dm->Nx-2,Dm->Ny-2,Dm->Nz-2); + visData[0].vars.push_back(VzVar); + } + + if (vis_db->getWithDefault( "save_phase", true )){ + ASSERT(visData[0].vars[0]->name=="Phase"); + LeeModel.getPhase(Phi); + Array& PhaseData = visData[0].vars[0]->data; + fillData.copy(Phi,PhaseData); + } + + if (vis_db->getWithDefault( "save_potential", true )){ + ASSERT(visData[0].vars[1]->name=="Pressure"); + LeeModel.getPotential(Pressure, ChemicalPotential); + Array& PressureData = visData[0].vars[1]->data; + fillData.copy(Pressure,PressureData); + + ASSERT(visData[0].vars[2]->name=="ChemicalPotential"); + Array& ChemicalPotentialData = visData[0].vars[2]->data; + fillData.copy(ChemicalPotential,ChemicalPotentialData); + } + + if (vis_db->getWithDefault( "save_velocity", false )){ + ASSERT(visData[0].vars[3]->name=="Velocity_x"); + ASSERT(visData[0].vars[4]->name=="Velocity_y"); + ASSERT(visData[0].vars[5]->name=="Velocity_z"); + LeeModel.getVelocity(Vel_x,Vel_y,Vel_z); + Array& VelxData = visData[0].vars[3]->data; + Array& VelyData = visData[0].vars[4]->data; + Array& VelzData = visData[0].vars[5]->data; + fillData.copy(Vel_x,VelxData); + fillData.copy(Vel_y,VelyData); + fillData.copy(Vel_z,VelzData); + } + + if (vis_db->getWithDefault( "write_silo", true )) + IO::writeData( timestep, visData, Dm->Comm ); + +/* if (vis_db->getWithDefault( "save_8bit_raw", true )){ + char CurrentIDFilename[40]; + sprintf(CurrentIDFilename,"id_t%d.raw",timestep); + Averages.AggregateLabels(CurrentIDFilename); + } +*/ +} diff --git a/analysis/FreeEnergy.h b/analysis/FreeEnergy.h new file mode 100644 index 00000000..fbb1ba31 --- /dev/null +++ b/analysis/FreeEnergy.h @@ -0,0 +1,54 @@ +/* + * averaging tools for electrochemistry + */ + +#ifndef FreeEnergyAnalyzer_INC +#define FreeEnergyAnalyzer_INC + +#include +#include "common/Domain.h" +#include "common/Utilities.h" +#include "common/MPI.h" +#include "common/Communication.h" +#include 
"analysis/analysis.h" +#include "analysis/distance.h" +#include "analysis/Minkowski.h" +#include "analysis/SubPhase.h" +#include "IO/MeshDatabase.h" +#include "IO/Reader.h" +#include "IO/Writer.h" +#include "models/FreeLeeModel.h" + +class FreeEnergyAnalyzer{ +public: + std::shared_ptr Dm; + double Volume; + // input variables + double rho_n, rho_w; + double nu_n, nu_w; + double gamma_wn, beta; + double Fx, Fy, Fz; + + //........................................................................... + int Nx,Ny,Nz; + DoubleArray Rho; + DoubleArray Phi; + DoubleArray ChemicalPotential; + DoubleArray Pressure; + DoubleArray Vel_x; + DoubleArray Vel_y; + DoubleArray Vel_z; + DoubleArray SDs; + + FreeEnergyAnalyzer(std::shared_ptr Dm); + ~FreeEnergyAnalyzer(); + + void SetParams(); + void Basic( ScaLBL_FreeLeeModel &LeeModel, int timestep); + void WriteVis( ScaLBL_FreeLeeModel &LeeModel, std::shared_ptr input_db, int timestep); + +private: + FILE *TIMELOG; +}; +#endif + diff --git a/analysis/SubPhase.cpp b/analysis/SubPhase.cpp index 59778177..d0657391 100644 --- a/analysis/SubPhase.cpp +++ b/analysis/SubPhase.cpp @@ -40,7 +40,7 @@ SubPhase::SubPhase(std::shared_ptr dm): { // If timelog is empty, write a short header to list the averages //fprintf(SUBPHASE,"--------------------------------------------------------------------------------------\n"); - fprintf(SUBPHASE,"time rn rw nun nuw Fx Fy Fz iftwn "); + fprintf(SUBPHASE,"time rn rw nun nuw Fx Fy Fz iftwn wet "); fprintf(SUBPHASE,"pwc pwd pnc pnd "); // pressures fprintf(SUBPHASE,"Mwc Mwd Mwi Mnc Mnd Mni "); // mass fprintf(SUBPHASE,"Pwc_x Pwd_x Pwi_x Pnc_x Pnd_x Pni_x "); // momentum @@ -50,7 +50,7 @@ SubPhase::SubPhase(std::shared_ptr dm): fprintf(SUBPHASE,"Vwc Awc Hwc Xwc "); // wc region fprintf(SUBPHASE,"Vwd Awd Hwd Xwd Nwd "); // wd region fprintf(SUBPHASE,"Vnc Anc Hnc Xnc "); // nc region - fprintf(SUBPHASE,"Vnd And Hnd Xnd Nnd "); // nd region + fprintf(SUBPHASE,"Vnd And Hnd Xnd Nnd "); // nd regionin 
fprintf(SUBPHASE,"Vi Ai Hi Xi "); // interface region fprintf(SUBPHASE,"Vic Aic Hic Xic Nic\n"); // interface region @@ -65,7 +65,7 @@ SubPhase::SubPhase(std::shared_ptr dm): sprintf(LocalRankFilename,"%s%s","subphase.csv.",LocalRankString); SUBPHASE = fopen(LocalRankFilename,"a+"); //fprintf(SUBPHASE,"--------------------------------------------------------------------------------------\n"); - fprintf(SUBPHASE,"time rn rw nun nuw Fx Fy Fz iftwn "); + fprintf(SUBPHASE,"time rn rw nun nuw Fx Fy Fz iftwn wet "); fprintf(SUBPHASE,"pwc pwd pnc pnd "); // pressures fprintf(SUBPHASE,"Mwc Mwd Mwi Mnc Mnd Mni "); // mass fprintf(SUBPHASE,"Pwc_x Pwd_x Pwi_x Pnc_x Pnd_x Pni_x "); // momentum @@ -93,7 +93,7 @@ SubPhase::SubPhase(std::shared_ptr dm): { // If timelog is empty, write a short header to list the averages //fprintf(TIMELOG,"--------------------------------------------------------------------------------------\n"); - fprintf(TIMELOG,"sw krw krn vw vn pw pn\n"); + fprintf(TIMELOG,"sw krw krn vw vn pw pn wet\n"); } } } @@ -109,7 +109,7 @@ SubPhase::~SubPhase() void SubPhase::Write(int timestep) { if (Dm->rank()==0){ - fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn); + fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn,total_wetting_interaction_global); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g ",gwc.p, gwd.p, gnc.p, gnd.p); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",gwc.M, gwd.M, giwn.Mw, gnc.M, gnd.M, giwn.Mn); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",gwc.Px, gwd.Px, giwn.Pwx, gnc.Px, gnd.Px, giwn.Pnx); @@ -125,7 +125,7 @@ void SubPhase::Write(int timestep) fflush(SUBPHASE); } else{ - fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g ",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn); + fprintf(SUBPHASE,"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g 
",timestep,rho_n,rho_w,nu_n,nu_w,Fx,Fy,Fz,gamma_wn,total_wetting_interaction); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g ",wc.p, wd.p, nc.p, nd.p); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",wc.M, wd.M, iwn.Mw, nc.M, nd.M, iwn.Mn); fprintf(SUBPHASE,"%.8g %.8g %.8g %.8g %.8g %.8g ",wc.Px, wd.Px, iwn.Pwx, nc.Px, nd.Px, iwn.Pnx); @@ -172,6 +172,21 @@ void SubPhase::Basic(){ double count_w = 0.0; double count_n = 0.0; + /* compute the laplacian */ + Dm->CommunicateMeshHalo(Phi); + for (int k=1; kCommunicateMeshHalo(DelPhi); + for (k=0; kid[n] > 0 && SDs(i,j,k) < 2.0 ){ + count_wetting_interaction += 1.0; + total_wetting_interaction += DelPhi(i,j,k); + } + } + } + } + //printf("wetting interaction = %f, count = %f\n",total_wetting_interaction,count_wetting_interaction); + total_wetting_interaction_global=Dm->Comm.sumReduce( total_wetting_interaction); + count_wetting_interaction_global=Dm->Comm.sumReduce( count_wetting_interaction); + /* normalize wetting interactions <-- Don't do this if normalizing laplacian (use solid surface area) + if (count_wetting_interaction > 0.0) + total_wetting_interaction /= count_wetting_interaction; + if (count_wetting_interaction_global > 0.0) + total_wetting_interaction_global /= count_wetting_interaction_global; + */ + gwb.V=Dm->Comm.sumReduce( wb.V); gnb.V=Dm->Comm.sumReduce( nb.V); gwb.M=Dm->Comm.sumReduce( wb.M); @@ -303,7 +348,7 @@ void SubPhase::Basic(){ double krn = h*h*nu_n*not_water_flow_rate / force_mag ; double krw = h*h*nu_w*water_flow_rate / force_mag; //printf(" water saturation = %f, fractional flow =%f \n",saturation,fractional_flow); - fprintf(TIMELOG,"%.5g %.5g %.5g %.5g %.5g %.5g %.5g\n",saturation,krw,krn,h*water_flow_rate,h*not_water_flow_rate, gwb.p, gnb.p); + fprintf(TIMELOG,"%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",saturation,krw,krn,h*water_flow_rate,h*not_water_flow_rate, gwb.p, gnb.p, total_wetting_interaction_global); fflush(TIMELOG); } if (err==true){ diff --git a/analysis/SubPhase.h 
b/analysis/SubPhase.h index 691c654f..a6d35edd 100644 --- a/analysis/SubPhase.h +++ b/analysis/SubPhase.h @@ -68,12 +68,16 @@ public: * b - bulk (total) */ // local entities - phase wc,wd,wb,nc,nd,nb; + phase wc,wd,wb,nc,nd,nb,solid; interface iwn,iwnc; // global entities - phase gwc,gwd,gwb,gnc,gnd,gnb; + phase gwc,gwd,gwb,gnc,gnd,gnb,gsolid; interface giwn,giwnc; + /* fluid-solid wetting interaction */ + double total_wetting_interaction, count_wetting_interaction; + double total_wetting_interaction_global, count_wetting_interaction_global; + //........................................................................... int Nx,Ny,Nz; IntArray PhaseID; // Phase ID array (solid=0, non-wetting=1, wetting=2) diff --git a/analysis/morphology.cpp b/analysis/morphology.cpp index ad231f3f..f21767dd 100644 --- a/analysis/morphology.cpp +++ b/analysis/morphology.cpp @@ -542,7 +542,6 @@ double MorphDrain(DoubleArray &SignDist, signed char *id, std::shared_ptrrank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); Dm->Comm.barrier(); @@ -703,12 +702,14 @@ double MorphGrow(DoubleArray &BoundaryDist, DoubleArray &Dist, Array &id, if (rank == 0) printf(" delta=%f, growth=%f, max. 
displacement = %f \n",morph_delta, GrowthEstimate, MAX_DISPLACEMENT); // Now adjust morph_delta - double step_size = (TargetGrowth - GrowthEstimate)*(morph_delta - morph_delta_previous) / (GrowthEstimate - GrowthPrevious); - GrowthPrevious = GrowthEstimate; - morph_delta_previous = morph_delta; - morph_delta += step_size; + if (fabs(GrowthEstimate - GrowthPrevious) > 0.0) { + double step_size = (TargetGrowth - GrowthEstimate)*(morph_delta - morph_delta_previous) / (GrowthEstimate - GrowthPrevious); + GrowthPrevious = GrowthEstimate; + morph_delta_previous = morph_delta; + morph_delta += step_size; + } if (morph_delta / morph_delta_previous > 2.0 ) morph_delta = morph_delta_previous*2.0; - + //MAX_DISPLACEMENT *= max(TargetGrowth/GrowthEstimate,1.25); if (morph_delta > 0.0 ){ diff --git a/analysis/runAnalysis.cpp b/analysis/runAnalysis.cpp index c09b71c2..ab40ae4c 100644 --- a/analysis/runAnalysis.cpp +++ b/analysis/runAnalysis.cpp @@ -13,140 +13,170 @@ #include "ProfilerApp.h" -AnalysisType& operator |=(AnalysisType &lhs, AnalysisType rhs) +AnalysisType &operator|=( AnalysisType &lhs, AnalysisType rhs ) { - lhs = static_cast( - static_cast::type>(lhs) | - static_cast::type>(rhs) - ); + lhs = static_cast( static_cast::type>( lhs ) | + static_cast::type>( rhs ) ); return lhs; } bool matches( AnalysisType x, AnalysisType y ) { - return ( static_cast::type>(x) & - static_cast::type>(y) ) != 0; + return ( static_cast::type>( x ) & + static_cast::type>( y ) ) != 0; } +// Create a shared_ptr to an array of values template -void DeleteArray( const TYPE *p ) +static inline std::shared_ptr make_shared_array( size_t N ) { - delete [] p; + return std::shared_ptr( new TYPE[N], []( const TYPE *p ) { delete[] p; } ); } // Helper class to write the restart file from a seperate thread -class WriteRestartWorkItem: public ThreadPool::WorkItemRet +class WriteRestartWorkItem : public ThreadPool::WorkItemRet { public: - WriteRestartWorkItem( const char* filename_, std::shared_ptr cDen_, 
std::shared_ptr cfq_, int N_ ): - filename(filename_), cfq(cfq_), cDen(cDen_), N(N_) {} - virtual void run() { - PROFILE_START("Save Checkpoint",1); + WriteRestartWorkItem( const std::string &filename_, std::shared_ptr cDen_, + std::shared_ptr cfq_, int N_ ) + : filename( filename_ ), cfq( cfq_ ), cDen( cDen_ ), N( N_ ) + { + } + virtual void run() + { + PROFILE_START( "Save Checkpoint", 1 ); double value; - ofstream File(filename,ios::binary); - for (int n=0; n cfq,cDen; - // const DoubleArray& phase; - //const DoubleArray& dist; + const std::string filename; + std::shared_ptr cfq, cDen; const int N; }; // Helper class to compute the blob ids +typedef std::shared_ptr> BlobIDstruct; +typedef std::shared_ptr> BlobIDList; static const std::string id_map_filename = "lbpm_id_map.txt"; -class BlobIdentificationWorkItem1: public ThreadPool::WorkItemRet +class BlobIdentificationWorkItem1 : public ThreadPool::WorkItemRet { public: - BlobIdentificationWorkItem1( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_, - std::shared_ptr phase_, const DoubleArray& dist_, - BlobIDstruct last_id_, BlobIDstruct new_index_, BlobIDstruct new_id_, BlobIDList new_list_, runAnalysis::commWrapper&& comm_ ): - timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), - phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_), comm(std::move(comm_)) -{ -} - ~BlobIdentificationWorkItem1() { } - virtual void run() { - // Compute the global blob id and compare to the previous version - PROFILE_START("Identify blobs",1); - double vF = 0.0; - double vS = -1.0; // one voxel buffer region around solid - IntArray& ids = new_index->second; - new_index->first = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,*phase,dist,vF,vS,ids,comm.comm); - PROFILE_STOP("Identify blobs",1); + BlobIdentificationWorkItem1( int timestep_, int Nx_, int Ny_, int Nz_, + const RankInfoStruct &rank_info_, std::shared_ptr phase_, + const 
DoubleArray &dist_, BlobIDstruct last_id_, BlobIDstruct new_index_, + BlobIDstruct new_id_, BlobIDList new_list_, runAnalysis::commWrapper &&comm_ ) + : timestep( timestep_ ), + Nx( Nx_ ), + Ny( Ny_ ), + Nz( Nz_ ), + rank_info( rank_info_ ), + phase( phase_ ), + dist( dist_ ), + last_id( last_id_ ), + new_index( new_index_ ), + new_id( new_id_ ), + new_list( new_list_ ), + comm( std::move( comm_ ) ) + { } + ~BlobIdentificationWorkItem1() {} + virtual void run() + { + // Compute the global blob id and compare to the previous version + PROFILE_START( "Identify blobs", 1 ); + double vF = 0.0; + double vS = -1.0; // one voxel buffer region around solid + IntArray &ids = new_index->second; + new_index->first = ComputeGlobalBlobIDs( + Nx - 2, Ny - 2, Nz - 2, rank_info, *phase, dist, vF, vS, ids, comm.comm ); + PROFILE_STOP( "Identify blobs", 1 ); + } + private: BlobIdentificationWorkItem1(); int timestep; int Nx, Ny, Nz; - const RankInfoStruct& rank_info; + const RankInfoStruct rank_info; std::shared_ptr phase; - const DoubleArray& dist; + const DoubleArray &dist; BlobIDstruct last_id, new_index, new_id; BlobIDList new_list; runAnalysis::commWrapper comm; }; -class BlobIdentificationWorkItem2: public ThreadPool::WorkItemRet +class BlobIdentificationWorkItem2 : public ThreadPool::WorkItemRet { public: - BlobIdentificationWorkItem2( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_, - std::shared_ptr phase_, const DoubleArray& dist_, - BlobIDstruct last_id_, BlobIDstruct new_index_, BlobIDstruct new_id_, BlobIDList new_list_ , runAnalysis::commWrapper&& comm_ ): - timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), - phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_), comm(std::move(comm_)) -{ -} - ~BlobIdentificationWorkItem2() { } - virtual void run() { + BlobIdentificationWorkItem2( int timestep_, int Nx_, int Ny_, int Nz_, + const RankInfoStruct &rank_info_, 
std::shared_ptr phase_, + const DoubleArray &dist_, BlobIDstruct last_id_, BlobIDstruct new_index_, + BlobIDstruct new_id_, BlobIDList new_list_, runAnalysis::commWrapper &&comm_ ) + : timestep( timestep_ ), + Nx( Nx_ ), + Ny( Ny_ ), + Nz( Nz_ ), + rank_info( rank_info_ ), + phase( phase_ ), + dist( dist_ ), + last_id( last_id_ ), + new_index( new_index_ ), + new_id( new_id_ ), + new_list( new_list_ ), + comm( std::move( comm_ ) ) + { + } + ~BlobIdentificationWorkItem2() {} + virtual void run() + { // Compute the global blob id and compare to the previous version - PROFILE_START("Identify blobs maps",1); - const IntArray& ids = new_index->second; - static int max_id = -1; - new_id->first = new_index->first; - new_id->second = new_index->second; - if ( last_id.get()!=NULL ) { + PROFILE_START( "Identify blobs maps", 1 ); + const IntArray &ids = new_index->second; + static int max_id = -1; + new_id->first = new_index->first; + new_id->second = new_index->second; + if ( last_id.get() != NULL ) { // Compute the timestep-timestep map - const IntArray& old_ids = last_id->second; - ID_map_struct map = computeIDMap(Nx,Ny,Nz,old_ids,ids,comm.comm); + const IntArray &old_ids = last_id->second; + ID_map_struct map = computeIDMap( Nx, Ny, Nz, old_ids, ids, comm.comm ); // Renumber the current timestep's ids - getNewIDs(map,max_id,*new_list); - renumberIDs(*new_list,new_id->second); - writeIDMap(map,timestep,id_map_filename); + getNewIDs( map, max_id, *new_list ); + renumberIDs( *new_list, new_id->second ); + writeIDMap( map, timestep, id_map_filename ); } else { max_id = -1; - ID_map_struct map(new_id->first); - getNewIDs(map,max_id,*new_list); - writeIDMap(map,timestep,id_map_filename); + ID_map_struct map( new_id->first ); + getNewIDs( map, max_id, *new_list ); + writeIDMap( map, timestep, id_map_filename ); } - PROFILE_STOP("Identify blobs maps",1); + PROFILE_STOP( "Identify blobs maps", 1 ); } + private: BlobIdentificationWorkItem2(); int timestep; int Nx, Ny, Nz; - const 
RankInfoStruct& rank_info; + const RankInfoStruct rank_info; std::shared_ptr phase; - const DoubleArray& dist; + const DoubleArray &dist; BlobIDstruct last_id, new_index, new_id; BlobIDList new_list; runAnalysis::commWrapper comm; @@ -154,324 +184,375 @@ private: // Helper class to write the vis file from a thread -class WriteVisWorkItem: public ThreadPool::WorkItemRet +class WriteVisWorkItem : public ThreadPool::WorkItemRet { public: - WriteVisWorkItem( int timestep_, std::vector& visData_, - TwoPhase& Avgerages_, fillHalo& fillData_, runAnalysis::commWrapper&& comm_ ): - timestep(timestep_), visData(visData_), Averages(Avgerages_), fillData(fillData_), comm(std::move(comm_)) - { - } - ~WriteVisWorkItem() { } - virtual void run() { - PROFILE_START("Save Vis",1); - - ASSERT(visData[0].vars[0]->name=="phase"); - Array& PhaseData = visData[0].vars[0]->data; - fillData.copy(Averages.SDn,PhaseData); + WriteVisWorkItem( int timestep_, std::vector &visData_, + TwoPhase &Avgerages_, std::array n_, RankInfoStruct rank_info_, + runAnalysis::commWrapper &&comm_ ) + : timestep( timestep_ ), + visData( visData_ ), + Averages( Avgerages_ ), + n( std::move( n_ ) ), + rank_info( std::move( rank_info_ ) ), + comm( std::move( comm_ ) ) + { + } + ~WriteVisWorkItem() {} + virtual void run() + { + PROFILE_START( "Save Vis", 1 ); - ASSERT(visData[0].vars[5]->name=="SignDist"); - Array& SignData = visData[0].vars[5]->data; - fillData.copy(Averages.SDs,SignData); + fillHalo fillData( comm.comm, rank_info, n, { 1, 1, 1 }, 0, 1 ); - ASSERT(visData[0].vars[1]->name=="Pressure"); - Array& PressData = visData[0].vars[1]->data; - fillData.copy(Averages.Press,PressData); + ASSERT( visData[0].vars[0]->name == "phase" ); + Array &PhaseData = visData[0].vars[0]->data; + fillData.copy( Averages.SDn, PhaseData ); - ASSERT(visData[0].vars[2]->name=="Velocity_x"); - ASSERT(visData[0].vars[3]->name=="Velocity_y"); - ASSERT(visData[0].vars[4]->name=="Velocity_z"); - Array& VelxData = 
visData[0].vars[2]->data; - Array& VelyData = visData[0].vars[3]->data; - Array& VelzData = visData[0].vars[4]->data; - fillData.copy(Averages.Vel_x,VelxData); - fillData.copy(Averages.Vel_y,VelyData); - fillData.copy(Averages.Vel_z,VelzData); - - ASSERT(visData[0].vars[6]->name=="BlobID"); - Array& BlobData = visData[0].vars[6]->data; - fillData.copy(Averages.Label_NWP,BlobData); + ASSERT( visData[0].vars[5]->name == "SignDist" ); + Array &SignData = visData[0].vars[5]->data; + fillData.copy( Averages.SDs, SignData ); + + ASSERT( visData[0].vars[1]->name == "Pressure" ); + Array &PressData = visData[0].vars[1]->data; + fillData.copy( Averages.Press, PressData ); + + ASSERT( visData[0].vars[2]->name == "Velocity_x" ); + ASSERT( visData[0].vars[3]->name == "Velocity_y" ); + ASSERT( visData[0].vars[4]->name == "Velocity_z" ); + Array &VelxData = visData[0].vars[2]->data; + Array &VelyData = visData[0].vars[3]->data; + Array &VelzData = visData[0].vars[4]->data; + fillData.copy( Averages.Vel_x, VelxData ); + fillData.copy( Averages.Vel_y, VelyData ); + fillData.copy( Averages.Vel_z, VelzData ); + + ASSERT( visData[0].vars[6]->name == "BlobID" ); + Array &BlobData = visData[0].vars[6]->data; + fillData.copy( Averages.Label_NWP, BlobData ); IO::writeData( timestep, visData, comm.comm ); - - PROFILE_STOP("Save Vis",1); + + PROFILE_STOP( "Save Vis", 1 ); }; + private: WriteVisWorkItem(); int timestep; - std::vector& visData; - TwoPhase& Averages; - fillHalo& fillData; + std::array n; + RankInfoStruct rank_info; + std::vector &visData; + TwoPhase &Averages; runAnalysis::commWrapper comm; }; // Helper class to write the vis file from a thread -class IOWorkItem: public ThreadPool::WorkItemRet +class IOWorkItem : public ThreadPool::WorkItemRet { public: - IOWorkItem(int timestep_, std::shared_ptr input_db_, std::vector& visData_, - SubPhase& Averages_, fillHalo& fillData_, runAnalysis::commWrapper&& comm_ ): - timestep(timestep_), input_db(input_db_), visData(visData_), 
Averages(Averages_), fillData(fillData_), comm(std::move(comm_)) - { - } - ~IOWorkItem() { } - virtual void run() { - auto color_db = input_db->getDatabase( "Color" ); - auto vis_db = input_db->getDatabase( "Visualization" ); - // int timestep = color_db->getWithDefault( "timestep", 0 ); + IOWorkItem( int timestep_, std::shared_ptr input_db_, + std::vector &visData_, SubPhase &Averages_, std::array n_, + RankInfoStruct rank_info_, runAnalysis::commWrapper &&comm_ ) + : timestep( timestep_ ), + input_db( input_db_ ), + visData( visData_ ), + Averages( Averages_ ), + n( std::move( n_ ) ), + rank_info( std::move( rank_info_ ) ), + comm( std::move( comm_ ) ) + { + } + ~IOWorkItem() {} + virtual void run() + { + PROFILE_START( "Save Vis", 1 ); - PROFILE_START("Save Vis",1); + auto color_db = input_db->getDatabase( "Color" ); + auto vis_db = input_db->getDatabase( "Visualization" ); + // int timestep = color_db->getWithDefault( "timestep", 0 ); - if (vis_db->getWithDefault( "save_phase_field", true )){ - ASSERT(visData[0].vars[0]->name=="phase"); - Array& PhaseData = visData[0].vars[0]->data; - fillData.copy(Averages.Phi,PhaseData); + fillHalo fillData( comm.comm, rank_info, n, { 1, 1, 1 }, 0, 1 ); + + if ( vis_db->getWithDefault( "save_phase_field", true ) ) { + ASSERT( visData[0].vars[0]->name == "phase" ); + Array &PhaseData = visData[0].vars[0]->data; + fillData.copy( Averages.Phi, PhaseData ); } - if (vis_db->getWithDefault( "save_pressure", false )){ - ASSERT(visData[0].vars[1]->name=="Pressure"); - Array& PressData = visData[0].vars[1]->data; - fillData.copy(Averages.Pressure,PressData); + if ( vis_db->getWithDefault( "save_pressure", false ) ) { + ASSERT( visData[0].vars[1]->name == "Pressure" ); + Array &PressData = visData[0].vars[1]->data; + fillData.copy( Averages.Pressure, PressData ); } - if (vis_db->getWithDefault( "save_velocity", false )){ - ASSERT(visData[0].vars[2]->name=="Velocity_x"); - ASSERT(visData[0].vars[3]->name=="Velocity_y"); - 
ASSERT(visData[0].vars[4]->name=="Velocity_z"); - Array& VelxData = visData[0].vars[2]->data; - Array& VelyData = visData[0].vars[3]->data; - Array& VelzData = visData[0].vars[4]->data; - fillData.copy(Averages.Vel_x,VelxData); - fillData.copy(Averages.Vel_y,VelyData); - fillData.copy(Averages.Vel_z,VelzData); + if ( vis_db->getWithDefault( "save_velocity", false ) ) { + ASSERT( visData[0].vars[2]->name == "Velocity_x" ); + ASSERT( visData[0].vars[3]->name == "Velocity_y" ); + ASSERT( visData[0].vars[4]->name == "Velocity_z" ); + Array &VelxData = visData[0].vars[2]->data; + Array &VelyData = visData[0].vars[3]->data; + Array &VelzData = visData[0].vars[4]->data; + fillData.copy( Averages.Vel_x, VelxData ); + fillData.copy( Averages.Vel_y, VelyData ); + fillData.copy( Averages.Vel_z, VelzData ); } - if (vis_db->getWithDefault( "save_distance", false )){ - ASSERT(visData[0].vars[5]->name=="SignDist"); - Array& SignData = visData[0].vars[5]->data; - fillData.copy(Averages.SDs,SignData); + if ( vis_db->getWithDefault( "save_distance", false ) ) { + ASSERT( visData[0].vars[5]->name == "SignDist" ); + Array &SignData = visData[0].vars[5]->data; + fillData.copy( Averages.SDs, SignData ); } - if (vis_db->getWithDefault( "save_connected_components", false )){ - ASSERT(visData[0].vars[6]->name=="BlobID"); - Array& BlobData = visData[0].vars[6]->data; - fillData.copy(Averages.morph_n->label,BlobData); - } - - if (vis_db->getWithDefault( "write_silo", true )) - IO::writeData( timestep, visData, comm.comm ); - - if (vis_db->getWithDefault( "save_8bit_raw", true )){ - char CurrentIDFilename[40]; - sprintf(CurrentIDFilename,"id_t%d.raw",timestep); - Averages.AggregateLabels(CurrentIDFilename); + if ( vis_db->getWithDefault( "save_connected_components", false ) ) { + ASSERT( visData[0].vars[6]->name == "BlobID" ); + Array &BlobData = visData[0].vars[6]->data; + fillData.copy( Averages.morph_n->label, BlobData ); } - PROFILE_STOP("Save Vis",1); + if ( vis_db->getWithDefault( 
"write_silo", true ) ) + IO::writeData( timestep, visData, comm.comm ); + + if ( vis_db->getWithDefault( "save_8bit_raw", true ) ) { + char CurrentIDFilename[40]; + sprintf( CurrentIDFilename, "id_t%d.raw", timestep ); + Averages.AggregateLabels( CurrentIDFilename ); + } + + PROFILE_STOP( "Save Vis", 1 ); }; + private: IOWorkItem(); int timestep; + std::array n; + RankInfoStruct rank_info; std::shared_ptr input_db; - std::vector& visData; - SubPhase& Averages; - fillHalo& fillData; + std::vector &visData; + SubPhase &Averages; runAnalysis::commWrapper comm; }; // Helper class to run the analysis from within a thread // Note: Averages will be modified after the constructor is called -class AnalysisWorkItem: public ThreadPool::WorkItemRet +class AnalysisWorkItem : public ThreadPool::WorkItemRet { public: - AnalysisWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_, - BlobIDstruct ids, BlobIDList id_list_, double beta_ ): - type(type_), timestep(timestep_), Averages(Averages_), - blob_ids(ids), id_list(id_list_), beta(beta_) { } - ~AnalysisWorkItem() { } - virtual void run() { + AnalysisWorkItem( AnalysisType type_, int timestep_, TwoPhase &Averages_, BlobIDstruct ids, + BlobIDList id_list_, double beta_ ) + : type( type_ ), + timestep( timestep_ ), + Averages( Averages_ ), + blob_ids( ids ), + id_list( id_list_ ), + beta( beta_ ) + { + } + ~AnalysisWorkItem() {} + virtual void run() + { Averages.NumberComponents_NWP = blob_ids->first; - Averages.Label_NWP = blob_ids->second; - Averages.Label_NWP_map = *id_list; - Averages.NumberComponents_WP = 1; - Averages.Label_WP.fill(0.0); - if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + Averages.Label_NWP = blob_ids->second; + Averages.Label_NWP_map = *id_list; + Averages.NumberComponents_WP = 1; + Averages.Label_WP.fill( 0.0 ); + if ( matches( type, AnalysisType::CopyPhaseIndicator ) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( 
matches(type,AnalysisType::ComputeAverages) ) { - PROFILE_START("Compute dist",1); + if ( matches( type, AnalysisType::ComputeAverages ) ) { + PROFILE_START( "Compute dist", 1 ); Averages.Initialize(); Averages.ComputeDelPhi(); - Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.SDn); - Averages.ColorToSignedDistance(beta,Averages.Phase_tminus,Averages.Phase_tminus); - Averages.ColorToSignedDistance(beta,Averages.Phase_tplus,Averages.Phase_tplus); + Averages.ColorToSignedDistance( beta, Averages.Phase, Averages.SDn ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tminus, Averages.Phase_tminus ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tplus, Averages.Phase_tplus ); Averages.UpdateMeshValues(); Averages.ComputeLocal(); Averages.Reduce(); - Averages.PrintAll(timestep); + Averages.PrintAll( timestep ); Averages.Initialize(); Averages.ComponentAverages(); Averages.SortBlobs(); - Averages.PrintComponents(timestep); - PROFILE_STOP("Compute dist",1); + Averages.PrintComponents( timestep ); + PROFILE_STOP( "Compute dist", 1 ); } } + private: AnalysisWorkItem(); AnalysisType type; int timestep; - TwoPhase& Averages; + TwoPhase &Averages; BlobIDstruct blob_ids; BlobIDList id_list; double beta; }; -class TCATWorkItem: public ThreadPool::WorkItemRet +class TCATWorkItem : public ThreadPool::WorkItemRet { public: - TCATWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_, - BlobIDstruct ids, BlobIDList id_list_, double beta_ ): - type(type_), timestep(timestep_), Averages(Averages_), - blob_ids(ids), id_list(id_list_), beta(beta_) { } - ~TCATWorkItem() { } - virtual void run() { + TCATWorkItem( AnalysisType type_, int timestep_, TwoPhase &Averages_, BlobIDstruct ids, + BlobIDList id_list_, double beta_ ) + : type( type_ ), + timestep( timestep_ ), + Averages( Averages_ ), + blob_ids( ids ), + id_list( id_list_ ), + beta( beta_ ) + { + } + ~TCATWorkItem() {} + virtual void run() + { Averages.NumberComponents_NWP = blob_ids->first; - 
Averages.Label_NWP = blob_ids->second; - Averages.Label_NWP_map = *id_list; - Averages.NumberComponents_WP = 1; - Averages.Label_WP.fill(0.0); - if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + Averages.Label_NWP = blob_ids->second; + Averages.Label_NWP_map = *id_list; + Averages.NumberComponents_WP = 1; + Averages.Label_WP.fill( 0.0 ); + if ( matches( type, AnalysisType::CopyPhaseIndicator ) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( matches(type,AnalysisType::ComputeAverages) ) { - PROFILE_START("Compute TCAT",1); + if ( matches( type, AnalysisType::ComputeAverages ) ) { + PROFILE_START( "Compute TCAT", 1 ); Averages.Initialize(); Averages.ComputeDelPhi(); - Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.SDn); - Averages.ColorToSignedDistance(beta,Averages.Phase_tminus,Averages.Phase_tminus); - Averages.ColorToSignedDistance(beta,Averages.Phase_tplus,Averages.Phase_tplus); + Averages.ColorToSignedDistance( beta, Averages.Phase, Averages.SDn ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tminus, Averages.Phase_tminus ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tplus, Averages.Phase_tplus ); Averages.UpdateMeshValues(); Averages.ComputeLocal(); Averages.Reduce(); - Averages.PrintAll(timestep); - PROFILE_STOP("Compute TCAT",1); + Averages.PrintAll( timestep ); + PROFILE_STOP( "Compute TCAT", 1 ); } } + private: TCATWorkItem(); AnalysisType type; int timestep; - TwoPhase& Averages; + TwoPhase &Averages; BlobIDstruct blob_ids; BlobIDList id_list; double beta; }; -class GanglionTrackingWorkItem: public ThreadPool::WorkItemRet +class GanglionTrackingWorkItem : public ThreadPool::WorkItemRet { public: - GanglionTrackingWorkItem( AnalysisType type_, int timestep_, TwoPhase& Averages_, - BlobIDstruct ids, BlobIDList id_list_, double beta_ ): - type(type_), timestep(timestep_), Averages(Averages_), - blob_ids(ids), id_list(id_list_), beta(beta_) { } - ~GanglionTrackingWorkItem() { 
} - virtual void run() { + GanglionTrackingWorkItem( AnalysisType type_, int timestep_, TwoPhase &Averages_, + BlobIDstruct ids, BlobIDList id_list_, double beta_ ) + : type( type_ ), + timestep( timestep_ ), + Averages( Averages_ ), + blob_ids( ids ), + id_list( id_list_ ), + beta( beta_ ) + { + } + ~GanglionTrackingWorkItem() {} + virtual void run() + { Averages.NumberComponents_NWP = blob_ids->first; - Averages.Label_NWP = blob_ids->second; - Averages.Label_NWP_map = *id_list; - Averages.NumberComponents_WP = 1; - Averages.Label_WP.fill(0.0); - if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + Averages.Label_NWP = blob_ids->second; + Averages.Label_NWP_map = *id_list; + Averages.NumberComponents_WP = 1; + Averages.Label_WP.fill( 0.0 ); + if ( matches( type, AnalysisType::CopyPhaseIndicator ) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( matches(type,AnalysisType::ComputeAverages) ) { - PROFILE_START("Compute ganglion",1); + if ( matches( type, AnalysisType::ComputeAverages ) ) { + PROFILE_START( "Compute ganglion", 1 ); Averages.Initialize(); Averages.ComputeDelPhi(); - Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.SDn); - Averages.ColorToSignedDistance(beta,Averages.Phase_tminus,Averages.Phase_tminus); - Averages.ColorToSignedDistance(beta,Averages.Phase_tplus,Averages.Phase_tplus); + Averages.ColorToSignedDistance( beta, Averages.Phase, Averages.SDn ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tminus, Averages.Phase_tminus ); + Averages.ColorToSignedDistance( beta, Averages.Phase_tplus, Averages.Phase_tplus ); Averages.UpdateMeshValues(); Averages.ComponentAverages(); Averages.SortBlobs(); - Averages.PrintComponents(timestep); - PROFILE_STOP("Compute ganglion",1); + Averages.PrintComponents( timestep ); + PROFILE_STOP( "Compute ganglion", 1 ); } } + private: GanglionTrackingWorkItem(); AnalysisType type; int timestep; - TwoPhase& Averages; + TwoPhase &Averages; BlobIDstruct 
blob_ids; BlobIDList id_list; double beta; }; -class BasicWorkItem: public ThreadPool::WorkItemRet +class BasicWorkItem : public ThreadPool::WorkItemRet { public: - BasicWorkItem( AnalysisType type_, int timestep_, SubPhase& Averages_ ): - type(type_), timestep(timestep_), Averages(Averages_){ } - ~BasicWorkItem() { } - virtual void run() { + BasicWorkItem( AnalysisType type_, int timestep_, SubPhase &Averages_ ) + : type( type_ ), timestep( timestep_ ), Averages( Averages_ ) + { + } + ~BasicWorkItem() {} + virtual void run() + { - if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + if ( matches( type, AnalysisType::CopyPhaseIndicator ) ) { // Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); } - if ( matches(type,AnalysisType::ComputeAverages) ) { - PROFILE_START("Compute basic averages",1); + if ( matches( type, AnalysisType::ComputeAverages ) ) { + PROFILE_START( "Compute basic averages", 1 ); Averages.Basic(); - PROFILE_STOP("Compute basic averages",1); + PROFILE_STOP( "Compute basic averages", 1 ); } } + private: BasicWorkItem(); AnalysisType type; int timestep; - SubPhase& Averages; + SubPhase &Averages; double beta; }; -class SubphaseWorkItem: public ThreadPool::WorkItemRet +class SubphaseWorkItem : public ThreadPool::WorkItemRet { public: - SubphaseWorkItem( AnalysisType type_, int timestep_, SubPhase& Averages_ ): - type(type_), timestep(timestep_), Averages(Averages_){ } - ~SubphaseWorkItem() { } - virtual void run() { - - PROFILE_START("Compute subphase",1); - Averages.Full(); - Averages.Write(timestep); - PROFILE_STOP("Compute subphase",1); + SubphaseWorkItem( AnalysisType type_, int timestep_, SubPhase &Averages_ ) + : type( type_ ), timestep( timestep_ ), Averages( Averages_ ) + { } + ~SubphaseWorkItem() {} + virtual void run() + { + + PROFILE_START( "Compute subphase", 1 ); + Averages.Full(); + Averages.Write( timestep ); + PROFILE_STOP( "Compute subphase", 1 ); + } + private: SubphaseWorkItem(); AnalysisType type; int 
timestep; - SubPhase& Averages; + SubPhase &Averages; double beta; }; - /****************************************************************** * MPI comm wrapper for use with analysis * ******************************************************************/ -runAnalysis::commWrapper::commWrapper( int tag_, const Utilities::MPI& comm_, runAnalysis* analysis_ ): - comm(comm_), - tag(tag_), - analysis(analysis_) +runAnalysis::commWrapper::commWrapper( + int tag_, const Utilities::MPI &comm_, runAnalysis *analysis_ ) + : comm( comm_ ), tag( tag_ ), analysis( analysis_ ) { } -runAnalysis::commWrapper::commWrapper( commWrapper &&rhs ): - comm(rhs.comm), - tag(rhs.tag), - analysis(rhs.analysis) +runAnalysis::commWrapper::commWrapper( commWrapper &&rhs ) + : comm( rhs.comm ), tag( rhs.tag ), analysis( rhs.analysis ) { rhs.tag = -1; } @@ -482,48 +563,44 @@ runAnalysis::commWrapper::~commWrapper() comm.barrier(); analysis->d_comm_used[tag] = false; } -runAnalysis::commWrapper runAnalysis::getComm( ) +runAnalysis::commWrapper runAnalysis::getComm() { // Get a tag from root int tag = -1; if ( d_rank == 0 ) { - for (int i=0; i<1024; i++) { + for ( int i = 0; i < 1024; i++ ) { if ( !d_comm_used[i] ) { tag = i; break; } } if ( tag == -1 ) - ERROR("Unable to get comm"); + ERROR( "Unable to get comm" ); } - tag = d_comm.bcast( tag, 0 ); + tag = d_comm.bcast( tag, 0 ); d_comm_used[tag] = true; if ( d_comms[tag].isNull() ) d_comms[tag] = d_comm.dup(); - return commWrapper(tag,d_comms[tag],this); + return commWrapper( tag, d_comms[tag], this ); } /****************************************************************** * Constructor/Destructors * ******************************************************************/ -runAnalysis::runAnalysis( std::shared_ptr input_db, - const RankInfoStruct& rank_info, - std::shared_ptr ScaLBL_Comm, - std::shared_ptr Dm, - int Np, - bool Regular, - IntArray Map ): - d_Np( Np ), - d_regular ( Regular), - d_rank_info( rank_info ), - d_Map( Map ), - d_comm( 
Dm->Comm.dup() ), - d_ScaLBL_Comm( ScaLBL_Comm) +runAnalysis::runAnalysis( std::shared_ptr input_db, const RankInfoStruct &rank_info, + std::shared_ptr ScaLBL_Comm, std::shared_ptr Dm, int Np, + bool Regular, IntArray Map ) + : d_Np( Np ), + d_regular( Regular ), + d_rank_info( rank_info ), + d_Map( Map ), + d_comm( Dm->Comm.dup() ), + d_ScaLBL_Comm( ScaLBL_Comm ) { - auto db = input_db->getDatabase( "Analysis" ); - auto vis_db = input_db->getDatabase( "Visualization" ); + auto db = input_db->getDatabase( "Analysis" ); + auto vis_db = input_db->getDatabase( "Visualization" ); // Ids of work items to use for dependencies ThreadPool::thread_id_t d_wait_blobID; @@ -533,117 +610,251 @@ runAnalysis::runAnalysis( std::shared_ptr input_db, ThreadPool::thread_id_t d_wait_subphase; char rankString[20]; - sprintf(rankString,"%05d",Dm->rank()); - d_n[0] = Dm->Nx-2; - d_n[1] = Dm->Ny-2; - d_n[2] = Dm->Nz-2; + sprintf( rankString, "%05d", Dm->rank() ); + d_n[0] = Dm->Nx - 2; + d_n[1] = Dm->Ny - 2; + d_n[2] = Dm->Nz - 2; d_N[0] = Dm->Nx; d_N[1] = Dm->Ny; d_N[2] = Dm->Nz; - - d_restart_interval = db->getScalar( "restart_interval" ); - d_analysis_interval = db->getScalar( "analysis_interval" ); + + d_restart_interval = db->getScalar( "restart_interval" ); + d_analysis_interval = db->getScalar( "analysis_interval" ); d_subphase_analysis_interval = INT_MAX; - d_visualization_interval = INT_MAX; - d_blobid_interval = INT_MAX; - if (db->keyExists( "blobid_interval" )){ - d_blobid_interval = db->getScalar( "blobid_interval" ); - } - if (db->keyExists( "visualization_interval" )){ - d_visualization_interval = db->getScalar( "visualization_interval" ); - } - if (db->keyExists( "subphase_analysis_interval" )){ - d_subphase_analysis_interval = db->getScalar( "subphase_analysis_interval" ); - } - + d_visualization_interval = INT_MAX; + d_blobid_interval = INT_MAX; + if ( db->keyExists( "blobid_interval" ) ) { + d_blobid_interval = db->getScalar( "blobid_interval" ); + } + if ( 
db->keyExists( "visualization_interval" ) ) { + d_visualization_interval = db->getScalar( "visualization_interval" ); + } + if ( db->keyExists( "subphase_analysis_interval" ) ) { + d_subphase_analysis_interval = db->getScalar( "subphase_analysis_interval" ); + } + auto restart_file = db->getScalar( "restart_file" ); - d_restartFile = restart_file + "." + rankString; - - + d_restartFile = restart_file + "." + rankString; + + d_rank = d_comm.getRank(); - writeIDMap(ID_map_struct(),0,id_map_filename); + writeIDMap( ID_map_struct(), 0, id_map_filename ); // Initialize IO for silo - IO::initialize("","silo","false"); - // Create the MeshDataStruct - d_meshData.resize(1); + IO::initialize( "", "silo", "false" ); + // Create the MeshDataStruct + d_meshData.resize( 1 ); d_meshData[0].meshName = "domain"; - d_meshData[0].mesh = std::make_shared( d_rank_info,d_n[0],d_n[1],d_n[2],Dm->Lx,Dm->Ly,Dm->Lz ); - auto PhaseVar = std::make_shared(); - auto PressVar = std::make_shared(); - auto VxVar = std::make_shared(); - auto VyVar = std::make_shared(); - auto VzVar = std::make_shared(); + d_meshData[0].mesh = std::make_shared( + d_rank_info, d_n[0], d_n[1], d_n[2], Dm->Lx, Dm->Ly, Dm->Lz ); + auto PhaseVar = std::make_shared(); + auto PressVar = std::make_shared(); + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); auto SignDistVar = std::make_shared(); - auto BlobIDVar = std::make_shared(); - - if (vis_db->getWithDefault( "save_phase_field", true )){ + auto BlobIDVar = std::make_shared(); + + if ( vis_db->getWithDefault( "save_phase_field", true ) ) { PhaseVar->name = "phase"; PhaseVar->type = IO::VariableType::VolumeVariable; - PhaseVar->dim = 1; - PhaseVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(PhaseVar); + PhaseVar->dim = 1; + PhaseVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( PhaseVar ); } - if (vis_db->getWithDefault( "save_pressure", false )){ + if ( 
vis_db->getWithDefault( "save_pressure", false ) ) { PressVar->name = "Pressure"; PressVar->type = IO::VariableType::VolumeVariable; - PressVar->dim = 1; - PressVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(PressVar); + PressVar->dim = 1; + PressVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( PressVar ); } - if (vis_db->getWithDefault( "save_velocity", false )){ + if ( vis_db->getWithDefault( "save_velocity", false ) ) { VxVar->name = "Velocity_x"; VxVar->type = IO::VariableType::VolumeVariable; - VxVar->dim = 1; - VxVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(VxVar); + VxVar->dim = 1; + VxVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VxVar ); VyVar->name = "Velocity_y"; VyVar->type = IO::VariableType::VolumeVariable; - VyVar->dim = 1; - VyVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(VyVar); + VyVar->dim = 1; + VyVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VyVar ); VzVar->name = "Velocity_z"; VzVar->type = IO::VariableType::VolumeVariable; - VzVar->dim = 1; - VzVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(VzVar); + VzVar->dim = 1; + VzVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VzVar ); } - if (vis_db->getWithDefault( "save_distance", false )){ + if ( vis_db->getWithDefault( "save_distance", false ) ) { SignDistVar->name = "SignDist"; SignDistVar->type = IO::VariableType::VolumeVariable; - SignDistVar->dim = 1; - SignDistVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(SignDistVar); + SignDistVar->dim = 1; + SignDistVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( SignDistVar ); } - if (vis_db->getWithDefault( "save_connected_components", false )){ + if ( vis_db->getWithDefault( "save_connected_components", false ) ) { BlobIDVar->name = "BlobID"; BlobIDVar->type = 
IO::VariableType::VolumeVariable; - BlobIDVar->dim = 1; - BlobIDVar->data.resize(d_n[0],d_n[1],d_n[2]); - d_meshData[0].vars.push_back(BlobIDVar); + BlobIDVar->dim = 1; + BlobIDVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( BlobIDVar ); } - + // Initialize the comms - for (int i=0; i<1024; i++) + for ( int i = 0; i < 1024; i++ ) d_comm_used[i] = false; // Initialize the threads int N_threads = db->getWithDefault( "N_threads", 4 ); - auto method = db->getWithDefault( "load_balance", "default" ); + auto method = db->getWithDefault( "load_balance", "default" ); createThreads( method, N_threads ); } -runAnalysis::~runAnalysis( ) + +runAnalysis::runAnalysis( ScaLBL_ColorModel &ColorModel) +/* std::shared_ptr input_db, const RankInfoStruct &rank_info, + std::shared_ptr ScaLBL_Comm, std::shared_ptr Dm, int Np, + bool Regular, IntArray Map ) + : d_Np( Np ), + d_regular( Regular ), + d_rank_info( rank_info ), + d_Map( Map ), + d_comm( Dm->Comm.dup() ), + d_ScaLBL_Comm( ScaLBL_Comm )*/ +{ + + d_comm = ColorModel.Dm->Comm.dup(); + d_Np = ColorModel.Np; + bool Regular = false; + + auto input_db = ColorModel.db; + auto db = input_db->getDatabase( "Analysis" ); + auto vis_db = input_db->getDatabase( "Visualization" ); + + // Ids of work items to use for dependencies + ThreadPool::thread_id_t d_wait_blobID; + ThreadPool::thread_id_t d_wait_analysis; + ThreadPool::thread_id_t d_wait_vis; + ThreadPool::thread_id_t d_wait_restart; + ThreadPool::thread_id_t d_wait_subphase; + + char rankString[20]; + sprintf( rankString, "%05d", ColorModel.Dm->rank() ); + d_n[0] = ColorModel.Dm->Nx - 2; + d_n[1] = ColorModel.Dm->Ny - 2; + d_n[2] = ColorModel.Dm->Nz - 2; + d_N[0] = ColorModel.Dm->Nx; + d_N[1] = ColorModel.Dm->Ny; + d_N[2] = ColorModel.Dm->Nz; + + d_restart_interval = db->getScalar( "restart_interval" ); + d_analysis_interval = db->getScalar( "analysis_interval" ); + d_subphase_analysis_interval = INT_MAX; + d_visualization_interval = INT_MAX; + 
d_blobid_interval = INT_MAX; + if ( db->keyExists( "blobid_interval" ) ) { + d_blobid_interval = db->getScalar( "blobid_interval" ); + } + if ( db->keyExists( "visualization_interval" ) ) { + d_visualization_interval = db->getScalar( "visualization_interval" ); + } + if ( db->keyExists( "subphase_analysis_interval" ) ) { + d_subphase_analysis_interval = db->getScalar( "subphase_analysis_interval" ); + } + + auto restart_file = db->getScalar( "restart_file" ); + d_restartFile = restart_file + "." + rankString; + + + d_rank = d_comm.getRank(); + writeIDMap( ID_map_struct(), 0, id_map_filename ); + // Initialize IO for silo + IO::initialize( "", "silo", "false" ); + // Create the MeshDataStruct + d_meshData.resize( 1 ); + + d_meshData[0].meshName = "domain"; + d_meshData[0].mesh = std::make_shared( + d_rank_info, d_n[0], d_n[1], d_n[2], ColorModel.Dm->Lx, ColorModel.Dm->Ly, ColorModel.Dm->Lz ); + auto PhaseVar = std::make_shared(); + auto PressVar = std::make_shared(); + auto VxVar = std::make_shared(); + auto VyVar = std::make_shared(); + auto VzVar = std::make_shared(); + auto SignDistVar = std::make_shared(); + auto BlobIDVar = std::make_shared(); + + if ( vis_db->getWithDefault( "save_phase_field", true ) ) { + PhaseVar->name = "phase"; + PhaseVar->type = IO::VariableType::VolumeVariable; + PhaseVar->dim = 1; + PhaseVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( PhaseVar ); + } + + if ( vis_db->getWithDefault( "save_pressure", false ) ) { + PressVar->name = "Pressure"; + PressVar->type = IO::VariableType::VolumeVariable; + PressVar->dim = 1; + PressVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( PressVar ); + } + + if ( vis_db->getWithDefault( "save_velocity", false ) ) { + VxVar->name = "Velocity_x"; + VxVar->type = IO::VariableType::VolumeVariable; + VxVar->dim = 1; + VxVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VxVar ); + VyVar->name = "Velocity_y"; + VyVar->type = 
IO::VariableType::VolumeVariable; + VyVar->dim = 1; + VyVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VyVar ); + VzVar->name = "Velocity_z"; + VzVar->type = IO::VariableType::VolumeVariable; + VzVar->dim = 1; + VzVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( VzVar ); + } + + if ( vis_db->getWithDefault( "save_distance", false ) ) { + SignDistVar->name = "SignDist"; + SignDistVar->type = IO::VariableType::VolumeVariable; + SignDistVar->dim = 1; + SignDistVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( SignDistVar ); + } + + if ( vis_db->getWithDefault( "save_connected_components", false ) ) { + BlobIDVar->name = "BlobID"; + BlobIDVar->type = IO::VariableType::VolumeVariable; + BlobIDVar->dim = 1; + BlobIDVar->data.resize( d_n[0], d_n[1], d_n[2] ); + d_meshData[0].vars.push_back( BlobIDVar ); + } + + + // Initialize the comms + for ( int i = 0; i < 1024; i++ ) + d_comm_used[i] = false; + // Initialize the threads + int N_threads = db->getWithDefault( "N_threads", 4 ); + auto method = db->getWithDefault( "load_balance", "default" ); + createThreads( method, N_threads ); +} +runAnalysis::~runAnalysis() { // Finish processing analysis finish(); } -void runAnalysis::finish( ) +void runAnalysis::finish() { - PROFILE_START("finish"); + PROFILE_START( "finish" ); // Wait for the work items to finish d_tpool.wait_pool_finished(); // Clear the wait ids @@ -654,23 +865,23 @@ void runAnalysis::finish( ) d_wait_restart.reset(); // Syncronize d_comm.barrier(); - PROFILE_STOP("finish"); + PROFILE_STOP( "finish" ); } /****************************************************************** * Set the thread affinities * ******************************************************************/ -void print( const std::vector& ids ) +void print( const std::vector &ids ) { if ( ids.empty() ) return; - printf("%i",ids[0]); - for (size_t i=1; i 0 ) - std::cerr << "Warning: Failed to start MPI with necessary thread 
support, errors may occur\n"; + std::cerr + << "Warning: Failed to start MPI with necessary thread support, errors may occur\n"; // Create the threads const auto cores = d_tpool.getProcessAffinity(); if ( N_threads == 0 ) { @@ -694,17 +906,17 @@ void runAnalysis::createThreads( const std::string& method, int N_threads ) int N = cores.size() - 1; d_tpool.setNumThreads( N ); d_tpool.setThreadAffinity( { cores[0] } ); - for ( int i=0; i input_db, TwoPhase& Averages, const double *Phi, - double *Pressure, double *Velocity, double *fq, double *Den) +void runAnalysis::run( int timestep, std::shared_ptr input_db, TwoPhase &Averages, + const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ) { - int N = d_N[0]*d_N[1]*d_N[2]; + int N = d_N[0] * d_N[1] * d_N[2]; NULL_USE( N ); NULL_USE( Phi ); - - auto db = input_db->getDatabase( "Analysis" ); - //int timestep = db->getWithDefault( "timestep", 0 ); + + auto db = input_db->getDatabase( "Analysis" ); + // int timestep = db->getWithDefault( "timestep", 0 ); // Check which analysis steps we need to perform auto type = computeAnalysisType( timestep ); @@ -783,18 +994,18 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase finish(); } - PROFILE_START("run"); + PROFILE_START( "run" ); // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); - PROFILE_START("Copy data to host",1); + PROFILE_START( "Copy data to host", 1 ); std::shared_ptr phase; /* if ( matches(type,AnalysisType::CopyPhaseIndicator) || matches(type,AnalysisType::ComputeAverages) || - matches(type,AnalysisType::CopySimState) || + matches(type,AnalysisType::CopySimState) || matches(type,AnalysisType::IdentifyBlobs) ) { - phase = std::shared_ptr(new DoubleArray(d_N[0],d_N[1],d_N[2])); + phase = std::make_shared(d_N[0],d_N[1],d_N[2]); //ScaLBL_CopyToHost(phase->data(),Phi,N*sizeof(double)); // try 2 d_ScaLBL_Comm.RegulLayout(d_Map,Phi,Averages.Phase); // 
memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double)); @@ -820,131 +1031,137 @@ void runAnalysis::run(int timestep, std::shared_ptr input_db, TwoPhase delete [] TmpDat; } */ - //if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { - if ( timestep%d_analysis_interval + 8 == d_analysis_interval ) { - if (d_regular) - d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phase_tplus); - else - ScaLBL_CopyToHost(Averages.Phase_tplus.data(),Phi,N*sizeof(double)); - //memcpy(Averages.Phase_tplus.data(),phase->data(),N*sizeof(double)); - } - if ( timestep%d_analysis_interval == 0 ) { - if (d_regular) - d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phase_tminus); - else - ScaLBL_CopyToHost(Averages.Phase_tminus.data(),Phi,N*sizeof(double)); - //memcpy(Averages.Phase_tminus.data(),phase->data(),N*sizeof(double)); - } - //if ( matches(type,AnalysisType::CopySimState) ) { - if ( timestep%d_analysis_interval + 4 == d_analysis_interval ) { - // Copy the members of Averages to the cpu (phase was copied above) - PROFILE_START("Copy-Pressure",1); - ScaLBL_D3Q19_Pressure(fq,Pressure,d_Np); - //ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); - ScaLBL_DeviceBarrier(); - PROFILE_STOP("Copy-Pressure",1); - PROFILE_START("Copy-Wait",1); - PROFILE_STOP("Copy-Wait",1); - PROFILE_START("Copy-State",1); - //memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double)); - if (d_regular) - d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phase); + // if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { + if ( timestep % d_analysis_interval + 8 == d_analysis_interval ) { + if ( d_regular ) + d_ScaLBL_Comm->RegularLayout( d_Map, Phi, Averages.Phase_tplus ); else - ScaLBL_CopyToHost(Averages.Phase.data(),Phi,N*sizeof(double)); + ScaLBL_CopyToHost( Averages.Phase_tplus.data(), Phi, N * sizeof( double ) ); + // memcpy(Averages.Phase_tplus.data(),phase->data(),N*sizeof(double)); + } + if ( timestep % d_analysis_interval == 0 ) { + if ( d_regular ) + d_ScaLBL_Comm->RegularLayout( d_Map, Phi, 
Averages.Phase_tminus ); + else + ScaLBL_CopyToHost( Averages.Phase_tminus.data(), Phi, N * sizeof( double ) ); + // memcpy(Averages.Phase_tminus.data(),phase->data(),N*sizeof(double)); + } + // if ( matches(type,AnalysisType::CopySimState) ) { + if ( timestep % d_analysis_interval + 4 == d_analysis_interval ) { + // Copy the members of Averages to the cpu (phase was copied above) + PROFILE_START( "Copy-Pressure", 1 ); + ScaLBL_D3Q19_Pressure( fq, Pressure, d_Np ); + // ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); + ScaLBL_DeviceBarrier(); + PROFILE_STOP( "Copy-Pressure", 1 ); + PROFILE_START( "Copy-Wait", 1 ); + PROFILE_STOP( "Copy-Wait", 1 ); + PROFILE_START( "Copy-State", 1 ); + // memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double)); + if ( d_regular ) + d_ScaLBL_Comm->RegularLayout( d_Map, Phi, Averages.Phase ); + else + ScaLBL_CopyToHost( Averages.Phase.data(), Phi, N * sizeof( double ) ); // copy other variables - d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Press); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[0],Averages.Vel_x); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[d_Np],Averages.Vel_y); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[2*d_Np],Averages.Vel_z); - PROFILE_STOP("Copy-State",1); + d_ScaLBL_Comm->RegularLayout( d_Map, Pressure, Averages.Press ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[0], Averages.Vel_x ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[d_Np], Averages.Vel_y ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[2 * d_Np], Averages.Vel_z ); + PROFILE_STOP( "Copy-State", 1 ); } - std::shared_ptr cfq,cDen; - //if ( matches(type,AnalysisType::CreateRestart) ) { - if (timestep%d_restart_interval==0){ + std::shared_ptr cfq, cDen; + // if ( matches(type,AnalysisType::CreateRestart) ) { + if ( timestep % d_restart_interval == 0 ) { // Copy restart data to the CPU - cDen = std::shared_ptr(new double[2*d_Np],DeleteArray); - cfq = std::shared_ptr(new double[19*d_Np],DeleteArray); - 
ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); - ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); + cDen = make_shared_array( 2 * d_Np ); + cfq = make_shared_array( 19 * d_Np ); + ScaLBL_CopyToHost( cfq.get(), fq, 19 * d_Np * sizeof( double ) ); + ScaLBL_CopyToHost( cDen.get(), Den, 2 * d_Np * sizeof( double ) ); } - PROFILE_STOP("Copy data to host",1); + PROFILE_STOP( "Copy data to host", 1 ); // Spawn threads to do blob identification work - if ( matches(type,AnalysisType::IdentifyBlobs) ) { - phase = std::shared_ptr(new DoubleArray(d_N[0],d_N[1],d_N[2])); - if (d_regular) - d_ScaLBL_Comm->RegularLayout(d_Map,Phi,*phase); + if ( matches( type, AnalysisType::IdentifyBlobs ) ) { + phase = std::make_shared( d_N[0], d_N[1], d_N[2] ); + if ( d_regular ) + d_ScaLBL_Comm->RegularLayout( d_Map, Phi, *phase ); else - ScaLBL_CopyToHost(phase->data(),Phi,N*sizeof(double)); + ScaLBL_CopyToHost( phase->data(), Phi, N * sizeof( double ) ); - BlobIDstruct new_index(new std::pair(0,IntArray())); - BlobIDstruct new_ids(new std::pair(0,IntArray())); - BlobIDList new_list(new std::vector()); - auto work1 = new BlobIdentificationWorkItem1(timestep,d_N[0],d_N[1],d_N[2],d_rank_info, - phase,Averages.SDs,d_last_ids,new_index,new_ids,new_list,getComm()); - auto work2 = new BlobIdentificationWorkItem2(timestep,d_N[0],d_N[1],d_N[2],d_rank_info, - phase,Averages.SDs,d_last_ids,new_index,new_ids,new_list,getComm()); - work1->add_dependency(d_wait_blobID); - work2->add_dependency(d_tpool.add_work(work1)); - d_wait_blobID = d_tpool.add_work(work2); - d_last_index = new_index; - d_last_ids = new_ids; + auto new_index = std::make_shared>( 0, IntArray() ); + auto new_ids = std::make_shared>( 0, IntArray() ); + auto new_list = std::make_shared>(); + auto work1 = new BlobIdentificationWorkItem1( timestep, d_N[0], d_N[1], d_N[2], d_rank_info, + phase, Averages.SDs, d_last_ids, new_index, new_ids, new_list, getComm() ); + auto work2 = new BlobIdentificationWorkItem2( timestep, 
d_N[0], d_N[1], d_N[2], d_rank_info, + phase, Averages.SDs, d_last_ids, new_index, new_ids, new_list, getComm() ); + work1->add_dependency( d_wait_blobID ); + work2->add_dependency( d_tpool.add_work( work1 ) ); + d_wait_blobID = d_tpool.add_work( work2 ); + d_last_index = new_index; + d_last_ids = new_ids; d_last_id_map = new_list; } // Spawn threads to do the analysis work - //if (timestep%d_restart_interval==0){ + // if (timestep%d_restart_interval==0){ // if ( matches(type,AnalysisType::ComputeAverages) ) { - if ( timestep%d_analysis_interval == 0 ) { - auto work = new AnalysisWorkItem(type,timestep,Averages,d_last_index,d_last_id_map,d_beta); - work->add_dependency(d_wait_blobID); - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_vis); // Make sure we are done using analysis before modifying - d_wait_analysis = d_tpool.add_work(work); + if ( timestep % d_analysis_interval == 0 ) { + auto work = + new AnalysisWorkItem( type, timestep, Averages, d_last_index, d_last_id_map, d_beta ); + work->add_dependency( d_wait_blobID ); + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_vis ); // Make sure we are done using analysis before modifying + d_wait_analysis = d_tpool.add_work( work ); } // Spawn a thread to write the restart file // if ( matches(type,AnalysisType::CreateRestart) ) { - if (timestep%d_restart_interval==0){ + if ( timestep % d_restart_interval == 0 ) { - if (d_rank==0) { - input_db->putScalar( "Restart", true ); - std::ofstream OutStream("Restart.db"); - input_db->print(OutStream, ""); - OutStream.close(); - } - // Write the restart file (using a seperate thread) - auto work = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); - work->add_dependency(d_wait_restart); - d_wait_restart = d_tpool.add_work(work); + if ( d_rank == 0 ) { + input_db->putScalar( "Restart", true ); + std::ofstream OutStream( "Restart.db" ); + input_db->print( OutStream, "" ); + OutStream.close(); + } + // Write the restart 
file (using a seperate thread) + auto work = new WriteRestartWorkItem( d_restartFile.c_str(), cDen, cfq, d_Np ); + work->add_dependency( d_wait_restart ); + d_wait_restart = d_tpool.add_work( work ); } // Save the results for visualization // if ( matches(type,AnalysisType::CreateRestart) ) { - if (timestep%d_restart_interval==0){ + if ( timestep % d_restart_interval == 0 ) { // Write the vis files - commWrapper comm = getComm(); - fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); - auto work = new WriteVisWorkItem( timestep, d_meshData, Averages, fillData, std::move( comm ) ); - work->add_dependency(d_wait_blobID); - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_vis); - d_wait_vis = d_tpool.add_work(work); + auto work = + new WriteVisWorkItem( timestep, d_meshData, Averages, d_n, d_rank_info, getComm() ); + work->add_dependency( d_wait_blobID ); + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_vis ); + d_wait_vis = d_tpool.add_work( work ); } - PROFILE_STOP("run"); + PROFILE_STOP( "run" ); } /****************************************************************** * Run the analysis * ******************************************************************/ -void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) +void runAnalysis::basic( int timestep, std::shared_ptr input_db, SubPhase &Averages, + const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ) { + int Nx = d_N[0]; + int Ny = d_N[1]; + int Nz = d_N[2]; + int N = Nx * Ny * Nz; + NULL_USE( N ); // Check which analysis steps we need to perform - auto color_db = input_db->getDatabase( "Color" ); - auto vis_db = input_db->getDatabase( "Visualization" ); + auto color_db = input_db->getDatabase( "Color" ); + auto vis_db = input_db->getDatabase( "Visualization" ); - //int timestep = color_db->getWithDefault( "timestep", 0 ); + // 
int timestep = color_db->getWithDefault( "timestep", 0 ); auto type = computeAnalysisType( timestep ); if ( type == AnalysisType::AnalyzeNone ) return; @@ -955,97 +1172,102 @@ void runAnalysis::basic(int timestep, std::shared_ptr input_db, SubPha finish(); } - PROFILE_START("basic"); + PROFILE_START( "basic" ); // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); - PROFILE_START("Copy data to host",1); + PROFILE_START( "Copy data to host", 1 ); - //if ( matches(type,AnalysisType::CopySimState) ) { - if ( timestep%d_analysis_interval == 0 ) { + // if ( matches(type,AnalysisType::CopySimState) ) { + if ( timestep % d_analysis_interval == 0 ) { finish(); // can't copy if threads are still working on data // Copy the members of Averages to the cpu (phase was copied above) - PROFILE_START("Copy-Pressure",1); - ScaLBL_D3Q19_Pressure(fq,Pressure,d_Np); - //ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); + PROFILE_START( "Copy-Pressure", 1 ); + ScaLBL_D3Q19_Pressure( fq, Pressure, d_Np ); + // ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); ScaLBL_DeviceBarrier(); - PROFILE_STOP("Copy-Pressure",1); - PROFILE_START("Copy-Wait",1); - PROFILE_STOP("Copy-Wait",1); - PROFILE_START("Copy-State",1); + PROFILE_STOP( "Copy-Pressure", 1 ); + PROFILE_START( "Copy-Wait", 1 ); + PROFILE_STOP( "Copy-Wait", 1 ); + PROFILE_START( "Copy-State", 1 ); + /*if (d_regular) + d_ScaLBL_Comm->RegularLayout(d_Map,Phi,Averages.Phi); + else */ + ScaLBL_CopyToHost( Averages.Phi.data(), Phi, N * sizeof( double ) ); // copy other variables - d_ScaLBL_Comm->RegularLayout(d_Map,Pressure,Averages.Pressure); - d_ScaLBL_Comm->RegularLayout(d_Map,&Den[0],Averages.Rho_n); - d_ScaLBL_Comm->RegularLayout(d_Map,&Den[d_Np],Averages.Rho_w); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[0],Averages.Vel_x); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[d_Np],Averages.Vel_y); - d_ScaLBL_Comm->RegularLayout(d_Map,&Velocity[2*d_Np],Averages.Vel_z); - PROFILE_STOP("Copy-State",1); + 
d_ScaLBL_Comm->RegularLayout( d_Map, Pressure, Averages.Pressure ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Den[0], Averages.Rho_n ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Den[d_Np], Averages.Rho_w ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[0], Averages.Vel_x ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[d_Np], Averages.Vel_y ); + d_ScaLBL_Comm->RegularLayout( d_Map, &Velocity[2 * d_Np], Averages.Vel_z ); + PROFILE_STOP( "Copy-State", 1 ); } - PROFILE_STOP("Copy data to host"); + PROFILE_STOP( "Copy data to host" ); // Spawn threads to do the analysis work - //if (timestep%d_restart_interval==0){ + // if (timestep%d_restart_interval==0){ // if ( matches(type,AnalysisType::ComputeAverages) ) { - if ( timestep%d_analysis_interval == 0 ) { - auto work = new BasicWorkItem(type,timestep,Averages); - work->add_dependency(d_wait_subphase); // Make sure we are done using analysis before modifying - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_vis); - d_wait_analysis = d_tpool.add_work(work); - } - - if ( timestep%d_subphase_analysis_interval == 0 ) { - auto work = new SubphaseWorkItem(type,timestep,Averages); - work->add_dependency(d_wait_subphase); // Make sure we are done using analysis before modifying - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_vis); - d_wait_subphase = d_tpool.add_work(work); + if ( timestep % d_analysis_interval == 0 ) { + auto work = new BasicWorkItem( type, timestep, Averages ); + work->add_dependency( + d_wait_subphase ); // Make sure we are done using analysis before modifying + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_vis ); + d_wait_analysis = d_tpool.add_work( work ); } - if (timestep%d_restart_interval==0){ - std::shared_ptr cfq,cDen; - // Copy restart data to the CPU - cDen = std::shared_ptr(new double[2*d_Np],DeleteArray); - cfq = std::shared_ptr(new double[19*d_Np],DeleteArray); - ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); - 
ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); - - if (d_rank==0) { - color_db->putScalar("timestep",timestep); - color_db->putScalar( "Restart", true ); - input_db->putDatabase("Color", color_db); - std::ofstream OutStream("Restart.db"); - input_db->print(OutStream, ""); - OutStream.close(); - - } - // Write the restart file (using a seperate thread) - auto work1 = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); - work1->add_dependency(d_wait_restart); - d_wait_restart = d_tpool.add_work(work1); - + if ( timestep % d_subphase_analysis_interval == 0 ) { + auto work = new SubphaseWorkItem( type, timestep, Averages ); + work->add_dependency( + d_wait_subphase ); // Make sure we are done using analysis before modifying + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_vis ); + d_wait_subphase = d_tpool.add_work( work ); } - - if (timestep%d_visualization_interval==0){ + + if ( timestep % d_restart_interval == 0 ) { + std::shared_ptr cfq, cDen; + // Copy restart data to the CPU + cDen = make_shared_array( 2 * d_Np ); + cfq = make_shared_array( 19 * d_Np ); + ScaLBL_CopyToHost( cfq.get(), fq, 19 * d_Np * sizeof( double ) ); + ScaLBL_CopyToHost( cDen.get(), Den, 2 * d_Np * sizeof( double ) ); + + if ( d_rank == 0 ) { + color_db->putScalar( "timestep", timestep ); + color_db->putScalar( "Restart", true ); + input_db->putDatabase( "Color", color_db ); + std::ofstream OutStream( "Restart.db" ); + input_db->print( OutStream, "" ); + OutStream.close(); + } + // Write the restart file (using a seperate thread) + auto work1 = new WriteRestartWorkItem( d_restartFile.c_str(), cDen, cfq, d_Np ); + work1->add_dependency( d_wait_restart ); + d_wait_restart = d_tpool.add_work( work1 ); + } + + if ( timestep % d_visualization_interval == 0 ) { // Write the vis files - commWrapper comm = getComm(); - fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); - auto work = new IOWorkItem( timestep, input_db, d_meshData, Averages, 
fillData, std::move( comm ) ); - work->add_dependency(d_wait_analysis); - work->add_dependency(d_wait_subphase); - work->add_dependency(d_wait_vis); - d_wait_vis = d_tpool.add_work(work); + auto work = + new IOWorkItem( timestep, input_db, d_meshData, Averages, d_n, d_rank_info, getComm() ); + work->add_dependency( d_wait_analysis ); + work->add_dependency( d_wait_subphase ); + work->add_dependency( d_wait_vis ); + d_wait_vis = d_tpool.add_work( work ); } - PROFILE_STOP("basic"); + PROFILE_STOP( "basic" ); } -void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den) +void runAnalysis::WriteVisData( int timestep, std::shared_ptr input_db, + SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, + double *Den ) { - auto color_db = input_db->getDatabase( "Color" ); - auto vis_db = input_db->getDatabase( "Visualization" ); - //int timestep = color_db->getWithDefault( "timestep", 0 ); + auto color_db = input_db->getDatabase( "Color" ); + auto vis_db = input_db->getDatabase( "Visualization" ); + // int timestep = color_db->getWithDefault( "timestep", 0 ); // Check which analysis steps we need to perform auto type = computeAnalysisType( timestep ); @@ -1061,16 +1283,15 @@ void runAnalysis::WriteVisData(int timestep, std::shared_ptr input_db, // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); - PROFILE_START("write vis",1); + PROFILE_START( "write vis", 1 ); // if (Averages.WriteVis == true){ - commWrapper comm = getComm(); - fillHalo fillData( comm.comm, d_rank_info, d_n, {1,1,1}, 0, 1 ); - auto work2 = new IOWorkItem(timestep, input_db, d_meshData, Averages, fillData, std::move( comm ) ); - work2->add_dependency(d_wait_vis); - d_wait_vis = d_tpool.add_work(work2); + auto work2 = + new IOWorkItem( timestep, input_db, d_meshData, Averages, d_n, d_rank_info, getComm() ); + 
work2->add_dependency( d_wait_vis ); + d_wait_vis = d_tpool.add_work( work2 ); - //Averages.WriteVis = false; - - PROFILE_STOP("write vis"); + // Averages.WriteVis = false; + + PROFILE_STOP( "write vis" ); } diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h index 33adbcb0..c7c4ce71 100644 --- a/analysis/runAnalysis.h +++ b/analysis/runAnalysis.h @@ -1,41 +1,51 @@ #ifndef RunAnalysis_H_INC #define RunAnalysis_H_INC -#include "analysis/analysis.h" -#include "analysis/TwoPhase.h" #include "analysis/SubPhase.h" +#include "analysis/TwoPhase.h" +#include "analysis/analysis.h" #include "common/Communication.h" #include "common/ScaLBL.h" #include "threadpool/thread_pool.h" +#include "models/ColorModel.h" #include -typedef std::shared_ptr> BlobIDstruct; -typedef std::shared_ptr> BlobIDList; - // Types of analysis -enum class AnalysisType : uint64_t { AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02, - CopySimState=0x04, ComputeAverages=0x08, CreateRestart=0x10, WriteVis=0x20, ComputeSubphase=0x40 }; +enum class AnalysisType : uint64_t { + AnalyzeNone = 0, + IdentifyBlobs = 0x01, + CopyPhaseIndicator = 0x02, + CopySimState = 0x04, + ComputeAverages = 0x08, + CreateRestart = 0x10, + WriteVis = 0x20, + ComputeSubphase = 0x40 +}; //! Class to run the analysis in multiple threads class runAnalysis { public: - //! Constructor - runAnalysis(std::shared_ptr db, const RankInfoStruct& rank_info, - std::shared_ptr ScaLBL_Comm, std::shared_ptr dm, int Np, bool Regular, IntArray Map ); + runAnalysis( std::shared_ptr db, const RankInfoStruct &rank_info, + std::shared_ptr ScaLBL_Comm, std::shared_ptr dm, int Np, + bool Regular, IntArray Map ); + + runAnalysis( ScaLBL_ColorModel &ColorModel); //! Destructor ~runAnalysis(); //! 
Run the next analysis - void run(int timestep, std::shared_ptr db, TwoPhase &Averages, const double *Phi, + void run( int timestep, std::shared_ptr db, TwoPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ); - - void basic( int timestep, std::shared_ptr db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ); - void WriteVisData(int timestep, std::shared_ptr vis_db, SubPhase &Averages, const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den); + + void basic( int timestep, std::shared_ptr db, SubPhase &Averages, const double *Phi, + double *Pressure, double *Velocity, double *fq, double *Den ); + void WriteVisData( int timestep, std::shared_ptr vis_db, SubPhase &Averages, + const double *Phi, double *Pressure, double *Velocity, double *fq, double *Den ); //! Finish all active analysis void finish(); @@ -44,7 +54,8 @@ public: * \brief Set the affinities * \details This function will create the analysis threads and set the affinity * of this thread and all analysis threads. If MPI_THREAD_MULTIPLE is not - * enabled, the analysis threads will be disabled and the analysis will run in the current thread. + * enabled, the analysis threads will be disabled and the analysis will run in the current + * thread. 
* @param[in] method Method used to control the affinities: * none - Don't use threads (runs all analysis in the current thread) * default - Create the specified number of threads, but don't load balance @@ -53,39 +64,36 @@ public: * that all threads run on independent cores * @param[in] N_threads Number of threads, only used by some of the methods */ - void createThreads( const std::string& method = "default", int N_threads = 4 ); + void createThreads( const std::string &method = "default", int N_threads = 4 ); private: - runAnalysis(); // Determine the analysis to perform AnalysisType computeAnalysisType( int timestep ); public: - class commWrapper { - public: + public: Utilities::MPI comm; int tag; runAnalysis *analysis; - commWrapper( int tag, const Utilities::MPI& comm, runAnalysis *analysis ); - commWrapper( ) = delete; + commWrapper( int tag, const Utilities::MPI &comm, runAnalysis *analysis ); + commWrapper() = delete; commWrapper( const commWrapper &rhs ) = delete; - commWrapper& operator=( const commWrapper &rhs ) = delete; + commWrapper &operator=( const commWrapper &rhs ) = delete; commWrapper( commWrapper &&rhs ); ~commWrapper(); }; // Get a comm (not thread safe) - commWrapper getComm( ); + commWrapper getComm(); private: - - std::array d_n; // Number of local cells - std::array d_N; // NNumber of local cells with ghosts + std::array d_n; // Number of local cells + std::array d_N; // Number of local cells with ghosts int d_Np; int d_rank; int d_restart_interval, d_analysis_interval, d_blobid_interval, d_visualization_interval; @@ -95,9 +103,9 @@ private: ThreadPool d_tpool; RankInfoStruct d_rank_info; IntArray d_Map; - BlobIDstruct d_last_ids; - BlobIDstruct d_last_index; - BlobIDList d_last_id_map; + std::shared_ptr> d_last_ids; + std::shared_ptr> d_last_index; + std::shared_ptr> d_last_id_map; std::vector d_meshData; std::string d_restartFile; Utilities::MPI d_comm; @@ -114,8 +122,6 @@ private: // Friends friend commWrapper::~commWrapper(); - }; 
#endif - diff --git a/cmake/FindMPI.cmake b/cmake/FindMPI.cmake deleted file mode 100644 index e1578737..00000000 --- a/cmake/FindMPI.cmake +++ /dev/null @@ -1,362 +0,0 @@ -# - Message Passing Interface (MPI) module. -# -# The Message Passing Interface (MPI) is a library used to write -# high-performance parallel applications that use message passing, and -# is typically deployed on a cluster. MPI is a standard interface -# (defined by the MPI forum) for which many implementations are -# available. All of these implementations have somewhat different -# compilation approaches (different include paths, libraries to link -# against, etc.), and this module tries to smooth out those differences. -# -# This module will set the following variables: -# MPI_FOUND TRUE if we have found MPI -# MPI_COMPILE_FLAGS Compilation flags for MPI programs -# MPI_INCLUDE_PATH Include path(s) for MPI header -# MPI_LINK_FLAGS Linking flags for MPI programs -# MPI_LIBRARY First MPI library to link against (cached) -# MPI_EXTRA_LIBRARY Extra MPI libraries to link against (cached) -# MPI_LIBRARIES All libraries to link MPI programs against -# MPIEXEC Executable for running MPI programs -# MPIEXEC_NUMPROC_FLAG Flag to pass to MPIEXEC before giving it the -# number of processors to run on -# MPIEXEC_PREFLAGS Flags to pass to MPIEXEC directly before the -# executable to run. -# MPIEXEC_POSTFLAGS Flags to pass to MPIEXEC after all other flags. -# -# This module will attempt to auto-detect these settings, first by -# looking for a MPI compiler, which many MPI implementations provide -# as a pass-through to the native compiler to simplify the compilation -# of MPI programs. The MPI compiler is stored in the cache variable -# MPI_COMPILER, and will attempt to look for commonly-named drivers -# mpic++, mpicxx, mpiCC, or mpicc. If the compiler driver is found and -# recognized, it will be used to set all of the module variables. 
To -# skip this auto-detection, set MPI_LIBRARY and MPI_INCLUDE_PATH in -# the CMake cache. -# -# If no compiler driver is found or the compiler driver is not -# recognized, this module will then search for common include paths -# and library names to try to detect MPI. -# -# If CMake initially finds a different MPI than was intended, and you -# want to use the MPI compiler auto-detection for a different MPI -# implementation, set MPI_COMPILER to the MPI compiler driver you want -# to use (e.g., mpicxx) and then set MPI_LIBRARY to the string -# MPI_LIBRARY-NOTFOUND. When you re-configure, auto-detection of MPI -# will run again with the newly-specified MPI_COMPILER. -# -# When using MPIEXEC to execute MPI applications, you should typically -# use all of the MPIEXEC flags as follows: -# ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} PROCS ${MPIEXEC_PREFLAGS} EXECUTABLE -# ${MPIEXEC_POSTFLAGS} ARGS -# where PROCS is the number of processors on which to execute the program, -# EXECUTABLE is the MPI program, and ARGS are the arguments to pass to the -# MPI program. - -#============================================================================= -# Copyright 2001-2009 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) - -# This module is maintained by David Partyka . - -# A set of directories to search through in addition to the standard system paths -# that find_program will search through. -# Microsoft HPC SDK is automatically added to the system path -# Argonne National Labs MPICH2 sets a registry key that we can use. 
- -set(_MPI_PACKAGE_DIR - mpi - mpich - openmpi - lib/mpi - lib/mpich - lib/openmpi - "MPICH/SDK" - "Microsoft Compute Cluster Pack" - "Microsoft HPC Pack 2008 R2" - ) - -set(_MPI_PREFIX_PATH) -if(WIN32) - list(APPEND _MPI_PREFIX_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MPICH\\SMPD;binary]/..") - list(APPEND _MPI_PREFIX_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\MPICH2;Path]") -endif() - -foreach(SystemPrefixDir ${CMAKE_SYSTEM_PREFIX_PATH}) - foreach(MpiPackageDir ${_MPI_PREFIX_PATH}) - if(EXISTS ${SystemPrefixDir}/${MpiPackageDir}) - list(APPEND _MPI_PREFIX_PATH "${SystemPrefixDir}/${MpiPackageDir}") - endif() - endforeach(MpiPackageDir) -endforeach(SystemPrefixDir) - -# Most mpi distros have some form of mpiexec which gives us something we can reliably look for. -find_program(MPIEXEC - NAMES mpiexec mpirun lamexec - PATHS ${_MPI_PREFIX_PATH} - PATH_SUFFIXES bin - DOC "Executable for running MPI programs." - ) - -# call get_filename_component twice to remove mpiexec and the directory it exists in (typically bin). -# This gives us a fairly reliable base directory to search for /bin /lib and /include from. -get_filename_component(_MPI_BASE_DIR "${MPIEXEC}" PATH) -get_filename_component(_MPI_BASE_DIR "${_MPI_BASE_DIR}" PATH) - -# If there is an mpi compiler find it and interogate (farther below) it for the include -# and lib dirs otherwise we will continue to search from ${_MPI_BASE_DIR}. -find_program(MPI_COMPILER - NAMES mpic++ mpicxx mpiCC mpicc - HINTS "${_MPI_BASE_DIR}" - PATH_SUFFIXES bin - DOC "MPI compiler. 
Used only to detect MPI compilation flags.") -mark_as_advanced(MPI_COMPILER) - -set(MPIEXEC_NUMPROC_FLAG "-np" CACHE STRING "Flag used by MPI to specify the number of processes for MPIEXEC; the next option will be the number of processes.") -set(MPIEXEC_PREFLAGS "" CACHE STRING "These flags will be directly before the executable that is being run by MPIEXEC.") -set(MPIEXEC_POSTFLAGS "" CACHE STRING "These flags will come after all flags given to MPIEXEC.") -set(MPIEXEC_MAX_NUMPROCS "2" CACHE STRING "Maximum number of processors available to run MPI applications.") -mark_as_advanced(MPIEXEC MPIEXEC_NUMPROC_FLAG MPIEXEC_PREFLAGS - MPIEXEC_POSTFLAGS MPIEXEC_MAX_NUMPROCS) - -if (MPI_INCLUDE_PATH AND MPI_LIBRARY) - # Do nothing: we already have MPI_INCLUDE_PATH and MPI_LIBRARY in - # the cache, and we don't want to override those settings. -elseif (MPI_COMPILER) - # Check whether the -showme:compile option works. This indicates - # that we have either Open MPI or a newer version of LAM-MPI, and - # implies that -showme:link will also work. - # Note that Windows distros do not have an mpi compiler to interogate. - exec_program(${MPI_COMPILER} - ARGS -showme:compile - OUTPUT_VARIABLE MPI_COMPILE_CMDLINE - RETURN_VALUE MPI_COMPILER_RETURN) - - if (MPI_COMPILER_RETURN EQUAL 0) - # If we appear to have -showme:compile, then we should also have - # -showme:link. Try it. - exec_program(${MPI_COMPILER} - ARGS -showme:link - OUTPUT_VARIABLE MPI_LINK_CMDLINE - RETURN_VALUE MPI_COMPILER_RETURN) - - # Note that we probably have -showme:incdirs and -showme:libdirs - # as well. - set(MPI_COMPILER_MAY_HAVE_INCLIBDIRS TRUE) - endif (MPI_COMPILER_RETURN EQUAL 0) - - if (MPI_COMPILER_RETURN EQUAL 0) - # Do nothing: we have our command lines now - else (MPI_COMPILER_RETURN EQUAL 0) - # Older versions of LAM-MPI have "-showme". Try it. 
- exec_program(${MPI_COMPILER} - ARGS -showme - OUTPUT_VARIABLE MPI_COMPILE_CMDLINE - RETURN_VALUE MPI_COMPILER_RETURN) - endif (MPI_COMPILER_RETURN EQUAL 0) - - if (MPI_COMPILER_RETURN EQUAL 0) - # Do nothing: we have our command lines now - else (MPI_COMPILER_RETURN EQUAL 0) - # MPICH uses "-show". Try it. - exec_program(${MPI_COMPILER} - ARGS -show - OUTPUT_VARIABLE MPI_COMPILE_CMDLINE - RETURN_VALUE MPI_COMPILER_RETURN) - endif (MPI_COMPILER_RETURN EQUAL 0) - - if (MPI_COMPILER_RETURN EQUAL 0) - # We have our command lines, but we might need to copy - # MPI_COMPILE_CMDLINE into MPI_LINK_CMDLINE, if the underlying - if (NOT MPI_LINK_CMDLINE) - SET(MPI_LINK_CMDLINE ${MPI_COMPILE_CMDLINE}) - endif (NOT MPI_LINK_CMDLINE) - else (MPI_COMPILER_RETURN EQUAL 0) - message(STATUS "Unable to determine MPI from MPI driver ${MPI_COMPILER}") - endif (MPI_COMPILER_RETURN EQUAL 0) -endif (MPI_INCLUDE_PATH AND MPI_LIBRARY) - -if (MPI_INCLUDE_PATH AND MPI_LIBRARY) - # Do nothing: we already have MPI_INCLUDE_PATH and MPI_LIBRARY in - # the cache, and we don't want to override those settings. -elseif (MPI_COMPILE_CMDLINE) - # Extract compile flags from the compile command line. 
- string(REGEX MATCHALL "(^| )-[Df]([^\" ]+|\"[^\"]+\")" MPI_ALL_COMPILE_FLAGS "${MPI_COMPILE_CMDLINE}") - set(MPI_COMPILE_FLAGS_WORK) - foreach(FLAG ${MPI_ALL_COMPILE_FLAGS}) - if (MPI_COMPILE_FLAGS_WORK) - set(MPI_COMPILE_FLAGS_WORK "${MPI_COMPILE_FLAGS_WORK} ${FLAG}") - else(MPI_COMPILE_FLAGS_WORK) - set(MPI_COMPILE_FLAGS_WORK ${FLAG}) - endif(MPI_COMPILE_FLAGS_WORK) - endforeach(FLAG) - - # Extract include paths from compile command line - string(REGEX MATCHALL "(^| )-I([^\" ]+|\"[^\"]+\")" MPI_ALL_INCLUDE_PATHS "${MPI_COMPILE_CMDLINE}") - set(MPI_INCLUDE_PATH_WORK) - foreach(IPATH ${MPI_ALL_INCLUDE_PATHS}) - string(REGEX REPLACE "^ ?-I" "" IPATH ${IPATH}) - string(REGEX REPLACE "//" "/" IPATH ${IPATH}) - list(APPEND MPI_INCLUDE_PATH_WORK ${IPATH}) - endforeach(IPATH) - - if (NOT MPI_INCLUDE_PATH_WORK) - if (MPI_COMPILER_MAY_HAVE_INCLIBDIRS) - # The compile command line didn't have any include paths on it, - # but we may have -showme:incdirs. Use it. - exec_program(${MPI_COMPILER} - ARGS -showme:incdirs - OUTPUT_VARIABLE MPI_INCLUDE_PATH_WORK - RETURN_VALUE MPI_COMPILER_RETURN) - separate_arguments(MPI_INCLUDE_PATH_WORK) - endif (MPI_COMPILER_MAY_HAVE_INCLIBDIRS) - endif (NOT MPI_INCLUDE_PATH_WORK) - - if (NOT MPI_INCLUDE_PATH_WORK) - # If all else fails, just search for mpi.h in the normal include - # paths. 
- find_path(MPI_INCLUDE_PATH mpi.h - HINTS ${_MPI_BASE_DIR} ${_MPI_PREFIX_PATH} - PATH_SUFFIXES include - ) - set(MPI_INCLUDE_PATH_WORK ${MPI_INCLUDE_PATH}) - endif (NOT MPI_INCLUDE_PATH_WORK) - - # Extract linker paths from the link command line - string(REGEX MATCHALL "(^| |-Wl,)-L([^\" ]+|\"[^\"]+\")" MPI_ALL_LINK_PATHS "${MPI_LINK_CMDLINE}") - set(MPI_LINK_PATH) - foreach(LPATH ${MPI_ALL_LINK_PATHS}) - string(REGEX REPLACE "^(| |-Wl,)-L" "" LPATH ${LPATH}) - string(REGEX REPLACE "//" "/" LPATH ${LPATH}) - list(APPEND MPI_LINK_PATH ${LPATH}) - endforeach(LPATH) - - if (NOT MPI_LINK_PATH) - if (MPI_COMPILER_MAY_HAVE_INCLIBDIRS) - # The compile command line didn't have any linking paths on it, - # but we may have -showme:libdirs. Use it. - exec_program(${MPI_COMPILER} - ARGS -showme:libdirs - OUTPUT_VARIABLE MPI_LINK_PATH - RETURN_VALUE MPI_COMPILER_RETURN) - separate_arguments(MPI_LINK_PATH) - endif (MPI_COMPILER_MAY_HAVE_INCLIBDIRS) - endif (NOT MPI_LINK_PATH) - - # Extract linker flags from the link command line - string(REGEX MATCHALL "(^| )-Wl,([^\" ]+|\"[^\"]+\")" MPI_ALL_LINK_FLAGS "${MPI_LINK_CMDLINE}") - set(MPI_LINK_FLAGS_WORK) - foreach(FLAG ${MPI_ALL_LINK_FLAGS}) - if (MPI_LINK_FLAGS_WORK) - set(MPI_LINK_FLAGS_WORK "${MPI_LINK_FLAGS_WORK} ${FLAG}") - else(MPI_LINK_FLAGS_WORK) - set(MPI_LINK_FLAGS_WORK ${FLAG}) - endif(MPI_LINK_FLAGS_WORK) - endforeach(FLAG) - if ( MPI_LINK_FLAGS_WORK ) - string ( REGEX REPLACE "^ " "" MPI_LINK_FLAGS_WORK ${MPI_LINK_FLAGS_WORK} ) - endif () - - # Extract the set of libraries to link against from the link command - # line - string(REGEX MATCHALL "(^| )-l([^\" ]+|\"[^\"]+\")" MPI_LIBNAMES "${MPI_LINK_CMDLINE}") - - # Determine full path names for all of the libraries that one needs - # to link against in an MPI program - set(MPI_LIBRARIES) - foreach(LIB ${MPI_LIBNAMES}) - string(REGEX REPLACE "^ ?-l" "" LIB ${LIB}) - set(MPI_LIB "MPI_LIB-NOTFOUND" CACHE FILEPATH "Cleared" FORCE) - find_library(MPI_LIB ${LIB} HINTS 
${MPI_LINK_PATH}) - if (MPI_LIB) - list(APPEND MPI_LIBRARIES ${MPI_LIB}) - elseif (NOT MPI_FIND_QUIETLY) - message(WARNING "Unable to find MPI library ${LIB}") - endif () - endforeach(LIB) - set(MPI_LIB "MPI_LIB-NOTFOUND" CACHE INTERNAL "Scratch variable for MPI detection" FORCE) - - # Chop MPI_LIBRARIES into the old-style MPI_LIBRARY and - # MPI_EXTRA_LIBRARY. - list(LENGTH MPI_LIBRARIES MPI_NUMLIBS) - list(LENGTH MPI_LIBNAMES MPI_NUMLIBS_EXPECTED) - if (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED) - list(GET MPI_LIBRARIES 0 MPI_LIBRARY_WORK) - set(MPI_LIBRARY ${MPI_LIBRARY_WORK} CACHE FILEPATH "MPI library to link against" FORCE) - else (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED) - set(MPI_LIBRARY "MPI_LIBRARY-NOTFOUND" CACHE FILEPATH "MPI library to link against" FORCE) - endif (MPI_NUMLIBS EQUAL MPI_NUMLIBS_EXPECTED) - if (MPI_NUMLIBS GREATER 1) - set(MPI_EXTRA_LIBRARY_WORK ${MPI_LIBRARIES}) - list(REMOVE_AT MPI_EXTRA_LIBRARY_WORK 0) - set(MPI_EXTRA_LIBRARY ${MPI_EXTRA_LIBRARY_WORK} CACHE STRING "Extra MPI libraries to link against" FORCE) - else (MPI_NUMLIBS GREATER 1) - set(MPI_EXTRA_LIBRARY "MPI_EXTRA_LIBRARY-NOTFOUND" CACHE STRING "Extra MPI libraries to link against" FORCE) - endif (MPI_NUMLIBS GREATER 1) - - # Set up all of the appropriate cache entries - set(MPI_COMPILE_FLAGS ${MPI_COMPILE_FLAGS_WORK} CACHE STRING "MPI compilation flags" FORCE) - set(MPI_INCLUDE_PATH ${MPI_INCLUDE_PATH_WORK} CACHE STRING "MPI include path" FORCE) - set(MPI_LINK_FLAGS ${MPI_LINK_FLAGS_WORK} CACHE STRING "MPI linking flags" FORCE) -else (MPI_COMPILE_CMDLINE) -# No MPI compiler to interogate so attempt to find everything with find functions. 
- find_path(MPI_INCLUDE_PATH mpi.h - HINTS ${_MPI_BASE_DIR} ${_MPI_PREFIX_PATH} - PATH_SUFFIXES include Inc - ) - - # Decide between 32-bit and 64-bit libraries for Microsoft's MPI - if("${CMAKE_SIZEOF_VOID_P}" EQUAL 8) - set(MS_MPI_ARCH_DIR amd64) - else() - set(MS_MPI_ARCH_DIR i386) - endif() - - find_library(MPI_LIBRARY - NAMES mpi mpich msmpi - HINTS ${_MPI_BASE_DIR} ${_MPI_PREFIX_PATH} - PATH_SUFFIXES lib lib/${MS_MPI_ARCH_DIR} Lib Lib/${MS_MPI_ARCH_DIR} - ) - - find_library(MPI_EXTRA_LIBRARY - NAMES mpi++ - HINTS ${_MPI_BASE_DIR} ${_MPI_PREFIX_PATH} - PATH_SUFFIXES lib - DOC "Extra MPI libraries to link against.") - - set(MPI_COMPILE_FLAGS "" CACHE STRING "MPI compilation flags") - set(MPI_LINK_FLAGS "" CACHE STRING "MPI linking flags") -endif (MPI_INCLUDE_PATH AND MPI_LIBRARY) - -# Set up extra variables to conform to -if (MPI_EXTRA_LIBRARY) - set(MPI_LIBRARIES ${MPI_LIBRARY} ${MPI_EXTRA_LIBRARY}) -else (MPI_EXTRA_LIBRARY) - set(MPI_LIBRARIES ${MPI_LIBRARY}) -endif (MPI_EXTRA_LIBRARY) - -if (MPI_INCLUDE_PATH AND MPI_LIBRARY) - set(MPI_FOUND TRUE) -else (MPI_INCLUDE_PATH AND MPI_LIBRARY) - set(MPI_FOUND FALSE) -endif (MPI_INCLUDE_PATH AND MPI_LIBRARY) - -#include("${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake") -# handle the QUIETLY and REQUIRED arguments -#find_package_handle_standard_args(MPI DEFAULT_MSG MPI_LIBRARY MPI_INCLUDE_PATH) - -mark_as_advanced(MPI_INCLUDE_PATH MPI_COMPILE_FLAGS MPI_LINK_FLAGS MPI_LIBRARY - MPI_EXTRA_LIBRARY) - -# unset to cleanup namespace -unset(_MPI_PACKAGE_DIR) -unset(_MPI_PREFIX_PATH) -unset(_MPI_BASE_DIR) diff --git a/cmake/Find_TIMER.cmake b/cmake/Find_TIMER.cmake index 7ebc7aea..a4d7bdb2 100644 --- a/cmake/Find_TIMER.cmake +++ b/cmake/Find_TIMER.cmake @@ -4,7 +4,7 @@ # CONFIGURE_TIMER( DEFAULT_USE_TIMER NULL_TIMER_DIR ) # This function assumes that USE_TIMER is set to indicate if the timer should be used # If USE_TIMER is set, TIMER_DIRECTORY specifies the install path for the timer -# If USE_TIMER is not 
set we will create a summy timer that does nothing. +# If USE_TIMER is not set we will create a dummy timer that does nothing. # The input argument DEFAULT_USE_TIMER specifies if the timer library is included by default. # The input argument NULL_TIMER_DIR specifies the location to install the dummy timer. # If it is an empty string, the default install path "${CMAKE_CURRENT_BINARY_DIR}/null_timer" is used. @@ -13,7 +13,7 @@ # TIMER_CXXFLAGS - C++ flags for the timer library # TIMER_LDFLAGS - Linker flags to link the timer library # TIMER_LDLIBS - Linker libraries to link the timer library -FUNCTION( CONFIGURE_TIMER DEFAULT_USE_TIMER NULL_TIMER_DIR ) +FUNCTION( CONFIGURE_TIMER DEFAULT_USE_TIMER NULL_TIMER_DIR QUIET ) # Determine if we want to use the timer utility CHECK_ENABLE_FLAG( USE_TIMER ${DEFAULT_USE_TIMER} ) SET( TIMER_INCLUDE ) @@ -33,20 +33,23 @@ FUNCTION( CONFIGURE_TIMER DEFAULT_USE_TIMER NULL_TIMER_DIR ) FIND_LIBRARY( TIMER_LIBS NAMES timerutility PATHS ${TIMER_DIRECTORY}/lib NO_DEFAULT_PATH ) SET( TIMER_INCLUDE ${TIMER_DIRECTORY}/include ) SET( TIMER_CXXFLAGS "-DUSE_TIMER -I${TIMER_DIRECTORY}/include" ) - SET( TIMER_LDFLAGS -L${TIMER_DIRECTORY}/lib ) - SET( TIMER_LDLIBS -ltimerutility ) + SET( TIMER_LDFLAGS ) + SET( TIMER_LDLIBS "${TIMER_LIBS}" ) ELSE() MESSAGE( FATAL_ERROR "Default search for TIMER is not yet supported. 
Use -D TIMER_DIRECTORY=" ) ENDIF() - SET(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} "${TIMER_DIRECTORY}/lib" PARENT_SCOPE ) + SET( CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_RPATH} "${TIMER_DIRECTORY}/lib" PARENT_SCOPE ) INCLUDE_DIRECTORIES( "${TIMER_INCLUDE}" ) ADD_DEFINITIONS( -DUSE_TIMER ) - MESSAGE( "Using timer utility" ) - MESSAGE( " TIMER_LIBRARIES = ${TIMER_LIBS}" ) + IF ( NOT QUIET ) + MESSAGE( STATUS "Using timer utility" ) + MESSAGE( STATUS " TIMER_LIBRARIES = ${TIMER_LIBS}" ) + ENDIF() ELSE() IF ( "${NULL_TIMER_DIR}" STREQUAL "" ) SET( NULL_TIMER_DIR "${CMAKE_CURRENT_BINARY_DIR}/null_timer" ) ENDIF() + # Write ProfilerApp.h FILE(WRITE "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_START(...) do {} while(0)\n" ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_STOP(...) do {} while(0)\n" ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_START2(...) do {} while(0)\n" ) @@ -61,9 +64,25 @@ FUNCTION( CONFIGURE_TIMER DEFAULT_USE_TIMER NULL_TIMER_DIR ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_DISABLE_TRACE() do {} while(0)\n" ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_ENABLE_MEMORY() do {} while(0)\n" ) FILE(APPEND "${NULL_TIMER_DIR}/ProfilerApp.h" "#define PROFILE_DISABLE_MEMORY() do {} while(0)\n" ) + # Write MemoryApp.h + FILE(WRITE "${NULL_TIMER_DIR}/MemoryApp.h" "#include \n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" "class MemoryApp final {\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" "public:\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " struct MemoryStats {\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " size_t bytes_new, bytes_delete, N_new, N_delete, tot_bytes_used, system_memory, stack_used, stack_size;\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " MemoryStats() { memset(this,0,sizeof(MemoryStats)); }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " };\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline void print( std::ostream& ) 
{}\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline size_t getMemoryUsage() { return 0; }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline size_t getTotalMemoryUsage() { return 0; }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline size_t getSystemMemory() { return 0; }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" " static inline MemoryStats getMemoryStats() { return MemoryStats(); }\n" ) + FILE(APPEND "${NULL_TIMER_DIR}/MemoryApp.h" "};\n" ) SET( TIMER_INCLUDE "${NULL_TIMER_DIR}" ) INCLUDE_DIRECTORIES( "${TIMER_INCLUDE}" ) - MESSAGE( "Disabling timer utility" ) + IF ( NOT QUIET ) + MESSAGE( STATUS "Disabling timer utility" ) + ENDIF() ENDIF() SET( TIMER_INCLUDE "${TIMER_INCLUDE}" PARENT_SCOPE ) SET( TIMER_CXXFLAGS "${TIMER_CXXFLAGS}" PARENT_SCOPE ) @@ -88,12 +107,12 @@ MACRO( CHECK_ENABLE_FLAG FLAG DEFAULT ) SET( ${FLAG} ${DEFAULT} ) ELSEIF( ${FLAG} STREQUAL "" ) SET( ${FLAG} ${DEFAULT} ) - ELSEIF( ( ${${FLAG}} STREQUAL "false" ) OR ( ${${FLAG}} STREQUAL "0" ) OR ( ${${FLAG}} STREQUAL "OFF" ) ) + ELSEIF( ( ${${FLAG}} STREQUAL "FALSE" ) OR ( ${${FLAG}} STREQUAL "false" ) OR ( ${${FLAG}} STREQUAL "0" ) OR ( ${${FLAG}} STREQUAL "OFF" ) ) SET( ${FLAG} 0 ) - ELSEIF( ( ${${FLAG}} STREQUAL "true" ) OR ( ${${FLAG}} STREQUAL "1" ) OR ( ${${FLAG}} STREQUAL "ON" ) ) + ELSEIF( ( ${${FLAG}} STREQUAL "TRUE" ) OR ( ${${FLAG}} STREQUAL "true" ) OR ( ${${FLAG}} STREQUAL "1" ) OR ( ${${FLAG}} STREQUAL "ON" ) ) SET( ${FLAG} 1 ) ELSE() - MESSAGE( "Bad value for ${FLAG} (${${FLAG}}); use true or false" ) + MESSAGE( FATAL_ERROR "Bad value for ${FLAG} (${${FLAG}}); use true or false" ) ENDIF () ENDMACRO() diff --git a/cmake/ctest_script.cmake b/cmake/ctest_script.cmake index 88bf92fe..e460fed5 100644 --- a/cmake/ctest_script.cmake +++ b/cmake/ctest_script.cmake @@ -32,7 +32,6 @@ SET( CMAKE_MAKE_PROGRAM $ENV{CMAKE_MAKE_PROGRAM} ) SET( CTEST_CMAKE_GENERATOR $ENV{CTEST_CMAKE_GENERATOR} ) SET( LDLIBS $ENV{LDLIBS} ) SET( LDFLAGS 
$ENV{LDFLAGS} ) -SET( MPI_COMPILER $ENV{MPI_COMPILER} ) SET( MPI_DIRECTORY $ENV{MPI_DIRECTORY} ) SET( MPI_INCLUDE $ENV{MPI_INCLUDE} ) SET( MPI_LINK_FLAGS $ENV{MPI_LINK_FLAGS} ) @@ -198,7 +197,7 @@ SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DCMAKE_C_FLAGS='${CFLAGS}';-DCMAKE_CXX_FLA SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DLDFLAGS:STRING='${FLAGS}';-DLDLIBS:STRING='${LDLIBS}'" ) SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DENABLE_GCOV:BOOL=${ENABLE_GCOV}" ) IF ( USE_MPI ) - SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DMPI_COMPILER:BOOL=true;-DMPIEXEC=${MPIEXEC}") + SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DMPIEXEC=${MPIEXEC}") IF ( NOT USE_VALGRIND ) SET( CTEST_OPTIONS "${CTEST_OPTIONS};-DUSE_MPI_FOR_SERIAL_TESTS:BOOL=true") ENDIF() diff --git a/cmake/libraries.cmake b/cmake/libraries.cmake index 43d2726e..f899c289 100644 --- a/cmake/libraries.cmake +++ b/cmake/libraries.cmake @@ -41,93 +41,61 @@ ENDMACRO() # Macro to find and configure the MPI libraries MACRO( CONFIGURE_MPI ) # Determine if we want to use MPI - CHECK_ENABLE_FLAG(USE_MPI 1 ) + CHECK_ENABLE_FLAG( USE_MPI 1 ) IF ( USE_MPI ) - # Check if we specified the MPI directory - IF ( MPI_DIRECTORY ) - # Check the provided MPI directory for include files - VERIFY_PATH( "${MPI_DIRECTORY}" ) - IF ( EXISTS "${MPI_DIRECTORY}/include/mpi.h" ) - SET( MPI_INCLUDE_PATH "${MPI_DIRECTORY}/include" ) - ELSEIF ( EXISTS "${MPI_DIRECTORY}/Inc/mpi.h" ) - SET( MPI_INCLUDE_PATH "${MPI_DIRECTORY}/Inc" ) - ELSE() - MESSAGE( FATAL_ERROR "mpi.h not found in ${MPI_DIRECTORY}/include" ) - ENDIF () - INCLUDE_DIRECTORIES ( ${MPI_INCLUDE_PATH} ) - SET ( MPI_INCLUDE ${MPI_INCLUDE_PATH} ) - # Set MPI libraries - IF ( ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" ) - FIND_LIBRARY( MSMPI_LIB NAMES msmpi PATHS "${MPI_DIRECTORY}/Lib/x64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPI_LIB NAMES msmpi PATHS "${MPI_DIRECTORY}/Lib/amd64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPIFEC_LIB NAMES msmpifec PATHS "${MPI_DIRECTORY}/Lib/x64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPIFEC_LIB 
NAMES msmpifec PATHS "${MPI_DIRECTORY}/Lib/amd64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPIFMC_LIB NAMES msmpifmc PATHS "${MPI_DIRECTORY}/Lib/x64" NO_DEFAULT_PATH ) - FIND_LIBRARY( MSMPIFMC_LIB NAMES msmpifmc PATHS "${MPI_DIRECTORY}/Lib/amd64" NO_DEFAULT_PATH ) - SET( MPI_LIBRARIES ${MSMPI_LIB} ${MSMPIFEC_LIB} ${MSMPIFMC_LIB} ) - ENDIF() - # Set the mpi executable - IF ( MPIEXEC ) - # User specified the MPI command directly, use as is - ELSEIF ( MPIEXEC_CMD ) - # User specified the name of the MPI executable - SET ( MPIEXEC ${MPI_DIRECTORY}/bin/${MPIEXEC_CMD} ) - IF ( NOT EXISTS ${MPIEXEC} ) - MESSAGE( FATAL_ERROR "${MPIEXEC_CMD} not found in ${MPI_DIRECTORY}/bin" ) - ENDIF () - ELSE () - # Search for the MPI executable in the current directory - FIND_PROGRAM( MPIEXEC NAMES mpiexec mpirun lamexec PATHS ${MPI_DIRECTORY}/bin NO_DEFAULT_PATH ) - IF ( NOT MPIEXEC ) - MESSAGE( FATAL_ERROR "Could not locate mpi executable" ) - ENDIF() - ENDIF () - # Set MPI flags - IF ( NOT MPIEXEC_NUMPROC_FLAG ) - SET( MPIEXEC_NUMPROC_FLAG "-np" ) - ENDIF() - ELSEIF ( MPI_COMPILER ) - # The mpi compiler should take care of everything - IF ( MPI_INCLUDE ) - INCLUDE_DIRECTORIES( ${MPI_INCLUDE} ) - ENDIF() + MESSAGE( "Configuring MPI" ) + IF ( MPIEXEC ) + SET( MPIEXEC_EXECUTABLE ${MPIEXEC} ) + ENDIF() + IF ( NOT MPI_SKIP_SEARCH ) + FIND_PACKAGE( MPI ) ELSE() - # Perform the default search for MPI - INCLUDE ( FindMPI ) - IF ( NOT MPI_FOUND ) - MESSAGE( " MPI_INCLUDE = ${MPI_INCLUDE}" ) - MESSAGE( " MPI_LINK_FLAGS = ${MPI_LINK_FLAGS}" ) - MESSAGE( " MPI_LIBRARIES = ${MPI_LIBRARIES}" ) - MESSAGE( FATAL_ERROR "Did not find MPI" ) - ENDIF () - INCLUDE_DIRECTORIES( "${MPI_INCLUDE_PATH}" ) - SET( MPI_INCLUDE "${MPI_INCLUDE_PATH}" ) + # Write mpi test + SET( MPI_TEST_SRC "${CMAKE_CURRENT_BINARY_DIR}/test_mpi.cpp" ) + FILE(WRITE ${MPI_TEST_SRC} "#include \n" ) + FILE(APPEND ${MPI_TEST_SRC} "int main(int argc, char** argv) {\n" ) + FILE(APPEND ${MPI_TEST_SRC} " MPI_Init(NULL, NULL);\n") + FILE(APPEND 
${MPI_TEST_SRC} " MPI_Finalize();\n" ) + FILE(APPEND ${MPI_TEST_SRC} "}\n" ) + # Test the compile + IF ( CMAKE_CXX_COMPILER ) + SET( TMP_FLAGS -DINCLUDE_DIRECTORIES=${MPI_CXX_INCLUDE_PATH} ) + TRY_COMPILE( MPI_TEST_CXX ${CMAKE_CURRENT_BINARY_DIR} ${MPI_TEST_SRC} + CMAKE_FLAGS ${TMP_FLAGS} + LINK_OPTIONS ${MPI_CXX_LINK_FLAGS} + LINK_LIBRARIES ${MPI_CXX_LIBRARIES} + OUTPUT_VARIABLE OUT_TXT) + IF ( NOT MPI_TEST_CXX ) # check the result variable written by TRY_COMPILE above + MESSAGE( FATAL_ERROR "Skipping MPI search and default compile fails:\n${OUT_TXT}" ) + ENDIF() + SET( MPI_C_FOUND TRUE ) + SET( MPI_CXX_FOUND TRUE ) + SET( MPI_Fortran_FOUND TRUE ) + ENDIF() ENDIF() - # Check if we need to use MPI for serial tests - CHECK_ENABLE_FLAG( USE_MPI_FOR_SERIAL_TESTS 0 ) - # Set defaults if they have not been set - IF ( NOT MPIEXEC ) - SET( MPIEXEC mpirun ) + STRING( STRIP "${MPI_CXX_COMPILE_FLAGS}" MPI_CXX_COMPILE_FLAGS ) + STRING( STRIP "${MPI_CXX_LINK_FLAGS}" MPI_CXX_LINK_FLAGS ) + STRING( STRIP "${MPI_CXX_LIBRARIES}" MPI_CXX_LIBRARIES ) + MESSAGE( " MPI_CXX_FOUND = ${MPI_CXX_FOUND}" ) + MESSAGE( " MPI_CXX_COMPILER = ${MPI_CXX_COMPILER}" ) + MESSAGE( " MPI_CXX_COMPILE_FLAGS = ${MPI_CXX_COMPILE_FLAGS}" ) + MESSAGE( " MPI_CXX_INCLUDE_PATH = ${MPI_CXX_INCLUDE_PATH}" ) + MESSAGE( " MPI_CXX_LINK_FLAGS = ${MPI_CXX_LINK_FLAGS}" ) + MESSAGE( " MPI_CXX_LIBRARIES = ${MPI_CXX_LIBRARIES}" ) + MESSAGE( " MPIEXEC = ${MPIEXEC}" ) + MESSAGE( " MPIEXEC_NUMPROC_FLAG = ${MPIEXEC_NUMPROC_FLAG}" ) + MESSAGE( " MPIEXEC_PREFLAGS = ${MPIEXEC_PREFLAGS}" ) + MESSAGE( " MPIEXEC_POSTFLAGS = ${MPIEXEC_POSTFLAGS}" ) + ADD_DEFINITIONS( -DUSE_MPI ) + INCLUDE_DIRECTORIES( ${MPI_CXX_INCLUDE_PATH} ) + SET( MPI_LIBRARIES ${MPI_CXX_LIBRARIES} ) + SET( MPI_LINK_FLAGS ${MPI_CXX_LINK_FLAGS} ) + IF ( NOT MPI_CXX_FOUND ) + MESSAGE( FATAL_ERROR "MPI not found" ) ENDIF() - IF ( NOT MPIEXEC_NUMPROC_FLAG ) - SET( MPIEXEC_NUMPROC_FLAG "-np" ) + IF ( USE_MPI AND NOT MPIEXEC ) + MESSAGE( FATAL_ERROR "Unable to find MPIEXEC, please set it before continuing" ) ENDIF() - # Set 
the definitions - ADD_DEFINITIONS( "-DUSE_MPI" ) - MESSAGE( "Using MPI" ) - MESSAGE( " MPIEXEC = ${MPIEXEC}" ) - MESSAGE( " MPIEXEC_NUMPROC_FLAG = ${MPIEXEC_NUMPROC_FLAG}" ) - MESSAGE( " MPI_INCLUDE = ${MPI_INCLUDE}" ) - MESSAGE( " MPI_LINK_FLAGS = ${MPI_LINK_FLAGS}" ) - MESSAGE( " MPI_LIBRARIES = ${MPI_LIBRARIES}" ) - ELSE() - SET( USE_MPI_FOR_SERIAL_TESTS 0 ) - SET( MPIEXEC "" ) - SET( MPIEXEC_NUMPROC_FLAG "" ) - SET( MPI_INCLUDE "" ) - SET( MPI_LINK_FLAGS "" ) - SET( MPI_LIBRARIES "" ) - MESSAGE( "Not using MPI, all parallel tests will be disabled" ) ENDIF() ENDMACRO() diff --git a/cmake/macros.cmake b/cmake/macros.cmake index d1c8dbe7..8030dfa4 100644 --- a/cmake/macros.cmake +++ b/cmake/macros.cmake @@ -681,8 +681,8 @@ MACRO( TARGET_LINK_EXTERNAL_LIBRARIES TARGET_NAME ) FOREACH ( tmp ${BLAS_LAPACK_LIBS} ) TARGET_LINK_LIBRARIES( ${TARGET_NAME} ${ARGN} ${tmp} ) ENDFOREACH() - FOREACH ( MPI_LIBRARIES ) - TARGET_LINK_LIBRARIES( ${EXE} ${ARGN} ${tmp} ) + FOREACH ( tmp ${MPI_LIBRARIES} ) + TARGET_LINK_LIBRARIES( ${TARGET_NAME} ${ARGN} ${tmp} ) ENDFOREACH() FOREACH ( tmp ${CMAKE_C_IMPLICIT_LINK_LIBRARIES} ${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES} ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES} ) diff --git a/common/Communication.h b/common/Communication.h index 4cd9ad70..5baaa962 100644 --- a/common/Communication.h +++ b/common/Communication.h @@ -67,6 +67,10 @@ public: //! Destructor ~fillHalo( ); + fillHalo() = delete; + fillHalo(const fillHalo&) = delete; + fillHalo& operator=(const fillHalo&) = delete; + /*! 
* @brief Communicate the halos * @param[in] array The array on which we fill the halos @@ -93,9 +97,6 @@ private: TYPE *mem; TYPE *send[3][3][3], *recv[3][3][3]; MPI_Request send_req[3][3][3], recv_req[3][3][3]; - fillHalo(); // Private empty constructor - fillHalo(const fillHalo&); // Private copy constructor - fillHalo& operator=(const fillHalo&); // Private assignment operator void pack( const Array& array, int i, int j, int k, TYPE *buffer ); void unpack( Array& array, int i, int j, int k, const TYPE *buffer ); }; diff --git a/common/Domain.cpp b/common/Domain.cpp index d552fb8a..ec24365d 100644 --- a/common/Domain.cpp +++ b/common/Domain.cpp @@ -558,17 +558,13 @@ void Domain::Decomp( const std::string& Filename ) int64_t z_transition_size = (nprocz*nz - (global_Nz - zStart))/2; if (z_transition_size < 0) z_transition_size=0; - char LocalRankFilename[40]; - char *loc_id; - loc_id = new char [(nx+2)*(ny+2)*(nz+2)]; - // Set up the sub-domains if (RANK==0){ printf("Distributing subdomains across %i processors \n",nprocs); printf("Process grid: %i x %i x %i \n",nprocx,nprocy,nprocz); printf("Subdomain size: %i x %i x %i \n",nx,ny,nz); printf("Size of transition region: %ld \n", z_transition_size); - + auto loc_id = new char [(nx+2)*(ny+2)*(nz+2)]; for (int kp=0; kpcommunicator = comm.communicator; this->comm_rank = comm.comm_rank; this->comm_size = comm.comm_size; - this->d_ranks = comm.d_ranks; this->d_isNull = comm.d_isNull; this->d_manage = comm.d_manage; this->d_maxTag = comm.d_maxTag; @@ -537,7 +530,6 @@ MPI_CLASS &MPI_CLASS::operator=( MPI_CLASS &&rhs ) std::swap( profile_level, rhs.profile_level ); std::swap( comm_rank, rhs.comm_rank ); std::swap( comm_size, rhs.comm_size ); - std::swap( d_ranks, rhs.d_ranks ); std::swap( d_maxTag, rhs.d_maxTag ); std::swap( d_currentTag, rhs.d_currentTag ); std::swap( d_count, rhs.d_count ); @@ -560,7 +552,6 @@ std::atomic_int d_global_count_self = { 1 }; MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) { d_count = 
nullptr; - d_ranks = nullptr; d_manage = false; tmp_alignment = -1; // Check if we are using our version of comm_world @@ -623,11 +614,7 @@ MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) } if ( d_manage ) ++N_MPI_Comm_created; - // Create d_ranks - if ( comm_size > 1 ) { - d_ranks = new int[comm_size]; - d_ranks[0] = -1; - } + #else // We are not using MPI, intialize based on the communicator NULL_USE( manage ); @@ -636,7 +623,7 @@ MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) d_maxTag = mpi_max_tag; d_isNull = communicator == MPI_COMM_NULL; if ( d_isNull ) - comm_size = 0; + comm_size = 0; #endif if ( communicator == MPI_CLASS_COMM_WORLD ) { d_currentTag = d_global_currentTag_world1; @@ -663,34 +650,32 @@ MPI_CLASS::MPI_CLASS( MPI_Comm comm, bool manage ) ************************************************************************/ std::vector MPI_CLASS::globalRanks() const { - // Get my global rank if it has not been set - static int myGlobalRank = -1; - if ( myGlobalRank == -1 ) { -#ifdef USE_MPI - if ( MPI_active() ) - MPI_Comm_rank( MPI_CLASS_COMM_WORLD, &myGlobalRank ); -#else - myGlobalRank = 0; -#endif - } - // Check if we are dealing with a serial or null communicator - if ( comm_size == 1 ) - return std::vector( 1, myGlobalRank ); - if ( d_ranks == nullptr || communicator == MPI_COMM_NULL ) + if ( d_isNull ) return std::vector(); - // Fill d_ranks if necessary - if ( d_ranks[0] == -1 ) { - if ( communicator == MPI_CLASS_COMM_WORLD ) { - for ( int i = 0; i < comm_size; i++ ) - d_ranks[i] = i; - } else { - - MPI_ASSERT( myGlobalRank != -1 ); - this->allGather( myGlobalRank, d_ranks ); - } +#ifdef USE_MPI + // Get my global rank and size if it has not been set + static int globalRank = -1; + static int globalSize = -1; + if ( globalRank == -1 && MPI_active() ) { + MPI_Comm_rank( MPI_CLASS_COMM_WORLD, &globalRank ); + MPI_Comm_size( MPI_CLASS_COMM_WORLD, &globalSize ); } - // Return d_ranks - return std::vector( d_ranks, d_ranks + comm_size ); + // Check 
if we are dealing with a serial or global communicator + if ( comm_size == 1 ) + return std::vector( 1, globalRank ); + if ( comm_size == globalSize ) { + std::vector ranks( globalSize ); + for ( int i = 0; i < globalSize; i++ ) + ranks[i] = i; + return ranks; + } + // Get the global rank from each rank in the communicator + auto ranks = allGather( globalRank ); + std::sort( ranks.begin(), ranks.end() ); + return ranks; +#else + return std::vector( 1, 1 ); +#endif } @@ -2806,49 +2791,44 @@ MPI_Request MPI_CLASS::IrecvBytes( } - /************************************************************************ * sendrecv * ************************************************************************/ #if defined( USE_MPI ) template<> -void MPI_CLASS::sendrecv( const char* sendbuf, int sendcount, int dest, int sendtag, - char* recvbuf, int recvcount, int source, int recvtag ) const +void MPI_CLASS::sendrecv( const char *sendbuf, int sendcount, int dest, int sendtag, + char *recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_CHAR, dest, sendtag, - recvbuf, recvcount, MPI_CHAR, source, recvtag, - communicator, MPI_STATUS_IGNORE ); + MPI_Sendrecv( sendbuf, sendcount, MPI_CHAR, dest, sendtag, recvbuf, recvcount, MPI_CHAR, source, + recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> -void MPI_CLASS::sendrecv( const int* sendbuf, int sendcount, int dest, int sendtag, - int* recvbuf, int recvcount, int source, int recvtag ) const +void MPI_CLASS::sendrecv( const int *sendbuf, int sendcount, int dest, int sendtag, + int *recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_INT, dest, sendtag, - recvbuf, recvcount, MPI_INT, source, recvtag, - communicator, MPI_STATUS_IGNORE ); + MPI_Sendrecv( sendbuf, sendcount, MPI_INT, dest, sendtag, recvbuf, recvcount, MPI_INT, 
source, + recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> -void MPI_CLASS::sendrecv( const float* sendbuf, int sendcount, int dest, int sendtag, - float* recvbuf, int recvcount, int source, int recvtag ) const +void MPI_CLASS::sendrecv( const float *sendbuf, int sendcount, int dest, int sendtag, + float *recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_FLOAT, dest, sendtag, - recvbuf, recvcount, MPI_FLOAT, source, recvtag, - communicator, MPI_STATUS_IGNORE ); + MPI_Sendrecv( sendbuf, sendcount, MPI_FLOAT, dest, sendtag, recvbuf, recvcount, MPI_FLOAT, + source, recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } template<> -void MPI_CLASS::sendrecv( const double* sendbuf, int sendcount, int dest, int sendtag, - double* recvbuf, int recvcount, int source, int recvtag ) const +void MPI_CLASS::sendrecv( const double *sendbuf, int sendcount, int dest, int sendtag, + double *recvbuf, int recvcount, int source, int recvtag ) const { PROFILE_START( "sendrecv", profile_level ); - MPI_Sendrecv( sendbuf, sendcount, MPI_DOUBLE, dest, sendtag, - recvbuf, recvcount, MPI_DOUBLE, source, recvtag, - communicator, MPI_STATUS_IGNORE ); + MPI_Sendrecv( sendbuf, sendcount, MPI_DOUBLE, dest, sendtag, recvbuf, recvcount, MPI_DOUBLE, + source, recvtag, communicator, MPI_STATUS_IGNORE ); PROFILE_STOP( "sendrecv", profile_level ); } #endif @@ -3815,17 +3795,16 @@ MPI MPI::loadBalance( double local, std::vector work ) MPI_ASSERT( (int) work.size() == getSize() ); auto perf = allGather( local ); std::vector I( work.size() ); - for ( size_t i=0; i key( work.size() ); - for ( size_t i=0; i globalRanks() const; @@ -796,7 +802,8 @@ public: // Member functions * @brief This function sends and recieves data using a blocking call */ template - void sendrecv( const type *sendbuf, int sendcount, int dest, int sendtag, type 
*recvbuf, int recvcount, int source, int recvtag ) const; + void sendrecv( const type *sendbuf, int sendcount, int dest, int sendtag, type *recvbuf, + int recvcount, int source, int recvtag ) const; /*! @@ -1126,9 +1133,6 @@ private: // data members // The rank and size of the communicator int comm_rank, comm_size; - // The ranks of the comm in the global comm - mutable int *volatile d_ranks; - // Some attributes int d_maxTag; int *volatile d_currentTag; diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp index c3edc44f..4726bae6 100644 --- a/common/ScaLBL.cpp +++ b/common/ScaLBL.cpp @@ -1,5 +1,8 @@ #include "common/ScaLBL.h" +#include + + ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ //...................................................................................... Lock=false; // unlock the communicator @@ -306,10 +309,129 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr Dm){ } -ScaLBL_Communicator::~ScaLBL_Communicator(){ - // destrutor does nothing (bad idea) - // -- note that there needs to be a way to free memory allocated on the device!!! 
+ScaLBL_Communicator::~ScaLBL_Communicator() +{ + + ScaLBL_FreeDeviceMemory( sendbuf_x ); + ScaLBL_FreeDeviceMemory( sendbuf_X ); + ScaLBL_FreeDeviceMemory( sendbuf_y ); + ScaLBL_FreeDeviceMemory( sendbuf_Y ); + ScaLBL_FreeDeviceMemory( sendbuf_z ); + ScaLBL_FreeDeviceMemory( sendbuf_Z ); + ScaLBL_FreeDeviceMemory( sendbuf_xy ); + ScaLBL_FreeDeviceMemory( sendbuf_xY ); + ScaLBL_FreeDeviceMemory( sendbuf_Xy ); + ScaLBL_FreeDeviceMemory( sendbuf_XY ); + ScaLBL_FreeDeviceMemory( sendbuf_xz ); + ScaLBL_FreeDeviceMemory( sendbuf_xZ ); + ScaLBL_FreeDeviceMemory( sendbuf_Xz ); + ScaLBL_FreeDeviceMemory( sendbuf_XZ ); + ScaLBL_FreeDeviceMemory( sendbuf_yz ); + ScaLBL_FreeDeviceMemory( sendbuf_yZ ); + ScaLBL_FreeDeviceMemory( sendbuf_Yz ); + ScaLBL_FreeDeviceMemory( sendbuf_YZ ); + ScaLBL_FreeDeviceMemory( recvbuf_x ); + ScaLBL_FreeDeviceMemory( recvbuf_X ); + ScaLBL_FreeDeviceMemory( recvbuf_y ); + ScaLBL_FreeDeviceMemory( recvbuf_Y ); + ScaLBL_FreeDeviceMemory( recvbuf_z ); + ScaLBL_FreeDeviceMemory( recvbuf_Z ); + ScaLBL_FreeDeviceMemory( recvbuf_xy ); + ScaLBL_FreeDeviceMemory( recvbuf_xY ); + ScaLBL_FreeDeviceMemory( recvbuf_Xy ); + ScaLBL_FreeDeviceMemory( recvbuf_XY ); + ScaLBL_FreeDeviceMemory( recvbuf_xz ); + ScaLBL_FreeDeviceMemory( recvbuf_xZ ); + ScaLBL_FreeDeviceMemory( recvbuf_Xz ); + ScaLBL_FreeDeviceMemory( recvbuf_XZ ); + ScaLBL_FreeDeviceMemory( recvbuf_yz ); + ScaLBL_FreeDeviceMemory( recvbuf_yZ ); + ScaLBL_FreeDeviceMemory( recvbuf_Yz ); + ScaLBL_FreeDeviceMemory( recvbuf_YZ ); + ScaLBL_FreeDeviceMemory( dvcSendList_x ); + ScaLBL_FreeDeviceMemory( dvcSendList_X ); + ScaLBL_FreeDeviceMemory( dvcSendList_y ); + ScaLBL_FreeDeviceMemory( dvcSendList_Y ); + ScaLBL_FreeDeviceMemory( dvcSendList_z ); + ScaLBL_FreeDeviceMemory( dvcSendList_Z ); + ScaLBL_FreeDeviceMemory( dvcSendList_xy ); + ScaLBL_FreeDeviceMemory( dvcSendList_xY ); + ScaLBL_FreeDeviceMemory( dvcSendList_Xy ); + ScaLBL_FreeDeviceMemory( dvcSendList_XY ); + ScaLBL_FreeDeviceMemory( dvcSendList_xz 
); + ScaLBL_FreeDeviceMemory( dvcSendList_xZ ); + ScaLBL_FreeDeviceMemory( dvcSendList_Xz ); + ScaLBL_FreeDeviceMemory( dvcSendList_XZ ); + ScaLBL_FreeDeviceMemory( dvcSendList_yz ); + ScaLBL_FreeDeviceMemory( dvcSendList_yZ ); + ScaLBL_FreeDeviceMemory( dvcSendList_Yz ); + ScaLBL_FreeDeviceMemory( dvcSendList_YZ ); + ScaLBL_FreeDeviceMemory( dvcRecvList_x ); + ScaLBL_FreeDeviceMemory( dvcRecvList_X ); + ScaLBL_FreeDeviceMemory( dvcRecvList_y ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Y ); + ScaLBL_FreeDeviceMemory( dvcRecvList_z ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Z ); + ScaLBL_FreeDeviceMemory( dvcRecvList_xy ); + ScaLBL_FreeDeviceMemory( dvcRecvList_xY ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Xy ); + ScaLBL_FreeDeviceMemory( dvcRecvList_XY ); + ScaLBL_FreeDeviceMemory( dvcRecvList_xz ); + ScaLBL_FreeDeviceMemory( dvcRecvList_xZ ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Xz ); + ScaLBL_FreeDeviceMemory( dvcRecvList_XZ ); + ScaLBL_FreeDeviceMemory( dvcRecvList_yz ); + ScaLBL_FreeDeviceMemory( dvcRecvList_yZ ); + ScaLBL_FreeDeviceMemory( dvcRecvList_Yz ); + ScaLBL_FreeDeviceMemory( dvcRecvList_YZ ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_x ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_X ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_y ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Y ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_z ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Z ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_xy ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_xY ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Xy ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_XY ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_xz ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_xZ ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Xz ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_XZ ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_yz ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_yZ ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_Yz ); + ScaLBL_FreeDeviceMemory( dvcRecvDist_YZ ); } +double ScaLBL_Communicator::GetPerformance(int *NeighborList, double 
*fq, int Np){ + /* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/ + /* use MRT kernels to check performance without communication / synchronization */ + int TIMESTEPS=500; + double RLX_SETA=1.0; + double RLX_SETB = 8.f*(2.f-RLX_SETA)/(8.f-RLX_SETA); + double FX = 0.0; + double FY = 0.0; + double FZ = 0.0; + ScaLBL_D3Q19_Init(fq, Np); + //.......create and start timer............ + Barrier(); + auto t1 = std::chrono::system_clock::now(); + for (int t=0; t( t2 - t1 ).count(); + double cputime = 0.5*diff/TIMESTEPS; + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + return MLUPS; + +} int ScaLBL_Communicator::LastExterior(){ return next; } @@ -364,7 +486,7 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis int idx,i,j,k,n; // Check that Map has size matching sub-domain - if (Map.size(0) != Nx) + if ( (int) Map.size(0) != Nx) ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Map array dimensions do not match! \n"); // Initialize Map @@ -394,9 +516,9 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis n = k*Nx*Ny+j*Nx+i; if (id[n] > 0){ // Counts for the six faces - if (i>0 && i<=width) Map(n)=idx++; - else if (j>0 && j<=width) Map(n)=idx++; - else if (k>0 && k<=width) Map(n)=idx++; + if (i>0 && i<=width) Map(n)=idx++; + else if (j>0 && j<=width) Map(n)=idx++; + else if (k>0 && k<=width) Map(n)=idx++; else if (i>Nx-width-2 && iNy-width-2 && jNz-width-2 && k0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + //nread = neighborList[n+Np]; + Aq[nr2] = a1; + Bq[nr2] = b1; + // q=2 + //nread = neighborList[n]; + Aq[nr1] = a2; + Bq[nr1] = b2; + + //............................................... 
+ // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + //nread = neighborList[n+3*Np]; + Aq[nr4] = a1; + Bq[nr4] = b1; + // q = 4 + //nread = neighborList[n+2*Np]; + Aq[nr3] = a2; + Bq[nr3] = b2; + + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + //nread = neighborList[n+5*Np]; + Aq[nr6] = a1; + Bq[nr6] = b1; + // q = 6 + //nread = neighborList[n+4*Np]; + Aq[nr5] = a2; + Bq[nr5] = b2; + //............................................... + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Color(int *Map, double *Aq, double *Bq, double *Den, + double *Phi, double *ColorGrad, double *Vel, double rhoA, double rhoB, double beta, int start, int finish, int Np){ + + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double ux,uy,uz; + double phi; + // Instantiate mass transport distributions + // Stationary value - distribution 0 + for (int n=start; n0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + //............................................... 
+ // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... + + } +} + + extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np){ - int idx,n,nread; + int idx,nread; double fq,nA,nB; for (int n=start; n +#include #define STOKES -extern "C" void ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz) -{ - int n,N; - - N = Nx*Ny*Nz; - - for (n=0; n 0){ - - // Retrieve the color gradient - nx = ColorGrad[n]; - ny = ColorGrad[N+n]; - nz = ColorGrad[2*N+n]; - //...........Normalize the Color Gradient................................. - C = sqrt(nx*nx+ny*ny+nz*nz); - if (C==0.0) C=1.0; - nx = nx/C; - ny = ny/C; - nz = nz/C; - //......No color gradient at z-boundary if pressure BC are set............. - // if (pBC && k==0) nx = ny = nz = 0.f; - // if (pBC && k==Nz-1) nx = ny = nz = 0.f; - //........................................................................ - // READ THE DISTRIBUTIONS - // (read from opposite array due to previous swap operation) - //........................................................................ 
- f2 = distodd[n]; - f4 = distodd[N+n]; - f6 = distodd[2*N+n]; - f8 = distodd[3*N+n]; - f10 = distodd[4*N+n]; - f12 = distodd[5*N+n]; - f14 = distodd[6*N+n]; - f16 = distodd[7*N+n]; - f18 = distodd[8*N+n]; - //........................................................................ - f0 = disteven[n]; - f1 = disteven[N+n]; - f3 = disteven[2*N+n]; - f5 = disteven[3*N+n]; - f7 = disteven[4*N+n]; - f9 = disteven[5*N+n]; - f11 = disteven[6*N+n]; - f13 = disteven[7*N+n]; - f15 = disteven[8*N+n]; - f17 = disteven[9*N+n]; - //........................................................................ - // PERFORM RELAXATION PROCESS - //........................................................................ - //....................compute the moments............................................... - rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17); - m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; - m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14; - jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; - m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18; - jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; - m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18; - m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18); - m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17); - m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13; - m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13; - m13 = f8+f7-f10-f9; - m14 = f16+f15-f18-f17; - m15 = f12+f11-f14-f13; - m16 = f7-f8+f9-f10-f11+f12-f13+f14; - m17 = -f7+f8+f9-f10+f15-f16+f17-f18; - m18 = f11-f12-f13+f14-f15+f16+f17-f18; - //..........Toelke, Fruediger et. al. 2006............... 
- if (C == 0.0) nx = ny = nz = 1.0; -#ifdef STOKES - m1 = m1 + rlx_setA*(- 11*rho -alpha*C - m1); - m2 = m2 + rlx_setA*(3*rho - m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*( 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*( 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); -#else - m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1); - m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); -#endif - //.................inverse transformation...................................................... 
- f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2; - f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10); - f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10); - f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); - f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); - f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); - f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 - +0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); - f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12+0.25*m13+0.125*(m16-m17); - f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12+0.25*m13+0.125*(m17-m16); - f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12-0.25*m13+0.125*(m16+m17); - f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4) - +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 - +0.04166666666666666*m12-0.25*m13-0.125*(m16+m17); - f11 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8) - 
+0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12+0.25*m15+0.125*(m18-m16); - f12 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12+0.25*m15+0.125*(m16-m18); - f13 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12-0.25*m15-0.125*(m16+m18); - f14 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4) - +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 - -0.04166666666666666*m12-0.25*m15+0.125*(m16+m18); - f15 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18); - f16 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17); - f17 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8) - -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18); - f18 = 0.05263157894736842*rho+0.003341687552213868*m1 - +0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6) - -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18); - //....................................................................................................... 
- // incorporate external force - f1 += 0.16666666*Fx; - f2 -= 0.16666666*Fx; - f3 += 0.16666666*Fy; - f4 -= 0.16666666*Fy; - f5 += 0.16666666*Fz; - f6 -= 0.16666666*Fz; - f7 += 0.08333333333*(Fx+Fy); - f8 -= 0.08333333333*(Fx+Fy); - f9 += 0.08333333333*(Fx-Fy); - f10 -= 0.08333333333*(Fx-Fy); - f11 += 0.08333333333*(Fx+Fz); - f12 -= 0.08333333333*(Fx+Fz); - f13 += 0.08333333333*(Fx-Fz); - f14 -= 0.08333333333*(Fx-Fz); - f15 += 0.08333333333*(Fy+Fz); - f16 -= 0.08333333333*(Fy+Fz); - f17 += 0.08333333333*(Fy-Fz); - f18 -= 0.08333333333*(Fy-Fz); - //*********** WRITE UPDATED VALUES TO MEMORY ****************** - // Write the updated distributions - //....EVEN..................................... - disteven[n] = f0; - disteven[N+n] = f2; - disteven[2*N+n] = f4; - disteven[3*N+n] = f6; - disteven[4*N+n] = f8; - disteven[5*N+n] = f10; - disteven[6*N+n] = f12; - disteven[7*N+n] = f14; - disteven[8*N+n] = f16; - disteven[9*N+n] = f18; - //....ODD...................................... - distodd[n] = f1; - distodd[N+n] = f3; - distodd[2*N+n] = f5; - distodd[3*N+n] = f7; - distodd[4*N+n] = f9; - distodd[5*N+n] = f11; - distodd[6*N+n] = f13; - distodd[7*N+n] = f15; - distodd[8*N+n] = f17; - - //...Store the Velocity.......................... - Velocity[n] = jx; - Velocity[N+n] = jy; - Velocity[2*N+n] = jz; - /* Velocity[3*n] = jx; - Velocity[3*n+1] = jy; - Velocity[3*n+2] = jz; - */ //...Store the Color Gradient.................... - // ColorGrad[3*n] = nx*C; - // ColorGrad[3*n+1] = ny*C; - // ColorGrad[3*n+2] = nz*C; - //............................................... 
- //*************************************************************** - } // check if n is in the solid - } // loop over n -} - -extern "C" void ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad, - double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB, - double alpha, double beta, double Fx, double Fy, double Fz) -{ - - int i,j,k,n,nn,N; - // distributions - double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; - double f10,f11,f12,f13,f14,f15,f16,f17,f18; - - // non-conserved moments - double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - // additional variables needed for computations - double rho,jx,jy,jz,C,nx,ny,nz; - - N = Nx*Ny*Nz; - char id; - - for (n=0; n 0){ - - //.......Back out the 3-D indices for node n.............. - k = n/(Nx*Ny); - j = (n-Nx*Ny*k)/Nx; - i = n-Nx*Ny*k-Nx*j; - //........................................................................ - //........Get 1-D index for this thread.................... - // n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x; - //........................................................................ - // COMPUTE THE COLOR GRADIENT - //........................................................................ - //.................Read Phase Indicator Values............................ - //........................................................................ - nn = n-1; // neighbor index (get convention) - if (i-1<0) nn += Nx; // periodic BC along the x-boundary - f1 = phi[nn]; // get neighbor for phi - 1 - //........................................................................ 
- nn = n+1; // neighbor index (get convention) - if (!(i+10)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = na*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*ux))+delta; - - A_odd[n] = a1; - A_even[N+n] = a2; - B_odd[n] = b1; - B_even[N+n] = b2; - //............................................... - // q = 2 - // Cq = {0,1,0} - delta = beta*na*nb*nab*0.1111111111111111*ny; - if (!(na*nb*nab>0)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = na*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*uy))+delta; - - A_odd[N+n] = a1; - A_even[2*N+n] = a2; - B_odd[N+n] = b1; - B_even[2*N+n] = b2; - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*na*nb*nab*0.1111111111111111*nz; - if (!(na*nb*nab>0)) delta=0; - a1 = na*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nb*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = na*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nb*(0.1111111111111111*(1-4.5*uz))+delta; - - A_odd[2*N+n] = a1; - A_even[3*N+n] = a2; - B_odd[2*N+n] = b1; - B_even[3*N+n] = b2; - //............................................... - - /* // Construction and streaming for the components - for (idx=0; idx<3; idx++){ - //............................................... 
- // Distribution index - q = 2*idx; - // Associated discrete velocity - Cqx = D3Q7[idx][0]; - Cqy = D3Q7[idx][1]; - Cqz = D3Q7[idx][2]; - // Generate the Equilibrium Distribution - a1 = na*feq[q]; - b1 = nb*feq[q]; - a2 = na*feq[q+1]; - b2 = nb*feq[q+1]; - // Recolor the distributions - if (C > 0.0){ - sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); - //if (idx > 2) sp = 0.7071067811865475*sp; - //delta = sp*min( min(a1,a2), min(b1,b2) ); - delta = na*nb/(na+nb)*0.1111111111111111*sp; - //if (a1>0 && b1>0){ - a1 += beta*delta; - a2 -= beta*delta; - b1 -= beta*delta; - b2 += beta*delta; - } - // Save the re-colored distributions - A_odd[N*idx+n] = a1; - A_even[N*(idx+1)+n] = a2; - B_odd[N*idx+n] = b1; - B_even[N*(idx+1)+n] = b2; - //............................................... - } - */ - } - } -} - -//************************************************************************* -extern "C" void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, - double beta, int Nx, int Ny, int Nz, bool pBC, int S) -{ - char id; - - int idx; - int in,jn,kn,n,nn,N; - int q,Cqx,Cqy,Cqz; - // int sendLoc; - - double na,nb; // density values - double ux,uy,uz; // flow velocity - double nx,ny,nz,C; // color gradient components - double a1,a2,b1,b2; - double sp,delta; - double feq[6]; // equilibrium distributions - // Set of Discrete velocities for the D3Q19 Model - int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}}; - N = Nx*Ny*Nz; - - for (n=0; n 0 && na+nb > 0.0){ - //.......Back out the 3-D indices for node n.............. - int k = n/(Nx*Ny); - int j = (n-Nx*Ny*k)/Nx; - int i = n-Nx*Ny*k-Nx*j; - //.....Load the Color gradient......... - nx = ColorGrad[n]; - ny = ColorGrad[N+n]; - nz = ColorGrad[2*N+n]; - C = sqrt(nx*nx+ny*ny+nz*nz); - nx = nx/C; - ny = ny/C; - nz = nz/C; - //....Load the flow velocity........... 
- ux = Velocity[n]; - uy = Velocity[N+n]; - uz = Velocity[2*N+n]; - //....Instantiate the density distributions - // Generate Equilibrium Distributions and stream - // Stationary value - distribution 0 - // Den[2*n] += 0.3333333333333333*na; - // Den[2*n+1] += 0.3333333333333333*nb; - Den[2*n] += 0.3333333333333333*na; - Den[2*n+1] += 0.3333333333333333*nb; - // Non-Stationary equilibrium distributions - feq[0] = 0.1111111111111111*(1+3*ux); - feq[1] = 0.1111111111111111*(1-3*ux); - feq[2] = 0.1111111111111111*(1+3*uy); - feq[3] = 0.1111111111111111*(1-3*uy); - feq[4] = 0.1111111111111111*(1+3*uz); - feq[5] = 0.1111111111111111*(1-3*uz); - // Construction and streaming for the components - for (idx=0; idx<3; idx++){ - // Distribution index - q = 2*idx; - // Associated discrete velocity - Cqx = D3Q7[idx][0]; - Cqy = D3Q7[idx][1]; - Cqz = D3Q7[idx][2]; - // Generate the Equilibrium Distribution - a1 = na*feq[q]; - b1 = nb*feq[q]; - a2 = na*feq[q+1]; - b2 = nb*feq[q+1]; - // Recolor the distributions - if (C > 0.0){ - sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); - //if (idx > 2) sp = 0.7071067811865475*sp; - //delta = sp*min( min(a1,a2), min(b1,b2) ); - delta = na*nb/(na+nb)*0.1111111111111111*sp; - //if (a1>0 && b1>0){ - a1 += beta*delta; - a2 -= beta*delta; - b1 -= beta*delta; - b2 += beta*delta; - } - - // .......Get the neighbor node.............. 
- //nn = n + Stride[idx]; - in = i+Cqx; - jn = j+Cqy; - kn = k+Cqz; - - // Adjust for periodic BC, if necessary - // if (in<0) in+= Nx; - // if (jn<0) jn+= Ny; - // if (kn<0) kn+= Nz; - // if (!(in 0 ){ - // Get the density value (Streaming already performed) - Na = Den[n]; - Nb = Den[N+n]; - Phi[n] = (Na-Nb)/(Na+Nb); - } + for (n=0; n0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; - - Aq[1*Np+n] = a1; - Bq[1*Np+n] = b1; - Aq[2*Np+n] = a2; - Bq[2*Np+n] = b2; - - //............................................... - // q = 2 - // Cq = {0,1,0} - delta = beta*nA*nB*nAB*0.1111111111111111*ny; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; - - Aq[3*Np+n] = a1; - Bq[3*Np+n] = b1; - Aq[4*Np+n] = a2; - Bq[4*Np+n] = b2; - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nz; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; - - Aq[5*Np+n] = a1; - Bq[5*Np+n] = b1; - Aq[6*Np+n] = a2; - Bq[6*Np+n] = b2; - //............................................... 
- - } } -//extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, double *dist, double *Aq, double *Bq, double *Den, double *Velocity, -// double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, -// double Fx, double Fy, double Fz, int start, int finish, int Np){ -extern "C" void ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, - double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, - double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ - - int n,nn,ijk,nread; - int nr1,nr2,nr3,nr4,nr5,nr6; - int nr7,nr8,nr9,nr10; - int nr11,nr12,nr13,nr14; - //int nr15,nr16,nr17,nr18; - double fq; - // conserved momemnts - double rho,jx,jy,jz; - // non-conserved moments - double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; - double m3,m5,m7; - double nA,nB; // number density - double a1,b1,a2,b2,nAB,delta; - double C,nx,ny,nz; //color gradient magnitude and direction - double ux,uy,uz; - double phi,tau,rho0,rlx_setA,rlx_setB; - - const double mrt_V1=0.05263157894736842; - const double mrt_V2=0.012531328320802; - const double mrt_V3=0.04761904761904762; - const double mrt_V4=0.004594820384294068; - const double mrt_V5=0.01587301587301587; - const double mrt_V6=0.0555555555555555555555555; - const double mrt_V7=0.02777777777777778; - const double mrt_V8=0.08333333333333333; - const double mrt_V9=0.003341687552213868; - const double mrt_V10=0.003968253968253968; - const double mrt_V11=0.01388888888888889; - const double mrt_V12=0.04166666666666666; - - for (int n=start; n even part of dist) - //fq = dist[nread]; // reading the f2 data into register fq - nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) - fq = dist[nr2]; // reading the f2 data into register fq - rho += fq; - m1 -= 11.0*(fq); - m2 -= 4.0*(fq); - jx -= fq; - m4 += 4.0*(fq); - m9 += 2.0*(fq); - m10 -= 
4.0*(fq); - - // q=3 - //nread = neighborList[n+2*Np]; // neighbor 4 - //fq = dist[nread]; - nr3 = neighborList[n+2*Np]; // neighbor 4 - fq = dist[nr3]; - rho += fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jy = fq; - m6 = -4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 = fq; - m12 = -2.0*fq; - - // q = 4 - //nread = neighborList[n+3*Np]; // neighbor 3 - //fq = dist[nread]; - nr4 = neighborList[n+3*Np]; // neighbor 3 - fq = dist[nr4]; - rho+= fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jy -= fq; - m6 += 4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 += fq; - m12 -= 2.0*fq; - - // q=5 - //nread = neighborList[n+4*Np]; - //fq = dist[nread]; - nr5 = neighborList[n+4*Np]; - fq = dist[nr5]; - rho += fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jz = fq; - m8 = -4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 -= fq; - m12 += 2.0*fq; - - - // q = 6 - //nread = neighborList[n+5*Np]; - //fq = dist[nread]; - nr6 = neighborList[n+5*Np]; - fq = dist[nr6]; - rho+= fq; - m1 -= 11.0*fq; - m2 -= 4.0*fq; - jz -= fq; - m8 += 4.0*fq; - m9 -= fq; - m10 += 2.0*fq; - m11 -= fq; - m12 += 2.0*fq; - - // q=7 - //nread = neighborList[n+6*Np]; - //fq = dist[nread]; - nr7 = neighborList[n+6*Np]; - fq = dist[nr7]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jy += fq; - m6 += fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 = fq; - m16 = fq; - m17 = -fq; - - // q = 8 - //nread = neighborList[n+7*Np]; - //fq = dist[nread]; - nr8 = neighborList[n+7*Np]; - fq = dist[nr8]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jy -= fq; - m6 -= fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 += fq; - m16 -= fq; - m17 += fq; - - // q=9 - //nread = neighborList[n+8*Np]; - //fq = dist[nread]; - nr9 = neighborList[n+8*Np]; - fq = dist[nr9]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jy -= fq; - m6 -= fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 -= fq; - m16 += fq; - m17 += fq; - - // q = 10 - //nread = neighborList[n+9*Np]; - //fq = 
dist[nread]; - nr10 = neighborList[n+9*Np]; - fq = dist[nr10]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jy += fq; - m6 += fq; - m9 += fq; - m10 += fq; - m11 += fq; - m12 += fq; - m13 -= fq; - m16 -= fq; - m17 -= fq; - - // q=11 - //nread = neighborList[n+10*Np]; - //fq = dist[nread]; - nr11 = neighborList[n+10*Np]; - fq = dist[nr11]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jz += fq; - m8 += fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 = fq; - m16 -= fq; - m18 = fq; - - // q=12 - //nread = neighborList[n+11*Np]; - //fq = dist[nread]; - nr12 = neighborList[n+11*Np]; - fq = dist[nr12]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jz -= fq; - m8 -= fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 += fq; - m16 += fq; - m18 -= fq; - - // q=13 - //nread = neighborList[n+12*Np]; - //fq = dist[nread]; - nr13 = neighborList[n+12*Np]; - fq = dist[nr13]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx += fq; - m4 += fq; - jz -= fq; - m8 -= fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 -= fq; - m16 -= fq; - m18 -= fq; - - // q=14 - //nread = neighborList[n+13*Np]; - //fq = dist[nread]; - nr14 = neighborList[n+13*Np]; - fq = dist[nr14]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jx -= fq; - m4 -= fq; - jz += fq; - m8 += fq; - m9 += fq; - m10 += fq; - m11 -= fq; - m12 -= fq; - m15 -= fq; - m16 += fq; - m18 += fq; - - // q=15 - nread = neighborList[n+14*Np]; - fq = dist[nread]; - //fq = dist[17*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy += fq; - m6 += fq; - jz += fq; - m8 += fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 = fq; - m17 += fq; - m18 -= fq; - - // q=16 - nread = neighborList[n+15*Np]; - fq = dist[nread]; - //fq = dist[8*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy -= fq; - m6 -= fq; - jz -= fq; - m8 -= fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 += fq; - m17 -= fq; - m18 += fq; - - // q=17 - //fq = dist[18*Np+n]; - nread = 
neighborList[n+16*Np]; - fq = dist[nread]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy += fq; - m6 += fq; - jz -= fq; - m8 -= fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 -= fq; - m17 += fq; - m18 += fq; - - // q=18 - nread = neighborList[n+17*Np]; - fq = dist[nread]; - //fq = dist[9*Np+n]; - rho += fq; - m1 += 8.0*fq; - m2 += fq; - jy -= fq; - m6 -= fq; - jz += fq; - m8 += fq; - m9 -= 2.0*fq; - m10 -= 2.0*fq; - m14 -= fq; - m17 -= fq; - m18 -= fq; - - //........................................................................ - //..............carry out relaxation process.............................. - //..........Toelke, Fruediger et. al. 2006................................ - if (C == 0.0) nx = ny = nz = 0.0; - m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1); - m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); - m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); - m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); - m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); - m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); - m10 = m10 + rlx_setA*( - m10); - m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); - m12 = m12 + rlx_setA*( - m12); - m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); - m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); - m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); - m16 = m16 + rlx_setB*( - m16); - m17 = m17 + rlx_setB*( - m17); - m18 = m18 + rlx_setB*( - m18); - //.................inverse transformation...................................................... 
- - // q=0 - fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; - dist[n] = fq; - - // q = 1 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; - //nread = neighborList[n+Np]; - dist[nr2] = fq; - - // q=2 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; - //nread = neighborList[n]; - dist[nr1] = fq; - - // q = 3 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; - //nread = neighborList[n+3*Np]; - dist[nr4] = fq; - - // q = 4 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; - //nread = neighborList[n+2*Np]; - dist[nr3] = fq; - - // q = 5 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; - //nread = neighborList[n+5*Np]; - dist[nr6] = fq; - - // q = 6 - fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; - //nread = neighborList[n+4*Np]; - dist[nr5] = fq; - - // q = 7 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); - //nread = neighborList[n+7*Np]; - dist[nr8] = fq; - - // q = 8 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 - +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); - //nread = neighborList[n+6*Np]; - dist[nr7] = fq; - - // q = 9 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); - //nread = neighborList[n+9*Np]; - dist[nr10] = fq; - - // q = 10 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ - mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); - //nread = neighborList[n+8*Np]; - dist[nr9] = fq; - - // q = 11 - fq = mrt_V1*rho+mrt_V9*m1 - 
+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); - //nread = neighborList[n+11*Np]; - dist[nr12] = fq; - - // q = 12 - fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ - mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); - //nread = neighborList[n+10*Np]; - dist[nr11]= fq; - - // q = 13 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); - //nread = neighborList[n+13*Np]; - dist[nr14] = fq; - - // q= 14 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) - +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 - -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); - //nread = neighborList[n+12*Np]; - dist[nr13] = fq; - - - // q = 15 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) - -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); - nread = neighborList[n+15*Np]; - dist[nread] = fq; - - // q = 16 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) - -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); - nread = neighborList[n+14*Np]; - dist[nread] = fq; - - - // q = 17 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) - -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); - nread = neighborList[n+17*Np]; - dist[nread] = fq; - - // q = 18 - fq = mrt_V1*rho+mrt_V9*m1 - +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) - -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); - nread = neighborList[n+16*Np]; - dist[nread] = fq; - - // write the velocity - ux = jx / rho0; - uy = jy / rho0; - uz = jz / rho0; - Vel[n] = ux; - Vel[Np+n] = uy; - Vel[2*Np+n] = uz; - - // Instantiate mass transport distributions - // Stationary value - distribution 0 - nAB = 1.0/(nA+nB); - Aq[n] = 
0.3333333333333333*nA; - Bq[n] = 0.3333333333333333*nB; - - //............................................... - // q = 0,2,4 - // Cq = {1,0,0}, {0,1,0}, {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nx; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; - - // q = 1 - //nread = neighborList[n+Np]; - Aq[nr2] = a1; - Bq[nr2] = b1; - // q=2 - //nread = neighborList[n]; - Aq[nr1] = a2; - Bq[nr1] = b2; - - //............................................... - // Cq = {0,1,0} - delta = beta*nA*nB*nAB*0.1111111111111111*ny; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; - - // q = 3 - //nread = neighborList[n+3*Np]; - Aq[nr4] = a1; - Bq[nr4] = b1; - // q = 4 - //nread = neighborList[n+2*Np]; - Aq[nr3] = a2; - Bq[nr3] = b2; - - //............................................... - // q = 4 - // Cq = {0,0,1} - delta = beta*nA*nB*nAB*0.1111111111111111*nz; - if (!(nA*nB*nAB>0)) delta=0; - a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; - b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; - a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; - b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; - - // q = 5 - //nread = neighborList[n+5*Np]; - Aq[nr6] = a1; - Bq[nr6] = b1; - // q = 6 - //nread = neighborList[n+4*Np]; - Aq[nr5] = a2; - Bq[nr5] = b2; - //............................................... 
- } -} - -extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, - double *Den, double *Phi, int start, int finish, int Np){ - - int idx,n,nread; - double fq,nA,nB; - - for (int n=start; n 1.f){ - nA = 1.0; nB = 0.f; - } - else if (phi < -1.f){ - nB = 1.0; nA = 0.f; - } - else{ - nA=0.5*(phi+1.f); - nB=0.5*(1.f-phi); - } - Den[idx] = nA; - Den[Np+idx] = nB; + if (phi > 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + theta = 0; // try more diffusive initial condition + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); - Aq[idx]=0.3333333333333333*nA; - Aq[Np+idx]=0.1111111111111111*nA; - Aq[2*Np+idx]=0.1111111111111111*nA; - Aq[3*Np+idx]=0.1111111111111111*nA; - Aq[4*Np+idx]=0.1111111111111111*nA; - Aq[5*Np+idx]=0.1111111111111111*nA; - Aq[6*Np+idx]=0.1111111111111111*nA; - - Bq[idx]=0.3333333333333333*nB; - Bq[Np+idx]=0.1111111111111111*nB; - Bq[2*Np+idx]=0.1111111111111111*nB; - Bq[3*Np+idx]=0.1111111111111111*nB; - Bq[4*Np+idx]=0.1111111111111111*nB; - Bq[5*Np+idx]=0.1111111111111111*nB; - Bq[6*Np+idx]=0.1111111111111111*nB; } } -extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ - int n; double value; - for (n=0; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + 
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 
0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + + 0.3333333333333333*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 
0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*uy + mgz*(-1 + uz))*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + 
ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - (m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + 
dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(-2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux 
+ Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int nn,nn2x,ijk; + //int nr1,nr2,nr3,nr4,nr5,nr6,nr7,nr8,nr9,nr10,nr11,nr12,nr13,nr14,nr15,nr16,nr17,nr18; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution 
functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + //double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + //double C,theta; + //double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = 
dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + for (int n=start; n 10Np => odd part of dist) - f1 = dist[nr1]; // reading the f1 data into register fq + if ( n 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq - nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) - f2 = dist[nr2]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq - // q=3 - nr3 = neighborList[n+2*Np]; // neighbor 4 - f3 = dist[nr3]; + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; - // q = 4 - nr4 = neighborList[n+3*Np]; // neighbor 3 - f4 = 
dist[nr4]; + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; - // q=5 - nr5 = neighborList[n+4*Np]; - f5 = dist[nr5]; + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; - // q = 6 - nr6 = neighborList[n+5*Np]; - f6 = dist[nr6]; - - // q=7 - nr7 = neighborList[n+6*Np]; - f7 = dist[nr7]; + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; - // q = 8 - nr8 = neighborList[n+7*Np]; - f8 = dist[nr8]; + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; - // q=9 - nr9 = neighborList[n+8*Np]; - f9 = dist[nr9]; + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; - // q = 10 - nr10 = neighborList[n+9*Np]; - f10 = dist[nr10]; + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; - // q=11 - nr11 = neighborList[n+10*Np]; - f11 = dist[nr11]; + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; - // q=12 - nr12 = neighborList[n+11*Np]; - f12 = dist[nr12]; + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; - // q=13 - nr13 = neighborList[n+12*Np]; - f13 = dist[nr13]; + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; - // q=14 - nr14 = neighborList[n+13*Np]; - f14 = dist[nr14]; + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; - // q=15 - nr15 = neighborList[n+14*Np]; - f15 = dist[nr15]; + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; - // q=16 - nr16 = neighborList[n+15*Np]; - f16 = dist[nr16]; + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; - // q=17 - //fq = dist[18*Np+n]; - nr17 = neighborList[n+16*Np]; - f17 = dist[nr17]; + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; - // q=18 - nr18 = neighborList[n+17*Np]; - f18 = dist[nr18]; + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; - rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; - ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; - uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; - uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; - uu = 
1.5*(ux*ux+uy*uy+uz*uz); + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; - // q=0 - dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + ux = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; + uy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; + uz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; + uu = 1.5*(ux*ux+uy*uy+uz*uz); - // q = 1 - dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.3333333333333333*(1.0-uu); - // q=2 - dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*(rho + 3.0*ux + 4.5*ux*ux - uu) + 0.16666666*Fx; - // q = 3 - dist[nr4] = f3*(1.0-rlx) + - rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*(rho - 3.0*ux + 4.5*ux*ux - uu)- 0.16666666*Fx; - // q = 4 - dist[nr3] = f4*(1.0-rlx) + - rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; + // q = 3 + dist[nr4] = f3*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uy + 4.5*uy*uy - uu) + 0.16666666*Fy; - // q = 5 - dist[nr6] = f5*(1.0-rlx) + - rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; + // q = 4 + dist[nr3] = f4*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uy + 4.5*uy*uy - uu)- 0.16666666*Fy; - // q = 6 - dist[nr5] = f6*(1.0-rlx) + - rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; + // q = 5 + dist[nr6] = f5*(1.0-rlx) + + rlx*0.05555555555555555*(rho + 3.0*uz + 4.5*uz*uz - uu) + 0.16666666*Fz; - // q = 7 - dist[nr8] = f7*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); + // q = 6 + dist[nr5] = f6*(1.0-rlx) + + rlx*0.05555555555555555*(rho - 3.0*uz + 4.5*uz*uz - uu) - 0.16666666*Fz; - // q = 8 - dist[nr7] = f8*(1.0-rlx) 
+ - rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); + // q = 7 + dist[nr8] = f7*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) + 0.08333333333*(Fx+Fy); - // q = 9 - dist[nr10] = f9*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); + // q = 8 + dist[nr7] = f8*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uy) + 4.5*(ux+uy)*(ux+uy) - uu) - 0.08333333333*(Fx+Fy); - // q = 10 - dist[nr9] = f10*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); + // q = 9 + dist[nr10] = f9*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) + 0.08333333333*(Fx-Fy); - // q = 11 - dist[nr12] = f11*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); + // q = 10 + dist[nr9] = f10*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uy) + 4.5*(ux-uy)*(ux-uy) - uu) - 0.08333333333*(Fx-Fy); - // q = 12 - dist[nr11] = f12*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); + // q = 11 + dist[nr12] = f11*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) + 0.08333333333*(Fx+Fz); - // q = 13 - dist[nr14] = f13*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); + // q = 12 + dist[nr11] = f12*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux+uz) + 4.5*(ux+uz)*(ux+uz) - uu) - 0.08333333333*(Fx+Fz); - // q= 14 - dist[nr13] = f14*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); + // q = 13 + dist[nr14] = f13*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu) + 0.08333333333*(Fx-Fz); - // q = 15 - dist[nr16] = f15*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 
3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); + // q= 14 + dist[nr13] = f14*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(ux-uz) + 4.5*(ux-uz)*(ux-uz) - uu)- 0.08333333333*(Fx-Fz); - // q = 16 - dist[nr15] = f16*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz); + // q = 15 + dist[nr16] = f15*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) + 0.08333333333*(Fy+Fz); - // q = 17 - dist[nr18] = f17*(1.0-rlx) + - rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + // q = 16 + dist[nr15] = f16*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy+uz) + 4.5*(uy+uz)*(uy+uz) - uu) - 0.08333333333*(Fy+Fz); - // q = 18 - dist[nr17] = f18*(1.0-rlx) + - rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); + // q = 17 + dist[nr18] = f17*(1.0-rlx) + + rlx*0.02777777777777778*(rho + 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) + 0.08333333333*(Fy-Fz); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + + rlx*0.02777777777777778*(rho - 3.0*(uy-uz) + 4.5*(uy-uz)*(uy-uz) - uu) - 0.08333333333*(Fy-Fz); } } } extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - - dvc_ScaLBL_D3Q19_AAeven_BGK<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); - cudaError_t err = cudaGetLastError(); + dvc_ScaLBL_D3Q19_AAeven_BGK<<>>(dist,start,finish,Np,rlx,Fx,Fy,Fz); + + cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){ - dvc_ScaLBL_D3Q19_AAodd_BGK<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); + dvc_ScaLBL_D3Q19_AAodd_BGK<<>>(neighborList,dist,start,finish,Np,rlx,Fx,Fy,Fz); - cudaError_t err = 
cudaGetLastError(); + cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_BGK: %s \n",cudaGetErrorString(err)); } diff --git a/cuda/FreeLee.cu b/cuda/FreeLee.cu new file mode 100644 index 00000000..45bbf65b --- /dev/null +++ b/cuda/FreeLee.cu @@ -0,0 +1,2122 @@ +#include +#include +#include + +#define STOKES + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + double chem; + double cg_x,cg_y,cg_z; + + //for (n=0; n 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); + + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np){ + + int idx,n,nread; + double fq,phi; + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 
= neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. 
+ ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + + 0.3333333333333333*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 
0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*uy + mgz*(-1 + uz))*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + 
ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - (m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + 
dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(-2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux 
+ Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,nn2x,ijk; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double 
m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + //double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + //double C,theta; + //double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + // for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 
3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n; + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + // for (int n=start; n>>( gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init: %s \n",cudaGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np){ + + dvc_ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init<<>>( gqbar, Fx, Fy, Fz, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void 
ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ + + dvc_ScaLBL_FreeLeeModel_PhaseField_Init<<>>(Map, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_FreeLeeModel_PhaseField_Init: %s \n",cudaGetErrorString(err)); + } + + +} +extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np) +{ + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel, + rhoA, rhoB, tauM, W, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_FreeLee_PhaseField: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ + + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_FreeLee_PhaseField: %s \n",cudaGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){ + + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_ComputePhaseField, cudaFuncCachePreferL1); + 
dvc_ScaLBL_D3Q7_ComputePhaseField<<>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_ComputePhaseField: %s \n",cudaGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel<<>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel<<>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel: %s \n",cudaGetErrorString(err)); + } + +} + +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double 
*Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<>>(neighborList, dist, Vel, Pressure, + tau, rho0, Fx, Fy, Fz, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK: %s \n",cudaGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, cudaFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<>>(dist, Vel, Pressure, + tau, rho0, Fx, Fy, Fz, start, finish, Np); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK: %s \n",cudaGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad,int strideY, int strideZ, int start, int finish, int Np){ +} \ No newline at end of file diff --git a/cuda/MixedGradient.cu b/cuda/MixedGradient.cu new file mode 100644 index 00000000..556e34ef --- /dev/null +++ b/cuda/MixedGradient.cu @@ -0,0 +1,76 @@ +/* Implement Mixed Gradient (Lee et al. 
JCP 2016)*/ +#include +#include +#include + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) +{ + static int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, + {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0}, + {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, + {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; + + int i,j,k,n,N,idx; + int np,np2,nm; // neighbors + double v,vp,vp2,vm; // values at neighbors + double grad; + N = Nx*Ny*Nz; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz); + + cudaError_t err = cudaGetLastError(); + if (cudaSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_MixedGradient: %s \n",cudaGetErrorString(err)); + } + cudaProfilerStop(); +} + diff --git a/example/Piston/input.db b/example/Piston/input.db index fab67cc5..5a9ba030 100644 --- a/example/Piston/input.db +++ b/example/Piston/input.db @@ -35,4 +35,6 @@ Analysis { load_balance = "independent" // Load balance method to use: "none", "default", "independent" } +Visualization { +} \ No newline at end of file diff --git a/example/Plates/input.db b/example/Plates/input.db index 2e74a43f..2da2ea3e 100644 --- a/example/Plates/input.db +++ b/example/Plates/input.db @@ -7,10 +7,10 @@ Color { beta = 0.95; F = 0, 0, 0 Restart = false - timestepMax = 3000 + timestepMax = 500 flux = 0.0 ComponentLabels = -2, -1 - ComponentAffinity = -1.0, -0.5; + ComponentAffinity = 1.0, 1.0; } Domain { @@ -26,13 +26,14 @@ Domain { } Analysis { - blobid_interval = 1000 // Frequency to perform blob identification - analysis_interval = 1000 // Frequency to perform analysis - restart_interval = 1000 // Frequency to write restart data - visualization_interval = 1000 // Frequency to write visualization data + analysis_interval = 100 // Frequency to perform analysis + visualization_interval = 500 + subphase_analysis_interval = 100 + 
restart_interval = 100000 restart_file = "Restart" // Filename to use for restart file (will append rank) N_threads = 4 // Number of threads to use load_balance = "independent" // Load balance method to use: "none", "default", "independent" } - +Visualization { +} \ No newline at end of file diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index f63cf035..9e613960 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,6 +1,7 @@ SET( HIP_SEPERABLE_COMPILATION ON ) -SET_SOURCE_FILES_PROPERTIES( BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) -HIP_ADD_LIBRARY( lbpm-hip BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.cu SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) +FILE( GLOB HIP_SOURCES "*.cu" ) +SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 ) +HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} ) #TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so ) #TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip ) #ADD_DEPENDENCIES( lbpm-hip copy-include ) diff --git a/hip/D3Q19.cu b/hip/D3Q19.cu index 13d4ab75..fe06820b 100644 --- a/hip/D3Q19.cu +++ b/hip/D3Q19.cu @@ -89,9 +89,25 @@ __global__ void sum_kernel_block(double *sum, double *input, int n) __inline__ __device__ double warpReduceSum(double val) { +#if 0 for (int offset = warpSize/2; offset > 0; offset /= 2) val += __shfl_down_sync(0xFFFFFFFF, val, offset, 32); return val; +#else + short int id = threadIdx.x % warpSize; + __shared__ double tmp[64]; + tmp[id] = val; + __syncthreads(); + if ( warpSize == 64) { + tmp[id] += tmp[id+32]; __syncthreads(); + } + tmp[id] += tmp[id+16]; __syncthreads(); + tmp[id] += tmp[id+8]; __syncthreads(); + tmp[id] += tmp[id+4]; __syncthreads(); + tmp[id] += 
tmp[id+2]; __syncthreads(); + tmp[id] += tmp[id+1]; __syncthreads(); + return tmp[0]; +#endif } __inline__ __device__ @@ -1730,6 +1746,44 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist, } } +__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int idx, n; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + double f5 = 0.111111111111111111111111 - dist[6*Np+n]; + double f11 = 0.05555555555555555555556 - dist[12*Np+n]; + double f14 = 0.05555555555555555555556 - dist[13*Np+n]; + double f15 = 0.05555555555555555555556 - dist[16*Np+n]; + double f18 = 0.05555555555555555555556 - dist[17*Np+n]; + + dist[6*Np+n] = f5; + dist[12*Np+n] = f11; + dist[13*Np+n] = f14; + dist[16*Np+n] = f15; + dist[17*Np+n] = f18; + } +} + +__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int idx, n; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + double f6 = 0.111111111111111111111111 - dist[5*Np+n]; + double f12 = 0.05555555555555555555556 - dist[11*Np+n]; + double f13 = 0.05555555555555555555556 - dist[14*Np+n] ; + double f16 = 0.05555555555555555555556 - dist[15*Np+n]; + double f17 = 0.05555555555555555555556 - dist[18*Np+n]; + + dist[5*Np+n] = f6; + dist[11*Np+n] = f12; + dist[14*Np+n] = f13; + dist[15*Np+n] = f16; + dist[18*Np+n] = f17; + } +} + __global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np) { int idx, n; @@ -2605,6 +2659,24 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub } +extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_z<<>>(list, dist, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("HIP error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s 
\n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q19_Reflection_BC_Z<<>>(list, dist, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("HIP error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + extern "C" double deviceReduce(double *in, double* out, int N) { int threads = 512; int blocks = min((N + threads - 1) / threads, 1024); diff --git a/hip/D3Q7BC.cu b/hip/D3Q7BC.cu new file mode 100644 index 00000000..9413a68a --- /dev/null +++ b/hip/D3Q7BC.cu @@ -0,0 +1,536 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count) +{ + + int idx; + int iq,ib; + double value_b,value_q; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + iq = BounceBackDist_list[idx]; + ib = BounceBackSolid_list[idx]; + value_b = BoundaryValue[ib];//get boundary value from a solid site + value_q = dist[iq]; + dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice + } +} + +__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count) +{ + + int idx; + int iq,ib; + double value_b,value_q; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + iq = BounceBackDist_list[idx]; + ib = BounceBackSolid_list[idx]; + value_b = BoundaryValue[ib];//get boundary value from a solid site + value_q = dist[iq]; + dist[iq] = value_q + value_b; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = 
list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + //................................................... + f5 = Vin - (f0+f1+f2+f3+f4+f6); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + //................................................... + f6 = Vout - (f0+f1+f2+f3+f4+f5); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np) +{ + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + f5 = Vin - (f0+f1+f2+f3+f4+f6); + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np) +{ + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + 
f4 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + f6 = Vout - (f0+f1+f2+f3+f4+f5); + dist[nr6] = f6; + } +} + +__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count) +{ + int idx,n,nm; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + nm = Map[n]; + Psi[nm] = Vin; + } +} + + +__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count) +{ + int idx,n,nm; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + nm = Map[n]; + Psi[nm] = Vout; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + //................................................... + f5 = Cin - (f0+f1+f2+f3+f4+f6); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np) +{ + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + //................................................... 
+ f6 = Cout - (f0+f1+f2+f3+f4+f5); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np) +{ + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + f5 = Cin - (f0+f1+f2+f3+f4+f6); + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np) +{ + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + f6 = Cout - (f0+f1+f2+f3+f4+f5); + dist[nr6] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f6 = dist[5*Np+n]; + fsum_partial = f0+f1+f2+f3+f4+f6; + uz = VelocityZ[n]; + 
//................................................... + f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); + dist[6*Np+n] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx,n; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + f1 = dist[2*Np+n]; + f2 = dist[1*Np+n]; + f3 = dist[4*Np+n]; + f4 = dist[3*Np+n]; + f5 = dist[6*Np+n]; + fsum_partial = f0+f1+f2+f3+f4+f5; + uz = VelocityZ[n]; + //................................................... + f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); + dist[5*Np+n] = f6; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx, n; + int nread,nr5; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + nread = d_neighborList[n+5*Np]; + f6 = dist[nread]; + + fsum_partial = f0+f1+f2+f3+f4+f6; + uz = VelocityZ[n]; + //................................................... 
+ f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau); + + // Unknown distributions + nr5 = d_neighborList[n+4*Np]; + dist[nr5] = f5; + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np) +{ + //NOTE: FluxIn is the inward flux + int idx, n; + int nread,nr6; + double f0,f1,f2,f3,f4,f5,f6; + double fsum_partial; + double uz; + idx = blockIdx.x*blockDim.x + threadIdx.x; + if (idx < count){ + n = list[idx]; + f0 = dist[n]; + + nread = d_neighborList[n]; + f1 = dist[nread]; + + nread = d_neighborList[n+2*Np]; + f3 = dist[nread]; + + nread = d_neighborList[n+4*Np]; + f5 = dist[nread]; + + nread = d_neighborList[n+Np]; + f2 = dist[nread]; + + nread = d_neighborList[n+3*Np]; + f4 = dist[nread]; + + fsum_partial = f0+f1+f2+f3+f4+f5; + uz = VelocityZ[n]; + //................................................... + f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau); + + // unknown distributions + nr6 = d_neighborList[n+5*Np]; + dist[nr6] = f6; + } +} +//************************************************************************* + +extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Solid_Dirichlet_D3Q7<<>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Solid_Neumann_D3Q7<<>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != 
err){ + printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<>>(list, dist, Vin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<>>(list, dist, Vout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<>>(d_neighborList, list, dist, Vin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<>>(d_neighborList, list, dist, Vout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Poisson_D3Q7_BC_z<<>>(list, Map, Psi, Vin, count); 
+ hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){ + int GRID = count / 512 + 1; + dvc_ScaLBL_Poisson_D3Q7_BC_Z<<>>(list, Map, Psi, Vout, count); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<>>(list, dist, Cin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<>>(list, dist, Cout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<>>(d_neighborList, list, dist, Cin, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){ + int GRID = count / 512 + 1; + 
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<>>(d_neighborList, list, dist, Cout, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<>>(list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<>>(list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){ + int GRID = count / 512 + 1; + dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in 
ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err)); + } +} diff --git a/hip/FreeLee.cu b/hip/FreeLee.cu new file mode 100644 index 00000000..09bc8689 --- /dev/null +++ b/hip/FreeLee.cu @@ -0,0 +1,2122 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define STOKES + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(double *gqbar, double *mu_phi, double *ColorGrad, double Fx, double Fy, double Fz, int Np) +{ + int n; + double p = 1.0;//NOTE: take initial pressure p=1.0 + double chem; + double cg_x,cg_y,cg_z; + + //for (n=0; n 1.f) phi = 1.0; + if (phi < -1.f) phi = -1.0; + Den[idx] = rhoA + 0.5*(1.0-phi)*(rhoB-rhoA); + + //compute unit normal of color gradient + nx = ColorGrad[idx+0*Np]; + ny = ColorGrad[idx+1*Np]; + nz = ColorGrad[idx+2*Np]; + cg_mag = sqrt(nx*nx+ny*ny+nz*nz); + double ColorMag_temp = cg_mag; + if (cg_mag==0.0) ColorMag_temp=1.0; + nx = nx/ColorMag_temp; + ny = ny/ColorMag_temp; + nz = nz/ColorMag_temp; + + theta = M*cs2_inv*(1-4.0*phi*phi)/W; + + hq[0*Np+idx]=0.3333333333333333*(phi); + hq[1*Np+idx]=0.1111111111111111*(phi+theta*nx); + hq[2*Np+idx]=0.1111111111111111*(phi-theta*nx); + hq[3*Np+idx]=0.1111111111111111*(phi+theta*ny); + hq[4*Np+idx]=0.1111111111111111*(phi-theta*ny); + hq[5*Np+idx]=0.1111111111111111*(phi+theta*nz); + hq[6*Np+idx]=0.1111111111111111*(phi-theta*nz); + + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, + double rhoA, double rhoB, int start, int finish, int Np){ + + int idx,n,nread; + double fq,phi; + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 
+ nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(chem*nx+Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(chem*ny+Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(chem*nz+Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17) + +0.5*(rhoA-rhoB)/2.0/3.0*(ux*nx+uy*ny+uz*nz); + + //compute equilibrium distributions + feq0 = 0.3333333333333333*p - 0.25*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz) - 0.5*(-(nx*ux) - ny*uy - nz*uz)* + (-0.08333333333333333*(rhoA - rhoB)*(ux*ux + uy*uy + uz*uz) + chem*(0.3333333333333333 - 0.5*(ux*ux + uy*uy + uz*uz))); + feq1 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx*(-1. 
+ ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx - nx*ux - ny*uy - nz*uz)* + (2*chem*ux*ux - 0.3333333333333333*((-rhoA + rhoB)*ux*ux + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz))); + feq2 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-ux*ux + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)) - 0.0625*(nx + nx*ux + ny*uy + nz*uz)* + (-2.*chem*ux*ux + 0.1111111111111111*(-4.*chem + rhoB*(-2.*ux - 1.*ux*ux - 1.*uy*uy - 1.*uz*uz) + + rhoA*(2.*ux + ux*ux + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*ux*ux + + chem*(4.*ux + 2.*ux*ux + 2.*uy*uy + 2.*uz*uz))); + feq3 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny - nx*ux - ny*uy - nz*uz)* + (2*chem*uy*uy - 0.3333333333333333*((-rhoA + rhoB)*uy*uy + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz))); + feq4 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uy*uy + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.0625*(ny + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uy*uy + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 2.*uy - 1.*uy*uy - 1.*uz*uz) + + rhoA*(ux*ux + 2.*uy + uy*uy + uz*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uy*uy + + chem*(2.*ux*ux + 4.*uy + 2.*uy*uy + 2.*uz*uz))); + feq5 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)) - 0.0625*(nx*ux + ny*uy + nz*(-1. + uz))* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + (-2. + uz)*uz)) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(-4. + 2.*uz)))); + feq6 = 0.05555555555555555*p - 0.08333333333333333*rho0*(-uz*uz + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))) - 0.0625*(nz + nx*ux + ny*uy + nz*uz)* + (-2.*chem*uz*uz + 0.1111111111111111*(-4.*chem + rhoB*(-1.*ux*ux - 1.*uy*uy + (-2. - 1.*uz)*uz) + + rhoA*(ux*ux + uy*uy + uz*(2. + uz))) + 0.3333333333333333*((-1.*rhoA + rhoB)*uz*uz + + chem*(2.*ux*ux + 2.*uy*uy + uz*(4. 
+ 2.*uz)))); + feq7 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx + ny - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) + 0.3333333333333333*((rhoA - rhoB)*(ux + uy)*(ux + uy) - 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq8 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(-(nx*(1 + ux)) - ny*(1 + uy) - nz*uz)* + (2*chem*(ux + uy)*(ux + uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uy)*(ux + uy)) + + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq9 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)) - 0.03125*(nx - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz))); + feq10 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uy - 1.*uy*uy + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)) - 0.03125*(ny - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uy)*(ux - uy) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uy)*(ux - uy)) + + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))); + feq11 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nx + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(ux + uz)*(ux + uz) + 0.3333333333333333*((rhoA - rhoB)*(ux + uz)*(ux + uz) - 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq12 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux - 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))) - 0.03125*(-(nx*(1 + ux)) - ny*uy - nz*(1 + uz))* + (2*chem*(ux + uz)*(ux + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux + uz)*(ux + uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq13 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))) - 0.03125*(nx - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))); + feq14 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*ux*ux + 2.*ux*uz - 1.*uz*uz + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)) - 0.03125*(nz - nx*(1 + ux) - ny*uy - nz*uz)* + (2*chem*(ux - uz)*(ux - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(ux - uz)*(ux - uz)) + + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))); + feq15 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)) - 0.03125*(ny + nz - nx*ux - ny*uy - nz*uz)* + (2*chem*(uy + uz)*(uy + uz) + 0.3333333333333333*((rhoA - rhoB)*(uy + uz)*(uy + uz) - 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))); + feq16 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy - 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. 
+ uz))) - 0.03125*(-(nx*ux) - ny*(1 + uy) - nz*(1 + uz))* + (2*chem*(uy + uz)*(uy + uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy + uz)*(uy + uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))); + feq17 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) - + 0.0625*(Fz + Fx*ux + Fy*(-1. + uy) + Fz*uz)*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))) - 0.03125*(ny - nx*ux - ny*uy - nz*(1 + uz))* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))); + feq18 = 0.027777777777777776*p - 0.041666666666666664*rho0* + (-(uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) - + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))*(-0.2222222222222222 - 1.*uy*uy + 2.*uy*uz - 1.*uz*uz + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. 
+ uz)*uz)) - 0.03125*(nz - nx*ux - ny*(1 + uy) - nz*uz)* + (2*chem*(uy - uz)*(uy - uz) - 0.3333333333333333*(-((rhoA - rhoB)*(uy - uz)*(uy - uz)) + + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + 0.1111111111111111* + (4*chem - (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 - (m0-feq0)/tau + 0.25*(2*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + + (mgx*ux + mgy*uy + mgz*uz)*(2*chem*(ux*ux + uy*uy + uz*uz) + + 0.3333333333333333*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*uz)))); + + // q = 1 + dist[nr2] = m1 - (m1-feq1)/tau + 0.125*(2*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*uz)))); + + // q=2 + dist[nr1] = m2 - (m2-feq2)/tau + 0.125*(2*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + (mgx + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(ux*ux) + + 0.3333333333333333*((-rhoA + rhoB)*(ux*ux) + 2*chem*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*uz)))); + + // q = 3 + dist[nr4] = m3 - (m3-feq3)/tau + 0.125*(2*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 4 + dist[nr3] = m4 - (m4-feq4)/tau + 0.125*(2*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 
0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uy*uy) + + 0.3333333333333333*((-rhoA + rhoB)*(uy*uy) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 5 + dist[nr6] = m5 - (m5-feq5)/tau + 0.125*(2*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*uy + mgz*(-1 + uz))*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 6 + dist[nr5] = m6 - (m6-feq6)/tau + 0.125*(2*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*uy + mgz*uz)*(-2*chem*(uz*uz) + + 0.3333333333333333*((-rhoA + rhoB)*(uz*uz) + 2*chem*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 7 + dist[nr8] = m7 - (m7-feq7)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(-1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 8 + dist[nr7] = m8 - (m8-feq8)/tau + 0.0625*(2*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgx + mgy + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uy)*(ux + uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uy)*(ux + uy))) + 2*chem*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + 
ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 9 + dist[nr10] = m9 - (m9-feq9)/tau + 0.0625*(2*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + (mgy + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)))); + + // q = 10 + dist[nr9] = m10 - (m10-feq10)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + (mgx*(1 + ux) + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((ux - uy)*(ux - uy)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uy)*(ux - uy))) + 2*chem*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)))); + + // q = 11 + dist[nr12] = m11 - (m11-feq11)/tau + 0.0625*(-2*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(-1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 12 + dist[nr11] = m12 - (m12-feq12)/tau + 0.0625*(2*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgx + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((ux + uz)*(ux + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux + uz)*(ux + uz))) + 2*chem*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q = 13 + 
dist[nr14] = m13 - (m13-feq13)/tau + 0.0625*(2*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + (mgz + mgx*(-1 + ux) + mgy*uy + mgz*uz)* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))))); + + // q= 14 + dist[nr13] = m14 - (m14-feq14)/tau + 0.0625*(2*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + (mgx*(1 + ux) + mgy*uy + mgz*(-1 + uz))* + (-2*chem*((ux - uz)*(ux - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((ux - uz)*(ux - uz))) + 2*chem*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)))); + + // q = 15 + dist[nr16] = m15 - (m15-feq15)/tau + 0.0625*(-2*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(-1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)))); + + // q = 16 + dist[nr15] = m16 - (m16-feq16)/tau + 0.0625*(2*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + (mgy + mgz + mgx*ux + mgy*uy + mgz*uz)* + (-2*chem*((uy + uz)*(uy + uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy + uz)*(uy + uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 17 + dist[nr18] = m17 - (m17-feq17)/tau + 0.0625*(2*(Fz + Fx*ux 
+ Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + (mgz + mgx*ux + mgy*(-1 + uy) + mgz*uz)* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))))); + + // q = 18 + dist[nr17] = m18 - (m18-feq18)/tau + 0.0625*(2*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + (mgx*ux + mgy*(1 + uy) + mgz*(-1 + uz))* + (-2*chem*((uy - uz)*(uy - uz)) + 0.3333333333333333* + (-((rhoA - rhoB)*((uy - uz)*(uy - uz))) + 2*chem*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.1111111111111111*(-4*chem + (rhoA - rhoB)*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)))); + //----------------------------------------------------------------------------------------------------------------------------------------// + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + //Update chemical potential on device + mu_phi[n] = chem; + //Update color gradient on device + ColorGrad[0*Np+n] = nx; + ColorGrad[1*Np+n] = ny; + ColorGrad[2*Np+n] = nz; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,nn2x,ijk; + double ux,uy,uz;//fluid velocity + double p;//pressure + double chem;//chemical potential + double phi; //phase field + double rho0;//fluid density + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double 
m0,m3,m5,m7; + double mm1,mm2,mm4,mm6,mm8,mm9,mm10,mm11,mm12,mm13,mm14,mm15,mm16,mm17,mm18; + double mm3,mm5,mm7; + double feq0,feq1,feq2,feq3,feq4,feq5,feq6,feq7,feq8,feq9,feq10,feq11,feq12,feq13,feq14,feq15,feq16,feq17,feq18; + double nx,ny,nz;//normal color gradient + double mgx,mgy,mgz;//mixed gradient reaching secondary neighbor + + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + //double h0,h1,h2,h3,h4,h5,h6;//distributions for LB phase field + double tau;//position dependent LB relaxation time for fluid + //double C,theta; + //double M = 2.0/9.0*(tauM-0.5);//diffusivity (or mobility) for the phase field D3Q7 + + // for (int n=start; n 10Np => odd part of dist) + m1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + m2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + m3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + m4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + m5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + m6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + m7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + m8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + m9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + m10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + m11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + m12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + m13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + m14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + m15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + m16 = dist[nr16]; + + // q=17 + nr17 = neighborList[n+16*Np]; + m17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + m18 = dist[nr18]; + + //compute fluid velocity + ux = 
3.0/rho0*(m1-m2+m7-m8+m9-m10+m11-m12+m13-m14+0.5*(Fx)/3.0); + uy = 3.0/rho0*(m3-m4+m7-m8-m9+m10+m15-m16+m17-m18+0.5*(Fy)/3.0); + uz = 3.0/rho0*(m5-m6+m11-m12-m13+m14+m15-m16-m17+m18+0.5*(Fz)/3.0); + //compute pressure + p = (m0+m2+m1+m4+m3+m6+m5+m8+m7+m10+m9+m12+m11+m14+m13+m16+m15+m18+m17); + + //------------------------------------------------- BCK collison ------------------------------------------------------------// + // q=0 + dist[n] = m0 + 0.5*(Fx*ux + Fy*uy + Fz*uz)*(-0.6666666666666666 + ux*ux + uy*uy + uz*uz) - + (m0 - 0.3333333333333333*p + 0.25*(Fx*ux + Fy*uy + Fz*uz)* + (-0.6666666666666666 + ux*ux + uy*uy + uz*uz) + 0.16666666666666666*rho0*(ux*ux + uy*uy + uz*uz))/ + tau; + + // q = 1 + dist[nr2] = m1 + 0.25*(Fx*(-1 + ux) + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) - + (m1 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q=2 + dist[nr1] = m2 + 0.25*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - ux*ux + + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) - + (m2 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(ux*ux) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*uz)) + + 0.125*(Fx + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(ux*ux) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*uz)))/tau; + + // q = 3 + dist[nr4] = m3 + 0.25*(Fx*ux + Fy*(-1 + uy) + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) - + (m3 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.125*(Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 4 + dist[nr3] = m4 + 0.25*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uy*uy + + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) - + (m4 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uy*uy) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.125*(Fy + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uy*uy) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 5 + dist[nr6] = m5 + 0.25*(Fx*ux + Fy*uy + Fz*(-1 + uz))*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) - + (m5 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.125*(Fx*ux + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 6 + dist[nr5] = m6 + 0.25*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - uz*uz + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) - + (m6 - 0.05555555555555555*p + 0.08333333333333333*rho0* + (-(uz*uz) + 0.3333333333333333*(ux*ux + uy*uy + uz*(2 + uz))) + + 0.125*(Fz + Fx*ux + Fy*uy + Fz*uz)*(-0.2222222222222222 - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 7 + dist[nr8] = m7 - 0.125*(Fx*(-1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (0.2222222222222222 + (ux + uy)*(ux + uy) - 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m7 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(-2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 8 + dist[nr7] = m8 + 0.125*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uy)*(ux + uy) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz))\ + - (m8 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uy)*(ux + uy)) + 0.3333333333333333*(2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx + Fy + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 9 + dist[nr10] = m9 + 0.125*(Fy + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + - (m9 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(-2*ux + ux*ux + 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fy + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(-2.*ux + ux*ux + 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 10 + dist[nr9] = m10 + 0.125*(Fx*(1 + ux) + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (ux - uy)*(ux - uy) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz))\ + - (m10 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uy)*(ux - uy)) + 0.3333333333333333*(2*ux + ux*ux - 2*uy + uy*uy + uz*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uy - 1.*(uy*uy) + + 0.3333333333333333*(2.*ux + ux*ux - 2.*uy + uy*uy + uz*uz)))/tau; + + // q = 11 + dist[nr12] = m11 - 0.125*(Fx*(-1 + ux) + Fy*uy + Fz*(-1 + uz))* + (0.2222222222222222 + (ux + uz)*(ux + uz) - 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m11 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(-1. + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 12 + dist[nr11] = m12 + 0.125*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux + uz)*(ux + uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m12 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux + uz)*(ux + uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fx + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) - 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + uz*(2. + uz))))/tau; + + // q = 13 + dist[nr14] = m13 + 0.125*(Fz + Fx*(-1 + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz)))\ + - (m13 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(-2*ux + ux*ux + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*(-1. + ux) + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(-2.*ux + ux*ux + uy*uy + uz*(2. 
+ uz))))/tau; + + // q= 14 + dist[nr13] = m14 + 0.125*(Fx*(1 + ux) + Fy*uy + Fz*(-1 + uz))* + (-0.2222222222222222 - (ux - uz)*(ux - uz) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz))\ + - (m14 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((ux - uz)*(ux - uz)) + 0.3333333333333333*(2*ux + ux*ux + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*(1 + ux) + Fy*uy + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(ux*ux) + 2.*ux*uz - 1.*(uz*uz) + + 0.3333333333333333*(2.*ux + ux*ux + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 15 + dist[nr16] = m15 - 0.125*(Fx*ux + Fy*(-1 + uy) + Fz*(-1 + uz))* + (0.2222222222222222 + (uy + uz)*(uy + uz) - 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m15 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(-1. + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + + // q = 16 + dist[nr15] = m16 + 0.125*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - (uy + uz)*(uy + uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz)))\ + - (m16 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy + uz)*(uy + uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fy + Fz + Fx*ux + Fy*uy + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) - 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 17 + dist[nr18] = m17 + 0.125*(Fz + Fx*ux + Fy*(-1 + uy) + Fz*uz)* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz)))\ + - (m17 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux - 2*uy + uy*uy + uz*(2 + uz))) + + 0.0625*(Fz + Fx*ux + Fy*(-1. 
+ uy) + Fz*uz)* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux - 2.*uy + uy*uy + uz*(2. + uz))))/tau; + + // q = 18 + dist[nr17] = m18 + 0.125*(Fx*ux + Fy*(1 + uy) + Fz*(-1 + uz))* + (-0.2222222222222222 - (uy - uz)*(uy - uz) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz))\ + - (m18 - 0.027777777777777776*p + 0.041666666666666664*rho0* + (-((uy - uz)*(uy - uz)) + 0.3333333333333333*(ux*ux + 2*uy + uy*uy + (-2 + uz)*uz)) + + 0.0625*(Fx*ux + Fy*(1 + uy) + Fz*(-1. + uz))* + (-0.2222222222222222 - 1.*(uy*uy) + 2.*uy*uz - 1.*(uz*uz) + + 0.3333333333333333*(ux*ux + 2.*uy + uy*uy + (-2. + uz)*uz)))/tau; + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Vel[0*Np+n] = ux; + Vel[1*Np+n] = uy; + Vel[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = p; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + int n; + double ux,uy,uz;//fluid velocity + double p;//pressure + // distribution functions + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m0,m3,m5,m7; + + // for (int n=start; n>>( gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init: %s \n",hipGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(double *gqbar, double Fx, double Fy, double Fz, int Np){ + + dvc_ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init<<>>( gqbar, Fx, Fy, Fz, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void 
ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, double *Den, double *hq, double *ColorGrad, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ + + dvc_ScaLBL_FreeLeeModel_PhaseField_Init<<>>(Map, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_FreeLeeModel_PhaseField_Init: %s \n",hipGetErrorString(err)); + } + + +} +extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np) +{ + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel, + rhoA, rhoB, tauM, W, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAodd_FreeLee_PhaseField: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel, + double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_AAeven_FreeLee_PhaseField: %s \n",hipGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_ComputePhaseField, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q7_ComputePhaseField<<>>( Map, hq, 
Den, Phi, rhoA, rhoB, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q7_ComputePhaseField: %s \n",hipGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel<<>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad, + double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz, + int strideY, int strideZ, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel<<>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad, + rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel: %s \n",hipGetErrorString(err)); + } + +} + +extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double 
Fz, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<>>(neighborList, dist, Vel, Pressure, + tau, rho0, Fx, Fy, Fz, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure, + double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){ + + hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1); + dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<>>(dist, Vel, Pressure, + tau, rho0, Fx, Fy, Fz, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("CUDA error in ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK: %s \n",hipGetErrorString(err)); + } +} + + +extern "C" void ScaLBL_D3Q9_MGTest(int *Map, double *Phi,double *ColorGrad,int strideY, int strideZ, int start, int finish, int Np){ +} \ No newline at end of file diff --git a/hip/Greyscale.cu b/hip/Greyscale.cu new file mode 100644 index 00000000..fd2d5438 --- /dev/null +++ b/hip/Greyscale.cu @@ -0,0 +1,2745 @@ +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity, double *Pressure){ + int n; + // conserved momemnts + double rho,vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure; + //double uu; + // non-conserved moments + double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double 
porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=7 + nr7 = neighborList[n+6*Np]; + f7 = dist[nr7]; + + // q = 8 + nr8 = neighborList[n+7*Np]; + f8 = dist[nr8]; + + // q=9 + nr9 = neighborList[n+8*Np]; + f9 = dist[nr9]; + + // q = 10 + nr10 = neighborList[n+9*Np]; + f10 = dist[nr10]; + + // q=11 + nr11 = neighborList[n+10*Np]; + f11 = dist[nr11]; + + // q=12 + nr12 = neighborList[n+11*Np]; + f12 = dist[nr12]; + + // q=13 + nr13 = neighborList[n+12*Np]; + f13 = dist[nr13]; + + // q=14 + nr14 = neighborList[n+13*Np]; + f14 = dist[nr14]; + + // q=15 + nr15 = neighborList[n+14*Np]; + f15 = dist[nr15]; + + // q=16 + nr16 = neighborList[n+15*Np]; + f16 = dist[nr16]; + + // q=17 + //fq = dist[18*Np+n]; + nr17 = neighborList[n+16*Np]; + f17 = dist[nr17]; + + // q=18 + nr18 = neighborList[n+17*Np]; + f18 = dist[nr18]; + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes + + rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; + pressure = rho/porosity/3.0; + vx = (f1-f2+f7-f8+f9-f10+f11-f12+f13-f14)/rho+0.5*porosity*Gx; + vy = (f3-f4+f7-f8-f9+f10+f15-f16+f17-f18)/rho+0.5*porosity*Gy; + vz = (f5-f6+f11-f12-f13+f14+f15-f16-f17+f18)/rho+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the body force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = -porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx; + Fy = -porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy; + Fz = -porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz; + if (porosity==1.0){ + Fx=Gx; + Fy=Gy; + Fz=Gz; + } + + //------------------------ BGK collison where body force has higher-order terms ----------------------------------------------------------// +// // q=0 +// dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// + 0.3333333333333333*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 1 +// dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q=2 +// dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3. + (6.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 3 +// dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 4 +// dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. + (6.*uy)/porosity) + Fz*(0. - (3.*uz)/porosity)); +// +// // q = 5 +// dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(3. + (6.*uz)/porosity)); +// +// // q = 6 +// dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) +// +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(0. - (3.*uy)/porosity) + Fz*(-3. + (6.*uz)/porosity)); +// +// // q = 7 +// dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 8 +// dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uy))/porosity) + Fy*(-3. - (9.*(-ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. 
- (3.*uz)/porosity)); +// +// // q = 9 +// dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3. - (3.*ux)/porosity + (9.*(ux - uy))/porosity) + Fy*(-3. - (9.*(ux - uy))/porosity - (3.*uy)/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 10 +// dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uy))/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(-ux + uy))/porosity) + +// Fz*(0. - (3.*uz)/porosity)); +// +// // q = 11 +// dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(ux + uz))/porosity)); +// +// // q = 12 +// dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux - uz))/porosity) + +// Fz*(-3. - (9.*(-ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 13 +// dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(3. - (3.*ux)/porosity + (9.*(ux - uz))/porosity) + +// Fz*(-3. 
- (9.*(ux - uz))/porosity - (3.*uz)/porosity)); +// +// // q= 14 +// dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(0. - (3.*uy)/porosity) + Fx*(-3. - (3.*ux)/porosity - (9.*(-ux + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(-ux + uz))/porosity)); +// +// // q = 15 +// dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy + uz))/porosity) + +// Fz*(3. - (3.*uz)/porosity + (9.*(uy + uz))/porosity)); +// +// // q = 16 +// dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy - uz))/porosity) + +// Fz*(-3. - (9.*(-uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 17 +// dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(3. - (3.*uy)/porosity + (9.*(uy - uz))/porosity) + +// Fz*(-3. - (9.*(uy - uz))/porosity - (3.*uz)/porosity)); +// +// // q = 18 +// dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) +// +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(0. - (3.*ux)/porosity) + Fy*(-3. - (3.*uy)/porosity - (9.*(-uy + uz))/porosity) + +// Fz*(3. 
- (3.*uz)/porosity + (9.*(-uy + uz))/porosity)); + //----------------------------------------------------------------------------------------------------------------------------------------// + + + //------------------------ BGK collison where body force has NO higher-order terms ----------------------------------------------------------// + // q=0 + dist[n] = f0*(1.0-rlx) + rlx*0.3333333333333333*rho*(1. - (1.5*(ux*ux + uy*uy + uz*uz))/porosity); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(3.)); + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*ux + (4.5*ux*ux)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fx*(-3.)); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(3.)); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uy + (4.5*uy*uy)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fy*(-3.)); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 + 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(3.)); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + rlx*0.05555555555555555*rho*(1 - 3.*uz + (4.5*uz*uz)/porosity - (1.5*(ux*ux+ uy*uy + uz*uz))/porosity) + +0.05555555555555555*rho*(1. - 0.5*rlx)*(Fz*(-3.)); + + // q = 7 + dist[nr8] = f7*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uy) + (4.5*(ux + uy)*(ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) 
+ Fy*(3.)); + + // q = 8 + dist[nr7] = f8*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uy) + (4.5*(-ux - uy)*(-ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(-3.)); + + // q = 9 + dist[nr10] = f9*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uy) + (4.5*(ux - uy)*(ux - uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fy*(-3.)); + + // q = 10 + dist[nr9] = f10*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uy) + (4.5*(-ux + uy)*(-ux + uy))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fy*(3.)); + + // q = 11 + dist[nr12] = f11*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux + uz) + (4.5*(ux + uz)*(ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(3.)); + + // q = 12 + dist[nr11] = f12*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux - uz) + (4.5*(-ux - uz)*(-ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(-3.)); + + // q = 13 + dist[nr14] = f13*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(ux - uz) + (4.5*(ux - uz)*(ux - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(3.) + Fz*(-3.)); + + // q= 14 + dist[nr13] = f14*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-ux + uz) + (4.5*(-ux + uz)*(-ux + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fx*(-3.) + Fz*(3.)); + + // q = 15 + dist[nr16] = f15*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy + uz) + (4.5*(uy + uz)*(uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) 
+ Fz*(3.)); + + // q = 16 + dist[nr15] = f16*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy - uz) + (4.5*(-uy - uz)*(-uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(-3.)); + + // q = 17 + dist[nr18] = f17*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(uy - uz) + (4.5*(uy - uz)*(uy - uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(3.) + Fz*(-3.)); + + // q = 18 + dist[nr17] = f18*(1.0-rlx) + rlx*0.027777777777777776*rho*(1 + 3.*(-uy + uz) + (4.5*(-uy + uz)*(-uy + uz))/porosity - (1.5*(ux*ux + uy*uy + uz*uz))/porosity) + +0.027777777777777776*rho*(1. - 0.5*rlx)*(Fy*(-3.) + Fz*(3.)); + //-------------------------------------------------------------------------------------------------------------------------------------------// + + + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity, double Den, double *Pressure){ + + int n; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double 
rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + pressure = fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // q=2 + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + pressure += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + pressure += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + 
pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + nread = neighborList[n+13*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + pressure += 
fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + nread = neighborList[n+16*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + pressure += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + //---------------------------------------------------------------------// + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes + + vx = jx/Den+0.5*porosity*Gx; + vy = jy/Den+0.5*porosity*Gy; + vz = jz/Den+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = Den*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = Den*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = Den*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=Den*Gx; + Fy=Den*Gy; + Fz=Den*Gz; + } + + //Calculate pressure for Incompressible-MRT model + pressure=0.5/porosity*(pressure-0.5*Den*u_mag*u_mag/porosity); + +// //..............carry out relaxation process............................................... +// m1 = m1 + rlx_setA*((-30*Den+19*Den*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1) +// + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; +// m2 = m2 + rlx_setA*((12*Den - 5.5*Den*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2) +// + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9) +// + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; +// m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) +// + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; +// m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11) +// + 
(1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; +// m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12) +// + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; +// m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13) +// + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; +// m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14) +// + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; +// m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15) +// + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// //....................................................................................................... + + //-------------------- IMRT collison where body force has NO higher-order terms -------------// + //..............carry out relaxation process............................................... + m1 = m1 + rlx_setA*((-30*Den+19*Den*(ux*ux+uy*uy+uz*uz)/porosity + 57*pressure*porosity) - m1); + m2 = m2 + rlx_setA*((12*Den - 5.5*Den*(ux*ux+uy*uy+uz*uz)/porosity-27*pressure*porosity) - m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*Den) - m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*Den) - m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*Den) - m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*((Den*(2*ux*ux-uy*uy-uz*uz)/porosity) - m9); + m10 = m10 + rlx_setA*(-0.5*Den*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*((Den*(uy*uy-uz*uz)/porosity) - m11); + m12 = m12 + rlx_setA*(-0.5*(Den*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*((Den*ux*uy/porosity) - m13); + m14 = m14 + rlx_setA*((Den*uy*uz/porosity) - m14); + m15 = m15 + rlx_setA*((Den*ux*uz/porosity) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + 
//....................................................................................................... + + + //.................inverse transformation...................................................... + // q=0 + fq = mrt_V1*Den-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*Den-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m17-m16); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = 
mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m18-m16); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15-0.125*(m16+m18); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4)+mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12-0.25*m15+0.125*(m16+m18); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + // q = 15 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8)-mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + // q = 17 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8)-mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*Den+mrt_V9*m1+mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6)-mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ 
+ + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity,double rho0, double *Pressure){ + + int n, nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => 
even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + //nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; 
+ m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + //nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + 
jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + //---------------------------------------------------------------------// + + porosity = Poros[n]; + perm = Perm[n]; + + c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes + + vx = jx/rho0+0.5*porosity*Gx; + vy = jy/rho0+0.5*porosity*Gy; + vz = jz/rho0+0.5*porosity*Gz; + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=rho0*Gx; + Fy=rho0*Gy; + Fz=rho0*Gz; + } + + //Calculate pressure for MRT model + //pressure=rho/3.f/porosity; + pressure=rho/3.f; + + //-------------------- MRT collison where body force has NO higher-order terms -------------// + m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) - m1); + m2 = m2 
+ rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity) - m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) - m9); + m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) - m11); + m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) - m13); + m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) - m14); + m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + + + //.................inverse transformation...................................................... 
+ // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = 
mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ 
+ + //Update velocity on device + Velocity[0*Np+n] = ux; + Velocity[1*Np+n] = uy; + Velocity[2*Np+n] = uz; + //Update pressure on device + Pressure[n] = pressure; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Gx, double Gy, double Gz, + double *Poros,double *Perm, double *Velocity,double rho0, double *Pressure){ + + int n; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + double pressure;//defined for this incompressible model + // conserved momemnts + double rho,jx,jy,jz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double fq; + //double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18; + double GeoFun;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double mu_eff = (1.0/rlx_eff-0.5)/3.0;//kinematic viscosity + double Fx, Fy, Fz;//The total body force including Brinkman force and user-specified (Gx,Gy,Gz) + double rlx_setA = rlx; + double rlx_setB = 8.f*(2.f-rlx_setA)/(8.f-rlx_setA); + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale: %s \n",hipGetErrorString(err)); + } +} + 
+extern "C" void ScaLBL_D3Q19_AAodd_Greyscale(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAodd_Greyscale<<>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_IMRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAeven_Greyscale_IMRT<<>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale_IMRT: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_IMRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double Den,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAodd_Greyscale_IMRT<<>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,Den,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale_IMRT: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAodd_Greyscale_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double rho0,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAodd_Greyscale_MRT<<>>(neighborList,dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,rho0,Pressure); + + hipError_t err 
= hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_Greyscale_MRT: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_AAeven_Greyscale_MRT(double *dist, int start, int finish, int Np, double rlx, double rlx_eff, double Fx, double Fy, double Fz,double *Poros,double *Perm, double *Velocity,double rho0,double *Pressure){ + + dvc_ScaLBL_D3Q19_AAeven_Greyscale_MRT<<>>(dist,start,finish,Np,rlx,rlx_eff,Fx,Fy,Fz,Poros,Perm,Velocity,rho0,Pressure); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_Greyscale_MRT: %s \n",hipGetErrorString(err)); + } +} + +extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *dist, int Np, double Den){ + dvc_ScaLBL_D3Q19_GreyIMRT_Init<<>>(dist, Np, Den); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_GreyIMRT_Init: %s \n",hipGetErrorString(err)); + } +} diff --git a/hip/GreyscaleColor.cu b/hip/GreyscaleColor.cu new file mode 100644 index 00000000..0ceb0522 --- /dev/null +++ b/hip/GreyscaleColor.cu @@ -0,0 +1,3038 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +//Model-1 & 4 +__global__ void dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff,double alpha, double beta, + double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ + + int n,nn,ijk,nread; + int nr1,nr2,nr3,nr4,nr5,nr6; + int nr7,nr8,nr9,nr10; + int nr11,nr12,nr13,nr14; + //int nr15,nr16,nr17,nr18; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + // non-conserved moments + double 
m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double phi,tau,rho0,rlx_setA,rlx_setB; + + double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double tau_eff; + double mu_eff;//kinematic viscosity + double nx_gs,ny_gs,nz_gs;//grey-solid color gradient + double nx_phase,ny_phase,nz_phase,C_phase; + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s even part of dist) + //fq = dist[nread]; // reading the f2 data into register fq + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nr2]; // reading the f2 data into register fq + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + //nread = neighborList[n+2*Np]; // neighbor 4 + //fq = dist[nread]; + nr3 = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nr3]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + //nread = neighborList[n+3*Np]; // neighbor 3 + //fq = dist[nread]; + nr4 = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nr4]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + 
m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + //nread = neighborList[n+4*Np]; + //fq = dist[nread]; + nr5 = neighborList[n+4*Np]; + fq = dist[nr5]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + //nread = neighborList[n+5*Np]; + //fq = dist[nread]; + nr6 = neighborList[n+5*Np]; + fq = dist[nr6]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + //nread = neighborList[n+6*Np]; + //fq = dist[nread]; + nr7 = neighborList[n+6*Np]; + fq = dist[nr7]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + //nread = neighborList[n+7*Np]; + //fq = dist[nread]; + nr8 = neighborList[n+7*Np]; + fq = dist[nr8]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + //nread = neighborList[n+8*Np]; + //fq = dist[nread]; + nr9 = neighborList[n+8*Np]; + fq = dist[nr9]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + //nread = neighborList[n+9*Np]; + //fq = dist[nread]; + nr10 = neighborList[n+9*Np]; + fq = dist[nr10]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + //nread = neighborList[n+10*Np]; + //fq = dist[nread]; + nr11 = neighborList[n+10*Np]; + fq = dist[nr11]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + 
m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + //nread = neighborList[n+11*Np]; + //fq = dist[nread]; + nr12 = neighborList[n+11*Np]; + fq = dist[nr12]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + //nread = neighborList[n+12*Np]; + //fq = dist[nread]; + nr13 = neighborList[n+12*Np]; + fq = dist[nr13]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + //nread = neighborList[n+13*Np]; + //fq = dist[nread]; + nr14 = neighborList[n+13*Np]; + fq = dist[nr14]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + // Compute greyscale related parameters + c0 = 
0.5*(1.0+porosity*0.5*mu_eff/perm); + if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes + //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); + c1 = porosity*0.5*GeoFun/sqrt(perm); + if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes + + vx = jx/rho0+0.5*(porosity*Gx); + vy = jy/rho0+0.5*(porosity*Gy); + vz = jz/rho0+0.5*(porosity*Gz); + v_mag=sqrt(vx*vx+vy*vy+vz*vz); + ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); + uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); + uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); + u_mag=sqrt(ux*ux+uy*uy+uz*uz); + + //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium + Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); + Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); + Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); + if (porosity==1.0){ + Fx=rho0*(Gx); + Fy=rho0*(Gy); + Fz=rho0*(Gz); + } + + // write the velocity + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + //Pressure[n] = rho/3.f/porosity; + Pressure[n] = rho/3.f; + + //........................................................................ + //..............carry out relaxation process.............................. + //..........Toelke, Fruediger et. al. 2006................................ 
+ //---------------- NO higher-order force -------------------------------// + if (C == 0.0) nx = ny = nz = 0.0; + m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); + jx = jx + Fx; + m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + jy = jy + Fy; + m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + jz = jz + Fz; + m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); + m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); + m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); + m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); + m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); + m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); + m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //----------------------------------------------------------------------// + + //----------------With higher-order force ------------------------------// + //if (C == 0.0) nx = ny = nz = 0.0; + //m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1) + // + (1-0.5*rlx_setA)*38*(Fx*ux+Fy*uy+Fz*uz)/porosity; + //m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2) + // + (1-0.5*rlx_setA)*11*(-Fx*ux-Fy*uy-Fz*uz)/porosity; + //jx = jx + Fx; + //m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); + //jy = jy + Fy; + //m6 = m6 + 
rlx_setB*((-0.6666666666666666*uy*rho0)- m6) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); + //jz = jz + Fz; + //m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) + // + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); + //m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9) + // + (1-0.5*rlx_setA)*(4*Fx*ux-2*Fy*uy-2*Fz*uz)/porosity; + ////m10 = m10 + rlx_setA*( - m10); + //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10) + // + (1-0.5*rlx_setA)*(-2*Fx*ux+Fy*uy+Fz*uz)/porosity; + //m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11) + // + (1-0.5*rlx_setA)*(2*Fy*uy-2*Fz*uz)/porosity; + ////m12 = m12 + rlx_setA*( - m12); + //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12) + // + (1-0.5*rlx_setA)*(-Fy*uy+Fz*uz)/porosity; + //m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); + // + (1-0.5*rlx_setA)*(Fy*ux+Fx*uy)/porosity; + //m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); + // + (1-0.5*rlx_setA)*(Fz*uy+Fy*uz)/porosity; + //m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); + // + (1-0.5*rlx_setA)*(Fz*ux+Fx*uz)/porosity; + //m16 = m16 + rlx_setB*( - m16); + //m17 = m17 + rlx_setB*( - m17); + //m18 = m18 + rlx_setB*( - m18); + //----------------------------------------------------------------------// + + //.................inverse transformation...................................................... 
+ // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); + //nread = neighborList[n+Np]; + dist[nr2] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); + //nread = neighborList[n]; + dist[nr1] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+3*Np]; + dist[nr4] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); + //nread = neighborList[n+2*Np]; + dist[nr3] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+5*Np]; + dist[nr6] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); + //nread = neighborList[n+4*Np]; + dist[nr5] = fq; + + // q = 7 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); + //nread = neighborList[n+7*Np]; + dist[nr8] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16); + //nread = neighborList[n+6*Np]; + dist[nr7] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); + //nread = neighborList[n+9*Np]; + dist[nr10] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ + mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); + //nread = neighborList[n+8*Np]; + dist[nr9] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16); + //nread = neighborList[n+11*Np]; + dist[nr12] = fq; + + // q = 12 + fq = 
mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ + mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); + //nread = neighborList[n+10*Np]; + dist[nr11]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18); + //nread = neighborList[n+13*Np]; + dist[nr14] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18); + //nread = neighborList[n+12*Np]; + dist[nr13] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + //........................................................................ + + // Instantiate mass transport distributions + // Stationary value - distribution 0 + nAB = 1.0/(nA+nB); + Aq[n] = 0.3333333333333333*nA; + Bq[n] = 0.3333333333333333*nB; + + //............................................... 
+ // q = 0,2,4 + // Cq = {1,0,0}, {0,1,0}, {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nx; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + + // q = 1 + //nread = neighborList[n+Np]; + Aq[nr2] = a1; + Bq[nr2] = b1; + // q=2 + //nread = neighborList[n]; + Aq[nr1] = a2; + Bq[nr1] = b2; + + //............................................... + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + // q = 3 + //nread = neighborList[n+3*Np]; + Aq[nr4] = a1; + Bq[nr4] = b1; + // q = 4 + //nread = neighborList[n+2*Np]; + Aq[nr3] = a2; + Bq[nr3] = b2; + + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + // q = 5 + //nread = neighborList[n+5*Np]; + Aq[nr6] = a1; + Bq[nr6] = b1; + // q = 6 + //nread = neighborList[n+4*Np]; + Aq[nr5] = a2; + Bq[nr5] = b2; + //............................................... 
+ } + } +} + +//Model-1 & 4 +__global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, + double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ + int ijk,nn,n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double vx,vy,vz,v_mag; + double ux,uy,uz,u_mag; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + double m3,m5,m7; + double nA,nB; // number density + double a1,b1,a2,b2,nAB,delta; + double C,nx,ny,nz; //color gradient magnitude and direction + double phi,tau,rho0,rlx_setA,rlx_setB; + + double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) + double porosity; + double perm;//voxel permeability + double c0, c1; //Guo's model parameters + double tau_eff; + double mu_eff;//kinematic viscosity + double nx_gs,ny_gs,nz_gs;//grey-solid color gradient + double nx_phase,ny_phase,nz_phase,C_phase; + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; + 
+ Aq[1*Np+n] = a1; + Bq[1*Np+n] = b1; + Aq[2*Np+n] = a2; + Bq[2*Np+n] = b2; + + //............................................... + // q = 2 + // Cq = {0,1,0} + delta = beta*nA*nB*nAB*0.1111111111111111*ny; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; + + Aq[3*Np+n] = a1; + Bq[3*Np+n] = b1; + Aq[4*Np+n] = a2; + Bq[4*Np+n] = b2; + //............................................... + // q = 4 + // Cq = {0,0,1} + delta = beta*nA*nB*nAB*0.1111111111111111*nz; + if (!(nA*nB*nAB>0)) delta=0; + a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; + b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; + a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; + b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; + + Aq[5*Np+n] = a1; + Bq[5*Np+n] = b1; + Aq[6*Np+n] = a2; + Bq[6*Np+n] = b2; + //............................................... + + } + } +} + +__global__ void dvc_ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + int idx; + double nA,nB; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s1.0) t1 =((t1>0.0)-(t1<0.0))*(1.0-fabs(t1))+t1; +// //........................................................................ +// nn = ijk+1; // neighbor index (get convention) +// m2 = Phi[nn]; // get neighbor for phi - 2 +// t2 = m2+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t2)>1.0) t2 =((t2>0.0)-(t2<0.0))*(1.0-fabs(t2))+t2; +// //........................................................................ +// nn = ijk-strideY; // neighbor index (get convention) +// m3 = Phi[nn]; // get neighbor for phi - 3 +// t3 = m3+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t3)>1.0) t3 =((t3>0.0)-(t3<0.0))*(1.0-fabs(t3))+t3; +// //........................................................................ 
+// nn = ijk+strideY; // neighbor index (get convention) +// m4 = Phi[nn]; // get neighbor for phi - 4 +// t4 = m4+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t4)>1.0) t4 =((t4>0.0)-(t4<0.0))*(1.0-fabs(t4))+t4; +// //........................................................................ +// nn = ijk-strideZ; // neighbor index (get convention) +// m5 = Phi[nn]; // get neighbor for phi - 5 +// t5 = m5+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t5)>1.0) t5 =((t5>0.0)-(t5<0.0))*(1.0-fabs(t5))+t5; +// //........................................................................ +// nn = ijk+strideZ; // neighbor index (get convention) +// m6 = Phi[nn]; // get neighbor for phi - 6 +// t6 = m6+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t6)>1.0) t6 =((t6>0.0)-(t6<0.0))*(1.0-fabs(t6))+t6; +// //........................................................................ +// nn = ijk-strideY-1; // neighbor index (get convention) +// m7 = Phi[nn]; // get neighbor for phi - 7 +// t7 = m7+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t7)>1.0) t7 =((t7>0.0)-(t7<0.0))*(1.0-fabs(t7))+t7; +// //........................................................................ +// nn = ijk+strideY+1; // neighbor index (get convention) +// m8 = Phi[nn]; // get neighbor for phi - 8 +// t8 = m8+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t8)>1.0) t8 =((t8>0.0)-(t8<0.0))*(1.0-fabs(t8))+t8; +// //........................................................................ +// nn = ijk+strideY-1; // neighbor index (get convention) +// m9 = Phi[nn]; // get neighbor for phi - 9 +// t9 = m9+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t9)>1.0) t9 =((t9>0.0)-(t9<0.0))*(1.0-fabs(t9))+t9; +// //........................................................................ 
+// nn = ijk-strideY+1; // neighbor index (get convention) +// m10 = Phi[nn]; // get neighbor for phi - 10 +// t10 = m10+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t10)>1.0) t10 =((t10>0.0)-(t10<0.0))*(1.0-fabs(t10))+t10; +// //........................................................................ +// nn = ijk-strideZ-1; // neighbor index (get convention) +// m11 = Phi[nn]; // get neighbor for phi - 11 +// t11 = m11+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t11)>1.0) t11 =((t11>0.0)-(t11<0.0))*(1.0-fabs(t11))+t11; +// //........................................................................ +// nn = ijk+strideZ+1; // neighbor index (get convention) +// m12 = Phi[nn]; // get neighbor for phi - 12 +// t12 = m12+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t12)>1.0) t12 =((t12>0.0)-(t12<0.0))*(1.0-fabs(t12))+t12; +// //........................................................................ +// nn = ijk+strideZ-1; // neighbor index (get convention) +// m13 = Phi[nn]; // get neighbor for phi - 13 +// t13 = m13+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t13)>1.0) t13 =((t13>0.0)-(t13<0.0))*(1.0-fabs(t13))+t13; +// //........................................................................ +// nn = ijk-strideZ+1; // neighbor index (get convention) +// m14 = Phi[nn]; // get neighbor for phi - 14 +// t14 = m14+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t14)>1.0) t14 =((t14>0.0)-(t14<0.0))*(1.0-fabs(t14))+t14; +// //........................................................................ +// nn = ijk-strideZ-strideY; // neighbor index (get convention) +// m15 = Phi[nn]; // get neighbor for phi - 15 +// t15 = m15+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t15)>1.0) t15 =((t15>0.0)-(t15<0.0))*(1.0-fabs(t15))+t15; +// //........................................................................ 
+// nn = ijk+strideZ+strideY; // neighbor index (get convention) +// m16 = Phi[nn]; // get neighbor for phi - 16 +// t16 = m16+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t16)>1.0) t16 =((t16>0.0)-(t16<0.0))*(1.0-fabs(t16))+t16; +// //........................................................................ +// nn = ijk+strideZ-strideY; // neighbor index (get convention) +// m17 = Phi[nn]; // get neighbor for phi - 17 +// t17 = m17+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t17)>1.0) t17 =((t17>0.0)-(t17<0.0))*(1.0-fabs(t17))+t17; +// //........................................................................ +// nn = ijk-strideZ+strideY; // neighbor index (get convention) +// m18 = Phi[nn]; // get neighbor for phi - 18 +// t18 = m18+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t18)>1.0) t18 =((t18>0.0)-(t18<0.0))*(1.0-fabs(t18))+t18; +// //............Compute the Color Gradient................................... +// nx_phase = -(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); +// ny_phase = -(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); +// nz_phase = -(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); +// C_phase = sqrt(nx_phase*nx_phase+ny_phase*ny_phase+nz_phase*nz_phase); +// //correct the normal color gradient by considering the effect of grey solid +// nx = -(t1-t2+0.5*(t7-t8+t9-t10+t11-t12+t13-t14)); +// ny = -(t3-t4+0.5*(t7-t8-t9+t10+t15-t16+t17-t18)); +// nz = -(t5-t6+0.5*(t11-t12-t13+t14+t15-t16-t17+t18)); +// +// if (C_phase==0.0){//i.e. if in a bulk phase, there is no need for grey-solid correction +// nx = nx_phase; +// ny = ny_phase; +// nz = nz_phase; +// } +// +// //...........Normalize the Color Gradient................................. 
+// C = sqrt(nx*nx+ny*ny+nz*nz); +// double ColorMag = C; +// if (C==0.0) ColorMag=1.0; +// nx = nx/ColorMag; +// ny = ny/ColorMag; +// nz = nz/ColorMag; +// +// // q=0 +// fq = dist[n]; +// rho = fq; +// m1 = -30.0*fq; +// m2 = 12.0*fq; +// +// // q=1 +// //nread = neighborList[n]; // neighbor 2 +// //fq = dist[nread]; // reading the f1 data into register fq +// nr1 = neighborList[n]; +// fq = dist[nr1]; // reading the f1 data into register fq +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jx = fq; +// m4 = -4.0*fq; +// m9 = 2.0*fq; +// m10 = -4.0*fq; +// +// // f2 = dist[10*Np+n]; +// //nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) +// //fq = dist[nread]; // reading the f2 data into register fq +// nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) +// fq = dist[nr2]; // reading the f2 data into register fq +// rho += fq; +// m1 -= 11.0*(fq); +// m2 -= 4.0*(fq); +// jx -= fq; +// m4 += 4.0*(fq); +// m9 += 2.0*(fq); +// m10 -= 4.0*(fq); +// +// // q=3 +// //nread = neighborList[n+2*Np]; // neighbor 4 +// //fq = dist[nread]; +// nr3 = neighborList[n+2*Np]; // neighbor 4 +// fq = dist[nr3]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy = fq; +// m6 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 = fq; +// m12 = -2.0*fq; +// +// // q = 4 +// //nread = neighborList[n+3*Np]; // neighbor 3 +// //fq = dist[nread]; +// nr4 = neighborList[n+3*Np]; // neighbor 3 +// fq = dist[nr4]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy -= fq; +// m6 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 += fq; +// m12 -= 2.0*fq; +// +// // q=5 +// //nread = neighborList[n+4*Np]; +// //fq = dist[nread]; +// nr5 = neighborList[n+4*Np]; +// fq = dist[nr5]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz = fq; +// m8 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// +// // q = 6 +// //nread = neighborList[n+5*Np]; +// //fq = dist[nread]; +// nr6 = 
neighborList[n+5*Np]; +// fq = dist[nr6]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz -= fq; +// m8 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// // q=7 +// //nread = neighborList[n+6*Np]; +// //fq = dist[nread]; +// nr7 = neighborList[n+6*Np]; +// fq = dist[nr7]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 = fq; +// m16 = fq; +// m17 = -fq; +// +// // q = 8 +// //nread = neighborList[n+7*Np]; +// //fq = dist[nread]; +// nr8 = neighborList[n+7*Np]; +// fq = dist[nr8]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 += fq; +// m16 -= fq; +// m17 += fq; +// +// // q=9 +// //nread = neighborList[n+8*Np]; +// //fq = dist[nread]; +// nr9 = neighborList[n+8*Np]; +// fq = dist[nr9]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 += fq; +// m17 += fq; +// +// // q = 10 +// //nread = neighborList[n+9*Np]; +// //fq = dist[nread]; +// nr10 = neighborList[n+9*Np]; +// fq = dist[nr10]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 -= fq; +// m17 -= fq; +// +// // q=11 +// //nread = neighborList[n+10*Np]; +// //fq = dist[nread]; +// nr11 = neighborList[n+10*Np]; +// fq = dist[nr11]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 = fq; +// m16 -= fq; +// m18 = fq; +// +// // q=12 +// //nread = neighborList[n+11*Np]; +// //fq = dist[nread]; +// nr12 = neighborList[n+11*Np]; +// fq = 
dist[nr12]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 += fq; +// m16 += fq; +// m18 -= fq; +// +// // q=13 +// //nread = neighborList[n+12*Np]; +// //fq = dist[nread]; +// nr13 = neighborList[n+12*Np]; +// fq = dist[nr13]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 -= fq; +// m18 -= fq; +// +// // q=14 +// //nread = neighborList[n+13*Np]; +// //fq = dist[nread]; +// nr14 = neighborList[n+13*Np]; +// fq = dist[nr14]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 += fq; +// m18 += fq; +// +// // q=15 +// nread = neighborList[n+14*Np]; +// fq = dist[nread]; +// //fq = dist[17*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 = fq; +// m17 += fq; +// m18 -= fq; +// +// // q=16 +// nread = neighborList[n+15*Np]; +// fq = dist[nread]; +// //fq = dist[8*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 += fq; +// m17 -= fq; +// m18 += fq; +// +// // q=17 +// //fq = dist[18*Np+n]; +// nread = neighborList[n+16*Np]; +// fq = dist[nread]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 += fq; +// m18 += fq; +// +// // q=18 +// nread = neighborList[n+17*Np]; +// fq = dist[nread]; +// //fq = dist[9*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; 
+// m17 -= fq; +// m18 -= fq; +// +// // Compute greyscale related parameters +// c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); +// if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes +// //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); +// c1 = porosity*0.5*GeoFun/sqrt(perm); +// if (porosity==1.0) c1 = 0.0;//i.e. apparent pore nodes +// +// vx = jx/rho0+0.5*(porosity*Gx); +// vy = jy/rho0+0.5*(porosity*Gy); +// vz = jz/rho0+0.5*(porosity*Gz); +// v_mag=sqrt(vx*vx+vy*vy+vz*vz); +// ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); +// uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); +// uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); +// u_mag=sqrt(ux*ux+uy*uy+uz*uz); +// +// //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium +// Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); +// Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); +// Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); +// if (porosity==1.0){ +// Fx=rho0*(Gx); +// Fy=rho0*(Gy); +// Fz=rho0*(Gz); +// } +// +// // write the velocity +// Velocity[n] = ux; +// Velocity[Np+n] = uy; +// Velocity[2*Np+n] = uz; +// +// //........................................................................ +// //..............carry out relaxation process.............................. +// //..........Toelke, Fruediger et. al. 2006................................ 
+// if (C == 0.0) nx = ny = nz = 0.0; +// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); +// m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); +// m10 = m10 + rlx_setA*( - m10); +// //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); +// m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); +// m12 = m12 + rlx_setA*( - m12); +// //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); +// m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); +// m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); +// m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// +// //.................inverse transformation...................................................... 
+// // q=0 +// fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; +// dist[n] = fq; +// +// // q = 1 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); +// //nread = neighborList[n+Np]; +// dist[nr2] = fq; +// +// // q=2 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); +// //nread = neighborList[n]; +// dist[nr1] = fq; +// +// // q = 3 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// //nread = neighborList[n+3*Np]; +// dist[nr4] = fq; +// +// // q = 4 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// //nread = neighborList[n+2*Np]; +// dist[nr3] = fq; +// +// // q = 5 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// //nread = neighborList[n+5*Np]; +// dist[nr6] = fq; +// +// // q = 6 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// //nread = neighborList[n+4*Np]; +// dist[nr5] = fq; +// +// // q = 7 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); +// //nread = neighborList[n+7*Np]; +// dist[nr8] = fq; +// +// // q = 8 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 +// +mrt_V12*m12+0.25*m13+0.125*(m17-m16); +// //nread = neighborList[n+6*Np]; +// dist[nr7] = fq; +// +// // q = 9 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); +// //nread = neighborList[n+9*Np]; +// dist[nr10] = fq; +// +// // q = 10 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); +// //nread = neighborList[n+8*Np]; +// dist[nr9] = fq; +// +// // q = 11 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// 
-mrt_V12*m12+0.25*m15+0.125*(m18-m16); +// //nread = neighborList[n+11*Np]; +// dist[nr12] = fq; +// +// // q = 12 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ +// mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); +// //nread = neighborList[n+10*Np]; +// dist[nr11]= fq; +// +// // q = 13 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15-0.125*(m16+m18); +// //nread = neighborList[n+13*Np]; +// dist[nr14] = fq; +// +// // q= 14 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15+0.125*(m16+m18); +// //nread = neighborList[n+12*Np]; +// dist[nr13] = fq; +// +// +// // q = 15 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); +// nread = neighborList[n+15*Np]; +// dist[nread] = fq; +// +// // q = 16 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); +// nread = neighborList[n+14*Np]; +// dist[nread] = fq; +// +// +// // q = 17 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); +// nread = neighborList[n+17*Np]; +// dist[nread] = fq; +// +// // q = 18 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); +// nread = neighborList[n+16*Np]; +// dist[nread] = fq; +// //........................................................................ +// +// // Instantiate mass transport distributions +// // Stationary value - distribution 0 +// nAB = 1.0/(nA+nB); +// Aq[n] = 0.3333333333333333*nA; +// Bq[n] = 0.3333333333333333*nB; +// +// //............................................... 
+// // q = 0,2,4 +// // Cq = {1,0,0}, {0,1,0}, {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nx; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; +// +// // q = 1 +// //nread = neighborList[n+Np]; +// Aq[nr2] = a1; +// Bq[nr2] = b1; +// // q=2 +// //nread = neighborList[n]; +// Aq[nr1] = a2; +// Bq[nr1] = b2; +// +// //............................................... +// // Cq = {0,1,0} +// delta = beta*nA*nB*nAB*0.1111111111111111*ny; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; +// +// // q = 3 +// //nread = neighborList[n+3*Np]; +// Aq[nr4] = a1; +// Bq[nr4] = b1; +// // q = 4 +// //nread = neighborList[n+2*Np]; +// Aq[nr3] = a2; +// Bq[nr3] = b2; +// +// //............................................... +// // q = 4 +// // Cq = {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nz; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; +// +// // q = 5 +// //nread = neighborList[n+5*Np]; +// Aq[nr6] = a1; +// Bq[nr6] = b1; +// // q = 6 +// //nread = neighborList[n+4*Np]; +// Aq[nr5] = a2; +// Bq[nr5] = b2; +// //............................................... 
+// } +// } +//} +// +////Model-2&3 +//__global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi, double *GreySolidGrad, double *Poros,double *Perm, double *Velocity, +// double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, +// double Gx, double Gy, double Gz, int strideY, int strideZ, int start, int finish, int Np){ +// int ijk,nn,n; +// double fq; +// // conserved momemnts +// double rho,jx,jy,jz; +// double vx,vy,vz,v_mag; +// double ux,uy,uz,u_mag; +// // non-conserved moments +// double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; +// double m3,m5,m7; +// double t1,t2,t4,t6,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18; +// double t3,t5,t7; +// double nA,nB; // number density +// double a1,b1,a2,b2,nAB,delta; +// double C,nx,ny,nz; //color gradient magnitude and direction +// double phi,tau,rho0,rlx_setA,rlx_setB; +// +// double GeoFun=0.0;//geometric function from Guo's PRE 66, 036304 (2002) +// double porosity; +// double perm;//voxel permeability +// double c0, c1; //Guo's model parameters +// double tau_eff; +// double mu_eff;//kinematic viscosity +// double nx_phase,ny_phase,nz_phase,C_phase; +// double Fx,Fy,Fz; +// +// const double mrt_V1=0.05263157894736842; +// const double mrt_V2=0.012531328320802; +// const double mrt_V3=0.04761904761904762; +// const double mrt_V4=0.004594820384294068; +// const double mrt_V5=0.01587301587301587; +// const double mrt_V6=0.0555555555555555555555555; +// const double mrt_V7=0.02777777777777778; +// const double mrt_V8=0.08333333333333333; +// const double mrt_V9=0.003341687552213868; +// const double mrt_V10=0.003968253968253968; +// const double mrt_V11=0.01388888888888889; +// const double mrt_V12=0.04166666666666666; +// +// int S = Np/NBLOCKS/NTHREADS + 1; +// for (int s=0; s1.0) t1 =((t1>0.0)-(t1<0.0))*(1.0-fabs(t1))+t1; +// 
//........................................................................ +// nn = ijk+1; // neighbor index (get convention) +// m2 = Phi[nn]; // get neighbor for phi - 2 +// t2 = m2+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t2)>1.0) t2 =((t2>0.0)-(t2<0.0))*(1.0-fabs(t2))+t2; +// //........................................................................ +// nn = ijk-strideY; // neighbor index (get convention) +// m3 = Phi[nn]; // get neighbor for phi - 3 +// t3 = m3+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t3)>1.0) t3 =((t3>0.0)-(t3<0.0))*(1.0-fabs(t3))+t3; +// //........................................................................ +// nn = ijk+strideY; // neighbor index (get convention) +// m4 = Phi[nn]; // get neighbor for phi - 4 +// t4 = m4+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t4)>1.0) t4 =((t4>0.0)-(t4<0.0))*(1.0-fabs(t4))+t4; +// //........................................................................ +// nn = ijk-strideZ; // neighbor index (get convention) +// m5 = Phi[nn]; // get neighbor for phi - 5 +// t5 = m5+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t5)>1.0) t5 =((t5>0.0)-(t5<0.0))*(1.0-fabs(t5))+t5; +// //........................................................................ +// nn = ijk+strideZ; // neighbor index (get convention) +// m6 = Phi[nn]; // get neighbor for phi - 6 +// t6 = m6+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t6)>1.0) t6 =((t6>0.0)-(t6<0.0))*(1.0-fabs(t6))+t6; +// //........................................................................ +// nn = ijk-strideY-1; // neighbor index (get convention) +// m7 = Phi[nn]; // get neighbor for phi - 7 +// t7 = m7+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t7)>1.0) t7 =((t7>0.0)-(t7<0.0))*(1.0-fabs(t7))+t7; +// //........................................................................ 
+// nn = ijk+strideY+1; // neighbor index (get convention) +// m8 = Phi[nn]; // get neighbor for phi - 8 +// t8 = m8+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t8)>1.0) t8 =((t8>0.0)-(t8<0.0))*(1.0-fabs(t8))+t8; +// //........................................................................ +// nn = ijk+strideY-1; // neighbor index (get convention) +// m9 = Phi[nn]; // get neighbor for phi - 9 +// t9 = m9+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t9)>1.0) t9 =((t9>0.0)-(t9<0.0))*(1.0-fabs(t9))+t9; +// //........................................................................ +// nn = ijk-strideY+1; // neighbor index (get convention) +// m10 = Phi[nn]; // get neighbor for phi - 10 +// t10 = m10+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t10)>1.0) t10 =((t10>0.0)-(t10<0.0))*(1.0-fabs(t10))+t10; +// //........................................................................ +// nn = ijk-strideZ-1; // neighbor index (get convention) +// m11 = Phi[nn]; // get neighbor for phi - 11 +// t11 = m11+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t11)>1.0) t11 =((t11>0.0)-(t11<0.0))*(1.0-fabs(t11))+t11; +// //........................................................................ +// nn = ijk+strideZ+1; // neighbor index (get convention) +// m12 = Phi[nn]; // get neighbor for phi - 12 +// t12 = m12+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t12)>1.0) t12 =((t12>0.0)-(t12<0.0))*(1.0-fabs(t12))+t12; +// //........................................................................ +// nn = ijk+strideZ-1; // neighbor index (get convention) +// m13 = Phi[nn]; // get neighbor for phi - 13 +// t13 = m13+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t13)>1.0) t13 =((t13>0.0)-(t13<0.0))*(1.0-fabs(t13))+t13; +// //........................................................................ 
+// nn = ijk-strideZ+1; // neighbor index (get convention) +// m14 = Phi[nn]; // get neighbor for phi - 14 +// t14 = m14+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t14)>1.0) t14 =((t14>0.0)-(t14<0.0))*(1.0-fabs(t14))+t14; +// //........................................................................ +// nn = ijk-strideZ-strideY; // neighbor index (get convention) +// m15 = Phi[nn]; // get neighbor for phi - 15 +// t15 = m15+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t15)>1.0) t15 =((t15>0.0)-(t15<0.0))*(1.0-fabs(t15))+t15; +// //........................................................................ +// nn = ijk+strideZ+strideY; // neighbor index (get convention) +// m16 = Phi[nn]; // get neighbor for phi - 16 +// t16 = m16+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t16)>1.0) t16 =((t16>0.0)-(t16<0.0))*(1.0-fabs(t16))+t16; +// //........................................................................ +// nn = ijk+strideZ-strideY; // neighbor index (get convention) +// m17 = Phi[nn]; // get neighbor for phi - 17 +// t17 = m17+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t17)>1.0) t17 =((t17>0.0)-(t17<0.0))*(1.0-fabs(t17))+t17; +// //........................................................................ +// nn = ijk-strideZ+strideY; // neighbor index (get convention) +// m18 = Phi[nn]; // get neighbor for phi - 18 +// t18 = m18+(1.0-porosity)*GreySolidGrad[nn]; +// if (fabs(t18)>1.0) t18 =((t18>0.0)-(t18<0.0))*(1.0-fabs(t18))+t18; +// //............Compute the Color Gradient................................... 
+// nx_phase = -(m1-m2+0.5*(m7-m8+m9-m10+m11-m12+m13-m14)); +// ny_phase = -(m3-m4+0.5*(m7-m8-m9+m10+m15-m16+m17-m18)); +// nz_phase = -(m5-m6+0.5*(m11-m12-m13+m14+m15-m16-m17+m18)); +// C_phase = sqrt(nx_phase*nx_phase+ny_phase*ny_phase+nz_phase*nz_phase); +// //correct the normal color gradient by considering the effect of grey solid +// nx = -(t1-t2+0.5*(t7-t8+t9-t10+t11-t12+t13-t14)); +// ny = -(t3-t4+0.5*(t7-t8-t9+t10+t15-t16+t17-t18)); +// nz = -(t5-t6+0.5*(t11-t12-t13+t14+t15-t16-t17+t18)); +// +// if (C_phase==0.0){ +// nx = nx_phase; +// ny = ny_phase; +// nz = nz_phase; +// } +// +// //...........Normalize the Color Gradient................................. +// C = sqrt(nx*nx+ny*ny+nz*nz); +// double ColorMag = C; +// if (C==0.0) ColorMag=1.0; +// nx = nx/ColorMag; +// ny = ny/ColorMag; +// nz = nz/ColorMag; +// +// // q=0 +// fq = dist[n]; +// rho = fq; +// m1 = -30.0*fq; +// m2 = 12.0*fq; +// +// // q=1 +// fq = dist[2*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jx = fq; +// m4 = -4.0*fq; +// m9 = 2.0*fq; +// m10 = -4.0*fq; +// +// // f2 = dist[10*Np+n]; +// fq = dist[1*Np+n]; +// rho += fq; +// m1 -= 11.0*(fq); +// m2 -= 4.0*(fq); +// jx -= fq; +// m4 += 4.0*(fq); +// m9 += 2.0*(fq); +// m10 -= 4.0*(fq); +// +// // q=3 +// fq = dist[4*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy = fq; +// m6 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 = fq; +// m12 = -2.0*fq; +// +// // q = 4 +// fq = dist[3*Np+n]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jy -= fq; +// m6 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 += fq; +// m12 -= 2.0*fq; +// +// // q=5 +// fq = dist[6*Np+n]; +// rho += fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz = fq; +// m8 = -4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 += 2.0*fq; +// +// // q = 6 +// fq = dist[5*Np+n]; +// rho+= fq; +// m1 -= 11.0*fq; +// m2 -= 4.0*fq; +// jz -= fq; +// m8 += 4.0*fq; +// m9 -= fq; +// m10 += 2.0*fq; +// m11 -= fq; +// m12 
+= 2.0*fq; +// +// // q=7 +// fq = dist[8*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 = fq; +// m16 = fq; +// m17 = -fq; +// +// // q = 8 +// fq = dist[7*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 += fq; +// m16 -= fq; +// m17 += fq; +// +// // q=9 +// fq = dist[10*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jy -= fq; +// m6 -= fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 += fq; +// m17 += fq; +// +// // q = 10 +// fq = dist[9*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jy += fq; +// m6 += fq; +// m9 += fq; +// m10 += fq; +// m11 += fq; +// m12 += fq; +// m13 -= fq; +// m16 -= fq; +// m17 -= fq; +// +// // q=11 +// fq = dist[12*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 = fq; +// m16 -= fq; +// m18 = fq; +// +// // q=12 +// fq = dist[11*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 += fq; +// m16 += fq; +// m18 -= fq; +// +// // q=13 +// fq = dist[14*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx += fq; +// m4 += fq; +// jz -= fq; +// m8 -= fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 -= fq; +// m18 -= fq; +// +// // q=14 +// fq = dist[13*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jx -= fq; +// m4 -= fq; +// jz += fq; +// m8 += fq; +// m9 += fq; +// m10 += fq; +// m11 -= fq; +// m12 -= fq; +// m15 -= fq; +// m16 += fq; +// m18 += fq; +// +// // q=15 
+// fq = dist[16*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 = fq; +// m17 += fq; +// m18 -= fq; +// +// // q=16 +// fq = dist[15*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 += fq; +// m17 -= fq; +// m18 += fq; +// +// // q=17 +// fq = dist[18*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy += fq; +// m6 += fq; +// jz -= fq; +// m8 -= fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 += fq; +// m18 += fq; +// +// // q=18 +// fq = dist[17*Np+n]; +// rho += fq; +// m1 += 8.0*fq; +// m2 += fq; +// jy -= fq; +// m6 -= fq; +// jz += fq; +// m8 += fq; +// m9 -= 2.0*fq; +// m10 -= 2.0*fq; +// m14 -= fq; +// m17 -= fq; +// m18 -= fq; +// +// // Compute greyscale related parameters +// c0 = 0.5*(1.0+porosity*0.5*mu_eff/perm); +// if (porosity==1.0) c0 = 0.5;//i.e. apparent pore nodes +// //GeoFun = 1.75/sqrt(150.0*porosity*porosity*porosity); +// c1 = porosity*0.5*GeoFun/sqrt(perm); +// if (porosity==1.0) c1 = 0.0;//i.e. 
apparent pore nodes +// +// vx = jx/rho0+0.5*(porosity*Gx); +// vy = jy/rho0+0.5*(porosity*Gy); +// vz = jz/rho0+0.5*(porosity*Gz); +// v_mag=sqrt(vx*vx+vy*vy+vz*vz); +// ux = vx/(c0+sqrt(c0*c0+c1*v_mag)); +// uy = vy/(c0+sqrt(c0*c0+c1*v_mag)); +// uz = vz/(c0+sqrt(c0*c0+c1*v_mag)); +// u_mag=sqrt(ux*ux+uy*uy+uz*uz); +// +// //Update the total force to include linear (Darcy) and nonlinear (Forchheimer) drags due to the porous medium +// Fx = rho0*(-porosity*mu_eff/perm*ux - porosity*GeoFun/sqrt(perm)*u_mag*ux + porosity*Gx); +// Fy = rho0*(-porosity*mu_eff/perm*uy - porosity*GeoFun/sqrt(perm)*u_mag*uy + porosity*Gy); +// Fz = rho0*(-porosity*mu_eff/perm*uz - porosity*GeoFun/sqrt(perm)*u_mag*uz + porosity*Gz); +// if (porosity==1.0){ +// Fx=rho0*(Gx); +// Fy=rho0*(Gy); +// Fz=rho0*(Gz); +// } +// +// // write the velocity +// Velocity[n] = ux; +// Velocity[Np+n] = uy; +// Velocity[2*Np+n] = uz; +// +// //........................................................................ +// //..............carry out relaxation process.............................. +// //..........Toelke, Fruediger et. al. 2006................................ 
+// if (C == 0.0) nx = ny = nz = 0.0; +// m1 = m1 + rlx_setA*((19*(ux*ux+uy*uy+uz*uz)*rho0/porosity - 11*rho) -19*alpha*C - m1); +// m2 = m2 + rlx_setA*((3*rho - 5.5*(ux*ux+uy*uy+uz*uz)*rho0/porosity)- m2); +// jx = jx + Fx; +// m4 = m4 + rlx_setB*((-0.6666666666666666*ux*rho0)- m4) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fx); +// jy = jy + Fy; +// m6 = m6 + rlx_setB*((-0.6666666666666666*uy*rho0)- m6) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fy); +// jz = jz + Fz; +// m8 = m8 + rlx_setB*((-0.6666666666666666*uz*rho0)- m8) +// + (1-0.5*rlx_setB)*(-0.6666666666666666*Fz); +// m9 = m9 + rlx_setA*(((2*ux*ux-uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); +// m10 = m10 + rlx_setA*( - m10); +// //m10 = m10 + rlx_setA*(-0.5*rho0*((2*ux*ux-uy*uy-uz*uz)/porosity)- m10); +// m11 = m11 + rlx_setA*(((uy*uy-uz*uz)*rho0/porosity) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); +// m12 = m12 + rlx_setA*( - m12); +// //m12 = m12 + rlx_setA*(-0.5*(rho0*(uy*uy-uz*uz)/porosity)- m12); +// m13 = m13 + rlx_setA*( (ux*uy*rho0/porosity) + 0.5*alpha*C*nx*ny - m13); +// m14 = m14 + rlx_setA*( (uy*uz*rho0/porosity) + 0.5*alpha*C*ny*nz - m14); +// m15 = m15 + rlx_setA*( (ux*uz*rho0/porosity) + 0.5*alpha*C*nx*nz - m15); +// m16 = m16 + rlx_setB*( - m16); +// m17 = m17 + rlx_setB*( - m17); +// m18 = m18 + rlx_setB*( - m18); +// +// //.................inverse transformation...................................................... 
+// // q=0 +// fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; +// dist[n] = fq; +// +// // q = 1 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10); +// dist[1*Np+n] = fq; +// +// // q=2 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10); +// dist[2*Np+n] = fq; +// +// // q = 3 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// dist[3*Np+n] = fq; +// +// // q = 4 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12); +// dist[4*Np+n] = fq; +// +// // q = 5 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// dist[5*Np+n] = fq; +// +// // q = 6 +// fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11); +// dist[6*Np+n] = fq; +// +// // q = 7 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17); +// dist[7*Np+n] = fq; +// +// +// // q = 8 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 +// +mrt_V12*m12+0.25*m13+0.125*(m17-m16); +// dist[8*Np+n] = fq; +// +// // q = 9 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17); +// dist[9*Np+n] = fq; +// +// // q = 10 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ +// mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17); +// dist[10*Np+n] = fq; +// +// +// // q = 11 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12+0.25*m15+0.125*(m18-m16); +// dist[11*Np+n] = fq; +// +// // q = 12 +// fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ +// mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18); +// dist[12*Np+n] = fq; +// +// // q = 13 +// fq = mrt_V1*rho+mrt_V9*m1 +// 
+mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15-0.125*(m16+m18); +// dist[13*Np+n] = fq; +// +// // q= 14 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) +// +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 +// -mrt_V12*m12-0.25*m15+0.125*(m16+m18); +// +// dist[14*Np+n] = fq; +// +// // q = 15 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18); +// dist[15*Np+n] = fq; +// +// // q = 16 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) +// -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17); +// dist[16*Np+n] = fq; +// +// +// // q = 17 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18); +// dist[17*Np+n] = fq; +// +// // q = 18 +// fq = mrt_V1*rho+mrt_V9*m1 +// +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) +// -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18); +// dist[18*Np+n] = fq; +// //........................................................................ +// +// // Instantiate mass transport distributions +// // Stationary value - distribution 0 +// nAB = 1.0/(nA+nB); +// Aq[n] = 0.3333333333333333*nA; +// Bq[n] = 0.3333333333333333*nB; +// +// //............................................... +// // q = 0,2,4 +// // Cq = {1,0,0}, {0,1,0}, {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nx; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; +// +// Aq[1*Np+n] = a1; +// Bq[1*Np+n] = b1; +// Aq[2*Np+n] = a2; +// Bq[2*Np+n] = b2; +// +// //............................................... 
+// // q = 2 +// // Cq = {0,1,0} +// delta = beta*nA*nB*nAB*0.1111111111111111*ny; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; +// +// Aq[3*Np+n] = a1; +// Bq[3*Np+n] = b1; +// Aq[4*Np+n] = a2; +// Bq[4*Np+n] = b2; +// //............................................... +// // q = 4 +// // Cq = {0,0,1} +// delta = beta*nA*nB*nAB*0.1111111111111111*nz; +// if (!(nA*nB*nAB>0)) delta=0; +// a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; +// b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; +// a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; +// b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; +// +// Aq[5*Np+n] = a1; +// Bq[5*Np+n] = b1; +// Aq[6*Np+n] = a2; +// Bq[6*Np+n] = b2; +// //............................................... +// +// } +// } +//} + +//__global__ void dvc_ScaLBL_D3Q19_GreyscaleColor_Init(double *dist, double *Porosity, int Np) +//{ +// int n; +// int S = Np/NBLOCKS/NTHREADS + 1; +// double porosity; +// for (int s=0; s>>(dist,Porosity,Np); +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_GreyscaleColor_Init: %s \n",hipGetErrorString(err)); +// } +//} + +//Model-1 & 4 +extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, double *Pressure, + double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + //cudaProfilerStart(); + //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor, cudaFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor<<>>(Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm, Vel, Pressure, + 
rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_GreyscaleColor: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); + +} + +//Model-1 & 4 +extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, + double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel,double *Pressure, + double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, + double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ + + //cudaProfilerStart(); + //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor, cudaFuncCachePreferL1); + + dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm,Vel,Pressure, + + rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff,alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_GreyscaleColor: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_PhaseField_InitFromRestart(double *Den, double *Aq, double *Bq, int start, int finish, int Np){ + dvc_ScaLBL_PhaseField_InitFromRestart<<>>(Den, Aq, Bq, start, finish, Np); + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_PhaseField_InitFromRestart: %s \n",hipGetErrorString(err)); + } +} +////Model-2&3 +//extern "C" void ScaLBL_D3Q19_AAeven_GreyscaleColor(int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi,double *GreySolidGrad, double *Poros,double *Perm,double *Vel, +// double rhoA, double rhoB, double tauA, double tauB,double tauA_eff,double tauB_eff, double alpha, double beta, +// double Fx, double 
Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ +// +// //cudaProfilerStart(); +// //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor, cudaFuncCachePreferL1); +// +// dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor<<>>(Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm, Vel, +// rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_AAeven_GreyscaleColor: %s \n",hipGetErrorString(err)); +// } +// //cudaProfilerStop(); +// +//} +// +////Model-2&3 +//extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, +// double *Phi, double *GreySolidGrad, double *Poros,double *Perm,double *Vel, +// double rhoA, double rhoB, double tauA, double tauB, double tauA_eff,double tauB_eff, double alpha, double beta, +// double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ +// +// //cudaProfilerStart(); +// //cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor, cudaFuncCachePreferL1); +// +// dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, GreySolidGrad, Poros, Perm,Vel, +// rhoA, rhoB, tauA, tauB, tauA_eff, tauB_eff,alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); +// +// hipError_t err = hipGetLastError(); +// if (hipSuccess != err){ +// printf("hip error in ScaLBL_D3Q19_AAodd_GreyscaleColor: %s \n",hipGetErrorString(err)); +// } +// //cudaProfilerStop(); +//} diff --git a/hip/Ion.cu b/hip/Ion.cu new file mode 100644 index 00000000..2c48858d --- /dev/null +++ b/hip/Ion.cu @@ -0,0 +1,392 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ + 
int n,nread; + double fq,Ci; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + // q=2 + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + // q=4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + // q=6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + // q=0 + dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci; + //dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)); + //dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q=2 + dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)); + //dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)); + //dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)); + //dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)); + //dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)); + //dist[nr5] 
= f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz))); + } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + int n; + double Ci; + double ux,uy,uz; + double uEPx,uEPy,uEPz;//electrochemical induced velocity + double Ex,Ey,Ez;//electrical field + double f0,f1,f2,f3,f4,f5,f6; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s0) + CD_tmp; + } + } +} + + +extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<>>(neighborList,dist,Den,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<>>(dist,Den,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_Ion<<>>(neighborList,dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, 
double *Den, double *Velocity, double *ElectricField, + double Di, int zi, double rlx, double Vt, int start, int finish, int Np){ + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Ion<<>>(dist,Den,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_Init<<>>(dist,Den,DenInit,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<>>(dist,Den,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} diff --git a/hip/MixedGradient.cu b/hip/MixedGradient.cu new file mode 100644 index 00000000..31518ee5 --- /dev/null +++ b/hip/MixedGradient.cu @@ -0,0 +1,77 @@ +/* Implement Mixed Gradient (Lee et al. 
JCP 2016)*/ +#include +//#include +#include "hip/hip_runtime.h" + + +#define NBLOCKS 560 +#define NTHREADS 128 + +__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz) +{ + static const int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1}, + {1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0}, + {1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1}, + {0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}}; + + int i,j,k,n,N,idx; + int np,np2,nm; // neighbors + double v,vp,vp2,vm; // values at neighbors + double grad; + N = Nx*Ny*Nz; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err)); + } + hipProfilerStop(); +} + diff --git a/hip/Poisson.cu b/hip/Poisson.cu new file mode 100644 index 00000000..34975f58 --- /dev/null +++ b/hip/Poisson.cu @@ -0,0 +1,330 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + +__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){ + int n; + double psi;//electric potential + double fq; + int nread; + int idx; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + f1 = dist[nr1]; // reading the f1 data into register fq + + nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + f2 = dist[nr2]; // reading the f2 data into register fq + + // q=3 + nr3 = neighborList[n+2*Np]; // neighbor 4 + f3 = dist[nr3]; + + // q = 4 + nr4 = neighborList[n+3*Np]; // neighbor 3 + f4 = dist[nr4]; + + // q=5 + nr5 = neighborList[n+4*Np]; + f5 = dist[nr5]; + + // q = 6 + nr6 = neighborList[n+5*Np]; + f6 = dist[nr6]; + + Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu + Ey = (f3-f4)*rlx*4.0;//factor 
4.0 is D3Q7 lattice speed of sound + Ez = (f5-f6)*rlx*4.0; + ElectricField[n+0*Np] = Ex; + ElectricField[n+1*Np] = Ey; + ElectricField[n+2*Np] = Ez; + + // q = 0 + dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e); + + // q = 1 + dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 2 + dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 3 + dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 4 + dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 5 + dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + + // q = 6 + dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e); + //........................................................................ + } + } +} + +__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + int n; + double psi;//electric potential + double Ex,Ey,Ez;//electric field + double rho_e;//local charge density + double f0,f1,f2,f3,f4,f5,f6; + double rlx=1.0/tau; + int idx; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s>>(neighborList,Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<>>(Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int 
finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAodd_Poisson<<>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_AAeven_Poisson<<>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q7_Poisson_Init<<>>(Map,dist,Psi,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} diff --git a/hip/Stokes.cu b/hip/Stokes.cu new file mode 100644 index 00000000..a6a05fba --- /dev/null +++ b/hip/Stokes.cu @@ -0,0 +1,996 @@ +#include +#include +#include "hip/hip_runtime.h" + +#define NBLOCKS 1024 +#define NTHREADS 256 + + +__global__ void dvc_ScaLBL_D3Q19_AAodd_StokesMRT(int *neighborList, double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz, double rho0, double den_scale, double h, double time_conv,int start, int finish, int Np){ + + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + int nread; + // body force due to 
electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; s 10Np => odd part of dist) + fq = dist[nread]; // reading the f1 data into register fq + //fp = dist[10*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jx = fq; + m4 = -4.0*fq; + m9 = 2.0*fq; + m10 = -4.0*fq; + + // f2 = dist[10*Np+n]; + nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) + fq = dist[nread]; // reading the f2 data into register fq + //fq = dist[Np+n]; + rho += fq; + m1 -= 11.0*(fq); + m2 -= 4.0*(fq); + jx -= fq; + m4 += 4.0*(fq); + m9 += 2.0*(fq); + m10 -= 4.0*(fq); + + // q=3 + nread = neighborList[n+2*Np]; // neighbor 4 + fq = dist[nread]; + //fq = dist[11*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy = fq; + m6 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 = fq; + m12 = -2.0*fq; + + // q = 4 + nread = neighborList[n+3*Np]; // neighbor 3 + fq = dist[nread]; + //fq = dist[2*Np+n]; + rho+= fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jy -= fq; + m6 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 += fq; + m12 -= 2.0*fq; + + // q=5 + nread = neighborList[n+4*Np]; + fq = dist[nread]; + //fq = dist[12*Np+n]; + rho += fq; + m1 -= 11.0*fq; + m2 -= 4.0*fq; + jz = fq; + m8 = -4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + + // q = 6 + nread = neighborList[n+5*Np]; + fq = dist[nread]; + //fq = dist[3*Np+n]; + rho+= fq; + m1 
-= 11.0*fq; + m2 -= 4.0*fq; + jz -= fq; + m8 += 4.0*fq; + m9 -= fq; + m10 += 2.0*fq; + m11 -= fq; + m12 += 2.0*fq; + + // q=7 + nread = neighborList[n+6*Np]; + fq = dist[nread]; + //fq = dist[13*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 = fq; + m16 = fq; + m17 = -fq; + + // q = 8 + nread = neighborList[n+7*Np]; + fq = dist[nread]; + //fq = dist[4*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 += fq; + m16 -= fq; + m17 += fq; + + // q=9 + nread = neighborList[n+8*Np]; + fq = dist[nread]; + //fq = dist[14*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jy -= fq; + m6 -= fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 += fq; + m17 += fq; + + // q = 10 + nread = neighborList[n+9*Np]; + fq = dist[nread]; + //fq = dist[5*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jy += fq; + m6 += fq; + m9 += fq; + m10 += fq; + m11 += fq; + m12 += fq; + m13 -= fq; + m16 -= fq; + m17 -= fq; + + // q=11 + nread = neighborList[n+10*Np]; + fq = dist[nread]; + //fq = dist[15*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 = fq; + m16 -= fq; + m18 = fq; + + // q=12 + nread = neighborList[n+11*Np]; + fq = dist[nread]; + //fq = dist[6*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 += fq; + m16 += fq; + m18 -= fq; + + // q=13 + nread = neighborList[n+12*Np]; + fq = dist[nread]; + //fq = dist[16*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx += fq; + m4 += fq; + jz -= fq; + m8 -= fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 -= fq; + m18 -= fq; + + // q=14 + 
nread = neighborList[n+13*Np]; + fq = dist[nread]; + //fq = dist[7*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jx -= fq; + m4 -= fq; + jz += fq; + m8 += fq; + m9 += fq; + m10 += fq; + m11 -= fq; + m12 -= fq; + m15 -= fq; + m16 += fq; + m18 += fq; + + // q=15 + nread = neighborList[n+14*Np]; + fq = dist[nread]; + //fq = dist[17*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 = fq; + m17 += fq; + m18 -= fq; + + // q=16 + nread = neighborList[n+15*Np]; + fq = dist[nread]; + //fq = dist[8*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 += fq; + m17 -= fq; + m18 += fq; + + // q=17 + //fq = dist[18*Np+n]; + nread = neighborList[n+16*Np]; + fq = dist[nread]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy += fq; + m6 += fq; + jz -= fq; + m8 -= fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 += fq; + m18 += fq; + + // q=18 + nread = neighborList[n+17*Np]; + fq = dist[nread]; + //fq = dist[9*Np+n]; + rho += fq; + m1 += 8.0*fq; + m2 += fq; + jy -= fq; + m6 -= fq; + jz += fq; + m8 += fq; + m9 -= 2.0*fq; + m10 -= 2.0*fq; + m14 -= fq; + m17 -= fq; + m18 -= fq; + + // write the velocity + ux = jx / rho0; + uy = jy / rho0; + uz = jz / rho0; + Velocity[n] = ux; + Velocity[Np+n] = uy; + Velocity[2*Np+n] = uz; + + //..............incorporate external force................................................ + //..............carry out relaxation process............................................... 
+ m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) - m1); + m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0) - m2); + m4 = m4 + rlx_setB*((-0.6666666666666666*jx) - m4); + m6 = m6 + rlx_setB*((-0.6666666666666666*jy) - m6); + m8 = m8 + rlx_setB*((-0.6666666666666666*jz) - m8); + m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) - m9); + m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); + m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) - m11); + m12 = m12 + rlx_setA*(-0.5*((jy*jy-jz*jz)/rho0) - m12); + m13 = m13 + rlx_setA*((jx*jy/rho0) - m13); + m14 = m14 + rlx_setA*((jy*jz/rho0) - m14); + m15 = m15 + rlx_setA*((jx*jz/rho0) - m15); + m16 = m16 + rlx_setB*( - m16); + m17 = m17 + rlx_setB*( - m17); + m18 = m18 + rlx_setB*( - m18); + //....................................................................................................... + //.................inverse transformation...................................................... + + // q=0 + fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; + dist[n] = fq; + + // q = 1 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; + nread = neighborList[n+Np]; + dist[nread] = fq; + + // q=2 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; + nread = neighborList[n]; + dist[nread] = fq; + + // q = 3 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; + nread = neighborList[n+3*Np]; + dist[nread] = fq; + + // q = 4 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; + nread = neighborList[n+2*Np]; + dist[nread] = fq; + + // q = 5 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; + nread = neighborList[n+5*Np]; + dist[nread] = fq; + + // q = 6 + fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; + nread = neighborList[n+4*Np]; + dist[nread] = fq; + + // q = 7 + 
fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); + nread = neighborList[n+7*Np]; + dist[nread] = fq; + + // q = 8 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); + nread = neighborList[n+6*Np]; + dist[nread] = fq; + + // q = 9 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); + nread = neighborList[n+9*Np]; + dist[nread] = fq; + + // q = 10 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4) + +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 + +mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); + nread = neighborList[n+8*Np]; + dist[nread] = fq; + + // q = 11 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); + nread = neighborList[n+11*Np]; + dist[nread] = fq; + + // q = 12 + fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); + nread = neighborList[n+10*Np]; + dist[nread]= fq; + + // q = 13 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); + nread = neighborList[n+13*Np]; + dist[nread] = fq; + + // q= 14 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) + +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 + -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); + nread = neighborList[n+12*Np]; + dist[nread] = fq; + + + // q = 15 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 
0.08333333333*(Fy+Fz); + nread = neighborList[n+15*Np]; + dist[nread] = fq; + + // q = 16 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) + -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); + nread = neighborList[n+14*Np]; + dist[nread] = fq; + + + // q = 17 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) + -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); + nread = neighborList[n+17*Np]; + dist[nread] = fq; + + // q = 18 + fq = mrt_V1*rho+mrt_V9*m1 + +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) + -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); + nread = neighborList[n+16*Np]; + dist[nread] = fq; + } + } +} + +__global__ void dvc_ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){ + + int n; + double fq; + // conserved momemnts + double rho,jx,jy,jz; + double ux,uy,uz; + // non-conserved moments + double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; + // body force due to electric field + double rhoE;//charge density + double Ex,Ey,Ez; + // total body force + double Fx,Fy,Fz; + + const double mrt_V1=0.05263157894736842; + const double mrt_V2=0.012531328320802; + const double mrt_V3=0.04761904761904762; + const double mrt_V4=0.004594820384294068; + const double mrt_V5=0.01587301587301587; + const double mrt_V6=0.0555555555555555555555555; + const double mrt_V7=0.02777777777777778; + const double mrt_V8=0.08333333333333333; + const double mrt_V9=0.003341687552213868; + const double mrt_V10=0.003968253968253968; + const double mrt_V11=0.01388888888888889; + const double mrt_V12=0.04166666666666666; + + int S = Np/NBLOCKS/NTHREADS + 1; + for (int s=0; 
s>>(neighborList,dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAodd_StokesMRT: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + +extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB, double Gx, double Gy, double Gz,double rho0, double den_scale, double h, double time_conv, int start, int finish, int Np){ + + //cudaProfilerStart(); + dvc_ScaLBL_D3Q19_AAeven_StokesMRT<<>>(dist,Velocity,ChargeDensity,ElectricField,rlx_setA,rlx_setB,Gx,Gy,Gz,rho0,den_scale,h,time_conv,start,finish,Np); + + hipError_t err = hipGetLastError(); + if (hipSuccess != err){ + printf("hip error in ScaLBL_D3Q19_AAeven_StokesMRT: %s \n",hipGetErrorString(err)); + } + //cudaProfilerStop(); +} + diff --git a/models/ColorModel.cpp b/models/ColorModel.cpp index 5f0e6f0e..204fd1d6 100644 --- a/models/ColorModel.cpp +++ b/models/ColorModel.cpp @@ -9,15 +9,33 @@ color lattice boltzmann model #include #include + ScaLBL_ColorModel::ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM): -rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),alpha(0),beta(0), -Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), -Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) + rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0), + tauA(0), tauB(0), rhoA(0), rhoB(0), alpha(0), beta(0), + Fx(0), Fy(0), Fz(0), flux(0), din(0), dout(0), + inletA(0), inletB(0), outletA(0), outletB(0), + Nx(0), Ny(0), Nz(0), N(0), Np(0), nprocx(0), nprocy(0), nprocz(0), + BoundaryCondition(0), Lx(0), Ly(0), Lz(0), id(nullptr), + NeighborList(nullptr), dvcMap(nullptr), fq(nullptr), Aq(nullptr), Bq(nullptr), 
+ Den(nullptr), Phi(nullptr), ColorGrad(nullptr), Velocity(nullptr), Pressure(nullptr), + comm(COMM) { REVERSE_FLOW_DIRECTION = false; } -ScaLBL_ColorModel::~ScaLBL_ColorModel(){ - +ScaLBL_ColorModel::~ScaLBL_ColorModel() +{ + delete [] id; + ScaLBL_FreeDeviceMemory( NeighborList ); + ScaLBL_FreeDeviceMemory( dvcMap ); + ScaLBL_FreeDeviceMemory( fq ); + ScaLBL_FreeDeviceMemory( Aq ); + ScaLBL_FreeDeviceMemory( Bq ); + ScaLBL_FreeDeviceMemory( Den ); + ScaLBL_FreeDeviceMemory( Phi ); + ScaLBL_FreeDeviceMemory( Pressure ); + ScaLBL_FreeDeviceMemory( Velocity ); + ScaLBL_FreeDeviceMemory( ColorGrad ); } /*void ScaLBL_ColorModel::WriteCheckpoint(const char *FILENAME, const double *cPhi, const double *cfq, int Np) @@ -238,9 +256,26 @@ void ScaLBL_ColorModel::ReadInput(){ } } // MeanFilter(Averages->SDs); + Minkowski Solid(Dm); if (rank==0) printf("Initialized solid phase -- Converting to Signed Distance function \n"); CalcDist(Averages->SDs,id_solid,*Mask); - + Solid.ComputeScalar(Averages->SDs,0.0); + /* save averages */ + Averages->solid.V = Solid.Vi; + Averages->solid.A = Solid.Ai; + Averages->solid.H = Solid.Ji; + Averages->solid.X = Solid.Xi; + Averages->gsolid.V = Solid.Vi_global; + Averages->gsolid.A = Solid.Ai_global; + Averages->gsolid.H = Solid.Ji_global; + Averages->gsolid.X = Solid.Xi_global; + /* write to file */ + if (rank == 0) { + FILE *SOLID = fopen("solid.csv","w"); + fprintf(SOLID,"Vs As Hs Xs\n"); + fprintf(SOLID,"%.8g %.8g %.8g %.8g\n",Solid.Vi_global,Solid.Ai_global,Solid.Ji_global,Solid.Xi_global); + fclose(SOLID); + } if (rank == 0) cout << "Domain set." 
<< endl; Averages->SetParams(rhoA,rhoB,tauA,tauB,Fx,Fy,Fz,alpha,beta); @@ -254,12 +289,17 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) auto LabelList = color_db->getVector( "ComponentLabels" ); auto AffinityList = color_db->getVector( "ComponentAffinity" ); + auto WettingConvention = color_db->getWithDefault( "WettingConvention", "none" ); NLABELS=LabelList.size(); if (NLABELS != AffinityList.size()){ ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! \n"); } + if (WettingConvention == "SCAL"){ + for (size_t idx=0; idxPhi.data(),Phi,N*sizeof(double)); } +double ScaLBL_ColorModel::Run(int returntime){ + int nprocs=nprocx*nprocy*nprocz; + + //************ MAIN ITERATION LOOP ***************************************/ + comm.barrier(); + PROFILE_START("Loop"); + //std::shared_ptr analysis_db; + bool Regular = false; + auto current_db = db->cloneDatabase(); + auto t1 = std::chrono::system_clock::now(); + int START_TIMESTEP = timestep; + int EXIT_TIMESTEP = min(timestepMax,returntime); + while (timestep < EXIT_TIMESTEP ) { + //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } + PROFILE_START("Update"); + // *************ODD TIMESTEP************* + timestep++; + // Compute the Phase indicator field + // Read for Aq, Bq happens in this routine (requires communication) + ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + 
ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + // Halo exchange for phase field + ScaLBL_Comm_Regular->SendHalo(Phi); + + ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Phi); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set BCs + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + + // *************EVEN TIMESTEP************* + timestep++; + // Compute the Phase indicator field + ScaLBL_Comm->BiSendD3Q7AA(Aq,Bq); //READ FROM NORMAL + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->BiRecvD3Q7AA(Aq,Bq); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, Aq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL + // Halo exchange for phase field + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + ScaLBL_Comm_Regular->SendHalo(Phi); + ScaLBL_D3Q19_AAeven_Color(dvcMap, 
fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_Regular->RecvHalo(Phi); + ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + } + ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, Aq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, + alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + //************************************************************************ + } + PROFILE_STOP("Update"); + + PROFILE_STOP("Loop"); + PROFILE_SAVE("lbpm_color_simulator",1); + //************************************************************************ + // Compute the walltime per timestep + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / (timestep - START_TIMESTEP); + // Performance obtained from each node + double MLUPS = double(Np)/cputime/1000000; + + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("CPU time = %f \n", cputime); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + return(MLUPS); + MLUPS *= nprocs; + +} + void ScaLBL_ColorModel::Run(){ int nprocs=nprocx*nprocy*nprocz; const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); @@ -538,7 +695,6 @@ void ScaLBL_ColorModel::Run(){ if (color_db->keyExists( "krA_morph_factor" )){ KRA_MORPH_FACTOR = 
color_db->getScalar( "krA_morph_factor" ); } - /* defaults for simulation protocols */ auto protocol = color_db->getWithDefault( "protocol", "none" ); if (protocol == "image sequence"){ @@ -583,7 +739,7 @@ void ScaLBL_ColorModel::Run(){ if (analysis_db->keyExists( "seed_water" )){ seed_water = analysis_db->getScalar( "seed_water" ); if (rank == 0) printf("Seed water in oil %f (seed_water) \n",seed_water); - USE_SEED = true; + ASSERT(protocol == "seed water"); } if (analysis_db->keyExists( "morph_delta" )){ morph_delta = analysis_db->getScalar( "morph_delta" ); @@ -614,7 +770,6 @@ void ScaLBL_ColorModel::Run(){ MAX_MORPH_TIMESTEPS = analysis_db->getScalar( "max_morph_timesteps" ); } - if (rank==0){ printf("********************************************************\n"); if (protocol == "image sequence"){ @@ -651,20 +806,15 @@ void ScaLBL_ColorModel::Run(){ fflush(stdout); } - //.......create and start timer............ - double starttime,stoptime,cputime; - ScaLBL_Comm->Barrier(); - comm.barrier(); - starttime = MPI_Wtime(); - //......................................... 
- //************ MAIN ITERATION LOOP ***************************************/ + comm.barrier(); PROFILE_START("Loop"); //std::shared_ptr analysis_db; bool Regular = false; auto current_db = db->cloneDatabase(); runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); //analysis.createThreads( analysis_method, 4 ); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -997,10 +1147,10 @@ void ScaLBL_ColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_Comm->Barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; @@ -1080,7 +1230,6 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ ScaLBL_CopyToHost(phase.data(), Phi, N*sizeof(double)); // Extract only the connected part of NWP - BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,Averages->SDs,vF,vS,phase_label,Dm->Comm); comm.barrier(); @@ -1202,6 +1351,7 @@ double ScaLBL_ColorModel::MorphOpenConnected(double target_volume_change){ } return(volume_change); } + double ScaLBL_ColorModel::SeedPhaseField(const double seed_water_in_oil){ srand(time(NULL)); double mass_loss =0.f; @@ -1283,7 +1433,7 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta double vF = 0.f; double vS = 0.f; double delta_volume; - double WallFactor = 0.0; + double WallFactor = 1.0; bool USE_CONNECTED_NWP 
= false; DoubleArray phase(Nx,Ny,Nz); @@ -1306,6 +1456,11 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta } } double volume_initial = Dm->Comm.sumReduce( count); + double PoreVolume = Dm->Volume*Dm->Porosity(); + /*ensure target isn't an absurdly small fraction of pore volume */ + if (volume_initial < target_delta_volume*PoreVolume){ + volume_initial = target_delta_volume*PoreVolume; + } /* sprintf(LocalRankFilename,"phi_initial.%05i.raw",rank); FILE *INPUT = fopen(LocalRankFilename,"wb"); @@ -1317,7 +1472,6 @@ double ScaLBL_ColorModel::MorphInit(const double beta, const double target_delta double volume_connected = 0.0; double second_biggest = 0.0; if (USE_CONNECTED_NWP){ - BlobIDstruct new_index; ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,phase,Averages->SDs,vF,vS,phase_label,comm); comm.barrier(); @@ -1566,3 +1720,68 @@ void ScaLBL_ColorModel::WriteDebug(){ fclose(CGZ_FILE); */ } + +FlowAdaptor::FlowAdaptor(ScaLBL_ColorModel &M){ + Nx = M.Dm->Nx; + Ny = M.Dm->Ny; + Nz = M.Dm->Nz; + timestep=-1; + timestep_previous=-1; + + phi.resize(Nx,Ny,Nz); phi.fill(0); // phase indicator field + phi_t.resize(Nx,Ny,Nz); phi_t.fill(0); // time derivative for the phase indicator field +} + +FlowAdaptor::~FlowAdaptor(){ + +} + +double FlowAdaptor::MoveInterface(ScaLBL_ColorModel &M){ + + double INTERFACE_CUTOFF = M.color_db->getWithDefault( "move_interface_cutoff", 0.975 ); + double MOVE_INTERFACE_FACTOR = M.color_db->getWithDefault( "move_interface_factor", 10.0 ); + + ScaLBL_CopyToHost( phi.data(), M.Phi, Nx*Ny*Nz* sizeof( double ) ); + /* compute the local derivative of phase indicator field */ + double beta = M.beta; + double factor = 0.5/beta; + double total_interface_displacement = 0.0; + double total_interface_sites = 0.0; + for (int n=0; nPhi(n); + double dist1 = factor*log((1.0+value1)/(1.0-value1)); + double value2 = phi(n); + double dist2 = factor*log((1.0+value2)/(1.0-value2)); + phi_t(n) = value2; + if (value1 < 
INTERFACE_CUTOFF && value1 > -1*INTERFACE_CUTOFF && value2 < INTERFACE_CUTOFF && value2 > -1*INTERFACE_CUTOFF ){ + /* time derivative of distance */ + double dxdt = 0.125*(dist2-dist1); + /* extrapolate to move the distance further */ + double dist3 = dist2 + MOVE_INTERFACE_FACTOR*dxdt; + /* compute the new phase interface */ + phi_t(n) = (2.f*(exp(-2.f*beta*(dist3)))/(1.f+exp(-2.f*beta*(dist3))) - 1.f); + total_interface_displacement += fabs(MOVE_INTERFACE_FACTOR*dxdt); + total_interface_sites += 1.0; + } + } + ScaLBL_CopyToDevice( M.Phi, phi_t.data(), Nx*Ny*Nz* sizeof( double ) ); + + +/* ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_PhaseField_Init(dvcMap, Phi, Den, Aq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4){ + if (Dm->kproc()==0){ + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,1); + ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,2); + } + if (Dm->kproc() == nprocz-1){ + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-1); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-2); + ScaLBL_SetSlice_z(Phi,-1.0,Nx,Ny,Nz,Nz-3); + } + } + */ +} + diff --git a/models/ColorModel.h b/models/ColorModel.h index f5667765..7d3c858a 100644 --- a/models/ColorModel.h +++ b/models/ColorModel.h @@ -16,6 +16,10 @@ Implementation of color lattice boltzmann model #include "ProfilerApp.h" #include "threadpool/thread_pool.h" + +#ifndef ScaLBL_ColorModel_INC +#define ScaLBL_ColorModel_INC + class ScaLBL_ColorModel{ public: ScaLBL_ColorModel(int RANK, int NP, const Utilities::MPI& COMM); @@ -29,7 +33,9 @@ public: void Create(); void Initialize(); void Run(); + double Run(int returntime); void WriteDebug(); + void getPhaseField(DoubleArray &f); bool Restart,pBC; bool REVERSE_FLOW_DIRECTION; @@ -86,3 +92,17 @@ private: double MorphOpenConnected(double target_volume_change); }; +class FlowAdaptor{ +public: + 
FlowAdaptor(ScaLBL_ColorModel &M); + ~FlowAdaptor(); + double MoveInterface(ScaLBL_ColorModel &M); + DoubleArray phi; + DoubleArray phi_t; +private: + int Nx, Ny, Nz; + int timestep; + int timestep_previous; +}; +#endif + diff --git a/models/DFHModel.cpp b/models/DFHModel.cpp index 7fd61271..24639d3e 100644 --- a/models/DFHModel.cpp +++ b/models/DFHModel.cpp @@ -490,14 +490,10 @@ void ScaLBL_DFHModel::Run(){ if (rank==0) printf("********************************************************\n"); if (rank==0) printf("No. of timesteps: %i \n", timestepMax); - //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); - //......................................... //************ MAIN ITERATION LOOP ***************************************/ - + auto t1 = std::chrono::system_clock::now(); bool Regular = true; PROFILE_START("Loop"); runAnalysis analysis( analysis_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); @@ -589,10 +585,10 @@ void ScaLBL_DFHModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; if (rank==0) printf("********************************************************\n"); diff --git a/models/FreeLeeModel.cpp b/models/FreeLeeModel.cpp index dfe43a2a..428db40f 100644 --- a/models/FreeLeeModel.cpp +++ b/models/FreeLeeModel.cpp @@ -10,8 +10,9 @@ color lattice boltzmann model #include ScaLBL_FreeLeeModel::ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM): -rank(RANK), nprocs(NP), 
Restart(0),timestep(0),timestepMax(0),tauA(0),tauB(0),rhoA(0),rhoB(0),W(0),gamma(0), +rank(RANK), nprocs(NP), Restart(0),timestep(0),timestepMax(2),tauA(1.0),tauB(1.0),tauM(1.0),rhoA(1.0),rhoB(1.0),W(5.0),gamma(0.001),kappa(0.0075),beta(0.0024), Fx(0),Fy(0),Fz(0),flux(0),din(0),dout(0),inletA(0),inletB(0),outletA(0),outletB(0), +tau(1.0),rho0(1.0), Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),Lx(0),Ly(0),Lz(0),comm(COMM) { @@ -19,6 +20,45 @@ Nx(0),Ny(0),Nz(0),N(0),Np(0),nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0), ScaLBL_FreeLeeModel::~ScaLBL_FreeLeeModel(){ } + + +void ScaLBL_FreeLeeModel::getPhase(DoubleArray &PhaseValues){ + + DoubleArray PhaseWideHalo(Nxh,Nyh,Nzh); + ScaLBL_CopyToHost(PhaseWideHalo.data(), Phi, sizeof(double)*Nh); + + // use halo width = 1 for analysis data + for (int k=1; kRegularLayout(Map,Pressure,PressureValues); + ScaLBL_Comm->Barrier(); comm.barrier(); + + ScaLBL_Comm->RegularLayout(Map,mu_phi,MuValues); + ScaLBL_Comm->Barrier(); comm.barrier(); + +} + +void ScaLBL_FreeLeeModel::getVelocity(DoubleArray &Vel_x, DoubleArray &Vel_y, DoubleArray &Vel_z){ + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],Vel_x); + ScaLBL_Comm->Barrier(); comm.barrier(); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],Vel_y); + ScaLBL_Comm->Barrier(); comm.barrier(); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],Vel_z); + ScaLBL_Comm->Barrier(); comm.barrier(); +} + void ScaLBL_FreeLeeModel::ReadParams(string filename){ // read the input database db = std::make_shared( filename ); @@ -30,10 +70,15 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ // set defaults timestepMax = 100000; tauA = tauB = 1.0; + tauM = 1.0;//relaxation time for phase field rhoA = rhoB = 1.0; + tau = 1.0;//only for single-fluid Lee model + rho0 = 1.0;//only for single-fluid Lee model Fx = Fy = Fz = 0.0; - gamma=1e-3; - W=5; + gamma=1e-3;//surface tension + W=5.0;//interfacial thickness + beta = 12.0*gamma/W; + kappa = 
3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma Restart=false; din=dout=1.0; flux=0.0; @@ -42,12 +87,21 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ if (freelee_db->keyExists( "timestepMax" )){ timestepMax = freelee_db->getScalar( "timestepMax" ); } + if (freelee_db->keyExists( "tau" )){//only for single-fluid Lee model + tau = freelee_db->getScalar( "tau" ); + } if (freelee_db->keyExists( "tauA" )){ tauA = freelee_db->getScalar( "tauA" ); } if (freelee_db->keyExists( "tauB" )){ tauB = freelee_db->getScalar( "tauB" ); } + if (freelee_db->keyExists( "tauM" )){ + tauM = freelee_db->getScalar( "tauM" ); + } + if (freelee_db->keyExists( "rho0" )){ + rho0 = freelee_db->getScalar( "rho0" ); + } if (freelee_db->keyExists( "rhoA" )){ rhoA = freelee_db->getScalar( "rhoA" ); } @@ -81,6 +135,9 @@ void ScaLBL_FreeLeeModel::ReadParams(string filename){ inletB=0.f; outletA=0.f; outletB=1.f; + //update secondary parameters + beta = 12.0*gamma/W; + kappa = 3.0*gamma*W/2.0;//beta and kappa are related to surface tension \gamma //if (BoundaryCondition==4) flux *= rhoA; // mass flux must adjust for density (see formulation for details) BoundaryCondition = 0; @@ -184,9 +241,9 @@ void ScaLBL_FreeLeeModel::ReadInput(){ } -void ScaLBL_FreeLeeModel::Create(){ +void ScaLBL_FreeLeeModel::Create_TwoFluid(){ /* - * This function creates the variables needed to run a LBM + * This function creates the variables needed to run two-fluid Lee model */ //......................................................... 
// Initialize communication structures in averaging domain @@ -198,7 +255,7 @@ void ScaLBL_FreeLeeModel::Create(){ // Create a communicator for the device (will use optimized layout) // ScaLBL_Communicator ScaLBL_Comm(Mask); // original ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); - ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); + //ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); ScaLBL_Comm_WideHalo = std::shared_ptr(new ScaLBLWideHalo_Communicator(Mask,2)); // create the layout for the LBM @@ -220,7 +277,7 @@ void ScaLBL_FreeLeeModel::Create(){ //........................................................................... ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); - ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &hq, 7*dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &mu_phi, dist_mem_size); ScaLBL_AllocateDeviceMemory((void **) &Den, dist_mem_size); @@ -239,46 +296,329 @@ void ScaLBL_FreeLeeModel::Create(){ for (int i=1; iMap(i,j,k); } } } // check that TmpMap is valid for (int idx=0; idxLastExterior(); idx++){ auto n = TmpMap[idx]; - if (n > Nx*Ny*Nz){ + if (n > Nxh*Nyh*Nzh){ printf("Bad value! idx=%i \n", n); - TmpMap[idx] = Nx*Ny*Nz-1; + TmpMap[idx] = Nxh*Nyh*Nzh-1; } } for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ auto n = TmpMap[idx]; - if ( n > Nx*Ny*Nz ){ + if ( n > Nxh*Nyh*Nzh ){ printf("Bad value! 
idx=%i \n",n); - TmpMap[idx] = Nx*Ny*Nz-1; + TmpMap[idx] = Nxh*Nyh*Nzh-1; } } + // copy the device map ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); - ScaLBL_DeviceBarrier(); - delete [] TmpMap; - // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); - // initialize phi based on PhaseLabel (include solid component labels) + comm.barrier(); + delete [] TmpMap; + delete [] neighborList; } -/******************************************************** - * AssignComponentLabels * - ********************************************************/ - -void ScaLBL_FreeLeeModel::Initialize(){ - - if (rank==0) printf ("Initializing distributions \n"); - ScaLBL_D3Q19_Init(fq, Np); +void ScaLBL_FreeLeeModel::Create_SingleFluid(){ /* - * This function initializes model + * This function creates the variables needed to run single-fluid Lee model */ + //......................................................... + // Initialize communication structures in averaging domain + for (int i=0; iid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... + if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + + // create the layout for the LBM + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1); + comm.barrier(); + + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... 
+ // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + dist_mem_size = Np*sizeof(double); + neighborSize=18*(Np*sizeof(int)); + //........................................................................... + ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + //........................................................................... + // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + // copy the neighbor list + ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + comm.barrier(); + delete [] neighborList; +} + +void ScaLBL_FreeLeeModel::AssignComponentLabels_ChemPotential_ColorGrad() +{ + double *phase; + phase = new double[Nh]; + + size_t NLABELS=0; + signed char VALUE=0; + double AFFINITY=0.f; + + auto LabelList = freelee_db->getVector( "ComponentLabels" ); + auto AffinityList = freelee_db->getVector( "ComponentAffinity" ); + + NLABELS=LabelList.size(); + if (NLABELS != AffinityList.size()){ + ERROR("Error: ComponentLabels and ComponentAffinity must be the same length! 
\n"); + } + + double label_count[NLABELS]; + double label_count_global[NLABELS]; + + // Assign the labels + for (size_t idx=0; idxid[n] + int x=i-1; + int y=j-1; + int z=k-1; + if (x<0) x=0; + if (y<0) y=0; + if (z<0) z=0; + if (x>=Nx) x=Nx-1; + if (y>=Ny) y=Ny-1; + if (z>=Nz) z=Nz-1; + int n = z*Nx*Ny+y*Nx+x; + VALUE=id[n]; + + // Assign the affinity from the paired list + for (unsigned int idx=0; idx < NLABELS; idx++){ + //printf("idx=%i, value=%i, %i, \n",idx, VALUE,LabelList[idx]); + if (VALUE == LabelList[idx]){ + AFFINITY=AffinityList[idx]; + label_count[idx] += 1.0; + idx = NLABELS; + //Mask->id[n] = 0; // set mask to zero since this is an immobile component + } + } + // fluid labels are reserved + if (VALUE == 1) AFFINITY=1.0; + else if (VALUE == 2) AFFINITY=-1.0; + phase[nh] = AFFINITY; + } + } + } + + // Set Dm to match Mask + for (int i=0; iid[i] = Mask->id[i]; + + for (size_t idx=0; idxComm.sumReduce(label_count[idx]); + + if (rank==0){ + printf("Number of component labels: %lu \n",NLABELS); + for (unsigned int idx=0; idxBarrier(); + comm.barrier(); + + //debug + //save the phase field and check it + //FILE *OUTFILE; + //sprintf(LocalRankFilename,"Phase_Init.%05i.raw",rank); + //OUTFILE = fopen(LocalRankFilename,"wb"); + //fwrite(phase,8,Nh,OUTFILE); + //fclose(OUTFILE); + + DoubleArray PhaseField(Nx,Ny,Nz); + FILE *OUTFILE; + ScaLBL_Comm->RegularLayout(Map,mu_phi_host,PhaseField); + sprintf(LocalRankFilename,"Chem_Init.%05i.raw",rank); + OUTFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,OUTFILE); + fclose(OUTFILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad_host[0],PhaseField); + FILE *CGX_FILE; + sprintf(LocalRankFilename,"Gradient_X_Init.%05i.raw",rank); + CGX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGX_FILE); + fclose(CGX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad_host[Np],PhaseField); + FILE *CGY_FILE; + sprintf(LocalRankFilename,"Gradient_Y_Init.%05i.raw",rank); + CGY_FILE = 
fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGY_FILE); + fclose(CGY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&ColorGrad_host[2*Np],PhaseField); + FILE *CGZ_FILE; + sprintf(LocalRankFilename,"Gradient_Z_Init.%05i.raw",rank); + CGZ_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,CGZ_FILE); + fclose(CGZ_FILE); + + delete [] phase; + delete [] ColorGrad_host; + delete [] mu_phi_host; + delete [] Dst; +} + +void ScaLBL_FreeLeeModel::Initialize_TwoFluid(){ + /* + * This function initializes two-fluid Lee model + */ + if (rank==0) printf ("Initializing phase field, chemical potential and color gradient\n"); + AssignComponentLabels_ChemPotential_ColorGrad();//initialize phase field Phi + + if (rank==0) printf ("Initializing distributions for momentum transport\n"); + ScaLBL_D3Q19_FreeLeeModel_TwoFluid_Init(gqbar, mu_phi, ColorGrad, Fx, Fy, Fz, Np); + + if (rank==0) printf ("Initializing density field and distributions for phase-field transport\n"); + ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_FreeLeeModel_PhaseField_Init(dvcMap, Phi, Den, hq, ColorGrad, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + if (Restart == true){ + //TODO need to revise this function if (rank==0){ printf("Reading restart file! 
\n"); } @@ -292,7 +632,7 @@ void ScaLBL_FreeLeeModel::Initialize(){ cDen = new double[2*Np]; cDist = new double[19*Np]; ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); - ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); + //ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); ifstream File(LocalRestartFile,ios::binary); int idx; @@ -331,18 +671,19 @@ void ScaLBL_FreeLeeModel::Initialize(){ // Copy the restart data to the GPU ScaLBL_CopyToDevice(Den,cDen,2*Np*sizeof(double)); - ScaLBL_CopyToDevice(fq,cDist,19*Np*sizeof(double)); + ScaLBL_CopyToDevice(gqbar,cDist,19*Np*sizeof(double)); ScaLBL_CopyToDevice(Phi,cPhi,N*sizeof(double)); - ScaLBL_DeviceBarrier(); - + ScaLBL_Comm->Barrier(); comm.barrier(); + + if (rank==0) printf ("Initializing phase and density fields on device from Restart\n"); + //TODO the following function is to be updated. + //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); + //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); } - if (rank==0) printf ("Initializing phase field \n"); - //ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, 0, ScaLBL_Comm->LastExterior(), Np); - //ScaLBL_PhaseField_Init(dvcMap, Phi, Den, hq, Bq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - // establish reservoirs for external bC + // TODO to be revised if (BoundaryCondition == 1 || BoundaryCondition == 2 || BoundaryCondition == 3 || BoundaryCondition == 4 ){ if (Dm->kproc()==0){ ScaLBL_SetSlice_z(Phi,1.0,Nx,Ny,Nz,0); @@ -358,7 +699,201 @@ void ScaLBL_FreeLeeModel::Initialize(){ //ScaLBL_CopyToHost(Averages->Phi.data(),Phi,N*sizeof(double)); } -void ScaLBL_FreeLeeModel::Run(){ +void ScaLBL_FreeLeeModel::Initialize_SingleFluid(){ + /* + * This function initializes single-fluid Lee model + */ + if (rank==0) printf ("Initializing distributions for momentum transport\n"); + ScaLBL_D3Q19_FreeLeeModel_SingleFluid_Init(gqbar, Fx, Fy, Fz, Np); + + if 
(Restart == true){ + //TODO need to revise this function + //remove the phase-related part + + + +// if (rank==0){ +// printf("Reading restart file! \n"); +// } +// +// // Read in the restart file to CPU buffers +// int *TmpMap; +// TmpMap = new int[Np]; +// +// double *cPhi, *cDist, *cDen; +// cPhi = new double[N]; +// cDen = new double[2*Np]; +// cDist = new double[19*Np]; +// ScaLBL_CopyToHost(TmpMap, dvcMap, Np*sizeof(int)); +// //ScaLBL_CopyToHost(cPhi, Phi, N*sizeof(double)); +// +// ifstream File(LocalRestartFile,ios::binary); +// int idx; +// double value,va,vb; +// for (int n=0; nLastExterior(); n++){ +// va = cDen[n]; +// vb = cDen[Np + n]; +// value = (va-vb)/(va+vb); +// idx = TmpMap[n]; +// if (!(idx < 0) && idxFirstInterior(); nLastInterior(); n++){ +// va = cDen[n]; +// vb = cDen[Np + n]; +// value = (va-vb)/(va+vb); +// idx = TmpMap[n]; +// if (!(idx < 0) && idxBarrier(); +// comm.barrier(); +// +// if (rank==0) printf ("Initializing phase and density fields on device from Restart\n"); +// //TODO the following function is to be updated. 
+// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, 0, ScaLBL_Comm->LastExterior(), Np); +// //ScaLBL_FreeLeeModel_PhaseField_InitFromRestart(Den, hq, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + } +} + +double ScaLBL_FreeLeeModel::Run_TwoFluid(int returntime){ + int nprocs=nprocx*nprocy*nprocz; + + int START_TIME = timestep; + int EXIT_TIME = min(returntime, timestepMax); + //************ MAIN ITERATION LOOP ***************************************/ + comm.barrier(); + auto t1 = std::chrono::system_clock::now(); + PROFILE_START("Loop"); + + while (timestep < EXIT_TIME ) { + //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } + PROFILE_START("Update"); + // *************ODD TIMESTEP************* + timestep++; + //------------------------------------------------------------------------------------------------------------------- + // Compute the Phase indicator field + // Read for hq happens in this routine (requires communication) + ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMAL + ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(NeighborList, dvcMap, hq, Den, Phi, ColorGrad, Velocity, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(NeighborList, dvcMap, hq, Den, Phi, ColorGrad, Velocity, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + // Halo exchange for phase field + ScaLBL_D3Q7_ComputePhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, 0, ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_WideHalo->Send(Phi); + ScaLBL_Comm_WideHalo->Recv(Phi); + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + //TODO to be revised + // Need to add BC for hq!!! 
+ ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, + kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set BCs + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); + } + + ScaLBL_D3Q19_AAodd_FreeLeeModel(NeighborList, dvcMap, gqbar, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, + kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + + + // *************EVEN TIMESTEP************* + timestep++; + // Compute the Phase indicator field + ScaLBL_Comm->SendD3Q7AA(hq,0); //READ FROM NORMA + ScaLBL_D3Q7_AAeven_FreeLee_PhaseField(dvcMap, hq, Den, Phi, ColorGrad, Velocity, rhoA, rhoB, tauM, W, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q7AA(hq,0); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + ScaLBL_D3Q7_AAeven_FreeLee_PhaseField(dvcMap, hq, Den, Phi, ColorGrad, Velocity, rhoA, rhoB, tauM, W, 0, ScaLBL_Comm->LastExterior(), Np); + + // Perform the collision operation + // Halo exchange for phase field + ScaLBL_D3Q7_ComputePhaseField(dvcMap, hq, Den, Phi, rhoA, rhoB, ScaLBL_Comm->FirstInterior(), 
ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_WideHalo->Send(Phi); + ScaLBL_Comm_WideHalo->Recv(Phi); + if (BoundaryCondition > 0 && BoundaryCondition < 5){ + ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); + ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); + } + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL + + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, + kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set boundary conditions + if (BoundaryCondition == 3){ + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 4){ + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); + } + else if (BoundaryCondition == 5){ + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); + } + ScaLBL_D3Q19_AAeven_FreeLeeModel(dvcMap, gqbar, Den, Phi, mu_phi, Velocity, Pressure, ColorGrad, rhoA, rhoB, tauA, tauB, + kappa, beta, W, Fx, Fy, Fz, Nxh, Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + //************************************************************************ + PROFILE_STOP("Update"); + } + PROFILE_STOP("Loop"); + PROFILE_SAVE("lbpm_color_simulator",1); + //************************************************************************ + if (rank==0) printf("-------------------------------------------------------------------\n"); + // Compute the walltime per timestep + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / (EXIT_TIME-START_TIME); + // Performance obtained from each node + double MLUPS = 
double(Np)/cputime/1000000; + + return MLUPS; +} + +void ScaLBL_FreeLeeModel::Run_SingleFluid(){ int nprocs=nprocx*nprocy*nprocz; const RankInfoStruct rank_info(rank,nprocx,nprocy,nprocz); @@ -369,97 +904,69 @@ void ScaLBL_FreeLeeModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... //************ MAIN ITERATION LOOP ***************************************/ PROFILE_START("Loop"); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); // *************ODD TIMESTEP************* timestep++; - /* // Compute the Phase indicator field - // Read for hq, Bq happens in this routine (requires communication) - ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAodd_PhaseField(NeighborList, dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); - + //------------------------------------------------------------------------------------------------------------------- // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL - if (BoundaryCondition > 0 && BoundaryCondition < 5){ - ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); - ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); - } - // Halo exchange for phase field - ScaLBL_Comm_Regular->SendHalo(Phi); - - ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), 
ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Phi); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - // Set BCs + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FROM NORMAL + ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); + // Set boundary conditions + // TODO to be revised! if (BoundaryCondition == 3){ - ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } if (BoundaryCondition == 4){ - din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 5){ - ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); - ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } - ScaLBL_D3Q19_AAodd_Color(NeighborList, dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - ScaLBL_DeviceBarrier(); - MPI_Barrier(ScaLBL_Comm->MPI_COMM_SCALBL); + ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(NeighborList, gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + 0, ScaLBL_Comm->LastExterior(), Np); + ScaLBL_Comm->Barrier(); + // *************EVEN TIMESTEP************* timestep++; - // Compute the Phase indicator field - ScaLBL_Comm->BiSendD3Q7AA(hq,Bq); //READ FROM NORMAL - 
ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm->BiRecvD3Q7AA(hq,Bq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); - ScaLBL_D3Q7_AAeven_PhaseField(dvcMap, hq, Bq, Den, Phi, 0, ScaLBL_Comm->LastExterior(), Np); - + //------------------------------------------------------------------------------------------------------------------- // Perform the collision operation - ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL - // Halo exchange for phase field - if (BoundaryCondition > 0 && BoundaryCondition < 5){ - ScaLBL_Comm->Color_BC_z(dvcMap, Phi, Den, inletA, inletB); - ScaLBL_Comm->Color_BC_Z(dvcMap, Phi, Den, outletA, outletB); - } - ScaLBL_Comm_Regular->SendHalo(Phi); - ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); - ScaLBL_Comm_Regular->RecvHalo(Phi); - ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE - ScaLBL_DeviceBarrier(); + ScaLBL_Comm->SendD3Q19AA(gqbar); //READ FORM NORMAL + ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm->RecvD3Q19AA(gqbar); //WRITE INTO OPPOSITE + ScaLBL_Comm->Barrier(); // Set boundary conditions + // TODO to be revised! 
if (BoundaryCondition == 3){ - ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, gqbar, din, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 4){ - din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep); - ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep); + din = ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, gqbar, flux, timestep); + ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, gqbar, dout, timestep); } else if (BoundaryCondition == 5){ - ScaLBL_Comm->D3Q19_Reflection_BC_z(fq); - ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq); + ScaLBL_Comm->D3Q19_Reflection_BC_z(gqbar); + ScaLBL_Comm->D3Q19_Reflection_BC_Z(gqbar); } - ScaLBL_D3Q19_AAeven_Color(dvcMap, fq, hq, Bq, Den, Phi, Velocity, rhoA, rhoB, tauA, tauB, - alpha, beta, Fx, Fy, Fz, Nx, Nx*Ny, 0, ScaLBL_Comm->LastExterior(), Np); - */ + ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(gqbar, Velocity, Pressure, tau, rho0, Fx, Fy, Fz, + 0, ScaLBL_Comm->LastExterior(), Np); ScaLBL_Comm->Barrier(); //************************************************************************ PROFILE_STOP("Update"); @@ -467,10 +974,10 @@ void ScaLBL_FreeLeeModel::Run(){ PROFILE_STOP("Loop"); PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; @@ -484,33 +991,63 @@ void ScaLBL_FreeLeeModel::Run(){ // 
************************************************************************ } - -void ScaLBL_FreeLeeModel::WriteDebug(){ +void ScaLBL_FreeLeeModel::WriteDebug_TwoFluid(){ // Copy back final phase indicator field and convert to regular layout - DoubleArray PhaseField(Nx,Ny,Nz); + DoubleArray PhaseData(Nxh,Nyh,Nzh); //ScaLBL_Comm->RegularLayout(Map,Phi,PhaseField); - ScaLBL_CopyToHost(PhaseField.data(), Phi, sizeof(double)*N); + ScaLBL_CopyToHost(PhaseData.data(), Phi, sizeof(double)*Nh); + /* + IntArray MapData(Np); + ScaLBL_CopyToHost(MapData.data(), dvcMap, sizeof(int)*Np); + FILE *MAP; + sprintf(LocalRankFilename,"Map.%05i.raw",rank); + MAP = fopen(LocalRankFilename,"wb"); + fwrite(MapData.data(),4,Np,MAP); + fclose(MAP); + + FILE *NB; + //IntArray Neighbors(18,Np); + //ScaLBL_CopyToHost(Neighbors.data(), NeighborList, sizeof(int)*Np*18); + sprintf(LocalRankFilename,"neighbors.%05i.raw",rank); + NB = fopen(LocalRankFilename,"wb"); + fwrite(NeighborList,4,18*Np,NB); + fclose(NB); + + FILE *DIST; + DoubleArray DistData(7, Np); + ScaLBL_CopyToHost(DistData.data(), hq, 7*sizeof(double)*Np); + sprintf(LocalRankFilename,"h.%05i.raw",rank); + DIST = fopen(LocalRankFilename,"wb"); + fwrite(DistData.data(),8,7*Np,DIST); + fclose(DIST); + + */ FILE *OUTFILE; sprintf(LocalRankFilename,"Phase.%05i.raw",rank); OUTFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,OUTFILE); + fwrite(PhaseData.data(),8,Nh,OUTFILE); fclose(OUTFILE); - ScaLBL_Comm->RegularLayout(Map,&Den[0],PhaseField); + DoubleArray PhaseField(Nx,Ny,Nz); + FILE *DIST; + for (int q=0; q<7; q++){ + ScaLBL_Comm->RegularLayout(Map,&hq[q*Np],PhaseField); + + sprintf(LocalRankFilename,"h%i.%05i.raw",q,rank); + DIST = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,Nx*Ny*Nz,DIST); + fclose(DIST); + + } + + ScaLBL_Comm->RegularLayout(Map,Den,PhaseField); FILE *AFILE; - sprintf(LocalRankFilename,"A.%05i.raw",rank); + sprintf(LocalRankFilename,"Density.%05i.raw",rank); AFILE = 
fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,AFILE); fclose(AFILE); - ScaLBL_Comm->RegularLayout(Map,&Den[Np],PhaseField); - FILE *BFILE; - sprintf(LocalRankFilename,"B.%05i.raw",rank); - BFILE = fopen(LocalRankFilename,"wb"); - fwrite(PhaseField.data(),8,N,BFILE); - fclose(BFILE); - ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); FILE *PFILE; sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); @@ -539,7 +1076,7 @@ void ScaLBL_FreeLeeModel::WriteDebug(){ fwrite(PhaseField.data(),8,N,VELZ_FILE); fclose(VELZ_FILE); -/* ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); + ScaLBL_Comm->RegularLayout(Map,&ColorGrad[0],PhaseField); FILE *CGX_FILE; sprintf(LocalRankFilename,"Gradient_X.%05i.raw",rank); CGX_FILE = fopen(LocalRankFilename,"wb"); @@ -559,5 +1096,187 @@ void ScaLBL_FreeLeeModel::WriteDebug(){ CGZ_FILE = fopen(LocalRankFilename,"wb"); fwrite(PhaseField.data(),8,N,CGZ_FILE); fclose(CGZ_FILE); -*/ + +} + +void ScaLBL_FreeLeeModel::WriteDebug_SingleFluid(){ + + DoubleArray PhaseField(Nx,Ny,Nz); + + // Copy back final phase indicator field and convert to regular layout + ScaLBL_Comm->RegularLayout(Map,Pressure,PhaseField); + FILE *PFILE; + sprintf(LocalRankFilename,"Pressure.%05i.raw",rank); + PFILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,PFILE); + fclose(PFILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[0],PhaseField); + FILE *VELX_FILE; + sprintf(LocalRankFilename,"Velocity_X.%05i.raw",rank); + VELX_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELX_FILE); + fclose(VELX_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[Np],PhaseField); + FILE *VELY_FILE; + sprintf(LocalRankFilename,"Velocity_Y.%05i.raw",rank); + VELY_FILE = fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELY_FILE); + fclose(VELY_FILE); + + ScaLBL_Comm->RegularLayout(Map,&Velocity[2*Np],PhaseField); + FILE *VELZ_FILE; + sprintf(LocalRankFilename,"Velocity_Z.%05i.raw",rank); + VELZ_FILE = 
fopen(LocalRankFilename,"wb"); + fwrite(PhaseField.data(),8,N,VELZ_FILE); + fclose(VELZ_FILE); +} + +void ScaLBL_FreeLeeModel::Create_DummyPhase_MGTest(){ + // Initialize communication structures in averaging domain + for (int i=0; iid[i] = Mask->id[i]; + Mask->CommInit(); + Np=Mask->PoreCount(); + //........................................................................... + if (rank==0) printf ("Create ScaLBL_Communicator \n"); + // Create a communicator for the device (will use optimized layout) + // ScaLBL_Communicator ScaLBL_Comm(Mask); // original + ScaLBL_Comm = std::shared_ptr(new ScaLBL_Communicator(Mask)); + //ScaLBL_Comm_Regular = std::shared_ptr(new ScaLBL_Communicator(Mask)); + ScaLBL_Comm_WideHalo = std::shared_ptr(new ScaLBLWideHalo_Communicator(Mask,2)); + + // create the layout for the LBM + int Npad=(Np/16 + 2)*16; + if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N); + Map.resize(Nx,Ny,Nz); Map.fill(-2); + auto neighborList= new int[18*Npad]; + Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1); + comm.barrier(); + + //........................................................................... + // MAIN VARIABLES ALLOCATED HERE + //........................................................................... + // LBM variables + if (rank==0) printf ("Allocating distributions \n"); + //......................device distributions................................. + dist_mem_size = Np*sizeof(double); + neighborSize=18*(Np*sizeof(int)); + //........................................................................... 
+ //ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize); + ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np); + //ScaLBL_AllocateDeviceMemory((void **) &gqbar, 19*dist_mem_size); + //ScaLBL_AllocateDeviceMemory((void **) &hq, 7*dist_mem_size); + //ScaLBL_AllocateDeviceMemory((void **) &mu_phi, dist_mem_size); + //ScaLBL_AllocateDeviceMemory((void **) &Den, dist_mem_size); + ScaLBL_AllocateDeviceMemory((void **) &Phi, sizeof(double)*Nh); + //ScaLBL_AllocateDeviceMemory((void **) &Pressure, sizeof(double)*Np); + //ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np); + ScaLBL_AllocateDeviceMemory((void **) &ColorGrad, 3*sizeof(double)*Np); + //........................................................................... + // Update GPU data structures + if (rank==0) printf ("Setting up device map and neighbor list \n"); + fflush(stdout); + int *TmpMap; + TmpMap=new int[Np]; + for (int k=1; kMap(i,j,k); + } + } + } + // check that TmpMap is valid + for (int idx=0; idxLastExterior(); idx++){ + auto n = TmpMap[idx]; + if (n > Nxh*Nyh*Nzh){ + printf("Bad value! idx=%i \n", n); + TmpMap[idx] = Nxh*Nyh*Nzh-1; + } + } + for (int idx=ScaLBL_Comm->FirstInterior(); idxLastInterior(); idx++){ + auto n = TmpMap[idx]; + if ( n > Nxh*Nyh*Nzh ){ + printf("Bad value! 
idx=%i \n",n); + TmpMap[idx] = Nxh*Nyh*Nzh-1; + } + } + // copy the device map + ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np); + // copy the neighbor list + //ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); + comm.barrier(); + + double *phase; + phase = new double[Nh]; + + for (int k=0;kid[n] + int x=i-1; + int y=j-1; + int z=k-1; + if (x<0) x=0; + if (y<0) y=0; + if (z<0) z=0; + if (x>=Nx) x=Nx-1; + if (y>=Ny) y=Ny-1; + if (z>=Nz) z=Nz-1; + int n = z*Nx*Ny+y*Nx+x; + phase[nh]=id[n]; + } + } + } + ScaLBL_CopyToDevice(Phi, phase, Nh*sizeof(double)); + ScaLBL_Comm->Barrier(); + comm.barrier(); + delete [] TmpMap; + delete [] neighborList; + delete [] phase; +} + +void ScaLBL_FreeLeeModel::MGTest(){ + + comm.barrier(); + + ScaLBL_Comm_WideHalo->Send(Phi); + ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad,Nxh,Nxh*Nyh, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); + ScaLBL_Comm_WideHalo->Recv(Phi); + ScaLBL_D3Q9_MGTest(dvcMap,Phi,ColorGrad,Nxh,Nxh*Nyh, 0, ScaLBL_Comm->LastExterior(), Np); + + //check the sum of ColorGrad + double cgx_loc = 0.0; + double cgy_loc = 0.0; + double cgz_loc = 0.0; + double cgx,cgy,cgz; + double *ColorGrad_host; + ColorGrad_host = new double [3*Np]; + ScaLBL_CopyToHost(&ColorGrad_host[0],&ColorGrad[0], 3*Np*sizeof(double)); + for (int i = ScaLBL_Comm->FirstInterior(); iLastInterior();i++){ + cgx_loc+=ColorGrad_host[0*Np+i]; + cgy_loc+=ColorGrad_host[1*Np+i]; + cgz_loc+=ColorGrad_host[2*Np+i]; + } + for (int i = 0; iLastExterior();i++){ + cgx_loc+=ColorGrad_host[0*Np+i]; + cgy_loc+=ColorGrad_host[1*Np+i]; + cgz_loc+=ColorGrad_host[2*Np+i]; + } + cgx=Dm->Comm.sumReduce( cgx_loc); + cgy=Dm->Comm.sumReduce( cgy_loc); + cgz=Dm->Comm.sumReduce( cgz_loc); + if (rank==0){ + printf("Sum of all x-component of the mixed gradient = %.2g \n",cgx); + printf("Sum of all y-component of the mixed gradient = %.2g \n",cgy); + printf("Sum of all z-component of the mixed gradient = %.2g \n",cgz); + } + + delete [] ColorGrad_host; } 
diff --git a/models/FreeLeeModel.h b/models/FreeLeeModel.h index 5aa2d30a..17cc6323 100644 --- a/models/FreeLeeModel.h +++ b/models/FreeLeeModel.h @@ -16,6 +16,9 @@ Implementation of Lee et al JCP 2016 lattice boltzmann model #include "common/ScaLBL.h" #include "common/WideHalo.h" +#ifndef ScaLBL_FreeLeeModel_INC +#define ScaLBL_FreeLeeModel_INC + class ScaLBL_FreeLeeModel{ public: ScaLBL_FreeLeeModel(int RANK, int NP, const Utilities::MPI& COMM); @@ -26,16 +29,27 @@ public: void ReadParams(std::shared_ptr db0); void SetDomain(); void ReadInput(); - void Create(); - void Initialize(); - void Run(); - void WriteDebug(); + void Create_TwoFluid(); + void Initialize_TwoFluid(); + double Run_TwoFluid(int returntime); + + void WriteDebug_TwoFluid(); + void Create_SingleFluid(); + void Initialize_SingleFluid(); + void Run_SingleFluid(); + + void WriteDebug_SingleFluid(); + // test utilities + void Create_DummyPhase_MGTest(); + void MGTest(); bool Restart,pBC; int timestep,timestepMax; int BoundaryCondition; double tauA,tauB,rhoA,rhoB; - double W,gamma; + double tau, rho0;//only for single-fluid Lee model + double tauM;//relaxation time for phase field (or mass) + double W,gamma,kappa,beta; double Fx,Fy,Fz,flux; double din,dout,inletA,inletB,outletA,outletB; @@ -61,14 +75,18 @@ public: signed char *id; int *NeighborList; int *dvcMap; - double *fq, *hq; + double *gqbar, *hq; double *mu_phi, *Den, *Phi; double *ColorGrad; double *Velocity; double *Pressure; + void getPhase(DoubleArray &PhaseValues); + void getPotential(DoubleArray &PressureValues, DoubleArray &MuValues); + void getVelocity(DoubleArray &Vx, DoubleArray &Vy, DoubleArray &Vz); + DoubleArray SignDist; - + private: Utilities::MPI comm; @@ -81,6 +99,7 @@ private: //int rank,nprocs; void LoadParams(std::shared_ptr db0); + void AssignComponentLabels_ChemPotential_ColorGrad(); }; - +#endif diff --git a/models/GreyscaleColorModel.cpp b/models/GreyscaleColorModel.cpp index dc1e12f9..5d2b4d07 100644 --- 
a/models/GreyscaleColorModel.cpp +++ b/models/GreyscaleColorModel.cpp @@ -910,10 +910,8 @@ void ScaLBL_GreyscaleColorModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... //************ MAIN ITERATION LOOP ***************************************/ @@ -923,6 +921,7 @@ void ScaLBL_GreyscaleColorModel::Run(){ auto current_db = db->cloneDatabase(); //runAnalysis analysis( current_db, rank_info, ScaLBL_Comm, Dm, Np, Regular, Map ); //analysis.createThreads( analysis_method, 4 ); + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -1319,10 +1318,10 @@ void ScaLBL_GreyscaleColorModel::Run(){ PROFILE_SAVE("lbpm_color_simulator",1); //************************************************************************ ScaLBL_Comm->Barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/GreyscaleModel.cpp b/models/GreyscaleModel.cpp index 6c580cc5..308cc1e6 100644 --- a/models/GreyscaleModel.cpp +++ b/models/GreyscaleModel.cpp @@ -485,10 +485,8 @@ void ScaLBL_GreyscaleModel::Run(){ } //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); //......................................... 
Minkowski Morphology(Mask); @@ -500,6 +498,7 @@ void ScaLBL_GreyscaleModel::Run(){ double rlx_eff = 1.0/tau_eff; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ // *************ODD TIMESTEP*************// @@ -744,10 +743,10 @@ void ScaLBL_GreyscaleModel::Run(){ //************************************************************************ ScaLBL_DeviceBarrier(); comm.barrier(); - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/IonModel.cpp b/models/IonModel.cpp index bdd07473..67887811 100644 --- a/models/IonModel.cpp +++ b/models/IonModel.cpp @@ -784,7 +784,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField){ //.......create and start timer............ 
//double starttime,stoptime,cputime; //ScaLBL_Comm->Barrier(); comm.barrier(); - //starttime = MPI_Wtime(); + //auto t1 = std::chrono::system_clock::now(); for (int ic=0; icLastExterior(), Np); } //************************************************************************/ - //stoptime = MPI_Wtime(); //if (rank==0) printf("-------------------------------------------------------------------\n"); //// Compute the walltime per timestep - //cputime = (stoptime - starttime)/timestep; + //auto t2 = std::chrono::system_clock::now(); + //double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; //// Performance obtained from each node //double MLUPS = double(Np)/cputime/1000000; diff --git a/models/MRTModel.cpp b/models/MRTModel.cpp index 3e19b717..e1a451e2 100644 --- a/models/MRTModel.cpp +++ b/models/MRTModel.cpp @@ -26,6 +26,8 @@ void ScaLBL_MRTModel::ReadParams(string filename){ tolerance = 1.0e-8; Fx = Fy = 0.0; Fz = 1.0e-5; + dout = 1.0; + din = 1.0; // Color Model parameters if (mrt_db->keyExists( "timestepMax" )){ @@ -194,7 +196,8 @@ void ScaLBL_MRTModel::Create(){ // copy the neighbor list ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize); comm.barrier(); - + double MLUPS = ScaLBL_Comm->GetPerformance(NeighborList,fq,Np); + printf(" MLPUS=%f from rank %i\n",MLUPS,rank); } void ScaLBL_MRTModel::Initialize(){ @@ -227,14 +230,13 @@ void ScaLBL_MRTModel::Run(){ } //.......create and start timer............ 
- double starttime,stoptime,cputime; ScaLBL_DeviceBarrier(); comm.barrier(); - starttime = MPI_Wtime(); if (rank==0) printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax); if (rank==0) printf("********************************************************\n"); timestep=0; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ timestep++; @@ -351,10 +353,10 @@ void ScaLBL_MRTModel::Run(){ } } //************************************************************************/ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/models/MultiPhysController.cpp b/models/MultiPhysController.cpp index bbc77923..b815383e 100644 --- a/models/MultiPhysController.cpp +++ b/models/MultiPhysController.cpp @@ -2,7 +2,7 @@ ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK),nprocs(NP),Restart(0),timestepMax(0),num_iter_Stokes(0),num_iter_Ion(0), -analysis_interval(0),visualization_interval(0),tolerance(0),comm(COMM) +analysis_interval(0),visualization_interval(0),tolerance(0),time_conv_max(0),comm(COMM) { } @@ -25,6 +25,7 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename){ analysis_interval = 500; visualization_interval = 10000; tolerance = 1.0e-6; + time_conv_max = 0.0; // load input parameters if (study_db->keyExists( "timestepMax" )){ @@ -135,3 +136,12 @@ vector ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(double Stoke } return num_iter_ion; } + +void 
ScaLBL_Multiphys_Controller::getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv){ + //Return maximum of the time converting factor from Stokes and ion solvers + vector TimeConv; + + TimeConv.assign(IonTimeConv.begin(),IonTimeConv.end()); + TimeConv.insert(TimeConv.begin(),StokesTimeConv); + time_conv_max = *max_element(TimeConv.begin(),TimeConv.end()); +} diff --git a/models/MultiPhysController.h b/models/MultiPhysController.h index 4388d6b9..a9ea7a6b 100644 --- a/models/MultiPhysController.h +++ b/models/MultiPhysController.h @@ -27,6 +27,7 @@ public: int getStokesNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); vector getIonNumIter_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); //void getIonNumIter_PNP_coupling(double StokesTimeConv,vector &IonTimeConv,vector &IonTimeMax); + void getTimeConvMax_PNP_coupling(double StokesTimeConv,const vector &IonTimeConv); bool Restart; int timestepMax; @@ -35,6 +36,7 @@ public: int analysis_interval; int visualization_interval; double tolerance; + double time_conv_max; //double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity int rank,nprocs; diff --git a/models/PoissonSolver.cpp b/models/PoissonSolver.cpp index 186b6224..25a31600 100644 --- a/models/PoissonSolver.cpp +++ b/models/PoissonSolver.cpp @@ -8,8 +8,11 @@ ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, const Utilities::MPI& COMM): rank(RANK), nprocs(NP),timestep(0),timestepMax(0),tau(0),k2_inv(0),tolerance(0),h(0), epsilon0(0),epsilon0_LB(0),epsilonR(0),epsilon_LB(0),Vin(0),Vout(0),Nx(0),Ny(0),Nz(0),N(0),Np(0),analysis_interval(0), -chargeDen_dummy(0),WriteLog(0), -nprocx(0),nprocy(0),nprocz(0),BoundaryCondition(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0),comm(COMM) +chargeDen_dummy(0),WriteLog(0),nprocx(0),nprocy(0),nprocz(0), +BoundaryConditionInlet(0),BoundaryConditionOutlet(0),BoundaryConditionSolid(0),Lx(0),Ly(0),Lz(0), 
+Vin0(0),freqIn(0),t0_In(0),Vin_Type(0),Vout0(0),freqOut(0),t0_Out(0),Vout_Type(0), +TestPeriodic(0),TestPeriodicTime(0),TestPeriodicTimeConv(0),TestPeriodicSaveInterval(0), +comm(COMM) { } @@ -33,10 +36,12 @@ void ScaLBL_Poisson::ReadParams(string filename){ epsilonR = 78.4;//default dielectric constant of water epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity analysis_interval = 1000; - Vin = 1.0; //Boundary-z (inlet) electric potential - Vout = 1.0; //Boundary-Z (outlet) electric potential chargeDen_dummy = 1.0e-3;//For debugging;unit=[C/m^3] WriteLog = false; + TestPeriodic = false; + TestPeriodicTime = 1.0;//unit: [sec] + TestPeriodicTimeConv = 0.01; //unit [sec/lt] + TestPeriodicSaveInterval = 0.1; //unit [sec] // LB-Poisson Model parameters if (electric_db->keyExists( "timestepMax" )){ @@ -57,6 +62,18 @@ void ScaLBL_Poisson::ReadParams(string filename){ if (electric_db->keyExists( "WriteLog" )){ WriteLog = electric_db->getScalar( "WriteLog" ); } + if (electric_db->keyExists( "TestPeriodic" )){ + TestPeriodic = electric_db->getScalar( "TestPeriodic" ); + } + if (electric_db->keyExists( "TestPeriodicTime" )){ + TestPeriodicTime = electric_db->getScalar( "TestPeriodicTime" ); + } + if (electric_db->keyExists( "TestPeriodicTimeConv" )){ + TestPeriodicTimeConv = electric_db->getScalar( "TestPeriodicTimeConv" ); + } + if (electric_db->keyExists( "TestPeriodicSaveInterval" )){ + TestPeriodicSaveInterval = electric_db->getScalar( "TestPeriodicSaveInterval" ); + } // Read solid boundary condition specific to Poisson equation BoundaryConditionSolid = 1; @@ -65,10 +82,15 @@ void ScaLBL_Poisson::ReadParams(string filename){ } // Read boundary condition for electric potential // BC = 0: normal periodic BC - // BC = 1: fixed inlet and outlet potential - BoundaryCondition = 0; - if (electric_db->keyExists( "BC" )){ - BoundaryCondition = electric_db->getScalar( "BC" ); + // BC = 1: fixed electric potential + // BC = 2: sine/cosine periodic electric potential (need 
extra input parameters) + BoundaryConditionInlet = 0; + BoundaryConditionOutlet = 0; + if (electric_db->keyExists( "BC_Inlet" )){ + BoundaryConditionInlet = electric_db->getScalar( "BC_Inlet" ); + } + if (electric_db->keyExists( "BC_Outlet" )){ + BoundaryConditionOutlet = electric_db->getScalar( "BC_Outlet" ); } // Read domain parameters @@ -117,8 +139,17 @@ void ScaLBL_Poisson::SetDomain(){ for (int i=0; iid[i] = 1; // initialize this way //Averages = std::shared_ptr ( new TwoPhase(Dm) ); // TwoPhase analysis object comm.barrier(); - Dm->BoundaryCondition = BoundaryCondition; - Mask->BoundaryCondition = BoundaryCondition; + if (BoundaryConditionInlet==0 && BoundaryConditionOutlet==0){ + Dm->BoundaryCondition = 0; + Mask->BoundaryCondition = 0; + } + else if (BoundaryConditionInlet>0 && BoundaryConditionOutlet>0){ + Dm->BoundaryCondition = 1; + Mask->BoundaryCondition = 1; + } + else {//i.e. non-periodic and periodic BCs are mixed + ERROR("Error: check the type of inlet and outlet boundary condition! 
Mixed periodic and non-periodic BCs are found!\n"); + } Dm->CommInit(); comm.barrier(); @@ -343,15 +374,91 @@ void ScaLBL_Poisson::Create(){ void ScaLBL_Poisson::Potential_Init(double *psi_init){ - if (BoundaryCondition==1){ - if (electric_db->keyExists( "Vin" )){ - Vin = electric_db->getScalar( "Vin" ); - } - if (electric_db->keyExists( "Vout" )){ - Vout = electric_db->getScalar( "Vout" ); - } + //set up default boundary input parameters + Vin0 = Vout0 = 1.0; //unit: [V] + freqIn = freqOut = 50.0; //unit: [Hz] + t0_In = t0_Out = 0.0; //unit: [sec] + Vin_Type = Vout_Type = 1; //1->sin; 2->cos + Vin = 1.0; //Boundary-z (inlet) electric potential + Vout = 1.0; //Boundary-Z (outlet) electric potential + + if (BoundaryConditionInlet>0){ + switch (BoundaryConditionInlet){ + case 1: + if (electric_db->keyExists( "Vin" )){ + Vin = electric_db->getScalar( "Vin" ); + } + if (rank==0) printf("LB-Poisson Solver: inlet boundary; fixed electric potential Vin = %.3g [V]\n",Vin); + break; + case 2: + if (electric_db->keyExists( "Vin0" )){//voltage amplitude; unit: Volt + Vin0 = electric_db->getScalar( "Vin0" ); + } + if (electric_db->keyExists( "freqIn" )){//unit: Hz + freqIn = electric_db->getScalar( "freqIn" ); + } + if (electric_db->keyExists( "t0_In" )){//timestep shift, unit: lt + t0_In = electric_db->getScalar( "t0_In" ); + } + if (electric_db->keyExists( "Vin_Type" )){ + //type=1 -> sine + //tyep=2 -> cosine + Vin_Type = electric_db->getScalar( "Vin_Type" ); + if (Vin_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vin_Type is currently not supported! 
\n"); + } + if (rank==0){ + if (Vin_Type==1){ + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vin0,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In); + } + else if (Vin_Type==2){ + printf("LB-Poisson Solver: inlet boundary; periodic electric potential Vin = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V] \n",Vin0,freqIn,t0_In); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vin0,freqIn,t0_In); + } + } + break; + } + } + if (BoundaryConditionOutlet>0){ + switch (BoundaryConditionOutlet){ + case 1: + if (electric_db->keyExists( "Vout" )){ + Vout = electric_db->getScalar( "Vout" ); + } + if (rank==0) printf("LB-Poisson Solver: outlet boundary; fixed electric potential Vout = %.3g [V] \n",Vout); + break; + case 2: + if (electric_db->keyExists( "Vout0" )){//voltage amplitude; unit: Volt + Vout0 = electric_db->getScalar( "Vout0" ); + } + if (electric_db->keyExists( "freqOut" )){//unit: Hz + freqOut = electric_db->getScalar( "freqOut" ); + } + if (electric_db->keyExists( "t0_Out" )){//timestep shift, unit: lt + t0_Out = electric_db->getScalar( "t0_Out" ); + } + if (electric_db->keyExists( "Vout_Type" )){ + //type=1 -> sine + //tyep=2 -> cosine + Vout_Type = electric_db->getScalar( "Vout_Type" ); + if (Vout_Type>2 || Vin_Type<=0) ERROR("Error: user-input Vout_Type is currently not supported! 
\n"); + } + if (rank==0){ + if (Vout_Type==1){ + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Sin[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out); + } + else if (Vout_Type==2){ + printf("LB-Poisson Solver: outlet boundary; periodic electric potential Vout = %.3g*Cos[2*pi*%.3g*(t+%.3g)] [V]\n",Vout0,freqOut,t0_Out); + printf(" V0 = %.3g [V], frequency = %.3g [Hz], timestep shift = %.3g [sec] \n",Vout0,freqOut,t0_Out); + } + } + break; + } } //By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis + if (BoundaryConditionInlet==2) Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,0); + if (BoundaryConditionOutlet==2) Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,0); double slope = (Vout-Vin)/(Nz-2); double psi_linearized; for (int k=0;kBarrier(); comm.barrier(); - //starttime = MPI_Wtime(); + //comm.barrier(); + //auto t1 = std::chrono::system_clock::now(); timestep=0; double error = 1.0; @@ -420,13 +533,13 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ // *************ODD TIMESTEP*************// timestep++; - SolveElectricPotentialAAodd();//update electric potential + SolveElectricPotentialAAodd(timestep_from_Study);//update electric potential SolvePoissonAAodd(ChargeDensity);//perform collision ScaLBL_Comm->Barrier(); comm.barrier(); // *************EVEN TIMESTEP*************// timestep++; - SolveElectricPotentialAAeven();//update electric potential + SolveElectricPotentialAAeven(timestep_from_Study);//update electric potential SolvePoissonAAeven(ChargeDensity);//perform collision ScaLBL_Comm->Barrier(); comm.barrier(); //************************************************************************/ @@ -466,11 +579,11 @@ void ScaLBL_Poisson::Run(double *ChargeDensity){ } 
//************************************************************************/ - //stoptime = MPI_Wtime(); ////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n"); ////if (rank==0) printf("---------------------------------------------------------------------------\n"); //// Compute the walltime per timestep - //cputime = (stoptime - starttime)/timestep; + //auto t2 = std::chrono::system_clock::now(); + //double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; //// Performance obtained from each node //double MLUPS = double(Np)/cputime/1000000; @@ -506,29 +619,65 @@ void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){ } } -void ScaLBL_Poisson::SolveElectricPotentialAAodd(){ +void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study){ ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + if (BoundaryConditionInlet > 0){ + switch (BoundaryConditionInlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + case 2: + Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + } + } + if (BoundaryConditionOutlet > 0){ + switch (BoundaryConditionOutlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + case 2: + Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, 
timestep); + break; + } } //-------------------------// ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); } -void ScaLBL_Poisson::SolveElectricPotentialAAeven(){ +void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){ ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np); ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE ScaLBL_Comm->Barrier(); // Set boundary conditions - if (BoundaryCondition == 1){ - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); - ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + if (BoundaryConditionInlet > 0){ + switch (BoundaryConditionInlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + case 2: + Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,t0_In,Vin_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep); + break; + } + } + if (BoundaryConditionOutlet > 0){ + switch (BoundaryConditionOutlet){ + case 1: + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + case 2: + Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,t0_Out,Vout_Type,timestep_from_Study); + ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep); + break; + } } //-------------------------// ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np); diff --git a/models/PoissonSolver.h b/models/PoissonSolver.h index 72b43d28..09d4d756 100644 --- a/models/PoissonSolver.h +++ b/models/PoissonSolver.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "common/ScaLBL.h" #include "common/Communication.h" @@ -16,6 +17,7 @@ #include "analysis/Minkowski.h" #include "ProfilerApp.h" +#define _USE_MATH_DEFINES #ifndef 
ScaLBL_POISSON_INC #define ScaLBL_POISSON_INC @@ -30,8 +32,8 @@ public: void SetDomain(); void ReadInput(); void Create(); - void Initialize(); - void Run(double *ChargeDensity); + void Initialize(double time_conv_from_Study); + void Run(double *ChargeDensity,int timestep_from_Study); void getElectricPotential(DoubleArray &ReturnValues); void getElectricPotential_debug(int timestep); void getElectricField(DoubleArray &Values_x, DoubleArray &Values_y, DoubleArray &Values_z); @@ -41,7 +43,8 @@ public: //bool Restart,pBC; int timestep,timestepMax; int analysis_interval; - int BoundaryCondition; + int BoundaryConditionInlet; + int BoundaryConditionOutlet; int BoundaryConditionSolid; double tau; double tolerance; @@ -50,11 +53,18 @@ public: double Vin, Vout; double chargeDen_dummy;//for debugging bool WriteLog; + double Vin0,freqIn,t0_In,Vin_Type; + double Vout0,freqOut,t0_Out,Vout_Type; + bool TestPeriodic; + double TestPeriodicTime;//unit: [sec] + double TestPeriodicTimeConv; //unit [sec/lt] + double TestPeriodicSaveInterval; //unit [sec] int Nx,Ny,Nz,N,Np; int rank,nprocx,nprocy,nprocz,nprocs; double Lx,Ly,Lz; double h;//image resolution + double time_conv;//phys to LB time converting factor; unit=[sec/lt] std::shared_ptr Dm; // this domain is for analysis std::shared_ptr Mask; // this domain is for lbm @@ -91,12 +101,13 @@ private: void AssignSolidBoundary(double *poisson_solid); void Potential_Init(double *psi_init); void ElectricField_LB_to_Phys(DoubleArray &Efield_reg); - void SolveElectricPotentialAAodd(); - void SolveElectricPotentialAAeven(); + void SolveElectricPotentialAAodd(int timestep_from_Study); + void SolveElectricPotentialAAeven(int timestep_from_Study); //void SolveElectricField(); void SolvePoissonAAodd(double *ChargeDensity); void SolvePoissonAAeven(double *ChargeDensity); void getConvergenceLog(int timestep,double error); + double getBoundaryVoltagefromPeriodicBC(double V0,double freq,double t0,int V_type,int time_step); }; #endif diff --git 
a/models/StokesModel.cpp b/models/StokesModel.cpp index 50b7fa39..fe6b0c92 100644 --- a/models/StokesModel.cpp +++ b/models/StokesModel.cpp @@ -573,16 +573,14 @@ void ScaLBL_StokesModel::Run(){ } } - //.......create and start timer............ - double starttime,stoptime,cputime; ScaLBL_Comm->Barrier(); comm.barrier(); - starttime = MPI_Wtime(); if (rank==0) printf("****************************************************************\n"); if (rank==0) printf("LB Single-Fluid Navier-Stokes Solver: timestepMax = %i\n", timestepMax); if (rank==0) printf("****************************************************************\n"); timestep=0; double error = 1.0; double flow_rate_previous = 0.0; + auto t1 = std::chrono::system_clock::now(); while (timestep < timestepMax && error > tolerance) { //************************************************************************/ timestep++; @@ -700,10 +698,10 @@ void ScaLBL_StokesModel::Run(){ } } //************************************************************************/ - stoptime = MPI_Wtime(); if (rank==0) printf("-------------------------------------------------------------------\n"); // Compute the walltime per timestep - cputime = (stoptime - starttime)/timestep; + auto t2 = std::chrono::system_clock::now(); + double cputime = std::chrono::duration( t2 - t1 ).count() / timestep; // Performance obtained from each node double MLUPS = double(Np)/cputime/1000000; diff --git a/sample_scripts/config_build_eos b/sample_scripts/config_build_eos index f4d69f26..8c7aeb92 100755 --- a/sample_scripts/config_build_eos +++ b/sample_scripts/config_build_eos @@ -33,7 +33,6 @@ cmake \ -D CMAKE_CXX_STANDARD=14 \ -D USE_TIMER=false \ -D TIMER_DIRECTORY=${HOME}/timerutility/build/opt \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_CUDA=0 \ diff --git a/sample_scripts/config_build_rhea b/sample_scripts/config_build_rhea index 0e9b7296..0f5713da 100755 --- a/sample_scripts/config_build_rhea +++ 
b/sample_scripts/config_build_rhea @@ -27,7 +27,6 @@ cmake \ -D CMAKE_CXX_STD=11 \ -D USE_TIMER=false \ -D TIMER_DIRECTORY=${HOME}/timerutility/build/opt \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_CUDA=0 \ diff --git a/sample_scripts/config_build_titan b/sample_scripts/config_build_titan index 18d50e1a..32fd639a 100755 --- a/sample_scripts/config_build_titan +++ b/sample_scripts/config_build_titan @@ -25,7 +25,6 @@ cmake \ -D CMAKE_CUDA_FLAGS="-arch sm_35" \ -D CMAKE_CUDA_HOST_COMPILER="/opt/gcc/6.3.0/bin/gcc" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_SILO=1 \ diff --git a/sample_scripts/config_build_titan_silo b/sample_scripts/config_build_titan_silo index 22069a6c..0a3df511 100755 --- a/sample_scripts/config_build_titan_silo +++ b/sample_scripts/config_build_titan_silo @@ -20,7 +20,6 @@ cmake \ -D CMAKE_CUDA_FLAGS="-arch sm_35" \ -D CMAKE_CUDA_HOST_COMPILER="/opt/gcc/6.3.0/bin/gcc" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D USE_NETCDF=0 \ diff --git a/sample_scripts/config_poplar_hip b/sample_scripts/config_poplar_hip index f1c3159c..5628c074 100755 --- a/sample_scripts/config_poplar_hip +++ b/sample_scripts/config_poplar_hip @@ -36,6 +36,5 @@ cmake \ # MPI_THREAD_MULTIPLE=1 MV2_USE_RDMA_CM=0 MV2_USE_RDMA_CM= MV2_NUM_HCAS=1 MV2_USE_CUDA=1 MV2_ENABLE_AFFINITY=0 srun -n 2 -N 1 --cpu-bind=v -c 1 ./test_MPI -# -D MPI_COMPILER:BOOL=TRUE \ # -D MPIEXEC=mpirun \ # -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ diff --git a/sample_scripts/config_summit_hip b/sample_scripts/config_summit_hip index 23d3d919..b921b14a 100755 --- a/sample_scripts/config_summit_hip +++ b/sample_scripts/config_summit_hip @@ -21,7 +21,6 @@ cmake \ -D HIP_NVCC_OPTIONS="-arch sm_70" \ -D LINK_LIBRARIES="/sw/summit/cuda/9.2.148/lib64/libcudart.so" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D 
USE_NETCDF=0 \ -D USE_SILO=1 \ -D SILO_DIRECTORY=${TPL_DIR}/silo \ diff --git a/sample_scripts/config_titan b/sample_scripts/config_titan index 8493d58b..f1b02507 100755 --- a/sample_scripts/config_titan +++ b/sample_scripts/config_titan @@ -9,7 +9,6 @@ cmake \ -D CMAKE_CUDA_FLAGS="-arch sm_35" \ -D CMAKE_CUDA_HOST_COMPILER="/usr/bin/gcc" \ -D USE_MPI=1 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ ${HOME}/LBPM-WIA diff --git a/sample_scripts/configure_arc_cluster b/sample_scripts/configure_arc_cluster index f4124cf6..e97553dd 100755 --- a/sample_scripts/configure_arc_cluster +++ b/sample_scripts/configure_arc_cluster @@ -10,7 +10,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="" \ -D CMAKE_CXX_FLAGS="" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_arden b/sample_scripts/configure_arden index 057732a9..3d0759aa 100755 --- a/sample_scripts/configure_arden +++ b/sample_scripts/configure_arden @@ -5,7 +5,6 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D CMAKE_C_FLAGS="-O3 -fPIC" \ -D CMAKE_CXX_FLAGS="-O3 -fPIC " \ -D CMAKE_CXX_STANDARD=14 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_basic_cluster b/sample_scripts/configure_basic_cluster index 667ebcad..7a861974 100755 --- a/sample_scripts/configure_basic_cluster +++ b/sample_scripts/configure_basic_cluster @@ -8,7 +8,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="" \ -D CMAKE_CXX_FLAGS="" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_blueridge b/sample_scripts/configure_blueridge index 667ebcad..7a861974 100755 --- 
a/sample_scripts/configure_blueridge +++ b/sample_scripts/configure_blueridge @@ -8,7 +8,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="" \ -D CMAKE_CXX_FLAGS="" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_cascades_cpu b/sample_scripts/configure_cascades_cpu index f6c77004..a6b0e203 100755 --- a/sample_scripts/configure_cascades_cpu +++ b/sample_scripts/configure_cascades_cpu @@ -11,7 +11,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="-fPIC" \ -D CMAKE_CXX_FLAGS="-fPIC" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_desktop b/sample_scripts/configure_desktop index 1e717c98..38f917ad 100755 --- a/sample_scripts/configure_desktop +++ b/sample_scripts/configure_desktop @@ -7,7 +7,6 @@ cmake \ -D CMAKE_C_FLAGS="-g " \ -D CMAKE_CXX_FLAGS="-g " \ -D CMAKE_CXX_STANDARD=14 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/configure_huckleberry b/sample_scripts/configure_huckleberry index 8a4a313a..abe134b0 100755 --- a/sample_scripts/configure_huckleberry +++ b/sample_scripts/configure_huckleberry @@ -12,7 +12,6 @@ cmake \ -D CMAKE_CXX_COMPILER:PATH=mpicxx \ -D CMAKE_C_FLAGS="-fPIC" \ -D CMAKE_CXX_FLAGS="-fPIC" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ @@ -26,5 +25,5 @@ cmake \ -D USE_TIMER=0 \ ~/LBPM-WIA -make VERBOSE=1 -j8 && make install +make VERBOSE=1 -j4 && make install diff --git a/sample_scripts/configure_titan_jem b/sample_scripts/configure_titan_jem index 8375ad87..a3b730cd 100755 --- a/sample_scripts/configure_titan_jem +++ b/sample_scripts/configure_titan_jem @@ -15,7 +15,6 @@ 
cmake \ -D CMAKE_C_COMPILER:PATH=cc \ -D CMAKE_CXX_COMPILER:PATH=CC \ -D CMAKE_CXX_COMPILER:PATH=CC \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=aprun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Debug \ diff --git a/sample_scripts/configure_ubuntu b/sample_scripts/configure_ubuntu index 516925d0..c6c3239f 100755 --- a/sample_scripts/configure_ubuntu +++ b/sample_scripts/configure_ubuntu @@ -5,9 +5,7 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D CMAKE_C_FLAGS="-O3 -fPIC" \ -D CMAKE_CXX_FLAGS="-O3 -fPIC " \ -D CMAKE_CXX_STANDARD=14 \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ - -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ -D CUDA_FLAGS="-arch sm_35" \ -D CUDA_HOST_COMPILER="/usr/bin/gcc" \ @@ -16,7 +14,7 @@ cmake -D CMAKE_C_COMPILER:PATH=/opt/arden/openmpi/3.1.2/bin/mpicc \ -D USE_SILO=1 \ -D SILO_LIB="/opt/arden/silo/4.10.2/lib/libsiloh5.a" \ -D SILO_DIRECTORY="/opt/arden/silo/4.10.2" \ - -D USE_NETCDF=1 \ + -D USE_NETCDF=0 \ -D NETCDF_DIRECTORY="/opt/arden/netcdf/4.6.1" \ -D USE_CUDA=0 \ -D USE_TIMER=0 \ diff --git a/sample_scripts/daedalus_config b/sample_scripts/daedalus_config index f14f3627..f2855433 100755 --- a/sample_scripts/daedalus_config +++ b/sample_scripts/daedalus_config @@ -12,7 +12,6 @@ i -D CMAKE_CXX_COMPILER:PATH=/home/christopher/openmpi/install_dir/bin/mpicxx -D USE_DOXYGEN=false \ # -D CMAKE_C_FLAGS="-std=gnu++11 -w" \ # -D CMAKE_CXX_FLAGS="-std=gnu++11 -w" \ - -D MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=/home/christopher/openmpi/install_dir/bin/mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Release \ diff --git a/sample_scripts/promethius_config b/sample_scripts/promethius_config index ae26ed94..49f42670 100755 --- a/sample_scripts/promethius_config +++ b/sample_scripts/promethius_config @@ -7,7 +7,6 @@ cmake \ -D CMAKE_C_FLAGS="-g " \ -D CMAKE_CXX_FLAGS="-g -Wno-deprecated-declarations" \ -D CXX_STD=11 \ - -D 
MPI_COMPILER:BOOL=TRUE \ -D MPIEXEC=mpirun \ -D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \ -D CMAKE_BUILD_TYPE:STRING=Debug \ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0a8074a3..44f869bf 100755 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,8 +4,10 @@ ADD_LBPM_EXECUTABLE( lbpm_color_simulator ) ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator ) -ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator ) +ADD_LBPM_EXECUTABLE( lbpm_freelee_SingleFluidBGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator ) #ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator ) ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator ) @@ -33,14 +35,15 @@ ADD_LBPM_EXECUTABLE( GenerateSphereTest ) #ADD_LBPM_EXECUTABLE( BlobAnalysis ) #ADD_LBPM_EXECUTABLE( BlobIdentify ) #ADD_LBPM_EXECUTABLE( BlobIdentifyParallel ) -#ADD_LBPM_EXECUTABLE( convertIO ) -#ADD_LBPM_EXECUTABLE( DataAggregator ) +ADD_LBPM_EXECUTABLE( convertIO ) +ADD_LBPM_EXECUTABLE( DataAggregator ) #ADD_LBPM_EXECUTABLE( BlobAnalyzeParallel )( ADD_LBPM_EXECUTABLE( lbpm_minkowski_scalar ) ADD_LBPM_EXECUTABLE( TestPoissonSolver ) ADD_LBPM_EXECUTABLE( TestIonModel ) ADD_LBPM_EXECUTABLE( TestNernstPlanck ) ADD_LBPM_EXECUTABLE( TestPNP_Stokes ) +ADD_LBPM_EXECUTABLE( TestMixedGrad ) @@ -59,6 +62,7 @@ ADD_LBPM_TEST( TestMap ) ADD_LBPM_TEST( TestWideHalo ) ADD_LBPM_TEST( TestColorGradDFH ) ADD_LBPM_TEST( TestBubbleDFH ../example/Bubble/input.db) +ADD_LBPM_TEST( testGlobalMassFreeLee ../example/Bubble/input.db) #ADD_LBPM_TEST( TestColorMassBounceback ../example/Bubble/input.db) ADD_LBPM_TEST( TestPressVel ../example/Bubble/input.db) ADD_LBPM_TEST( TestPoiseuille ../example/Piston/poiseuille.db) diff --git a/tests/TestMixedGrad.cpp b/tests/TestMixedGrad.cpp new file mode 100644 index 00000000..6baede7c --- 
/dev/null +++ b/tests/TestMixedGrad.cpp @@ -0,0 +1,199 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Utilities.h" +#include "models/FreeLeeModel.h" + +inline void Initialize_Mask(ScaLBL_FreeLeeModel &LeeModel){ + // initialize a bubble + int i,j,k,n; + int rank = LeeModel.Mask->rank(); + int Nx = LeeModel.Mask->Nx; + int Ny = LeeModel.Mask->Ny; + int Nz = LeeModel.Mask->Nz; + if (rank == 0) printf(" initialize mask...\n"); + + for (k=0;kid[n]=1; + LeeModel.id[n] = LeeModel.Mask->id[n]; + } + } + } +} + + +inline void Initialize_DummyPhaseField(ScaLBL_FreeLeeModel &LeeModel, double ax, double ay, double az){ + // initialize a bubble + int i,j,k,n; + int rank = LeeModel.Mask->rank(); + int Nx = LeeModel.Mask->Nx; + int Ny = LeeModel.Mask->Ny; + int Nz = LeeModel.Mask->Nz; + if (rank == 0) printf("Setting up dummy phase field with gradient {x,y,z} = {%f , %f , %f}...\n",ax,ay,az); + + double * Dummy; + int Nh = (Nx+2)*(Ny+2)*(Nz+2); + Dummy = new double [(Nx+2)*(Ny+2)*(Nz+2)]; + for (k=0;kid[n]=1; + LeeModel.id[n] = LeeModel.Mask->id[n]; + int nh = (k+1)*(Nx+2)*(Ny+2) + (j+1)*(Nx+2) + i+1; + Dummy[nh] = ax*double(i) + ay*double(j) + az*double(k); + } + } + } + ScaLBL_CopyToDevice(LeeModel.Phi, Dummy, sizeof(double)*Nh); + + LeeModel.MGTest(); +} + +inline int MultiHaloNeighborCheck(ScaLBL_FreeLeeModel &LeeModel){ + int i,j,k,iq,stride,nread; + int Nxh = LeeModel.Nxh; + int Nyh = LeeModel.Nyh; + int Np = LeeModel.Np; + + int *TmpMap; + TmpMap = new int[Np]; + ScaLBL_CopyToHost(TmpMap, LeeModel.dvcMap, Np*sizeof(int)); + + int *neighborList; + neighborList = new int[18*Np]; + ScaLBL_CopyToHost(neighborList, LeeModel.NeighborList, 18*Np*sizeof(int)); + printf("Check stride for interior neighbors \n"); + int count = 0; + for (int n=LeeModel.ScaLBL_Comm->FirstInterior(); nLastInterior(); n++){ + // q=0 + int idx = TmpMap[n]; + k = idx/Nxh/Nyh; + j = (idx-k*Nxh*Nyh)/Nxh; + i = (idx-k*Nxh*Nyh -j*Nxh); + + // q=1 + nread = 
neighborList[n]; + iq = TmpMap[nread%Np]; + stride = idx - iq; + if (stride != 1){ + printf(" %i, %i, %i q = 1 stride=%i \n ",i,j,k,stride); + count++; + } + + // q=2 + nread = neighborList[n+Np]; + iq = TmpMap[nread%Np]; + stride = iq - idx; + if (stride != 1){ + printf(" %i, %i, %i q = 2 stride=%i \n ",i,j,k,stride); + count++; + } + + + // q=3 + nread = neighborList[n+2*Np]; + iq = TmpMap[nread%Np]; + stride = idx - iq; + if (stride != Nxh){ + printf(" %i, %i, %i q = 3 stride=%i \n ",i,j,k,stride); + count++; + } + + // q = 4 + nread = neighborList[n+3*Np]; + iq = TmpMap[nread%Np]; + stride = iq-idx; + if (stride != Nxh){ + printf(" %i, %i, %i q = 4 stride=%i \n ",i,j,k,stride); + count++; + } + + + // q=5 + nread = neighborList[n+4*Np]; + iq = TmpMap[nread%Np]; + stride = idx - iq; + if (stride != Nxh*Nyh){ + count++; + printf(" %i, %i, %i q = 5 stride=%i \n ",i,j,k,stride); + } + + // q = 6 + nread = neighborList[n+5*Np]; + iq = TmpMap[nread%Np]; + stride = iq - idx; + if (stride != Nxh*Nyh){ + count++; + printf(" %i, %i, %i q = 6 stride=%i \n ",i,j,k,stride); + } + + } + return count; +} + +int main( int argc, char **argv ) +{ + + // Initialize + Utilities::startup( argc, argv ); + int errors = 0; + // Load the input database + auto db = std::make_shared( argv[1] ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + if ( rank == 0 ) { + printf( "********************************************************\n" ); + printf( "Running Mixed Gradient Test \n" ); + printf( "********************************************************\n" ); + } + // Initialize compute device + int device = ScaLBL_SetDevice( rank ); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE( 1 ); + // PROFILE_ENABLE_TRACE(); + // PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START( "Main" ); + 
Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank, nprocs, comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + Initialize_Mask(LeeModel); + //LeeModel.Create_DummyPhase_MGTest(); + LeeModel.Create_TwoFluid(); + + errors=MultiHaloNeighborCheck(LeeModel); + + Initialize_DummyPhaseField(LeeModel,1.0, 2.0, 3.0); + LeeModel.WriteDebug_TwoFluid(); + + PROFILE_STOP( "Main" ); + PROFILE_SAVE( file, level ); + // **************************************************** + + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); + return errors; + +} diff --git a/tests/TestNernstPlanck.cpp b/tests/TestNernstPlanck.cpp index d2145b03..f0f82e52 100644 --- a/tests/TestNernstPlanck.cpp +++ b/tests/TestNernstPlanck.cpp @@ -66,7 +66,7 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + PoissonSolver.Initialize(0); int timestep=0; double error = 1.0; @@ -74,7 +74,7 @@ int main(int argc, char **argv) while (timestep < Study.timestepMax && error > Study.tolerance){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental IonModel.Run(IonModel.FluidVelocityDummy,PoissonSolver.ElectricField); //solve for ion transport and electric potential timestep++;//AA operations diff --git a/tests/TestNetcdf.cpp b/tests/TestNetcdf.cpp index 38fe08b3..6d43a04d 100644 --- a/tests/TestNetcdf.cpp +++ b/tests/TestNetcdf.cpp @@ -1,7 +1,7 @@ // Test reading/writing netcdf files #include "IO/netcdf.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "common/Communication.h" #include "common/UnitTest.h" @@ -13,7 +13,8 @@ void load( const std::string& ); void test_NETCDF( UnitTest& ut ) { - const int rank = 
comm_rank( MPI_COMM_WORLD ); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); int nprocx = 2; int nprocy = 2; int nprocz = 2; @@ -26,11 +27,11 @@ void test_NETCDF( UnitTest& ut ) size_t z = info.kz*data.size(2); const char* filename = "test.nc"; std::vector dim = { (int) data.size(0)*nprocx, (int) data.size(1)*nprocy, (int) data.size(2)*nprocz }; - int fid = netcdf::open( filename, netcdf::CREATE, MPI_COMM_WORLD ); + int fid = netcdf::open( filename, netcdf::CREATE, comm ); auto dims = netcdf::defDim( fid, {"X", "Y", "Z"}, dim ); netcdf::write( fid, "tmp", dims, data, info ); netcdf::close( fid ); - MPI_Barrier( MPI_COMM_WORLD ); + comm.barrier(); // Read the contents of the file we created fid = netcdf::open( filename, netcdf::READ ); Array tmp = netcdf::getVar( fid, "tmp" ); diff --git a/tests/TestPNP_Stokes.cpp b/tests/TestPNP_Stokes.cpp index a3db3c3f..65b796f7 100644 --- a/tests/TestPNP_Stokes.cpp +++ b/tests/TestPNP_Stokes.cpp @@ -82,7 +82,7 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + PoissonSolver.Initialize(0); int timestep=0; @@ -94,7 +94,7 @@ int main(int argc, char **argv) while (timestep < Study.timestepMax && error > Study.tolerance){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,0);//solve Poisson equtaion to get steady-state electrical potental StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential diff --git a/tests/TestPoissonSolver.cpp b/tests/TestPoissonSolver.cpp index 701d8a67..38f242c6 100644 --- a/tests/TestPoissonSolver.cpp +++ b/tests/TestPoissonSolver.cpp @@ -51,14 +51,37 @@ int main(int argc, char **argv) PoissonSolver.SetDomain(); 
PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + if (PoissonSolver.TestPeriodic==true){ + PoissonSolver.Initialize(PoissonSolver.TestPeriodicTimeConv); + } + else { + PoissonSolver.Initialize(0); + } //Initialize dummy charge density for test PoissonSolver.DummyChargeDensity(); - PoissonSolver.Run(PoissonSolver.ChargeDensityDummy); - PoissonSolver.getElectricPotential_debug(1); - PoissonSolver.getElectricField_debug(1); + if (PoissonSolver.TestPeriodic==true){ + if (rank==0) printf("Testing periodic voltage input is enabled. Total test time is %.3g[s], saving data every %.3g[s]; user-specified time resolution is %.3g[s/lt]\n", + PoissonSolver.TestPeriodicTime,PoissonSolver.TestPeriodicSaveInterval,PoissonSolver.TestPeriodicTimeConv); + int timestep = 0; + int timeMax = int(PoissonSolver.TestPeriodicTime/PoissonSolver.TestPeriodicTimeConv); + int timeSave = int(PoissonSolver.TestPeriodicSaveInterval/PoissonSolver.TestPeriodicTimeConv); + while (timestep +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include "common/UnitTest.h" -#include "common/Utilities.h" -#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Reader.h" #include "IO/Writer.h" #include "ProfilerApp.h" +#include "common/MPI.h" +#include "common/UnitTest.h" +#include "common/Utilities.h" -inline bool approx_equal( const Point& A, const Point& B ) +inline bool approx_equal( const Point &A, const Point &B ) { - double tol = 1e-7*sqrt(A.x*A.x+A.y*A.y+A.z*A.z); - return fabs(A.x-B.x)<=tol && fabs(A.y-B.y)<=tol && fabs(A.z-B.z)<=tol; + double tol = 1e-7 * sqrt( A.x * A.x + A.y * A.y + A.z * A.z ); + return fabs( A.x - B.x ) <= tol && fabs( A.y - B.y ) <= tol && fabs( A.z - B.z ) <= tol; } -inline bool approx_equal( const double& A, const double& B ) +inline bool approx_equal( const double &A, const double &B ) { - return fabs(A-B) <= std::max(1e-7*fabs(A+B),1e-20); + return fabs( A - B ) <= 
std::max( 1e-7 * fabs( A + B ), 1e-20 ); } -inline double distance( const Point& p ) +inline double distance( const Point &p ) { return sqrt( p.x * p.x + p.y * p.y + p.z * p.z ); } + + +bool checkMesh( const std::vector &meshData, const std::string &format, + std::shared_ptr mesh ) { - return sqrt(p.x*p.x+p.y*p.y+p.z*p.z); + + // Get direct access to the meshes used to test the reader + const auto pointmesh = dynamic_cast( meshData[0].mesh.get() ); + const auto trimesh = dynamic_cast( meshData[1].mesh.get() ); + const auto trilist = dynamic_cast( meshData[2].mesh.get() ); + const auto domain = dynamic_cast( meshData[3].mesh.get() ); + const size_t N_tri = trimesh->A.size(); + if ( mesh->className() == "pointmesh" ) { + // Check the pointmesh + auto pmesh = IO::getPointList( mesh ); + if ( pmesh.get() == NULL ) + return false; + if ( pmesh->points.size() != pointmesh->points.size() ) + return false; + } + if ( mesh->className() == "trimesh" || mesh->className() == "trilist" ) { + // Check the trimesh/trilist + auto mesh1 = IO::getTriMesh( mesh ); + auto mesh2 = IO::getTriList( mesh ); + if ( mesh1.get() == NULL || mesh2.get() == NULL ) + return false; + if ( mesh1->A.size() != N_tri || mesh1->B.size() != N_tri || mesh1->C.size() != N_tri || + mesh2->A.size() != N_tri || mesh2->B.size() != N_tri || mesh2->C.size() != N_tri ) + return false; + const std::vector &P1 = mesh1->vertices->points; + const std::vector &A1 = mesh1->A; + const std::vector &B1 = mesh1->B; + const std::vector &C1 = mesh1->C; + const std::vector &A2 = mesh2->A; + const std::vector &B2 = mesh2->B; + const std::vector &C2 = mesh2->C; + const std::vector &A = trilist->A; + const std::vector &B = trilist->B; + const std::vector &C = trilist->C; + for ( size_t i = 0; i < N_tri; i++ ) { + if ( !approx_equal( P1[A1[i]], A[i] ) || !approx_equal( P1[B1[i]], B[i] ) || + !approx_equal( P1[C1[i]], C[i] ) ) + return false; + if ( !approx_equal( A2[i], A[i] ) || !approx_equal( B2[i], B[i] ) || + !approx_equal( 
C2[i], C[i] ) ) + return false; + } + } + if ( mesh->className() == "domain" && format != "old" ) { + // Check the domain mesh + const IO::DomainMesh &mesh1 = *std::dynamic_pointer_cast( mesh ); + if ( mesh1.nprocx != domain->nprocx || mesh1.nprocy != domain->nprocy || + mesh1.nprocz != domain->nprocz ) + return false; + if ( mesh1.nx != domain->nx || mesh1.ny != domain->ny || mesh1.nz != domain->nz ) + return false; + if ( mesh1.Lx != domain->Lx || mesh1.Ly != domain->Ly || mesh1.Lz != domain->Lz ) + return false; + } + return true; +} + + +bool checkVar( const std::string &format, std::shared_ptr mesh, + std::shared_ptr variable1, std::shared_ptr variable2 ) +{ + if ( format == "new" ) + IO::reformatVariable( *mesh, *variable2 ); + bool pass = true; + const IO::Variable &var1 = *variable1; + const IO::Variable &var2 = *variable2; + pass = var1.name == var2.name; + pass = pass && var1.dim == var2.dim; + pass = pass && var1.type == var2.type; + pass = pass && var1.data.length() == var2.data.length(); + if ( pass ) { + for ( size_t m = 0; m < var1.data.length(); m++ ) + pass = pass && approx_equal( var1.data( m ), var2.data( m ) ); + } + return pass; } // Test writing and reading the given format -void testWriter( const std::string& format, std::vector& meshData, UnitTest& ut ) +void testWriter( + const std::string &format, std::vector &meshData, UnitTest &ut ) { + PROFILE_SCOPED( path, 0, timer ); + Utilities::MPI comm( MPI_COMM_WORLD ); int nprocs = comm.getSize(); comm.barrier(); + + // Set the path for the writer + std::string path = "test_" + format; + + // Get the format std::string format2 = format; - auto precision = IO::DataType::Double; + auto precision = IO::DataType::Double; if ( format == "silo-double" ) { - format2 = "silo"; + format2 = "silo"; precision = IO::DataType::Double; } else if ( format == "silo-float" ) { - format2 = "silo"; + format2 = "silo"; precision = IO::DataType::Float; } + // Set the precision for the variables - for ( auto& data : 
meshData ) { + for ( auto &data : meshData ) { data.precision = precision; - for ( auto& var : data.vars ) + for ( auto &var : data.vars ) var->precision = precision; } // Write the data - PROFILE_START(format+"-write"); - IO::initialize( "test_"+format, format2, false ); + IO::initialize( path, format2, false ); IO::writeData( 0, meshData, comm ); IO::writeData( 3, meshData, comm ); comm.barrier(); - PROFILE_STOP(format+"-write"); - // Get the summary name for reading - std::string path = "test_" + format; - std::string summary_name; - if ( format=="old" || format=="new" ) - summary_name = "summary.LBM"; - else if ( format=="silo-float" || format=="silo-double" ) - summary_name = "LBM.visit"; + + // Get a list of the timesteps + auto timesteps = IO::readTimesteps( path, format2 ); + if ( timesteps.size() == 2 ) + ut.passes( format + ": Corrent number of timesteps" ); else - ERROR("Unknown format"); + ut.failure( format + ": Incorrent number of timesteps" ); - // Get direct access to the meshes used to test the reader - const auto pointmesh = dynamic_cast( meshData[0].mesh.get() ); - const auto trimesh = dynamic_cast( meshData[1].mesh.get() ); - const auto trilist = dynamic_cast( meshData[2].mesh.get() ); - const auto domain = dynamic_cast( meshData[3].mesh.get() ); - const size_t N_tri = trimesh->A.size(); - // Get a list of the timesteps - PROFILE_START(format+"-read-timesteps"); - auto timesteps = IO::readTimesteps( path + "/" + summary_name ); - PROFILE_STOP(format+"-read-timesteps"); - if ( timesteps.size()==2 ) - ut.passes(format+": Corrent number of timesteps"); - else - ut.failure(format+": Incorrent number of timesteps"); - - // Check the mesh lists - for ( const auto& timestep : timesteps ) { - // Load the list of meshes and check its size - PROFILE_START(format+"-read-getMeshList"); - auto databaseList = IO::getMeshList(path,timestep); - PROFILE_STOP(format+"-read-getMeshList"); - if ( databaseList.size()==meshData.size() ) - ut.passes(format+": Corrent 
number of meshes found"); - else - ut.failure(format+": Incorrent number of meshes found"); - // Check the number of domains for each mesh - bool pass = true; - for ( const auto& database : databaseList ) - pass = pass && (int)database.domains.size()==nprocs; - if ( pass ) { - ut.passes(format+": Corrent number of domains for mesh"); - } else { - ut.failure(format+": Incorrent number of domains for mesh"); - continue; + // Test the simple read interface + bool pass = true; + for ( const auto ×tep : timesteps ) { + auto data = IO::readData( path, timestep, comm.getRank() ); + pass = pass && data.size() == meshData.size(); + for ( size_t i = 0; i < data.size(); i++ ) { + pass = pass && checkMesh( meshData, format, data[i].mesh ); } - // For each domain, load the mesh and check its data - for ( const auto& database : databaseList ) { - pass = true; - for (size_t k=0; kpoints.size() != pointmesh->points.size() ) { - pass = false; - break; - } - } - if ( database.name=="trimesh" || database.name=="trilist" ) { - // Check the trimesh/trilist - auto mesh1 = IO::getTriMesh(mesh); - auto mesh2 = IO::getTriList(mesh); - if ( mesh1.get()==NULL || mesh2.get()==NULL ) { - pass = false; - break; - } - if ( mesh1->A.size()!=N_tri || mesh1->B.size()!=N_tri || mesh1->C.size()!=N_tri || - mesh2->A.size()!=N_tri || mesh2->B.size()!=N_tri || mesh2->C.size()!=N_tri ) - { - pass = false; - break; - } - const std::vector& P1 = mesh1->vertices->points; - const std::vector& A1 = mesh1->A; - const std::vector& B1 = mesh1->B; - const std::vector& C1 = mesh1->C; - const std::vector& A2 = mesh2->A; - const std::vector& B2 = mesh2->B; - const std::vector& C2 = mesh2->C; - const std::vector& A = trilist->A; - const std::vector& B = trilist->B; - const std::vector& C = trilist->C; - for (size_t i=0; i(mesh); - if ( mesh1.nprocx!=domain->nprocx || mesh1.nprocy!=domain->nprocy || mesh1.nprocz!=domain->nprocz ) - pass = false; - if ( mesh1.nx!=domain->nx || mesh1.ny!=domain->ny || 
mesh1.nz!=domain->nz ) - pass = false; - if ( mesh1.Lx!=domain->Lx || mesh1.Ly!=domain->Ly || mesh1.Lz!=domain->Lz ) - pass = false; + } else { + pass = pass && checkMesh( meshData, format, mesh ); } } if ( pass ) { - ut.passes(format+": Mesh \"" + database.name + "\" loaded correctly"); + ut.passes( format + ": Mesh \"" + database.name + "\" loaded correctly" ); } else { - ut.failure(format+": Mesh \"" + database.name + "\" did not load correctly"); + ut.failure( format + ": Mesh \"" + database.name + "\" did not load correctly" ); continue; } // Load the variables and check their data - if ( format=="old" ) - continue; // Old format does not support variables - const IO::MeshDataStruct* mesh0 = NULL; - for (size_t k=0; kvars.size(); v++) { - PROFILE_START(format+"-read-getVariable"); - auto variable = IO::getVariable(path,timestep,database,k,mesh0->vars[v]->name); - if ( format=="new" ) - IO::reformatVariable( *mesh, *variable ); - PROFILE_STOP(format+"-read-getVariable"); - const IO::Variable& var1 = *mesh0->vars[v]; - const IO::Variable& var2 = *variable; - pass = var1.name == var2.name; - pass = pass && var1.dim == var2.dim; - pass = pass && var1.type == var2.type; - pass = pass && var1.data.length() == var2.data.length(); + for ( int k = 0; k < N_domains; k++ ) { + auto mesh = IO::getMesh( path, timestep, database, k ); + for ( size_t v = 0; v < mesh0->vars.size(); v++ ) { + PROFILE_START( format + "-read-getVariable" ); + auto variable = + IO::getVariable( path, timestep, database, k, mesh0->vars[v]->name ); + pass = checkVar( format, mesh, mesh0->vars[v], variable ); if ( pass ) { - for (size_t m=0; mname + "\" matched"); + ut.passes( format + ": Variable \"" + variable->name + "\" matched" ); } else { - ut.failure(format+": Variable \"" + variable->name + "\" did not match"); + ut.failure( + format + ": Variable \"" + variable->name + "\" did not match" ); break; } } @@ -224,157 +237,161 @@ void testWriter( const std::string& format, std::vector& mes // 
Main -int main(int argc, char **argv) +int main( int argc, char **argv ) { Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); + int rank = comm.getRank(); int nprocs = comm.getSize(); - Utilities::setAbortBehavior(true,2); + Utilities::setAbortBehavior( true, 2 ); Utilities::setErrorHandlers(); UnitTest ut; // Create some points const int N_points = 8; - const int N_tri = 12; - double x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - double y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 }; - double z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 }; - int tri[N_tri][3] = { - {0,1,3}, {0,3,2}, {4,5,7}, {4,7,6}, // z faces - {0,1,4}, {1,4,5}, {2,3,6}, {3,6,7}, // y faces - {0,2,4}, {2,4,6}, {1,3,5}, {3,5,7} // x faces + const int N_tri = 12; + double x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + double y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 }; + double z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 }; + int tri[N_tri][3] = { + { 0, 1, 3 }, { 0, 3, 2 }, { 4, 5, 7 }, { 4, 7, 6 }, // z faces + { 0, 1, 4 }, { 1, 4, 5 }, { 2, 3, 6 }, { 3, 6, 7 }, // y faces + { 0, 2, 4 }, { 2, 4, 6 }, { 1, 3, 5 }, { 3, 5, 7 } // x faces }; // Create the meshes - auto set1 = std::make_shared(N_points); - for (int i=0; i( N_points ); + for ( int i = 0; i < N_points; i++ ) { set1->points[i].x = x[i]; set1->points[i].y = y[i]; set1->points[i].z = z[i]; } - auto trimesh = std::make_shared(N_tri,set1); - for (int i=0; i( N_tri, set1 ); + for ( int i = 0; i < N_tri; i++ ) { trimesh->A[i] = tri[i][0]; trimesh->B[i] = tri[i][1]; trimesh->C[i] = tri[i][2]; } - auto trilist = std::make_shared(*trimesh); - for (int i=0; iA[i],A) || !approx_equal(trilist->B[i],B) || !approx_equal(trilist->C[i],C) ) - { - printf("Failed to create trilist\n"); + auto trilist = std::make_shared( *trimesh ); + for ( int i = 0; i < N_tri; i++ ) { + Point A( x[tri[i][0]], y[tri[i][0]], z[tri[i][0]] ); + Point B( x[tri[i][1]], y[tri[i][1]], z[tri[i][1]] ); + Point C( x[tri[i][2]], y[tri[i][2]], z[tri[i][2]] ); + if ( !approx_equal( trilist->A[i], A 
) || !approx_equal( trilist->B[i], B ) || + !approx_equal( trilist->C[i], C ) ) { + printf( "Failed to create trilist\n" ); return -1; } } RankInfoStruct rank_data( rank, nprocs, 1, 1 ); - auto domain = std::make_shared(rank_data,6,7,8,1.0,1.0,1.0); + auto domain = std::make_shared( rank_data, 6, 7, 8, 1.0, 1.0, 1.0 ); // Create the variables const auto NodeVar = IO::VariableType::NodeVariable; const auto VolVar = IO::VariableType::VolumeVariable; - auto set_node_mag = std::make_shared(1,NodeVar,"Node_set_mag"); - auto set_node_vec = std::make_shared(3,NodeVar,"Node_set_vec"); - auto list_node_mag = std::make_shared(1,NodeVar,"Node_list_mag"); - auto list_node_vec = std::make_shared(3,NodeVar,"Node_list_vec"); - auto point_node_mag = std::make_shared(1,NodeVar,"Node_point_mag"); - auto point_node_vec = std::make_shared(3,NodeVar,"Node_point_vec"); - auto domain_node_mag = std::make_shared(1,NodeVar,"Node_domain_mag"); - auto domain_node_vec = std::make_shared(3,NodeVar,"Node_domain_vec"); - auto set_cell_mag = std::make_shared(1,VolVar,"Cell_set_mag"); - auto set_cell_vec = std::make_shared(3,VolVar,"Cell_set_vec"); - auto list_cell_mag = std::make_shared(1,VolVar,"Cell_list_mag"); - auto list_cell_vec = std::make_shared(3,VolVar,"Cell_list_vec"); - auto domain_cell_mag = std::make_shared(1,VolVar,"Cell_domain_mag"); - auto domain_cell_vec = std::make_shared(3,VolVar,"Cell_domain_vec"); + auto set_node_mag = std::make_shared( 1, NodeVar, "Node_set_mag" ); + auto set_node_vec = std::make_shared( 3, NodeVar, "Node_set_vec" ); + auto list_node_mag = std::make_shared( 1, NodeVar, "Node_list_mag" ); + auto list_node_vec = std::make_shared( 3, NodeVar, "Node_list_vec" ); + auto point_node_mag = std::make_shared( 1, NodeVar, "Node_point_mag" ); + auto point_node_vec = std::make_shared( 3, NodeVar, "Node_point_vec" ); + auto domain_node_mag = std::make_shared( 1, NodeVar, "Node_domain_mag" ); + auto domain_node_vec = std::make_shared( 3, NodeVar, "Node_domain_vec" ); + 
auto set_cell_mag = std::make_shared( 1, VolVar, "Cell_set_mag" ); + auto set_cell_vec = std::make_shared( 3, VolVar, "Cell_set_vec" ); + auto list_cell_mag = std::make_shared( 1, VolVar, "Cell_list_mag" ); + auto list_cell_vec = std::make_shared( 3, VolVar, "Cell_list_vec" ); + auto domain_cell_mag = std::make_shared( 1, VolVar, "Cell_domain_mag" ); + auto domain_cell_vec = std::make_shared( 3, VolVar, "Cell_domain_vec" ); point_node_mag->data.resize( N_points ); point_node_vec->data.resize( N_points, 3 ); - for (int i=0; idata(i) = distance(set1->points[i]); - point_node_vec->data(i,0) = set1->points[i].x; - point_node_vec->data(i,1) = set1->points[i].y; - point_node_vec->data(i,2) = set1->points[i].z; + for ( int i = 0; i < N_points; i++ ) { + point_node_mag->data( i ) = distance( set1->points[i] ); + point_node_vec->data( i, 0 ) = set1->points[i].x; + point_node_vec->data( i, 1 ) = set1->points[i].y; + point_node_vec->data( i, 2 ) = set1->points[i].z; } set_node_mag->data = point_node_mag->data; set_node_vec->data = point_node_vec->data; - list_node_mag->data.resize( 3*N_tri ); - list_node_vec->data.resize( 3*N_tri, 3 ); - for (int i=0; idata(3*i+0) = distance(trilist->A[i]); - list_node_mag->data(3*i+1) = distance(trilist->B[i]); - list_node_mag->data(3*i+2) = distance(trilist->C[i]); - list_node_vec->data(3*i+0,0) = trilist->A[i].x; - list_node_vec->data(3*i+0,1) = trilist->A[i].y; - list_node_vec->data(3*i+0,2) = trilist->A[i].z; - list_node_vec->data(3*i+1,0) = trilist->B[i].x; - list_node_vec->data(3*i+1,1) = trilist->B[i].y; - list_node_vec->data(3*i+1,2) = trilist->B[i].z; - list_node_vec->data(3*i+2,0) = trilist->C[i].x; - list_node_vec->data(3*i+2,1) = trilist->C[i].y; - list_node_vec->data(3*i+2,2) = trilist->C[i].z; + list_node_mag->data.resize( 3 * N_tri ); + list_node_vec->data.resize( 3 * N_tri, 3 ); + for ( int i = 0; i < N_points; i++ ) { + list_node_mag->data( 3 * i + 0 ) = distance( trilist->A[i] ); + list_node_mag->data( 3 * i + 1 ) = 
distance( trilist->B[i] ); + list_node_mag->data( 3 * i + 2 ) = distance( trilist->C[i] ); + list_node_vec->data( 3 * i + 0, 0 ) = trilist->A[i].x; + list_node_vec->data( 3 * i + 0, 1 ) = trilist->A[i].y; + list_node_vec->data( 3 * i + 0, 2 ) = trilist->A[i].z; + list_node_vec->data( 3 * i + 1, 0 ) = trilist->B[i].x; + list_node_vec->data( 3 * i + 1, 1 ) = trilist->B[i].y; + list_node_vec->data( 3 * i + 1, 2 ) = trilist->B[i].z; + list_node_vec->data( 3 * i + 2, 0 ) = trilist->C[i].x; + list_node_vec->data( 3 * i + 2, 1 ) = trilist->C[i].y; + list_node_vec->data( 3 * i + 2, 2 ) = trilist->C[i].z; } - domain_node_mag->data.resize(domain->nx+1,domain->ny+1,domain->nz+1); - domain_node_vec->data.resize({(size_t)domain->nx+1,(size_t)domain->ny+1,(size_t)domain->nz+1,3}); - for (int i=0; inx+1; i++) { - for (int j=0; jny+1; j++) { - for (int k=0; knz+1; k++) { - domain_node_mag->data(i,j,k) = distance(Point(i,j,k)); - domain_node_vec->data(i,j,k,0) = Point(i,j,k).x; - domain_node_vec->data(i,j,k,1) = Point(i,j,k).y; - domain_node_vec->data(i,j,k,2) = Point(i,j,k).z; + domain_node_mag->data.resize( domain->nx + 1, domain->ny + 1, domain->nz + 1 ); + domain_node_vec->data.resize( + { (size_t) domain->nx + 1, (size_t) domain->ny + 1, (size_t) domain->nz + 1, 3 } ); + for ( int i = 0; i < domain->nx + 1; i++ ) { + for ( int j = 0; j < domain->ny + 1; j++ ) { + for ( int k = 0; k < domain->nz + 1; k++ ) { + domain_node_mag->data( i, j, k ) = distance( Point( i, j, k ) ); + domain_node_vec->data( i, j, k, 0 ) = Point( i, j, k ).x; + domain_node_vec->data( i, j, k, 1 ) = Point( i, j, k ).y; + domain_node_vec->data( i, j, k, 2 ) = Point( i, j, k ).z; } } } set_cell_mag->data.resize( N_tri ); set_cell_vec->data.resize( N_tri, 3 ); - for (int i=0; idata(i) = i; - set_cell_vec->data(i,0) = 3*i+0; - set_cell_vec->data(i,1) = 3*i+1; - set_cell_vec->data(i,2) = 3*i+2; + for ( int i = 0; i < N_tri; i++ ) { + set_cell_mag->data( i ) = i; + set_cell_vec->data( i, 0 ) = 3 * i + 0; + 
set_cell_vec->data( i, 1 ) = 3 * i + 1; + set_cell_vec->data( i, 2 ) = 3 * i + 2; } list_cell_mag->data = set_cell_mag->data; list_cell_vec->data = set_cell_vec->data; - domain_cell_mag->data.resize(domain->nx,domain->ny,domain->nz); - domain_cell_vec->data.resize({(size_t)domain->nx,(size_t)domain->ny,(size_t)domain->nz,3}); - for (int i=0; inx; i++) { - for (int j=0; jny; j++) { - for (int k=0; knz; k++) { - domain_cell_mag->data(i,j,k) = distance(Point(i,j,k)); - domain_cell_vec->data(i,j,k,0) = Point(i,j,k).x; - domain_cell_vec->data(i,j,k,1) = Point(i,j,k).y; - domain_cell_vec->data(i,j,k,2) = Point(i,j,k).z; + domain_cell_mag->data.resize( domain->nx, domain->ny, domain->nz ); + domain_cell_vec->data.resize( + { (size_t) domain->nx, (size_t) domain->ny, (size_t) domain->nz, 3 } ); + for ( int i = 0; i < domain->nx; i++ ) { + for ( int j = 0; j < domain->ny; j++ ) { + for ( int k = 0; k < domain->nz; k++ ) { + domain_cell_mag->data( i, j, k ) = distance( Point( i, j, k ) ); + domain_cell_vec->data( i, j, k, 0 ) = Point( i, j, k ).x; + domain_cell_vec->data( i, j, k, 1 ) = Point( i, j, k ).y; + domain_cell_vec->data( i, j, k, 2 ) = Point( i, j, k ).z; } } } // Create the MeshDataStruct - std::vector meshData(4); + std::vector meshData( 4 ); meshData[0].meshName = "pointmesh"; - meshData[0].mesh = set1; - meshData[0].vars.push_back(point_node_mag); - meshData[0].vars.push_back(point_node_vec); + meshData[0].mesh = set1; + meshData[0].vars.push_back( point_node_mag ); + meshData[0].vars.push_back( point_node_vec ); meshData[1].meshName = "trimesh"; - meshData[1].mesh = trimesh; - meshData[1].vars.push_back(set_node_mag); - meshData[1].vars.push_back(set_node_vec); - meshData[1].vars.push_back(set_cell_mag); - meshData[1].vars.push_back(set_cell_vec); + meshData[1].mesh = trimesh; + meshData[1].vars.push_back( set_node_mag ); + meshData[1].vars.push_back( set_node_vec ); + meshData[1].vars.push_back( set_cell_mag ); + meshData[1].vars.push_back( set_cell_vec ); 
meshData[2].meshName = "trilist"; - meshData[2].mesh = trilist; - meshData[2].vars.push_back(list_node_mag); - meshData[2].vars.push_back(list_node_vec); - meshData[2].vars.push_back(list_cell_mag); - meshData[2].vars.push_back(list_cell_vec); + meshData[2].mesh = trilist; + meshData[2].vars.push_back( list_node_mag ); + meshData[2].vars.push_back( list_node_vec ); + meshData[2].vars.push_back( list_cell_mag ); + meshData[2].vars.push_back( list_cell_vec ); meshData[3].meshName = "domain"; - meshData[3].mesh = domain; - meshData[3].vars.push_back(domain_node_mag); - meshData[3].vars.push_back(domain_node_vec); - meshData[3].vars.push_back(domain_cell_mag); - meshData[3].vars.push_back(domain_cell_vec); + meshData[3].mesh = domain; + meshData[3].vars.push_back( domain_node_mag ); + meshData[3].vars.push_back( domain_node_vec ); + meshData[3].vars.push_back( domain_cell_mag ); + meshData[3].vars.push_back( domain_cell_vec ); + for ( const auto &data : meshData ) + ASSERT( data.check( true ) ); // Run the tests testWriter( "old", meshData, ut ); @@ -384,11 +401,9 @@ int main(int argc, char **argv) // Finished ut.report(); - PROFILE_SAVE("TestWriter",true); + PROFILE_SAVE( "TestWriter", true ); int N_errors = ut.NumFailGlobal(); comm.barrier(); Utilities::shutdown(); return N_errors; } - - diff --git a/tests/convertIO.cpp b/tests/convertIO.cpp index a6745263..4ecc8aa9 100644 --- a/tests/convertIO.cpp +++ b/tests/convertIO.cpp @@ -5,85 +5,66 @@ #include #include -#include "common/MPI_Helpers.h" -#include "common/Communication.h" +#include "common/MPI.h" #include "common/Utilities.h" #include "IO/Mesh.h" #include "IO/Reader.h" #include "IO/Writer.h" #include "ProfilerApp.h" - int main(int argc, char **argv) { - // Initialize MPI - Utilities::startup( argc, argv ); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); - Utilities::setErrorHandlers(); - PROFILE_ENABLE(2); - PROFILE_ENABLE_TRACE(); - PROFILE_START("Main"); - { 
// Limit scope + // Initialize MPI + Utilities::startup( argc, argv ); + Utilities::setErrorHandlers(); + PROFILE_ENABLE(2); + PROFILE_ENABLE_TRACE(); + PROFILE_START("Main"); - // Get inputs - if ( argc != 3 ) { - std::cerr << "Error calling convertIO:\n"; - std::cerr << " convertIO input_file format\n"; - return -1; - } - std::string filename = argv[1]; - std::string format = argv[2]; - std::string path = IO::getPath( filename ); + { // Limit scope - // Read the timesteps - auto timesteps = IO::readTimesteps( filename ); - // Loop through the timesteps, reading/writing the data - IO::initialize( "", format, false ); - for ( auto timestep : timesteps ) { - - // Read the list of MeshDatabase - auto databases = IO::getMeshList( path, timestep ); - - // Build the MeshDataStruct - std::vector meshData(databases.size()); - - // Loop through the database - int i = 0; - PROFILE_START("Read"); - for ( const auto& database : databases ) { - - // Read the appropriate mesh domain - ASSERT( (int) database.domains.size() == nprocs ); - meshData[i].meshName = database.name; - meshData[i].mesh = IO::getMesh( path, timestep, database, rank ); - - // Read the variables - for ( auto var : database.variables ) { - auto varData = IO::getVariable( path, timestep, database, rank, var.name ); - IO::reformatVariable( *meshData[i].mesh, *varData ); - meshData[i].vars.push_back( varData ); - } - - i++; + Utilities::MPI comm( MPI_COMM_WORLD ); + // Get inputs + if ( argc != 5 ) { + std::cerr << "Error calling convertIO:\n"; + std::cerr << " convertIO \n"; + return -1; } - MPI_Barrier(comm); - PROFILE_STOP("Read"); + std::string path_in = argv[1]; + std::string format_in = argv[2]; + std::string path_out = argv[3]; + std::string format_out = argv[4]; - // Save the mesh data to a new file - PROFILE_START("Write"); - IO::writeData( timestep, meshData, MPI_COMM_WORLD ); - MPI_Barrier(comm); - PROFILE_STOP("Write"); - } + // Check that we have enough ranks to load and write the data + // This is 
really only a bottleneck for the writer + int N_domains = IO::maxDomains( path_in, format_in, comm ); + ASSERT( comm.getSize() == N_domains ); - } // Limit scope - PROFILE_STOP("Main"); - PROFILE_SAVE("convertData",true); - comm.barrier(); - Utilities::shutdown(); - return 0; + // Read the timesteps + auto timesteps = IO::readTimesteps( path_in, format_in ); + + // Loop through the timesteps, reading/writing the data + IO::initialize( path_out, format_out, false ); + for ( auto timestep : timesteps ) { + + // Set the domain to read (needs to be the current rank for the writer to be valid) + int domain = comm.getRank(); + + // Get the maximum number of domains for the + auto data = IO::readData( path_in, timestep, domain ); + + // Save the mesh data to a new file + IO::writeData( timestep, data, comm ); + + } + + } // Limit scope + + // shutdown + PROFILE_STOP("Main"); + PROFILE_SAVE("convertData",true); + Utilities::shutdown(); + return 0; } diff --git a/tests/lbpm_BGK_simulator.cpp b/tests/lbpm_BGK_simulator.cpp index bf8778a6..46002886 100644 --- a/tests/lbpm_BGK_simulator.cpp +++ b/tests/lbpm_BGK_simulator.cpp @@ -30,8 +30,8 @@ int main(int argc, char **argv) int rank,nprocs; Utilities::startup( argc, argv ); Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); + int rank = comm.getRank(); + int nprocs = comm.getSize(); { // parallel domain size (# of sub-domains) int nprocx,nprocy,nprocz; diff --git a/tests/lbpm_color_simulator.cpp b/tests/lbpm_color_simulator.cpp index 1d579486..d62bef0f 100644 --- a/tests/lbpm_color_simulator.cpp +++ b/tests/lbpm_color_simulator.cpp @@ -1,13 +1,13 @@ +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include "models/ColorModel.h" #include "common/Utilities.h" +#include "models/ColorModel.h" //#define WRE_SURFACES @@ -21,63 +21,85 @@ // Implementation of Two-Phase Immiscible LBM using CUDA 
//************************************************************************* -int main(int argc, char **argv) +int main( int argc, char **argv ) { - - // Initialize MPI - Utilities::startup( argc, argv ); - Utilities::MPI comm( MPI_COMM_WORLD ); - int rank = comm.getRank(); - int nprocs = comm.getSize(); - - // Load the input database - auto db = std::make_shared( argv[1] ); - // Initialize MPI and error handlers - auto multiple = db->getWithDefault( "MPI_THREAD_MULTIPLE", true ); - Utilities::startup( argc, argv, multiple ); - Utilities::MPI::changeProfileLevel( 1 ); + // Initialize + Utilities::startup( argc, argv ); - { // Limit scope so variables that contain communicators will free before MPI_Finialize + { // Limit scope so variables that contain communicators will free before MPI_Finialize - if (rank == 0){ - printf("********************************************************\n"); - printf("Running Color LBM \n"); - printf("********************************************************\n"); - } - // Initialize compute device - int device=ScaLBL_SetDevice(rank); - NULL_USE( device ); - ScaLBL_DeviceBarrier(); - comm.barrier(); + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + std::string SimulationMode = "production"; + // Load the input database + auto db = std::make_shared( argv[1] ); + if (argc > 2) { + SimulationMode = "development"; + } - PROFILE_ENABLE(1); - //PROFILE_ENABLE_TRACE(); - //PROFILE_ENABLE_MEMORY(); - PROFILE_SYNCHRONIZE(); - PROFILE_START("Main"); - Utilities::setErrorHandlers(); + if ( rank == 0 ) { + printf( "********************************************************\n" ); + printf( "Running Color LBM \n" ); + printf( "********************************************************\n" ); + if (SimulationMode == "development") + printf("**** DEVELOPMENT MODE ENABLED *************\n"); + } + // Initialize compute device + int device = ScaLBL_SetDevice( rank ); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); 
+ comm.barrier(); - auto filename = argv[1]; - ScaLBL_ColorModel ColorModel(rank,nprocs,comm); - ColorModel.ReadParams(filename); - ColorModel.SetDomain(); - ColorModel.ReadInput(); - ColorModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables - ColorModel.Initialize(); // initializing the model will set initial conditions for variables - ColorModel.Run(); - //ColorModel.WriteDebug(); + PROFILE_ENABLE( 1 ); + // PROFILE_ENABLE_TRACE(); + // PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START( "Main" ); + Utilities::setErrorHandlers(); - PROFILE_STOP("Main"); - auto file = db->getWithDefault( "TimerFile", "lbpm_color_simulator" ); - auto level = db->getWithDefault( "TimerLevel", 1 ); - PROFILE_SAVE(file,level); - // **************************************************** + auto filename = argv[1]; + ScaLBL_ColorModel ColorModel( rank, nprocs, comm ); + ColorModel.ReadParams( filename ); + ColorModel.SetDomain(); + ColorModel.ReadInput(); + ColorModel.Create(); // creating the model will create data structure to match the pore + // structure and allocate variables + ColorModel.Initialize(); // initializing the model will set initial conditions for variables + + if (SimulationMode == "development"){ + double MLUPS=0.0; + int timestep = 0; + int analysis_interval = ColorModel.timestepMax; + if (ColorModel.analysis_db->keyExists( "" )){ + analysis_interval = ColorModel.analysis_db->getScalar( "analysis_interval" ); + } + FlowAdaptor Adapt(ColorModel); + runAnalysis analysis(ColorModel); + while (ColorModel.timestep < ColorModel.timestepMax){ + timestep += analysis_interval; + MLUPS = ColorModel.Run(timestep); + if (rank==0) printf("Lattice update rate (per MPI process)= %f MLUPS \n", MLUPS); + + Adapt.MoveInterface(ColorModel); + } + } //Analysis.WriteVis(LeeModel,LeeModel.db, timestep); + + else + ColorModel.Run(); + + ColorModel.WriteDebug(); + + PROFILE_STOP( "Main" ); + auto file = 
db->getWithDefault( "TimerFile", "lbpm_color_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file, level ); + // **************************************************** - } // Limit scope so variables that contain communicators will free before MPI_Finialize + } // Limit scope so variables that contain communicators will free before MPI_Finialize - Utilities::shutdown(); + Utilities::shutdown(); + return 0; } - - diff --git a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp index 600d9f2f..caeef89a 100644 --- a/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp +++ b/tests/lbpm_electrokinetic_SingleFluid_simulator.cpp @@ -79,20 +79,22 @@ int main(int argc, char **argv) IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); IonModel.Initialize(); + // Get maximal time converting factor based on Sotkes and Ion solvers + Study.getTimeConvMax_PNP_coupling(StokesModel.time_conv,IonModel.time_conv); // Initialize LB-Poisson model PoissonSolver.ReadParams(filename); PoissonSolver.SetDomain(); PoissonSolver.ReadInput(); PoissonSolver.Create(); - PoissonSolver.Initialize(); + PoissonSolver.Initialize(Study.time_conv_max); int timestep=0; while (timestep < Study.timestepMax){ timestep++; - PoissonSolver.Run(IonModel.ChargeDensity);//solve Poisson equtaion to get steady-state electrical potental + PoissonSolver.Run(IonModel.ChargeDensity,timestep);//solve Poisson equtaion to get steady-state electrical potental StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity IonModel.Run(StokesModel.Velocity,PoissonSolver.ElectricField); //solve for ion transport and electric potential diff --git a/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp new file mode 100644 index 00000000..19d99b9c --- /dev/null +++ 
b/tests/lbpm_freelee_SingleFluidBGK_simulator.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Utilities.h" +#include "models/FreeLeeModel.h" + +//******************************************************************* +// Implementation of Free-Energy Two-Phase LBM (Lee model) +//******************************************************************* + +int main( int argc, char **argv ) +{ + + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Single-Fluid Solver based on Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create_SingleFluid(); + LeeModel.Initialize_SingleFluid(); + LeeModel.Run_SingleFluid(); + LeeModel.WriteDebug_SingleFluid(); + + PROFILE_STOP("Main"); + auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_SingleFluidBGK_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file,level ); + // **************************************************** + + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); + return 0; +} diff 
--git a/tests/lbpm_freelee_simulator.cpp b/tests/lbpm_freelee_simulator.cpp new file mode 100644 index 00000000..0f003baa --- /dev/null +++ b/tests/lbpm_freelee_simulator.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Utilities.h" +#include "models/FreeLeeModel.h" +#include "analysis/FreeEnergy.h" + +//******************************************************************* +// Implementation of Free-Energy Two-Phase LBM (Lee model) +//******************************************************************* + +int main( int argc, char **argv ) +{ + + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Free Energy Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create_TwoFluid(); + + FreeEnergyAnalyzer Analysis(LeeModel.Dm); + + LeeModel.Initialize_TwoFluid(); + + /*** RUN MAIN TIMESTEPS HERE ************/ + double MLUPS=0.0; + int timestep = 0; + int visualization_time = LeeModel.timestepMax; + if (LeeModel.vis_db->keyExists( "visualization_interval" )){ + visualization_time = LeeModel.vis_db->getScalar( "visualization_interval" ); + 
timestep += visualization_time; + } + while (LeeModel.timestep < LeeModel.timestepMax){ + MLUPS = LeeModel.Run_TwoFluid(timestep); + if (rank==0) printf("Lattice update rate (per MPI process)= %f MLUPS \n", MLUPS); + Analysis.WriteVis(LeeModel,LeeModel.db, timestep); + timestep += visualization_time; + } + //LeeModel.WriteDebug_TwoFluid(); + if (rank==0) printf("********************************************************\n"); + if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS); + MLUPS *= nprocs; + if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS); + if (rank==0) printf("********************************************************\n"); + // ************************************************************************ + + PROFILE_STOP("Main"); + auto file = db->getWithDefault( "TimerFile", "lbpm_freelee_simulator" ); + auto level = db->getWithDefault( "TimerLevel", 1 ); + PROFILE_SAVE( file,level ); + // **************************************************** + + + } // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::shutdown(); + return 0; +} diff --git a/tests/lbpm_morph_pp.cpp b/tests/lbpm_morph_pp.cpp index 12f6f319..e40dd6e0 100644 --- a/tests/lbpm_morph_pp.cpp +++ b/tests/lbpm_morph_pp.cpp @@ -128,7 +128,6 @@ int main(int argc, char **argv) comm.barrier(); // Extract only the connected part of NWP - BlobIDstruct new_index; double vF=0.0; double vS=0.0; ComputeGlobalBlobIDs(nx-2,ny-2,nz-2,Dm->rank_info,phase,SignDist,vF,vS,phase_label,Dm->Comm); Dm->Comm.barrier(); diff --git a/tests/lbpm_uCT_pp.cpp b/tests/lbpm_uCT_pp.cpp index dbf9684b..b5d42e82 100644 --- a/tests/lbpm_uCT_pp.cpp +++ b/tests/lbpm_uCT_pp.cpp @@ -14,7 +14,7 @@ #include "common/Array.h" #include "common/Domain.h" #include "common/Communication.h" -#include "common/MPI_Helpers.h" +#include "common/MPI.h" #include "IO/MeshDatabase.h" #include "IO/Mesh.h" #include "IO/Writer.h" @@ -192,7 +192,7 @@ int main(int argc, 
char **argv) fillFloat[0]->fill( LOCVOL[0] ); } netcdf::close( fid ); - MPI_Barrier(comm); + comm.barrier(); PROFILE_STOP("ReadVolume"); if (rank==0) printf("Read complete\n"); @@ -255,15 +255,15 @@ int main(int argc, char **argv) } } } - count_plus=sumReduce( Dm[0]->Comm, count_plus); - count_minus=sumReduce( Dm[0]->Comm, count_minus); + count_plus = Dm[0]->Comm.sumReduce( count_plus); + count_minus = Dm[0]->Comm.sumReduce( count_minus); if (rank==0) printf("minimum value=%f, max value=%f \n",min_value,max_value); if (rank==0) printf("plus=%i, minus=%i \n",count_plus,count_minus); ASSERT( count_plus > 0 && count_minus > 0 ); - MPI_Barrier(comm); - mean_plus = sumReduce( Dm[0]->Comm, mean_plus ) / count_plus; - mean_minus = sumReduce( Dm[0]->Comm, mean_minus ) / count_minus; - MPI_Barrier(comm); + comm.barrier(); + mean_plus = Dm[0]->Comm.sumReduce( mean_plus ) / count_plus; + mean_minus = Dm[0]->Comm.sumReduce( mean_minus ) / count_minus; + comm.barrier(); if (rank==0) printf(" Region 1 mean (+): %f, Region 2 mean (-): %f \n",mean_plus, mean_minus); //if (rank==0) printf("Scale the input data (size = %i) \n",LOCVOL[0].length()); @@ -284,7 +284,7 @@ int main(int argc, char **argv) // Fill the source data for the coarse meshes if (rank==0) printf("Coarsen the mesh for N_levels=%i \n",N_levels); - MPI_Barrier(comm); + comm.barrier(); PROFILE_START("CoarsenMesh"); for (int i=1; i filter(ratio[0],ratio[1],ratio[2]); @@ -300,7 +300,7 @@ int main(int argc, char **argv) printf(" filter_x=%i, filter_y=%i, filter_z=%i \n",int(filter.size(0)),int(filter.size(1)),int(filter.size(2)) ); printf(" ratio= %i,%i,%i \n",int(ratio[0]),int(ratio[1]),int(ratio[2]) ); } - MPI_Barrier(comm); + comm.barrier(); } PROFILE_STOP("CoarsenMesh"); @@ -312,7 +312,7 @@ int main(int argc, char **argv) NonLocalMean.back(), *fillFloat.back(), *Dm.back(), nprocx, rough_cutoff, lamda, nlm_sigsq, nlm_depth); PROFILE_STOP("Solve coarse mesh"); - MPI_Barrier(comm); + comm.barrier(); // Refine the 
solution PROFILE_START("Refine distance"); @@ -326,7 +326,7 @@ int main(int argc, char **argv) rough_cutoff, lamda, nlm_sigsq, nlm_depth); } PROFILE_STOP("Refine distance"); - MPI_Barrier(comm); + comm.barrier(); // Perform a final filter PROFILE_START("Filtering final domains"); @@ -424,14 +424,14 @@ int main(int argc, char **argv) meshData[0].vars.push_back(filter_Dist2_var); fillDouble[0]->copy( filter_Dist2, filter_Dist2_var->data ); #endif - MPI_Barrier(comm); + comm.barrier(); if (rank==0) printf("Writing output \n"); // Write visulization data IO::writeData( 0, meshData, comm ); if (rank==0) printf("Finished. \n"); // Compute the Minkowski functionals - MPI_Barrier(comm); + comm.barrier(); auto Averages = std::make_shared(Dm[0]); Array phase_label(Nx[0]+2,Ny[0]+2,Nz[0]+2); diff --git a/tests/testGlobalMassFreeLee.cpp b/tests/testGlobalMassFreeLee.cpp new file mode 100644 index 00000000..2e976854 --- /dev/null +++ b/tests/testGlobalMassFreeLee.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/Utilities.h" +#include "models/FreeLeeModel.h" + +//******************************************************************* +// Implementation of Free-Energy Two-Phase LBM (Lee model) +//******************************************************************* + +int main( int argc, char **argv ) +{ + + // Initialize + Utilities::startup( argc, argv ); + + // Load the input database + auto db = std::make_shared( argv[1] ); + + { // Limit scope so variables that contain communicators will free before MPI_Finialize + + Utilities::MPI comm( MPI_COMM_WORLD ); + int rank = comm.getRank(); + int nprocs = comm.getSize(); + + if (rank == 0){ + printf("********************************************************\n"); + printf("Running Free Energy Lee LBM \n"); + printf("********************************************************\n"); + } + // Initialize compute device + int device=ScaLBL_SetDevice(rank); + NULL_USE( device ); + 
ScaLBL_DeviceBarrier(); + comm.barrier(); + + PROFILE_ENABLE(1); + //PROFILE_ENABLE_TRACE(); + //PROFILE_ENABLE_MEMORY(); + PROFILE_SYNCHRONIZE(); + PROFILE_START("Main"); + Utilities::setErrorHandlers(); + + auto filename = argv[1]; + ScaLBL_FreeLeeModel LeeModel( rank,nprocs,comm ); + LeeModel.ReadParams( filename ); + LeeModel.SetDomain(); + LeeModel.ReadInput(); + LeeModel.Create_TwoFluid(); + LeeModel.Initialize_TwoFluid(); + /* check neighbors */ + + + /* Copy the initial density to test that global mass is conserved */ + int Nx = LeeModel.Dm->Nx; + int Ny = LeeModel.Dm->Ny; + int Nz = LeeModel.Dm->Nz; + DoubleArray DensityInit(Nx,Ny,Nz); + LeeModel.ScaLBL_Comm->RegularLayout(LeeModel.Map,LeeModel.Den,DensityInit); + + double MLUPS = LeeModel.Run_TwoFluid(LeeModel.timestepMax); + + DoubleArray DensityFinal(Nx,Ny,Nz); + LeeModel.ScaLBL_Comm->RegularLayout(LeeModel.Map,LeeModel.Den,DensityFinal); + + DoubleArray DensityChange(Nx,Ny,Nz); + double totalChange=0.0; + for (int k=1; k