diff --git a/tests/lbpm_color_simulator.h b/analysis/runAnalysis.cpp similarity index 54% rename from tests/lbpm_color_simulator.h rename to analysis/runAnalysis.cpp index c17fa0ec..15e7c95d 100644 --- a/tests/lbpm_color_simulator.h +++ b/analysis/runAnalysis.cpp @@ -1,22 +1,22 @@ // Run the analysis, blob identification, and write restart files +#include "analysis/runAnalysis.h" +#include "analysis/analysis.h" #include "common/Array.h" #include "common/Communication.h" #include "common/MPI_Helpers.h" #include "common/ScaLBL.h" #include "IO/MeshDatabase.h" +#include "threadpool/thread_pool.h" + +#include "ProfilerApp.h" -//#define ANALYSIS_INTERVAL 6 -//#define ANALYSIS_INTERVAL 1000 -//#define BLOBID_INTERVAL 1000 -enum class AnalysisType : uint64_t { AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02, - CopySimState=0x04, ComputeAverages=0x08, CreateRestart=0x10, WriteVis=0x20 }; AnalysisType& operator |=(AnalysisType &lhs, AnalysisType rhs) { - lhs = static_cast ( + lhs = static_cast( static_cast::type>(lhs) | - static_cast::type>(rhs) + static_cast::type>(rhs) ); return lhs; } @@ -34,15 +34,6 @@ void DeleteArray( const TYPE *p ) } -// Structure used to store ids -struct AnalysisWaitIdStruct { - ThreadPool::thread_id_t blobID; - ThreadPool::thread_id_t analysis; - ThreadPool::thread_id_t vis; - ThreadPool::thread_id_t restart; -}; - - // Helper class to write the restart file from a seperate thread class WriteRestartWorkItem: public ThreadPool::WorkItemRet { @@ -65,27 +56,24 @@ private: // Helper class to compute the blob ids static const std::string id_map_filename = "lbpm_id_map.txt"; -typedef std::shared_ptr > BlobIDstruct; -typedef std::shared_ptr > BlobIDList; class BlobIdentificationWorkItem1: public ThreadPool::WorkItemRet { public: BlobIdentificationWorkItem1( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_, std::shared_ptr phase_, const DoubleArray& dist_, - BlobIDstruct last_id_, BlobIDstruct new_index_, BlobIDstruct new_id_, BlobIDList new_list_ ): + BlobIDstruct last_id_, BlobIDstruct new_index_, BlobIDstruct new_id_, BlobIDList new_list_, runAnalysis::commWrapper&& comm_ ): timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), - phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_) + phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_), comm(std::move(comm_)) { - MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); } - ~BlobIdentificationWorkItem1() { MPI_Comm_free(&newcomm); } + ~BlobIdentificationWorkItem1() { } virtual void run() { // Compute the global blob id and compare to the previous version PROFILE_START("Identify blobs",1); double vF = 0.0; double vS = -1.0; // one voxel buffer region around solid IntArray& ids = new_index->second; - new_index->first = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,*phase,dist,vF,vS,ids,newcomm); + new_index->first = ComputeGlobalBlobIDs(Nx-2,Ny-2,Nz-2,rank_info,*phase,dist,vF,vS,ids,comm.comm); PROFILE_STOP("Identify blobs",1); } private: @@ -97,20 +85,19 @@ private: const DoubleArray& dist; BlobIDstruct last_id, new_index, new_id; BlobIDList new_list; - MPI_Comm newcomm; + runAnalysis::commWrapper comm; }; class BlobIdentificationWorkItem2: public ThreadPool::WorkItemRet { public: BlobIdentificationWorkItem2( int timestep_, int Nx_, int Ny_, int Nz_, const RankInfoStruct& rank_info_, std::shared_ptr phase_, const DoubleArray& dist_, - BlobIDstruct last_id_, BlobIDstruct new_index_, BlobIDstruct new_id_, BlobIDList new_list_ ): + BlobIDstruct last_id_, BlobIDstruct new_index_, BlobIDstruct new_id_, BlobIDList new_list_ , runAnalysis::commWrapper&& comm_ ): timestep(timestep_), Nx(Nx_), Ny(Ny_), Nz(Nz_), rank_info(rank_info_), - phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_) + phase(phase_), dist(dist_), last_id(last_id_), new_index(new_index_), new_id(new_id_), new_list(new_list_), comm(std::move(comm_)) { - MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); } - ~BlobIdentificationWorkItem2() { MPI_Comm_free(&newcomm); } + ~BlobIdentificationWorkItem2() { } virtual void run() { // Compute the global blob id and compare to the previous version PROFILE_START("Identify blobs maps",1); @@ -121,7 +108,7 @@ public: if ( last_id.get()!=NULL ) { // Compute the timestep-timestep map const IntArray& old_ids = last_id->second; - ID_map_struct map = computeIDMap(Nx,Ny,Nz,old_ids,ids,newcomm); + ID_map_struct map = computeIDMap(Nx,Ny,Nz,old_ids,ids,comm.comm); // Renumber the current timestep's ids getNewIDs(map,max_id,*new_list); renumberIDs(*new_list,new_id->second); @@ -143,7 +130,7 @@ private: const DoubleArray& dist; BlobIDstruct last_id, new_index, new_id; BlobIDList new_list; - MPI_Comm newcomm; + runAnalysis::commWrapper comm; }; @@ -152,12 +139,11 @@ class WriteVisWorkItem: public ThreadPool::WorkItemRet { public: WriteVisWorkItem( int timestep_, std::vector& visData_, - TwoPhase& Avgerages_, fillHalo& fillData_ ): - timestep(timestep_), visData(visData_), Averages(Avgerages_), fillData(fillData_) + TwoPhase& Avgerages_, fillHalo& fillData_, runAnalysis::commWrapper&& comm_ ): + timestep(timestep_), visData(visData_), Averages(Avgerages_), fillData(fillData_), comm(std::move(comm_)) { - MPI_Comm_dup(MPI_COMM_WORLD,&newcomm); } - ~WriteVisWorkItem() { MPI_Comm_free(&newcomm); } + ~WriteVisWorkItem() { } virtual void run() { PROFILE_START("Save Vis",1); ASSERT(visData[0].vars[0]->name=="phase"); @@ -172,7 +158,7 @@ public: fillData.copy(Averages.Press,PressData); fillData.copy(Averages.SDs,SignData); fillData.copy(Averages.Label_NWP,BlobData); - IO::writeData( timestep, visData, newcomm ); + IO::writeData( timestep, visData, comm.comm ); PROFILE_STOP("Save Vis",1); }; private: @@ -181,7 +167,7 @@ private: std::vector& visData; TwoPhase& Averages; fillHalo& fillData; - MPI_Comm newcomm; + runAnalysis::commWrapper comm; }; @@ -233,25 +219,177 @@ private: }; -// Function to start the analysis -void run_analysis( int timestep, int restart_interval, int ANALYSIS_INTERVAL, int BLOBID_INTERVAL, - const RankInfoStruct& rank_info, ScaLBL_Communicator &ScaLBL_Comm, TwoPhase& Averages, - BlobIDstruct& last_ids, BlobIDstruct& last_index, BlobIDList& last_id_map, - int Np, int Nx, int Ny, int Nz, bool pBC, double beta, double err, - const double *Phi, double *Pressure, double *Velocity, - IntArray Map, double *fq, double *Den, - const char *LocalRestartFile, std::vector& visData, fillHalo& fillData, - ThreadPool& tpool, AnalysisWaitIdStruct& wait ) +/****************************************************************** + * MPI comm wrapper for use with analysis * + ******************************************************************/ +runAnalysis::commWrapper::commWrapper( int tag_, MPI_Comm comm_, runAnalysis* analysis_ ): + tag(tag_), + comm(comm_), + analysis(analysis_) { - int N = Nx*Ny*Nz; +} +runAnalysis::commWrapper::commWrapper( commWrapper &&rhs ): + tag(rhs.tag), + comm(rhs.comm), + analysis(rhs.analysis) +{ + rhs.tag = -1; +} +runAnalysis::commWrapper::~commWrapper() +{ + if ( tag == -1 ) + return; + MPI_Barrier( comm ); + analysis->d_comm_used[tag] = false; +} +runAnalysis::commWrapper runAnalysis::getComm( ) +{ + // Get a tag from root + int tag = -1; + if ( d_rank == 0 ) { + for (int i=0; i<1024; i++) { + if ( !d_comm_used[i] ) { + tag = i; + break; + } + } + if ( tag == -1 ) + ERROR("Unable to get comm"); + } + MPI_Bcast( &tag, 1, MPI_INT, 0, d_comm ); + d_comm_used[tag] = true; + if ( d_comms[tag] == MPI_COMM_NULL ) + MPI_Comm_dup( MPI_COMM_WORLD, &d_comms[tag] ); + return commWrapper(tag,d_comms[tag],this); +} - // Determin the analysis we want to perform + +/****************************************************************** + * Constructor/Destructors * + ******************************************************************/ +runAnalysis::runAnalysis( int N_threads, int restart_interval, int analysis_interval, + int blobid_interval, const RankInfoStruct& rank_info, const ScaLBL_Communicator &ScaLBL_Comm, const Domain& Dm, + int Np, int Nx, int Ny, int Nz, double Lx, double Ly, double Lz, bool pBC, double beta, double err, + IntArray Map, const std::string& LocalRestartFile ): + d_Np( Np ), + d_restart_interval( restart_interval ), + d_analysis_interval( analysis_interval ), + d_blobid_interval( blobid_interval ), + d_beta( beta ), + d_tpool( N_threads ), + d_ScaLBL_Comm( ScaLBL_Comm ), + d_rank_info( rank_info ), + d_Map( Map ), + d_fillData(Dm.Comm,Dm.rank_info,Nx-2,Ny-2,Nz-2,1,1,1,0,1), + d_restartFile( LocalRestartFile ) +{ + d_N[0] = Nx; + d_N[1] = Ny; + d_N[2] = Nz; + d_rank = MPI_WORLD_RANK(); + writeIDMap(ID_map_struct(),0,id_map_filename); + // Create the MeshDataStruct + d_meshData.resize(1); + d_meshData[0].meshName = "domain"; + d_meshData[0].mesh = std::make_shared( Dm.rank_info,Nx-2,Ny-2,Nz-2,Lx,Ly,Lz ); + auto PhaseVar = std::make_shared(); + auto PressVar = std::make_shared(); + auto SignDistVar = std::make_shared(); + auto BlobIDVar = std::make_shared(); + PhaseVar->name = "phase"; + PhaseVar->type = IO::VariableType::VolumeVariable; + PhaseVar->dim = 1; + PhaseVar->data.resize(Nx-2,Ny-2,Nz-2); + d_meshData[0].vars.push_back(PhaseVar); + PressVar->name = "Pressure"; + PressVar->type = IO::VariableType::VolumeVariable; + PressVar->dim = 1; + PressVar->data.resize(Nx-2,Ny-2,Nz-2); + d_meshData[0].vars.push_back(PressVar); + SignDistVar->name = "SignDist"; + SignDistVar->type = IO::VariableType::VolumeVariable; + SignDistVar->dim = 1; + SignDistVar->data.resize(Nx-2,Ny-2,Nz-2); + d_meshData[0].vars.push_back(SignDistVar); + BlobIDVar->name = "BlobID"; + BlobIDVar->type = IO::VariableType::VolumeVariable; + BlobIDVar->dim = 1; + BlobIDVar->data.resize(Nx-2,Ny-2,Nz-2); + d_meshData[0].vars.push_back(BlobIDVar); + // Initialize the comms + MPI_Comm_dup(MPI_COMM_WORLD,&d_comm); + for (int i=0; i<1024; i++) { + d_comms[i] = MPI_COMM_NULL; + d_comm_used[i] = false; + } +} +runAnalysis::~runAnalysis( ) +{ + // Finish processing analysis + finish(); + // Clear internal data + MPI_Comm_free( &d_comm ); + for (int i=0; i<1024; i++) { + if ( d_comms[i] != MPI_COMM_NULL ) + MPI_Comm_free(&d_comms[i]); + } +} +void runAnalysis::finish( ) +{ + // Wait for the work items to finish + d_tpool.wait_pool_finished(); + // Clear the wait ids + d_wait_blobID.reset(); + d_wait_analysis.reset(); + d_wait_vis.reset(); + d_wait_restart.reset(); + // Syncronize + MPI_Barrier( d_comm ); +} + + +/****************************************************************** + * Set the thread affinities * + ******************************************************************/ +void print( const std::vector& ids ) +{ + if ( ids.empty() ) + return; + printf("%i",ids[0]); + for (size_t i=1; i 20 ) { + std::cerr << "Analysis queue is getting behind, waiting ...\n"; + finish(); + } + + PROFILE_START("run"); // Copy the appropriate variables to the host (so we can spawn new threads) ScaLBL_DeviceBarrier(); @@ -298,44 +455,41 @@ void run_analysis( int timestep, int restart_interval, int ANALYSIS_INTERVAL, in matches(type,AnalysisType::CopySimState) || matches(type,AnalysisType::IdentifyBlobs) ) { - phase = std::shared_ptr(new DoubleArray(Nx,Ny,Nz)); + phase = std::shared_ptr(new DoubleArray(d_N[0],d_N[1],d_N[2])); ScaLBL_CopyToHost(phase->data(),Phi,N*sizeof(double)); } if ( matches(type,AnalysisType::CopyPhaseIndicator) ) { memcpy(Averages.Phase_tplus.data(),phase->data(),N*sizeof(double)); - //Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tplus); + //Averages.ColorToSignedDistance(d_beta,Averages.Phase,Averages.Phase_tplus); } if ( matches(type,AnalysisType::ComputeAverages) ) { memcpy(Averages.Phase_tminus.data(),phase->data(),N*sizeof(double)); - //Averages.ColorToSignedDistance(beta,Averages.Phase,Averages.Phase_tminus); + //Averages.ColorToSignedDistance(d_beta,Averages.Phase,Averages.Phase_tminus); } if ( matches(type,AnalysisType::CopySimState) ) { // Copy the members of Averages to the cpu (phase was copied above) - // Wait PROFILE_START("Copy-Pressure",1); - ScaLBL_D3Q19_Pressure(fq,Pressure,Np); - ScaLBL_D3Q19_Momentum(fq,Velocity,Np); + ScaLBL_D3Q19_Pressure(fq,Pressure,d_Np); + ScaLBL_D3Q19_Momentum(fq,Velocity,d_Np); ScaLBL_DeviceBarrier(); PROFILE_STOP("Copy-Pressure",1); PROFILE_START("Copy-Wait",1); - tpool.wait(wait.analysis); - tpool.wait(wait.vis); // Make sure we are done using analysis before modifying PROFILE_STOP("Copy-Wait",1); PROFILE_START("Copy-State",1); memcpy(Averages.Phase.data(),phase->data(),N*sizeof(double)); - ScaLBL_Comm.RegularLayout(Map,Pressure,Averages.Press); - ScaLBL_Comm.RegularLayout(Map,&Velocity[0],Averages.Vel_x); - ScaLBL_Comm.RegularLayout(Map,&Velocity[Np],Averages.Vel_y); - ScaLBL_Comm.RegularLayout(Map,&Velocity[2*Np],Averages.Vel_z); + d_ScaLBL_Comm.RegularLayout(d_Map,Pressure,Averages.Press); + d_ScaLBL_Comm.RegularLayout(d_Map,&Velocity[0],Averages.Vel_x); + d_ScaLBL_Comm.RegularLayout(d_Map,&Velocity[d_Np],Averages.Vel_y); + d_ScaLBL_Comm.RegularLayout(d_Map,&Velocity[2*d_Np],Averages.Vel_z); PROFILE_STOP("Copy-State",1); } std::shared_ptr cDen, cfq; if ( matches(type,AnalysisType::CreateRestart) ) { // Copy restart data to the CPU - cDen = std::shared_ptr(new double[2*Np],DeleteArray); - cfq = std::shared_ptr(new double[19*Np],DeleteArray); - ScaLBL_CopyToHost(cfq.get(),fq,19*Np*sizeof(double)); - ScaLBL_CopyToHost(cDen.get(),Den,2*Np*sizeof(double)); + cDen = std::shared_ptr(new double[2*d_Np],DeleteArray); + cfq = std::shared_ptr(new double[19*d_Np],DeleteArray); + ScaLBL_CopyToHost(cfq.get(),fq,19*d_Np*sizeof(double)); + ScaLBL_CopyToHost(cDen.get(),Den,2*d_Np*sizeof(double)); } PROFILE_STOP("Copy data to host",1); @@ -344,30 +498,29 @@ void run_analysis( int timestep, int restart_interval, int ANALYSIS_INTERVAL, in BlobIDstruct new_index(new std::pair(0,IntArray())); BlobIDstruct new_ids(new std::pair(0,IntArray())); BlobIDList new_list(new std::vector()); - auto work1 = new BlobIdentificationWorkItem1(timestep,Nx,Ny,Nz,rank_info, - phase,Averages.SDs,last_ids,new_index,new_ids,new_list); - auto work2 = new BlobIdentificationWorkItem2(timestep,Nx,Ny,Nz,rank_info, - phase,Averages.SDs,last_ids,new_index,new_ids,new_list); - work1->add_dependency(wait.blobID); - work2->add_dependency(tpool.add_work(work1)); - wait.blobID = tpool.add_work(work2); - last_index = new_index; - last_ids = new_ids; - last_id_map = new_list; + auto work1 = new BlobIdentificationWorkItem1(timestep,d_N[0],d_N[1],d_N[2],d_rank_info, + phase,Averages.SDs,d_last_ids,new_index,new_ids,new_list,getComm()); + auto work2 = new BlobIdentificationWorkItem2(timestep,d_N[0],d_N[1],d_N[2],d_rank_info, + phase,Averages.SDs,d_last_ids,new_index,new_ids,new_list,getComm()); + work1->add_dependency(d_wait_blobID); + work2->add_dependency(d_tpool.add_work(work1)); + d_wait_blobID = d_tpool.add_work(work2); + d_last_index = new_index; + d_last_ids = new_ids; + d_last_id_map = new_list; } // Spawn threads to do the analysis work if ( matches(type,AnalysisType::ComputeAverages) ) { - auto work = new AnalysisWorkItem(type,timestep,Averages,last_index,last_id_map,beta); - work->add_dependency(wait.blobID); - work->add_dependency(wait.analysis); - work->add_dependency(wait.vis); // Make sure we are done using analysis before modifying - wait.analysis = tpool.add_work(work); + auto work = new AnalysisWorkItem(type,timestep,Averages,d_last_index,d_last_id_map,d_beta); + work->add_dependency(d_wait_blobID); + work->add_dependency(d_wait_analysis); + work->add_dependency(d_wait_vis); // Make sure we are done using analysis before modifying + d_wait_analysis = d_tpool.add_work(work); } // Spawn a thread to write the restart file if ( matches(type,AnalysisType::CreateRestart) ) { - int rank = MPI_WORLD_RANK(); /* if (pBC) { err = fabs(sat_w - sat_w_previous); sat_w_previous = sat_w; @@ -375,32 +528,28 @@ void run_analysis( int timestep, int restart_interval, int ANALYSIS_INTERVAL, in printf("Timestep %i: change in saturation since last checkpoint is %f \n",timestep,err); } } */ - // Wait for previous restart files to finish writing (not necessary, but helps to ensure memory usage is limited) - tpool.wait(wait.restart); // Retain the timestep associated with the restart files - if (rank==0) { + if (d_rank==0) { FILE *Rst = fopen("Restart.txt","w"); fprintf(Rst,"%i\n",timestep+4); fclose(Rst); } // Write the restart file (using a seperate thread) - auto work = new WriteRestartWorkItem(LocalRestartFile,cDen,cfq,Np); - work->add_dependency(wait.restart); - wait.restart = tpool.add_work(work); + auto work = new WriteRestartWorkItem(d_restartFile.c_str(),cDen,cfq,d_Np); + work->add_dependency(d_wait_restart); + d_wait_restart = d_tpool.add_work(work); } // Save the results for visualization if ( matches(type,AnalysisType::CreateRestart) ) { - // Wait for previous restart files to finish writing (not necessary, but helps to ensure memory usage is limited) - tpool.wait(wait.vis); // Write the vis files - auto work = new WriteVisWorkItem( timestep, visData, Averages, fillData ); - work->add_dependency(wait.blobID); - work->add_dependency(wait.analysis); - work->add_dependency(wait.vis); - wait.vis = tpool.add_work(work); + auto work = new WriteVisWorkItem( timestep, d_meshData, Averages, d_fillData, getComm() ); + work->add_dependency(d_wait_blobID); + work->add_dependency(d_wait_analysis); + work->add_dependency(d_wait_vis); + d_wait_vis = d_tpool.add_work(work); } - PROFILE_STOP("start_analysis"); + PROFILE_STOP("run"); } diff --git a/analysis/runAnalysis.h b/analysis/runAnalysis.h new file mode 100644 index 00000000..a2b00115 --- /dev/null +++ b/analysis/runAnalysis.h @@ -0,0 +1,109 @@ +#ifndef RunAnalysis_H_INC +#define RunAnalysis_H_INC + +#include "analysis/analysis.h" +#include "analysis/TwoPhase.h" +#include "common/Communication.h" +#include "common/ScaLBL.h" +#include "threadpool/thread_pool.h" + + +typedef std::shared_ptr> BlobIDstruct; +typedef std::shared_ptr> BlobIDList; + + +// Types of analysis +enum class AnalysisType : uint64_t { AnalyzeNone=0, IdentifyBlobs=0x01, CopyPhaseIndicator=0x02, + CopySimState=0x04, ComputeAverages=0x08, CreateRestart=0x10, WriteVis=0x20 }; + + +//! Class to run the analysis in multiple threads +class runAnalysis +{ +public: + + //! Constructor + runAnalysis( int N_threads, int restart_interval, int analysis_interval, int blobid_interval, + const RankInfoStruct& rank_info, const ScaLBL_Communicator &ScaLBL_Comm, const Domain& dm, + int Np, int Nx, int Ny, int Nz, double Lx, double Ly, double Lz, bool pBC, double beta, double err, + IntArray Map, const std::string& LocalRestartFile ); + + //! Destructor + ~runAnalysis(); + + //! Run the next analysis + void run( int timestep, TwoPhase& Averages, const double *Phi, + double *Pressure, double *Velocity, double *fq, double *Den ); + + //! Finish all active analysis + void finish(); + + /*! + * \brief Set the affinities + * \details This function will set the affinity of this thread and all analysis threads + * @param[in] method Method used to control the affinities: + * none - Don't do anything + */ + void setAffinities( const std::string& method = "none" ); + + +private: + + runAnalysis(); + + // Determine the analysis to perform + AnalysisType computeAnalysisType( int timestep ); + +public: + + class commWrapper + { + public: + MPI_Comm comm; + int tag; + runAnalysis *analysis; + commWrapper( int tag, MPI_Comm comm, runAnalysis *analysis ); + commWrapper( ) = delete; + commWrapper( const commWrapper &rhs ) = delete; + commWrapper& operator=( const commWrapper &rhs ) = delete; + commWrapper( commWrapper &&rhs ); + ~commWrapper(); + }; + + // Get a comm (not thread safe) + commWrapper getComm( ); + +private: + + int d_N[3]; + int d_Np; + int d_rank; + int d_restart_interval, d_analysis_interval, d_blobid_interval; + double d_beta; + ThreadPool d_tpool; + ScaLBL_Communicator d_ScaLBL_Comm; + RankInfoStruct d_rank_info; + IntArray d_Map; + BlobIDstruct d_last_ids; + BlobIDstruct d_last_index; + BlobIDList d_last_id_map; + std::vector d_meshData; + fillHalo d_fillData; + std::string d_restartFile; + MPI_Comm d_comm; + MPI_Comm d_comms[1024]; + volatile bool d_comm_used[1024]; + + // Ids of work items to use for dependencies + ThreadPool::thread_id_t d_wait_blobID; + ThreadPool::thread_id_t d_wait_analysis; + ThreadPool::thread_id_t d_wait_vis; + ThreadPool::thread_id_t d_wait_restart; + + // Friends + friend commWrapper::~commWrapper(); + +}; + +#endif + diff --git a/common/ScaLBL.cpp b/common/ScaLBL.cpp new file mode 100644 index 00000000..b430fa8a --- /dev/null +++ b/common/ScaLBL.cpp @@ -0,0 +1,4077 @@ +#include "common/Domain.h" +#include "common/ScaLBL.h" + + +ScaLBL_Communicator::ScaLBL_Communicator(Domain &Dm){ + //...................................................................................... + Lock=false; // unlock the communicator + //...................................................................................... + // Create a separate copy of the communicator for the device + //MPI_Comm_group(Dm.Comm,&Group); + //MPI_Comm_create(Dm.Comm,Group,&MPI_COMM_SCALBL); + MPI_Comm_dup(Dm.Comm,&MPI_COMM_SCALBL); + //...................................................................................... + // Copy the domain size and communication information directly from Dm + Nx = Dm.Nx; + Ny = Dm.Ny; + Nz = Dm.Nz; + N = Nx*Ny*Nz; + next=0; + rank=Dm.rank; + rank_x=Dm.rank_x; + rank_y=Dm.rank_y; + rank_z=Dm.rank_z; + rank_X=Dm.rank_X; + rank_Y=Dm.rank_Y; + rank_Z=Dm.rank_Z; + rank_xy=Dm.rank_xy; + rank_XY=Dm.rank_XY; + rank_xY=Dm.rank_xY; + rank_Xy=Dm.rank_Xy; + rank_xz=Dm.rank_xz; + rank_XZ=Dm.rank_XZ; + rank_xZ=Dm.rank_xZ; + rank_Xz=Dm.rank_Xz; + rank_yz=Dm.rank_yz; + rank_YZ=Dm.rank_YZ; + rank_yZ=Dm.rank_yZ; + rank_Yz=Dm.rank_Yz; + sendCount_x=Dm.sendCount_x; + sendCount_y=Dm.sendCount_y; + sendCount_z=Dm.sendCount_z; + sendCount_X=Dm.sendCount_X; + sendCount_Y=Dm.sendCount_Y; + sendCount_Z=Dm.sendCount_Z; + sendCount_xy=Dm.sendCount_xy; + sendCount_yz=Dm.sendCount_yz; + sendCount_xz=Dm.sendCount_xz; + sendCount_Xy=Dm.sendCount_Xy; + sendCount_Yz=Dm.sendCount_Yz; + sendCount_xZ=Dm.sendCount_xZ; + sendCount_xY=Dm.sendCount_xY; + sendCount_yZ=Dm.sendCount_yZ; + sendCount_Xz=Dm.sendCount_Xz; + sendCount_XY=Dm.sendCount_XY; + sendCount_YZ=Dm.sendCount_YZ; + sendCount_XZ=Dm.sendCount_XZ; + recvCount_x=Dm.recvCount_x; + recvCount_y=Dm.recvCount_y; + recvCount_z=Dm.recvCount_z; + recvCount_X=Dm.recvCount_X; + recvCount_Y=Dm.recvCount_Y; + recvCount_Z=Dm.recvCount_Z; + recvCount_xy=Dm.recvCount_xy; + recvCount_yz=Dm.recvCount_yz; + recvCount_xz=Dm.recvCount_xz; + recvCount_Xy=Dm.recvCount_Xy; + recvCount_Yz=Dm.recvCount_Yz; + recvCount_xZ=Dm.recvCount_xZ; + recvCount_xY=Dm.recvCount_xY; + recvCount_yZ=Dm.recvCount_yZ; + recvCount_Xz=Dm.recvCount_Xz; + recvCount_XY=Dm.recvCount_XY; + recvCount_YZ=Dm.recvCount_YZ; + recvCount_XZ=Dm.recvCount_XZ; + + iproc = Dm.iproc; + jproc = Dm.jproc; + kproc = Dm.kproc; + nprocx = Dm.nprocx; + nprocy = Dm.nprocy; + nprocz = Dm.nprocz; + //...................................................................................... + + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_x, 5*sendCount_x*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_X, 5*sendCount_X*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_y, 5*sendCount_y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Y, 5*sendCount_Y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_z, 5*sendCount_z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Z, 5*sendCount_Z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_xy, sendCount_xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_xY, sendCount_xY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Xy, sendCount_Xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_XY, sendCount_XY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_xz, sendCount_xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_xZ, sendCount_xZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Xz, sendCount_Xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_XZ, sendCount_XZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_yz, sendCount_yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_yZ, sendCount_yZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_Yz, sendCount_Yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &sendbuf_YZ, sendCount_YZ*sizeof(double)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_x, 5*recvCount_x*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_X, 5*recvCount_X*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_y, 5*recvCount_y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Y, 5*recvCount_Y*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_z, 5*recvCount_z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Z, 5*recvCount_Z*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_xy, recvCount_xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_xY, recvCount_xY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Xy, recvCount_Xy*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_XY, recvCount_XY*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_xz, recvCount_xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_xZ, recvCount_xZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Xz, recvCount_Xz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_XZ, recvCount_XZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_yz, recvCount_yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_yZ, recvCount_yZ*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_Yz, recvCount_Yz*sizeof(double)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &recvbuf_YZ, recvCount_YZ*sizeof(double)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_x, sendCount_x*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_X, sendCount_X*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_y, sendCount_y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Y, sendCount_Y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_z, sendCount_z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Z, sendCount_Z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_xy, sendCount_xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_xY, sendCount_xY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Xy, sendCount_Xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_XY, sendCount_XY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_xz, sendCount_xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_xZ, sendCount_xZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Xz, sendCount_Xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_XZ, sendCount_XZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_yz, sendCount_yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_yZ, sendCount_yZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_Yz, sendCount_Yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcSendList_YZ, sendCount_YZ*sizeof(int)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_x, recvCount_x*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_X, recvCount_X*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_y, recvCount_y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Y, recvCount_Y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_z, recvCount_z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Z, recvCount_Z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_xy, recvCount_xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_xY, recvCount_xY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_XY, recvCount_XY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_xz, recvCount_xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_yz, recvCount_yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvList_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory + //...................................................................................... + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_x, 5*recvCount_x*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_X, 5*recvCount_X*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_y, 5*recvCount_y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Y, 5*recvCount_Y*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_z, 5*recvCount_z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Z, 5*recvCount_Z*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xy, recvCount_xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xY, recvCount_xY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Xy, recvCount_Xy*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_XY, recvCount_XY*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xz, recvCount_xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_xZ, recvCount_xZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Xz, recvCount_Xz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_XZ, recvCount_XZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_yz, recvCount_yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_yZ, recvCount_yZ*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_Yz, recvCount_Yz*sizeof(int)); // Allocate device memory + ScaLBL_AllocateDeviceMemory((void **) &dvcRecvDist_YZ, recvCount_YZ*sizeof(int)); // Allocate device memory + //...................................................................................... + + ScaLBL_CopyToDevice(dvcSendList_x,Dm.sendList_x,sendCount_x*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_X,Dm.sendList_X,sendCount_X*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_y,Dm.sendList_y,sendCount_y*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_Y,Dm.sendList_Y,sendCount_Y*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_z,Dm.sendList_z,sendCount_z*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_Z,Dm.sendList_Z,sendCount_Z*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_xy,Dm.sendList_xy,sendCount_xy*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_XY,Dm.sendList_XY,sendCount_XY*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_xY,Dm.sendList_xY,sendCount_xY*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_Xy,Dm.sendList_Xy,sendCount_Xy*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_xz,Dm.sendList_xz,sendCount_xz*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_XZ,Dm.sendList_XZ,sendCount_XZ*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_xZ,Dm.sendList_xZ,sendCount_xZ*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_Xz,Dm.sendList_Xz,sendCount_Xz*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_yz,Dm.sendList_yz,sendCount_yz*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_YZ,Dm.sendList_YZ,sendCount_YZ*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_yZ,Dm.sendList_yZ,sendCount_yZ*sizeof(int)); + ScaLBL_CopyToDevice(dvcSendList_Yz,Dm.sendList_Yz,sendCount_Yz*sizeof(int)); + //...................................................................................... + ScaLBL_CopyToDevice(dvcRecvList_x,Dm.recvList_x,recvCount_x*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_X,Dm.recvList_X,recvCount_X*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_y,Dm.recvList_y,recvCount_y*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_Y,Dm.recvList_Y,recvCount_Y*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_z,Dm.recvList_z,recvCount_z*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_Z,Dm.recvList_Z,recvCount_Z*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_xy,Dm.recvList_xy,recvCount_xy*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_XY,Dm.recvList_XY,recvCount_XY*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_xY,Dm.recvList_xY,recvCount_xY*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_Xy,Dm.recvList_Xy,recvCount_Xy*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_xz,Dm.recvList_xz,recvCount_xz*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_XZ,Dm.recvList_XZ,recvCount_XZ*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_xZ,Dm.recvList_xZ,recvCount_xZ*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_Xz,Dm.recvList_Xz,recvCount_Xz*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_yz,Dm.recvList_yz,recvCount_yz*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_YZ,Dm.recvList_YZ,recvCount_YZ*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_yZ,Dm.recvList_yZ,recvCount_yZ*sizeof(int)); + ScaLBL_CopyToDevice(dvcRecvList_Yz,Dm.recvList_Yz,recvCount_Yz*sizeof(int)); + //...................................................................................... + + MPI_Barrier(MPI_COMM_SCALBL); + + //................................................................................... + // Set up the recieve distribution lists + //................................................................................... + //...Map recieve list for the X face: q=2,8,10,12,14 ................................. + D3Q19_MapRecv(-1,0,0,Dm.recvList_X,0,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,-1,0,Dm.recvList_X,recvCount_X,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,1,0,Dm.recvList_X,2*recvCount_X,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,0,-1,Dm.recvList_X,3*recvCount_X,recvCount_X,dvcRecvDist_X); + D3Q19_MapRecv(-1,0,1,Dm.recvList_X,4*recvCount_X,recvCount_X,dvcRecvDist_X); + //................................................................................... + //...Map recieve list for the x face: q=1,7,9,11,13.................................. + D3Q19_MapRecv(1,0,0,Dm.recvList_x,0,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,1,0,Dm.recvList_x,recvCount_x,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,-1,0,Dm.recvList_x,2*recvCount_x,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,0,1,Dm.recvList_x,3*recvCount_x,recvCount_x,dvcRecvDist_x); + D3Q19_MapRecv(1,0,-1,Dm.recvList_x,4*recvCount_x,recvCount_x,dvcRecvDist_x); + //................................................................................... + //...Map recieve list for the y face: q=4,8,9,16,18 ................................... + D3Q19_MapRecv(0,-1,0,Dm.recvList_Y,0,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(-1,-1,0,Dm.recvList_Y,recvCount_Y,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(1,-1,0,Dm.recvList_Y,2*recvCount_Y,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(0,-1,-1,Dm.recvList_Y,3*recvCount_Y,recvCount_Y,dvcRecvDist_Y); + D3Q19_MapRecv(0,-1,1,Dm.recvList_Y,4*recvCount_Y,recvCount_Y,dvcRecvDist_Y); + //................................................................................... + //...Map recieve list for the Y face: q=3,7,10,15,17 .................................. + D3Q19_MapRecv(0,1,0,Dm.recvList_y,0,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(1,1,0,Dm.recvList_y,recvCount_y,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(-1,1,0,Dm.recvList_y,2*recvCount_y,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(0,1,1,Dm.recvList_y,3*recvCount_y,recvCount_y,dvcRecvDist_y); + D3Q19_MapRecv(0,1,-1,Dm.recvList_y,4*recvCount_y,recvCount_y,dvcRecvDist_y); + //................................................................................... + //...Map recieve list for the z face<<<6,12,13,16,17).............................................. + D3Q19_MapRecv(0,0,-1,Dm.recvList_Z,0,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(-1,0,-1,Dm.recvList_Z,recvCount_Z,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(1,0,-1,Dm.recvList_Z,2*recvCount_Z,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(0,-1,-1,Dm.recvList_Z,3*recvCount_Z,recvCount_Z,dvcRecvDist_Z); + D3Q19_MapRecv(0,1,-1,Dm.recvList_Z,4*recvCount_Z,recvCount_Z,dvcRecvDist_Z); + //...Map recieve list for the Z face<<<5,11,14,15,18).............................................. + D3Q19_MapRecv(0,0,1,Dm.recvList_z,0,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(1,0,1,Dm.recvList_z,recvCount_z,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(-1,0,1,Dm.recvList_z,2*recvCount_z,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(0,1,1,Dm.recvList_z,3*recvCount_z,recvCount_z,dvcRecvDist_z); + D3Q19_MapRecv(0,-1,1,Dm.recvList_z,4*recvCount_z,recvCount_z,dvcRecvDist_z); + //.................................................................................. + //...Map recieve list for the xy edge <<<8)................................ + D3Q19_MapRecv(-1,-1,0,Dm.recvList_XY,0,recvCount_XY,dvcRecvDist_XY); + //...Map recieve list for the Xy edge <<<9)................................ + D3Q19_MapRecv(1,-1,0,Dm.recvList_xY,0,recvCount_xY,dvcRecvDist_xY); + //...Map recieve list for the xY edge <<<10)................................ + D3Q19_MapRecv(-1,1,0,Dm.recvList_Xy,0,recvCount_Xy,dvcRecvDist_Xy); + //...Map recieve list for the XY edge <<<7)................................ + D3Q19_MapRecv(1,1,0,Dm.recvList_xy,0,recvCount_xy,dvcRecvDist_xy); + //...Map recieve list for the xz edge <<<12)................................ + D3Q19_MapRecv(-1,0,-1,Dm.recvList_XZ,0,recvCount_XZ,dvcRecvDist_XZ); + //...Map recieve list for the xZ edge <<<14)................................ + D3Q19_MapRecv(-1,0,1,Dm.recvList_Xz,0,recvCount_Xz,dvcRecvDist_Xz); + //...Map recieve list for the Xz edge <<<13)................................ + D3Q19_MapRecv(1,0,-1,Dm.recvList_xZ,0,recvCount_xZ,dvcRecvDist_xZ); + //...Map recieve list for the XZ edge <<<11)................................ + D3Q19_MapRecv(1,0,1,Dm.recvList_xz,0,recvCount_xz,dvcRecvDist_xz); + //...Map recieve list for the yz edge <<<16)................................ + D3Q19_MapRecv(0,-1,-1,Dm.recvList_YZ,0,recvCount_YZ,dvcRecvDist_YZ); + //...Map recieve list for the yZ edge <<<18)................................ + D3Q19_MapRecv(0,-1,1,Dm.recvList_Yz,0,recvCount_Yz,dvcRecvDist_Yz); + //...Map recieve list for the Yz edge <<<17)................................ + D3Q19_MapRecv(0,1,-1,Dm.recvList_yZ,0,recvCount_yZ,dvcRecvDist_yZ); + //...Map recieve list for the YZ edge <<<15)................................ + D3Q19_MapRecv(0,1,1,Dm.recvList_yz,0,recvCount_yz,dvcRecvDist_yz); + //................................................................................... + + //...................................................................................... + MPI_Barrier(MPI_COMM_SCALBL); + ScaLBL_DeviceBarrier(); + //...................................................................................... + SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+ + sendCount_xy+sendCount_Xy+sendCount_xY+sendCount_XY+ + sendCount_xZ+sendCount_Xz+sendCount_xZ+sendCount_XZ+ + sendCount_yz+sendCount_Yz+sendCount_yZ+sendCount_YZ; + + RecvCount = recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z+ + recvCount_xy+recvCount_Xy+recvCount_xY+recvCount_XY+ + recvCount_xZ+recvCount_Xz+recvCount_xZ+recvCount_XZ+ + recvCount_yz+recvCount_Yz+recvCount_yZ+recvCount_YZ; + + CommunicationCount = SendCount+RecvCount; + //...................................................................................... + +} + + +ScaLBL_Communicator::~ScaLBL_Communicator(){ + // destrutor does nothing (bad idea) + // -- note that there needs to be a way to free memory allocated on the device!!! +} + +void ScaLBL_Communicator::D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, int *list, int start, int count, + int *d3q19_recvlist){ + int i,j,k,n,nn,idx; + int * ReturnDist; + ReturnDist=new int [count]; + + for (idx=0; idx Np ){ + ERROR("ScaLBL_Communicator::MemoryDenseLayout: Failed to create memory efficient layout!\n"); + } + + // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryDenseLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + // note that only odd distributions need to be stored to execute the swap algorithm + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i+1,j,k); + if (neighbor==-2) neighborList[idx]=-1; + else if (neighbor<0) neighborList[idx]=idx; + else neighborList[idx]=neighbor; + + neighbor=Map(i,j+1,k); + if (neighbor==-2) neighborList[Np+idx]=-1; + else if (neighbor<0) neighborList[Np+idx]=idx; + else neighborList[Np+idx]=neighbor; + + neighbor=Map(i,j,k+1); + if (neighbor==-2) neighborList[2*Np+idx]=-1; + else if (neighbor<0) neighborList[2*Np+idx]=idx; + else neighborList[2*Np+idx]=neighbor; + + neighbor=Map(i+1,j+1,k); + if (neighbor==-2) neighborList[3*Np+idx]=-1; + else if (neighbor<0) neighborList[3*Np+idx]=idx; + else neighborList[3*Np+idx]=neighbor; + + neighbor=Map(i+1,j-1,k); + if (neighbor==-2) neighborList[4*Np+idx]=-1; + else if (neighbor<0) neighborList[4*Np+idx]=idx; + else neighborList[4*Np+idx]=neighbor; + + neighbor=Map(i+1,j,k+1); + if (neighbor==-2) neighborList[5*Np+idx]=-1; + else if (neighbor<0) neighborList[5*Np+idx]=idx; + else neighborList[5*Np+idx]=neighbor; + + neighbor=Map(i+1,j,k-1); + if (neighbor==-2) neighborList[6*Np+idx]=-1; + else if (neighbor<0) neighborList[6*Np+idx]=idx; + else neighborList[6*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k+1); + if (neighbor==-2) neighborList[7*Np+idx]=-1; + else if (neighbor<0) neighborList[7*Np+idx]=idx; + else neighborList[7*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k-1); + if (neighbor==-2) neighborList[8*Np+idx]=-1; + else if (neighbor<0) neighborList[8*Np+idx]=idx; + else neighborList[8*Np+idx]=neighbor; + } + } + } + } + + //for (idx=0; idx Np ){ + ERROR("ScaLBL_Communicator::MemoryDenseLayoutFull: Failed to create memory efficient layout!\n"); + } + + // if (rank == 0) { + // printf("* Displaying the final map from rank %d\n",rank); + // + // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryDenseLayoutFull: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + + + /* + * Storing the full neighbor list. The AA algorithm may require fewer neighbors but I'm saving everything for now... + * + */ + + + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i+1,j,k); + if (neighbor==-2) neighborList[idx]=-1; + else if (neighbor<0) neighborList[idx]=idx; + else neighborList[idx]=neighbor; + + // 2 + neighbor=Map(i-1,j,k); + if (neighbor==-2) neighborList[Np+idx]=-1; + else if (neighbor<0) neighborList[Np+idx]=idx; + else neighborList[Np+idx]=neighbor; + + neighbor=Map(i,j+1,k); + if (neighbor==-2) neighborList[2*Np+idx]=-1; + else if (neighbor<0) neighborList[2*Np+idx]=idx; + else neighborList[2*Np+idx]=neighbor; + + // 4 + neighbor=Map(i,j-1,k); + if (neighbor==-2) neighborList[3*Np+idx]=-1; + else if (neighbor<0) neighborList[3*Np+idx]=idx; + else neighborList[3*Np+idx]=neighbor; + + + neighbor=Map(i,j,k+1); + if (neighbor==-2) neighborList[4*Np+idx]=-1; + else if (neighbor<0) neighborList[4*Np+idx]=idx; + else neighborList[4*Np+idx]=neighbor; + + // 6 + neighbor=Map(i,j,k-1); + if (neighbor==-2) neighborList[5*Np+idx]=-1; + else if (neighbor<0) neighborList[5*Np+idx]=idx; + else neighborList[5*Np+idx]=neighbor; + + neighbor=Map(i+1,j+1,k); + if (neighbor==-2) neighborList[6*Np+idx]=-1; + else if (neighbor<0) neighborList[6*Np+idx]=idx; + else neighborList[6*Np+idx]=neighbor; + + // 8 + neighbor=Map(i-1,j-1,k); + if (neighbor==-2) neighborList[7*Np+idx]=-1; + else if (neighbor<0) neighborList[7*Np+idx]=idx; + else neighborList[7*Np+idx]=neighbor; + + + neighbor=Map(i+1,j-1,k); + if (neighbor==-2) neighborList[8*Np+idx]=-1; + else if (neighbor<0) neighborList[8*Np+idx]=idx; + else neighborList[8*Np+idx]=neighbor; + + // 10 + neighbor=Map(i-1,j+1,k); + if (neighbor==-2) neighborList[9*Np+idx]=-1; + else if (neighbor<0) neighborList[9*Np+idx]=idx; + else neighborList[9*Np+idx]=neighbor; + + + neighbor=Map(i+1,j,k+1); + if (neighbor==-2) neighborList[10*Np+idx]=-1; + else if (neighbor<0) neighborList[10*Np+idx]=idx; + else neighborList[10*Np+idx]=neighbor; + + // 12 + neighbor=Map(i-1,j,k-1); + if (neighbor==-2) neighborList[11*Np+idx]=-1; + else if (neighbor<0) neighborList[11*Np+idx]=idx; + else neighborList[11*Np+idx]=neighbor; + + + neighbor=Map(i+1,j,k-1); + if (neighbor==-2) neighborList[12*Np+idx]=-1; + else if (neighbor<0) neighborList[12*Np+idx]=idx; + else neighborList[12*Np+idx]=neighbor; + + // 14 + neighbor=Map(i-1,j,k+1); + if (neighbor==-2) neighborList[13*Np+idx]=-1; + else if (neighbor<0) neighborList[13*Np+idx]=idx; + else neighborList[13*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k+1); + if (neighbor==-2) neighborList[14*Np+idx]=-1; + else if (neighbor<0) neighborList[14*Np+idx]=idx; + else neighborList[14*Np+idx]=neighbor; + + // 16 + neighbor=Map(i,j-1,k-1); + if (neighbor==-2) neighborList[15*Np+idx]=-1; + else if (neighbor<0) neighborList[15*Np+idx]=idx; + else neighborList[15*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k-1); + if (neighbor==-2) neighborList[16*Np+idx]=-1; + else if (neighbor<0) neighborList[16*Np+idx]=idx; + else neighborList[16*Np+idx]=neighbor; + + // 18 + neighbor=Map(i,j-1,k+1); + if (neighbor==-2) neighborList[17*Np+idx]=-1; + else if (neighbor<0) neighborList[17*Np+idx]=idx; + else neighborList[17*Np+idx]=neighbor; + + + } + } + } + } + + //....................................................................... + // Now map through SendList and RecvList to update indices + // First loop over the send lists + + int *TempBuffer; + TempBuffer = new int [5*RecvCount]; + + //....................................................................... + // Re-index the send lists + ScaLBL_CopyToHost(TempBuffer,dvcSendList_x,sendCount_x*sizeof(int)); + + for (i=0; i Np ){ + ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); + } + + // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + // note that only odd distributions need to be stored to execute the swap algorithm + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i+1,j,k); + if (neighbor==-2) neighborList[idx]=-1; + else if (neighbor<0) neighborList[idx]=idx; + else neighborList[idx]=neighbor; + + neighbor=Map(i,j+1,k); + if (neighbor==-2) neighborList[Np+idx]=-1; + else if (neighbor<0) neighborList[Np+idx]=idx; + else neighborList[Np+idx]=neighbor; + + neighbor=Map(i,j,k+1); + if (neighbor==-2) neighborList[2*Np+idx]=-1; + else if (neighbor<0) neighborList[2*Np+idx]=idx; + else neighborList[2*Np+idx]=neighbor; + + neighbor=Map(i+1,j+1,k); + if (neighbor==-2) neighborList[3*Np+idx]=-1; + else if (neighbor<0) neighborList[3*Np+idx]=idx; + else neighborList[3*Np+idx]=neighbor; + + neighbor=Map(i+1,j-1,k); + if (neighbor==-2) neighborList[4*Np+idx]=-1; + else if (neighbor<0) neighborList[4*Np+idx]=idx; + else neighborList[4*Np+idx]=neighbor; + + neighbor=Map(i+1,j,k+1); + if (neighbor==-2) neighborList[5*Np+idx]=-1; + else if (neighbor<0) neighborList[5*Np+idx]=idx; + else neighborList[5*Np+idx]=neighbor; + + neighbor=Map(i+1,j,k-1); + if (neighbor==-2) neighborList[6*Np+idx]=-1; + else if (neighbor<0) neighborList[6*Np+idx]=idx; + else neighborList[6*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k+1); + if (neighbor==-2) neighborList[7*Np+idx]=-1; + else if (neighbor<0) neighborList[7*Np+idx]=idx; + else neighborList[7*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k-1); + if (neighbor==-2) neighborList[8*Np+idx]=-1; + else if (neighbor<0) neighborList[8*Np+idx]=idx; + else neighborList[8*Np+idx]=neighbor; + } + } + } + } + + //for (idx=0; idx Np ){ + ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); + } + + // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + // note that only odd distributions need to be stored to execute the swap algorithm + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i+1,j,k); + if (neighbor==-2) neighborList[idx]=-1; + else if (neighbor<0) neighborList[idx]=idx; + else neighborList[idx]=neighbor; + + neighbor=Map(i-1,j,k); + if (neighbor==-2) neighborList[Np+idx]=-1; + else if (neighbor<0) neighborList[Np+idx]=idx; + else neighborList[Np+idx]=neighbor; + + neighbor=Map(i,j+1,k); + if (neighbor==-2) neighborList[2*Np+idx]=-1; + else if (neighbor<0) neighborList[2*Np+idx]=idx; + else neighborList[2*Np+idx]=neighbor; + + neighbor=Map(i,j-1,k); + if (neighbor==-2) neighborList[3*Np+idx]=-1; + else if (neighbor<0) neighborList[3*Np+idx]=idx; + else neighborList[3*Np+idx]=neighbor; + + + neighbor=Map(i,j,k+1); + if (neighbor==-2) neighborList[4*Np+idx]=-1; + else if (neighbor<0) neighborList[4*Np+idx]=idx; + else neighborList[4*Np+idx]=neighbor; + + neighbor=Map(i,j,k-1); + if (neighbor==-2) neighborList[5*Np+idx]=-1; + else if (neighbor<0) neighborList[5*Np+idx]=idx; + else neighborList[5*Np+idx]=neighbor; + + neighbor=Map(i+1,j+1,k); + if (neighbor==-2) neighborList[6*Np+idx]=-1; + else if (neighbor<0) neighborList[6*Np+idx]=idx; + else neighborList[6*Np+idx]=neighbor; + + neighbor=Map(i-1,j-1,k); + if (neighbor==-2) neighborList[7*Np+idx]=-1; + else if (neighbor<0) neighborList[7*Np+idx]=idx; + else neighborList[7*Np+idx]=neighbor; + + + neighbor=Map(i+1,j-1,k); + if (neighbor==-2) neighborList[8*Np+idx]=-1; + else if (neighbor<0) neighborList[8*Np+idx]=idx; + else neighborList[8*Np+idx]=neighbor; + + neighbor=Map(i-1,j+1,k); + if (neighbor==-2) neighborList[9*Np+idx]=-1; + else if (neighbor<0) neighborList[9*Np+idx]=idx; + else neighborList[9*Np+idx]=neighbor; + + + neighbor=Map(i+1,j,k+1); + if (neighbor==-2) neighborList[10*Np+idx]=-1; + else if (neighbor<0) neighborList[10*Np+idx]=idx; + else neighborList[10*Np+idx]=neighbor; + + neighbor=Map(i-1,j,k-1); + if (neighbor==-2) neighborList[11*Np+idx]=-1; + else if (neighbor<0) neighborList[11*Np+idx]=idx; + else neighborList[11*Np+idx]=neighbor; + + + neighbor=Map(i+1,j,k-1); + if (neighbor==-2) neighborList[12*Np+idx]=-1; + else if (neighbor<0) neighborList[12*Np+idx]=idx; + else neighborList[12*Np+idx]=neighbor; + + neighbor=Map(i-1,j,k+1); + if (neighbor==-2) neighborList[13*Np+idx]=-1; + else if (neighbor<0) neighborList[13*Np+idx]=idx; + else neighborList[13*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k+1); + if (neighbor==-2) neighborList[14*Np+idx]=-1; + else if (neighbor<0) neighborList[14*Np+idx]=idx; + else neighborList[14*Np+idx]=neighbor; + + neighbor=Map(i,j-1,k-1); + if (neighbor==-2) neighborList[15*Np+idx]=-1; + else if (neighbor<0) neighborList[15*Np+idx]=idx; + else neighborList[15*Np+idx]=neighbor; + + neighbor=Map(i,j+1,k-1); + if (neighbor==-2) neighborList[16*Np+idx]=-1; + else if (neighbor<0) neighborList[16*Np+idx]=idx; + else neighborList[16*Np+idx]=neighbor; + + neighbor=Map(i,j-1,k+1); + if (neighbor==-2) neighborList[17*Np+idx]=-1; + else if (neighbor<0) neighborList[17*Np+idx]=idx; + else neighborList[17*Np+idx]=neighbor; + } + } + } + } + + //for (idx=0; idx Np ){ + ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); + } + /* + for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); + else if (!(idx<0)){ + // store the idx associated with each neighbor + // store idx for self if neighbor is in solid or out of domain + //D3Q19 = {{1,0,0},{-1,0,0} + // {0,1,0},{0,-1,0} + // {0,0,1},{0,0,-1}, + // {1,1,0},{-1,-1,0}, + // {1,-1,0},{-1,1,0}, + // {1,0,1},{-1,0,-1}, + // {1,0,-1},{-1,0,1}, + // {0,1,1},{0,-1,-1}, + // {0,1,-1},{0,-1,1}}; + int neighbor; // cycle through the neighbors of lattice site idx + neighbor=Map(i-1,j,k); + if (neighbor<0) neighborList[idx]=idx + 2*Np; + else neighborList[idx]=neighbor + 1*Np; + + neighbor=Map(i+1,j,k); + if (neighbor<0) neighborList[Np+idx] = idx + 1*Np; + else neighborList[Np+idx]= neighbor + 2*Np; + + neighbor=Map(i,j-1,k); + if (neighbor<0) neighborList[2*Np+idx]=idx + 4*Np; + else neighborList[2*Np+idx]=neighbor + 3*Np; + + neighbor=Map(i,j+1,k); + if (neighbor<0) neighborList[3*Np+idx]=idx + 3*Np; + else neighborList[3*Np+idx]=neighbor + 4*Np; + + neighbor=Map(i,j,k-1); + if (neighbor<0) neighborList[4*Np+idx]=idx + 6*Np; + else neighborList[4*Np+idx]=neighbor + 5*Np; + + neighbor=Map(i,j,k+1); + if (neighbor<0) neighborList[5*Np+idx]=idx + 5*Np; + else neighborList[5*Np+idx]=neighbor + 6*Np; + + neighbor=Map(i-1,j-1,k); + if (neighbor<0) neighborList[6*Np+idx]=idx + 8*Np; + else neighborList[6*Np+idx]=neighbor + 7*Np; + + neighbor=Map(i+1,j+1,k); + if (neighbor<0) neighborList[7*Np+idx]=idx + 7*Np; + else neighborList[7*Np+idx]=neighbor+8*Np; + + neighbor=Map(i-1,j+1,k); + if (neighbor<0) neighborList[8*Np+idx]=idx + 10*Np; + else neighborList[8*Np+idx]=neighbor + 9*Np; + + neighbor=Map(i+1,j-1,k); + if (neighbor<0) neighborList[9*Np+idx]=idx + 9*Np; + else neighborList[9*Np+idx]=neighbor + 10*Np; + + neighbor=Map(i-1,j,k-1); + if (neighbor<0) neighborList[10*Np+idx]=idx + 12*Np; + else neighborList[10*Np+idx]=neighbor + 11*Np; + + neighbor=Map(i+1,j,k+1); + if (neighbor<0) neighborList[11*Np+idx]=idx + 11*Np; + else neighborList[11*Np+idx]=neighbor + 12*Np; + + neighbor=Map(i-1,j,k+1); + if (neighbor<0) neighborList[12*Np+idx]=idx + 14*Np; + else neighborList[12*Np+idx]=neighbor + 13*Np; + + neighbor=Map(i+1,j,k-1); + if (neighbor<0) neighborList[13*Np+idx]=idx + 13*Np; + else neighborList[13*Np+idx]=neighbor + 14*Np; + + neighbor=Map(i,j-1,k-1); + if (neighbor<0) neighborList[14*Np+idx]=idx + 16*Np; + else neighborList[14*Np+idx]=neighbor + 15*Np; + + neighbor=Map(i,j+1,k+1); + if (neighbor<0) neighborList[15*Np+idx]=idx + 15*Np; + else neighborList[15*Np+idx]=neighbor + 16*Np; + + neighbor=Map(i,j-1,k+1); + if (neighbor<0) neighborList[16*Np+idx]=idx + 18*Np; + else neighborList[16*Np+idx]=neighbor + 17*Np; + + neighbor=Map(i,j+1,k-1); + if (neighbor<0) neighborList[17*Np+idx]=idx + 17*Np; + else neighborList[17*Np+idx]=neighbor + 18*Np; + } + } + } + } + + //for (idx=0; idx Np ){ - ERROR("ScaLBL_Communicator::MemoryDenseLayout: Failed to create memory efficient layout!\n"); - } - - // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryDenseLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); - else if (!(idx<0)){ - // store the idx associated with each neighbor - // store idx for self if neighbor is in solid or out of domain - //D3Q19 = {{1,0,0},{-1,0,0} - // {0,1,0},{0,-1,0} - // {0,0,1},{0,0,-1}, - // {1,1,0},{-1,-1,0}, - // {1,-1,0},{-1,1,0}, - // {1,0,1},{-1,0,-1}, - // {1,0,-1},{-1,0,1}, - // {0,1,1},{0,-1,-1}, - // {0,1,-1},{0,-1,1}}; - // note that only odd distributions need to be stored to execute the swap algorithm - int neighbor; // cycle through the neighbors of lattice site idx - neighbor=Map(i+1,j,k); - if (neighbor==-2) neighborList[idx]=-1; - else if (neighbor<0) neighborList[idx]=idx; - else neighborList[idx]=neighbor; - - neighbor=Map(i,j+1,k); - if (neighbor==-2) neighborList[Np+idx]=-1; - else if (neighbor<0) neighborList[Np+idx]=idx; - else neighborList[Np+idx]=neighbor; - - neighbor=Map(i,j,k+1); - if (neighbor==-2) neighborList[2*Np+idx]=-1; - else if (neighbor<0) neighborList[2*Np+idx]=idx; - else neighborList[2*Np+idx]=neighbor; - - neighbor=Map(i+1,j+1,k); - if (neighbor==-2) neighborList[3*Np+idx]=-1; - else if (neighbor<0) neighborList[3*Np+idx]=idx; - else neighborList[3*Np+idx]=neighbor; - - neighbor=Map(i+1,j-1,k); - if (neighbor==-2) neighborList[4*Np+idx]=-1; - else if (neighbor<0) neighborList[4*Np+idx]=idx; - else neighborList[4*Np+idx]=neighbor; - - neighbor=Map(i+1,j,k+1); - if (neighbor==-2) neighborList[5*Np+idx]=-1; - else if (neighbor<0) neighborList[5*Np+idx]=idx; - else neighborList[5*Np+idx]=neighbor; - - neighbor=Map(i+1,j,k-1); - if (neighbor==-2) neighborList[6*Np+idx]=-1; - else if (neighbor<0) neighborList[6*Np+idx]=idx; - else neighborList[6*Np+idx]=neighbor; - - neighbor=Map(i,j+1,k+1); - if (neighbor==-2) neighborList[7*Np+idx]=-1; - else if (neighbor<0) neighborList[7*Np+idx]=idx; - else neighborList[7*Np+idx]=neighbor; - - neighbor=Map(i,j+1,k-1); - if (neighbor==-2) neighborList[8*Np+idx]=-1; - else if (neighbor<0) neighborList[8*Np+idx]=idx; - else neighborList[8*Np+idx]=neighbor; - } - } - } - } - - //for (idx=0; idx Np ){ - ERROR("ScaLBL_Communicator::MemoryDenseLayoutFull: Failed to create memory efficient layout!\n"); - } - - // if (rank == 0) { - // printf("* Displaying the final map from rank %d\n",rank); - // - // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryDenseLayoutFull: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); - else if (!(idx<0)){ - // store the idx associated with each neighbor - // store idx for self if neighbor is in solid or out of domain - //D3Q19 = {{1,0,0},{-1,0,0} - // {0,1,0},{0,-1,0} - // {0,0,1},{0,0,-1}, - // {1,1,0},{-1,-1,0}, - // {1,-1,0},{-1,1,0}, - // {1,0,1},{-1,0,-1}, - // {1,0,-1},{-1,0,1}, - // {0,1,1},{0,-1,-1}, - // {0,1,-1},{0,-1,1}}; - - - /* - * Storing the full neighbor list. The AA algorithm may require fewer neighbors but I'm saving everything for now... - * - */ - - - int neighbor; // cycle through the neighbors of lattice site idx - neighbor=Map(i+1,j,k); - if (neighbor==-2) neighborList[idx]=-1; - else if (neighbor<0) neighborList[idx]=idx; - else neighborList[idx]=neighbor; - - // 2 - neighbor=Map(i-1,j,k); - if (neighbor==-2) neighborList[Np+idx]=-1; - else if (neighbor<0) neighborList[Np+idx]=idx; - else neighborList[Np+idx]=neighbor; - - neighbor=Map(i,j+1,k); - if (neighbor==-2) neighborList[2*Np+idx]=-1; - else if (neighbor<0) neighborList[2*Np+idx]=idx; - else neighborList[2*Np+idx]=neighbor; - - // 4 - neighbor=Map(i,j-1,k); - if (neighbor==-2) neighborList[3*Np+idx]=-1; - else if (neighbor<0) neighborList[3*Np+idx]=idx; - else neighborList[3*Np+idx]=neighbor; - - - neighbor=Map(i,j,k+1); - if (neighbor==-2) neighborList[4*Np+idx]=-1; - else if (neighbor<0) neighborList[4*Np+idx]=idx; - else neighborList[4*Np+idx]=neighbor; - - // 6 - neighbor=Map(i,j,k-1); - if (neighbor==-2) neighborList[5*Np+idx]=-1; - else if (neighbor<0) neighborList[5*Np+idx]=idx; - else neighborList[5*Np+idx]=neighbor; - - neighbor=Map(i+1,j+1,k); - if (neighbor==-2) neighborList[6*Np+idx]=-1; - else if (neighbor<0) neighborList[6*Np+idx]=idx; - else neighborList[6*Np+idx]=neighbor; - - // 8 - neighbor=Map(i-1,j-1,k); - if (neighbor==-2) neighborList[7*Np+idx]=-1; - else if (neighbor<0) neighborList[7*Np+idx]=idx; - else neighborList[7*Np+idx]=neighbor; - - - neighbor=Map(i+1,j-1,k); - if (neighbor==-2) neighborList[8*Np+idx]=-1; - else if (neighbor<0) neighborList[8*Np+idx]=idx; - else neighborList[8*Np+idx]=neighbor; - - // 10 - neighbor=Map(i-1,j+1,k); - if (neighbor==-2) neighborList[9*Np+idx]=-1; - else if (neighbor<0) neighborList[9*Np+idx]=idx; - else neighborList[9*Np+idx]=neighbor; - - - neighbor=Map(i+1,j,k+1); - if (neighbor==-2) neighborList[10*Np+idx]=-1; - else if (neighbor<0) neighborList[10*Np+idx]=idx; - else neighborList[10*Np+idx]=neighbor; - - // 12 - neighbor=Map(i-1,j,k-1); - if (neighbor==-2) neighborList[11*Np+idx]=-1; - else if (neighbor<0) neighborList[11*Np+idx]=idx; - else neighborList[11*Np+idx]=neighbor; - - - neighbor=Map(i+1,j,k-1); - if (neighbor==-2) neighborList[12*Np+idx]=-1; - else if (neighbor<0) neighborList[12*Np+idx]=idx; - else neighborList[12*Np+idx]=neighbor; - - // 14 - neighbor=Map(i-1,j,k+1); - if (neighbor==-2) neighborList[13*Np+idx]=-1; - else if (neighbor<0) neighborList[13*Np+idx]=idx; - else neighborList[13*Np+idx]=neighbor; - - neighbor=Map(i,j+1,k+1); - if (neighbor==-2) neighborList[14*Np+idx]=-1; - else if (neighbor<0) neighborList[14*Np+idx]=idx; - else neighborList[14*Np+idx]=neighbor; - - // 16 - neighbor=Map(i,j-1,k-1); - if (neighbor==-2) neighborList[15*Np+idx]=-1; - else if (neighbor<0) neighborList[15*Np+idx]=idx; - else neighborList[15*Np+idx]=neighbor; - - neighbor=Map(i,j+1,k-1); - if (neighbor==-2) neighborList[16*Np+idx]=-1; - else if (neighbor<0) neighborList[16*Np+idx]=idx; - else neighborList[16*Np+idx]=neighbor; - - // 18 - neighbor=Map(i,j-1,k+1); - if (neighbor==-2) neighborList[17*Np+idx]=-1; - else if (neighbor<0) neighborList[17*Np+idx]=idx; - else neighborList[17*Np+idx]=neighbor; - - - } - } - } - } - - //....................................................................... - // Now map through SendList and RecvList to update indices - // First loop over the send lists - - int *TempBuffer; - TempBuffer = new int [5*RecvCount]; - - //....................................................................... - // Re-index the send lists - ScaLBL_CopyToHost(TempBuffer,dvcSendList_x,sendCount_x*sizeof(int)); - - for (i=0; i Np ){ - ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); - } - - // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); - else if (!(idx<0)){ - // store the idx associated with each neighbor - // store idx for self if neighbor is in solid or out of domain - //D3Q19 = {{1,0,0},{-1,0,0} - // {0,1,0},{0,-1,0} - // {0,0,1},{0,0,-1}, - // {1,1,0},{-1,-1,0}, - // {1,-1,0},{-1,1,0}, - // {1,0,1},{-1,0,-1}, - // {1,0,-1},{-1,0,1}, - // {0,1,1},{0,-1,-1}, - // {0,1,-1},{0,-1,1}}; - // note that only odd distributions need to be stored to execute the swap algorithm - int neighbor; // cycle through the neighbors of lattice site idx - neighbor=Map(i+1,j,k); - if (neighbor==-2) neighborList[idx]=-1; - else if (neighbor<0) neighborList[idx]=idx; - else neighborList[idx]=neighbor; - - neighbor=Map(i,j+1,k); - if (neighbor==-2) neighborList[Np+idx]=-1; - else if (neighbor<0) neighborList[Np+idx]=idx; - else neighborList[Np+idx]=neighbor; - - neighbor=Map(i,j,k+1); - if (neighbor==-2) neighborList[2*Np+idx]=-1; - else if (neighbor<0) neighborList[2*Np+idx]=idx; - else neighborList[2*Np+idx]=neighbor; - - neighbor=Map(i+1,j+1,k); - if (neighbor==-2) neighborList[3*Np+idx]=-1; - else if (neighbor<0) neighborList[3*Np+idx]=idx; - else neighborList[3*Np+idx]=neighbor; - - neighbor=Map(i+1,j-1,k); - if (neighbor==-2) neighborList[4*Np+idx]=-1; - else if (neighbor<0) neighborList[4*Np+idx]=idx; - else neighborList[4*Np+idx]=neighbor; - - neighbor=Map(i+1,j,k+1); - if (neighbor==-2) neighborList[5*Np+idx]=-1; - else if (neighbor<0) neighborList[5*Np+idx]=idx; - else neighborList[5*Np+idx]=neighbor; - - neighbor=Map(i+1,j,k-1); - if (neighbor==-2) neighborList[6*Np+idx]=-1; - else if (neighbor<0) neighborList[6*Np+idx]=idx; - else neighborList[6*Np+idx]=neighbor; - - neighbor=Map(i,j+1,k+1); - if (neighbor==-2) neighborList[7*Np+idx]=-1; - else if (neighbor<0) neighborList[7*Np+idx]=idx; - else neighborList[7*Np+idx]=neighbor; - - neighbor=Map(i,j+1,k-1); - if (neighbor==-2) neighborList[8*Np+idx]=-1; - else if (neighbor<0) neighborList[8*Np+idx]=idx; - else neighborList[8*Np+idx]=neighbor; - } - } - } - } - - //for (idx=0; idx Np ){ - ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); - } - - // for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); - else if (!(idx<0)){ - // store the idx associated with each neighbor - // store idx for self if neighbor is in solid or out of domain - //D3Q19 = {{1,0,0},{-1,0,0} - // {0,1,0},{0,-1,0} - // {0,0,1},{0,0,-1}, - // {1,1,0},{-1,-1,0}, - // {1,-1,0},{-1,1,0}, - // {1,0,1},{-1,0,-1}, - // {1,0,-1},{-1,0,1}, - // {0,1,1},{0,-1,-1}, - // {0,1,-1},{0,-1,1}}; - // note that only odd distributions need to be stored to execute the swap algorithm - int neighbor; // cycle through the neighbors of lattice site idx - neighbor=Map(i+1,j,k); - if (neighbor==-2) neighborList[idx]=-1; - else if (neighbor<0) neighborList[idx]=idx; - else neighborList[idx]=neighbor; - - neighbor=Map(i-1,j,k); - if (neighbor==-2) neighborList[Np+idx]=-1; - else if (neighbor<0) neighborList[Np+idx]=idx; - else neighborList[Np+idx]=neighbor; - - neighbor=Map(i,j+1,k); - if (neighbor==-2) neighborList[2*Np+idx]=-1; - else if (neighbor<0) neighborList[2*Np+idx]=idx; - else neighborList[2*Np+idx]=neighbor; - - neighbor=Map(i,j-1,k); - if (neighbor==-2) neighborList[3*Np+idx]=-1; - else if (neighbor<0) neighborList[3*Np+idx]=idx; - else neighborList[3*Np+idx]=neighbor; - - - neighbor=Map(i,j,k+1); - if (neighbor==-2) neighborList[4*Np+idx]=-1; - else if (neighbor<0) neighborList[4*Np+idx]=idx; - else neighborList[4*Np+idx]=neighbor; - - neighbor=Map(i,j,k-1); - if (neighbor==-2) neighborList[5*Np+idx]=-1; - else if (neighbor<0) neighborList[5*Np+idx]=idx; - else neighborList[5*Np+idx]=neighbor; - - neighbor=Map(i+1,j+1,k); - if (neighbor==-2) neighborList[6*Np+idx]=-1; - else if (neighbor<0) neighborList[6*Np+idx]=idx; - else neighborList[6*Np+idx]=neighbor; - - neighbor=Map(i-1,j-1,k); - if (neighbor==-2) neighborList[7*Np+idx]=-1; - else if (neighbor<0) neighborList[7*Np+idx]=idx; - else neighborList[7*Np+idx]=neighbor; - - - neighbor=Map(i+1,j-1,k); - if (neighbor==-2) neighborList[8*Np+idx]=-1; - else if (neighbor<0) neighborList[8*Np+idx]=idx; - else neighborList[8*Np+idx]=neighbor; - - neighbor=Map(i-1,j+1,k); - if (neighbor==-2) neighborList[9*Np+idx]=-1; - else if (neighbor<0) neighborList[9*Np+idx]=idx; - else neighborList[9*Np+idx]=neighbor; - - - neighbor=Map(i+1,j,k+1); - if (neighbor==-2) neighborList[10*Np+idx]=-1; - else if (neighbor<0) neighborList[10*Np+idx]=idx; - else neighborList[10*Np+idx]=neighbor; - - neighbor=Map(i-1,j,k-1); - if (neighbor==-2) neighborList[11*Np+idx]=-1; - else if (neighbor<0) neighborList[11*Np+idx]=idx; - else neighborList[11*Np+idx]=neighbor; - - - neighbor=Map(i+1,j,k-1); - if (neighbor==-2) neighborList[12*Np+idx]=-1; - else if (neighbor<0) neighborList[12*Np+idx]=idx; - else neighborList[12*Np+idx]=neighbor; - - neighbor=Map(i-1,j,k+1); - if (neighbor==-2) neighborList[13*Np+idx]=-1; - else if (neighbor<0) neighborList[13*Np+idx]=idx; - else neighborList[13*Np+idx]=neighbor; - - neighbor=Map(i,j+1,k+1); - if (neighbor==-2) neighborList[14*Np+idx]=-1; - else if (neighbor<0) neighborList[14*Np+idx]=idx; - else neighborList[14*Np+idx]=neighbor; - - neighbor=Map(i,j-1,k-1); - if (neighbor==-2) neighborList[15*Np+idx]=-1; - else if (neighbor<0) neighborList[15*Np+idx]=idx; - else neighborList[15*Np+idx]=neighbor; - - neighbor=Map(i,j+1,k-1); - if (neighbor==-2) neighborList[16*Np+idx]=-1; - else if (neighbor<0) neighborList[16*Np+idx]=idx; - else neighborList[16*Np+idx]=neighbor; - - neighbor=Map(i,j-1,k+1); - if (neighbor==-2) neighborList[17*Np+idx]=-1; - else if (neighbor<0) neighborList[17*Np+idx]=idx; - else neighborList[17*Np+idx]=neighbor; - } - } - } - } - - //for (idx=0; idx Np ){ - ERROR("ScaLBL_Communicator::MemoryOptimizedLayout: Failed to create memory efficient layout!\n"); - } - /* - for (k=1;k Np) printf("ScaLBL_Communicator::MemoryOptimizedLayout: Map(%i,%i,%i) = %i > %i \n",i,j,k,Map(i,j,k),Np); - else if (!(idx<0)){ - // store the idx associated with each neighbor - // store idx for self if neighbor is in solid or out of domain - //D3Q19 = {{1,0,0},{-1,0,0} - // {0,1,0},{0,-1,0} - // {0,0,1},{0,0,-1}, - // {1,1,0},{-1,-1,0}, - // {1,-1,0},{-1,1,0}, - // {1,0,1},{-1,0,-1}, - // {1,0,-1},{-1,0,1}, - // {0,1,1},{0,-1,-1}, - // {0,1,-1},{0,-1,1}}; - int neighbor; // cycle through the neighbors of lattice site idx - neighbor=Map(i-1,j,k); - if (neighbor<0) neighborList[idx]=idx + 2*Np; - else neighborList[idx]=neighbor + 1*Np; - - neighbor=Map(i+1,j,k); - if (neighbor<0) neighborList[Np+idx] = idx + 1*Np; - else neighborList[Np+idx]= neighbor + 2*Np; - - neighbor=Map(i,j-1,k); - if (neighbor<0) neighborList[2*Np+idx]=idx + 4*Np; - else neighborList[2*Np+idx]=neighbor + 3*Np; - - neighbor=Map(i,j+1,k); - if (neighbor<0) neighborList[3*Np+idx]=idx + 3*Np; - else neighborList[3*Np+idx]=neighbor + 4*Np; - - neighbor=Map(i,j,k-1); - if (neighbor<0) neighborList[4*Np+idx]=idx + 6*Np; - else neighborList[4*Np+idx]=neighbor + 5*Np; - - neighbor=Map(i,j,k+1); - if (neighbor<0) neighborList[5*Np+idx]=idx + 5*Np; - else neighborList[5*Np+idx]=neighbor + 6*Np; - - neighbor=Map(i-1,j-1,k); - if (neighbor<0) neighborList[6*Np+idx]=idx + 8*Np; - else neighborList[6*Np+idx]=neighbor + 7*Np; - - neighbor=Map(i+1,j+1,k); - if (neighbor<0) neighborList[7*Np+idx]=idx + 7*Np; - else neighborList[7*Np+idx]=neighbor+8*Np; - - neighbor=Map(i-1,j+1,k); - if (neighbor<0) neighborList[8*Np+idx]=idx + 10*Np; - else neighborList[8*Np+idx]=neighbor + 9*Np; - - neighbor=Map(i+1,j-1,k); - if (neighbor<0) neighborList[9*Np+idx]=idx + 9*Np; - else neighborList[9*Np+idx]=neighbor + 10*Np; - - neighbor=Map(i-1,j,k-1); - if (neighbor<0) neighborList[10*Np+idx]=idx + 12*Np; - else neighborList[10*Np+idx]=neighbor + 11*Np; - - neighbor=Map(i+1,j,k+1); - if (neighbor<0) neighborList[11*Np+idx]=idx + 11*Np; - else neighborList[11*Np+idx]=neighbor + 12*Np; - - neighbor=Map(i-1,j,k+1); - if (neighbor<0) neighborList[12*Np+idx]=idx + 14*Np; - else neighborList[12*Np+idx]=neighbor + 13*Np; - - neighbor=Map(i+1,j,k-1); - if (neighbor<0) neighborList[13*Np+idx]=idx + 13*Np; - else neighborList[13*Np+idx]=neighbor + 14*Np; - - neighbor=Map(i,j-1,k-1); - if (neighbor<0) neighborList[14*Np+idx]=idx + 16*Np; - else neighborList[14*Np+idx]=neighbor + 15*Np; - - neighbor=Map(i,j+1,k+1); - if (neighbor<0) neighborList[15*Np+idx]=idx + 15*Np; - else neighborList[15*Np+idx]=neighbor + 16*Np; - - neighbor=Map(i,j-1,k+1); - if (neighbor<0) neighborList[16*Np+idx]=idx + 18*Np; - else neighborList[16*Np+idx]=neighbor + 17*Np; - - neighbor=Map(i,j+1,k-1); - if (neighbor<0) neighborList[17*Np+idx]=idx + 17*Np; - else neighborList[17*Np+idx]=neighbor + 18*Np; - } - } - } - } - - //for (idx=0; idx 0 ) { - // Set the affinity - int N_procs = ThreadPool::getNumberOfProcessors(); - std::vector procs(N_procs); - for (int i=0; i fillData(Dm.Comm,Dm.rank_info,Nx-2,Ny-2,Nz-2,1,1,1,0,1); - std::vector meshData(1); - meshData[0].meshName = "domain"; - meshData[0].mesh = std::shared_ptr( new IO::DomainMesh(Dm.rank_info,Nx-2,Ny-2,Nz-2,Lx,Ly,Lz) ); - std::shared_ptr PhaseVar( new IO::Variable() ); - std::shared_ptr PressVar( new IO::Variable() ); - std::shared_ptr SignDistVar( new IO::Variable() ); - std::shared_ptr BlobIDVar( new IO::Variable() ); - PhaseVar->name = "phase"; - PhaseVar->type = IO::VariableType::VolumeVariable; - PhaseVar->dim = 1; - PhaseVar->data.resize(Nx-2,Ny-2,Nz-2); - meshData[0].vars.push_back(PhaseVar); - PressVar->name = "Pressure"; - PressVar->type = IO::VariableType::VolumeVariable; - PressVar->dim = 1; - PressVar->data.resize(Nx-2,Ny-2,Nz-2); - meshData[0].vars.push_back(PressVar); - SignDistVar->name = "SignDist"; - SignDistVar->type = IO::VariableType::VolumeVariable; - SignDistVar->dim = 1; - SignDistVar->data.resize(Nx-2,Ny-2,Nz-2); - meshData[0].vars.push_back(SignDistVar); - BlobIDVar->name = "BlobID"; - BlobIDVar->type = IO::VariableType::VolumeVariable; - BlobIDVar->dim = 1; - BlobIDVar->data.resize(Nx-2,Ny-2,Nz-2); - meshData[0].vars.push_back(BlobIDVar); //************ MAIN ITERATION LOOP ***************************************/ PROFILE_START("Loop"); - BlobIDstruct last_ids, last_index; - BlobIDList last_id_map; - writeIDMap(ID_map_struct(),0,id_map_filename); - AnalysisWaitIdStruct work_ids; + runAnalysis analysis( N_threads, RESTART_INTERVAL,ANALYSIS_INTERVAL,BLOBID_INTERVAL, + rank_info, ScaLBL_Comm, Dm, Np, Nx, Ny, Nz, Lx, Ly, Lz, pBC, beta, err, Map, LocalRestartFile ); + analysis.setAffinities( "none" ); while (timestep < timestepMax && err > tol ) { //if ( rank==0 ) { printf("Running timestep %i (%i MB)\n",timestep+1,(int)(Utilities::getMemoryUsage()/1048576)); } PROFILE_START("Update"); @@ -651,15 +611,13 @@ int main(int argc, char **argv) PROFILE_STOP("Update"); // Run the analysis - run_analysis(timestep,RESTART_INTERVAL,ANALYSIS_INTERVAL,BLOBID_INTERVAL,rank_info,ScaLBL_Comm,*Averages,last_ids,last_index,last_id_map, - Np,Nx,Ny,Nz,pBC,beta,err,Phi,Pressure,Velocity,Map,fq,Den, - LocalRestartFile,meshData,fillData,tpool,work_ids); + analysis.run( timestep, *Averages, Phi, Pressure, Velocity, fq, Den ); // Save the timers if ( timestep%50==0 ) PROFILE_SAVE("lbpm_color_simulator",1); } - tpool.wait_pool_finished(); + analysis.finish(); PROFILE_STOP("Loop"); //************************************************************************ ScaLBL_DeviceBarrier();