mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #5430 from atgeirr/fix-damaris-logging-parallel
Add and use DamarisOutput::handleError() helper.
This commit is contained in:
commit
46523e7cd9
@ -38,50 +38,27 @@
|
|||||||
|
|
||||||
namespace Opm::DamarisOutput {
|
namespace Opm::DamarisOutput {
|
||||||
|
|
||||||
int setPosition(const char* field, int rank, int64_t pos)
|
int setPosition(const char* field, int64_t pos)
|
||||||
{
|
{
|
||||||
int dam_err = damaris_set_position(field, &pos);
|
int dam_err = damaris_set_position(field, &pos);
|
||||||
if (dam_err != DAMARIS_OK) {
|
|
||||||
OpmLog::warning(fmt::format("damariswriter::setPosition() : ( rank:{}) "
|
|
||||||
"damaris_set_position({}, ...), Damaris Error: {} ",
|
|
||||||
rank, field, damaris_error_string(dam_err)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return dam_err;
|
return dam_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
int setParameter(const char* field, int rank, int value)
|
int setParameter(const char* field, int value)
|
||||||
{
|
{
|
||||||
int dam_err = damaris_parameter_set(field, &value, sizeof(int));
|
int dam_err = damaris_parameter_set(field, &value, sizeof(int));
|
||||||
if (dam_err != DAMARIS_OK) {
|
|
||||||
OpmLog::warning(fmt::format("damariswriter::setParameter() (rank:{}) "
|
|
||||||
"damaris_parameter_set(\"{}\",...)", rank, field));
|
|
||||||
}
|
|
||||||
|
|
||||||
return dam_err;
|
return dam_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
int write(const char* field, int rank, const void* data)
|
int write(const char* field, const void* data)
|
||||||
{
|
{
|
||||||
int dam_err = damaris_write(field, data);
|
int dam_err = damaris_write(field, data);
|
||||||
if (dam_err != DAMARIS_OK) {
|
|
||||||
OpmLog::warning(fmt::format("damariswriter::write() : ( rank:{}) "
|
|
||||||
"damaris_write({}, ...), Damaris Error: {} ",
|
|
||||||
rank, field, damaris_error_string(dam_err)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return dam_err;
|
return dam_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
int endIteration(int rank)
|
int endIteration()
|
||||||
{
|
{
|
||||||
int dam_err = damaris_end_iteration();
|
int dam_err = damaris_end_iteration();
|
||||||
if (dam_err != DAMARIS_OK) {
|
|
||||||
OpmLog::warning(fmt::format("damariswriter::endIteration() : ( rank:{}) "
|
|
||||||
"damaris_end_iteration(), Damaris Error: {} ",
|
|
||||||
rank, damaris_error_string(dam_err)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return dam_err;
|
return dam_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,22 +94,48 @@ int setupWritingPars(Parallel::Communication comm,
|
|||||||
// Set the parameter so that the Damaris servers can allocate the correct amount of memory for the variable
|
// Set the parameter so that the Damaris servers can allocate the correct amount of memory for the variable
|
||||||
// Damaris parameters only support int data types. This will limit models to at most 2^31-1 elements (INT_MAX for a 32-bit int)
|
// Damaris parameters only support int data types. This will limit models to at most 2^31-1 elements (INT_MAX for a 32-bit int)
|
||||||
// ToDo: Do we need to check that local ranks are 0 based ?
|
// ToDo: Do we need to check that local ranks are 0 based ?
|
||||||
int dam_err = setParameter("n_elements_local", comm.rank(), elements_rank_sizes[comm.rank()]);
|
int dam_err = setParameter("n_elements_local", elements_rank_sizes[comm.rank()]);
|
||||||
// Damaris parameters only support int data types. This will limit models to at most 2^31-1 elements (INT_MAX for a 32-bit int)
|
// Damaris parameters only support int data types. This will limit models to at most 2^31-1 elements (INT_MAX for a 32-bit int)
|
||||||
// ToDo: Do we need to check that n_elements_global_max will fit in a C int type (INT_MAX)
|
// ToDo: Do we need to check that n_elements_global_max will fit in a C int type (INT_MAX)
|
||||||
if( n_elements_global_max <= std::numeric_limits<int>::max() ) {
|
if ( n_elements_global_max <= std::numeric_limits<int>::max() ) {
|
||||||
setParameter("n_elements_total", comm.rank(), n_elements_global_max);
|
setParameter("n_elements_total", n_elements_global_max);
|
||||||
} else {
|
} else {
|
||||||
OpmLog::error(fmt::format("( rank:{} ) The size of the global array ({}) is"
|
if (comm.rank() == 0) {
|
||||||
"greater than what a Damaris paramater type supports ({}). ",
|
OpmLog::error(fmt::format("The size of the global array ({}) is"
|
||||||
comm.rank(), n_elements_global_max, std::numeric_limits<int>::max() ));
|
"greater than what a Damaris paramater type supports ({}). ",
|
||||||
// assert( n_elements_global_max <= std::numeric_limits<int>::max() ) ;
|
n_elements_global_max, std::numeric_limits<int>::max() ));
|
||||||
|
}
|
||||||
OPM_THROW(std::runtime_error, "setupDamarisWritingPars() n_elements_global_max "
|
OPM_THROW(std::runtime_error, "setupDamarisWritingPars() n_elements_global_max "
|
||||||
"> std::numeric_limits<int>::max() " + std::to_string(dam_err));
|
"> std::numeric_limits<int>::max() " + std::to_string(dam_err));
|
||||||
}
|
}
|
||||||
|
|
||||||
return dam_err;
|
return dam_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
handleError(const int dam_err, Parallel::Communication comm, const std::string& message)
|
||||||
|
{
|
||||||
|
// Find if some rank has encountered an error.
|
||||||
|
const int isOk = (dam_err == DAMARIS_OK);
|
||||||
|
const bool error = (comm.sum(isOk) != comm.size());
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
// Form error message on ranks that had error, and put it into a DeferredLogger.
|
||||||
|
DeferredLogger logger;
|
||||||
|
if (dam_err != DAMARIS_OK) {
|
||||||
|
// Since the simulator will continue, this is a warning not an error
|
||||||
|
// from the OPM Flow point of view.
|
||||||
|
logger.warning("OPM_DAMARIS_ERROR",
|
||||||
|
fmt::format("Damaris error in {}, on rank {}, error string: {}",
|
||||||
|
message,
|
||||||
|
comm.rank(),
|
||||||
|
damaris_error_string(dam_err)));
|
||||||
|
}
|
||||||
|
DeferredLogger global = gatherDeferredLogger(logger, comm);
|
||||||
|
if (comm.rank() == 0) {
|
||||||
|
global.logMessages();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -61,13 +61,14 @@ namespace Opm {
|
|||||||
|
|
||||||
namespace DamarisOutput {
|
namespace DamarisOutput {
|
||||||
|
|
||||||
int endIteration(int rank);
|
int endIteration();
|
||||||
int setParameter(const char* field, int rank, int value);
|
int setParameter(const char* field, int value);
|
||||||
int setPosition(const char* field, int rank, int64_t pos);
|
int setPosition(const char* field, int64_t pos);
|
||||||
int write(const char* field, int rank, const void* data);
|
int write(const char* field, const void* data);
|
||||||
int setupWritingPars(Parallel::Communication comm,
|
int setupWritingPars(Parallel::Communication comm,
|
||||||
const int n_elements_local_grid,
|
const int n_elements_local_grid,
|
||||||
std::vector<unsigned long long>& elements_rank_offsets);
|
std::vector<unsigned long long>& elements_rank_offsets);
|
||||||
|
void handleError(const int dam_err, Parallel::Communication comm, const std::string& message);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -229,12 +230,13 @@ public:
|
|||||||
{
|
{
|
||||||
OPM_TIMEBLOCK(writeOutput);
|
OPM_TIMEBLOCK(writeOutput);
|
||||||
const int reportStepNum = simulator_.episodeIndex() + 1;
|
const int reportStepNum = simulator_.episodeIndex() + 1;
|
||||||
|
const auto& cc = simulator_.vanguard().grid().comm();
|
||||||
|
|
||||||
// added this as localCellData was not being written
|
// added this as localCellData was not being written
|
||||||
if (!isSubStep)
|
if (!isSubStep)
|
||||||
this->damarisOutputModule_->invalidateLocalData() ;
|
this->damarisOutputModule_->invalidateLocalData() ;
|
||||||
this->prepareLocalCellData(isSubStep, reportStepNum);
|
this->prepareLocalCellData(isSubStep, reportStepNum);
|
||||||
this->damarisOutputModule_->outputErrorLog(simulator_.gridView().comm());
|
this->damarisOutputModule_->outputErrorLog(cc);
|
||||||
|
|
||||||
// The damarisWriter is not outputting well or aquifer data (yet)
|
// The damarisWriter is not outputting well or aquifer data (yet)
|
||||||
auto localWellData = simulator_.problem().wellModel().wellData(); // data::Well
|
auto localWellData = simulator_.problem().wellModel().wellData(); // data::Well
|
||||||
@ -254,8 +256,7 @@ public:
|
|||||||
// which define sizes of the Damaris variables, per-rank and globally (over all ranks).
|
// which define sizes of the Damaris variables, per-rank and globally (over all ranks).
|
||||||
// Also sets the offsets to where a rank's array data sits within the global array.
|
// Also sets the offsets to where a rank's array data sits within the global array.
|
||||||
// This is useful for HDF5 output and for defining distributed arrays in Dask.
|
// This is useful for HDF5 output and for defining distributed arrays in Dask.
|
||||||
dam_err_ = DamarisOutput::setupWritingPars(simulator_.vanguard().grid().comm(),
|
dam_err_ = DamarisOutput::setupWritingPars(cc, numElements_, elements_rank_offsets_);
|
||||||
numElements_, elements_rank_offsets_);
|
|
||||||
|
|
||||||
// sets positions and data for non-time-varying variables MPI_RANK and GLOBAL_CELL_INDEX
|
// sets positions and data for non-time-varying variables MPI_RANK and GLOBAL_CELL_INDEX
|
||||||
this->setGlobalIndexForDamaris() ;
|
this->setGlobalIndexForDamaris() ;
|
||||||
@ -286,15 +287,13 @@ public:
|
|||||||
// Call damaris_set_position() for all available variables
|
// Call damaris_set_position() for all available variables
|
||||||
// There is an assumption that all variables are the same size, with the same offset.
|
// There is an assumption that all variables are the same size, with the same offset.
|
||||||
// see initDamarisTemplateXmlFile.cpp for the Damaris XML descriptions.
|
// see initDamarisTemplateXmlFile.cpp for the Damaris XML descriptions.
|
||||||
dam_err_ = DamarisOutput::setPosition(name.c_str(), rank_,
|
dam_err_ = DamarisOutput::setPosition(name.c_str(), this->elements_rank_offsets_[rank_]);
|
||||||
this->elements_rank_offsets_[rank_]);
|
|
||||||
|
|
||||||
// It does not seem I can test for what type of data is present (double or int)
|
// It does not seem I can test for what type of data is present (double or int)
|
||||||
// in the std::variant within the data::CellData, so I will use a try catch block.
|
// in the std::variant within the data::CellData, so I will use a try catch block.
|
||||||
try {
|
try {
|
||||||
if (dataCol.data<double>().size() >= static_cast<std::vector<double>::size_type>(this->numElements_)) {
|
if (dataCol.data<double>().size() >= static_cast<std::vector<double>::size_type>(this->numElements_)) {
|
||||||
dam_err_ = DamarisOutput::write(name.c_str(), rank_,
|
dam_err_ = DamarisOutput::write(name.c_str(), dataCol.data<double>().data()) ;
|
||||||
dataCol.data<double>().data()) ;
|
|
||||||
} else {
|
} else {
|
||||||
OpmLog::info(fmt::format("( rank:{}) The variable \"{}\" was found to be of a different size {} (not {}).", rank_, name, dataCol.data<double>().size(), this->numElements_ ));
|
OpmLog::info(fmt::format("( rank:{}) The variable \"{}\" was found to be of a different size {} (not {}).", rank_, name, dataCol.data<double>().size(), this->numElements_ ));
|
||||||
}
|
}
|
||||||
@ -302,8 +301,7 @@ public:
|
|||||||
catch (std::bad_variant_access const& ex) {
|
catch (std::bad_variant_access const& ex) {
|
||||||
// Not a std::vector<double>, must be a std::vector<int>
|
// Not a std::vector<double>, must be a std::vector<int>
|
||||||
if (dataCol.data<int>().size() >= static_cast<std::vector<int>::size_type>(this->numElements_)) {
|
if (dataCol.data<int>().size() >= static_cast<std::vector<int>::size_type>(this->numElements_)) {
|
||||||
dam_err_ = DamarisOutput::write(name.c_str(), rank_,
|
dam_err_ = DamarisOutput::write(name.c_str(), dataCol.data<int>().data()) ;
|
||||||
dataCol.data<int>().data()) ;
|
|
||||||
} else {
|
} else {
|
||||||
OpmLog::info(fmt::format("( rank:{}) The variable \"{}\" was found to be of a different size {} (not {}).", rank_, name, dataCol.data<int>().size(), this->numElements_ ));
|
OpmLog::info(fmt::format("( rank:{}) The variable \"{}\" was found to be of a different size {} (not {}).", rank_, name, dataCol.data<int>().size(), this->numElements_ ));
|
||||||
}
|
}
|
||||||
@ -311,6 +309,7 @@ public:
|
|||||||
++cell_data_written ;
|
++cell_data_written ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
DamarisOutput::handleError(dam_err_, cc, "setPosition() and write() for available variables");
|
||||||
|
|
||||||
if (!cell_data_written) {
|
if (!cell_data_written) {
|
||||||
OpmLog::info(fmt::format("( rank:{}) No simulation data written to the Damaris server - check --damaris-limit-variables command line option (if used) has valid variable name(s) and that the Damaris XML file contains variable names that are available in your simulation.", rank_));
|
OpmLog::info(fmt::format("( rank:{}) No simulation data written to the Damaris server - check --damaris-limit-variables command line option (if used) has valid variable name(s) and that the Damaris XML file contains variable names that are available in your simulation.", rank_));
|
||||||
@ -329,12 +328,13 @@ public:
|
|||||||
std::cout << "Name of Damaris Block Varaiable : (" << rank_ << ") " << name << " part : " << part << " Value : " << dataCol << std::endl ;
|
std::cout << "Name of Damaris Block Varaiable : (" << rank_ << ") " << name << " part : " << part << " Value : " << dataCol << std::endl ;
|
||||||
}
|
}
|
||||||
|
|
||||||
dam_err_ = DamarisOutput::endIteration(rank_);
|
dam_err_ = DamarisOutput::endIteration();
|
||||||
*/
|
*/
|
||||||
if (this->damarisOutputModule_->getPRESSURE_ptr() != nullptr)
|
if (this->damarisOutputModule_->getPRESSURE_ptr() != nullptr)
|
||||||
{
|
{
|
||||||
dam_err_ = DamarisOutput::endIteration(rank_);
|
dam_err_ = DamarisOutput::endIteration();
|
||||||
}
|
}
|
||||||
|
DamarisOutput::handleError(dam_err_, cc, "endIteration()");
|
||||||
|
|
||||||
} // end of ! isSubstep
|
} // end of ! isSubstep
|
||||||
}
|
}
|
||||||
@ -358,10 +358,12 @@ private:
|
|||||||
|
|
||||||
void setGlobalIndexForDamaris ()
|
void setGlobalIndexForDamaris ()
|
||||||
{
|
{
|
||||||
|
const auto& cc = simulator_.vanguard().grid().comm();
|
||||||
// Use damaris_set_position to set the offset in the global size of the array.
|
// Use damaris_set_position to set the offset in the global size of the array.
|
||||||
// This is used so that output functionality (e.g. HDF5Store) knows the global offset of
|
// This is used so that output functionality (e.g. HDF5Store) knows the global offset of
|
||||||
// each rank's data.
|
// each rank's data.
|
||||||
dam_err_ = DamarisOutput::setPosition("GLOBAL_CELL_INDEX", rank_, elements_rank_offsets_[rank_]);
|
dam_err_ = DamarisOutput::setPosition("GLOBAL_CELL_INDEX", elements_rank_offsets_[rank_]);
|
||||||
|
DamarisOutput::handleError(dam_err_, cc, "setPosition() for GLOBAL_CELL_INDEX");
|
||||||
|
|
||||||
// This is an example of writing to the Damaris shared memory directly (i.e. we allocate the
|
// This is an example of writing to the Damaris shared memory directly (i.e. we allocate the
|
||||||
// variable directly in the shared memory region and do not use damaris_write() to copy data there.
|
// variable directly in the shared memory region and do not use damaris_write() to copy data there.
|
||||||
@ -375,13 +377,14 @@ private:
|
|||||||
if (this->collectOnIORank_.isParallel()) {
|
if (this->collectOnIORank_.isParallel()) {
|
||||||
const std::vector<int>& local_to_global =
|
const std::vector<int>& local_to_global =
|
||||||
this->collectOnIORank_.localIdxToGlobalIdxMapping();
|
this->collectOnIORank_.localIdxToGlobalIdxMapping();
|
||||||
dam_err_ = DamarisOutput::write("GLOBAL_CELL_INDEX", rank_, local_to_global.data());
|
dam_err_ = DamarisOutput::write("GLOBAL_CELL_INDEX", local_to_global.data());
|
||||||
} else {
|
} else {
|
||||||
std::vector<int> local_to_global_filled ;
|
std::vector<int> local_to_global_filled ;
|
||||||
local_to_global_filled.resize(this->numElements_) ;
|
local_to_global_filled.resize(this->numElements_) ;
|
||||||
std::iota(local_to_global_filled.begin(), local_to_global_filled.end(), 0);
|
std::iota(local_to_global_filled.begin(), local_to_global_filled.end(), 0);
|
||||||
dam_err_ = DamarisOutput::write("GLOBAL_CELL_INDEX", rank_, local_to_global_filled.data());
|
dam_err_ = DamarisOutput::write("GLOBAL_CELL_INDEX", local_to_global_filled.data());
|
||||||
}
|
}
|
||||||
|
DamarisOutput::handleError(dam_err_, cc, "write() for GLOBAL_CELL_INDEX");
|
||||||
|
|
||||||
mpi_rank_var.setDamarisParameterAndShmem( {this->numElements_ } ) ;
|
mpi_rank_var.setDamarisParameterAndShmem( {this->numElements_ } ) ;
|
||||||
// Fill the created memory area
|
// Fill the created memory area
|
||||||
@ -391,8 +394,10 @@ private:
|
|||||||
// Python code (as an example) can use the path as required.
|
// Python code (as an example) can use the path as required.
|
||||||
const auto& outputDir = simulator_.vanguard().eclState().cfg().io().getOutputDir();
|
const auto& outputDir = simulator_.vanguard().eclState().cfg().io().getOutputDir();
|
||||||
if (outputDir.size() > 0) {
|
if (outputDir.size() > 0) {
|
||||||
dam_err_ = DamarisOutput::setParameter("path_string_length", rank_, outputDir.size()) ;
|
dam_err_ = DamarisOutput::setParameter("path_string_length", outputDir.size()) ;
|
||||||
dam_err_ = DamarisOutput::write("OUTPUTDIR", rank_, outputDir.c_str());
|
DamarisOutput::handleError(dam_err_, cc, "setParameter() for path_string_length");
|
||||||
|
dam_err_ = DamarisOutput::write("OUTPUTDIR", outputDir.c_str());
|
||||||
|
DamarisOutput::handleError(dam_err_, cc, "write() for OUTPUTDIR");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -536,6 +541,7 @@ private:
|
|||||||
damarisOutputModule_->validateLocalData();
|
damarisOutputModule_->validateLocalData();
|
||||||
OPM_END_PARALLEL_TRY_CATCH("DamarisWriter::prepareLocalCellData() failed: ", simulator_.vanguard().grid().comm());
|
OPM_END_PARALLEL_TRY_CATCH("DamarisWriter::prepareLocalCellData() failed: ", simulator_.vanguard().grid().comm());
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Opm
|
} // namespace Opm
|
||||||
|
Loading…
Reference in New Issue
Block a user