Improve parallel logging in NLDD solver.

This commit is contained in:
Atgeirr Flø Rasmussen 2024-01-12 15:56:00 +01:00
parent b9b42a01cc
commit 6eb670b5c7
3 changed files with 65 additions and 67 deletions

View File

@ -52,6 +52,7 @@
#include <fmt/format.h> #include <fmt/format.h>
#include <algorithm> #include <algorithm>
#include <array>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstddef> #include <cstddef>
@ -96,7 +97,7 @@ public:
//! \param param param Model parameters //! \param param param Model parameters
//! \param compNames Names of the solution components //! \param compNames Names of the solution components
BlackoilModelEbosNldd(BlackoilModelEbos<TypeTag>& model) BlackoilModelEbosNldd(BlackoilModelEbos<TypeTag>& model)
: model_(model) : model_(model), rank_(model_.ebosSimulator().vanguard().grid().comm().rank())
{ {
// Create partitions. // Create partitions.
const auto& [partition_vector, num_domains] = this->partitionCells(); const auto& [partition_vector, num_domains] = this->partitionCells();
@ -208,6 +209,7 @@ public:
const auto domain_order = this->getSubdomainOrder(); const auto domain_order = this->getSubdomainOrder();
// ----------- Solve each domain separately ----------- // ----------- Solve each domain separately -----------
DeferredLogger logger;
std::vector<SimulatorReportSingle> domain_reports(domains_.size()); std::vector<SimulatorReportSingle> domain_reports(domains_.size());
for (const int domain_index : domain_order) { for (const int domain_index : domain_order) {
const auto& domain = domains_[domain_index]; const auto& domain = domains_[domain_index];
@ -215,12 +217,12 @@ public:
try { try {
switch (model_.param().local_solve_approach_) { switch (model_.param().local_solve_approach_) {
case DomainSolveApproach::Jacobi: case DomainSolveApproach::Jacobi:
solveDomainJacobi(solution, locally_solved, local_report, solveDomainJacobi(solution, locally_solved, local_report, logger,
iteration, timer, domain); iteration, timer, domain);
break; break;
default: default:
case DomainSolveApproach::GaussSeidel: case DomainSolveApproach::GaussSeidel:
solveDomainGaussSeidel(solution, locally_solved, local_report, solveDomainGaussSeidel(solution, locally_solved, local_report, logger,
iteration, timer, domain); iteration, timer, domain);
break; break;
} }
@ -234,25 +236,37 @@ public:
// i == j, at old solution for i != j. // i == j, at old solution for i != j.
if (!local_report.converged) { if (!local_report.converged) {
// TODO: more proper treatment, including in parallel. // TODO: more proper treatment, including in parallel.
OpmLog::debug("Convergence failure in domain " + std::to_string(domain.index)); logger.debug(fmt::format("Convergence failure in domain {} on rank {}." , domain.index, rank_));
} }
domain_reports[domain.index] = local_report; domain_reports[domain.index] = local_report;
} }
// Communicate and log all messages.
auto global_logger = gatherDeferredLogger(logger, model_.ebosSimulator().vanguard().grid().comm());
global_logger.logMessages();
// Accumulate local solve data. // Accumulate local solve data.
int num_converged = 0; // Putting the counts in a single array to avoid multiple
int num_domains = domain_reports.size(); // comm.sum() calls. Keeping the named vars for readability.
std::array<int, 4> counts{ 0, 0, 0, static_cast<int>(domain_reports.size()) };
int& num_converged = counts[0];
int& num_converged_already = counts[1];
int& num_local_newtons = counts[2];
int& num_domains = counts[3];
{ {
SimulatorReportSingle rep; SimulatorReportSingle rep;
for (const auto& dr : domain_reports) { for (const auto& dr : domain_reports) {
if (dr.converged) { if (dr.converged) {
++num_converged; ++num_converged;
if (dr.total_newton_iterations == 0) {
++num_converged_already;
}
} }
rep += dr; rep += dr;
} }
num_local_newtons = rep.total_newton_iterations;
local_reports_accumulated_ += rep; local_reports_accumulated_ += rep;
} }
bool is_iorank = true;
if (model_.param().local_solve_approach_ == DomainSolveApproach::Jacobi) { if (model_.param().local_solve_approach_ == DomainSolveApproach::Jacobi) {
solution = locally_solved; solution = locally_solved;
@ -289,15 +303,14 @@ public:
model_.ebosSimulator().model().invalidateAndUpdateIntensiveQuantitiesOverlap(/*timeIdx=*/0); model_.ebosSimulator().model().invalidateAndUpdateIntensiveQuantitiesOverlap(/*timeIdx=*/0);
// Make total counts of domains converged. // Make total counts of domains converged.
num_converged = comm.sum(num_converged); comm.sum(counts.data(), counts.size());
num_domains = comm.sum(num_domains);
is_iorank = comm.rank() == 0;
} }
#endif // HAVE_MPI #endif // HAVE_MPI
const bool is_iorank = this->rank_ == 0;
if (is_iorank) { if (is_iorank) {
OpmLog::debug(fmt::format("Local solves finished. Converged for {}/{} domains.\n", OpmLog::debug(fmt::format("Local solves finished. Converged for {}/{} domains. {} domains did no work. {} total local Newton iterations.\n",
num_converged, num_domains)); num_converged, num_domains, num_converged_already, num_local_newtons));
} }
// Finish with a Newton step. // Finish with a Newton step.
@ -344,6 +357,7 @@ private:
std::pair<SimulatorReportSingle, ConvergenceReport> std::pair<SimulatorReportSingle, ConvergenceReport>
solveDomain(const Domain& domain, solveDomain(const Domain& domain,
const SimulatorTimerInterface& timer, const SimulatorTimerInterface& timer,
DeferredLogger& logger,
[[maybe_unused]] const int global_iteration, [[maybe_unused]] const int global_iteration,
const bool initial_assembly_required) const bool initial_assembly_required)
{ {
@ -376,7 +390,7 @@ private:
detailTimer.reset(); detailTimer.reset();
detailTimer.start(); detailTimer.start();
std::vector<double> resnorms; std::vector<double> resnorms;
auto convreport = this->getDomainConvergence(domain, timer, 0, resnorms); auto convreport = this->getDomainConvergence(domain, timer, 0, logger, resnorms);
if (convreport.converged()) { if (convreport.converged()) {
// TODO: set more info, timing etc. // TODO: set more info, timing etc.
report.converged = true; report.converged = true;
@ -433,7 +447,7 @@ private:
// Check for local convergence. // Check for local convergence.
detailTimer.reset(); detailTimer.reset();
detailTimer.start(); detailTimer.start();
convreport = this->getDomainConvergence(domain, timer, iter, resnorms); convreport = this->getDomainConvergence(domain, timer, iter, logger, resnorms);
// apply the Schur complement of the well model to the // apply the Schur complement of the well model to the
// reservoir linearized equations // reservoir linearized equations
@ -580,6 +594,7 @@ private:
const double dt, const double dt,
const int iteration, const int iteration,
const Domain& domain, const Domain& domain,
DeferredLogger& logger,
std::vector<Scalar>& B_avg, std::vector<Scalar>& B_avg,
std::vector<Scalar>& residual_norms) std::vector<Scalar>& residual_norms)
{ {
@ -629,32 +644,26 @@ private:
for (int ii : {0, 1}) { for (int ii : {0, 1}) {
if (std::isnan(res[ii])) { if (std::isnan(res[ii])) {
report.setReservoirFailed({types[ii], CR::Severity::NotANumber, compIdx}); report.setReservoirFailed({types[ii], CR::Severity::NotANumber, compIdx});
if (model_.terminalOutputEnabled()) { logger.debug("NaN residual for " + model_.compNames().name(compIdx) + " equation.");
OpmLog::debug("NaN residual for " + model_.compNames().name(compIdx) + " equation.");
}
} else if (res[ii] > model_.param().max_residual_allowed_) { } else if (res[ii] > model_.param().max_residual_allowed_) {
report.setReservoirFailed({types[ii], CR::Severity::TooLarge, compIdx}); report.setReservoirFailed({types[ii], CR::Severity::TooLarge, compIdx});
if (model_.terminalOutputEnabled()) { logger.debug("Too large residual for " + model_.compNames().name(compIdx) + " equation.");
OpmLog::debug("Too large residual for " + model_.compNames().name(compIdx) + " equation.");
}
} else if (res[ii] < 0.0) { } else if (res[ii] < 0.0) {
report.setReservoirFailed({types[ii], CR::Severity::Normal, compIdx}); report.setReservoirFailed({types[ii], CR::Severity::Normal, compIdx});
if (model_.terminalOutputEnabled()) { logger.debug("Negative residual for " + model_.compNames().name(compIdx) + " equation.");
OpmLog::debug("Negative residual for " + model_.compNames().name(compIdx) + " equation.");
}
} else if (res[ii] > tol[ii]) { } else if (res[ii] > tol[ii]) {
report.setReservoirFailed({types[ii], CR::Severity::Normal, compIdx}); report.setReservoirFailed({types[ii], CR::Severity::Normal, compIdx});
} }
} }
} }
// Output of residuals. // Output of residuals. If converged at initial state, log nothing.
if (model_.terminalOutputEnabled()) const bool converged_at_initial_state = (report.converged() && iteration == 0);
{ if (!converged_at_initial_state) {
// Only rank 0 does print to std::cout
if (iteration == 0) { if (iteration == 0) {
std::string msg = fmt::format("Domain {}, size {}, containing cell {}\n| Iter", // Log header.
domain.index, domain.cells.size(), domain.cells[0]); std::string msg = fmt::format("Domain {} on rank {}, size {}, containing cell {}\n| Iter",
domain.index, this->rank_, domain.cells.size(), domain.cells[0]);
for (int compIdx = 0; compIdx < numComp; ++compIdx) { for (int compIdx = 0; compIdx < numComp; ++compIdx) {
msg += " MB("; msg += " MB(";
msg += model_.compNames().name(compIdx)[0]; msg += model_.compNames().name(compIdx)[0];
@ -665,8 +674,9 @@ private:
msg += model_.compNames().name(compIdx)[0]; msg += model_.compNames().name(compIdx)[0];
msg += ") "; msg += ") ";
} }
OpmLog::debug(msg); logger.debug(msg);
} }
// Log convergence data.
std::ostringstream ss; std::ostringstream ss;
ss << "| "; ss << "| ";
const std::streamsize oprec = ss.precision(3); const std::streamsize oprec = ss.precision(3);
@ -680,7 +690,7 @@ private:
} }
ss.precision(oprec); ss.precision(oprec);
ss.flags(oflags); ss.flags(oflags);
OpmLog::debug(ss.str()); logger.debug(ss.str());
} }
return report; return report;
@ -689,6 +699,7 @@ private:
ConvergenceReport getDomainConvergence(const Domain& domain, ConvergenceReport getDomainConvergence(const Domain& domain,
const SimulatorTimerInterface& timer, const SimulatorTimerInterface& timer,
const int iteration, const int iteration,
DeferredLogger& logger,
std::vector<double>& residual_norms) std::vector<double>& residual_norms)
{ {
std::vector<Scalar> B_avg(numEq, 0.0); std::vector<Scalar> B_avg(numEq, 0.0);
@ -696,9 +707,10 @@ private:
timer.currentStepLength(), timer.currentStepLength(),
iteration, iteration,
domain, domain,
logger,
B_avg, B_avg,
residual_norms); residual_norms);
report += model_.wellModel().getDomainWellConvergence(domain, B_avg); report += model_.wellModel().getDomainWellConvergence(domain, B_avg, logger);
return report; return report;
} }
@ -772,13 +784,14 @@ private:
void solveDomainJacobi(GlobalEqVector& solution, void solveDomainJacobi(GlobalEqVector& solution,
GlobalEqVector& locally_solved, GlobalEqVector& locally_solved,
SimulatorReportSingle& local_report, SimulatorReportSingle& local_report,
DeferredLogger& logger,
const int iteration, const int iteration,
const SimulatorTimerInterface& timer, const SimulatorTimerInterface& timer,
const Domain& domain) const Domain& domain)
{ {
auto initial_local_well_primary_vars = model_.wellModel().getPrimaryVarsDomain(domain); auto initial_local_well_primary_vars = model_.wellModel().getPrimaryVarsDomain(domain);
auto initial_local_solution = Details::extractVector(solution, domain.cells); auto initial_local_solution = Details::extractVector(solution, domain.cells);
auto res = solveDomain(domain, timer, iteration, false); auto res = solveDomain(domain, timer, logger, iteration, false);
local_report = res.first; local_report = res.first;
if (local_report.converged) { if (local_report.converged) {
auto local_solution = Details::extractVector(solution, domain.cells); auto local_solution = Details::extractVector(solution, domain.cells);
@ -796,13 +809,14 @@ private:
void solveDomainGaussSeidel(GlobalEqVector& solution, void solveDomainGaussSeidel(GlobalEqVector& solution,
GlobalEqVector& locally_solved, GlobalEqVector& locally_solved,
SimulatorReportSingle& local_report, SimulatorReportSingle& local_report,
DeferredLogger& logger,
const int iteration, const int iteration,
const SimulatorTimerInterface& timer, const SimulatorTimerInterface& timer,
const Domain& domain) const Domain& domain)
{ {
auto initial_local_well_primary_vars = model_.wellModel().getPrimaryVarsDomain(domain); auto initial_local_well_primary_vars = model_.wellModel().getPrimaryVarsDomain(domain);
auto initial_local_solution = Details::extractVector(solution, domain.cells); auto initial_local_solution = Details::extractVector(solution, domain.cells);
auto res = solveDomain(domain, timer, iteration, true); auto res = solveDomain(domain, timer, logger, iteration, true);
local_report = res.first; local_report = res.first;
if (!local_report.converged) { if (!local_report.converged) {
// We look at the detailed convergence report to evaluate // We look at the detailed convergence report to evaluate
@ -825,7 +839,7 @@ private:
const double acceptable_local_cnv_sum = 1.0; const double acceptable_local_cnv_sum = 1.0;
if (mb_sum < acceptable_local_mb_sum && cnv_sum < acceptable_local_cnv_sum) { if (mb_sum < acceptable_local_mb_sum && cnv_sum < acceptable_local_cnv_sum) {
local_report.converged = true; local_report.converged = true;
OpmLog::debug("Accepting solution in unconverged domain " + std::to_string(domain.index)); logger.debug(fmt::format("Accepting solution in unconverged domain {} on rank {}.", domain.index, rank_));
} }
} }
} }
@ -942,6 +956,7 @@ private:
std::vector<std::unique_ptr<Mat>> domain_matrices_; //!< Vector of matrix operator for each subdomain std::vector<std::unique_ptr<Mat>> domain_matrices_; //!< Vector of matrix operator for each subdomain
std::vector<ISTLSolverType> domain_linsolvers_; //!< Vector of linear solvers for each domain std::vector<ISTLSolverType> domain_linsolvers_; //!< Vector of linear solvers for each domain
SimulatorReportSingle local_reports_accumulated_; //!< Accumulated convergence report for subdomain solvers SimulatorReportSingle local_reports_accumulated_; //!< Accumulated convergence report for subdomain solvers
int rank_ = 0; //!< MPI rank of this process
}; };
} // namespace Opm } // namespace Opm

View File

@ -291,7 +291,8 @@ namespace Opm {
// Check if well equations are converged locally. // Check if well equations are converged locally.
ConvergenceReport getDomainWellConvergence(const Domain& domain, ConvergenceReport getDomainWellConvergence(const Domain& domain,
const std::vector<Scalar>& B_avg) const; const std::vector<Scalar>& B_avg,
DeferredLogger& local_deferredLogger) const;
const SimulatorReportSingle& lastReport() const; const SimulatorReportSingle& lastReport() const;

View File

@ -1764,56 +1764,38 @@ namespace Opm {
ConvergenceReport ConvergenceReport
BlackoilWellModel<TypeTag>:: BlackoilWellModel<TypeTag>::
getDomainWellConvergence(const Domain& domain, getDomainWellConvergence(const Domain& domain,
const std::vector<Scalar>& B_avg) const const std::vector<Scalar>& B_avg,
DeferredLogger& local_deferredLogger) const
{ {
const auto& summary_state = ebosSimulator_.vanguard().summaryState(); const auto& summary_state = ebosSimulator_.vanguard().summaryState();
const int iterationIdx = ebosSimulator_.model().newtonMethod().numIterations(); const int iterationIdx = ebosSimulator_.model().newtonMethod().numIterations();
const bool relax_tolerance = iterationIdx > param_.strict_outer_iter_wells_; const bool relax_tolerance = iterationIdx > param_.strict_outer_iter_wells_;
Opm::DeferredLogger local_deferredLogger; ConvergenceReport report;
ConvergenceReport local_report;
for (const auto& well : well_container_) { for (const auto& well : well_container_) {
if ((well_domain_.at(well->name()) == domain.index)) { if ((well_domain_.at(well->name()) == domain.index)) {
if (well->isOperableAndSolvable() || well->wellIsStopped()) { if (well->isOperableAndSolvable() || well->wellIsStopped()) {
local_report += well->getWellConvergence(summary_state, report += well->getWellConvergence(summary_state,
this->wellState(), this->wellState(),
B_avg, B_avg,
local_deferredLogger, local_deferredLogger,
relax_tolerance); relax_tolerance);
} else { } else {
ConvergenceReport report; ConvergenceReport xreport;
using CR = ConvergenceReport; using CR = ConvergenceReport;
report.setWellFailed({CR::WellFailure::Type::Unsolvable, CR::Severity::Normal, -1, well->name()}); xreport.setWellFailed({CR::WellFailure::Type::Unsolvable, CR::Severity::Normal, -1, well->name()});
local_report += report; report += xreport;
} }
} }
} }
// This function will be called once for each domain on a process,
// and the number of domains on a process will vary, so there is
// no way to communicate here. There is also no need, as a domain
// is local to a single process in our current approach.
// Therefore there is no call to gatherDeferredLogger() or to
// gatherConvergenceReport() below. However, as of now, log messages
// on non-output ranks will be lost here.
// TODO: create the DeferredLogger outside this scope to enable it
// to be gathered and printed on the output rank.
Opm::DeferredLogger global_deferredLogger = local_deferredLogger;
ConvergenceReport report = local_report;
if (terminal_output_) {
global_deferredLogger.logMessages();
}
// Log debug messages for NaN or too large residuals. // Log debug messages for NaN or too large residuals.
// TODO: This will as of now only be logged on the output rank.
// In the similar code in getWellConvergence(), all ranks will be
// at the same spot, that does not hold for this per-domain function.
if (terminal_output_) { if (terminal_output_) {
for (const auto& f : report.wellFailures()) { for (const auto& f : report.wellFailures()) {
if (f.severity() == ConvergenceReport::Severity::NotANumber) { if (f.severity() == ConvergenceReport::Severity::NotANumber) {
OpmLog::debug("NaN residual found with phase " + std::to_string(f.phase()) + " for well " + f.wellName()); local_deferredLogger.debug("NaN residual found with phase " + std::to_string(f.phase()) + " for well " + f.wellName());
} else if (f.severity() == ConvergenceReport::Severity::TooLarge) { } else if (f.severity() == ConvergenceReport::Severity::TooLarge) {
OpmLog::debug("Too large residual found with phase " + std::to_string(f.phase()) + " for well " + f.wellName()); local_deferredLogger.debug("Too large residual found with phase " + std::to_string(f.phase()) + " for well " + f.wellName());
} }
} }
} }