diff --git a/opm/simulators/flow/ReservoirCouplingMaster.cpp b/opm/simulators/flow/ReservoirCouplingMaster.cpp
index 688182b13..000c6cee1 100644
--- a/opm/simulators/flow/ReservoirCouplingMaster.cpp
+++ b/opm/simulators/flow/ReservoirCouplingMaster.cpp
@@ -150,7 +150,10 @@ receiveNextReportDateFromSlaves()
     if (this->comm_.rank() == 0) {
         for (unsigned int i = 0; i < num_slaves; i++) {
             double slave_next_report_time_offset; // Elapsed time from the beginning of the simulation
-            int result = MPI_Recv(
+            // NOTE: All slave-master communicators have set a custom error handler, which eventually
+            // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+            // or MPI_Send() calls.
+            MPI_Recv(
                 &slave_next_report_time_offset,
                 /*count=*/1,
                 /*datatype=*/MPI_DOUBLE,
@@ -159,9 +162,6 @@ receiveNextReportDateFromSlaves()
                 this->getSlaveComm(i),
                 MPI_STATUS_IGNORE
             );
-            if (result != MPI_SUCCESS) {
-                OPM_THROW(std::runtime_error, "Failed to receive next report date from slave process");
-            }
             this->slave_next_report_time_offsets_[i] = slave_next_report_time_offset;
             OpmLog::info(
                 fmt::format(
diff --git a/opm/simulators/flow/ReservoirCouplingSlave.cpp b/opm/simulators/flow/ReservoirCouplingSlave.cpp
index c735d09b7..f98932c1c 100644
--- a/opm/simulators/flow/ReservoirCouplingSlave.cpp
+++ b/opm/simulators/flow/ReservoirCouplingSlave.cpp
@@ -48,6 +48,9 @@ ReservoirCouplingSlave(
     if (this->slave_master_comm_ == MPI_COMM_NULL) {
         OPM_THROW(std::runtime_error, "Slave process is not spawned by a master process");
     }
+    // NOTE: By installing a custom error handler for all slave-master communicators, which
+    // eventually will call MPI_Abort(), there is no need to check the return value of any
+    // MPI_Recv() or MPI_Send() calls as errors will be caught by the error handler.
     ReservoirCoupling::setErrhandler(this->slave_master_comm_, /*is_master=*/false);
 }
 
@@ -56,7 +59,10 @@ ReservoirCouplingSlave::
 receiveNextTimeStepFromMaster() {
     double timestep;
     if (this->comm_.rank() == 0) {
-        int result = MPI_Recv(
+        // NOTE: All slave-master communicators have set a custom error handler, which eventually
+        // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+        // or MPI_Send() calls.
+        MPI_Recv(
             &timestep,
             /*count=*/1,
             /*datatype=*/MPI_DOUBLE,
@@ -65,9 +71,6 @@ receiveNextTimeStepFromMaster() {
             this->slave_master_comm_,
             MPI_STATUS_IGNORE
         );
-        if (result != MPI_SUCCESS) {
-            OPM_THROW(std::runtime_error, "Failed to receive next time step from master");
-        }
         OpmLog::info(
             fmt::format("Slave rank 0 received next timestep {} from master.", timestep)
         );
@@ -84,7 +87,10 @@ receiveMasterGroupNamesFromMasterProcess() {
     std::vector<char> group_names;
     if (this->comm_.rank() == 0) {
         MPI_Aint asize = 0;
-        int result = MPI_Recv(
+        // NOTE: All slave-master communicators have set a custom error handler, which eventually
+        // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+        // or MPI_Send() calls.
+        MPI_Recv(
             &asize,
             /*count=*/1,
             /*datatype=*/MPI_AINT,
@@ -94,15 +100,11 @@ receiveMasterGroupNamesFromMasterProcess() {
             MPI_STATUS_IGNORE
         );
         OpmLog::info("Received master group names size from master process rank 0");
-        if (result != MPI_SUCCESS) {
-            OPM_THROW(std::runtime_error,
-                      "Failed to receive master group names (size) from master process");
-        }
         // NOTE: MPI_Aint and std::size_t should be compatible on most systems, but we will
         // cast it to std::size_t to avoid any potential issues
         size = static_cast<std::size_t>(asize);
         group_names.resize(size);
-        int result2 = MPI_Recv(
+        MPI_Recv(
             group_names.data(),
             /*count=*/size,
             /*datatype=*/MPI_CHAR,
@@ -111,10 +113,6 @@ receiveMasterGroupNamesFromMasterProcess() {
             this->slave_master_comm_,
             MPI_STATUS_IGNORE
         );
-        if (result2 != MPI_SUCCESS) {
-            OPM_THROW(std::runtime_error,
-                      "Failed to receive master group names from master process");
-        }
         OpmLog::info("Received master group names from master process rank 0");
     }
     this->comm_.broadcast(&size, /*count=*/1, /*emitter_rank=*/0);
@@ -136,6 +134,9 @@ sendNextReportDateToMasterProcess() const
         // NOTE: This is an offset in seconds from the start date, so it will be 0 if the next report
         // would be the start date. In general, it should be a positive number.
         double next_report_time_offset = elapsed_time + current_step_length;
+        // NOTE: All slave-master communicators have set a custom error handler, which eventually
+        // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+        // or MPI_Send() calls.
         MPI_Send(
             &next_report_time_offset,
             /*count=*/1,
@@ -155,6 +156,9 @@ sendActivationDateToMasterProcess() const
     if (this->comm_.rank() == 0) {
         // NOTE: The master process needs the s
         double activation_date = this->getGrupSlavActivationDate_();
+        // NOTE: All slave-master communicators have set a custom error handler, which eventually
+        // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+        // or MPI_Send() calls.
         MPI_Send(
             &activation_date,
             /*count=*/1,
@@ -174,6 +178,9 @@ sendSimulationStartDateToMasterProcess() const
     if (this->comm_.rank() == 0) {
         // NOTE: The master process needs the s
         double start_date = this->schedule_.getStartTime();
+        // NOTE: All slave-master communicators have set a custom error handler, which eventually
+        // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+        // or MPI_Send() calls.
         MPI_Send(
             &start_date,
             /*count=*/1,
diff --git a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp
index 34f051954..1d9c6bfce 100644
--- a/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp
+++ b/opm/simulators/flow/ReservoirCouplingSpawnSlaves.cpp
@@ -154,7 +154,10 @@ receiveActivationDateFromSlaves_()
     if (this->comm_.rank() == 0) {
         for (unsigned int i = 0; i < num_slaves; i++) {
             double slave_activation_date;
-            int result = MPI_Recv(
+            // NOTE: All slave-master communicators have set a custom error handler, which eventually
+            // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+            // or MPI_Send() calls.
+            MPI_Recv(
                 &slave_activation_date,
                 /*count=*/1,
                 /*datatype=*/MPI_DOUBLE,
@@ -163,9 +166,6 @@ receiveActivationDateFromSlaves_()
                 this->master_.getSlaveComm(i),
                 MPI_STATUS_IGNORE
             );
-            if (result != MPI_SUCCESS) {
-                OPM_THROW(std::runtime_error, "Failed to receive activation date from slave process");
-            }
             if (slave_activation_date < this->master_.getActivationDate()) {
                 OPM_THROW(std::runtime_error, "Slave process start date is earlier than "
                                               "the master process' activation date");
@@ -188,7 +188,10 @@ receiveSimulationStartDateFromSlaves_()
     if (this->comm_.rank() == 0) {
         for (unsigned int i = 0; i < num_slaves; i++) {
             double slave_start_date;
-            int result = MPI_Recv(
+            // NOTE: All slave-master communicators have set a custom error handler, which eventually
+            // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+            // or MPI_Send() calls.
+            MPI_Recv(
                 &slave_start_date,
                 /*count=*/1,
                 /*datatype=*/MPI_DOUBLE,
@@ -197,9 +200,6 @@ receiveSimulationStartDateFromSlaves_()
                 this->master_.getSlaveComm(i),
                 MPI_STATUS_IGNORE
             );
-            if (result != MPI_SUCCESS) {
-                OPM_THROW(std::runtime_error, "Failed to receive start date from slave process");
-            }
             this->master_.addSlaveStartDate(slave_start_date);
             OpmLog::info(
                 fmt::format(
@@ -227,6 +227,9 @@ sendMasterGroupNamesToSlaves_()
     for (unsigned int i = 0; i < num_slaves; i++) {
         auto slave_name = this->master_.getSlaveName(i);
         auto [group_names, size] = this->getMasterGroupNamesForSlave_(slave_name);
+        // NOTE: All slave-master communicators have set a custom error handler, which eventually
+        // will call MPI_Abort() so there is no need to check the return value of any MPI_Recv()
+        // or MPI_Send() calls.
        // NOTE: size should be of type std::size_t, so we can safely cast it to MPI_AINT
         MPI_Send(
             &size,
@@ -312,6 +315,9 @@ spawnSlaveProcesses_()
             }
             OPM_THROW(std::runtime_error, "Failed to spawn slave process");
         }
+        // NOTE: By installing a custom error handler for all slave-master communicators, which
+        // eventually will call MPI_Abort(), there is no need to check the return value of any
+        // MPI_Recv() or MPI_Send() calls as errors will be caught by the error handler.
         ReservoirCoupling::setErrhandler(master_slave_comm, /*is_master=*/true);
         OpmLog::info(
             fmt::format(
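
Note: dropping the MPI_SUCCESS checks is only safe because ReservoirCoupling::setErrhandler() attaches an error handler that aborts the whole job on any MPI failure. As a rough sketch of that mechanism (not the actual OPM implementation; the handler and helper names, the logging, and the abort code below are assumptions), a communicator error handler can be installed through the standard MPI error-handler API:

#include <mpi.h>
#include <cstdio>

namespace {

// Invoked by the MPI library when a call on the attached communicator fails.
// The variadic signature is required by MPI_Comm_create_errhandler().
void couplingErrorHandler(MPI_Comm* /*comm*/, int* err_code, ...)
{
    char msg[MPI_MAX_ERROR_STRING];
    int len = 0;
    MPI_Error_string(*err_code, msg, &len);
    std::fprintf(stderr, "Reservoir coupling MPI error: %s\n", msg);
    // Terminate all processes; callers of MPI_Recv()/MPI_Send() on this
    // communicator therefore never see (or need to check) an error code.
    MPI_Abort(MPI_COMM_WORLD, /*errorcode=*/1);
}

} // anonymous namespace

// Hypothetical helper, analogous in spirit to ReservoirCoupling::setErrhandler():
// attach the handler to a slave-master communicator.
void attachCouplingErrorHandler(MPI_Comm comm)
{
    MPI_Errhandler handler;
    MPI_Comm_create_errhandler(couplingErrorHandler, &handler);
    MPI_Comm_set_errhandler(comm, handler);
    // The handler object may be freed once attached; the communicator keeps
    // its own reference until it is freed.
    MPI_Errhandler_free(&handler);
}

With such a handler attached to each slave-master communicator, a failing MPI_Recv() or MPI_Send() does not return control to the caller with an error code; the handler runs and the coupled run is aborted, which is what the NOTE comments added in this diff rely on.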