mirror of
https://github.com/OPM/opm-simulators.git
synced 2024-11-25 18:50:19 -06:00
Merge pull request #909 from lisajulia/feature/add-failureFlag-to-tasklets
Feature/add failure flag to tasklets
This commit is contained in:
commit
94c88f85e4
@ -27,6 +27,7 @@
|
||||
#ifndef EWOMS_TASKLETS_HH
|
||||
#define EWOMS_TASKLETS_HH
|
||||
|
||||
#include <atomic>
|
||||
#include <stdexcept>
|
||||
#include <cassert>
|
||||
#include <thread>
|
||||
@ -198,6 +199,11 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
bool failure() const
|
||||
{
|
||||
return this->failureFlag_.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns the index of the current worker thread.
|
||||
*
|
||||
@ -232,9 +238,11 @@ public:
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "ERROR: Uncaught std::exception when running tasklet: " << e.what() << ". Trying to continue.\n";
|
||||
failureFlag_.store(true, std::memory_order_relaxed);
|
||||
}
|
||||
catch (...) {
|
||||
std::cerr << "ERROR: Uncaught exception (general type) when running tasklet. Trying to continue.\n";
|
||||
failureFlag_.store(true, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -280,6 +288,16 @@ public:
|
||||
|
||||
barrierTasklet->wait();
|
||||
}
|
||||
private:
|
||||
// Atomic flag that is set to failure if any of the tasklets run by the TaskletRunner fails.
|
||||
// This flag is checked before new tasklets run or get dispatched and in case it is true, the
|
||||
// thread execution will be stopped / no new tasklets will be started and the program will abort.
|
||||
// To set the flag and load the flag, we use std::memory_order_relaxed.
|
||||
// Atomic operations tagged memory_order_relaxed are not synchronization operations; they do not
|
||||
// impose an order among concurrent memory accesses. They guarantee atomicity and modification order
|
||||
// consistency. This is the right choice for the setting here, since it is enough to broadcast failure
|
||||
// before new run or get dispatched.
|
||||
std::atomic<bool> failureFlag_ = false;
|
||||
|
||||
protected:
|
||||
// main function of the worker thread
|
||||
@ -295,6 +313,7 @@ protected:
|
||||
void run_()
|
||||
{
|
||||
while (true) {
|
||||
|
||||
// wait until tasklets have been pushed to the queue. first we need to lock
|
||||
// mutex for access to taskletQueue_
|
||||
std::unique_lock<std::mutex> lock(taskletQueueMutex_);
|
||||
@ -330,10 +349,12 @@ protected:
|
||||
tasklet->run();
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
std::cerr << "ERROR: Uncaught std::exception when running tasklet: " << e.what() << ". Trying to continue.\n";
|
||||
std::cerr << "ERROR: Uncaught std::exception when running tasklet: " << e.what() << ".\n";
|
||||
failureFlag_.store(true, std::memory_order_relaxed);
|
||||
}
|
||||
catch (...) {
|
||||
std::cerr << "ERROR: Uncaught exception when running tasklet. Trying to continue.\n";
|
||||
std::cerr << "ERROR: Uncaught exception when running tasklet.\n";
|
||||
failureFlag_.store(true, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -35,7 +35,8 @@
|
||||
|
||||
std::mutex outputMutex;
|
||||
|
||||
Opm::TaskletRunner *runner;
|
||||
// The runner is created on the heap for the assertion and outputs in the run function of the tasklets.
|
||||
std::unique_ptr<Opm::TaskletRunner> runner{};
|
||||
|
||||
class SleepTasklet : public Opm::TaskletInterface
|
||||
{
|
||||
@ -51,9 +52,8 @@ public:
|
||||
{
|
||||
assert(0 <= runner->workerThreadIndex() && runner->workerThreadIndex() < runner->numWorkerThreads());
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(mseconds_));
|
||||
outputMutex.lock();
|
||||
std::lock_guard<std::mutex> guard(outputMutex);
|
||||
std::cout << "Sleep tasklet " << n_ << " of " << mseconds_ << " ms completed by worker thread " << runner->workerThreadIndex() << std::endl;
|
||||
outputMutex.unlock();
|
||||
}
|
||||
|
||||
private:
|
||||
@ -67,9 +67,8 @@ void sleepAndPrintFunction()
|
||||
{
|
||||
int ms = 100;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(ms));
|
||||
outputMutex.lock();
|
||||
std::lock_guard<std::mutex> guard(outputMutex);
|
||||
std::cout << "Sleep completed by worker thread " << runner->workerThreadIndex() << std::endl;
|
||||
outputMutex.unlock();
|
||||
}
|
||||
|
||||
int SleepTasklet::numInstantiated_ = 0;
|
||||
@ -77,7 +76,7 @@ int SleepTasklet::numInstantiated_ = 0;
|
||||
int main()
|
||||
{
|
||||
int numWorkers = 2;
|
||||
runner = new Opm::TaskletRunner(numWorkers);
|
||||
runner = std::make_unique<Opm::TaskletRunner>(numWorkers);
|
||||
|
||||
// the master thread is not a worker thread
|
||||
assert(runner->workerThreadIndex() < 0);
|
||||
@ -96,8 +95,6 @@ int main()
|
||||
runner->dispatchFunction(sleepAndPrintFunction);
|
||||
runner->dispatchFunction(sleepAndPrintFunction, /*numInvokations=*/6);
|
||||
|
||||
delete runner;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
148
tests/models/test_tasklets_failure.cpp
Normal file
148
tests/models/test_tasklets_failure.cpp
Normal file
@ -0,0 +1,148 @@
|
||||
// -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
||||
// vi: set et ts=4 sw=4 sts=4:
|
||||
/*
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Consult the COPYING file in the top-level source directory of this
|
||||
module for the precise wording of the license and the list of
|
||||
copyright holders.
|
||||
*/
|
||||
/*!
|
||||
* \file
|
||||
*
|
||||
* \brief This file serves as an example of how to use the tasklet mechanism for
|
||||
* asynchronous work, especially for tasklets that fail.
|
||||
*/
|
||||
#define BOOST_TEST_MODULE TASKLETS_FAILURE
|
||||
|
||||
//#include <boost/test/unit_test.hpp>
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <cassert>
|
||||
|
||||
#include "config.h"
|
||||
#include <opm/models/parallel/tasklets.hh>
|
||||
|
||||
std::mutex outputMutex;
|
||||
|
||||
// The runner is created on the heap for the assertion and outputs in the run function of the tasklets.
|
||||
std::unique_ptr<Opm::TaskletRunner> runner{};
|
||||
|
||||
class SleepTasklet : public Opm::TaskletInterface
|
||||
{
|
||||
public:
|
||||
SleepTasklet(int mseconds, int id)
|
||||
: mseconds_(mseconds),
|
||||
id_(id)
|
||||
{}
|
||||
|
||||
void run() override
|
||||
{
|
||||
assert(0 <= runner->workerThreadIndex() && runner->workerThreadIndex() < runner->numWorkerThreads());
|
||||
std::cout << "Sleep tasklet " << id_ << " of " << mseconds_ << " ms starting sleep on worker thread " << runner->workerThreadIndex() << std::endl;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(mseconds_));
|
||||
std::lock_guard<std::mutex> guard(outputMutex);
|
||||
std::cout << "Sleep tasklet " << id_ << " of " << mseconds_ << " ms completed by worker thread " << runner->workerThreadIndex() << std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
int mseconds_;
|
||||
int id_;
|
||||
};
|
||||
|
||||
class FailingSleepTasklet : public Opm::TaskletInterface
|
||||
{
|
||||
public:
|
||||
FailingSleepTasklet(int mseconds)
|
||||
: mseconds_(mseconds)
|
||||
{}
|
||||
void run() override
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(mseconds_));
|
||||
std::lock_guard<std::mutex> guard(outputMutex);
|
||||
std::cout << "Failing sleep tasklet of " << mseconds_ << " ms failing now, on work thread " << runner->workerThreadIndex() << std::endl;
|
||||
throw std::logic_error("Intentional failure for testing");
|
||||
}
|
||||
|
||||
private:
|
||||
int mseconds_;
|
||||
};
|
||||
|
||||
void execute () {
|
||||
int numWorkers = 2;
|
||||
runner = std::make_unique<Opm::TaskletRunner>(numWorkers);
|
||||
|
||||
// the master thread is not a worker thread
|
||||
BOOST_REQUIRE_LT(runner->workerThreadIndex(), 0);
|
||||
BOOST_REQUIRE_EQUAL(runner->numWorkerThreads(), numWorkers);
|
||||
|
||||
// Dispatch some successful tasklets
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
runner->barrier();
|
||||
|
||||
if (runner->failure()) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
auto st = std::make_shared<SleepTasklet>(10,i);
|
||||
runner->dispatch(st);
|
||||
}
|
||||
|
||||
runner->barrier();
|
||||
if (runner->failure()) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
// Dispatch a failing tasklet
|
||||
auto failingSleepTasklet = std::make_shared<FailingSleepTasklet>(100);
|
||||
runner->dispatch(failingSleepTasklet);
|
||||
|
||||
// Dispatch more successful tasklets
|
||||
for (int i = 5; i < 10; ++i) {
|
||||
runner->barrier();
|
||||
|
||||
if (runner->failure()) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
auto st = std::make_shared<SleepTasklet>(10,i);
|
||||
runner->dispatch(st);
|
||||
}
|
||||
|
||||
std::cout << "before barrier" << std::endl;
|
||||
runner->barrier();
|
||||
}
|
||||
BOOST_AUTO_TEST_SUITE(Tasklets)
|
||||
BOOST_AUTO_TEST_CASE(TASKLETS_FAILURE) {
|
||||
pid_t pid = fork(); // Create a new process, such that this child process can call exit(EXIT_FAILURE)
|
||||
if (pid == -1) {
|
||||
BOOST_FAIL("Fork failed");
|
||||
} else if (pid == 0) {
|
||||
// Child process
|
||||
execute();
|
||||
_exit(0); // Should never reach here
|
||||
} else {
|
||||
// Parent process
|
||||
std::cout << "Checking failure of child process with parent process, process id " << pid << std::endl;
|
||||
int status;
|
||||
waitpid(pid, &status, 0);
|
||||
BOOST_CHECK(WIFEXITED(status)); // Check if the child process exited
|
||||
BOOST_CHECK_EQUAL(WEXITSTATUS(status), EXIT_FAILURE); // Check if the exit status is EXIT_FAILURE
|
||||
}
|
||||
}
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
Loading…
Reference in New Issue
Block a user