Improve Hybrid CPUs support implementation (#5433)

* Make find_package(TBBBIND_2_4) silent and remove custom::task_arena's inheritance from tbb::task_arena

* Make my_binding_observer a std::unique_ptr with a custom deleter

Co-authored-by: Kochin, Ivan <ivan.kochin@intel.com>
This commit is contained in:
Ivan Kochin 2021-04-28 21:27:11 +03:00 committed by GitHub
parent 8e87087acf
commit 7cad047f53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 78 additions and 65 deletions

View File

@ -5,7 +5,10 @@
set (TARGET_NAME "inference_engine") set (TARGET_NAME "inference_engine")
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
find_package(TBBBIND_2_4) find_package(TBBBIND_2_4 QUIET)
if (TBBBIND_2_4_FOUND)
message(STATUS "Static tbbbind_2_4 package was found")
endif()
endif() endif()
file (GLOB LIBRARY_SRC file (GLOB LIBRARY_SRC

View File

@ -32,14 +32,14 @@ struct CPUStreamsExecutor::Impl {
int _ncpus = 0; int _ncpus = 0;
int _threadBindingStep = 0; int _threadBindingStep = 0;
int _offset = 0; int _offset = 0;
Observer(tbb::task_arena& arena, Observer(custom::task_arena& arena,
CpuSet mask, CpuSet mask,
int ncpus, int ncpus,
const int streamId, const int streamId,
const int threadsPerStream, const int threadsPerStream,
const int threadBindingStep, const int threadBindingStep,
const int threadBindingOffset) : const int threadBindingOffset) :
tbb::task_scheduler_observer(arena), tbb::task_scheduler_observer(static_cast<tbb::task_arena&>(arena)),
_mask{std::move(mask)}, _mask{std::move(mask)},
_ncpus(ncpus), _ncpus(ncpus),
_threadBindingStep(threadBindingStep), _threadBindingStep(threadBindingStep),

View File

@ -22,8 +22,6 @@ namespace custom {
namespace detail { namespace detail {
#if USE_TBBBIND_2_4 #if USE_TBBBIND_2_4
class binding_handler;
extern "C" { extern "C" {
void __TBB_internal_initialize_system_topology( void __TBB_internal_initialize_system_topology(
std::size_t groups_num, std::size_t groups_num,
@ -98,40 +96,34 @@ void initialize_system_topology() {
}); });
} }
class binding_observer : public tbb::task_scheduler_observer { binding_observer::binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c)
binding_handler* my_binding_handler; : task_scheduler_observer(ta) {
public: detail::initialize_system_topology();
binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) my_binding_handler = detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core);
: task_scheduler_observer(ta) { }
detail::initialize_system_topology();
my_binding_handler = detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core);
}
~binding_observer() {
detail::__TBB_internal_deallocate_binding_handler(my_binding_handler);
}
void on_scheduler_entry(bool) override { binding_observer::~binding_observer() {
detail::__TBB_internal_apply_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index()); detail::__TBB_internal_deallocate_binding_handler(my_binding_handler);
} }
void on_scheduler_exit(bool) override {
detail::__TBB_internal_restore_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index());
}
};
binding_observer* construct_binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) { void binding_observer::on_scheduler_entry(bool) {
binding_observer* observer = nullptr; detail::__TBB_internal_apply_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index());
}
void binding_observer::on_scheduler_exit(bool) {
detail::__TBB_internal_restore_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index());
}
binding_oberver_ptr construct_binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) {
binding_oberver_ptr observer{};
if (detail::is_binding_environment_valid() && if (detail::is_binding_environment_valid() &&
((c.core_type >= 0 && info::core_types().size() > 1) || (c.numa_id >= 0 && info::numa_nodes().size() > 1) || c.max_threads_per_core > 0)) { ((c.core_type >= 0 && info::core_types().size() > 1) || (c.numa_id >= 0 && info::numa_nodes().size() > 1) || c.max_threads_per_core > 0)) {
observer = new binding_observer(ta, num_slots, c); observer.reset(new binding_observer{ta, num_slots, c});
observer->observe(true); observer->observe(true);
} }
return observer; return observer;
} }
void destroy_binding_observer(binding_observer* observer) {
observer->observe(false);
delete observer;
}
#endif /*USE_TBBBIND_2_4*/ #endif /*USE_TBBBIND_2_4*/
#if TBB_NUMA_SUPPORT_PRESENT #if TBB_NUMA_SUPPORT_PRESENT
@ -149,52 +141,52 @@ tbb::task_arena::constraints convert_constraints(custom::task_arena::constraints
} // namespace detail } // namespace detail
task_arena::task_arena(int max_concurrency_, unsigned reserved_for_masters) task_arena::task_arena(int max_concurrency_, unsigned reserved_for_masters)
: tbb::task_arena{max_concurrency_, reserved_for_masters} : my_task_arena{max_concurrency_, reserved_for_masters}
, my_initialization_state{} , my_initialization_state{}
, my_constraints{} , my_constraints{}
, my_binding_observer{nullptr} , my_binding_observer{}
{} {}
task_arena::task_arena(const constraints& constraints_, unsigned reserved_for_masters) task_arena::task_arena(const constraints& constraints_, unsigned reserved_for_masters)
: tbb::task_arena{info::default_concurrency(constraints_), reserved_for_masters} : my_task_arena{info::default_concurrency(constraints_), reserved_for_masters}
, my_initialization_state{} , my_initialization_state{}
, my_constraints{constraints_} , my_constraints{constraints_}
, my_binding_observer{nullptr} , my_binding_observer{}
{} {}
task_arena::task_arena(const task_arena &s) task_arena::task_arena(const task_arena &s)
: tbb::task_arena{s} : my_task_arena{s.my_task_arena}
, my_initialization_state{} , my_initialization_state{}
, my_constraints{s.my_constraints} , my_constraints{s.my_constraints}
, my_binding_observer{nullptr} , my_binding_observer{}
{} {}
void task_arena::initialize() { void task_arena::initialize() {
#if USE_TBBBIND_2_4 #if USE_TBBBIND_2_4
std::call_once(my_initialization_state, [this] { std::call_once(my_initialization_state, [this] {
tbb::task_arena::initialize(); my_task_arena.initialize();
my_binding_observer = detail::construct_binding_observer( my_binding_observer = detail::construct_binding_observer(
*this, tbb::task_arena::max_concurrency(), my_constraints); my_task_arena, my_task_arena.max_concurrency(), my_constraints);
}); });
#elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT #elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT
tbb::task_arena::initialize(convert_constraints(my_constraints)); my_task_arena.initialize(convert_constraints(my_constraints));
#else #else
tbb::task_arena::initialize(); my_task_arena.initialize();
#endif #endif
} }
void task_arena::initialize(int max_concurrency_, unsigned reserved_for_masters) { void task_arena::initialize(int max_concurrency_, unsigned reserved_for_masters) {
#if USE_TBBBIND_2_4 #if USE_TBBBIND_2_4
std::call_once(my_initialization_state, [this, &max_concurrency_, &reserved_for_masters] { std::call_once(my_initialization_state, [this, &max_concurrency_, &reserved_for_masters] {
tbb::task_arena::initialize(max_concurrency_, reserved_for_masters); my_task_arena.initialize(max_concurrency_, reserved_for_masters);
my_binding_observer = detail::construct_binding_observer( my_binding_observer = detail::construct_binding_observer(
*this, tbb::task_arena::max_concurrency(), my_constraints); my_task_arena, my_task_arena.max_concurrency(), my_constraints);
}); });
#elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT #elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT
my_constraints.max_concurrency = max_concurrency_; my_constraints.max_concurrency = max_concurrency_;
tbb::task_arena::initialize(convert_constraints(my_constraints), reserved_for_masters); my_task_arena.initialize(convert_constraints(my_constraints), reserved_for_masters);
#else #else
tbb::task_arena::initialize(max_concurrency_, reserved_for_masters); my_task_arena.initialize(max_concurrency_, reserved_for_masters);
#endif #endif
} }
@ -202,28 +194,24 @@ void task_arena::initialize(constraints constraints_, unsigned reserved_for_mast
std::call_once(my_initialization_state, [this, &constraints_, &reserved_for_masters] { std::call_once(my_initialization_state, [this, &constraints_, &reserved_for_masters] {
my_constraints = constraints_; my_constraints = constraints_;
#if USE_TBBBIND_2_4 #if USE_TBBBIND_2_4
tbb::task_arena::initialize(info::default_concurrency(constraints_), reserved_for_masters); my_task_arena.initialize(info::default_concurrency(constraints_), reserved_for_masters);
my_binding_observer = detail::construct_binding_observer( my_binding_observer = detail::construct_binding_observer(
*this, tbb::task_arena::max_concurrency(), my_constraints); my_task_arena, my_task_arena.max_concurrency(), my_constraints);
#elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT #elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT
tbb::task_arena::initialize(convert_constraints(my_constraints), reserved_for_masters); my_task_arena.initialize(convert_constraints(my_constraints), reserved_for_masters);
#else #else
tbb::task_arena::initialize(my_constraints.max_concurrency, reserved_for_masters); my_task_arena.initialize(my_constraints.max_concurrency, reserved_for_masters);
#endif #endif
}); });
} }
int task_arena::max_concurrency() { task_arena::operator tbb::task_arena&() {
initialize(); return my_task_arena;
return tbb::task_arena::max_concurrency();
} }
task_arena::~task_arena() { int task_arena::max_concurrency() {
#if USE_TBBBIND_2_4 initialize();
if (my_binding_observer != nullptr) { return my_task_arena.max_concurrency();
detail::destroy_binding_observer(my_binding_observer);
}
#endif
} }
namespace info { namespace info {

View File

@ -18,6 +18,7 @@
#include <type_traits> #include <type_traits>
#include <mutex> #include <mutex>
#include <vector> #include <vector>
#include <memory>
namespace custom { namespace custom {
@ -26,7 +27,7 @@ using core_type_id = int;
namespace detail { namespace detail {
struct constraints { struct constraints {
constraints(numa_node_id id = -1, int maximal_concurrency = -1) constraints(numa_node_id id = tbb::task_arena::automatic, int maximal_concurrency = tbb::task_arena::automatic)
: numa_id{id} : numa_id{id}
, max_concurrency{maximal_concurrency} , max_concurrency{maximal_concurrency}
, core_type{tbb::task_arena::automatic} , core_type{tbb::task_arena::automatic}
@ -56,13 +57,34 @@ struct constraints {
int max_threads_per_core = tbb::task_arena::automatic; int max_threads_per_core = tbb::task_arena::automatic;
}; };
class binding_observer; class binding_handler;
class binding_observer : public tbb::task_scheduler_observer {
binding_handler* my_binding_handler;
public:
binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c);
~binding_observer();
void on_scheduler_entry(bool) override;
void on_scheduler_exit(bool) override;
};
struct binding_observer_deleter {
void operator()(binding_observer* observer) const {
observer->observe(false);
delete observer;
}
};
using binding_oberver_ptr = std::unique_ptr<binding_observer, binding_observer_deleter>;
} // namespace detail } // namespace detail
class task_arena : public tbb::task_arena { class task_arena {
tbb::task_arena my_task_arena;
std::once_flag my_initialization_state; std::once_flag my_initialization_state;
detail::constraints my_constraints; detail::constraints my_constraints;
detail::binding_observer* my_binding_observer; detail::binding_oberver_ptr my_binding_observer;
public: public:
using constraints = detail::constraints; using constraints = detail::constraints;
@ -76,20 +98,20 @@ public:
void initialize(int max_concurrency_, unsigned reserved_for_masters = 1); void initialize(int max_concurrency_, unsigned reserved_for_masters = 1);
void initialize(constraints constraints_, unsigned reserved_for_masters = 1); void initialize(constraints constraints_, unsigned reserved_for_masters = 1);
explicit operator tbb::task_arena&();
int max_concurrency(); int max_concurrency();
template<typename F> template<typename F>
void enqueue(F&& f) { void enqueue(F&& f) {
initialize(); initialize();
tbb::task_arena::enqueue(std::forward<F>(f)); my_task_arena.enqueue(std::forward<F>(f));
} }
template<typename F> template<typename F>
auto execute(F&& f) -> decltype(f()) { auto execute(F&& f) -> decltype(f()) {
initialize(); initialize();
return tbb::task_arena::execute(std::forward<F>(f)); return my_task_arena.execute(std::forward<F>(f));
} }
~task_arena();
}; };
namespace info { namespace info {