From b23ac860978be0743b919e37d7f3a9178d5098a6 Mon Sep 17 00:00:00 2001
From: River Li
Date: Wed, 7 Sep 2022 15:14:05 +0800
Subject: [PATCH] tbbbind support CC (#12107)

* Add CC support for tbbbind

* Use throw instead of return

* TBBBind will be optimized out for non-NUMA platforms

1. With NUMA, some of the TBBBind API may be optimized out, but some of it is definitely kept
2. The same applies to HYBRID cores on Alder Lake
3. With neither NUMA nor HYBRID cores, no TBBBind API is expected to be used

Co-authored-by: Ilya Lavrenov
---
 src/inference/CMakeLists.txt                   |  1 +
 .../threading/ie_parallel_custom_arena.cpp    | 15 ++++++
 src/inference/src/threading/itt.hpp           | 49 +++++++++++++++++++
 3 files changed, 65 insertions(+)
 create mode 100644 src/inference/src/threading/itt.hpp

diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt
index 2763cad4f4b..f09453e7dd1 100644
--- a/src/inference/CMakeLists.txt
+++ b/src/inference/CMakeLists.txt
@@ -146,6 +146,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
     $)
 
 target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt openvino::util openvino::core::dev)
+ie_mark_target_as_cc(${TARGET_NAME}_obj)
 
 set_ie_threading_interface_for(${TARGET_NAME}_obj)
 if (TBBBIND_2_5_FOUND)
diff --git a/src/inference/src/threading/ie_parallel_custom_arena.cpp b/src/inference/src/threading/ie_parallel_custom_arena.cpp
index c6fc254ee56..151fb39d435 100644
--- a/src/inference/src/threading/ie_parallel_custom_arena.cpp
+++ b/src/inference/src/threading/ie_parallel_custom_arena.cpp
@@ -4,6 +4,8 @@
 
 #include "ie_parallel_custom_arena.hpp"
 
+#include "itt.hpp"
+
 #if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
 
 #    ifndef TBBBIND_2_5_AVAILABLE
@@ -75,12 +77,16 @@ class TBBbindSystemTopology {
     TBBbindSystemTopology() {
 #    if USE_TBBBIND_2_5
         if (is_binding_environment_valid()) {
+            TBB_BIND_SCOPE(TBBbindSystemTopology)
             __TBB_internal_initialize_system_topology(get_processors_group_num(),
                                                       numa_nodes_count,
                                                       numa_nodes_indexes,
                                                       core_types_count,
                                                       core_types_indexes);
         }
+        if (numa_nodes_count > 1 || core_types_count > 1) {
+            TBB_BIND_NUMA_ENABLED;
+        }
 #    endif
     }
 
@@ -88,6 +94,7 @@ public:
     ~TBBbindSystemTopology() {
 #    if USE_TBBBIND_2_5
         if (is_binding_environment_valid()) {
+            TBB_BIND_SCOPE(TBBbindSystemTopology)
             __TBB_internal_destroy_system_topology();
         }
 #    endif
@@ -123,6 +130,7 @@ public:
         }
 #    if USE_TBBBIND_2_5
         if (is_binding_environment_valid()) {
+            TBB_BIND_SCOPE(default_concurrency)
             return __TBB_internal_get_default_concurrency(c.numa_id, c.core_type, c.max_threads_per_core);
         }
         return tbb::this_task_arena::max_concurrency();
@@ -179,19 +187,23 @@ const TBBbindSystemTopology& system_topology() {
 binding_observer::binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c)
     : task_scheduler_observer(ta) {
     detail::system_topology();
+    TBB_BIND_SCOPE(binding_observer)
     my_binding_handler =
         detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core);
 }
 
 binding_observer::~binding_observer() {
+    TBB_BIND_SCOPE(binding_observer)
     detail::__TBB_internal_deallocate_binding_handler(my_binding_handler);
 }
 
 void binding_observer::on_scheduler_entry(bool) {
+    TBB_BIND_SCOPE(on_scheduler_entry)
     detail::__TBB_internal_apply_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index());
 }
 
 void binding_observer::on_scheduler_exit(bool) {
+    TBB_BIND_SCOPE(on_scheduler_exit)
     detail::__TBB_internal_restore_affinity(my_binding_handler, tbb::this_task_arena::current_thread_index());
 }
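A note on the guarded-call pattern above: under SELECTIVE_BUILD, TBB_BIND_SCOPE(region) expands to a brace-less `if` whose condition is a compile-time constant, so it conditions exactly the one statement that follows it; a region that was never observed during the analyzer run becomes dead code that the optimizer removes, taking the corresponding TBBBind calls with it. Below is a minimal standalone sketch of that mechanism; the MY_* names are illustrative stand-ins, not OpenVINO's real macros.

// Minimal sketch of the guarding mechanism, under the assumption that the
// enable flags are compile-time constants generated from analyzer data
// (as they are in a selective build). All MY_* names are hypothetical.
#include <cstdio>

#define MY_CC_ENABLED_tbb_bind_NUMA 1              // region seen by the analyzer
#define MY_CC_ENABLED_tbb_bind_binding_observer 0  // region never seen

// Expands to a brace-less `if`, so it guards exactly the next statement.
#define MY_BIND_SCOPE(region) \
    if (MY_CC_ENABLED_tbb_bind_NUMA == 1 && MY_CC_ENABLED_tbb_bind_##region == 1)

void apply_affinity() {
    std::puts("affinity applied");
}

int main() {
    // Becomes `if (1 == 1 && 0 == 1) apply_affinity();` -- a constant-false
    // branch, so the call is eliminated as dead code.
    MY_BIND_SCOPE(binding_observer)
    apply_affinity();
    return 0;
}

The new header below defines the real macros for the three build flavors (analyzer, selective build, and regular build), continuing where the arena changes left off.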
@@ -240,6 +252,7 @@ task_arena::task_arena(const task_arena& s)
 void task_arena::initialize() {
     my_task_arena.initialize();
 #    if USE_TBBBIND_2_5
+    TBB_BIND_SCOPE(task_arena_initialize);
     std::call_once(my_initialization_state, [this] {
         my_binding_observer =
             detail::construct_binding_observer(my_task_arena, my_task_arena.max_concurrency(), my_constraints);
@@ -250,6 +263,7 @@ void task_arena::initialize() {
 void task_arena::initialize(int max_concurrency_, unsigned reserved_for_masters) {
     my_task_arena.initialize(max_concurrency_, reserved_for_masters);
 #    if USE_TBBBIND_2_5
+    TBB_BIND_SCOPE(task_arena_initialize_max_concurrency);
     std::call_once(my_initialization_state, [this] {
         my_binding_observer =
             detail::construct_binding_observer(my_task_arena, my_task_arena.max_concurrency(), my_constraints);
@@ -260,6 +274,7 @@ void task_arena::initialize(int max_concurrency_, unsigned reserved_for_masters)
 void task_arena::initialize(constraints constraints_, unsigned reserved_for_masters) {
     my_constraints = constraints_;
 #    if USE_TBBBIND_2_5
+    TBB_BIND_SCOPE(task_arena_initialize_constraints);
     my_task_arena.initialize(info::default_concurrency(constraints_), reserved_for_masters);
     std::call_once(my_initialization_state, [this] {
         my_binding_observer =
diff --git a/src/inference/src/threading/itt.hpp b/src/inference/src/threading/itt.hpp
new file mode 100644
index 00000000000..7b573aa22df
--- /dev/null
+++ b/src/inference/src/threading/itt.hpp
@@ -0,0 +1,49 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief Defines openvino tbbbind domains for tracing
+ * @file itt.hpp
+ */
+
+#pragma once
+
+#include <openvino/cc/selective_build.h>
+
+#include <openvino/itt.hpp>
+
+#include "openvino/core/except.hpp"
+
+namespace ov {
+namespace tbbbind {
+namespace itt {
+namespace domains {
+OV_ITT_DOMAIN(tbb_bind);
+}  // namespace domains
+}  // namespace itt
+}  // namespace tbbbind
+}  // namespace ov
+
+OV_CC_DOMAINS(tbb_bind);
+
+/*
+ * The TBB_BIND_SCOPE macro allows parts of the tbb_bind code to be disabled at build time if they are not used.
+ */
+#if defined(SELECTIVE_BUILD_ANALYZER)
+
+#    define TBB_BIND_SCOPE(region) OV_SCOPE(tbb_bind, region)
+#    define TBB_BIND_NUMA_ENABLED  OV_SCOPE(tbb_bind, NUMA)
+
+#elif defined(SELECTIVE_BUILD)
+
+#    define TBB_BIND_SCOPE(region)                                        \
+        if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, NUMA)) == 1 && \
+            OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, region)) == 1)
+#    define TBB_BIND_NUMA_ENABLED
+
+#else
+
+#    define TBB_BIND_SCOPE(region)
+#    define TBB_BIND_NUMA_ENABLED
+#endif
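Taken together with the `numa_nodes_count > 1 || core_types_count > 1` check in the TBBbindSystemTopology constructor, these macros implement the three cases from the commit message: an analyzer run records the tbb_bind NUMA region only on machines that actually expose multiple NUMA nodes or multiple core types, and every TBB_BIND_SCOPE condition in the final selective build also requires that region, so a build profiled on a non-NUMA, non-hybrid machine strips all TBBBind calls. A rough model of that decision follows; `Topology` and `numa_region_recorded` are hypothetical names standing in for the real analyzer machinery.

// Rough model of the three commit-message cases. Topology and
// numa_region_recorded are illustrative, not OpenVINO APIs.
#include <initializer_list>
#include <iostream>

struct Topology {
    int numa_nodes_count;
    int core_types_count;
};

// Mirrors the constructor gate: the NUMA region is only entered (and thus
// only recorded by the analyzer) on multi-node or hybrid-core machines.
bool numa_region_recorded(const Topology& t) {
    return t.numa_nodes_count > 1 || t.core_types_count > 1;
}

int main() {
    const Topology numa{2, 1};    // case 1: multiple NUMA nodes
    const Topology hybrid{1, 2};  // case 2: hybrid core types (e.g. Alder Lake)
    const Topology plain{1, 1};   // case 3: neither -> tbb_bind calls stripped
    for (const Topology& t : {numa, hybrid, plain}) {
        std::cout << (numa_region_recorded(t) ? "keep TBBBind calls" : "strip TBBBind calls") << '\n';
    }
    return 0;
}

In the regular (non-selective) build both macros expand to nothing, so the code compiles exactly as before the patch; only SELECTIVE_BUILD binaries see the constant-folded guards.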