Move internal api to ov (#15964)

* Move cpu streams executor to new API

* Remove legacy headers from new dev API

* Fixed build issues

* Fixed build

* Fixed typo

* Fixed typo

* Fixed build

* Fixed code style

* Add exception for template constructor of SoPtr
Ilya Churaev 2023-03-01 14:00:55 +04:00 committed by GitHub
parent 21ac61fef5
commit 113aefa3ff
26 changed files with 726 additions and 428 deletions

View File

@ -12,7 +12,7 @@ TemplateNonTypeParameter: '^\w*$'
ClassTemplate: '^([A-Z][\w]+|element_type_traits)$'
TemplateTypeParameter: '^\w*$'
ParameterName: '^\w*$'
FunctionTemplate: '^(operator.+|[\w]+|Impl<.*>)$'
FunctionTemplate: '^(operator.+|[\w]+|SoPtr.+|Impl<.*>)$'
TypeAliasName: '^\w+$'
VariableReference: '^\w+$'

View File

@ -13,14 +13,14 @@
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/so_ptr.hpp"
#include "openvino/runtime/tensor.hpp"
#include "so_ptr.hpp"
namespace ov {
/**
* @interface ICore
* @brief Minimal ICore interface to allow plugin to get information from Core Inference Engine class.
* @brief Minimal ICore interface to allow plugin to get information from Core OpenVINO class.
* @ingroup ov_dev_api_plugin_api
*/
class ICore {

View File

@ -0,0 +1,92 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief This is a wrapper class for handling plugin instantiation and releasing resources
* @file openvino/runtime/so_ptr.hpp
*/
#pragma once
#include <cassert>
#include <functional>
#include <memory>
#include <string>
#include <type_traits>
#include "openvino/runtime/common.hpp"
namespace ov {
/**
* @brief This class instantiates an object using a shared library
* @tparam T A type of object SoPtr can hold
*/
template <class T>
struct SoPtr {
/**
* @brief Default constructor
*/
SoPtr() = default;
/**
* @brief Destructor preserves unloading order of implementation object and reference to library
*/
~SoPtr() {
_ptr = {};
}
/**
* @brief Constructs an object with existing shared object reference and loaded pointer
* @param ptr pointer to the loaded object
* @param so Existing reference to library
*/
SoPtr(const std::shared_ptr<T>& ptr, const std::shared_ptr<void>& so) : _ptr{ptr}, _so{so} {}
/**
* @brief The copy-like constructor; it can create an SoPtr that dereferences to a child type when T is derived from U
* @param that The SoPtr object to copy
*/
template <typename U>
SoPtr(const SoPtr<U>& that) : _ptr{std::dynamic_pointer_cast<T>(that._ptr)},
_so{that._so} {
IE_ASSERT(_ptr != nullptr);
}
/**
* @brief Standard pointer operator
* @return A pointer to the underlying implementation object
*/
T* operator->() const noexcept {
return _ptr.get();
}
explicit operator bool() const noexcept {
return _ptr != nullptr;
}
friend bool operator==(std::nullptr_t, const SoPtr& ptr) noexcept {
return !ptr;
}
friend bool operator==(const SoPtr& ptr, std::nullptr_t) noexcept {
return !ptr;
}
friend bool operator!=(std::nullptr_t, const SoPtr& ptr) noexcept {
return static_cast<bool>(ptr);
}
friend bool operator!=(const SoPtr& ptr, std::nullptr_t) noexcept {
return static_cast<bool>(ptr);
}
/**
* @brief Gets a smart pointer to the custom object
*/
std::shared_ptr<T> _ptr;
/**
* @brief The shared object or dynamic loaded library
*/
std::shared_ptr<void> _so;
};
} // namespace ov
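
To illustrate how the wrapper is meant to be used, here is a minimal sketch; the IPlugin interface and describe() function are hypothetical stand-ins used only for illustration and are not part of this change:

#include <iostream>
#include <memory>
#include <string>

#include "openvino/runtime/so_ptr.hpp"

// Hypothetical interface, for illustration only.
struct IPlugin {
    virtual ~IPlugin() = default;
    virtual std::string get_name() const = 0;
};

void describe(const ov::SoPtr<IPlugin>& plugin) {
    // _so keeps the shared library loaded for as long as the wrapped object exists;
    // the destructor resets _ptr before _so, so the object never outlives its library.
    if (plugin != nullptr) {
        std::cout << plugin->get_name() << std::endl;  // operator-> forwards to the implementation
    }
}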

View File

@ -12,7 +12,6 @@
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/threading/istreams_executor.hpp"
#include "openvino/runtime/threading/itask_executor.hpp"
#include "threading/ie_istreams_executor.hpp"
namespace ov {

View File

@ -0,0 +1,37 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @file openvino/runtime/threading/immediate_executor.hpp
* @brief A header file for OpenVINO Immediate Executor implementation
*/
#pragma once
#include <memory>
#include <string>
#include "threading/ie_itask_executor.hpp"
namespace ov {
namespace threading {
/**
* @brief Task executor implementation that simply runs tasks in the current thread when run() is called
* @ingroup ov_dev_api_threading
*/
class ImmediateExecutor : public ITaskExecutor {
public:
/**
* @brief Destroys the object.
*/
~ImmediateExecutor() override = default;
void run(Task task) override {
task();
}
};
} // namespace threading
} // namespace ov
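
A small usage sketch of the relocated executor (illustrative only):

#include <iostream>

#include "openvino/runtime/threading/immediate_executor.hpp"

int main() {
    ov::threading::ImmediateExecutor executor;
    // run() executes the task synchronously in the calling thread.
    executor.run([] {
        std::cout << "task executed inline" << std::endl;
    });
    return 0;
}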

View File

@ -3,8 +3,8 @@
//
/**
* @file ie_istreams_executor.hpp
* @brief A header file for Inference Engine Streams-based Executor Interface
* @file openvino/runtime/threading/istreams_executor.hpp
* @brief A header file for OpenVINO Streams-based Executor Interface
*/
#pragma once
@ -85,7 +85,7 @@ public:
std::string _name; //!< Used by `ITT` to name executor threads
int _streams = 1; //!< Number of streams.
int _threadsPerStream = 0; //!< Number of threads per stream that executes `ie_parallel` calls
int _threadsPerStream = 0; //!< Number of threads per stream that executes `ov_parallel` calls
ThreadBindingType _threadBindingType = ThreadBindingType::NONE; //!< Thread binding to hardware resource type.
//!< No binding by default
int _threadBindingStep = 1; //!< In case of @ref CORES binding offset type
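
For context, a hedged sketch of filling these Config fields and handing them to a streams executor; the default-constructibility of Config and the CPUStreamsExecutor constructor are assumed from the headers touched by this change:

#include "openvino/runtime/threading/cpu_streams_executor.hpp"
#include "openvino/runtime/threading/istreams_executor.hpp"

void run_on_streams() {
    ov::threading::IStreamsExecutor::Config config;
    config._name = "example_streams";  // used by ITT to name executor threads
    config._streams = 2;               // two parallel streams
    config._threadsPerStream = 0;      // 0 lets the executor pick a default

    ov::threading::CPUStreamsExecutor executor{config};
    executor.run([] {
        // work scheduled onto one of the streams
    });
}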

View File

@ -0,0 +1,124 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
/**
* @brief A file containing thread local class implementation.
* @file openvino/runtime/threading/thread_local.hpp
*/
#include "openvino/core/parallel.hpp"
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
# include <tbb/enumerable_thread_specific.h>
#else
# include <functional>
# include <memory>
# include <mutex>
# include <thread>
# include <unordered_map>
# include <utility>
#endif
namespace ov {
namespace threading {
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
/**
* @brief A wrapper class to keep object to be thread local.
* @ingroup ov_dev_api_threading
* @tparam T A type of object to keep thread local.
*/
template <typename T>
using ThreadLocal = tbb::enumerable_thread_specific<T>;
#else
template <typename T>
struct ThreadLocal {
using Map = std::unordered_map<std::thread::id, T>;
using Create = std::function<T()>;
Map _map;
mutable std::mutex _mutex;
Create _create;
ThreadLocal()
: _create{[] {
return T{};
}} {}
explicit ThreadLocal(const T& init)
: _create{[init] {
return init;
}} {}
ThreadLocal(ThreadLocal&& other) : _map{std::move(other._map)}, _create{std::move(other._create)} {}
ThreadLocal& operator=(ThreadLocal&& other) {
_map = std::move(other._map);
_create = std::move(other._create);
return *this;
}
ThreadLocal(const ThreadLocal&) = delete;
ThreadLocal& operator=(const ThreadLocal&&) = delete;
explicit ThreadLocal(const Create& create_) : _create{create_} {}
T& local() {
auto threadId = std::this_thread::get_id();
std::lock_guard<std::mutex> lock{_mutex};
auto itThreadLocal = _map.find(threadId);
if (itThreadLocal != _map.end()) {
return itThreadLocal->second;
} else {
return _map.emplace(threadId, _create()).first->second;
}
}
auto size() const -> decltype(_map.size()) {
std::lock_guard<std::mutex> lock{_mutex};
return _map.size();
}
// WARNING: Thread Unsafe
template <typename It>
struct Iterator {
It it;
bool operator!=(const Iterator& other) {
return it != other.it;
}
Iterator& operator++() {
++it;
return *this;
}
auto operator*() -> decltype(it->second) {
return it->second;
}
auto operator-> () -> decltype(&(it->second)) {
return &(it->second);
}
auto operator*() const -> decltype(it->second) {
return it->second;
}
auto operator-> () const -> decltype(&(it->second)) {
return &(it->second);
}
};
auto begin() -> Iterator<decltype(_map.begin())> {
return {_map.begin()};
}
auto end() -> Iterator<decltype(_map.end())> {
return {_map.end()};
}
auto begin() const -> Iterator<decltype(_map.begin())> const {
return {_map.begin()};
}
auto end() const -> Iterator<decltype(_map.end())> const {
return {_map.end()};
}
};
#endif
} // namespace threading
} // namespace ov
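
A brief sketch of the wrapper's use; it behaves the same whether the TBB alias or the fallback above is selected:

#include <iostream>
#include <thread>
#include <vector>

#include "openvino/runtime/threading/thread_local.hpp"

int main() {
    // Each thread lazily gets its own copy, initialized from the given value.
    ov::threading::ThreadLocal<int> per_thread_counter{0};

    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i) {
        workers.emplace_back([&] {
            per_thread_counter.local() += 1;  // touches only this thread's copy
        });
    }
    for (auto& worker : workers) {
        worker.join();
    }

    std::cout << "threads that created a copy: " << per_thread_counter.size() << std::endl;
    return 0;
}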

View File

@ -8,85 +8,4 @@
*/
#pragma once
#include <cassert>
#include <functional>
#include <memory>
#include <string>
#include <type_traits>
#include "openvino/runtime/common.hpp"
namespace ov {
/**
* @brief This class instantiate object using shared library
* @tparam T An type of object SoPtr can hold
*/
template <class T>
struct SoPtr {
/**
* @brief Default constructor
*/
SoPtr() = default;
/**
* @brief Destructor preserves unloading order of implementation object and reference to library
*/
~SoPtr() {
_ptr = {};
}
/**
* @brief Constructs an object with existing shared object reference and loaded pointer
* @param ptr pointer to the loaded object
* @param so Existing reference to library
*/
SoPtr(const std::shared_ptr<T>& ptr, const std::shared_ptr<void>& so) : _ptr{ptr}, _so{so} {}
/**
* @brief The copy-like constructor, can create So Pointer that dereferenced into child type if T is derived of U
* @param that copied SoPtr object
*/
template <typename U>
SoPtr(const SoPtr<U>& that) : _ptr{std::dynamic_pointer_cast<T>(that._ptr)},
_so{that._so} {
IE_ASSERT(_ptr != nullptr);
}
/**
* @brief Standard pointer operator
* @return underlined interface with disabled Release method
*/
T* operator->() const noexcept {
return _ptr.get();
}
explicit operator bool() const noexcept {
return _ptr != nullptr;
}
friend bool operator==(std::nullptr_t, const SoPtr& ptr) noexcept {
return !ptr;
}
friend bool operator==(const SoPtr& ptr, std::nullptr_t) noexcept {
return !ptr;
}
friend bool operator!=(std::nullptr_t, const SoPtr& ptr) noexcept {
return static_cast<bool>(ptr);
}
friend bool operator!=(const SoPtr& ptr, std::nullptr_t) noexcept {
return static_cast<bool>(ptr);
}
/**
* @brief Gets a smart pointer to the custom object
*/
std::shared_ptr<T> _ptr;
/**
* @brief The shared object or dynamic loaded library
*/
std::shared_ptr<void> _so;
};
} // namespace ov
#include "openvino/runtime/so_ptr.hpp"

View File

@ -12,6 +12,7 @@
#include <memory>
#include <string>
#include "openvino/runtime/threading/immediate_executor.hpp"
#include "threading/ie_itask_executor.hpp"
namespace InferenceEngine {

View File

@ -33,10 +33,10 @@
#include "openvino/runtime/ivariable_state.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/so_ptr.hpp"
#include "openvino/runtime/tensor.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#include "openvino/runtime/variable_state.hpp"
#include "so_ptr.hpp"
#include "threading/ie_executor_manager.hpp"
#include "transformations/utils/utils.hpp"

View File

@ -8,16 +8,16 @@
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/ivariable_state.hpp"
#include "openvino/runtime/threading/immediate_executor.hpp"
#include "openvino/runtime/threading/istreams_executor.hpp"
#include "openvino/runtime/variable_state.hpp"
#include "threading/ie_immediate_executor.hpp"
#include "threading/ie_istreams_executor.hpp"
namespace {
struct ImmediateStreamsExecutor : public ov::threading::ITaskExecutor {
explicit ImmediateStreamsExecutor(const std::shared_ptr<ov::threading::IStreamsExecutor>& streamsExecutor)
: _streamsExecutor{streamsExecutor} {}
void run(InferenceEngine::Task task) override {
void run(ov::threading::Task task) override {
_streamsExecutor->execute(std::move(task));
}
std::shared_ptr<ov::threading::IStreamsExecutor> _streamsExecutor;
@ -40,10 +40,10 @@ ov::IAsyncInferRequest::IAsyncInferRequest(const std::shared_ptr<IInferRequest>&
m_sync_request->infer();
}}};
if (m_sync_request)
m_sync_pipeline = {{std::make_shared<InferenceEngine::ImmediateExecutor>(), [this] {
m_sync_pipeline = {{std::make_shared<ov::threading::ImmediateExecutor>(), [this] {
m_sync_request->infer();
}}};
auto streams_executor = std::dynamic_pointer_cast<InferenceEngine::IStreamsExecutor>(m_request_executor);
auto streams_executor = std::dynamic_pointer_cast<ov::threading::IStreamsExecutor>(m_request_executor);
if (streams_executor != nullptr) {
m_sync_pipeline = {{std::make_shared<ImmediateStreamsExecutor>(std::move(streams_executor)), [this] {
m_sync_request->infer();
@ -123,7 +123,7 @@ void ov::IAsyncInferRequest::run_first_stage(const Pipeline::iterator itBeginSta
firstStageExecutor->run(make_next_stage_task(itBeginStage, itEndStage, std::move(callbackExecutor)));
}
InferenceEngine::Task ov::IAsyncInferRequest::make_next_stage_task(
ov::threading::Task ov::IAsyncInferRequest::make_next_stage_task(
const Pipeline::iterator itStage,
const Pipeline::iterator itEndStage,
const std::shared_ptr<ov::threading::ITaskExecutor> callbackExecutor) {

View File

@ -4,7 +4,6 @@
#include "openvino/runtime/icompiled_model.hpp"
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
#include "icompiled_model_wrapper.hpp"
#include "openvino/core/model.hpp"
#include "transformations/utils/utils.hpp"

View File

@ -11,12 +11,12 @@
#include <thread>
#include <vector>
#include "dev/threading/parallel_custom_arena.hpp"
#include "dev/threading/thread_affinity.hpp"
#include "openvino/itt.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#include "threading/ie_parallel_custom_arena.hpp"
#include "threading/ie_thread_affinity.hpp"
#include "threading/ie_thread_local.hpp"
#include "openvino/runtime/threading/thread_local.hpp"
namespace ov {
namespace threading {
@ -24,13 +24,13 @@ struct CPUStreamsExecutor::Impl {
struct Stream {
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
struct Observer : public custom::task_scheduler_observer {
InferenceEngine::CpuSet _mask;
CpuSet _mask;
int _ncpus = 0;
int _threadBindingStep = 0;
int _offset = 0;
int _cpuIdxOffset = 0;
Observer(custom::task_arena& arena,
InferenceEngine::CpuSet mask,
CpuSet mask,
int ncpus,
const int streamId,
const int threadsPerStream,
@ -44,14 +44,14 @@ struct CPUStreamsExecutor::Impl {
_offset{streamId * threadsPerStream + threadBindingOffset},
_cpuIdxOffset(cpuIdxOffset) {}
void on_scheduler_entry(bool) override {
InferenceEngine::PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(),
_threadBindingStep,
_ncpus,
_mask,
_cpuIdxOffset);
pin_thread_to_vacant_core(_offset + tbb::this_task_arena::current_thread_index(),
_threadBindingStep,
_ncpus,
_mask,
_cpuIdxOffset);
}
void on_scheduler_exit(bool) override {
PinCurrentThreadByMask(_ncpus, _mask);
pin_current_thread_by_mask(_ncpus, _mask);
}
~Observer() override = default;
};
@ -136,9 +136,9 @@ struct CPUStreamsExecutor::Impl {
_taskArena.reset(new custom::task_arena{custom::task_arena::constraints{}
.set_core_type(selected_core_type)
.set_max_concurrency(max_concurrency)});
InferenceEngine::CpuSet processMask;
CpuSet processMask;
int ncpus = 0;
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
std::tie(processMask, ncpus) = get_process_mask();
if (nullptr != processMask) {
_observer.reset(new Observer{*_taskArena,
std::move(processMask),
@ -157,9 +157,9 @@ struct CPUStreamsExecutor::Impl {
(ThreadBindingType::CORES == _impl->_config._threadBindingType)) {
_taskArena.reset(new custom::task_arena{concurrency});
if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
InferenceEngine::CpuSet processMask;
CpuSet processMask;
int ncpus = 0;
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
std::tie(processMask, ncpus) = get_process_mask();
if (nullptr != processMask) {
_observer.reset(new Observer{*_taskArena,
std::move(processMask),
@ -175,32 +175,29 @@ struct CPUStreamsExecutor::Impl {
#elif OV_THREAD == OV_THREAD_OMP
omp_set_num_threads(_impl->_config._threadsPerStream);
if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != _impl->_config._threadBindingType)) {
InferenceEngine::CpuSet processMask;
CpuSet processMask;
int ncpus = 0;
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
std::tie(processMask, ncpus) = get_process_mask();
if (nullptr != processMask) {
parallel_nt(_impl->_config._threadsPerStream, [&](int threadIndex, int threadsPerStream) {
int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex +
_impl->_config._threadBindingOffset;
InferenceEngine::PinThreadToVacantCore(thrIdx,
_impl->_config._threadBindingStep,
ncpus,
processMask);
pin_thread_to_vacant_core(thrIdx, _impl->_config._threadBindingStep, ncpus, processMask);
});
}
}
#elif OV_THREAD == OV_THREAD_SEQ
if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
InferenceEngine::PinCurrentThreadToSocket(_numaNodeId);
pin_current_thread_to_socket(_numaNodeId);
} else if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
InferenceEngine::CpuSet processMask;
CpuSet processMask;
int ncpus = 0;
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
std::tie(processMask, ncpus) = get_process_mask();
if (nullptr != processMask) {
InferenceEngine::PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset,
_impl->_config._threadBindingStep,
ncpus,
processMask);
pin_thread_to_vacant_core(_streamId + _impl->_config._threadBindingOffset,
_impl->_config._threadBindingStep,
ncpus,
processMask);
}
}
#endif
@ -342,7 +339,7 @@ struct CPUStreamsExecutor::Impl {
std::queue<Task> _taskQueue;
bool _isStopped = false;
std::vector<int> _usedNumaNodes;
InferenceEngine::ThreadLocal<std::shared_ptr<Stream>> _streams;
ov::threading::ThreadLocal<std::shared_ptr<Stream>> _streams;
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
// stream id mapping to the core type
// stored in the reversed order (so the big cores, with the highest core_type_id value, are populated first)

View File

@ -7,7 +7,6 @@
#include "openvino/core/parallel.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/threading/cpu_streams_executor.hpp"
#include "threading/ie_cpu_streams_executor.hpp"
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
# if (TBB_INTERFACE_VERSION < 12000)
# include <tbb/task_scheduler_init.h>

View File

@ -0,0 +1,48 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief Defines openvino tbbbind domains for tracing
* @file openvino/runtime/threading/itt.hpp
*/
#pragma once
#include "openvino/cc/selective_build.h"
#include "openvino/core/except.hpp"
#include "openvino/itt.hpp"
namespace ov {
namespace tbbbind {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(tbb_bind);
} // namespace domains
} // namespace itt
} // namespace tbbbind
} // namespace ov
OV_CC_DOMAINS(tbb_bind);
/*
* The TBB_BIND_SCOPE macro allows disabling parts of the tbb_bind calls when they are not used.
*/
#if defined(SELECTIVE_BUILD_ANALYZER)
# define TBB_BIND_SCOPE(region) OV_SCOPE(tbb_bind, region)
# define TBB_BIND_NUMA_ENABLED OV_SCOPE(tbb_bind, NUMA)
#elif defined(SELECTIVE_BUILD)
# define TBB_BIND_SCOPE(region) \
if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, NUMA)) == 1 && \
OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, region)) == 1)
# define TBB_BIND_NUMA_ENABLED
#else
# define TBB_BIND_SCOPE(region)
# define TBB_BIND_NUMA_ENABLED
#endif

View File

@ -1,13 +1,13 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ie_parallel_custom_arena.hpp"
#include "dev/threading/parallel_custom_arena.hpp"
#include <cstring>
#include "itt.hpp"
#include "dev/threading/itt.hpp"
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
# ifndef TBBBIND_2_5_AVAILABLE
# define TBBBIND_2_5_AVAILABLE 0
@ -329,4 +329,4 @@ int default_concurrency(numa_node_id id) {
} // namespace info
} // namespace custom
#endif /*IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO*/
#endif /*OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO*/

View File

@ -0,0 +1,141 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief Contains declarations and custom threading interfaces based on TBB info and task_arena APIs.
*
* @file dev/threading/parallel_custom_arena.hpp
*/
#pragma once
#include "openvino/core/parallel.hpp"
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
# include <cstddef>
# include <memory>
# include <mutex>
# include <type_traits>
# include <vector>
namespace custom {
using numa_node_id = int;
using core_type_id = int;
namespace detail {
struct constraints {
constraints(numa_node_id id = tbb::task_arena::automatic, int maximal_concurrency = tbb::task_arena::automatic)
: numa_id{id},
max_concurrency{maximal_concurrency},
core_type{tbb::task_arena::automatic},
max_threads_per_core{tbb::task_arena::automatic} {}
constraints& set_numa_id(numa_node_id id) {
numa_id = id;
return *this;
}
constraints& set_max_concurrency(int maximal_concurrency) {
max_concurrency = maximal_concurrency;
return *this;
}
constraints& set_core_type(core_type_id id) {
core_type = id;
return *this;
}
constraints& set_max_threads_per_core(int threads_number) {
max_threads_per_core = threads_number;
return *this;
}
numa_node_id numa_id = tbb::task_arena::automatic;
int max_concurrency = tbb::task_arena::automatic;
core_type_id core_type = tbb::task_arena::automatic;
int max_threads_per_core = tbb::task_arena::automatic;
};
class binding_handler;
class binding_observer : public tbb::task_scheduler_observer {
binding_handler* my_binding_handler;
public:
binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c);
~binding_observer();
void on_scheduler_entry(bool) override;
void on_scheduler_exit(bool) override;
};
struct binding_observer_deleter {
void operator()(binding_observer* observer) const {
observer->observe(false);
delete observer;
}
};
using binding_oberver_ptr = std::unique_ptr<binding_observer, binding_observer_deleter>;
} // namespace detail
class task_arena {
tbb::task_arena my_task_arena;
std::once_flag my_initialization_state;
detail::constraints my_constraints;
detail::binding_oberver_ptr my_binding_observer;
public:
using constraints = detail::constraints;
static const int automatic = tbb::task_arena::automatic;
task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1);
task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1);
task_arena(const task_arena& s);
void initialize();
void initialize(int max_concurrency_, unsigned reserved_for_masters = 1);
void initialize(constraints constraints_, unsigned reserved_for_masters = 1);
explicit operator tbb::task_arena&();
int max_concurrency();
template <typename F>
void enqueue(F&& f) {
initialize();
my_task_arena.enqueue(std::forward<F>(f));
}
template <typename F>
auto execute(F&& f) -> decltype(f()) {
initialize();
return my_task_arena.execute(std::forward<F>(f));
}
};
struct task_scheduler_observer : public tbb::task_scheduler_observer {
task_scheduler_observer(custom::task_arena& arena)
: tbb::task_scheduler_observer(static_cast<tbb::task_arena&>(arena)),
my_arena(arena) {}
void observe(bool state = true) {
if (state) {
my_arena.initialize();
}
tbb::task_scheduler_observer::observe(state);
}
custom::task_arena& my_arena;
};
namespace info {
std::vector<numa_node_id> numa_nodes();
std::vector<core_type_id> core_types();
int default_concurrency(numa_node_id id = task_arena::automatic);
int default_concurrency(task_arena::constraints c);
} // namespace info
} // namespace custom
#endif /*(OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)*/
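
An illustrative sketch of driving the relocated arena wrapper; the concrete NUMA id and concurrency values are placeholders:

#include "dev/threading/parallel_custom_arena.hpp"

#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
void run_constrained_work() {
    // Limit the arena to NUMA node 0 and at most four worker threads.
    custom::task_arena arena{custom::task_arena::constraints{}
                                 .set_numa_id(0)
                                 .set_max_concurrency(4)};
    arena.execute([] {
        // Work executed inside the constrained arena; initialize() is called lazily.
    });
}
#endif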

View File

@ -0,0 +1,119 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "dev/threading/thread_affinity.hpp"
#include <cerrno>
#include <climits>
#include <tuple>
#include <utility>
#include "openvino/runtime/system_conf.hpp"
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
# include <sched.h>
# include <unistd.h>
#endif
namespace ov {
namespace threading {
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
std::tuple<CpuSet, int> get_process_mask() {
for (int ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) {
CpuSet mask{CPU_ALLOC(ncpus)};
if (nullptr == mask)
break;
const size_t size = CPU_ALLOC_SIZE(ncpus);
CPU_ZERO_S(size, mask.get());
// the result fits the mask
if (0 == sched_getaffinity(getpid(), size, mask.get())) {
return std::make_tuple(std::move(mask), ncpus);
}
// other error
if (errno != EINVAL)
break;
}
return std::make_tuple(nullptr, 0);
}
/* Release the cores affinity mask for the current process */
void release_process_mask(cpu_set_t* mask) {
if (nullptr != mask)
CPU_FREE(mask);
}
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
return 0 == sched_setaffinity(0, CPU_ALLOC_SIZE(ncores), procMask.get());
}
bool pin_thread_to_vacant_core(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
if (procMask == nullptr)
return false;
const size_t size = CPU_ALLOC_SIZE(ncores);
const int num_cpus = CPU_COUNT_S(size, procMask.get());
thrIdx %= num_cpus; // To limit unique number in [; num_cpus-1] range
// Place threads with specified step
int cpu_idx = cpuIdxOffset;
for (int i = 0, offset = 0; i < thrIdx; ++i) {
cpu_idx += hyperthreads;
if (cpu_idx >= num_cpus)
cpu_idx = ++offset;
}
// Find index of 'cpu_idx'-th bit that equals to 1
int mapped_idx = cpuIdxOffset - 1;
while (cpu_idx >= cpuIdxOffset) {
mapped_idx++;
if (CPU_ISSET_S(mapped_idx, size, procMask.get()))
--cpu_idx;
}
CpuSet targetMask{CPU_ALLOC(ncores)};
CPU_ZERO_S(size, targetMask.get());
CPU_SET_S(mapped_idx, size, targetMask.get());
bool res = pin_current_thread_by_mask(ncores, targetMask);
return res;
}
bool pin_current_thread_to_socket(int socket) {
const int sockets = ov::get_available_numa_nodes().size();
const int cores = ov::get_number_of_cpu_cores();
const int cores_per_socket = cores / sockets;
int ncpus = 0;
CpuSet mask;
std::tie(mask, ncpus) = get_process_mask();
CpuSet targetMask{CPU_ALLOC(ncpus)};
const size_t size = CPU_ALLOC_SIZE(ncpus);
CPU_ZERO_S(size, targetMask.get());
for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; core++) {
CPU_SET_S(core, size, targetMask.get());
}
// respect the user-defined mask for the entire process
CPU_AND_S(size, targetMask.get(), targetMask.get(), mask.get());
bool res = false;
if (CPU_COUNT_S(size, targetMask.get())) { // if we have non-zero mask to set
res = pin_current_thread_by_mask(ncpus, targetMask);
}
return res;
}
#else // no threads pinning/binding on Win/MacOS
std::tuple<CpuSet, int> get_process_mask() {
return std::make_tuple(nullptr, 0);
}
void release_process_mask(cpu_set_t*) {}
bool pin_thread_to_vacant_core(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
return false;
}
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
return false;
}
bool pin_current_thread_to_socket(int socket) {
return false;
}
#endif // !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
} // namespace threading
} // namespace ov

View File

@ -0,0 +1,93 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <tuple>
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
# include <sched.h>
#endif
namespace ov {
namespace threading {
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
using cpu_set_t = void;
#endif // (defined(__APPLE__) || defined(_WIN32))
/**
* @brief Release the cores affinity mask for the current process
* @ingroup ov_dev_api_threading
*
* @param mask The mask
*/
void release_process_mask(cpu_set_t* mask);
/**
* @brief Deleter for process mask
* @ingroup ov_dev_api_threading
*/
struct ReleaseProcessMaskDeleter {
/**
* @brief A callable operator to release object
*
* @param mask The mask to release
*/
void operator()(cpu_set_t* mask) const {
release_process_mask(mask);
}
};
/**
* @brief A unique pointer to CPU set structure with the ReleaseProcessMaskDeleter deleter
* @ingroup ov_dev_api_threading
*/
using CpuSet = std::unique_ptr<cpu_set_t, ReleaseProcessMaskDeleter>;
/**
* @brief Get the cores affinity mask for the current process
* @ingroup ov_dev_api_threading
* @return A core affinity mask
*/
std::tuple<CpuSet, int> get_process_mask();
/**
* @brief Pins a thread to a spare core in the round-robin scheme, while respecting the given process mask.
* The function can also handle hyper-threading (by populating the physical cores first)
* @ingroup ov_dev_api_threading
*
* @param[in] thrIdx The thr index
* @param[in] hyperThreads The hyper threads
* @param[in] ncores The ncores
* @param[in] processMask The process mask
* @return `True` in case of success, `false` otherwise
*/
bool pin_thread_to_vacant_core(int thrIdx,
int hyperThreads,
int ncores,
const CpuSet& processMask,
int cpuIdxOffset = 0);
/**
* @brief Pins the current thread to a set of cores determined by the mask
* @ingroup ov_dev_api_threading
*
* @param[in] ncores The ncores
* @param[in] processMask The process mask
* @return `True` in case of success, `false` otherwise
*/
bool pin_current_thread_by_mask(int ncores, const CpuSet& processMask);
/**
* @brief Pins a current thread to a socket.
* @ingroup ov_dev_api_threading
*
* @param[in] socket The socket id
* @return `True` in case of success, `false` otherwise
*/
bool pin_current_thread_to_socket(int socket);
} // namespace threading
} // namespace ov
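
A minimal sketch of combining these helpers to pin the calling thread (meaningful only on Linux; elsewhere the mask comes back empty and the pinning calls return false):

#include <tuple>

#include "dev/threading/thread_affinity.hpp"

bool pin_calling_thread_to_first_vacant_core() {
    ov::threading::CpuSet process_mask;
    int ncpus = 0;
    std::tie(process_mask, ncpus) = ov::threading::get_process_mask();
    if (process_mask == nullptr) {
        return false;  // no affinity information available (e.g. Windows/macOS)
    }
    // Thread index 0, binding step 1, honoring the process-wide mask.
    return ov::threading::pin_thread_to_vacant_core(0, 1, ncpus, process_mask);
}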

View File

@ -13,10 +13,10 @@
#include <string>
#include <vector>
#include "ie_common.h"
#include "ie_system_conf.h"
#include "dev/threading/parallel_custom_arena.hpp"
#include "openvino/core/except.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "streams_executor.hpp"
#include "threading/ie_parallel_custom_arena.hpp"
namespace ov {
@ -242,7 +242,7 @@ void parse_processor_info_linux(const int _processors,
}
};
#if !((IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO))
#if !((OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO))
std::vector<int> get_available_numa_nodes() {
std::vector<int> nodes((0 == cpu._sockets) ? 1 : cpu._sockets);
std::iota(std::begin(nodes), std::end(nodes), 0);
@ -252,7 +252,7 @@ std::vector<int> get_available_numa_nodes() {
int get_number_of_cpu_cores(bool bigCoresOnly) {
unsigned numberOfProcessors = cpu._processors;
unsigned totalNumberOfCpuCores = cpu._cores;
IE_ASSERT(totalNumberOfCpuCores != 0);
OPENVINO_ASSERT(totalNumberOfCpuCores != 0);
cpu_set_t usedCoreSet, currentCoreSet, currentCpuSet;
CPU_ZERO(&currentCpuSet);
CPU_ZERO(&usedCoreSet);
@ -270,7 +270,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
}
}
int phys_cores = CPU_COUNT(&currentCoreSet);
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
auto core_types = custom::info::core_types();
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
phys_cores = custom::info::default_concurrency(

View File

@ -11,9 +11,9 @@
#include <memory>
#include <vector>
#include "dev/threading/parallel_custom_arena.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "streams_executor.hpp"
#include "threading/ie_parallel_custom_arena.hpp"
namespace ov {
@ -189,7 +189,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
phys_cores++;
} while (offset < sz);
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
auto core_types = custom::info::core_types();
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
phys_cores = custom::info::default_concurrency(
@ -199,7 +199,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
return phys_cores;
}
#if !(IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#if !(OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
// OMP/SEQ threading on the Windows doesn't support NUMA
std::vector<int> get_available_numa_nodes() {
return {-1};

View File

@ -8,8 +8,8 @@
#include <cstring>
#include <vector>
#include "dev/threading/parallel_custom_arena.hpp"
#include "openvino/core/visibility.hpp"
#include "threading/ie_parallel_custom_arena.hpp"
#define XBYAK_NO_OP_NAMES
#define XBYAK_UNDEF_JNL
@ -147,7 +147,7 @@ bool check_open_mp_env_vars(bool include_omp_num_threads) {
int get_number_of_cpu_cores(bool) {
return parallel_get_max_threads();
}
# if !((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
# if !((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
std::vector<int> get_available_numa_nodes() {
return {-1};
}
@ -158,7 +158,7 @@ int get_number_of_logical_cpu_cores(bool) {
#else
int get_number_of_logical_cpu_cores(bool bigCoresOnly) {
int logical_cores = parallel_get_max_threads();
# if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
# if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
auto core_types = custom::info::core_types();
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
logical_cores = custom::info::default_concurrency(
@ -169,7 +169,7 @@ int get_number_of_logical_cpu_cores(bool bigCoresOnly) {
}
#endif
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
std::vector<int> get_available_numa_nodes() {
return custom::info::numa_nodes();
}

View File

@ -10,132 +10,4 @@
#pragma once
#include "ie_parallel.hpp"
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#include <cstddef>
#include <type_traits>
#include <mutex>
#include <vector>
#include <memory>
namespace custom {
using numa_node_id = int;
using core_type_id = int;
namespace detail {
struct constraints {
constraints(numa_node_id id = tbb::task_arena::automatic, int maximal_concurrency = tbb::task_arena::automatic)
: numa_id{id}
, max_concurrency{maximal_concurrency}
, core_type{tbb::task_arena::automatic}
, max_threads_per_core{tbb::task_arena::automatic}
{}
constraints& set_numa_id(numa_node_id id) {
numa_id = id;
return *this;
}
constraints& set_max_concurrency(int maximal_concurrency) {
max_concurrency = maximal_concurrency;
return *this;
}
constraints& set_core_type(core_type_id id) {
core_type = id;
return *this;
}
constraints& set_max_threads_per_core(int threads_number) {
max_threads_per_core = threads_number;
return *this;
}
numa_node_id numa_id = tbb::task_arena::automatic;
int max_concurrency = tbb::task_arena::automatic;
core_type_id core_type = tbb::task_arena::automatic;
int max_threads_per_core = tbb::task_arena::automatic;
};
class binding_handler;
class binding_observer : public tbb::task_scheduler_observer {
binding_handler* my_binding_handler;
public:
binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c);
~binding_observer();
void on_scheduler_entry(bool) override;
void on_scheduler_exit(bool) override;
};
struct binding_observer_deleter {
void operator()(binding_observer* observer) const {
observer->observe(false);
delete observer;
}
};
using binding_oberver_ptr = std::unique_ptr<binding_observer, binding_observer_deleter>;
} // namespace detail
class task_arena {
tbb::task_arena my_task_arena;
std::once_flag my_initialization_state;
detail::constraints my_constraints;
detail::binding_oberver_ptr my_binding_observer;
public:
using constraints = detail::constraints;
static const int automatic = tbb::task_arena::automatic;
task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1);
task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1);
task_arena(const task_arena &s);
void initialize();
void initialize(int max_concurrency_, unsigned reserved_for_masters = 1);
void initialize(constraints constraints_, unsigned reserved_for_masters = 1);
explicit operator tbb::task_arena&();
int max_concurrency();
template<typename F>
void enqueue(F&& f) {
initialize();
my_task_arena.enqueue(std::forward<F>(f));
}
template<typename F>
auto execute(F&& f) -> decltype(f()) {
initialize();
return my_task_arena.execute(std::forward<F>(f));
}
};
struct task_scheduler_observer: public tbb::task_scheduler_observer {
task_scheduler_observer(custom::task_arena& arena) :
tbb::task_scheduler_observer(static_cast<tbb::task_arena&>(arena)),
my_arena(arena)
{}
void observe(bool state = true) {
if (state) {
my_arena.initialize();
}
tbb::task_scheduler_observer::observe(state);
}
custom::task_arena& my_arena;
};
namespace info {
std::vector<numa_node_id> numa_nodes();
std::vector<core_type_id> core_types();
int default_concurrency(numa_node_id id = task_arena::automatic);
int default_concurrency(task_arena::constraints c);
} // namespace info
} // namespace custom
#endif /*(IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)*/
#include "dev/threading/parallel_custom_arena.hpp"

View File

@ -4,114 +4,26 @@
#include "threading/ie_thread_affinity.hpp"
#include <cerrno>
#include <climits>
#include <tuple>
#include <utility>
#include "ie_system_conf.h"
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
# include <sched.h>
# include <unistd.h>
#endif
#include "dev/threading/thread_affinity.hpp"
namespace InferenceEngine {
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
std::tuple<CpuSet, int> GetProcessMask() {
for (int ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) {
CpuSet mask{CPU_ALLOC(ncpus)};
if (nullptr == mask)
break;
const size_t size = CPU_ALLOC_SIZE(ncpus);
CPU_ZERO_S(size, mask.get());
// the result fits the mask
if (0 == sched_getaffinity(getpid(), size, mask.get())) {
return std::make_tuple(std::move(mask), ncpus);
}
// other error
if (errno != EINVAL)
break;
}
return std::make_tuple(nullptr, 0);
return ov::threading::get_process_mask();
}
/* Release the cores affinity mask for the current process */
void ReleaseProcessMask(cpu_set_t* mask) {
if (nullptr != mask)
CPU_FREE(mask);
}
bool PinCurrentThreadByMask(int ncores, const CpuSet& procMask) {
return 0 == sched_setaffinity(0, CPU_ALLOC_SIZE(ncores), procMask.get());
ov::threading::release_process_mask(mask);
}
bool PinThreadToVacantCore(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
if (procMask == nullptr)
return false;
const size_t size = CPU_ALLOC_SIZE(ncores);
const int num_cpus = CPU_COUNT_S(size, procMask.get());
thrIdx %= num_cpus; // To limit unique number in [; num_cpus-1] range
// Place threads with specified step
int cpu_idx = cpuIdxOffset;
for (int i = 0, offset = 0; i < thrIdx; ++i) {
cpu_idx += hyperthreads;
if (cpu_idx >= num_cpus)
cpu_idx = ++offset;
}
// Find index of 'cpu_idx'-th bit that equals to 1
int mapped_idx = cpuIdxOffset - 1;
while (cpu_idx >= cpuIdxOffset) {
mapped_idx++;
if (CPU_ISSET_S(mapped_idx, size, procMask.get()))
--cpu_idx;
}
CpuSet targetMask{CPU_ALLOC(ncores)};
CPU_ZERO_S(size, targetMask.get());
CPU_SET_S(mapped_idx, size, targetMask.get());
bool res = PinCurrentThreadByMask(ncores, targetMask);
return res;
}
bool PinCurrentThreadToSocket(int socket) {
const int sockets = InferenceEngine::getAvailableNUMANodes().size();
const int cores = InferenceEngine::getNumberOfCPUCores();
const int cores_per_socket = cores / sockets;
int ncpus = 0;
CpuSet mask;
std::tie(mask, ncpus) = GetProcessMask();
CpuSet targetMask{CPU_ALLOC(ncpus)};
const size_t size = CPU_ALLOC_SIZE(ncpus);
CPU_ZERO_S(size, targetMask.get());
for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; core++) {
CPU_SET_S(core, size, targetMask.get());
}
// respect the user-defined mask for the entire process
CPU_AND_S(size, targetMask.get(), targetMask.get(), mask.get());
bool res = false;
if (CPU_COUNT_S(size, targetMask.get())) { // if we have non-zero mask to set
res = PinCurrentThreadByMask(ncpus, targetMask);
}
return res;
}
#else // no threads pinning/binding on Win/MacOS
std::tuple<CpuSet, int> GetProcessMask() {
return std::make_tuple(nullptr, 0);
}
void ReleaseProcessMask(cpu_set_t*) {}
bool PinThreadToVacantCore(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
return false;
return ov::threading::pin_thread_to_vacant_core(thrIdx, hyperthreads, ncores, procMask, cpuIdxOffset);
}
bool PinCurrentThreadByMask(int ncores, const CpuSet& procMask) {
return false;
return ov::threading::pin_current_thread_by_mask(ncores, procMask);
}
bool PinCurrentThreadToSocket(int socket) {
return false;
return ov::threading::pin_current_thread_to_socket(socket);
}
#endif // !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
} // namespace InferenceEngine

View File

@ -7,7 +7,7 @@
#include <memory>
#include <tuple>
#include "ie_api.h"
#include "dev/threading/thread_affinity.hpp"
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
# include <sched.h>
@ -15,7 +15,7 @@
namespace InferenceEngine {
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
using cpu_set_t = void;
using cpu_set_t = ov::threading::cpu_set_t;
#endif // (defined(__APPLE__) || defined(_WIN32))
/**
@ -26,26 +26,9 @@ using cpu_set_t = void;
*/
void ReleaseProcessMask(cpu_set_t* mask);
/**
* @brief Deleter for process mask
* @ingroup ie_dev_api_threading
*/
struct ReleaseProcessMaskDeleter {
/**
* @brief A callable operator to release object
*
* @param mask The mask to release
*/
void operator()(cpu_set_t* mask) const {
ReleaseProcessMask(mask);
}
};
using ReleaseProcessMaskDeleter = ov::threading::ReleaseProcessMaskDeleter;
/**
* @brief A unique pointer to CPU set structure with the ReleaseProcessMaskDeleter deleter
* @ingroup ie_dev_api_threading
*/
using CpuSet = std::unique_ptr<cpu_set_t, ReleaseProcessMaskDeleter>;
using CpuSet = ov::threading::CpuSet;
/**
* @brief Get the cores affinity mask for the current process

View File

@ -9,41 +9,4 @@
#pragma once
#include <openvino/cc/selective_build.h>
#include <openvino/itt.hpp>
#include "openvino/core/except.hpp"
namespace ov {
namespace tbbbind {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(tbb_bind);
} // namespace domains
} // namespace itt
} // namespace tbbbind
} // namespace ov
OV_CC_DOMAINS(tbb_bind);
/*
* TBB_BIND_SCOPE macro allows to disable parts of tbb_bind calling if they are not used.
*/
#if defined(SELECTIVE_BUILD_ANALYZER)
# define TBB_BIND_SCOPE(region) OV_SCOPE(tbb_bind, region)
# define TBB_BIND_NUMA_ENABLED OV_SCOPE(tbb_bind, NUMA)
#elif defined(SELECTIVE_BUILD)
# define TBB_BIND_SCOPE(region) \
if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, NUMA)) == 1 && \
OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, region)) == 1)
# define TBB_BIND_NUMA_ENABLED
#else
# define TBB_BIND_SCOPE(region)
# define TBB_BIND_NUMA_ENABLED
#endif
#include "dev/threading/itt.hpp"