Move internal api to ov (#15964)
* Move cpu streams executor to new API * Remove legacy headers from new dev API * Fixed build issues * Fixed build * Fixed typo * Fixed typo * Fixed build * Fixed code style * Add exception for template constructor of SoPtr
This commit is contained in:
parent
21ac61fef5
commit
113aefa3ff
@ -12,7 +12,7 @@ TemplateNonTypeParameter: '^\w*$'
|
||||
ClassTemplate: '^([A-Z][\w]+|element_type_traits)$'
|
||||
TemplateTypeParameter: '^\w*$'
|
||||
ParameterName: '^\w*$'
|
||||
FunctionTemplate: '^(operator.+|[\w]+|Impl<.*>)$'
|
||||
FunctionTemplate: '^(operator.+|[\w]+|SoPtr.+|Impl<.*>)$'
|
||||
TypeAliasName: '^\w+$'
|
||||
VariableReference: '^\w+$'
|
||||
|
||||
|
@ -13,14 +13,14 @@
|
||||
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/so_ptr.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "so_ptr.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
|
||||
* @interface ICore
|
||||
* @brief Minimal ICore interface to allow plugin to get information from Core Inference Engine class.
|
||||
* @brief Minimal ICore interface to allow plugin to get information from Core OpenVINO class.
|
||||
* @ingroup ov_dev_api_plugin_api
|
||||
*/
|
||||
class ICore {
|
||||
|
92
src/inference/dev_api/openvino/runtime/so_ptr.hpp
Normal file
92
src/inference/dev_api/openvino/runtime/so_ptr.hpp
Normal file
@ -0,0 +1,92 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief This is a wrapper class for handling plugin instantiation and releasing resources
|
||||
* @file openvino/runtime/so_ptr.hpp
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
#include <functional>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>

#include "openvino/runtime/common.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
|
||||
* @brief This class instantiate object using shared library
|
||||
* @tparam T An type of object SoPtr can hold
|
||||
*/
|
||||
template <class T>
|
||||
struct SoPtr {
|
||||
/**
|
||||
* @brief Default constructor
|
||||
*/
|
||||
SoPtr() = default;
|
||||
|
||||
/**
|
||||
* @brief Destructor preserves unloading order of implementation object and reference to library
|
||||
*/
|
||||
~SoPtr() {
|
||||
_ptr = {};
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Constructs an object with existing shared object reference and loaded pointer
|
||||
* @param ptr pointer to the loaded object
|
||||
* @param so Existing reference to library
|
||||
*/
|
||||
SoPtr(const std::shared_ptr<T>& ptr, const std::shared_ptr<void>& so) : _ptr{ptr}, _so{so} {}
|
||||
|
||||
/**
|
||||
* @brief The copy-like constructor, can create So Pointer that dereferenced into child type if T is derived of U
|
||||
* @param that copied SoPtr object
|
||||
*/
|
||||
template <typename U>
|
||||
SoPtr(const SoPtr<U>& that) : _ptr{std::dynamic_pointer_cast<T>(that._ptr)},
|
||||
_so{that._so} {
|
||||
IE_ASSERT(_ptr != nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Standard pointer operator
|
||||
* @return underlined interface with disabled Release method
|
||||
*/
|
||||
T* operator->() const noexcept {
|
||||
return _ptr.get();
|
||||
}
|
||||
|
||||
explicit operator bool() const noexcept {
|
||||
return _ptr != nullptr;
|
||||
}
|
||||
|
||||
friend bool operator==(std::nullptr_t, const SoPtr& ptr) noexcept {
|
||||
return !ptr;
|
||||
}
|
||||
friend bool operator==(const SoPtr& ptr, std::nullptr_t) noexcept {
|
||||
return !ptr;
|
||||
}
|
||||
friend bool operator!=(std::nullptr_t, const SoPtr& ptr) noexcept {
|
||||
return static_cast<bool>(ptr);
|
||||
}
|
||||
friend bool operator!=(const SoPtr& ptr, std::nullptr_t) noexcept {
|
||||
return static_cast<bool>(ptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets a smart pointer to the custom object
|
||||
*/
|
||||
std::shared_ptr<T> _ptr;
|
||||
|
||||
/**
|
||||
* @brief The shared object or dynamic loaded library
|
||||
*/
|
||||
std::shared_ptr<void> _so;
|
||||
};
|
||||
|
||||
} // namespace ov
|
@ -12,7 +12,6 @@
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "openvino/runtime/threading/istreams_executor.hpp"
|
||||
#include "openvino/runtime/threading/itask_executor.hpp"
|
||||
#include "threading/ie_istreams_executor.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
|
@ -0,0 +1,37 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @file openvino/runtime/threading/immediate_executor.hpp
|
||||
* @brief A header file for OpenVINO Immediate Executor implementation
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "threading/ie_itask_executor.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
|
||||
/**
|
||||
* @brief Task executor implementation that just run tasks in current thread during calling of run() method
|
||||
* @ingroup ov_dev_api_threading
|
||||
*/
|
||||
class ImmediateExecutor : public ITaskExecutor {
|
||||
public:
|
||||
/**
|
||||
* @brief Destroys the object.
|
||||
*/
|
||||
~ImmediateExecutor() override = default;
|
||||
|
||||
void run(Task task) override {
|
||||
task();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace threading
|
||||
} // namespace ov
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
/**
|
||||
* @file ie_istreams_executor.hpp
|
||||
* @brief A header file for Inference Engine Streams-based Executor Interface
|
||||
* @file openvino/runtime/threading/istreams_executor.hpp
|
||||
* @brief A header file for OpenVINO Streams-based Executor Interface
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
@ -85,7 +85,7 @@ public:
|
||||
|
||||
std::string _name; //!< Used by `ITT` to name executor threads
|
||||
int _streams = 1; //!< Number of streams.
|
||||
int _threadsPerStream = 0; //!< Number of threads per stream that executes `ie_parallel` calls
|
||||
int _threadsPerStream = 0; //!< Number of threads per stream that executes `ov_parallel` calls
|
||||
ThreadBindingType _threadBindingType = ThreadBindingType::NONE; //!< Thread binding to hardware resource type.
|
||||
//!< No binding by default
|
||||
int _threadBindingStep = 1; //!< In case of @ref CORES binding offset type
|
||||
|
@ -0,0 +1,124 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
/**
|
||||
* @brief A file containing thread local class implementation.
|
||||
* @file openvino/runtime/threading/thread_local.hpp
|
||||
*/
|
||||
|
||||
#include "openvino/core/parallel.hpp"
|
||||
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
# include <tbb/enumerable_thread_specific.h>
|
||||
#else
|
||||
# include <functional>
|
||||
# include <memory>
|
||||
# include <mutex>
|
||||
# include <thread>
|
||||
# include <unordered_map>
|
||||
# include <utility>
|
||||
#endif
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
|
||||
/**
|
||||
* @brief A wrapper class to keep object to be thread local.
|
||||
* @ingroup ov_dev_api_threading
|
||||
* @tparam T A type of object to keep thread local.
|
||||
*/
|
||||
template <typename T>
|
||||
using ThreadLocal = tbb::enumerable_thread_specific<T>;
|
||||
|
||||
#else
|
||||
|
||||
template <typename T>
|
||||
struct ThreadLocal {
|
||||
using Map = std::unordered_map<std::thread::id, T>;
|
||||
using Create = std::function<T()>;
|
||||
Map _map;
|
||||
mutable std::mutex _mutex;
|
||||
Create _create;
|
||||
|
||||
ThreadLocal()
|
||||
: _create{[] {
|
||||
return T{};
|
||||
}} {}
|
||||
explicit ThreadLocal(const T& init)
|
||||
: _create{[init] {
|
||||
return init;
|
||||
}} {}
|
||||
ThreadLocal(ThreadLocal&& other) : _map{std::move(other._map)}, _create{std::move(other._create)} {}
|
||||
ThreadLocal& operator=(ThreadLocal&& other) {
|
||||
_map = std::move(other._map);
|
||||
_create = std::move(other._create);
|
||||
return *this;
|
||||
}
|
||||
ThreadLocal(const ThreadLocal&) = delete;
|
||||
ThreadLocal& operator=(const ThreadLocal&&) = delete;
|
||||
explicit ThreadLocal(const Create& create_) : _create{create_} {}
|
||||
|
||||
T& local() {
|
||||
auto threadId = std::this_thread::get_id();
|
||||
std::lock_guard<std::mutex> lock{_mutex};
|
||||
auto itThreadLocal = _map.find(threadId);
|
||||
if (itThreadLocal != _map.end()) {
|
||||
return itThreadLocal->second;
|
||||
} else {
|
||||
return _map.emplace(threadId, _create()).first->second;
|
||||
}
|
||||
}
|
||||
|
||||
auto size() const -> decltype(_map.size()) {
|
||||
std::lock_guard<std::mutex> lock{_mutex};
|
||||
return _map.size();
|
||||
}
|
||||
|
||||
// WARNING: Thread Unsafe
|
||||
template <typename It>
|
||||
struct Iterator {
|
||||
It it;
|
||||
bool operator!=(const Iterator& other) {
|
||||
return it != other.it;
|
||||
}
|
||||
Iterator& operator++() {
|
||||
++it;
|
||||
return *this;
|
||||
}
|
||||
auto operator*() -> decltype(it->second) {
|
||||
return it->second;
|
||||
}
|
||||
auto operator-> () -> decltype(&(it->second)) {
|
||||
return &(it->second);
|
||||
}
|
||||
auto operator*() const -> decltype(it->second) {
|
||||
return it->second;
|
||||
}
|
||||
auto operator-> () const -> decltype(&(it->second)) {
|
||||
return &(it->second);
|
||||
}
|
||||
};
|
||||
|
||||
auto begin() -> Iterator<decltype(_map.begin())> {
|
||||
return {_map.begin()};
|
||||
}
|
||||
auto end() -> Iterator<decltype(_map.end())> {
|
||||
return {_map.end()};
|
||||
}
|
||||
auto begin() const -> Iterator<decltype(_map.begin())> const {
|
||||
return {_map.begin()};
|
||||
}
|
||||
auto end() const -> Iterator<decltype(_map.end())> const {
|
||||
return {_map.end()};
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace threading
|
||||
} // namespace ov
|
@ -8,85 +8,4 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "openvino/runtime/common.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
|
||||
* @brief This class instantiate object using shared library
|
||||
 * @tparam T A type of object SoPtr can hold
|
||||
*/
|
||||
template <class T>
|
||||
struct SoPtr {
|
||||
/**
|
||||
* @brief Default constructor
|
||||
*/
|
||||
SoPtr() = default;
|
||||
|
||||
/**
|
||||
* @brief Destructor preserves unloading order of implementation object and reference to library
|
||||
*/
|
||||
~SoPtr() {
|
||||
_ptr = {};
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Constructs an object with existing shared object reference and loaded pointer
|
||||
* @param ptr pointer to the loaded object
|
||||
* @param so Existing reference to library
|
||||
*/
|
||||
SoPtr(const std::shared_ptr<T>& ptr, const std::shared_ptr<void>& so) : _ptr{ptr}, _so{so} {}
|
||||
|
||||
/**
|
||||
* @brief The copy-like constructor, can create So Pointer that dereferenced into child type if T is derived of U
|
||||
* @param that copied SoPtr object
|
||||
*/
|
||||
template <typename U>
|
||||
SoPtr(const SoPtr<U>& that) : _ptr{std::dynamic_pointer_cast<T>(that._ptr)},
|
||||
_so{that._so} {
|
||||
IE_ASSERT(_ptr != nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Standard pointer operator
|
||||
* @return underlined interface with disabled Release method
|
||||
*/
|
||||
T* operator->() const noexcept {
|
||||
return _ptr.get();
|
||||
}
|
||||
|
||||
explicit operator bool() const noexcept {
|
||||
return _ptr != nullptr;
|
||||
}
|
||||
|
||||
friend bool operator==(std::nullptr_t, const SoPtr& ptr) noexcept {
|
||||
return !ptr;
|
||||
}
|
||||
friend bool operator==(const SoPtr& ptr, std::nullptr_t) noexcept {
|
||||
return !ptr;
|
||||
}
|
||||
friend bool operator!=(std::nullptr_t, const SoPtr& ptr) noexcept {
|
||||
return static_cast<bool>(ptr);
|
||||
}
|
||||
friend bool operator!=(const SoPtr& ptr, std::nullptr_t) noexcept {
|
||||
return static_cast<bool>(ptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets a smart pointer to the custom object
|
||||
*/
|
||||
std::shared_ptr<T> _ptr;
|
||||
|
||||
/**
|
||||
* @brief The shared object or dynamic loaded library
|
||||
*/
|
||||
std::shared_ptr<void> _so;
|
||||
};
|
||||
|
||||
} // namespace ov
|
||||
#include "openvino/runtime/so_ptr.hpp"
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "openvino/runtime/threading/immediate_executor.hpp"
|
||||
#include "threading/ie_itask_executor.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
@ -33,10 +33,10 @@
|
||||
#include "openvino/runtime/ivariable_state.hpp"
|
||||
#include "openvino/runtime/profiling_info.hpp"
|
||||
#include "openvino/runtime/remote_context.hpp"
|
||||
#include "openvino/runtime/so_ptr.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
#include "openvino/runtime/variable_state.hpp"
|
||||
#include "so_ptr.hpp"
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
|
@ -8,16 +8,16 @@
|
||||
|
||||
#include "openvino/runtime/isync_infer_request.hpp"
|
||||
#include "openvino/runtime/ivariable_state.hpp"
|
||||
#include "openvino/runtime/threading/immediate_executor.hpp"
|
||||
#include "openvino/runtime/threading/istreams_executor.hpp"
|
||||
#include "openvino/runtime/variable_state.hpp"
|
||||
#include "threading/ie_immediate_executor.hpp"
|
||||
#include "threading/ie_istreams_executor.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
struct ImmediateStreamsExecutor : public ov::threading::ITaskExecutor {
|
||||
explicit ImmediateStreamsExecutor(const std::shared_ptr<ov::threading::IStreamsExecutor>& streamsExecutor)
|
||||
: _streamsExecutor{streamsExecutor} {}
|
||||
void run(InferenceEngine::Task task) override {
|
||||
void run(ov::threading::Task task) override {
|
||||
_streamsExecutor->execute(std::move(task));
|
||||
}
|
||||
std::shared_ptr<ov::threading::IStreamsExecutor> _streamsExecutor;
|
||||
@ -40,10 +40,10 @@ ov::IAsyncInferRequest::IAsyncInferRequest(const std::shared_ptr<IInferRequest>&
|
||||
m_sync_request->infer();
|
||||
}}};
|
||||
if (m_sync_request)
|
||||
m_sync_pipeline = {{std::make_shared<InferenceEngine::ImmediateExecutor>(), [this] {
|
||||
m_sync_pipeline = {{std::make_shared<ov::threading::ImmediateExecutor>(), [this] {
|
||||
m_sync_request->infer();
|
||||
}}};
|
||||
auto streams_executor = std::dynamic_pointer_cast<InferenceEngine::IStreamsExecutor>(m_request_executor);
|
||||
auto streams_executor = std::dynamic_pointer_cast<ov::threading::IStreamsExecutor>(m_request_executor);
|
||||
if (streams_executor != nullptr) {
|
||||
m_sync_pipeline = {{std::make_shared<ImmediateStreamsExecutor>(std::move(streams_executor)), [this] {
|
||||
m_sync_request->infer();
|
||||
@ -123,7 +123,7 @@ void ov::IAsyncInferRequest::run_first_stage(const Pipeline::iterator itBeginSta
|
||||
firstStageExecutor->run(make_next_stage_task(itBeginStage, itEndStage, std::move(callbackExecutor)));
|
||||
}
|
||||
|
||||
InferenceEngine::Task ov::IAsyncInferRequest::make_next_stage_task(
|
||||
ov::threading::Task ov::IAsyncInferRequest::make_next_stage_task(
|
||||
const Pipeline::iterator itStage,
|
||||
const Pipeline::iterator itEndStage,
|
||||
const std::shared_ptr<ov::threading::ITaskExecutor> callbackExecutor) {
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
|
||||
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
|
||||
#include "icompiled_model_wrapper.hpp"
|
||||
#include "openvino/core/model.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
@ -11,12 +11,12 @@
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
#include "dev/threading/thread_affinity.hpp"
|
||||
#include "openvino/itt.hpp"
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
#include "threading/ie_thread_affinity.hpp"
|
||||
#include "threading/ie_thread_local.hpp"
|
||||
#include "openvino/runtime/threading/thread_local.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
@ -24,13 +24,13 @@ struct CPUStreamsExecutor::Impl {
|
||||
struct Stream {
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
struct Observer : public custom::task_scheduler_observer {
|
||||
InferenceEngine::CpuSet _mask;
|
||||
CpuSet _mask;
|
||||
int _ncpus = 0;
|
||||
int _threadBindingStep = 0;
|
||||
int _offset = 0;
|
||||
int _cpuIdxOffset = 0;
|
||||
Observer(custom::task_arena& arena,
|
||||
InferenceEngine::CpuSet mask,
|
||||
CpuSet mask,
|
||||
int ncpus,
|
||||
const int streamId,
|
||||
const int threadsPerStream,
|
||||
@ -44,14 +44,14 @@ struct CPUStreamsExecutor::Impl {
|
||||
_offset{streamId * threadsPerStream + threadBindingOffset},
|
||||
_cpuIdxOffset(cpuIdxOffset) {}
|
||||
void on_scheduler_entry(bool) override {
|
||||
InferenceEngine::PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(),
|
||||
_threadBindingStep,
|
||||
_ncpus,
|
||||
_mask,
|
||||
_cpuIdxOffset);
|
||||
pin_thread_to_vacant_core(_offset + tbb::this_task_arena::current_thread_index(),
|
||||
_threadBindingStep,
|
||||
_ncpus,
|
||||
_mask,
|
||||
_cpuIdxOffset);
|
||||
}
|
||||
void on_scheduler_exit(bool) override {
|
||||
PinCurrentThreadByMask(_ncpus, _mask);
|
||||
pin_current_thread_by_mask(_ncpus, _mask);
|
||||
}
|
||||
~Observer() override = default;
|
||||
};
|
||||
@ -136,9 +136,9 @@ struct CPUStreamsExecutor::Impl {
|
||||
_taskArena.reset(new custom::task_arena{custom::task_arena::constraints{}
|
||||
.set_core_type(selected_core_type)
|
||||
.set_max_concurrency(max_concurrency)});
|
||||
InferenceEngine::CpuSet processMask;
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
|
||||
std::tie(processMask, ncpus) = get_process_mask();
|
||||
if (nullptr != processMask) {
|
||||
_observer.reset(new Observer{*_taskArena,
|
||||
std::move(processMask),
|
||||
@ -157,9 +157,9 @@ struct CPUStreamsExecutor::Impl {
|
||||
(ThreadBindingType::CORES == _impl->_config._threadBindingType)) {
|
||||
_taskArena.reset(new custom::task_arena{concurrency});
|
||||
if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
|
||||
InferenceEngine::CpuSet processMask;
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
|
||||
std::tie(processMask, ncpus) = get_process_mask();
|
||||
if (nullptr != processMask) {
|
||||
_observer.reset(new Observer{*_taskArena,
|
||||
std::move(processMask),
|
||||
@ -175,32 +175,29 @@ struct CPUStreamsExecutor::Impl {
|
||||
#elif OV_THREAD == OV_THREAD_OMP
|
||||
omp_set_num_threads(_impl->_config._threadsPerStream);
|
||||
if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != _impl->_config._threadBindingType)) {
|
||||
InferenceEngine::CpuSet processMask;
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
|
||||
std::tie(processMask, ncpus) = get_process_mask();
|
||||
if (nullptr != processMask) {
|
||||
parallel_nt(_impl->_config._threadsPerStream, [&](int threadIndex, int threadsPerStream) {
|
||||
int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex +
|
||||
_impl->_config._threadBindingOffset;
|
||||
InferenceEngine::PinThreadToVacantCore(thrIdx,
|
||||
_impl->_config._threadBindingStep,
|
||||
ncpus,
|
||||
processMask);
|
||||
pin_thread_to_vacant_core(thrIdx, _impl->_config._threadBindingStep, ncpus, processMask);
|
||||
});
|
||||
}
|
||||
}
|
||||
#elif OV_THREAD == OV_THREAD_SEQ
|
||||
if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
|
||||
InferenceEngine::PinCurrentThreadToSocket(_numaNodeId);
|
||||
pin_current_thread_to_socket(_numaNodeId);
|
||||
} else if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
|
||||
InferenceEngine::CpuSet processMask;
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = InferenceEngine::GetProcessMask();
|
||||
std::tie(processMask, ncpus) = get_process_mask();
|
||||
if (nullptr != processMask) {
|
||||
InferenceEngine::PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset,
|
||||
_impl->_config._threadBindingStep,
|
||||
ncpus,
|
||||
processMask);
|
||||
pin_thread_to_vacant_core(_streamId + _impl->_config._threadBindingOffset,
|
||||
_impl->_config._threadBindingStep,
|
||||
ncpus,
|
||||
processMask);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -342,7 +339,7 @@ struct CPUStreamsExecutor::Impl {
|
||||
std::queue<Task> _taskQueue;
|
||||
bool _isStopped = false;
|
||||
std::vector<int> _usedNumaNodes;
|
||||
InferenceEngine::ThreadLocal<std::shared_ptr<Stream>> _streams;
|
||||
ov::threading::ThreadLocal<std::shared_ptr<Stream>> _streams;
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
// stream id mapping to the core type
|
||||
// stored in the reversed order (so the big cores, with the highest core_type_id value, are populated first)
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include "openvino/core/parallel.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/threading/cpu_streams_executor.hpp"
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
# include <tbb/task_scheduler_init.h>
|
||||
|
48
src/inference/src/dev/threading/itt.hpp
Normal file
48
src/inference/src/dev/threading/itt.hpp
Normal file
@ -0,0 +1,48 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Defines openvino tbbbind domains for tracing
|
||||
* @file openvino/runtime/threading/itt.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/cc/selective_build.h"
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/itt.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace tbbbind {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(tbb_bind);
|
||||
} // namespace domains
|
||||
} // namespace itt
|
||||
} // namespace tbbbind
|
||||
} // namespace ov
|
||||
|
||||
OV_CC_DOMAINS(tbb_bind);
|
||||
|
||||
/*
|
||||
* TBB_BIND_SCOPE macro allows to disable parts of tbb_bind calling if they are not used.
|
||||
*/
|
||||
#if defined(SELECTIVE_BUILD_ANALYZER)
|
||||
|
||||
# define TBB_BIND_SCOPE(region) OV_SCOPE(tbb_bind, region)
|
||||
# define TBB_BIND_NUMA_ENABLED OV_SCOPE(tbb_bind, NUMA)
|
||||
|
||||
#elif defined(SELECTIVE_BUILD)
|
||||
|
||||
# define TBB_BIND_SCOPE(region) \
|
||||
if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, NUMA)) == 1 && \
|
||||
OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, region)) == 1)
|
||||
# define TBB_BIND_NUMA_ENABLED
|
||||
|
||||
#else
|
||||
|
||||
# define TBB_BIND_SCOPE(region)
|
||||
# define TBB_BIND_NUMA_ENABLED
|
||||
#endif
|
||||
|
@ -1,13 +1,13 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "ie_parallel_custom_arena.hpp"
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "dev/threading/itt.hpp"
|
||||
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
|
||||
# ifndef TBBBIND_2_5_AVAILABLE
|
||||
# define TBBBIND_2_5_AVAILABLE 0
|
||||
@ -329,4 +329,4 @@ int default_concurrency(numa_node_id id) {
|
||||
|
||||
} // namespace info
|
||||
} // namespace custom
|
||||
#endif /*IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO*/
|
||||
#endif /*OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO*/
|
141
src/inference/src/dev/threading/parallel_custom_arena.hpp
Normal file
141
src/inference/src/dev/threading/parallel_custom_arena.hpp
Normal file
@ -0,0 +1,141 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Contains declarations and custom threading interfaces based on TBB info and task_arena APIs.
|
||||
*
|
||||
* @file dev/threading/parallel_custom_arena.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/core/parallel.hpp"
|
||||
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
|
||||
# include <cstddef>
|
||||
# include <memory>
|
||||
# include <mutex>
|
||||
# include <type_traits>
|
||||
# include <vector>
|
||||
|
||||
namespace custom {
|
||||
|
||||
using numa_node_id = int;
|
||||
using core_type_id = int;
|
||||
|
||||
namespace detail {
|
||||
struct constraints {
|
||||
constraints(numa_node_id id = tbb::task_arena::automatic, int maximal_concurrency = tbb::task_arena::automatic)
|
||||
: numa_id{id},
|
||||
max_concurrency{maximal_concurrency},
|
||||
core_type{tbb::task_arena::automatic},
|
||||
max_threads_per_core{tbb::task_arena::automatic} {}
|
||||
|
||||
constraints& set_numa_id(numa_node_id id) {
|
||||
numa_id = id;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_max_concurrency(int maximal_concurrency) {
|
||||
max_concurrency = maximal_concurrency;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_core_type(core_type_id id) {
|
||||
core_type = id;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_max_threads_per_core(int threads_number) {
|
||||
max_threads_per_core = threads_number;
|
||||
return *this;
|
||||
}
|
||||
|
||||
numa_node_id numa_id = tbb::task_arena::automatic;
|
||||
int max_concurrency = tbb::task_arena::automatic;
|
||||
core_type_id core_type = tbb::task_arena::automatic;
|
||||
int max_threads_per_core = tbb::task_arena::automatic;
|
||||
};
|
||||
|
||||
class binding_handler;
|
||||
|
||||
class binding_observer : public tbb::task_scheduler_observer {
|
||||
binding_handler* my_binding_handler;
|
||||
|
||||
public:
|
||||
binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c);
|
||||
~binding_observer();
|
||||
|
||||
void on_scheduler_entry(bool) override;
|
||||
void on_scheduler_exit(bool) override;
|
||||
};
|
||||
|
||||
struct binding_observer_deleter {
|
||||
void operator()(binding_observer* observer) const {
|
||||
observer->observe(false);
|
||||
delete observer;
|
||||
}
|
||||
};
|
||||
|
||||
using binding_oberver_ptr = std::unique_ptr<binding_observer, binding_observer_deleter>;
|
||||
|
||||
} // namespace detail
|
||||
|
||||
class task_arena {
|
||||
tbb::task_arena my_task_arena;
|
||||
std::once_flag my_initialization_state;
|
||||
detail::constraints my_constraints;
|
||||
detail::binding_oberver_ptr my_binding_observer;
|
||||
|
||||
public:
|
||||
using constraints = detail::constraints;
|
||||
static const int automatic = tbb::task_arena::automatic;
|
||||
|
||||
task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1);
|
||||
task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1);
|
||||
task_arena(const task_arena& s);
|
||||
|
||||
void initialize();
|
||||
void initialize(int max_concurrency_, unsigned reserved_for_masters = 1);
|
||||
void initialize(constraints constraints_, unsigned reserved_for_masters = 1);
|
||||
|
||||
explicit operator tbb::task_arena&();
|
||||
|
||||
int max_concurrency();
|
||||
|
||||
template <typename F>
|
||||
void enqueue(F&& f) {
|
||||
initialize();
|
||||
my_task_arena.enqueue(std::forward<F>(f));
|
||||
}
|
||||
template <typename F>
|
||||
auto execute(F&& f) -> decltype(f()) {
|
||||
initialize();
|
||||
return my_task_arena.execute(std::forward<F>(f));
|
||||
}
|
||||
};
|
||||
|
||||
struct task_scheduler_observer : public tbb::task_scheduler_observer {
|
||||
task_scheduler_observer(custom::task_arena& arena)
|
||||
: tbb::task_scheduler_observer(static_cast<tbb::task_arena&>(arena)),
|
||||
my_arena(arena) {}
|
||||
|
||||
void observe(bool state = true) {
|
||||
if (state) {
|
||||
my_arena.initialize();
|
||||
}
|
||||
tbb::task_scheduler_observer::observe(state);
|
||||
}
|
||||
|
||||
custom::task_arena& my_arena;
|
||||
};
|
||||
|
||||
namespace info {
|
||||
std::vector<numa_node_id> numa_nodes();
|
||||
std::vector<core_type_id> core_types();
|
||||
|
||||
int default_concurrency(numa_node_id id = task_arena::automatic);
|
||||
int default_concurrency(task_arena::constraints c);
|
||||
} // namespace info
|
||||
} // namespace custom
|
||||
#endif /*(OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)*/
|
||||
|
119
src/inference/src/dev/threading/thread_affinity.cpp
Normal file
119
src/inference/src/dev/threading/thread_affinity.cpp
Normal file
@ -0,0 +1,119 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "dev/threading/thread_affinity.hpp"
|
||||
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
std::tuple<CpuSet, int> get_process_mask() {
|
||||
for (int ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) {
|
||||
CpuSet mask{CPU_ALLOC(ncpus)};
|
||||
if (nullptr == mask)
|
||||
break;
|
||||
const size_t size = CPU_ALLOC_SIZE(ncpus);
|
||||
CPU_ZERO_S(size, mask.get());
|
||||
// the result fits the mask
|
||||
if (0 == sched_getaffinity(getpid(), size, mask.get())) {
|
||||
return std::make_tuple(std::move(mask), ncpus);
|
||||
}
|
||||
// other error
|
||||
if (errno != EINVAL)
|
||||
break;
|
||||
}
|
||||
return std::make_tuple(nullptr, 0);
|
||||
}
|
||||
|
||||
/* Release the cores affinity mask for the current process */
|
||||
void release_process_mask(cpu_set_t* mask) {
|
||||
if (nullptr != mask)
|
||||
CPU_FREE(mask);
|
||||
}
|
||||
|
||||
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
|
||||
return 0 == sched_setaffinity(0, CPU_ALLOC_SIZE(ncores), procMask.get());
|
||||
}
|
||||
|
||||
bool pin_thread_to_vacant_core(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
|
||||
if (procMask == nullptr)
|
||||
return false;
|
||||
const size_t size = CPU_ALLOC_SIZE(ncores);
|
||||
const int num_cpus = CPU_COUNT_S(size, procMask.get());
|
||||
thrIdx %= num_cpus; // To limit unique number in [; num_cpus-1] range
|
||||
// Place threads with specified step
|
||||
int cpu_idx = cpuIdxOffset;
|
||||
for (int i = 0, offset = 0; i < thrIdx; ++i) {
|
||||
cpu_idx += hyperthreads;
|
||||
if (cpu_idx >= num_cpus)
|
||||
cpu_idx = ++offset;
|
||||
}
|
||||
|
||||
// Find index of 'cpu_idx'-th bit that equals to 1
|
||||
int mapped_idx = cpuIdxOffset - 1;
|
||||
while (cpu_idx >= cpuIdxOffset) {
|
||||
mapped_idx++;
|
||||
if (CPU_ISSET_S(mapped_idx, size, procMask.get()))
|
||||
--cpu_idx;
|
||||
}
|
||||
|
||||
CpuSet targetMask{CPU_ALLOC(ncores)};
|
||||
CPU_ZERO_S(size, targetMask.get());
|
||||
CPU_SET_S(mapped_idx, size, targetMask.get());
|
||||
bool res = pin_current_thread_by_mask(ncores, targetMask);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool pin_current_thread_to_socket(int socket) {
|
||||
const int sockets = ov::get_available_numa_nodes().size();
|
||||
const int cores = ov::get_number_of_cpu_cores();
|
||||
const int cores_per_socket = cores / sockets;
|
||||
|
||||
int ncpus = 0;
|
||||
CpuSet mask;
|
||||
std::tie(mask, ncpus) = get_process_mask();
|
||||
CpuSet targetMask{CPU_ALLOC(ncpus)};
|
||||
const size_t size = CPU_ALLOC_SIZE(ncpus);
|
||||
CPU_ZERO_S(size, targetMask.get());
|
||||
|
||||
for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; core++) {
|
||||
CPU_SET_S(core, size, targetMask.get());
|
||||
}
|
||||
// respect the user-defined mask for the entire process
|
||||
CPU_AND_S(size, targetMask.get(), targetMask.get(), mask.get());
|
||||
bool res = false;
|
||||
if (CPU_COUNT_S(size, targetMask.get())) { // if we have non-zero mask to set
|
||||
res = pin_current_thread_by_mask(ncpus, targetMask);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#else // no threads pinning/binding on Win/MacOS
|
||||
std::tuple<CpuSet, int> get_process_mask() {
|
||||
return std::make_tuple(nullptr, 0);
|
||||
}
|
||||
void release_process_mask(cpu_set_t*) {}
|
||||
|
||||
bool pin_thread_to_vacant_core(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
|
||||
return false;
|
||||
}
|
||||
bool pin_current_thread_by_mask(int ncores, const CpuSet& procMask) {
|
||||
return false;
|
||||
}
|
||||
bool pin_current_thread_to_socket(int socket) {
|
||||
return false;
|
||||
}
|
||||
#endif // !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
} // namespace threading
|
||||
} // namespace ov
|
93
src/inference/src/dev/threading/thread_affinity.hpp
Normal file
93
src/inference/src/dev/threading/thread_affinity.hpp
Normal file
@ -0,0 +1,93 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
#endif
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
|
||||
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
using cpu_set_t = void;
|
||||
#endif // (defined(__APPLE__) || defined(_WIN32))
|
||||
|
||||
/**
|
||||
* @brief Release the cores affinity mask for the current process
|
||||
* @ingroup ov_dev_api_threading
|
||||
*
|
||||
* @param mask The mask
|
||||
*/
|
||||
void release_process_mask(cpu_set_t* mask);
|
||||
|
||||
/**
|
||||
* @brief Deleter for process mask
|
||||
* @ingroup ov_dev_api_threading
|
||||
*/
|
||||
struct ReleaseProcessMaskDeleter {
|
||||
/**
|
||||
* @brief A callable operator to release object
|
||||
*
|
||||
* @param mask The mask to release
|
||||
*/
|
||||
void operator()(cpu_set_t* mask) const {
|
||||
release_process_mask(mask);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A unique pointer to CPU set structure with the ReleaseProcessMaskDeleter deleter
|
||||
* @ingroup ov_dev_api_threading
|
||||
*/
|
||||
using CpuSet = std::unique_ptr<cpu_set_t, ReleaseProcessMaskDeleter>;
|
||||
|
||||
/**
|
||||
* @brief Get the cores affinity mask for the current process
|
||||
* @ingroup ov_dev_api_threading
|
||||
* @return A core affinity mask
|
||||
*/
|
||||
std::tuple<CpuSet, int> get_process_mask();
|
||||
|
||||
/**
|
||||
* @brief Pins current thread to a set of cores determined by the mask
|
||||
* @ingroup ov_dev_api_threading
|
||||
*
|
||||
* @param[in] thrIdx The thr index
|
||||
* @param[in] hyperThreads The hyper threads
|
||||
* @param[in] ncores The ncores
|
||||
* @param[in] processMask The process mask
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
bool pin_thread_to_vacant_core(int thrIdx,
|
||||
int hyperThreads,
|
||||
int ncores,
|
||||
const CpuSet& processMask,
|
||||
int cpuIdxOffset = 0);
|
||||
|
||||
/**
|
||||
* @brief Pins thread to a spare core in the round-robin scheme, while respecting the given process mask.
|
||||
* The function can also handle the hyper-threading (by populating the physical cores first)
|
||||
* @ingroup ov_dev_api_threading
|
||||
*
|
||||
* @param[in] ncores The ncores
|
||||
* @param[in] processMask The process mask
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
bool pin_current_thread_by_mask(int ncores, const CpuSet& processMask);
|
||||
|
||||
/**
|
||||
* @brief Pins a current thread to a socket.
|
||||
* @ingroup ov_dev_api_threading
|
||||
*
|
||||
* @param[in] socket The socket id
|
||||
* @return `True` in case of success, `false` otherwise
|
||||
*/
|
||||
bool pin_current_thread_to_socket(int socket);
|
||||
} // namespace threading
|
||||
} // namespace ov
|
@ -13,10 +13,10 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ie_common.h"
|
||||
#include "ie_system_conf.h"
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
#include "streams_executor.hpp"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
@ -242,7 +242,7 @@ void parse_processor_info_linux(const int _processors,
|
||||
}
|
||||
};
|
||||
|
||||
#if !((IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
#if !((OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO))
|
||||
std::vector<int> get_available_numa_nodes() {
|
||||
std::vector<int> nodes((0 == cpu._sockets) ? 1 : cpu._sockets);
|
||||
std::iota(std::begin(nodes), std::end(nodes), 0);
|
||||
@ -252,7 +252,7 @@ std::vector<int> get_available_numa_nodes() {
|
||||
int get_number_of_cpu_cores(bool bigCoresOnly) {
|
||||
unsigned numberOfProcessors = cpu._processors;
|
||||
unsigned totalNumberOfCpuCores = cpu._cores;
|
||||
IE_ASSERT(totalNumberOfCpuCores != 0);
|
||||
OPENVINO_ASSERT(totalNumberOfCpuCores != 0);
|
||||
cpu_set_t usedCoreSet, currentCoreSet, currentCpuSet;
|
||||
CPU_ZERO(¤tCpuSet);
|
||||
CPU_ZERO(&usedCoreSet);
|
||||
@ -270,7 +270,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
|
||||
}
|
||||
}
|
||||
int phys_cores = CPU_COUNT(¤tCoreSet);
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
auto core_types = custom::info::core_types();
|
||||
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
|
||||
phys_cores = custom::info::default_concurrency(
|
||||
|
@ -11,9 +11,9 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
#include "streams_executor.hpp"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
@ -189,7 +189,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
|
||||
phys_cores++;
|
||||
} while (offset < sz);
|
||||
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
auto core_types = custom::info::core_types();
|
||||
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
|
||||
phys_cores = custom::info::default_concurrency(
|
||||
@ -199,7 +199,7 @@ int get_number_of_cpu_cores(bool bigCoresOnly) {
|
||||
return phys_cores;
|
||||
}
|
||||
|
||||
#if !(IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if !(OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
// OMP/SEQ threading on the Windows doesn't support NUMA
|
||||
std::vector<int> get_available_numa_nodes() {
|
||||
return {-1};
|
||||
|
@ -8,8 +8,8 @@
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
#include "openvino/core/visibility.hpp"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#define XBYAK_UNDEF_JNL
|
||||
@ -147,7 +147,7 @@ bool check_open_mp_env_vars(bool include_omp_num_threads) {
|
||||
int get_number_of_cpu_cores(bool) {
|
||||
return parallel_get_max_threads();
|
||||
}
|
||||
# if !((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
# if !((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
|
||||
std::vector<int> get_available_numa_nodes() {
|
||||
return {-1};
|
||||
}
|
||||
@ -158,7 +158,7 @@ int get_number_of_logical_cpu_cores(bool) {
|
||||
#else
|
||||
int get_number_of_logical_cpu_cores(bool bigCoresOnly) {
|
||||
int logical_cores = parallel_get_max_threads();
|
||||
# if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
# if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
auto core_types = custom::info::core_types();
|
||||
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
|
||||
logical_cores = custom::info::default_concurrency(
|
||||
@ -169,7 +169,7 @@ int get_number_of_logical_cpu_cores(bool bigCoresOnly) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
|
||||
std::vector<int> get_available_numa_nodes() {
|
||||
return custom::info::numa_nodes();
|
||||
}
|
||||
|
@ -10,132 +10,4 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ie_parallel.hpp"
|
||||
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
namespace custom {
|
||||
|
||||
using numa_node_id = int;
|
||||
using core_type_id = int;
|
||||
|
||||
namespace detail {
|
||||
struct constraints {
|
||||
constraints(numa_node_id id = tbb::task_arena::automatic, int maximal_concurrency = tbb::task_arena::automatic)
|
||||
: numa_id{id}
|
||||
, max_concurrency{maximal_concurrency}
|
||||
, core_type{tbb::task_arena::automatic}
|
||||
, max_threads_per_core{tbb::task_arena::automatic}
|
||||
{}
|
||||
|
||||
constraints& set_numa_id(numa_node_id id) {
|
||||
numa_id = id;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_max_concurrency(int maximal_concurrency) {
|
||||
max_concurrency = maximal_concurrency;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_core_type(core_type_id id) {
|
||||
core_type = id;
|
||||
return *this;
|
||||
}
|
||||
constraints& set_max_threads_per_core(int threads_number) {
|
||||
max_threads_per_core = threads_number;
|
||||
return *this;
|
||||
}
|
||||
|
||||
numa_node_id numa_id = tbb::task_arena::automatic;
|
||||
int max_concurrency = tbb::task_arena::automatic;
|
||||
core_type_id core_type = tbb::task_arena::automatic;
|
||||
int max_threads_per_core = tbb::task_arena::automatic;
|
||||
};
|
||||
|
||||
class binding_handler;
|
||||
|
||||
class binding_observer : public tbb::task_scheduler_observer {
|
||||
binding_handler* my_binding_handler;
|
||||
public:
|
||||
binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c);
|
||||
~binding_observer();
|
||||
|
||||
void on_scheduler_entry(bool) override;
|
||||
void on_scheduler_exit(bool) override;
|
||||
};
|
||||
|
||||
struct binding_observer_deleter {
|
||||
void operator()(binding_observer* observer) const {
|
||||
observer->observe(false);
|
||||
delete observer;
|
||||
}
|
||||
};
|
||||
|
||||
using binding_oberver_ptr = std::unique_ptr<binding_observer, binding_observer_deleter>;
|
||||
|
||||
} // namespace detail
|
||||
|
||||
class task_arena {
|
||||
tbb::task_arena my_task_arena;
|
||||
std::once_flag my_initialization_state;
|
||||
detail::constraints my_constraints;
|
||||
detail::binding_oberver_ptr my_binding_observer;
|
||||
|
||||
public:
|
||||
using constraints = detail::constraints;
|
||||
static const int automatic = tbb::task_arena::automatic;
|
||||
|
||||
task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1);
|
||||
task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1);
|
||||
task_arena(const task_arena &s);
|
||||
|
||||
void initialize();
|
||||
void initialize(int max_concurrency_, unsigned reserved_for_masters = 1);
|
||||
void initialize(constraints constraints_, unsigned reserved_for_masters = 1);
|
||||
|
||||
explicit operator tbb::task_arena&();
|
||||
|
||||
int max_concurrency();
|
||||
|
||||
template<typename F>
|
||||
void enqueue(F&& f) {
|
||||
initialize();
|
||||
my_task_arena.enqueue(std::forward<F>(f));
|
||||
}
|
||||
template<typename F>
|
||||
auto execute(F&& f) -> decltype(f()) {
|
||||
initialize();
|
||||
return my_task_arena.execute(std::forward<F>(f));
|
||||
}
|
||||
};
|
||||
|
||||
struct task_scheduler_observer: public tbb::task_scheduler_observer {
|
||||
task_scheduler_observer(custom::task_arena& arena) :
|
||||
tbb::task_scheduler_observer(static_cast<tbb::task_arena&>(arena)),
|
||||
my_arena(arena)
|
||||
{}
|
||||
|
||||
void observe(bool state = true) {
|
||||
if (state) {
|
||||
my_arena.initialize();
|
||||
}
|
||||
tbb::task_scheduler_observer::observe(state);
|
||||
}
|
||||
|
||||
custom::task_arena& my_arena;
|
||||
};
|
||||
|
||||
namespace info {
|
||||
std::vector<numa_node_id> numa_nodes();
|
||||
std::vector<core_type_id> core_types();
|
||||
|
||||
int default_concurrency(numa_node_id id = task_arena::automatic);
|
||||
int default_concurrency(task_arena::constraints c);
|
||||
} // namespace info
|
||||
} // namespace custom
|
||||
#endif /*(IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)*/
|
||||
#include "dev/threading/parallel_custom_arena.hpp"
|
||||
|
@ -4,114 +4,26 @@
|
||||
|
||||
#include "threading/ie_thread_affinity.hpp"
|
||||
|
||||
#include <cerrno>
|
||||
#include <climits>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include "ie_system_conf.h"
|
||||
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
#include "dev/threading/thread_affinity.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
|
||||
std::tuple<CpuSet, int> GetProcessMask() {
|
||||
for (int ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) {
|
||||
CpuSet mask{CPU_ALLOC(ncpus)};
|
||||
if (nullptr == mask)
|
||||
break;
|
||||
const size_t size = CPU_ALLOC_SIZE(ncpus);
|
||||
CPU_ZERO_S(size, mask.get());
|
||||
// the result fits the mask
|
||||
if (0 == sched_getaffinity(getpid(), size, mask.get())) {
|
||||
return std::make_tuple(std::move(mask), ncpus);
|
||||
}
|
||||
// other error
|
||||
if (errno != EINVAL)
|
||||
break;
|
||||
}
|
||||
return std::make_tuple(nullptr, 0);
|
||||
return ov::threading::get_process_mask();
|
||||
}
|
||||
|
||||
/* Release the cores affinity mask for the current process */
|
||||
void ReleaseProcessMask(cpu_set_t* mask) {
|
||||
if (nullptr != mask)
|
||||
CPU_FREE(mask);
|
||||
}
|
||||
|
||||
bool PinCurrentThreadByMask(int ncores, const CpuSet& procMask) {
|
||||
return 0 == sched_setaffinity(0, CPU_ALLOC_SIZE(ncores), procMask.get());
|
||||
ov::threading::release_process_mask(mask);
|
||||
}
|
||||
|
||||
bool PinThreadToVacantCore(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
|
||||
if (procMask == nullptr)
|
||||
return false;
|
||||
const size_t size = CPU_ALLOC_SIZE(ncores);
|
||||
const int num_cpus = CPU_COUNT_S(size, procMask.get());
|
||||
thrIdx %= num_cpus; // To limit unique number in [; num_cpus-1] range
|
||||
// Place threads with specified step
|
||||
int cpu_idx = cpuIdxOffset;
|
||||
for (int i = 0, offset = 0; i < thrIdx; ++i) {
|
||||
cpu_idx += hyperthreads;
|
||||
if (cpu_idx >= num_cpus)
|
||||
cpu_idx = ++offset;
|
||||
}
|
||||
|
||||
// Find index of 'cpu_idx'-th bit that equals to 1
|
||||
int mapped_idx = cpuIdxOffset - 1;
|
||||
while (cpu_idx >= cpuIdxOffset) {
|
||||
mapped_idx++;
|
||||
if (CPU_ISSET_S(mapped_idx, size, procMask.get()))
|
||||
--cpu_idx;
|
||||
}
|
||||
|
||||
CpuSet targetMask{CPU_ALLOC(ncores)};
|
||||
CPU_ZERO_S(size, targetMask.get());
|
||||
CPU_SET_S(mapped_idx, size, targetMask.get());
|
||||
bool res = PinCurrentThreadByMask(ncores, targetMask);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool PinCurrentThreadToSocket(int socket) {
|
||||
const int sockets = InferenceEngine::getAvailableNUMANodes().size();
|
||||
const int cores = InferenceEngine::getNumberOfCPUCores();
|
||||
const int cores_per_socket = cores / sockets;
|
||||
|
||||
int ncpus = 0;
|
||||
CpuSet mask;
|
||||
std::tie(mask, ncpus) = GetProcessMask();
|
||||
CpuSet targetMask{CPU_ALLOC(ncpus)};
|
||||
const size_t size = CPU_ALLOC_SIZE(ncpus);
|
||||
CPU_ZERO_S(size, targetMask.get());
|
||||
|
||||
for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; core++) {
|
||||
CPU_SET_S(core, size, targetMask.get());
|
||||
}
|
||||
// respect the user-defined mask for the entire process
|
||||
CPU_AND_S(size, targetMask.get(), targetMask.get(), mask.get());
|
||||
bool res = false;
|
||||
if (CPU_COUNT_S(size, targetMask.get())) { // if we have non-zero mask to set
|
||||
res = PinCurrentThreadByMask(ncpus, targetMask);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#else // no threads pinning/binding on Win/MacOS
|
||||
std::tuple<CpuSet, int> GetProcessMask() {
|
||||
return std::make_tuple(nullptr, 0);
|
||||
}
|
||||
void ReleaseProcessMask(cpu_set_t*) {}
|
||||
|
||||
bool PinThreadToVacantCore(int thrIdx, int hyperthreads, int ncores, const CpuSet& procMask, int cpuIdxOffset) {
|
||||
return false;
|
||||
return ov::threading::pin_thread_to_vacant_core(thrIdx, hyperthreads, ncores, procMask, cpuIdxOffset);
|
||||
}
|
||||
bool PinCurrentThreadByMask(int ncores, const CpuSet& procMask) {
|
||||
return false;
|
||||
return ov::threading::pin_current_thread_by_mask(ncores, procMask);
|
||||
}
|
||||
bool PinCurrentThreadToSocket(int socket) {
|
||||
return false;
|
||||
return ov::threading::pin_current_thread_to_socket(socket);
|
||||
}
|
||||
#endif // !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#include "ie_api.h"
|
||||
#include "dev/threading/thread_affinity.hpp"
|
||||
|
||||
#if !(defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
# include <sched.h>
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
namespace InferenceEngine {
|
||||
#if (defined(__APPLE__) || defined(__EMSCRIPTEN__) || defined(_WIN32))
|
||||
using cpu_set_t = void;
|
||||
using cpu_set_t = ov::threading::cpu_set_t;
|
||||
#endif // (defined(__APPLE__) || defined(_WIN32))
|
||||
|
||||
/**
|
||||
@ -26,26 +26,9 @@ using cpu_set_t = void;
|
||||
*/
|
||||
void ReleaseProcessMask(cpu_set_t* mask);
|
||||
|
||||
/**
|
||||
* @brief Deleter for process mask
|
||||
* @ingroup ie_dev_api_threading
|
||||
*/
|
||||
struct ReleaseProcessMaskDeleter {
|
||||
/**
|
||||
* @brief A callable operator to release object
|
||||
*
|
||||
* @param mask The mask to release
|
||||
*/
|
||||
void operator()(cpu_set_t* mask) const {
|
||||
ReleaseProcessMask(mask);
|
||||
}
|
||||
};
|
||||
using ReleaseProcessMaskDeleter = ov::threading::ReleaseProcessMaskDeleter;
|
||||
|
||||
/**
|
||||
* @brief A unique pointer to CPU set structure with the ReleaseProcessMaskDeleter deleter
|
||||
* @ingroup ie_dev_api_threading
|
||||
*/
|
||||
using CpuSet = std::unique_ptr<cpu_set_t, ReleaseProcessMaskDeleter>;
|
||||
using CpuSet = ov::threading::CpuSet;
|
||||
|
||||
/**
|
||||
* @brief Get the cores affinity mask for the current process
|
||||
|
@ -9,41 +9,4 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/cc/selective_build.h>
|
||||
|
||||
#include <openvino/itt.hpp>
|
||||
|
||||
#include "openvino/core/except.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace tbbbind {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(tbb_bind);
|
||||
} // namespace domains
|
||||
} // namespace itt
|
||||
} // namespace tbbbind
|
||||
} // namespace ov
|
||||
|
||||
OV_CC_DOMAINS(tbb_bind);
|
||||
|
||||
/*
|
||||
* TBB_BIND_SCOPE macro allows to disable parts of tbb_bind calling if they are not used.
|
||||
*/
|
||||
#if defined(SELECTIVE_BUILD_ANALYZER)
|
||||
|
||||
# define TBB_BIND_SCOPE(region) OV_SCOPE(tbb_bind, region)
|
||||
# define TBB_BIND_NUMA_ENABLED OV_SCOPE(tbb_bind, NUMA)
|
||||
|
||||
#elif defined(SELECTIVE_BUILD)
|
||||
|
||||
# define TBB_BIND_SCOPE(region) \
|
||||
if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, NUMA)) == 1 && \
|
||||
OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(tbb_bind, _, region)) == 1)
|
||||
# define TBB_BIND_NUMA_ENABLED
|
||||
|
||||
#else
|
||||
|
||||
# define TBB_BIND_SCOPE(region)
|
||||
# define TBB_BIND_NUMA_ENABLED
|
||||
#endif
|
||||
#include "dev/threading/itt.hpp"
|
||||
|
Loading…
Reference in New Issue
Block a user