From e33de350633eed464e34424fa4769cfaebab958d Mon Sep 17 00:00:00 2001 From: Fang Xu Date: Fri, 11 Aug 2023 19:42:38 +0530 Subject: [PATCH] [CPU] Add max_threads_per_core setting into StreamExecutor (#18857) --- .../cpu_streams_executor_internal.hpp | 4 +- .../dev/threading/cpu_streams_executor.cpp | 29 +++- .../cpu_streams_executor_internal.cpp | 70 +++++--- src/inference/src/system_conf.cpp | 8 + src/inference/tests/unit/cpu_reserve_test.cpp | 159 +++++++++++++++++- .../tests/unit/cpu_stream_info_test.cpp | 42 ++++- 6 files changed, 267 insertions(+), 45 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor_internal.hpp b/src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor_internal.hpp index e03388d34d5..c85e9bc3158 100644 --- a/src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor_internal.hpp +++ b/src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor_internal.hpp @@ -32,6 +32,7 @@ enum StreamCreateType { * @param[out] concurrency the number of threads created at the same time * @param[out] core_type core type * @param[out] numa_node_id numa node id + * @param[out] max_threads_per_core the max number of threads per cpu core */ void get_cur_stream_info(const int stream_id, const bool cpu_reservation, @@ -40,7 +41,8 @@ void get_cur_stream_info(const int stream_id, StreamCreateType& stream_type, int& concurrency, int& core_type, - int& numa_node_id); + int& numa_node_id, + int& max_threads_per_core); /** * @brief Reserve cpu resource by streams info diff --git a/src/inference/src/dev/threading/cpu_streams_executor.cpp b/src/inference/src/dev/threading/cpu_streams_executor.cpp index d43d3b1bba4..dba0082d647 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor.cpp @@ -135,19 +135,27 @@ struct CPUStreamsExecutor::Impl { const StreamCreateType stream_type, const int concurrency, const int core_type, - const int numa_node_id) { + const int numa_node_id, + const int max_threads_per_core) { _numaNodeId = std::max(0, numa_node_id); _socketId = get_socket_by_numa_node(_numaNodeId); if (stream_type == STREAM_WITHOUT_PARAM) { - _taskArena.reset(new custom::task_arena{concurrency}); + _taskArena.reset(new custom::task_arena{custom::task_arena::constraints{} + .set_max_concurrency(concurrency) + .set_max_threads_per_core(max_threads_per_core)}); } else if (stream_type == STREAM_WITH_NUMA_ID) { - _taskArena.reset(new custom::task_arena{custom::task_arena::constraints{_numaNodeId, concurrency}}); + _taskArena.reset(new custom::task_arena{custom::task_arena::constraints{} + .set_numa_id(_numaNodeId) + .set_max_concurrency(concurrency) + .set_max_threads_per_core(max_threads_per_core)}); } else if (stream_type == STREAM_WITH_CORE_TYPE) { const auto real_core_type = (core_type == MAIN_CORE_PROC || core_type == HYPER_THREADING_PROC) ? custom::info::core_types().back() : custom::info::core_types().front(); - _taskArena.reset(new custom::task_arena{ - custom::task_arena::constraints{}.set_core_type(real_core_type).set_max_concurrency(concurrency)}); + _taskArena.reset(new custom::task_arena{custom::task_arena::constraints{} + .set_core_type(real_core_type) + .set_max_concurrency(concurrency) + .set_max_threads_per_core(max_threads_per_core)}); } else { _taskArena.reset(new custom::task_arena{concurrency}); _cpu_ids = static_cast(_impl->_config._stream_processor_ids.size()) == _impl->_config._streams @@ -176,6 +184,7 @@ struct CPUStreamsExecutor::Impl { int concurrency; int cpu_core_type; int numa_node_id; + int max_threads_per_core; StreamCreateType stream_type; const auto org_proc_type_table = get_org_proc_type_table(); const auto stream_id = _streamId >= _impl->_config._streams ? _impl->_config._streams - 1 : _streamId; @@ -187,11 +196,17 @@ struct CPUStreamsExecutor::Impl { stream_type, concurrency, cpu_core_type, - numa_node_id); + numa_node_id, + max_threads_per_core); if (concurrency <= 0) { return; } - create_tbb_task_arena(stream_id, stream_type, concurrency, cpu_core_type, numa_node_id); + create_tbb_task_arena(stream_id, + stream_type, + concurrency, + cpu_core_type, + numa_node_id, + max_threads_per_core); } void init_stream_legacy() { diff --git a/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp b/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp index 44e2df4c53f..c05509e225c 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp @@ -20,7 +20,8 @@ void get_cur_stream_info(const int stream_id, StreamCreateType& stream_type, int& concurrency, int& core_type, - int& numa_node_id) { + int& numa_node_id, + int& max_threads_per_core) { int stream_total = 0; size_t stream_info_id = 0; bool cpu_reserve = cpu_reservation; @@ -34,6 +35,21 @@ void get_cur_stream_info(const int stream_id, concurrency = streams_info_table[stream_info_id][THREADS_PER_STREAM]; core_type = streams_info_table[stream_info_id][PROC_TYPE]; numa_node_id = streams_info_table[stream_info_id][STREAM_NUMA_NODE_ID]; + max_threads_per_core = 1; + if (core_type == ALL_PROC) { + for (size_t i = stream_info_id + 1; i < streams_info_table.size(); i++) { + if (streams_info_table[i][NUMBER_OF_STREAMS] == 0) { + if (streams_info_table[i][PROC_TYPE] == HYPER_THREADING_PROC) { + max_threads_per_core = 2; + break; + } + } else { + break; + } + } + } else if (core_type == HYPER_THREADING_PROC) { + max_threads_per_core = 2; + } #if defined(_WIN32) || defined(__APPLE__) cpu_reserve = false; @@ -56,52 +72,58 @@ void reserve_cpu_by_streams_info(const std::vector> _streams_in std::vector>& _proc_type_table, std::vector>& _stream_processors, const int _cpu_status) { - std::vector> streams_info_table = _streams_info_table; + std::vector> streams_table; std::vector> stream_conditions; std::vector stream_pos; std::vector stream_num; int num_streams = 0; + int num_conditions = 0; + int condition_idx = 0; + bool last_all_proc = false; - stream_pos.assign(_streams_info_table.size(), 0); - stream_num.assign(_streams_info_table.size(), 0); for (size_t i = 0; i < _streams_info_table.size(); i++) { - stream_pos[i] = num_streams; + if (_streams_info_table[i][NUMBER_OF_STREAMS] > 0) { + stream_pos.push_back(num_streams); + } num_streams += _streams_info_table[i][NUMBER_OF_STREAMS]; } + num_conditions = static_cast(stream_pos.size()); _stream_processors.assign(num_streams, std::vector()); - stream_conditions.assign(_streams_info_table.size(), std::vector()); + stream_conditions.assign(num_conditions, std::vector()); + stream_num.assign(num_conditions, 0); + for (size_t i = 0; i < _streams_info_table.size(); i++) { std::vector proc_types; std::vector numa_nodes; std::vector sockets; - if (_streams_info_table[i][PROC_TYPE] > ALL_PROC && _streams_info_table[i][NUMBER_OF_STREAMS] > 0) { + if (_streams_info_table[i][NUMBER_OF_STREAMS] > 0) { + streams_table.push_back(_streams_info_table[i]); + } + if (last_all_proc && _streams_info_table[i][NUMBER_OF_STREAMS] > 0) { + last_all_proc = false; + condition_idx++; + } + if (_streams_info_table[i][PROC_TYPE] > ALL_PROC) { proc_types.push_back(std::to_string(_streams_info_table[i][PROC_TYPE])); - } - if (num_streams == 1 && _streams_info_table[0][PROC_TYPE] == MAIN_CORE_PROC && - _streams_info_table[0][THREADS_PER_STREAM] > _proc_type_table[0][MAIN_CORE_PROC]) { - proc_types.push_back(std::to_string(HYPER_THREADING_PROC)); - } - if (_streams_info_table[i][STREAM_NUMA_NODE_ID] < 0) { - for (int j = 0; j < _numa_nodes; j++) { - numa_nodes.push_back(std::to_string(j)); - } } else { + last_all_proc = true; + } + if (_streams_info_table[i][STREAM_NUMA_NODE_ID] >= 0) { numa_nodes.push_back(std::to_string(_streams_info_table[i][STREAM_NUMA_NODE_ID])); } - if (_streams_info_table[i][STREAM_SOCKET_ID] < 0) { - for (int j = 0; j < _numa_nodes; j++) { - sockets.push_back(std::to_string(j)); - } - } else { + if (_streams_info_table[i][STREAM_SOCKET_ID] >= 0) { sockets.push_back(std::to_string(_streams_info_table[i][STREAM_SOCKET_ID])); } for (auto t : proc_types) { for (auto n : numa_nodes) { for (auto s : sockets) { - stream_conditions[i].push_back(t + n + s); + stream_conditions[condition_idx].push_back(t + n + s); } } } + if (_streams_info_table[i][PROC_TYPE] > ALL_PROC && _streams_info_table[i][NUMBER_OF_STREAMS] > 0) { + condition_idx++; + } } for (size_t i = 0; i < _cpu_mapping_table.size(); i++) { @@ -114,11 +136,11 @@ void reserve_cpu_by_streams_info(const std::vector> _streams_in _stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]); _cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status; if (static_cast(_stream_processors[stream_pos[j]].size()) == - _streams_info_table[j][THREADS_PER_STREAM]) { + streams_table[j][THREADS_PER_STREAM]) { stream_pos[j]++; stream_num[j]++; } - if (stream_num[j] >= _streams_info_table[j][NUMBER_OF_STREAMS]) { + if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) { stream_conditions[j].clear(); } break; diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp index fcd018aa467..72437e758b7 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -369,6 +369,14 @@ void reserve_available_cpus(const std::vector> streams_info_tab << streams_info_table[i][THREADS_PER_STREAM] << " " << streams_info_table[i][STREAM_NUMA_NODE_ID] << " " << streams_info_table[i][STREAM_SOCKET_ID]; } + OPENVINO_DEBUG << "[ threading ] stream_processors:"; + for (size_t i = 0; i < stream_processors.size(); i++) { + OPENVINO_DEBUG << "{"; + for (size_t j = 0; j < stream_processors[i].size(); j++) { + OPENVINO_DEBUG << stream_processors[i][j] << ","; + } + OPENVINO_DEBUG << "},"; + } } void set_cpu_used(const std::vector& cpu_ids, const int used) { diff --git a/src/inference/tests/unit/cpu_reserve_test.cpp b/src/inference/tests/unit/cpu_reserve_test.cpp index 523bb792b6d..7a5427d777b 100644 --- a/src/inference/tests/unit/cpu_reserve_test.cpp +++ b/src/inference/tests/unit/cpu_reserve_test.cpp @@ -199,7 +199,11 @@ LinuxCpuReserveTestCase _2sockets_72cores_hyper_7streams = { {68, 1, 1, 68, MAIN_CORE_PROC, 68, -1}, {69, 1, 1, 69, MAIN_CORE_PROC, 69, -1}, {70, 1, 1, 70, MAIN_CORE_PROC, 70, -1}, {71, 1, 1, 71, MAIN_CORE_PROC, 71, -1}, }, - {{3, MAIN_CORE_PROC, 5, 0, 0}, {3, MAIN_CORE_PROC, 5, 1, 1}, {1, MAIN_CORE_PROC, 5, -1, -1}}, + {{3, MAIN_CORE_PROC, 5, 0, 0}, + {3, MAIN_CORE_PROC, 5, 1, 1}, + {1, ALL_PROC, 5, -1, -1}, + {0, MAIN_CORE_PROC, 3, 0, 0}, + {0, MAIN_CORE_PROC, 2, 1, 1}}, { {36, 37, 38, 39, 40}, {41, 42, 43, 44, 45}, @@ -308,7 +312,11 @@ LinuxCpuReserveTestCase _2sockets_72cores_hyper_9streams = { {68, 1, 1, 68, MAIN_CORE_PROC, 32, -1}, {69, 1, 1, 69, MAIN_CORE_PROC, 33, -1}, {70, 1, 1, 70, MAIN_CORE_PROC, 34, -1}, {71, 1, 1, 71, MAIN_CORE_PROC, 35, -1}, }, - {{4, MAIN_CORE_PROC, 4, 0, 0}, {4, MAIN_CORE_PROC, 4, 1, 1}, {1, MAIN_CORE_PROC, 4, -1, -1}}, + {{4, MAIN_CORE_PROC, 4, 0, 0}, + {4, MAIN_CORE_PROC, 4, 1, 1}, + {1, ALL_PROC, 4, -1, -1}, + {0, MAIN_CORE_PROC, 2, 0, 0}, + {0, MAIN_CORE_PROC, 2, 1, 1}}, { {36, 37, 38, 39}, {40, 41, 42, 43}, @@ -477,6 +485,142 @@ LinuxCpuReserveTestCase _2sockets_72cores_hyper_3streams_plugin_reserve = { }, CPU_USED, }; +LinuxCpuReserveTestCase _2sockets_104cores_2streams = { + 208, + 2, + {{208, 104, 0, 104, -1, -1}, + {52, 26, 0, 26, 0, 0}, + {52, 26, 0, 26, 1, 0}, + {52, 26, 0, 26, 2, 1}, + {52, 26, 0, 26, 3, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, + {14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {16, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, + {18, 0, 0, 9, HYPER_THREADING_PROC, 9, -1}, {19, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {20, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {21, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, + {22, 0, 0, 11, HYPER_THREADING_PROC, 11, -1}, {23, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, + {24, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {25, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, + {26, 1, 0, 13, HYPER_THREADING_PROC, 13, -1}, {27, 1, 0, 13, MAIN_CORE_PROC, 13, -1}, + {28, 1, 0, 14, HYPER_THREADING_PROC, 14, -1}, {29, 1, 0, 14, MAIN_CORE_PROC, 14, -1}, + {30, 1, 0, 15, HYPER_THREADING_PROC, 15, -1}, {31, 1, 0, 15, MAIN_CORE_PROC, 15, -1}, + {32, 1, 0, 16, HYPER_THREADING_PROC, 16, -1}, {33, 1, 0, 16, MAIN_CORE_PROC, 16, -1}, + {34, 1, 0, 17, HYPER_THREADING_PROC, 17, -1}, {35, 1, 0, 17, MAIN_CORE_PROC, 17, -1}, + {36, 1, 0, 18, HYPER_THREADING_PROC, 18, -1}, {37, 1, 0, 18, MAIN_CORE_PROC, 18, -1}, + {38, 1, 0, 19, HYPER_THREADING_PROC, 19, -1}, {39, 1, 0, 19, MAIN_CORE_PROC, 19, -1}, + {40, 1, 0, 20, HYPER_THREADING_PROC, 20, -1}, {41, 1, 0, 20, MAIN_CORE_PROC, 20, -1}, + {42, 1, 0, 21, HYPER_THREADING_PROC, 21, -1}, {43, 1, 0, 21, MAIN_CORE_PROC, 21, -1}, + {44, 1, 0, 22, HYPER_THREADING_PROC, 22, -1}, {45, 1, 0, 22, MAIN_CORE_PROC, 22, -1}, + {46, 1, 0, 23, HYPER_THREADING_PROC, 23, -1}, {47, 1, 0, 23, MAIN_CORE_PROC, 23, -1}, + {48, 1, 0, 24, HYPER_THREADING_PROC, 24, -1}, {49, 1, 0, 24, MAIN_CORE_PROC, 24, -1}, + {50, 1, 0, 25, HYPER_THREADING_PROC, 25, -1}, {51, 1, 0, 25, MAIN_CORE_PROC, 25, -1}, + {52, 2, 1, 26, HYPER_THREADING_PROC, 26, -1}, {53, 2, 1, 26, MAIN_CORE_PROC, 26, -1}, + {54, 2, 1, 27, HYPER_THREADING_PROC, 27, -1}, {55, 2, 1, 27, MAIN_CORE_PROC, 27, -1}, + {56, 2, 1, 28, HYPER_THREADING_PROC, 28, -1}, {57, 2, 1, 28, MAIN_CORE_PROC, 28, -1}, + {58, 2, 1, 29, HYPER_THREADING_PROC, 29, -1}, {59, 2, 1, 29, MAIN_CORE_PROC, 29, -1}, + {60, 2, 1, 30, HYPER_THREADING_PROC, 30, -1}, {61, 2, 1, 30, MAIN_CORE_PROC, 30, -1}, + {62, 2, 1, 31, HYPER_THREADING_PROC, 31, -1}, {63, 2, 1, 31, MAIN_CORE_PROC, 31, -1}, + {64, 2, 1, 32, HYPER_THREADING_PROC, 32, -1}, {65, 2, 1, 32, MAIN_CORE_PROC, 32, -1}, + {66, 2, 1, 33, HYPER_THREADING_PROC, 33, -1}, {67, 2, 1, 33, MAIN_CORE_PROC, 33, -1}, + {68, 2, 1, 34, HYPER_THREADING_PROC, 34, -1}, {69, 2, 1, 34, MAIN_CORE_PROC, 34, -1}, + {70, 2, 1, 35, HYPER_THREADING_PROC, 35, -1}, {71, 2, 1, 35, MAIN_CORE_PROC, 35, -1}, + {72, 2, 1, 36, HYPER_THREADING_PROC, 36, -1}, {73, 2, 1, 36, MAIN_CORE_PROC, 36, -1}, + {74, 2, 1, 37, HYPER_THREADING_PROC, 37, -1}, {75, 2, 1, 37, MAIN_CORE_PROC, 37, -1}, + {76, 2, 1, 38, HYPER_THREADING_PROC, 38, -1}, {77, 2, 1, 38, MAIN_CORE_PROC, 38, -1}, + {78, 3, 1, 39, HYPER_THREADING_PROC, 39, -1}, {79, 3, 1, 39, MAIN_CORE_PROC, 39, -1}, + {80, 3, 1, 40, HYPER_THREADING_PROC, 40, -1}, {81, 3, 1, 40, MAIN_CORE_PROC, 40, -1}, + {82, 3, 1, 41, HYPER_THREADING_PROC, 41, -1}, {83, 3, 1, 41, MAIN_CORE_PROC, 41, -1}, + {84, 3, 1, 42, HYPER_THREADING_PROC, 42, -1}, {85, 3, 1, 42, MAIN_CORE_PROC, 42, -1}, + {86, 3, 1, 43, HYPER_THREADING_PROC, 43, -1}, {87, 3, 1, 43, MAIN_CORE_PROC, 43, -1}, + {88, 3, 1, 44, HYPER_THREADING_PROC, 44, -1}, {89, 3, 1, 44, MAIN_CORE_PROC, 44, -1}, + {90, 3, 1, 45, HYPER_THREADING_PROC, 45, -1}, {91, 3, 1, 45, MAIN_CORE_PROC, 45, -1}, + {92, 3, 1, 46, HYPER_THREADING_PROC, 46, -1}, {93, 3, 1, 46, MAIN_CORE_PROC, 46, -1}, + {94, 3, 1, 47, HYPER_THREADING_PROC, 47, -1}, {95, 3, 1, 47, MAIN_CORE_PROC, 47, -1}, + {96, 3, 1, 48, HYPER_THREADING_PROC, 48, -1}, {97, 3, 1, 48, MAIN_CORE_PROC, 48, -1}, + {98, 3, 1, 49, HYPER_THREADING_PROC, 49, -1}, {99, 3, 1, 49, MAIN_CORE_PROC, 49, -1}, + {100, 3, 1, 50, HYPER_THREADING_PROC, 50, -1}, {101, 3, 1, 50, MAIN_CORE_PROC, 50, -1}, + {102, 3, 1, 51, HYPER_THREADING_PROC, 51, -1}, {103, 3, 1, 51, MAIN_CORE_PROC, 51, -1}, + {104, 0, 0, 52, HYPER_THREADING_PROC, 52, -1}, {105, 0, 0, 52, MAIN_CORE_PROC, 52, -1}, + {106, 0, 0, 53, HYPER_THREADING_PROC, 53, -1}, {107, 0, 0, 53, MAIN_CORE_PROC, 53, -1}, + {108, 0, 0, 54, HYPER_THREADING_PROC, 54, -1}, {109, 0, 0, 54, MAIN_CORE_PROC, 54, -1}, + {110, 0, 0, 55, HYPER_THREADING_PROC, 55, -1}, {111, 0, 0, 55, MAIN_CORE_PROC, 55, -1}, + {112, 0, 0, 56, HYPER_THREADING_PROC, 56, -1}, {113, 0, 0, 56, MAIN_CORE_PROC, 56, -1}, + {114, 0, 0, 57, HYPER_THREADING_PROC, 57, -1}, {115, 0, 0, 57, MAIN_CORE_PROC, 57, -1}, + {116, 0, 0, 58, HYPER_THREADING_PROC, 58, -1}, {117, 0, 0, 58, MAIN_CORE_PROC, 58, -1}, + {118, 0, 0, 59, HYPER_THREADING_PROC, 59, -1}, {119, 0, 0, 59, MAIN_CORE_PROC, 59, -1}, + {120, 0, 0, 60, HYPER_THREADING_PROC, 60, -1}, {121, 0, 0, 60, MAIN_CORE_PROC, 60, -1}, + {122, 0, 0, 61, HYPER_THREADING_PROC, 61, -1}, {123, 0, 0, 61, MAIN_CORE_PROC, 61, -1}, + {124, 0, 0, 62, HYPER_THREADING_PROC, 62, -1}, {125, 0, 0, 62, MAIN_CORE_PROC, 62, -1}, + {126, 0, 0, 63, HYPER_THREADING_PROC, 63, -1}, {127, 0, 0, 63, MAIN_CORE_PROC, 63, -1}, + {128, 0, 0, 64, HYPER_THREADING_PROC, 64, -1}, {129, 0, 0, 64, MAIN_CORE_PROC, 64, -1}, + {130, 1, 0, 65, HYPER_THREADING_PROC, 65, -1}, {131, 1, 0, 65, MAIN_CORE_PROC, 65, -1}, + {132, 1, 0, 66, HYPER_THREADING_PROC, 66, -1}, {133, 1, 0, 66, MAIN_CORE_PROC, 66, -1}, + {134, 1, 0, 67, HYPER_THREADING_PROC, 67, -1}, {135, 1, 0, 67, MAIN_CORE_PROC, 67, -1}, + {136, 1, 0, 68, HYPER_THREADING_PROC, 68, -1}, {137, 1, 0, 68, MAIN_CORE_PROC, 68, -1}, + {138, 1, 0, 69, HYPER_THREADING_PROC, 69, -1}, {139, 1, 0, 69, MAIN_CORE_PROC, 69, -1}, + {140, 1, 0, 70, HYPER_THREADING_PROC, 70, -1}, {141, 1, 0, 70, MAIN_CORE_PROC, 70, -1}, + {142, 1, 0, 71, HYPER_THREADING_PROC, 71, -1}, {143, 1, 0, 71, MAIN_CORE_PROC, 71, -1}, + {144, 1, 0, 72, HYPER_THREADING_PROC, 72, -1}, {145, 1, 0, 72, MAIN_CORE_PROC, 72, -1}, + {146, 1, 0, 73, HYPER_THREADING_PROC, 73, -1}, {147, 1, 0, 73, MAIN_CORE_PROC, 73, -1}, + {148, 1, 0, 74, HYPER_THREADING_PROC, 74, -1}, {149, 1, 0, 74, MAIN_CORE_PROC, 74, -1}, + {150, 1, 0, 75, HYPER_THREADING_PROC, 75, -1}, {151, 1, 0, 75, MAIN_CORE_PROC, 75, -1}, + {152, 1, 0, 76, HYPER_THREADING_PROC, 76, -1}, {153, 1, 0, 76, MAIN_CORE_PROC, 76, -1}, + {154, 1, 0, 77, HYPER_THREADING_PROC, 77, -1}, {155, 1, 0, 77, MAIN_CORE_PROC, 77, -1}, + {156, 2, 1, 78, HYPER_THREADING_PROC, 78, -1}, {157, 2, 1, 78, MAIN_CORE_PROC, 78, -1}, + {158, 2, 1, 79, HYPER_THREADING_PROC, 79, -1}, {159, 2, 1, 79, MAIN_CORE_PROC, 79, -1}, + {160, 2, 1, 80, HYPER_THREADING_PROC, 80, -1}, {161, 2, 1, 80, MAIN_CORE_PROC, 80, -1}, + {162, 2, 1, 81, HYPER_THREADING_PROC, 81, -1}, {163, 2, 1, 81, MAIN_CORE_PROC, 81, -1}, + {164, 2, 1, 82, HYPER_THREADING_PROC, 82, -1}, {165, 2, 1, 82, MAIN_CORE_PROC, 82, -1}, + {166, 2, 1, 83, HYPER_THREADING_PROC, 83, -1}, {167, 2, 1, 83, MAIN_CORE_PROC, 83, -1}, + {168, 2, 1, 84, HYPER_THREADING_PROC, 84, -1}, {169, 2, 1, 84, MAIN_CORE_PROC, 84, -1}, + {170, 2, 1, 85, HYPER_THREADING_PROC, 85, -1}, {171, 2, 1, 85, MAIN_CORE_PROC, 85, -1}, + {172, 2, 1, 86, HYPER_THREADING_PROC, 86, -1}, {173, 2, 1, 86, MAIN_CORE_PROC, 86, -1}, + {174, 2, 1, 87, HYPER_THREADING_PROC, 87, -1}, {175, 2, 1, 87, MAIN_CORE_PROC, 87, -1}, + {176, 2, 1, 88, HYPER_THREADING_PROC, 88, -1}, {177, 2, 1, 88, MAIN_CORE_PROC, 88, -1}, + {178, 2, 1, 89, HYPER_THREADING_PROC, 89, -1}, {179, 2, 1, 89, MAIN_CORE_PROC, 89, -1}, + {180, 2, 1, 90, HYPER_THREADING_PROC, 90, -1}, {181, 2, 1, 90, MAIN_CORE_PROC, 90, -1}, + {182, 3, 1, 91, HYPER_THREADING_PROC, 91, -1}, {183, 3, 1, 91, MAIN_CORE_PROC, 91, -1}, + {184, 3, 1, 92, HYPER_THREADING_PROC, 92, -1}, {185, 3, 1, 92, MAIN_CORE_PROC, 92, -1}, + {186, 3, 1, 93, HYPER_THREADING_PROC, 93, -1}, {187, 3, 1, 93, MAIN_CORE_PROC, 93, -1}, + {188, 3, 1, 94, HYPER_THREADING_PROC, 94, -1}, {189, 3, 1, 94, MAIN_CORE_PROC, 94, -1}, + {190, 3, 1, 95, HYPER_THREADING_PROC, 95, -1}, {191, 3, 1, 95, MAIN_CORE_PROC, 95, -1}, + {192, 3, 1, 96, HYPER_THREADING_PROC, 96, -1}, {193, 3, 1, 96, MAIN_CORE_PROC, 96, -1}, + {194, 3, 1, 97, HYPER_THREADING_PROC, 97, -1}, {195, 3, 1, 97, MAIN_CORE_PROC, 97, -1}, + {196, 3, 1, 98, HYPER_THREADING_PROC, 98, -1}, {197, 3, 1, 98, MAIN_CORE_PROC, 98, -1}, + {198, 3, 1, 99, HYPER_THREADING_PROC, 99, -1}, {199, 3, 1, 99, MAIN_CORE_PROC, 99, -1}, + {200, 3, 1, 100, HYPER_THREADING_PROC, 100, -1}, {201, 3, 1, 100, MAIN_CORE_PROC, 100, -1}, + {202, 3, 1, 101, HYPER_THREADING_PROC, 101, -1}, {203, 3, 1, 101, MAIN_CORE_PROC, 101, -1}, + {204, 3, 1, 102, HYPER_THREADING_PROC, 102, -1}, {205, 3, 1, 102, MAIN_CORE_PROC, 102, -1}, + {206, 3, 1, 103, HYPER_THREADING_PROC, 103, -1}, {207, 3, 1, 103, MAIN_CORE_PROC, 103, -1}, + }, + {{1, ALL_PROC, 104, -1, 0}, + {0, MAIN_CORE_PROC, 26, 0, 0}, + {0, MAIN_CORE_PROC, 26, 1, 0}, + {0, HYPER_THREADING_PROC, 26, 0, 0}, + {0, HYPER_THREADING_PROC, 26, 1, 0}, + {1, ALL_PROC, 104, -1, 1}, + {0, MAIN_CORE_PROC, 26, 2, 1}, + {0, MAIN_CORE_PROC, 26, 3, 1}, + {0, HYPER_THREADING_PROC, 26, 2, 1}, + {0, HYPER_THREADING_PROC, 26, 3, 1}}, + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155}, + {52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, + 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, + 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207}}, + NOT_USED, +}; LinuxCpuReserveTestCase _2sockets_20cores_hyper_20streams = { 40, 2, @@ -574,7 +718,11 @@ LinuxCpuReserveTestCase _2sockets_20cores_hyper_5streams = { {38, 0, 0, 38, MAIN_CORE_PROC, 18, -1}, {39, 1, 1, 39, MAIN_CORE_PROC, 19, -1}, }, - {{2, MAIN_CORE_PROC, 4, 0, 0}, {2, MAIN_CORE_PROC, 4, 1, 1}, {1, MAIN_CORE_PROC, 4, -1, -1}}, + {{2, MAIN_CORE_PROC, 4, 0, 0}, + {2, MAIN_CORE_PROC, 4, 1, 1}, + {1, ALL_PROC, 4, -1, -1}, + {0, MAIN_CORE_PROC, 2, 0, 0}, + {0, MAIN_CORE_PROC, 2, 1, 1}}, { {20, 22, 24, 26}, {28, 30, 32, 34}, @@ -747,7 +895,7 @@ LinuxCpuReserveTestCase _1socket_18cores_hyper_1streams = { {32, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {33, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, {34, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {35, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, }, - {{1, MAIN_CORE_PROC, 36, 0, 0}}, + {{1, ALL_PROC, 36, 0, 0}, {0, MAIN_CORE_PROC, 18, 0, 0}, {0, HYPER_THREADING_PROC, 18, 0, 0}}, { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}, @@ -809,7 +957,7 @@ LinuxCpuReserveTestCase _1socket_32cores_hyper_1streams = { }, {{1, ALL_PROC, 24, 0, 0}, {0, MAIN_CORE_PROC, 8, 0, 0}, {0, EFFICIENT_CORE_PROC, 16, 0, 0}}, { - {}, + {1, 3, 5, 7, 9, 11, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, }, NOT_USED, }; @@ -826,6 +974,7 @@ INSTANTIATE_TEST_SUITE_P(CPUReserve, _2sockets_72cores_hyper_3streams, _2sockets_72cores_hyper_5streams, _2sockets_72cores_hyper_3streams_plugin_reserve, + _2sockets_104cores_2streams, _2sockets_20cores_hyper_20streams, _2sockets_20cores_hyper_4streams, _2sockets_20cores_hyper_5streams, diff --git a/src/inference/tests/unit/cpu_stream_info_test.cpp b/src/inference/tests/unit/cpu_stream_info_test.cpp index f9c297f87bd..51d07015077 100644 --- a/src/inference/tests/unit/cpu_stream_info_test.cpp +++ b/src/inference/tests/unit/cpu_stream_info_test.cpp @@ -30,6 +30,7 @@ struct LinuxCpuStreamTypeCase { std::vector _concurrency; std::vector _core_type; std::vector _numa_node_id; + std::vector _max_threads_per_core; }; class LinuxCpuStreamTypeTests : public ov::test::TestsCommon, @@ -43,6 +44,7 @@ public: std::vector test_concurrencys; std::vector test_core_types; std::vector test_numa_node_ids; + std::vector test_max_threads_per_cores; int streams = 0; for (size_t i = 0; i < test_data._streams_info_table.size(); i++) { @@ -61,6 +63,7 @@ public: int test_concurrency; int test_core_type; int test_numa_node_id; + int test_max_threads_per_core; get_cur_stream_info(i, test_data._cpu_reservation, test_data._proc_type_table, @@ -68,17 +71,20 @@ public: test_stream_type, test_concurrency, test_core_type, - test_numa_node_id); + test_numa_node_id, + test_max_threads_per_core); test_stream_types.push_back(test_stream_type); test_concurrencys.push_back(test_concurrency); test_core_types.push_back(test_core_type); test_numa_node_ids.push_back(test_numa_node_id); + test_max_threads_per_cores.push_back(test_max_threads_per_core); } ASSERT_EQ(test_data._stream_type, test_stream_types); ASSERT_EQ(test_data._concurrency, test_concurrencys); ASSERT_EQ(test_data._core_type, test_core_types); ASSERT_EQ(test_data._numa_node_id, test_numa_node_ids); + ASSERT_EQ(test_data._max_threads_per_core, test_max_threads_per_cores); } }; @@ -152,6 +158,8 @@ LinuxCpuStreamTypeCase _2sockets_72cores_nobinding_36streams = { }, // param[out]: numa_node_id per stream used in new task_arena {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + // param[out]: max_threads_per_core per stream used in new task_arena + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, }; LinuxCpuStreamTypeCase _2sockets_72cores_nobinding_9streams = { false, @@ -195,7 +203,11 @@ LinuxCpuStreamTypeCase _2sockets_72cores_nobinding_9streams = { {70, 1, 1, 70, MAIN_CORE_PROC, 70, -1}, {71, 1, 1, 71, MAIN_CORE_PROC, 71, -1}, }, {{72, 36, 0, 36, -1, -1}, {36, 18, 0, 18, 0, 0}, {36, 18, 0, 18, 1, 1}}, - {{4, MAIN_CORE_PROC, 4, 0, 0}, {4, MAIN_CORE_PROC, 4, 1, 1}, {1, MAIN_CORE_PROC, 4, -1, -1}}, + {{4, MAIN_CORE_PROC, 4, 0, 0}, + {4, MAIN_CORE_PROC, 4, 1, 1}, + {1, ALL_PROC, 4, -1, -1}, + {0, MAIN_CORE_PROC, 2, 0, 0}, + {0, MAIN_CORE_PROC, 2, 1, 1}}, { STREAM_WITH_NUMA_ID, STREAM_WITH_NUMA_ID, @@ -217,9 +229,10 @@ LinuxCpuStreamTypeCase _2sockets_72cores_nobinding_9streams = { MAIN_CORE_PROC, MAIN_CORE_PROC, MAIN_CORE_PROC, - MAIN_CORE_PROC, + ALL_PROC, }, {0, 0, 0, 0, 1, 1, 1, 1, NUMA_ALL}, + {1, 1, 1, 1, 1, 1, 1, 1, 1}, }; LinuxCpuStreamTypeCase _2sockets_72cores_binding_9streams = { true, @@ -263,7 +276,11 @@ LinuxCpuStreamTypeCase _2sockets_72cores_binding_9streams = { {70, 1, 1, 70, MAIN_CORE_PROC, 70, -1}, {71, 1, 1, 71, MAIN_CORE_PROC, 71, -1}, }, {{72, 36, 0, 36, -1, -1}, {36, 18, 0, 18, 0, 0}, {36, 18, 0, 18, 1, 1}}, - {{4, MAIN_CORE_PROC, 4, 0, 0}, {4, MAIN_CORE_PROC, 4, 1, 1}, {1, MAIN_CORE_PROC, 4, -1, -1}}, + {{4, MAIN_CORE_PROC, 4, 0, 0}, + {4, MAIN_CORE_PROC, 4, 1, 1}, + {1, ALL_PROC, 4, -1, -1}, + {0, MAIN_CORE_PROC, 2, 0, 0}, + {0, MAIN_CORE_PROC, 2, 1, 1}}, # if defined(__linux__) { STREAM_WITH_OBSERVE, @@ -299,9 +316,10 @@ LinuxCpuStreamTypeCase _2sockets_72cores_binding_9streams = { MAIN_CORE_PROC, MAIN_CORE_PROC, MAIN_CORE_PROC, - MAIN_CORE_PROC, + ALL_PROC, }, {0, 0, 0, 0, 1, 1, 1, 1, NUMA_ALL}, + {1, 1, 1, 1, 1, 1, 1, 1, 1}, }; LinuxCpuStreamTypeCase _1sockets_4cores_nobinding = { false, @@ -317,11 +335,12 @@ LinuxCpuStreamTypeCase _1sockets_4cores_nobinding = { {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, }, {{8, 4, 0, 4, 0, 0}}, - {{1, MAIN_CORE_PROC, 8, 0, 0}}, + {{1, ALL_PROC, 8, 0, 0}, {0, MAIN_CORE_PROC, 4, 0, 0}, {0, HYPER_THREADING_PROC, 4, 0, 0}}, {STREAM_WITHOUT_PARAM}, {8}, - {MAIN_CORE_PROC}, + {ALL_PROC}, {0}, + {2}, }; LinuxCpuStreamTypeCase _1sockets_4cores_binding = { true, @@ -361,7 +380,9 @@ LinuxCpuStreamTypeCase _1sockets_4cores_binding = { MAIN_CORE_PROC, }, {0, 0, 0, 0}, + {1, 1, 1, 1}, }; + LinuxCpuStreamTypeCase _1sockets_12cores_pcore_nobinding = { false, 1, @@ -383,6 +404,7 @@ LinuxCpuStreamTypeCase _1sockets_12cores_pcore_nobinding = { {8}, {MAIN_CORE_PROC}, {0}, + {1}, }; LinuxCpuStreamTypeCase _1sockets_12cores_pcore_binding = { true, @@ -418,6 +440,7 @@ LinuxCpuStreamTypeCase _1sockets_12cores_pcore_binding = { MAIN_CORE_PROC, }, {0, 0}, + {1, 1}, }; LinuxCpuStreamTypeCase _1sockets_12cores_ecore_nobinding = { false, @@ -446,6 +469,7 @@ LinuxCpuStreamTypeCase _1sockets_12cores_ecore_nobinding = { EFFICIENT_CORE_PROC, }, {0, 0}, + {1, 1}, }; LinuxCpuStreamTypeCase _1sockets_12cores_ecore_binding = { true, @@ -487,6 +511,7 @@ LinuxCpuStreamTypeCase _1sockets_12cores_ecore_binding = { EFFICIENT_CORE_PROC, }, {0, 0, 0, 0}, + {1, 1, 1, 1}, }; LinuxCpuStreamTypeCase _1sockets_24cores_all_proc = { false, @@ -510,11 +535,12 @@ LinuxCpuStreamTypeCase _1sockets_24cores_all_proc = { {30, 0, 0, 22, EFFICIENT_CORE_PROC, 11, -1}, {31, 0, 0, 23, EFFICIENT_CORE_PROC, 11, -1}, }, {{32, 8, 16, 8, 0, 0}}, - {{1, ALL_PROC, 24, 0, 0}}, + {{1, ALL_PROC, 24, 0, 0}, {0, MAIN_CORE_PROC, 8, 0, 0}, {0, EFFICIENT_CORE_PROC, 16, 0, 0}}, {STREAM_WITHOUT_PARAM}, {24}, {ALL_PROC}, {0}, + {1}, }; TEST_P(LinuxCpuStreamTypeTests, LinuxCpuStreamType) {}