fix coredump with numactl (#21393)
* fix coredump with numactl * record original socketid and numaid * update comment * update comments --------- Co-authored-by: Chen Peter <peter.chen@intel.com>
This commit is contained in:
@@ -260,6 +260,24 @@ OPENVINO_RUNTIME_API void set_cpu_used(const std::vector<int>& cpu_ids, const in
|
||||
*/
|
||||
OPENVINO_RUNTIME_API int get_socket_by_numa_node(int numa_node_id);
|
||||
|
||||
/**
|
||||
* @brief Get original socket id by current socket id. The input socket id is recalculated after filtering (like
|
||||
* numactl), while the original socket id is the original id before filtering
|
||||
* @ingroup ie_dev_api_system_conf
|
||||
* @param[in] socket_id socket id
|
||||
* @return original socket id, or -1 if no original id is recorded for the given socket id
|
||||
*/
|
||||
OPENVINO_RUNTIME_API int get_org_socket_id(int socket_id);
|
||||
|
||||
/**
|
||||
* @brief Get original numa node id by current numa node id. The input numa node id is recalculated after filtering
|
||||
* (like numactl), while the original numa node id is the original id before filtering
|
||||
* @ingroup ie_dev_api_system_conf
|
||||
* @param[in] numa_node_id numa node id
|
||||
* @return original numa node id, or -1 if no original id is recorded for the given numa node id
|
||||
*/
|
||||
OPENVINO_RUNTIME_API int get_org_numa_id(int numa_node_id);
|
||||
|
||||
/**
|
||||
* @enum ColumnOfCPUMappingTable
|
||||
* @brief This enum contains the definition of each column in the CPU mapping table, which uses processor id as index.
|
||||
|
||||
@@ -144,7 +144,7 @@ struct CPUStreamsExecutor::Impl {
|
||||
.set_max_threads_per_core(max_threads_per_core)});
|
||||
} else if (stream_type == STREAM_WITH_NUMA_ID) {
|
||||
_taskArena.reset(new custom::task_arena{custom::task_arena::constraints{}
|
||||
.set_numa_id(_numaNodeId)
|
||||
.set_numa_id(get_org_numa_id(_numaNodeId))
|
||||
.set_max_concurrency(concurrency)
|
||||
.set_max_threads_per_core(max_threads_per_core)});
|
||||
} else if (stream_type == STREAM_WITH_CORE_TYPE) {
|
||||
|
||||
@@ -563,6 +563,10 @@ void IStreamsExecutor::Config::update_executor_config(int stream_nums,
|
||||
return;
|
||||
}
|
||||
|
||||
if (proc_type_table.size() > 1) {
|
||||
core_type = ov::threading::IStreamsExecutor::Config::ANY;
|
||||
}
|
||||
|
||||
// IStreamsExecutor::Config config = initial;
|
||||
const auto total_num_cores = proc_type_table[0][ALL_PROC];
|
||||
const auto total_num_big_cores = proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC];
|
||||
|
||||
@@ -27,6 +27,8 @@ public:
|
||||
std::vector<std::vector<int>> _org_proc_type_table;
|
||||
std::vector<std::vector<int>> _proc_type_table;
|
||||
std::vector<std::vector<int>> _cpu_mapping_table;
|
||||
std::map<int, int> _socketid_mapping_table;
|
||||
std::map<int, int> _numaid_mapping_table;
|
||||
std::mutex _cpu_mutex;
|
||||
int _socket_idx = 0;
|
||||
};
|
||||
|
||||
@@ -169,10 +169,16 @@ CPU::CPU() {
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < valid_cpu_mapping_table.size(); i++) {
|
||||
valid_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] =
|
||||
numa_node_map.at(valid_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]);
|
||||
valid_cpu_mapping_table[i][CPU_MAP_SOCKET_ID] =
|
||||
sockets_map.at(valid_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]);
|
||||
auto new_numa_id = numa_node_map.at(valid_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]);
|
||||
auto new_socket_id = sockets_map.at(valid_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]);
|
||||
if (_numaid_mapping_table.find(new_numa_id) == _numaid_mapping_table.end()) {
|
||||
_numaid_mapping_table.insert({new_numa_id, valid_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]});
|
||||
}
|
||||
if (_socketid_mapping_table.find(new_socket_id) == _socketid_mapping_table.end()) {
|
||||
_socketid_mapping_table.insert({new_socket_id, valid_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]});
|
||||
}
|
||||
valid_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] = new_numa_id;
|
||||
valid_cpu_mapping_table[i][CPU_MAP_SOCKET_ID] = new_socket_id;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -265,12 +271,13 @@ CPU::CPU() {
|
||||
_cores);
|
||||
}
|
||||
}
|
||||
_org_proc_type_table = _proc_type_table;
|
||||
std::vector<std::vector<std::string>>().swap(system_info_table);
|
||||
|
||||
if (check_valid_cpu() < 0) {
|
||||
OPENVINO_THROW("CPU affinity check failed. No CPU is eligible to run inference.");
|
||||
};
|
||||
|
||||
_org_proc_type_table = _proc_type_table;
|
||||
}
|
||||
|
||||
void parse_node_info_linux(const std::vector<std::string> node_info_table,
|
||||
|
||||
@@ -215,6 +215,14 @@ int get_socket_by_numa_node(int numa_node_id) {
|
||||
return -1;
|
||||
};
|
||||
|
||||
/**
 * Variant of get_org_socket_id for this preprocessor branch.
 *
 * No lookup is performed here: the function reports -1 for every input.
 *
 * @param socket_id socket id (unused in this branch)
 * @return always -1
 */
int get_org_socket_id(int socket_id) {
    static_cast<void>(socket_id);  // parameter is intentionally ignored
    const int not_available = -1;
    return not_available;
}
|
||||
|
||||
/**
 * Variant of get_org_numa_id for this preprocessor branch.
 *
 * No lookup is performed here: the function reports -1 for every input.
 *
 * @param numa_node_id numa node id (unused in this branch)
 * @return always -1
 */
int get_org_numa_id(int numa_node_id) {
    static_cast<void>(numa_node_id);  // parameter is intentionally ignored
    const int not_available = -1;
    return not_available;
}
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
// for Linux and Windows the getNumberOfCPUCores (that accounts only for physical cores) implementation is OS-specific
|
||||
// (see cpp files in corresponding folders), for __APPLE__ it is default :
|
||||
@@ -267,6 +275,24 @@ int get_socket_by_numa_node(int numa_node_id) {
|
||||
return -1;
|
||||
};
|
||||
|
||||
/**
 * Translate a socket id into the value stored for it in the CPU object's
 * _socketid_mapping_table.
 *
 * @param socket_id key to look up in the socket id mapping table
 * @return the mapped value, or -1 when the table has no entry for socket_id
 */
int get_org_socket_id(int socket_id) {
    const auto& socket_table = cpu_info()._socketid_mapping_table;
    const auto entry = socket_table.find(socket_id);
    // A missing key means no original id was recorded for this socket id.
    return entry == socket_table.end() ? -1 : entry->second;
}
|
||||
|
||||
/**
 * Translate a numa node id into the value stored for it in the CPU object's
 * _numaid_mapping_table.
 *
 * @param numa_node_id key to look up in the numa id mapping table
 * @return the mapped value, or -1 when the table has no entry for numa_node_id
 */
int get_org_numa_id(int numa_node_id) {
    const auto& numa_table = cpu_info()._numaid_mapping_table;
    const auto entry = numa_table.find(numa_node_id);
    // A missing key means no original id was recorded for this numa node id.
    return entry == numa_table.end() ? -1 : entry->second;
}
|
||||
|
||||
#else
|
||||
|
||||
# ifndef _WIN32
|
||||
@@ -417,6 +443,25 @@ int get_number_of_logical_cpu_cores(bool bigCoresOnly) {
|
||||
# endif
|
||||
return logical_cores;
|
||||
}
|
||||
|
||||
/**
 * Translate a socket id into the value stored for it in the CPU object's
 * _socketid_mapping_table.
 *
 * @param socket_id key to look up in the socket id mapping table
 * @return the mapped value, or -1 when the table has no entry for socket_id
 */
int get_org_socket_id(int socket_id) {
    const auto& socket_table = cpu_info()._socketid_mapping_table;
    const auto entry = socket_table.find(socket_id);
    // A missing key means no original id was recorded for this socket id.
    return entry == socket_table.end() ? -1 : entry->second;
}
|
||||
|
||||
/**
 * Translate a numa node id into the value stored for it in the CPU object's
 * _numaid_mapping_table.
 *
 * @param numa_node_id key to look up in the numa id mapping table
 * @return the mapped value, or -1 when the table has no entry for numa_node_id
 */
int get_org_numa_id(int numa_node_id) {
    const auto& numa_table = cpu_info()._numaid_mapping_table;
    const auto entry = numa_table.find(numa_node_id);
    // A missing key means no original id was recorded for this numa node id.
    return entry == numa_table.end() ? -1 : entry->second;
}
|
||||
|
||||
#endif
|
||||
|
||||
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
|
||||
|
||||
Reference in New Issue
Block a user