enable new method to generate CPU information and CPU map on Windows (#14710)
* enable new method to generate CPU information and CPU map
* fix code style issue
* fix initialization issue of variable-sized object
* fix dependency issue
* add sample of CPU map
* add description and sample for CPU map
* fix code style issue
* fix code style issue
* add comments on using second processor as physical core
* enable new method to generate CPU information and CPU map on windows
* remove debug output
* add description for CPU map table
* remove changes for linux
* update description for better understanding
* update CPU mapping table on Windows
* fix precision issue of log2()
* fix memory leak
* use shared_ptr to manage memory life cycle
* Wrap parser for Windows into a separate function for mock testing later
* Revert "Wrap parser for Windows into a separate function for mock testing later"
This reverts commit 614ad718c2.
* add core type table for each socket on windows
* separate CPU map parser on Windows for validation
* fix core type table definition
* fix DWORD issue in header file
* update parser interface for validation
* fix socket count
* update processor count for XEON
* add description and example for processor type table
* remove conflicts
* fix merge conflicts
* fix document issue
This commit is contained in:
@@ -212,4 +212,26 @@ void parse_processor_info_linux(const int _processors,
|
||||
std::vector<std::vector<int>>& _cpu_mapping_table);
|
||||
#endif
|
||||
|
||||
#if (defined(_WIN32) || defined(_WIN64))
|
||||
/**
 * @brief      Parse processors information on Windows
 * @ingroup    ie_dev_api_system_conf
 * @param[in]  base_ptr buffer object pointer of Windows system information
 * @param[in]  len buffer object length of Windows system information
 * @param[in]  _processors total number for processors in system.
 * @param[out] _sockets total number for sockets in system
 * @param[out] _cores total number for physical CPU cores in system
 * @param[out] _proc_type_table summary table of number of processors per type
 * @param[out] _cpu_mapping_table CPU mapping table for each processor
 */
void parse_processor_info_win(const char* base_ptr,
                              const unsigned long len,
                              const int _processors,
                              int& _sockets,
                              int& _cores,
                              std::vector<std::vector<int>>& _proc_type_table,
                              std::vector<std::vector<int>>& _cpu_mapping_table);
|
||||
#endif
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
||||
@@ -7,21 +7,168 @@
|
||||
#endif
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "ie_system_conf.h"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
struct CPU {
|
||||
int _processors = 0;
|
||||
int _sockets = 0;
|
||||
int _cores = 0;
|
||||
|
||||
std::vector<int> _proc_type_table;
|
||||
std::vector<std::vector<int>> _proc_type_table;
|
||||
std::vector<std::vector<int>> _cpu_mapping_table;
|
||||
|
||||
CPU() {
|
||||
DWORD len = 0;
|
||||
if (GetLogicalProcessorInformationEx(RelationAll, nullptr, &len) ||
|
||||
GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::shared_ptr<char> base_shared_ptr(new char[len]);
|
||||
char* base_ptr = base_shared_ptr.get();
|
||||
if (!GetLogicalProcessorInformationEx(RelationAll, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)base_ptr, &len)) {
|
||||
return;
|
||||
}
|
||||
|
||||
_processors = GetMaximumProcessorCount(ALL_PROCESSOR_GROUPS);
|
||||
|
||||
parse_processor_info_win(base_ptr, len, _processors, _sockets, _cores, _proc_type_table, _cpu_mapping_table);
|
||||
}
|
||||
};
|
||||
static CPU cpu;
|
||||
|
||||
void parse_processor_info_win(const char* base_ptr,
|
||||
const unsigned long len,
|
||||
const int _processors,
|
||||
int& _sockets,
|
||||
int& _cores,
|
||||
std::vector<std::vector<int>>& _proc_type_table,
|
||||
std::vector<std::vector<int>>& _cpu_mapping_table) {
|
||||
_cpu_mapping_table.resize(_processors, std::vector<int>(CPU_MAP_TABLE_SIZE, -1));
|
||||
|
||||
std::vector<int> list;
|
||||
|
||||
char* info_ptr = (char*)base_ptr;
|
||||
int list_len = 0;
|
||||
int base_proc = 0;
|
||||
int proc_count = 0;
|
||||
int mask_len = 0;
|
||||
int group = 0;
|
||||
_sockets = -1;
|
||||
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = NULL;
|
||||
|
||||
auto MaskToList = [&](const KAFFINITY mask_input) {
|
||||
KAFFINITY mask = mask_input;
|
||||
int cnt = 0;
|
||||
|
||||
list.clear();
|
||||
list_len = 0;
|
||||
while (mask != 0) {
|
||||
if (0x1 == (mask & 0x1)) {
|
||||
list.push_back(cnt);
|
||||
list_len++;
|
||||
}
|
||||
cnt++;
|
||||
mask >>= 1;
|
||||
}
|
||||
return;
|
||||
};
|
||||
|
||||
std::vector<int> line_value_0(PROC_TYPE_TABLE_SIZE, 0);
|
||||
|
||||
for (; info_ptr < base_ptr + len; info_ptr += (DWORD)info->Size) {
|
||||
info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)info_ptr;
|
||||
|
||||
if (info->Relationship == RelationProcessorPackage) {
|
||||
_sockets++;
|
||||
MaskToList(info->Processor.GroupMask->Mask);
|
||||
mask_len = list_len;
|
||||
if (0 == _sockets) {
|
||||
_proc_type_table.push_back(line_value_0);
|
||||
} else {
|
||||
_proc_type_table.push_back(_proc_type_table[0]);
|
||||
_proc_type_table[0] = line_value_0;
|
||||
}
|
||||
} else if (info->Relationship == RelationProcessorCore) {
|
||||
MaskToList(info->Processor.GroupMask->Mask);
|
||||
|
||||
if (proc_count >= _processors) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (0 == list[0]) {
|
||||
base_proc = proc_count;
|
||||
}
|
||||
|
||||
if (2 == list_len) {
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_PROCESSOR_ID] = list[0] + base_proc;
|
||||
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_PROCESSOR_ID] = list[1] + base_proc;
|
||||
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_SOCKET_ID] = _sockets;
|
||||
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_SOCKET_ID] = _sockets;
|
||||
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_CORE_ID] = _cores;
|
||||
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_CORE_ID] = _cores;
|
||||
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC;
|
||||
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC;
|
||||
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_GROUP_ID] = group;
|
||||
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_GROUP_ID] = group;
|
||||
|
||||
_proc_type_table[0][MAIN_CORE_PROC]++;
|
||||
_proc_type_table[0][HYPER_THREADING_PROC]++;
|
||||
group++;
|
||||
|
||||
} else {
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_PROCESSOR_ID] = list[0] + base_proc;
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_SOCKET_ID] = _sockets;
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_CORE_ID] = _cores;
|
||||
}
|
||||
_proc_type_table[0][ALL_PROC] += list_len;
|
||||
proc_count += list_len;
|
||||
_cores++;
|
||||
|
||||
} else if ((info->Relationship == RelationCache) && (info->Cache.Level == 2)) {
|
||||
MaskToList(info->Cache.GroupMask.Mask);
|
||||
|
||||
if (4 == list_len) {
|
||||
for (int m = 0; m < list_len; m++) {
|
||||
_cpu_mapping_table[list[m] + base_proc][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC;
|
||||
_cpu_mapping_table[list[m] + base_proc][CPU_MAP_GROUP_ID] = group;
|
||||
_proc_type_table[0][EFFICIENT_CORE_PROC]++;
|
||||
}
|
||||
group++;
|
||||
|
||||
} else if (1 == list_len) {
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC;
|
||||
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_GROUP_ID] = group;
|
||||
_proc_type_table[0][MAIN_CORE_PROC]++;
|
||||
group++;
|
||||
}
|
||||
}
|
||||
}
|
||||
_sockets++;
|
||||
if (_sockets > 1) {
|
||||
_proc_type_table.push_back(_proc_type_table[0]);
|
||||
_proc_type_table[0] = line_value_0;
|
||||
|
||||
for (int m = 1; m <= _sockets; m++) {
|
||||
for (int n = 0; n <= EFFICIENT_CORE_PROC; n++) {
|
||||
_proc_type_table[0][n] += _proc_type_table[m][n];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int getNumberOfCPUCores(bool bigCoresOnly) {
|
||||
const int fallback_val = parallel_get_max_threads();
|
||||
DWORD sz = 0;
|
||||
|
||||
Reference in New Issue
Block a user