enable new method to generate CPU information and CPU map on Windows (#14710)

* enable new method to generate CPU information and CPU map

* fix code style issue

* fix initialization issue of variable-sized object

* fix dependency issue

* add sample of CPU map

* add description and sample for CPU map

* fix code style issue

* fix code style issue

* add comments on using second processor as physical core

* enable new method to generate CPU information and CPU map on windows

* remove debug output

* add description for CPU map table

* remove changes for linux

* update description for better understanding

* update CPU mapping table on Windows

* fix precision issue of log2()

* fix memory leak

* use shared_ptr to manage memory life cycle

* Wrap parser for Windows into a separate function for mock testing later

* Revert "Wrap parser for Windows into a separate function for mock testing later"

This reverts commit 614ad718c2.

* add core type table for each socket on windows

* separate CPU map parser on Windows for validation

* fix core type table definition

* fix DWORD issue in header file

* update parser interface for validation

* fix socket count

* update processor count for XEON

* add description and example for processor type table

* remove conflicts

* fix merge conflicts

* fix document issue
This commit is contained in:
Shen, Wanglei
2023-02-07 12:28:49 +00:00
committed by GitHub
parent 44eedc8870
commit 79cad1032b
2 changed files with 170 additions and 1 deletions

View File

@@ -212,4 +212,26 @@ void parse_processor_info_linux(const int _processors,
std::vector<std::vector<int>>& _cpu_mapping_table);
#endif
#if (defined(_WIN32) || defined(_WIN64))
/**
* @brief Parse processor information on Windows
* @ingroup ie_dev_api_system_conf
* @param[in] base_ptr pointer to the buffer holding the Windows system information records
* @param[in] len length of that buffer, in bytes
* @param[in] _processors total number of processors in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _cores total number of physical CPU cores in the system
* @param[out] _proc_type_table summary table of the number of processors per type
* @param[out] _cpu_mapping_table CPU mapping table with one row per processor
*/
void parse_processor_info_win(const char* base_ptr,
const unsigned long len,
const int _processors,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
std::vector<std::vector<int>>& _cpu_mapping_table);
#endif
} // namespace InferenceEngine

View File

@@ -7,21 +7,168 @@
#endif
#include <windows.h>
#include <memory>
#include <vector>
#include "ie_system_conf.h"
#include "threading/ie_parallel_custom_arena.hpp"
namespace InferenceEngine {
struct CPU {
int _processors = 0;
int _sockets = 0;
int _cores = 0;
std::vector<int> _proc_type_table;
std::vector<std::vector<int>> _proc_type_table;
std::vector<std::vector<int>> _cpu_mapping_table;
CPU() {
DWORD len = 0;
if (GetLogicalProcessorInformationEx(RelationAll, nullptr, &len) ||
GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
return;
}
std::shared_ptr<char> base_shared_ptr(new char[len]);
char* base_ptr = base_shared_ptr.get();
if (!GetLogicalProcessorInformationEx(RelationAll, (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)base_ptr, &len)) {
return;
}
_processors = GetMaximumProcessorCount(ALL_PROCESSOR_GROUPS);
parse_processor_info_win(base_ptr, len, _processors, _sockets, _cores, _proc_type_table, _cpu_mapping_table);
}
};
// File-local CPU instance; its constructor performs the OS query when this
// translation unit's static objects are initialized.
static CPU cpu;
void parse_processor_info_win(const char* base_ptr,
const unsigned long len,
const int _processors,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
std::vector<std::vector<int>>& _cpu_mapping_table) {
_cpu_mapping_table.resize(_processors, std::vector<int>(CPU_MAP_TABLE_SIZE, -1));
std::vector<int> list;
char* info_ptr = (char*)base_ptr;
int list_len = 0;
int base_proc = 0;
int proc_count = 0;
int mask_len = 0;
int group = 0;
_sockets = -1;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = NULL;
auto MaskToList = [&](const KAFFINITY mask_input) {
KAFFINITY mask = mask_input;
int cnt = 0;
list.clear();
list_len = 0;
while (mask != 0) {
if (0x1 == (mask & 0x1)) {
list.push_back(cnt);
list_len++;
}
cnt++;
mask >>= 1;
}
return;
};
std::vector<int> line_value_0(PROC_TYPE_TABLE_SIZE, 0);
for (; info_ptr < base_ptr + len; info_ptr += (DWORD)info->Size) {
info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)info_ptr;
if (info->Relationship == RelationProcessorPackage) {
_sockets++;
MaskToList(info->Processor.GroupMask->Mask);
mask_len = list_len;
if (0 == _sockets) {
_proc_type_table.push_back(line_value_0);
} else {
_proc_type_table.push_back(_proc_type_table[0]);
_proc_type_table[0] = line_value_0;
}
} else if (info->Relationship == RelationProcessorCore) {
MaskToList(info->Processor.GroupMask->Mask);
if (proc_count >= _processors) {
break;
}
if (0 == list[0]) {
base_proc = proc_count;
}
if (2 == list_len) {
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_PROCESSOR_ID] = list[0] + base_proc;
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_PROCESSOR_ID] = list[1] + base_proc;
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_SOCKET_ID] = _sockets;
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_SOCKET_ID] = _sockets;
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_CORE_ID] = _cores;
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_CORE_ID] = _cores;
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC;
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC;
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_GROUP_ID] = group;
_cpu_mapping_table[list[1] + base_proc][CPU_MAP_GROUP_ID] = group;
_proc_type_table[0][MAIN_CORE_PROC]++;
_proc_type_table[0][HYPER_THREADING_PROC]++;
group++;
} else {
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_PROCESSOR_ID] = list[0] + base_proc;
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_SOCKET_ID] = _sockets;
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_CORE_ID] = _cores;
}
_proc_type_table[0][ALL_PROC] += list_len;
proc_count += list_len;
_cores++;
} else if ((info->Relationship == RelationCache) && (info->Cache.Level == 2)) {
MaskToList(info->Cache.GroupMask.Mask);
if (4 == list_len) {
for (int m = 0; m < list_len; m++) {
_cpu_mapping_table[list[m] + base_proc][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC;
_cpu_mapping_table[list[m] + base_proc][CPU_MAP_GROUP_ID] = group;
_proc_type_table[0][EFFICIENT_CORE_PROC]++;
}
group++;
} else if (1 == list_len) {
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC;
_cpu_mapping_table[list[0] + base_proc][CPU_MAP_GROUP_ID] = group;
_proc_type_table[0][MAIN_CORE_PROC]++;
group++;
}
}
}
_sockets++;
if (_sockets > 1) {
_proc_type_table.push_back(_proc_type_table[0]);
_proc_type_table[0] = line_value_0;
for (int m = 1; m <= _sockets; m++) {
for (int n = 0; n <= EFFICIENT_CORE_PROC; n++) {
_proc_type_table[0][n] += _proc_type_table[m][n];
}
}
}
}
int getNumberOfCPUCores(bool bigCoresOnly) {
const int fallback_val = parallel_get_max_threads();
DWORD sz = 0;