choose Pcore to compile model for GPU plugin (#20472)

* choose Pcore to compile model for GPU plugin

* provide function to update executor config

* set callback executor to nullptr for GPU plugin

* fix code style

* fix warning

* optimize duplicate code

* set callback executor to nullptr for another gpu compile_model

* add description for new function

* add smoke test

* fix code style

* modify function definition

---------

Co-authored-by: Wanglei Shen <wanglei.shen@intel.com>
Fang Xu 2023-10-30 16:24:36 +08:00 committed by GitHub
parent cec6535eaa
commit 82f191b0e7
5 changed files with 289 additions and 23 deletions


@@ -150,6 +150,19 @@ public:
_threadPreferredCoreType(threadPreferredCoreType),
_streams_info_table{streamsInfoTable},
_cpu_reservation{cpuReservation} {}
/**
* @brief Modify _streams_info_table and the related configuration according to user-specified parameters, and
* bind threads to cpu cores if cpu_pinning is true.
* @param stream_nums Number of streams specified by user
* @param threads_per_stream Number of threads per stream specified by user
* @param core_type Cpu type (Big/Little/Any) specified by user
* @param cpu_pinning Whether to bind the threads to cpu cores
*/
void update_executor_config(int stream_nums,
int threads_per_stream,
PreferredCoreType core_type,
bool cpu_pinning);
};
/**

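A minimal usage sketch of the new API declared above (not part of the diff; the executor tag and the requested stream/thread counts are illustrative assumptions, and the resulting config is expected to be handed to a streams executor afterwards):

#include "openvino/runtime/threading/istreams_executor.hpp"

ov::threading::IStreamsExecutor::Config make_big_core_config() {
    // Start from a named config with one stream; the tag is arbitrary.
    ov::threading::IStreamsExecutor::Config config("example task executor", 1);
    // Request 4 streams, 1 thread per stream, restricted to big (performance) cores,
    // without pinning threads to specific cores.
    config.update_executor_config(4, 1, ov::threading::IStreamsExecutor::Config::BIG, false);
    return config;
}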

@@ -553,5 +553,107 @@ IStreamsExecutor::Config IStreamsExecutor::Config::reserve_cpu_threads(const ISt
return config;
}
void IStreamsExecutor::Config::update_executor_config(int stream_nums,
int threads_per_stream,
IStreamsExecutor::Config::PreferredCoreType core_type,
bool cpu_pinning) {
const auto proc_type_table = ov::get_proc_type_table();
if (proc_type_table.empty()) {
return;
}
// IStreamsExecutor::Config config = initial;
const auto total_num_cores = proc_type_table[0][ALL_PROC];
const auto total_num_big_cores = proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC];
const auto total_num_little_cores = proc_type_table[0][EFFICIENT_CORE_PROC];
int num_cores = total_num_cores;
if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}
int streams = std::min(stream_nums, num_cores);
if (streams == 0) {
return;
}
_streams = streams;
_threadPreferredCoreType = core_type;
_threadsPerStream = threads_per_stream;
// create stream_info_table based on core type
std::vector<int> stream_info(ov::CPU_STREAMS_TABLE_SIZE, 0);
stream_info[ov::THREADS_PER_STREAM] = _threadsPerStream;
stream_info[ov::STREAM_NUMA_NODE_ID] = 0;
stream_info[ov::STREAM_SOCKET_ID] = 0;
if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
if (proc_type_table[0][ov::MAIN_CORE_PROC] < _streams) {
stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::MAIN_CORE_PROC];
stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC;
_streams_info_table.push_back(stream_info);
stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::HYPER_THREADING_PROC];
stream_info[ov::PROC_TYPE] = ov::HYPER_THREADING_PROC;
_streams_info_table.push_back(stream_info);
} else {
stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC;
stream_info[ov::NUMBER_OF_STREAMS] = _streams;
_streams_info_table.push_back(stream_info);
}
} else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
stream_info[ov::PROC_TYPE] = ov::EFFICIENT_CORE_PROC;
stream_info[ov::NUMBER_OF_STREAMS] = _streams;
_streams_info_table.push_back(stream_info);
} else {
int total_streams = 0;
if (proc_type_table.size() == 1) {
for (int i = ov::MAIN_CORE_PROC; i <= ov::HYPER_THREADING_PROC; i++) {
if (proc_type_table[0][i] > 0) {
stream_info[ov::NUMBER_OF_STREAMS] =
(total_streams + proc_type_table[0][i] > _streams ? _streams - total_streams
: proc_type_table[0][i]);
stream_info[ov::PROC_TYPE] = i;
stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[0][PROC_NUMA_NODE_ID];
stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[0][PROC_SOCKET_ID];
_streams_info_table.push_back(stream_info);
total_streams += stream_info[ov::NUMBER_OF_STREAMS];
}
if (total_streams >= _streams)
break;
}
} else {
for (size_t i = 1; i < proc_type_table.size(); i++) {
for (int j = ov::MAIN_CORE_PROC; j < ov::HYPER_THREADING_PROC; j++) {
if (proc_type_table[i][j] > 0) {
stream_info[ov::NUMBER_OF_STREAMS] =
(total_streams + proc_type_table[i][j] > _streams ? _streams - total_streams
: proc_type_table[i][j]);
stream_info[ov::PROC_TYPE] = j;
stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[i][PROC_NUMA_NODE_ID];
stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[i][PROC_SOCKET_ID];
_streams_info_table.push_back(stream_info);
total_streams += stream_info[ov::NUMBER_OF_STREAMS];
}
if (total_streams >= _streams)
break;
}
if (total_streams >= _streams)
break;
}
}
}
if (cpu_pinning) {
_cpu_reservation = cpu_pinning;
auto new_config = reserve_cpu_threads(*this);
_stream_processor_ids = new_config._stream_processor_ids;
_streams = new_config._streams;
_threads = new_config._threads;
}
}
} // namespace threading
} // namespace ov

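As a worked example of the BIG branch above (illustrative only; the topology is hypothetical and the values follow the code as written): on a CPU whose proc_type_table[0] is {12, 6, 0, 6, 0, 0} (6 physical big cores plus 6 hyper-threading siblings), calling update_executor_config(8, 1, Config::BIG, false) finds fewer physical big cores than requested streams, so it emits one row per processor group, with the second row carrying the full hyper-threading core count:

// _streams_info_table columns:
// {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM, STREAM_NUMA_NODE_ID, STREAM_SOCKET_ID}
std::vector<std::vector<int>> expected_streams_info_table = {
    {6, ov::MAIN_CORE_PROC, 1, 0, 0},
    {6, ov::HYPER_THREADING_PROC, 1, 0, 0},
};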

@@ -0,0 +1,165 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <common_test_utils/test_common.hpp>
// #include "ie_system_conf.h"
#include "openvino/runtime/threading/istreams_executor.hpp"
#include "os/cpu_map_info.hpp"
using namespace testing;
using namespace ov;
using namespace threading;
namespace {
#if defined(__linux__) || defined(_WIN32)
struct UpdateExecutorConfigTestCase {
ov::threading::IStreamsExecutor::Config _config;
std::vector<std::vector<int>> _proc_type_table;
std::vector<std::vector<int>> _cpu_mapping_table;
int _num_streams;
int _threads_per_stream;
ov::threading::IStreamsExecutor::Config::PreferredCoreType _core_type;
bool _cpu_pinning;
std::vector<std::vector<int>> _streams_info_table;
std::vector<std::vector<int>> _stream_processors;
};
class UpdateExecutorConfigTest : public ov::test::TestsCommon,
public testing::WithParamInterface<std::tuple<UpdateExecutorConfigTestCase>> {
public:
void SetUp() override {
auto test_data = std::get<0>(GetParam());
CPU& cpu = cpu_info();
cpu._org_proc_type_table = test_data._proc_type_table;
cpu._proc_type_table = test_data._proc_type_table;
cpu._cpu_mapping_table = test_data._cpu_mapping_table;
cpu._numa_nodes = 1;
test_data._config.update_executor_config(test_data._num_streams,
test_data._threads_per_stream,
test_data._core_type,
test_data._cpu_pinning);
ASSERT_EQ(test_data._num_streams, test_data._config._streams);
ASSERT_EQ(test_data._threads_per_stream, test_data._config._threadsPerStream);
ASSERT_EQ(test_data._core_type, test_data._config._threadPreferredCoreType);
ASSERT_EQ(test_data._cpu_pinning, test_data._config._cpu_reservation);
ASSERT_EQ(test_data._streams_info_table, test_data._config._streams_info_table);
ASSERT_EQ(test_data._stream_processors, test_data._config._stream_processor_ids);
}
};
UpdateExecutorConfigTestCase _update_num_streams = {
ov::threading::IStreamsExecutor::Config{"update num streams test"}, // param[in]: initial configuration
// param[in]: proc_type_table, {total processors, number of big (performance) cores, number of Efficient cores,
// number of hyper-threading processors, numa node id, socket id}
{
{12, 6, 0, 6, 0, 0},
},
// param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used}
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},
{3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},
{5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},
{7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 4, MAIN_CORE_PROC, 8, -1},
{9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 5, MAIN_CORE_PROC, 10, -1},
{11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1},
},
4, // param[in]: the number of streams
1, // param[in]: the number of threads per stream
ov::threading::IStreamsExecutor::Config::ANY, // param[in]: specified cpu core type
false, // param[in]: specified cpu pinning
// param[out]: streams_info_table, {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM, STREAM_NUMA_NODE_ID,
// STREAM_SOCKET_ID}
{
{4, MAIN_CORE_PROC, 1, 0, 0},
},
// param[out]: stream_processors, the list of processor ids on each stream.
{},
};
UpdateExecutorConfigTestCase _update_core_type = {
ov::threading::IStreamsExecutor::Config{"update core type test"},
{
{24, 8, 8, 8, 0, 0},
},
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 1, MAIN_CORE_PROC, 2, -1}, {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 2, MAIN_CORE_PROC, 4, -1}, {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 3, MAIN_CORE_PROC, 6, -1}, {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 4, MAIN_CORE_PROC, 8, -1}, {9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 5, MAIN_CORE_PROC, 10, -1}, {11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1},
{12, 0, 0, 6, MAIN_CORE_PROC, 12, -1}, {13, 0, 0, 6, HYPER_THREADING_PROC, 13, -1},
{14, 0, 0, 7, MAIN_CORE_PROC, 14, -1}, {15, 0, 0, 7, HYPER_THREADING_PROC, 15, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 16, -1}, {17, 0, 0, 9, EFFICIENT_CORE_PROC, 17, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 18, -1}, {19, 0, 0, 11, EFFICIENT_CORE_PROC, 19, -1},
{20, 0, 0, 12, EFFICIENT_CORE_PROC, 20, -1}, {21, 0, 0, 13, EFFICIENT_CORE_PROC, 21, -1},
{22, 0, 0, 14, EFFICIENT_CORE_PROC, 22, -1}, {23, 0, 0, 15, EFFICIENT_CORE_PROC, 23, -1},
},
8,
1,
ov::threading::IStreamsExecutor::Config::LITTLE,
false,
{
{8, EFFICIENT_CORE_PROC, 1, 0, 0},
},
{},
};
UpdateExecutorConfigTestCase _update_cpu_pinning = {
ov::threading::IStreamsExecutor::Config{"update cpu pinning test"},
{
{8, 4, 0, 4, 0, 0},
},
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},
{3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},
{5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},
{7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
},
8,
1,
ov::threading::IStreamsExecutor::Config::ANY,
true,
{
{4, MAIN_CORE_PROC, 1, 0, 0},
{4, HYPER_THREADING_PROC, 1, 0, 0},
},
{
{0},
{2},
{4},
{6},
{1},
{3},
{5},
{7},
},
};
TEST_P(UpdateExecutorConfigTest, UpdateExecutorConfig) {}
INSTANTIATE_TEST_SUITE_P(smoke_UpdateExecutorConfig,
UpdateExecutorConfigTest,
testing::Values(_update_num_streams, _update_core_type, _update_cpu_pinning));
#endif
} // namespace


@@ -3,6 +3,7 @@
//
#include "openvino/runtime/system_conf.hpp"
#include "openvino/runtime/threading/cpu_streams_info.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
@@ -104,26 +105,6 @@
using namespace cldnn;
using namespace ov::intel_gpu;
static void adjust_num_cores(ov::threading::IStreamsExecutor::Config& config) {
if (ov::get_available_cores_types().size() == 1) {
return;
}
const auto total_num_cores = ov::get_number_of_logical_cpu_cores();
const auto total_num_big_cores = ov::get_number_of_logical_cpu_cores(true);
const auto total_num_little_cores = total_num_cores - total_num_big_cores;
auto core_type = config._threadPreferredCoreType;
int num_cores = total_num_cores;
if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
num_cores = total_num_big_cores;
} else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
num_cores = total_num_little_cores;
}
config._streams = std::min(config._streams, num_cores);
}
static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) {
ov::threading::IStreamsExecutor::Config task_executor_config(tags, 1);
task_executor_config._streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads);
@@ -135,7 +116,10 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E
default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
}
adjust_num_cores(task_executor_config);
task_executor_config.update_executor_config(task_executor_config._streams,
1,
task_executor_config._threadPreferredCoreType,
false);
return task_executor_config;
}

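The removed adjust_num_cores() above only clamped the stream count to the number of cores of the preferred type; the same clamp now happens inside update_executor_config() (streams = std::min(stream_nums, num_cores)), which additionally fills _streams_info_table so the GPU compilation work lands on the preferred (P-core) processors. A rough sketch of the net effect, assuming pinning stays disabled as in the diff:

// Before (removed adjust_num_cores, paraphrased): only the stream count was clamped
// to the core count of the preferred core type.
// After: the same clamp plus streams_info_table construction happen in one call.
task_executor_config.update_executor_config(task_executor_config._streams,
                                            1,
                                            task_executor_config._threadPreferredCoreType,
                                            false);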

@@ -63,7 +63,8 @@ CompiledModel::CompiledModel(std::shared_ptr<ov::Model> model,
: ov::ICompiledModel(model,
plugin,
wrap_if_old_api(context, plugin->is_new_api()),
create_task_executor(plugin, config))
create_task_executor(plugin, config),
nullptr)
, m_context(context)
, m_config(config)
, m_wait_executor(std::make_shared<ov::threading::CPUStreamsExecutor>(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"}))
@@ -86,7 +87,8 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer ib,
: ov::ICompiledModel(nullptr,
plugin,
wrap_if_old_api(context, plugin->is_new_api()),
create_task_executor(plugin, config))
create_task_executor(plugin, config),
nullptr)
, m_context(context)
, m_config(config)
, m_wait_executor(std::make_shared<ov::threading::CPUStreamsExecutor>(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"}))