choose Pcore to compile model for GPU plugin (#20472)
* choose Pcore to compile model for GPU plugin * provide function to update executor config * set callback executor to nullptr for GPU plugin * fix code style * fix warning * optimize duplicate code * set callback executor to nullptr for another gpu compile_model * add description for new function * add smoke test * fix code style * modify function definition --------- Co-authored-by: Wanglei Shen <wanglei.shen@intel.com>
This commit is contained in:
parent
cec6535eaa
commit
82f191b0e7
@ -150,6 +150,19 @@ public:
|
||||
_threadPreferredCoreType(threadPreferredCoreType),
|
||||
_streams_info_table{streamsInfoTable},
|
||||
_cpu_reservation{cpuReservation} {}
|
||||
|
||||
/**
|
||||
* @brief Modify _streams_info_table and related configuration according to user-specified parameters, and bind
|
||||
* threads to cpu cores if cpu_pinning is true.
|
||||
* @param stream_nums Number of streams specified by user
|
||||
* @param threads_per_stream Number of threads per stream specified by user
|
||||
* @param core_type Cpu type (Big/Little/Any) specified by user
|
||||
* @param cpu_pinning Whether to bind the threads to cpu cores
|
||||
*/
|
||||
void update_executor_config(int stream_nums,
|
||||
int threads_per_stream,
|
||||
PreferredCoreType core_type,
|
||||
bool cpu_pinning);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -553,5 +553,107 @@ IStreamsExecutor::Config IStreamsExecutor::Config::reserve_cpu_threads(const ISt
|
||||
return config;
|
||||
}
|
||||
|
||||
void IStreamsExecutor::Config::update_executor_config(int stream_nums,
|
||||
int threads_per_stream,
|
||||
IStreamsExecutor::Config::PreferredCoreType core_type,
|
||||
bool cpu_pinning) {
|
||||
const auto proc_type_table = ov::get_proc_type_table();
|
||||
|
||||
if (proc_type_table.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// IStreamsExecutor::Config config = initial;
|
||||
const auto total_num_cores = proc_type_table[0][ALL_PROC];
|
||||
const auto total_num_big_cores = proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][HYPER_THREADING_PROC];
|
||||
const auto total_num_little_cores = proc_type_table[0][EFFICIENT_CORE_PROC];
|
||||
|
||||
int num_cores = total_num_cores;
|
||||
if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
|
||||
num_cores = total_num_big_cores;
|
||||
} else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
|
||||
num_cores = total_num_little_cores;
|
||||
}
|
||||
|
||||
int streams = std::min(stream_nums, num_cores);
|
||||
|
||||
if (streams == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
_streams = streams;
|
||||
_threadPreferredCoreType = core_type;
|
||||
_threadsPerStream = threads_per_stream;
|
||||
|
||||
// create stream_info_table based on core type
|
||||
std::vector<int> stream_info(ov::CPU_STREAMS_TABLE_SIZE, 0);
|
||||
stream_info[ov::THREADS_PER_STREAM] = _threadsPerStream;
|
||||
stream_info[ov::STREAM_NUMA_NODE_ID] = 0;
|
||||
stream_info[ov::STREAM_SOCKET_ID] = 0;
|
||||
if (core_type == ov::threading::IStreamsExecutor::Config::BIG) {
|
||||
if (proc_type_table[0][ov::MAIN_CORE_PROC] < _streams) {
|
||||
stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::MAIN_CORE_PROC];
|
||||
stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC;
|
||||
_streams_info_table.push_back(stream_info);
|
||||
stream_info[ov::NUMBER_OF_STREAMS] = proc_type_table[0][ov::HYPER_THREADING_PROC];
|
||||
stream_info[ov::PROC_TYPE] = ov::HYPER_THREADING_PROC;
|
||||
_streams_info_table.push_back(stream_info);
|
||||
} else {
|
||||
stream_info[ov::PROC_TYPE] = ov::MAIN_CORE_PROC;
|
||||
stream_info[ov::NUMBER_OF_STREAMS] = _streams;
|
||||
_streams_info_table.push_back(stream_info);
|
||||
}
|
||||
} else if (core_type == ov::threading::IStreamsExecutor::Config::LITTLE) {
|
||||
stream_info[ov::PROC_TYPE] = ov::EFFICIENT_CORE_PROC;
|
||||
stream_info[ov::NUMBER_OF_STREAMS] = _streams;
|
||||
_streams_info_table.push_back(stream_info);
|
||||
} else {
|
||||
int total_streams = 0;
|
||||
if (proc_type_table.size() == 1) {
|
||||
for (int i = ov::MAIN_CORE_PROC; i <= ov::HYPER_THREADING_PROC; i++) {
|
||||
if (proc_type_table[0][i] > 0) {
|
||||
stream_info[ov::NUMBER_OF_STREAMS] =
|
||||
(total_streams + proc_type_table[0][i] > _streams ? _streams - total_streams
|
||||
: proc_type_table[0][i]);
|
||||
stream_info[ov::PROC_TYPE] = i;
|
||||
stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[0][PROC_NUMA_NODE_ID];
|
||||
stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[0][PROC_SOCKET_ID];
|
||||
_streams_info_table.push_back(stream_info);
|
||||
total_streams += stream_info[ov::NUMBER_OF_STREAMS];
|
||||
}
|
||||
if (total_streams >= _streams)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 1; i < proc_type_table.size(); i++) {
|
||||
for (int j = ov::MAIN_CORE_PROC; j < ov::HYPER_THREADING_PROC; j++) {
|
||||
if (proc_type_table[i][j] > 0) {
|
||||
stream_info[ov::NUMBER_OF_STREAMS] =
|
||||
(total_streams + proc_type_table[i][j] > _streams ? _streams - total_streams
|
||||
: proc_type_table[i][j]);
|
||||
stream_info[ov::PROC_TYPE] = j;
|
||||
stream_info[ov::STREAM_NUMA_NODE_ID] = proc_type_table[i][PROC_NUMA_NODE_ID];
|
||||
stream_info[ov::STREAM_SOCKET_ID] = proc_type_table[i][PROC_SOCKET_ID];
|
||||
_streams_info_table.push_back(stream_info);
|
||||
total_streams += stream_info[ov::NUMBER_OF_STREAMS];
|
||||
}
|
||||
if (total_streams >= _streams)
|
||||
break;
|
||||
}
|
||||
if (total_streams >= _streams)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cpu_pinning) {
|
||||
_cpu_reservation = cpu_pinning;
|
||||
auto new_config = reserve_cpu_threads(*this);
|
||||
_stream_processor_ids = new_config._stream_processor_ids;
|
||||
_streams = new_config._streams;
|
||||
_threads = new_config._threads;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace threading
|
||||
} // namespace ov
|
||||
|
165
src/inference/tests/unit/update_executor_config_test.cpp
Normal file
165
src/inference/tests/unit/update_executor_config_test.cpp
Normal file
@ -0,0 +1,165 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <common_test_utils/test_common.hpp>
|
||||
|
||||
// #include "ie_system_conf.h"
|
||||
#include "openvino/runtime/threading/istreams_executor.hpp"
|
||||
#include "os/cpu_map_info.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ov;
|
||||
using namespace threading;
|
||||
|
||||
namespace {
|
||||
|
||||
#if defined(__linux__) || defined(_WIN32)
|
||||
|
||||
struct UpdateExecutorConfigTestCase {
|
||||
ov::threading::IStreamsExecutor::Config _config;
|
||||
std::vector<std::vector<int>> _proc_type_table;
|
||||
std::vector<std::vector<int>> _cpu_mapping_table;
|
||||
int _num_streams;
|
||||
int _threads_per_stream;
|
||||
ov::threading::IStreamsExecutor::Config::PreferredCoreType _core_type;
|
||||
bool _cpu_pinning;
|
||||
std::vector<std::vector<int>> _streams_info_table;
|
||||
std::vector<std::vector<int>> _stream_processors;
|
||||
};
|
||||
|
||||
class UpdateExecutorConfigTest : public ov::test::TestsCommon,
|
||||
public testing::WithParamInterface<std::tuple<UpdateExecutorConfigTestCase>> {
|
||||
public:
|
||||
void SetUp() override {
|
||||
auto test_data = std::get<0>(GetParam());
|
||||
|
||||
CPU& cpu = cpu_info();
|
||||
cpu._org_proc_type_table = test_data._proc_type_table;
|
||||
cpu._proc_type_table = test_data._proc_type_table;
|
||||
cpu._cpu_mapping_table = test_data._cpu_mapping_table;
|
||||
cpu._numa_nodes = 1;
|
||||
|
||||
test_data._config.update_executor_config(test_data._num_streams,
|
||||
test_data._threads_per_stream,
|
||||
test_data._core_type,
|
||||
test_data._cpu_pinning);
|
||||
|
||||
ASSERT_EQ(test_data._num_streams, test_data._config._streams);
|
||||
ASSERT_EQ(test_data._threads_per_stream, test_data._config._threadsPerStream);
|
||||
ASSERT_EQ(test_data._core_type, test_data._config._threadPreferredCoreType);
|
||||
ASSERT_EQ(test_data._cpu_pinning, test_data._config._cpu_reservation);
|
||||
ASSERT_EQ(test_data._num_streams, test_data._config._streams);
|
||||
ASSERT_EQ(test_data._streams_info_table, test_data._config._streams_info_table);
|
||||
ASSERT_EQ(test_data._stream_processors, test_data._config._stream_processor_ids);
|
||||
}
|
||||
};
|
||||
|
||||
// Scenario: 4 streams of 1 thread each, any core type, no pinning, on 6 main + 6 hyper-threading processors.
UpdateExecutorConfigTestCase _update_num_streams = {
    // param[in]: initial configuration
    ov::threading::IStreamsExecutor::Config{"update num streams test"},
    // param[in]: proc_type_table, {total processors, number of physical processors, number of Efficient
    // processors, number of hyper threading processors, ...}; the two trailing columns are presumably the
    // NUMA node id and the socket id - TODO confirm against the table definition
    {
        {12, 6, 0, 6, 0, 0},
    },
    // param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used}
    {
        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},   {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},   {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},   {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},   {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 4, MAIN_CORE_PROC, 8, -1},   {9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1},
        {10, 0, 0, 5, MAIN_CORE_PROC, 10, -1}, {11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1},
    },
    4,                                             // param[in]: the number of streams
    1,                                             // param[in]: the number of threads per stream
    ov::threading::IStreamsExecutor::Config::ANY,  // param[in]: specified cpu core type
    false,                                         // param[in]: specified cpu pinning
    // param[out]: streams_info_table, {NUMBER_OF_STREAMS, PROC_TYPE, THREADS_PER_STREAM, STREAM_NUMA_NODE_ID,
    // STREAM_SOCKET_ID}
    {
        {4, MAIN_CORE_PROC, 1, 0, 0},
    },
    // param[out]: stream_processors, the list of processor ids on each stream.
    {},
};
|
||||
|
||||
// Scenario: 8 streams of 1 thread each on Efficient cores only, no pinning, on a hybrid layout with
// 8 main, 8 hyper-threading and 8 Efficient processors.
UpdateExecutorConfigTestCase _update_core_type = {
    // param[in]: initial configuration
    ov::threading::IStreamsExecutor::Config{"update core type test"},
    // param[in]: proc_type_table
    {
        {24, 8, 8, 8, 0, 0},
    },
    // param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used}
    {
        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1},
        {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 2, MAIN_CORE_PROC, 4, -1},
        {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1},
        {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 4, MAIN_CORE_PROC, 8, -1},
        {9, 0, 0, 4, HYPER_THREADING_PROC, 9, -1},
        {10, 0, 0, 5, MAIN_CORE_PROC, 10, -1},
        {11, 0, 0, 5, HYPER_THREADING_PROC, 11, -1},
        {12, 0, 0, 6, MAIN_CORE_PROC, 12, -1},
        {13, 0, 0, 6, HYPER_THREADING_PROC, 13, -1},
        {14, 0, 0, 7, MAIN_CORE_PROC, 14, -1},
        {15, 0, 0, 7, HYPER_THREADING_PROC, 15, -1},
        {16, 0, 0, 8, EFFICIENT_CORE_PROC, 16, -1},
        {17, 0, 0, 9, EFFICIENT_CORE_PROC, 17, -1},
        {18, 0, 0, 10, EFFICIENT_CORE_PROC, 18, -1},
        {19, 0, 0, 11, EFFICIENT_CORE_PROC, 19, -1},
        {20, 0, 0, 12, EFFICIENT_CORE_PROC, 20, -1},
        {21, 0, 0, 13, EFFICIENT_CORE_PROC, 21, -1},
        {22, 0, 0, 14, EFFICIENT_CORE_PROC, 22, -1},
        {23, 0, 0, 15, EFFICIENT_CORE_PROC, 23, -1},
    },
    8,                                                // param[in]: the number of streams
    1,                                                // param[in]: the number of threads per stream
    ov::threading::IStreamsExecutor::Config::LITTLE,  // param[in]: specified cpu core type
    false,                                            // param[in]: specified cpu pinning
    // param[out]: streams_info_table
    {
        {8, EFFICIENT_CORE_PROC, 1, 0, 0},
    },
    // param[out]: stream_processors
    {},
};
|
||||
|
||||
// Scenario: 8 pinned streams of 1 thread each, any core type, on 4 main + 4 hyper-threading processors.
// Each stream is expected to get one processor: the main-core processors first, then the
// hyper-threading ones.
UpdateExecutorConfigTestCase _update_cpu_pinning = {
    // param[in]: initial configuration
    ov::threading::IStreamsExecutor::Config{"update cpu pinning test"},
    // param[in]: proc_type_table
    {
        {8, 4, 0, 4, 0, 0},
    },
    // param[in]: cpu_mapping_table, {PROCESSOR_ID, NUMA_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, Used}
    {
        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 0, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 1, MAIN_CORE_PROC, 2, -1}, {3, 0, 0, 1, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 2, MAIN_CORE_PROC, 4, -1}, {5, 0, 0, 2, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 3, MAIN_CORE_PROC, 6, -1}, {7, 0, 0, 3, HYPER_THREADING_PROC, 7, -1},
    },
    8,                                             // param[in]: the number of streams
    1,                                             // param[in]: the number of threads per stream
    ov::threading::IStreamsExecutor::Config::ANY,  // param[in]: specified cpu core type
    true,                                          // param[in]: specified cpu pinning
    // param[out]: streams_info_table
    {
        {4, MAIN_CORE_PROC, 1, 0, 0},
        {4, HYPER_THREADING_PROC, 1, 0, 0},
    },
    // param[out]: stream_processors, the list of processor ids on each stream.
    {{0}, {2}, {4}, {6}, {1}, {3}, {5}, {7}},
};
|
||||
|
||||
// The fixture's SetUp() performs every check, so the test body itself stays empty.
TEST_P(UpdateExecutorConfigTest, UpdateExecutorConfig) {}

// Run the fixture once per scenario defined above.
INSTANTIATE_TEST_SUITE_P(smoke_UpdateExecutorConfig,
                         UpdateExecutorConfigTest,
                         testing::Values(_update_num_streams, _update_core_type, _update_cpu_pinning));
|
||||
#endif
|
||||
} // namespace
|
@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
#include "openvino/runtime/threading/cpu_streams_info.hpp"
|
||||
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include "intel_gpu/runtime/engine.hpp"
|
||||
@ -104,26 +105,6 @@
|
||||
using namespace cldnn;
|
||||
using namespace ov::intel_gpu;
|
||||
|
||||
// Caps config._streams at the number of logical cores of the preferred core type.
// The config is left untouched when ov::get_available_cores_types() reports exactly one core type.
static void adjust_num_cores(ov::threading::IStreamsExecutor::Config& config) {
    using StreamsConfig = ov::threading::IStreamsExecutor::Config;

    if (ov::get_available_cores_types().size() == 1) {
        return;
    }

    const auto all_cores = ov::get_number_of_logical_cpu_cores();
    const auto big_cores = ov::get_number_of_logical_cpu_cores(true);
    // Whatever is not a big core is counted as a little core.
    const auto little_cores = all_cores - big_cores;

    int core_limit = all_cores;
    switch (config._threadPreferredCoreType) {
    case StreamsConfig::BIG:
        core_limit = big_cores;
        break;
    case StreamsConfig::LITTLE:
        core_limit = little_cores;
        break;
    default:
        // Any other preference may use every logical core.
        break;
    }

    config._streams = std::min(config._streams, core_limit);
}
|
||||
|
||||
static ov::threading::IStreamsExecutor::Config make_task_executor_config(const ExecutionConfig& config, std::string tags, int num_streams = 0) {
|
||||
ov::threading::IStreamsExecutor::Config task_executor_config(tags, 1);
|
||||
task_executor_config._streams = (num_streams > 0) ? num_streams : config.get_property(ov::compilation_num_threads);
|
||||
@ -135,7 +116,10 @@ static ov::threading::IStreamsExecutor::Config make_task_executor_config(const E
|
||||
default: OPENVINO_ASSERT(false, "[GPU] Can't create task executor: invalid host task priority value: ", priority);
|
||||
}
|
||||
|
||||
adjust_num_cores(task_executor_config);
|
||||
task_executor_config.update_executor_config(task_executor_config._streams,
|
||||
1,
|
||||
task_executor_config._threadPreferredCoreType,
|
||||
false);
|
||||
|
||||
return task_executor_config;
|
||||
}
|
||||
|
@ -63,7 +63,8 @@ CompiledModel::CompiledModel(std::shared_ptr<ov::Model> model,
|
||||
: ov::ICompiledModel(model,
|
||||
plugin,
|
||||
wrap_if_old_api(context, plugin->is_new_api()),
|
||||
create_task_executor(plugin, config))
|
||||
create_task_executor(plugin, config),
|
||||
nullptr)
|
||||
, m_context(context)
|
||||
, m_config(config)
|
||||
, m_wait_executor(std::make_shared<ov::threading::CPUStreamsExecutor>(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"}))
|
||||
@ -86,7 +87,8 @@ CompiledModel::CompiledModel(cldnn::BinaryInputBuffer ib,
|
||||
: ov::ICompiledModel(nullptr,
|
||||
plugin,
|
||||
wrap_if_old_api(context, plugin->is_new_api()),
|
||||
create_task_executor(plugin, config))
|
||||
create_task_executor(plugin, config),
|
||||
nullptr)
|
||||
, m_context(context)
|
||||
, m_config(config)
|
||||
, m_wait_executor(std::make_shared<ov::threading::CPUStreamsExecutor>(ov::threading::IStreamsExecutor::Config{"Intel GPU plugin wait executor"}))
|
||||
|
Loading…
Reference in New Issue
Block a user