diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index acc6f1832c7..14dc6a3c13b 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -269,16 +269,13 @@ int get_model_prefer_threads(const int num_streams, return model_prefer; } -void get_num_streams(const int streams, - const std::shared_ptr& ngraphFunc, - Config& config) { +void generate_stream_info(const int streams, + const std::shared_ptr& ngraphFunc, + Config& config, + int preferred_nthreads_per_stream) { + int model_prefer_threads = preferred_nthreads_per_stream; InferenceEngine::IStreamsExecutor::Config& executor_config = config.streamExecutorConfig; - std::vector stream_ids; - std::string log = "[ streams info ]"; - std::vector core_type_str = {" Any core: ", " PCore: ", " ECore: ", " Logical core: "}; - - std::vector> orig_proc_type_table = get_proc_type_table(); - executor_config._orig_proc_type_table = orig_proc_type_table; + auto& orig_proc_type_table = executor_config._orig_proc_type_table; std::vector> proc_type_table = apply_scheduling_core_type(config.schedulingCoreType, orig_proc_type_table); proc_type_table = apply_hyper_threading(config.enableHyperThreading, @@ -291,12 +288,30 @@ void get_num_streams(const int streams, streams, executor_config._threadBindingType, proc_type_table); - const int model_prefer = get_model_prefer_threads(streams, proc_type_table, ngraphFunc, executor_config); + if (-1 == preferred_nthreads_per_stream) { + model_prefer_threads = get_model_prefer_threads(streams, proc_type_table, ngraphFunc, executor_config); + } + executor_config._streams_info_table = get_streams_info_table(streams, executor_config._threads, config.perfHintsConfig.ovPerfHintNumRequests, - model_prefer, + model_prefer_threads, proc_type_table); +} + +void get_num_streams(const int streams, + const std::shared_ptr& ngraphFunc, + Config& config) { + 
InferenceEngine::IStreamsExecutor::Config& executor_config = config.streamExecutorConfig; + std::vector stream_ids; + std::string log = "[ streams info ]"; + std::vector core_type_str = {" Any core: ", " PCore: ", " ECore: ", " Logical core: "}; + + std::vector> orig_proc_type_table = get_proc_type_table(); + + executor_config._orig_proc_type_table = orig_proc_type_table; + generate_stream_info(streams, ngraphFunc, config); + executor_config._stream_core_ids = reserve_available_cpus(executor_config._streams_info_table); executor_config._threadsPerStream = executor_config._streams_info_table[0][THREADS_PER_STREAM]; executor_config._streams = 0; diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp index 3e7aff623e2..461ba9dcdbc 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.hpp @@ -59,6 +59,18 @@ int get_model_prefer_threads(const int num_streams, const std::shared_ptr& ngraphFunc, const InferenceEngine::IStreamsExecutor::Config streamExecutorConfig); +/** + * @brief Generate streams information according to the processor type table and store it in config.streamExecutorConfig._streams_info_table + * @param[in] streams number of streams + * @param[in] ngraphFunc graph handle + * @param[in,out] config intel cpu configuration; its streamExecutorConfig is read and updated + * @param[in] preferred_nthreads_per_stream preferred number of threads per stream; when it is -1 (the default), the value is obtained from get_model_prefer_threads() + */ +void generate_stream_info(const int streams, + const std::shared_ptr& ngraphFunc, + Config& config, + int preferred_nthreads_per_stream = -1); + struct StreamCfg { int num_streams; // Number of streams int num_threads; // Number of threads diff --git a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp b/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp index c41c8ceae63..31eddcede4a 100644 --- a/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/streams_info_table_test.cpp @@ -1404,4 +1404,423 @@ 
INSTANTIATE_TEST_SUITE_P(StreamsInfoTable, _1sockets_ecores_tput_5, _1sockets_mock_tput_1)); +struct StreamGenerateionTestCase { + int input_stream; + int input_thread; + int input_request; + int input_model_prefer; + ov::hint::SchedulingCoreType input_type; + bool input_ht_value; + bool input_ht_changed; + bool input_cpu_value; + bool input_cpu_changed; + ov::hint::PerformanceMode input_pm_hint; + ov::threading::IStreamsExecutor::ThreadBindingType input_binding_type; + std::vector> input_proc_type_table; + ov::hint::SchedulingCoreType output_type; + bool output_ht_value; + bool output_cpu_value; + ov::hint::PerformanceMode output_pm_hint; + std::vector> output_proc_type_table; + std::vector> output_stream_info_table; +}; + +void make_config(StreamGenerateionTestCase& test_data, ov::intel_cpu::Config& config) { + config.schedulingCoreType = test_data.input_type; + config.enableCpuPinning = test_data.input_cpu_value; + config.changedCpuPinning = test_data.input_cpu_changed; + config.enableHyperThreading = test_data.input_ht_value; + config.changedHyperThreading = test_data.input_ht_changed; + config.perfHintsConfig.ovPerfHint = ov::util::to_string(test_data.input_pm_hint); + config.perfHintsConfig.ovPerfHintNumRequests = test_data.input_request; + config.streamExecutorConfig._threads = test_data.input_thread; + config.streamExecutorConfig._threadBindingType = test_data.input_binding_type; + config.streamExecutorConfig._orig_proc_type_table = test_data.input_proc_type_table; +} + +class StreamGenerationTests : public CommonTestUtils::TestsCommon, + public testing::WithParamInterface> { +public: + void SetUp() override { + auto test_data = std::get<0>(GetParam()); + ov::intel_cpu::Config config; + make_config(test_data, config); + + ov::intel_cpu::generate_stream_info(test_data.input_stream, nullptr, config, test_data.input_model_prefer); + + ASSERT_EQ(test_data.output_stream_info_table, config.streamExecutorConfig._streams_info_table); + 
ASSERT_EQ(test_data.output_proc_type_table, config.streamExecutorConfig._proc_type_table); + ASSERT_EQ(test_data.output_cpu_value, config.streamExecutorConfig._cpu_pinning); + ASSERT_EQ(test_data.output_ht_value, config.enableHyperThreading); + ASSERT_EQ(test_data.output_type, config.schedulingCoreType); + ASSERT_EQ(test_data.output_pm_hint, ov::util::from_string(config.perfHintsConfig.ovPerfHint, ov::hint::performance_mode)); + } +}; + +TEST_P(StreamGenerationTests, StreamsGeneration) {} + +StreamGenerateionTestCase generation_latency_1sockets_14cores_1 = { + 1, // param[in]: simulated setting for streams number + 0, // param[in]: simulated setting for threads number + 0, // param[in]: simulated setting for inference request number + 0, // param[in]: simulated setting for model prefer threads number + ov::hint::SchedulingCoreType::ANY_CORE, // param[in]: simulated setting for scheduling core type + // (PCORE_ONLY/ECORE_ONLY/ANY_CORE) + true, // param[in]: simulated setting for enableHyperThreading + true, // param[in]: simulated setting for changedHyperThreading + true, // param[in]: simulated setting for enableCpuPinning + true, // param[in]: simulated setting for changedCpuPinning + ov::hint::PerformanceMode::LATENCY, // param[in]: simulated setting for performance mode (throughput/latency) + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, // param[in]: simulated setting for + // threadBindingType + {{20, 6, 8, 6}}, // param[in]: simulated proc_type_table for platform which has one socket, 6 Pcores, 8 Ecores and + // hyper threading enabled + ov::hint::SchedulingCoreType::ANY_CORE, // param[expected out]: scheduling core type needs to be the same as input + true, // param[expected out]: enableHyperThreading needs to be the same as input + true, // param[expected out]: enableCpuPinning needs to be the same as input + ov::hint::PerformanceMode::LATENCY, // param[expected out]: performance mode needs to be the same as input + {{20, 6, 8, 6}}, // 
param[expected out]: since hyper threading is enabled and all core type is used, + // proc_type_table needs to be the same as input + {{1, ALL_PROC, 20}, + {0, MAIN_CORE_PROC, 6}, + {0, EFFICIENT_CORE_PROC, 8}, + {0, + HYPER_THREADING_PROC, + 6}}, // param[expected out]: since performance mode is latency and all cores are used, the final number of streams is 1 +}; + +StreamGenerateionTestCase generation_latency_1sockets_14cores_2 = { + 1, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + true, + true, + true, + ov::hint::PerformanceMode::LATENCY, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{14, 6, 8, 0}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + true, + ov::hint::PerformanceMode::LATENCY, + {{14, 6, 8, 0}}, + {{1, ALL_PROC, 14}, {0, MAIN_CORE_PROC, 6}, {0, EFFICIENT_CORE_PROC, 8}}, +}; + +StreamGenerateionTestCase generation_latency_1sockets_14cores_3 = { + 1, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{14, 6, 8, 0}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{6, 6, 0, 0}}, + {{1, MAIN_CORE_PROC, 6}}, +}; + +StreamGenerateionTestCase generation_latency_1sockets_14cores_4 = { + 1, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{20, 6, 8, 6}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + false, + ov::hint::PerformanceMode::LATENCY, + {{12, 6, 0, 6}}, + {{1, MAIN_CORE_PROC, 12}}, +}; + +StreamGenerateionTestCase generation_latency_1sockets_14cores_5 = { + 1, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + 
ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{20, 6, 8, 6}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{6, 6, 0, 0}}, + {{1, MAIN_CORE_PROC, 6}}, +}; + +StreamGenerateionTestCase generation_latency_2sockets_48cores_6 = { + 1, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{96, 48, 0, 48}, {48, 24, 0, 24}, {48, 24, 0, 24}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, + {{1, MAIN_CORE_PROC, 48}}, +}; + +StreamGenerateionTestCase generation_latency_2sockets_48cores_7 = { + 1, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::LATENCY, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::LATENCY, + {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, + {{1, MAIN_CORE_PROC, 48}}, +}; + +StreamGenerateionTestCase generation_tput_1sockets_14cores_1 = { + 0, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + true, + true, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::threading::IStreamsExecutor::ThreadBindingType::HYBRID_AWARE, + {{20, 6, 8, 6}}, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + true, + ov::hint::PerformanceMode::THROUGHPUT, + {{20, 6, 8, 6}}, + {{2, MAIN_CORE_PROC, 3}, {2, EFFICIENT_CORE_PROC, 3}, {2, HYPER_THREADING_PROC, 3}}, +}; + +StreamGenerateionTestCase generation_tput_1sockets_14cores_2 = { + 0, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + 
ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{20, 6, 8, 6}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{6, 6, 0, 0}}, + {{2, MAIN_CORE_PROC, 3}}, +}; + +StreamGenerateionTestCase generation_tput_1sockets_14cores_3 = { + 10, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{20, 6, 8, 6}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{12, 6, 0, 6}}, + {{6, MAIN_CORE_PROC, 1}, {4, HYPER_THREADING_PROC, 1}}, +}; + +StreamGenerateionTestCase generation_tput_1sockets_14cores_4 = { + 0, + 10, + 0, + 0, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{20, 6, 8, 6}}, + ov::hint::SchedulingCoreType::PCORE_ONLY, + true, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{12, 6, 0, 6}}, + {{2, MAIN_CORE_PROC, 3}, {1, HYPER_THREADING_PROC, 3}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_5 = { + 0, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48}, {48, 24, 0, 24}, {48, 24, 0, 24}}, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{96, 48, 0, 48}, {48, 24, 0, 24}, {48, 24, 0, 24}}, + {{24, MAIN_CORE_PROC, 4}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_6 = { + 0, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48}, {48, 24, 0, 24}, {48, 24, 0, 24}}, 
+ ov::hint::SchedulingCoreType::ANY_CORE, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, + {{12, MAIN_CORE_PROC, 4}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_7 = { + 100, + 0, + 0, + 0, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48}, {48, 24, 0, 24}, {48, 24, 0, 24}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, + {{48, MAIN_CORE_PROC, 1}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_8 = { + 2, + 20, + 0, + 1, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + true, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48}, {48, 24, 0, 24}, {48, 24, 0, 24}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, + {{2, MAIN_CORE_PROC, 10}}, +}; + +StreamGenerateionTestCase generation_tput_2sockets_48cores_9 = { + 0, + 0, + 0, + 1, + ov::hint::SchedulingCoreType::ANY_CORE, + true, + false, + false, + true, + ov::hint::PerformanceMode::THROUGHPUT, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + {{96, 48, 0, 48}, {48, 24, 0, 24}, {48, 24, 0, 24}}, + ov::hint::SchedulingCoreType::ANY_CORE, + false, + false, + ov::hint::PerformanceMode::THROUGHPUT, + {{48, 48, 0, 0}, {24, 24, 0, 0}, {24, 24, 0, 0}}, + {{48, MAIN_CORE_PROC, 1}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_StreamsGeneration, + StreamGenerationTests, + ::testing::Values(generation_latency_1sockets_14cores_1, + generation_latency_1sockets_14cores_2, + generation_latency_1sockets_14cores_3, + generation_latency_1sockets_14cores_4, + 
generation_latency_1sockets_14cores_5, + generation_latency_2sockets_48cores_6, + generation_latency_2sockets_48cores_7, + generation_tput_1sockets_14cores_1, + generation_tput_1sockets_14cores_2, + generation_tput_1sockets_14cores_3, + generation_tput_1sockets_14cores_4, + generation_tput_2sockets_48cores_5, + generation_tput_2sockets_48cores_6, + generation_tput_2sockets_48cores_7, + generation_tput_2sockets_48cores_8, + generation_tput_2sockets_48cores_9)); + } // namespace \ No newline at end of file