diff --git a/samples/cpp/benchmark_app/utils.cpp b/samples/cpp/benchmark_app/utils.cpp index 51d0443cfdc..fbdee288211 100644 --- a/samples/cpp/benchmark_app/utils.cpp +++ b/samples/cpp/benchmark_app/utils.cpp @@ -112,7 +112,8 @@ std::vector<std::string> parse_devices(const std::string& device_string) { auto bracket = comma_separated_devices.find("("); // e.g. in BATCH:GPU(4) comma_separated_devices = comma_separated_devices.substr(colon + 1, bracket - colon - 1); } - if ((comma_separated_devices == "MULTI") || (comma_separated_devices == "HETERO")) + if ((comma_separated_devices == "AUTO") || (comma_separated_devices == "MULTI") || + (comma_separated_devices == "HETERO")) return std::vector<std::string>(); auto devices = split(comma_separated_devices, ','); return devices; diff --git a/src/plugins/auto/executable_network.cpp b/src/plugins/auto/executable_network.cpp index 303c0d43dae..e351d052ce6 100644 --- a/src/plugins/auto/executable_network.cpp +++ b/src/plugins/auto/executable_network.cpp @@ -309,6 +309,29 @@ void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context, auto& deviceConfig = context.deviceInfo.config; auto& deviceList = context.metaDevices; bool curDevIsCPU = (device.find("CPU") != std::string::npos); + bool curDevIsGPU = (device.find("GPU") != std::string::npos); + { + std::lock_guard<std::mutex> lock(_confMutex); + if (curDevIsGPU && _loadContext[CPU].isEnabled) { + // when the user has not set the number of compiling threads, + // limit the threads used for compiling the GPU network + int maxNumThreads = 0; + try { + maxNumThreads = _core->GetConfig(device, GPU_CONFIG_KEY(MAX_NUM_THREADS)).as<int>(); + } catch (...) 
{ + LOG_DEBUG("[AUTOPLUGIN]: cannot get MAX_NUM_THREADS from GPU"); + } + if (maxNumThreads == static_cast<int>(std::thread::hardware_concurrency())) { + int threadNum = maxNumThreads / 2; + deviceConfig[GPU_CONFIG_KEY(MAX_NUM_THREADS)] = std::to_string(threadNum); + LOG_DEBUG("[AUTOPLUGIN]:gpu threads number for compiling: %s", deviceConfig[GPU_CONFIG_KEY(MAX_NUM_THREADS)].c_str()); + } else { + // the user has set the compiling threads num + // use the user's value anyway + LOG_DEBUG("[AUTOPLUGIN]:user defined compiling threads: %d", maxNumThreads); + } + } + } try { if (!modelPath.empty()) { context.executableNetwork = _core->LoadNetwork(modelPath, device, deviceConfig); diff --git a/src/tests/unit/auto/auto_release_helper_test.cpp b/src/tests/unit/auto/auto_release_helper_test.cpp index fee972400af..f3c97ae5ba3 100644 --- a/src/tests/unit/auto/auto_release_helper_test.cpp +++ b/src/tests/unit/auto/auto_release_helper_test.cpp @@ -118,6 +118,8 @@ public: IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, supportConfigs, {}); ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)) .WillByDefault(Return(supportConfigs)); + ON_CALL(*core, GetConfig(_, StrEq(GPU_CONFIG_KEY(MAX_NUM_THREADS)))) + .WillByDefault(Return(12)); } }; diff --git a/src/tests/unit/auto/auto_select_device_failed_test.cpp b/src/tests/unit/auto/auto_select_device_failed_test.cpp index eca8e49b8fd..29992dd74c9 100644 --- a/src/tests/unit/auto/auto_select_device_failed_test.cpp +++ b/src/tests/unit/auto/auto_select_device_failed_test.cpp @@ -161,6 +161,8 @@ public: ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)) .WillByDefault(Return(supportConfigs)); EXPECT_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).Times(AnyNumber()); + ON_CALL(*core, GetConfig(_, StrEq(GPU_CONFIG_KEY(MAX_NUM_THREADS)))) + .WillByDefault(Return(12)); } }; diff --git a/src/tests/unit/auto/exec_network_get_metrics.cpp b/src/tests/unit/auto/exec_network_get_metrics.cpp index 
acd6af8ad31..80d0d1deb86 100644 --- a/src/tests/unit/auto/exec_network_get_metrics.cpp +++ b/src/tests/unit/auto/exec_network_get_metrics.cpp @@ -202,6 +202,9 @@ TEST_P(ExecNetworkGetMetric, OPTIMAL_NUMBER_OF_INFER_REQUESTS) { ON_CALL(*core.get(), GetConfig(_, StrEq(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)))) .WillByDefault(Return(std::to_string(gpuPerfHintNum))); EXPECT_CALL(*core.get(), GetConfig(_, StrEq(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)))).Times(AnyNumber()); + ON_CALL(*core.get(), GetConfig(_, StrEq(GPU_CONFIG_KEY(MAX_NUM_THREADS)))) .WillByDefault(Return(8)); + EXPECT_CALL(*core.get(), GetConfig(_, StrEq(GPU_CONFIG_KEY(MAX_NUM_THREADS)))).Times(AnyNumber()); } else { metaDevices.push_back({CommonTestUtils::DEVICE_CPU, {}, cpuCustomerNum, ""}); metaDevices.push_back({actualDeviceName, {}, actualCustomerNum, ""});