AUTO CTPUT: a single device always goes through the single-device logic (#14425)

* AUTO CTPUT: a single device always goes through the single-device logic

* CreateInferRequest goes through the single-device logic when the perf hint is ctput

* Optimize the single-device logic per Bell's review comments

* Add code comments

* Merge the two isCumulative conditions

* Add a ctput test case

* Remove a redundant header include from auto_ctput_test.cpp

* Modify the test case per Bell's review comments
guozhong wang 2023-01-13 10:29:44 +08:00 committed by GitHub
parent 04720f0453
commit 9dd08cd929
2 changed files with 285 additions and 19 deletions
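In short: with the CUMULATIVE_THROUGHPUT (ctput) hint, AUTO used to hand every load over to the MULTI path. After this change it first counts the valid devices and, when only one remains, it falls back to the plain single-device path with a THROUGHPUT hint. A condensed sketch of the new branching (paraphrasing the first diff below; DeviceInformation, _autoSContext, and _loadContext are the plugin's own types):

    bool isCumulative = _autoSContext->_performanceHint == IE::PluginConfigParams::CUMULATIVE_THROUGHPUT;
    if (isCumulative) {
        auto validDevices = _autoSContext->_plugin->GetValidDevice(devices, precision);
        if (validDevices.size() == 1) {
            // exactly one usable device: switch to the single-device (THROUGHPUT) path
            _autoSContext->_performanceHint = IE::PluginConfigParams::THROUGHPUT;
            isCumulative = false;
        } else {
            // two or more devices: compose a "MULTI:dev1,dev2,..." target and keep ctput
        }
    }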


@@ -113,40 +113,53 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
}
_autoSContext->_config[IE::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = _autoSContext->_strDevices;
std::string profilingTask = "AutoSchedule::AutoSchedule:AutoMode";
// loadContext[ACTUALDEVICE] is always enabled,
// when there is CPU and there are more than two devices, loadContext[CPU] is enabled
_loadContext[ACTUALDEVICE].isEnabled = true;
if (_autoSContext->_modelPath.empty())
_loadContext[ACTUALDEVICE].networkPrecision = GetNetworkPrecision(_autoSContext->_network);
_loadContext[ACTUALDEVICE].metaDevices = _autoSContext->_devicePriorities;
bool isCumulative =
(_autoSContext->_performanceHint == IE::PluginConfigParams::CUMULATIVE_THROUGHPUT) ? true : false;
if (isCumulative) {
std::list<DeviceInformation> validDevices =
_autoSContext->_plugin->GetValidDevice(_autoSContext->_devicePriorities,
_loadContext[ACTUALDEVICE].networkPrecision);
std::string deviceName = "MULTI:";
for (auto& device : validDevices) {
deviceName += device.deviceName;
deviceName += ((device.deviceName == validDevices.back().deviceName) ? "" : ",");
if (validDevices.size() == 1) {
// When the hint is ctput and there is only one device, the single-device logic is used instead of
// the MULTI logic
_autoSContext->_performanceHint = IE::PluginConfigParams::THROUGHPUT;
_loadContext[ACTUALDEVICE].deviceInfo = validDevices.front();
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
IE::PluginConfigParams::THROUGHPUT;
isCumulative = false;
} else {
// When the hint is ctput and there is more than one device, the MULTI logic is used
std::string deviceName = "MULTI:";
for (auto& device : validDevices) {
deviceName += device.deviceName;
deviceName += ((device.deviceName == validDevices.back().deviceName) ? "" : ",");
}
_loadContext[ACTUALDEVICE].deviceInfo.deviceName = deviceName;
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT;
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERF_COUNT)] =
_autoSContext->_needPerfCounters ? InferenceEngine::PluginConfigParams::YES
: InferenceEngine::PluginConfigParams::NO;
if (_autoSContext->_bindBuffer)
_loadContext[ACTUALDEVICE].deviceInfo.config[ov::intel_auto::device_bind_buffer.name()] =
InferenceEngine::PluginConfigParams::YES;
}
} else {
_loadContext[ACTUALDEVICE].deviceInfo =
_autoSContext->_plugin->SelectDevice(_autoSContext->_devicePriorities,
_loadContext[ACTUALDEVICE].networkPrecision,
_autoSContext->_modelPriority);
}
LOG_INFO_TAG("select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
bool isActualDevCPU =
    _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos && !isCumulative;
// if the actual device is CPU or the perf hint is cumulative, disable _loadContext[CPU] and only use _loadContext[ACTUALDEVICE]
if (isActualDevCPU || isCumulative) {
_loadContext[CPU].isEnabled = false;
} else {
@@ -548,4 +561,4 @@ IInferPtr AutoSchedule::CreateInferRequest() {
syncRequestImpl,
execNetwork->_callbackExecutor);
}
} // namespace MultiDevicePlugin
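For context, a caller reaches the branch above by loading through AUTO with the ctput hint via the 1.0 API, e.g. (a minimal sketch; the model path and the single-entry device priority list are placeholders):

    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");
    // Only one device in the priority list: AUTO now drops to a plain THROUGHPUT
    // load on that device instead of wrapping it in MULTI.
    auto exeNetwork = ie.LoadNetwork(
        network,
        "AUTO",
        {{CONFIG_KEY(PERFORMANCE_HINT), InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT},
         {InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}});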


@@ -0,0 +1,253 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <common_test_utils/test_constants.hpp>
#include <ngraph_functions/subgraph_builders.hpp>
#include "cpp/ie_plugin.hpp"
#include "plugin/mock_load_network_properties.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp"
using ::testing::_;
using ::testing::MatcherCast;
using ::testing::NiceMock;
using ::testing::Return;
using ::testing::StrEq;
using namespace MockMultiDevice;
using Config = std::map<std::string, std::string>;
using ConfigParams = std::tuple<std::vector<std::string>>;
// define a matcher to check whether the perf hint matches the expected value
MATCHER_P(ComparePerfHint, perfHint, "Check whether the perf hint matches the expected value.") {
std::string arg_perfHint = "";
auto itor = arg.find(PluginConfigParams::KEY_PERFORMANCE_HINT);
if (itor != arg.end()) {
arg_perfHint = itor->second;
}
return perfHint == arg_perfHint;
}
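// Usage sketch: the matcher slots into the config argument of an EXPECT_CALL, e.g.
//   EXPECT_CALL(*core, LoadNetwork(_, _,
//       ::testing::Matcher<const std::map<std::string, std::string>&>(
//           ComparePerfHint(InferenceEngine::PluginConfigParams::THROUGHPUT))));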
class LoadNetworkWithCTPUTMockTest : public ::testing::TestWithParam<ConfigParams> {
public:
std::shared_ptr<NiceMock<MockICore>> core;
std::shared_ptr<NiceMock<MockMultiPluginForLoadNetworkWithPropertiesTest>> plugin;
InferenceEngine::CNNNetwork simpleCnnNetwork;
// mock cpu exeNetwork
std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> cpuMockIExeNet;
ov::SoPtr<IExecutableNetworkInternal> cpuMockExeNetwork;
NiceMock<MockIInferencePlugin>* cpuMockIPlugin;
InferenceEngine::InferencePlugin cpuMockPlugin;
// mock gpu exeNetwork
std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> gpuMockIExeNet;
ov::SoPtr<IExecutableNetworkInternal> gpuMockExeNetwork;
NiceMock<MockIInferencePlugin>* gpuMockIPlugin;
InferenceEngine::InferencePlugin gpuMockPlugin;
std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternal;
public:
static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
std::vector<std::string> targetDevices;
std::tie(targetDevices) = obj.param;
std::ostringstream result;
result << "ctput_loadnetwork_to_device_";
for (auto& device : targetDevices) {
if (device == targetDevices.back()) {
result << device;
} else {
result << device << "_";
}
}
return result.str();
}
void TearDown() override {
core.reset();
plugin.reset();
}
void SetUp() override {
// prepare cpuMockExeNetwork
cpuMockIExeNet = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
auto cpuMockIPluginPtr = std::make_shared<NiceMock<MockIInferencePlugin>>();
ON_CALL(*cpuMockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _))
.WillByDefault(Return(cpuMockIExeNet));
cpuMockPlugin = InferenceEngine::InferencePlugin{cpuMockIPluginPtr, {}};
// remove annoying ON CALL message
EXPECT_CALL(*cpuMockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _)).Times(1);
cpuMockExeNetwork = cpuMockPlugin.LoadNetwork(CNNNetwork{}, {});
// prepare gpuMockExeNetwork
gpuMockIExeNet = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
auto gpuMockIPluginPtr = std::make_shared<NiceMock<MockIInferencePlugin>>();
ON_CALL(*gpuMockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _))
.WillByDefault(Return(gpuMockIExeNet));
gpuMockPlugin = InferenceEngine::InferencePlugin{gpuMockIPluginPtr, {}};
// remove annoying ON CALL message
EXPECT_CALL(*gpuMockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _)).Times(1);
gpuMockExeNetwork = gpuMockPlugin.LoadNetwork(CNNNetwork{}, {});
// prepare mockicore and cnnNetwork for loading
core = std::shared_ptr<NiceMock<MockICore>>(new NiceMock<MockICore>());
auto* origin_plugin = new NiceMock<MockMultiPluginForLoadNetworkWithPropertiesTest>();
plugin = std::shared_ptr<NiceMock<MockMultiPluginForLoadNetworkWithPropertiesTest>>(origin_plugin);
// replace core with mock Icore
plugin->SetCore(core);
inferReqInternal = std::make_shared<NiceMock<MockIInferRequestInternal>>();
ON_CALL(*cpuMockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(inferReqInternal));
ON_CALL(*gpuMockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(inferReqInternal));
ON_CALL(*cpuMockIExeNet.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
.WillByDefault(Return("0"));
ON_CALL(*gpuMockIExeNet.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
.WillByDefault(Return("0"));
std::vector<std::string> availableDevs = {"CPU", "GPU"};
ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs));
std::vector<std::string> metrics = {METRIC_KEY(SUPPORTED_CONFIG_KEYS)};
ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_METRICS)), _)).WillByDefault(Return(metrics));
std::vector<std::string> configKeys = {"SUPPORTED_CONFIG_KEYS", "NUM_STREAMS"};
ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(Return(configKeys));
ON_CALL(*plugin, ParseMetaDevices)
.WillByDefault(
[this](const std::string& priorityDevices, const std::map<std::string, std::string>& config) {
return plugin->MultiDeviceInferencePlugin::ParseMetaDevices(priorityDevices, config);
});
ON_CALL(*plugin, SelectDevice)
.WillByDefault([this](const std::vector<DeviceInformation>& metaDevices,
const std::string& netPrecision,
unsigned int priority) {
return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, priority);
});
ON_CALL(*plugin, GetValidDevice)
.WillByDefault([this](const std::vector<DeviceInformation>& metaDevices, const std::string& netPrecision) {
std::list<DeviceInformation> devices(metaDevices.begin(), metaDevices.end());
return devices;
});
ON_CALL(*core, GetSupportedConfig)
.WillByDefault([](const std::string& device, const std::map<std::string, std::string>& fullConfigs) {
auto item = fullConfigs.find(device);
Config deviceConfigs;
if (item != fullConfigs.end()) {
std::stringstream strConfigs(item->second);
ov::util::Read<Config>{}(strConfigs, deviceConfigs);
}
return deviceConfigs;
});
ON_CALL(*plugin, GetDeviceList).WillByDefault([this](const std::map<std::string, std::string>& config) {
return plugin->MultiDeviceInferencePlugin::GetDeviceList(config);
});
std::vector<std::string> cpuCapability{"FP32", "FP16", "INT8", "BIN"};
std::vector<std::string> gpuCapability{"FP32", "FP16", "BATCHED_BLOB", "BIN", "INT8"};
ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_CPU), StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _))
.WillByDefault(Return(cpuCapability));
ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_GPU), StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _))
.WillByDefault(Return(gpuCapability));
ON_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(StrEq("CPU")),
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
.WillByDefault(Return(cpuMockExeNetwork));
ON_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(StrEq("GPU")),
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
.WillByDefault(Return(gpuMockExeNetwork));
std::shared_ptr<ngraph::Function> simpleNetwork = ngraph::builder::subgraph::makeSingleConv();
ASSERT_NO_THROW(simpleCnnNetwork = InferenceEngine::CNNNetwork(simpleNetwork));
}
};
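// The test below feeds LoadExeNetworkImpl the ctput hint and checks which LoadNetwork
// calls reach the mocked core: with a single device it expects exactly one direct load
// carrying the THROUGHPUT hint and no "MULTI:..." load; with several devices it expects
// exactly one "MULTI:dev1,dev2" load and no extra CPU helper load.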
TEST_P(LoadNetworkWithCTPUTMockTest, CTPUTSingleDevLogicTest) {
std::vector<std::string> targetDevices;
Config config;
std::tie(targetDevices) = this->GetParam();
plugin->SetName("AUTO");
config.insert({{CONFIG_KEY(PERFORMANCE_HINT), InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT}});
if (targetDevices.size() == 1) {
std::string targetDevice = targetDevices[0];
config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDevices[0]});
// only the single-device logic should be called, with the THROUGHPUT performance hint
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(targetDevice),
::testing::Matcher<const std::map<std::string, std::string>&>(
ComparePerfHint(InferenceEngine::PluginConfigParams::THROUGHPUT))))
.Times(1);
// the MULTI logic must not be called
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>("MULTI:" + targetDevice),
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
.Times(0);
// if the target device is GPU only, the CPU helper must not be called
if (targetDevice.find("GPU") != std::string::npos) {
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(CommonTestUtils::DEVICE_CPU),
::testing::Matcher<const std::map<std::string, std::string>&>(
ComparePerfHint(InferenceEngine::PluginConfigParams::LATENCY))))
.Times(0);
}
} else {
std::string targetDev;
for (auto& deviceName : targetDevices) {
targetDev += deviceName;
targetDev += ((deviceName == targetDevices.back()) ? "" : ",");
}
config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev});
// the MULTI logic should be called
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>("MULTI:" + targetDev),
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
.Times(1);
// the CPU helper must not be called
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(CommonTestUtils::DEVICE_CPU),
::testing::Matcher<const std::map<std::string, std::string>&>(
ComparePerfHint(InferenceEngine::PluginConfigParams::LATENCY))))
.Times(0);
}
ASSERT_NO_THROW(plugin->LoadExeNetworkImpl(simpleCnnNetwork, config));
}
const std::vector<ConfigParams> testConfigs = {
ConfigParams{{"CPU"}},
ConfigParams{{"GPU"}},
ConfigParams{{"CPU", "GPU"}},
ConfigParams{{"GPU", "CPU"}},
};
INSTANTIATE_TEST_SUITE_P(smoke_AutoMock_CTPUTSingleDevLogicTest,
LoadNetworkWithCTPUTMockTest,
::testing::ValuesIn(testConfigs),
LoadNetworkWithCTPUTMockTest::getTestCaseName);
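With a typical OpenVINO unit-test build, this suite would run through the AUTO/MULTI unit-test binary (the binary name below is an assumption; it varies across versions):

    ./ov_auto_unit_tests --gtest_filter='smoke_AutoMock_CTPUTSingleDevLogicTest*'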