Auto batch smart reshape (relies on the dim tracking) (#9964)

Maxim Shevtsov 2022-02-10 20:43:06 +03:00 committed by GitHub
parent 510e5fb746
commit e41e1f51a0
2 changed files with 93 additions and 96 deletions
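
In short, this change replaces the layout-based batch heuristic (checking for NC/NCHW/NCDHW/NHWC/NDHWC inputs) with ngraph dimension tracking: a FindBatch pass labels the batch dimension on a cloned copy of the function, and LoadNetworkImpl collects the inputs whose 0th dimension carries that label into batched_inputs, which later drives the reshape to the chosen batch size. A condensed sketch of that detection step follows; it is simplified from the diff below (the implicit THROUGHPUT-hint check, the dynamic-shape guard, and error reporting are omitted) and is not a drop-in excerpt from the commit.

// Condensed sketch, simplified from the new LoadNetworkImpl logic below (not verbatim from the commit)
#include <memory>
#include <set>
#include <string>
#include "dimension_tracker.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/dimension_tracking.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"

static std::set<std::string> collect_batched_inputs(const std::shared_ptr<ngraph::Function>& function) {
    std::set<std::string> batched_inputs;
    // run dimension tracking so the batch dimension of each Parameter gets a label
    ov::pass::Manager m;
    m.register_pass<ngraph::pass::InitNodeInfo>();
    m.register_pass<ov::pass::FindBatch>();
    m.run_passes(function);
    for (const auto& param : function->get_parameters()) {
        const auto& shape = param->get_partial_shape();
        // current limitation: only a labelled 0th dimension is treated as the batch dim
        if (shape.size() && ov::DimensionTracker::get_label(shape[0]))
            batched_inputs.insert(ngraph::op::util::get_ie_output_name(param->output(0)));
    }
    return batched_inputs;
}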


@@ -5,21 +5,24 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "auto_batch.hpp"
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <ie_icore.hpp>
#include <ie_ngraph_utils.hpp>
#include <ie_performance_hints.hpp>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <transformations/utils/utils.hpp>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "dimension_tracker.hpp"
#include "ie_icore.hpp"
#include "ie_ngraph_utils.hpp"
#include "ie_performance_hints.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include "transformations/common_optimizations/dimension_tracking.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"
namespace AutoBatchPlugin {
using namespace InferenceEngine;
@@ -34,8 +37,7 @@ Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size
auto sizePerBatch = batched_blob->size() / batch_num;
auto layout = batched_blob->getTensorDesc().getLayout();
SizeVector dims = batched_blob->getTensorDesc().getDims();
// the below code is a placeholder for the WIP (22.1) functionality
// that will check the reshaping by the batch is robust (CVS-51744)
// for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
@@ -393,7 +395,7 @@ unsigned int AutoBatchExecutableNetwork::ParseTimeoutValue(const std::string& s)
}
std::shared_ptr<InferenceEngine::RemoteContext> AutoBatchExecutableNetwork::GetContext() const {
return _network->GetContext();
return _networkWithoutBatch->GetContext();
}
InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
@@ -504,12 +506,13 @@ std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecuta
}
InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequest() {
IInferRequestInternal::Ptr syncRequestImpl;
if (this->_plugin) {
const auto& core = _plugin->GetCore();
if (core && core->isNewAPI())
syncRequestImpl = CreateInferRequestImpl(_parameters, _results);
if (!_network) {
auto res = _networkWithoutBatch->CreateInferRequest();
res->setPointerToExecutableNetworkInternal(shared_from_this());
return res;
}
// trying to create the new API request first
IInferRequestInternal::Ptr syncRequestImpl = CreateInferRequestImpl(_parameters, _results);
if (!syncRequestImpl)
syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
@@ -523,7 +526,8 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
}
std::shared_ptr<ngraph::Function> AutoBatchExecutableNetwork::GetExecGraphInfo() {
return _network->GetExecGraphInfo() ? _network->GetExecGraphInfo() : _networkWithoutBatch->GetExecGraphInfo();
return _network && _network->GetExecGraphInfo() ? _network->GetExecGraphInfo()
: _networkWithoutBatch->GetExecGraphInfo();
}
void AutoBatchExecutableNetwork::SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) {
@@ -542,10 +546,10 @@ InferenceEngine::Parameter AutoBatchExecutableNetwork::GetConfig(const std::stri
return it->second;
} else {
// find config key among networks config keys
auto param = _network->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
auto param = _networkWithoutBatch->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
for (auto&& configKey : param.as<std::vector<std::string>>()) {
if (configKey == name) {
return _network->GetConfig(configKey);
return _networkWithoutBatch->GetConfig(configKey);
}
}
IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config";
@@ -556,18 +560,18 @@ InferenceEngine::Parameter AutoBatchExecutableNetwork::GetMetric(const std::stri
if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
auto reqs = 0;
try {
auto hint = _network->GetConfig(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
auto hint = _networkWithoutBatch->GetConfig(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
reqs = InferenceEngine::PerfHintsConfig::CheckPerformanceHintRequestValue(hint);
if (!reqs) // no limitations from user, let's deduce the full blown #requests
// (multiplied by the devices capabilities to run multiple <batched> requests for further perf)
reqs = _device.batchForDevice *
_network->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
_networkWithoutBatch->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
} catch (const InferenceEngine::Exception& iie) {
}
reqs = std::max(reqs, _device.batchForDevice); // round up to the possible user's value
IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, reqs);
} else if (name == METRIC_KEY(NETWORK_NAME)) {
IE_SET_METRIC_RETURN(NETWORK_NAME, _network->GetMetric(METRIC_KEY(NETWORK_NAME)).as<std::string>());
IE_SET_METRIC_RETURN(NETWORK_NAME, _networkWithoutBatch->GetMetric(METRIC_KEY(NETWORK_NAME)).as<std::string>());
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
{METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
@@ -649,12 +653,15 @@ RemoteContext::Ptr AutoBatchInferencePlugin::CreateContext(const InferenceEngine
auto cfg = config;
auto it = cfg.find(CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG));
if (it == cfg.end())
IE_THROW() << "Value for KEY_AUTO_BATCH is not set";
IE_THROW() << "Value for KEY_AUTO_BATCH_DEVICE_CONFIG is not set";
auto val = it->second.as<std::string>();
auto core = GetCore();
if (!core)
return nullptr;
auto metaDevice = ParseMetaDevice(val, std::map<std::string, std::string>());
cfg.erase(it);
return GetCore()->CreateContext(metaDevice.deviceName, cfg);
return core->CreateContext(metaDevice.deviceName, cfg);
}
Parameter AutoBatchInferencePlugin::GetConfig(const std::string& name,
@@ -735,64 +742,70 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
const InferenceEngine::CNNNetwork& network,
const std::shared_ptr<InferenceEngine::RemoteContext> ctx,
const std::map<std::string, std::string>& config) {
if (GetCore() == nullptr) {
auto core = GetCore();
if (core == nullptr) {
IE_THROW() << "Please, work with Auto-Batching device via InferencEngine::Core object";
}
auto fullConfig = mergeConfigs(_config, config);
auto device_batch = fullConfig.find(CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG));
if (device_batch == fullConfig.end()) {
IE_THROW() << "KEY_AUTO_BATCH key is not set for BATCH device";
}
auto metaDevice = ParseMetaDevice(device_batch->second, fullConfig);
const auto& deviceName = metaDevice.deviceName;
const auto& deviceConfig = metaDevice.config;
auto config_without_autobatch = config, deviceConfigNoAutoBatch = deviceConfig;
auto deviceConfigNoAutoBatch = deviceConfig;
// avoid recursive auto-batching
config_without_autobatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);
deviceConfigNoAutoBatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);
auto function = network.getFunction();
std::set<std::string> batched_inputs;
// check that the auto-batching is applicable in general
try {
// if applicable, the Auto-Batching is implicitly enabled via the performance hints
const auto tput = CONFIG_VALUE(THROUGHPUT);
const bool bTputInPlg = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == tput;
const auto& mode = deviceConfig.find(CONFIG_KEY(PERFORMANCE_HINT));
const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput);
// if the auto-batching is enabled implicitly, we shall check the dims carefully, to avoid outstanding failures
const bool check_dims = (bTputInPlg || bTputInLoadCfg);
CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
auto function = clonedNetwork.getFunction();
// find the batch dim
ov::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
if (check_dims)
m.register_pass<ov::pass::FindBatch>();
else
m.register_pass<ov::pass::FindBatchDontTrack>();
m.run_passes(function);
// do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
// the below code is a placeholder for the WIP (22.1) functionality
// that will check the reshaping by the batch is robust (CVS-51744)
const InputsDataMap inputInfo = network.getInputsInfo();
bool atLeastOneInputIsBatched = false;
for (const InputsDataMap::value_type& item : inputInfo) {
auto layout = item.second->getTensorDesc().getLayout();
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
if (1 != item.second->getTensorDesc().getDims()[0]) // do not reshape/re-batch batched networks
// input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl)
const auto& params = function->get_parameters();
for (size_t input_id = 0; input_id < params.size(); input_id++) {
const auto& input = params[input_id];
const auto& shape = input->get_partial_shape();
// currently no plugin support batched execution for dynamic networks
if (shape.is_dynamic())
IE_THROW(NotImplemented) << "Auto-batching does not support dynamic networks!";
// check the batch dim: either 0th (and the original batch size of 1) or none
if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
const auto& static_shape = input->get_shape();
if (static_shape[0] != 1)
IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
else
atLeastOneInputIsBatched = true;
batched_inputs.insert(
ngraph::op::util::get_ie_output_name(params[input_id]->output(0))); // batched dim for the input
} else {
// if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
for (size_t s = 1; s < shape.size(); s++)
if (ov::DimensionTracker::get_label(shape[s]))
IE_THROW(NotImplemented)
<< "Auto-batching operates only networks with inputs/outputs batched by 0th dimension";
}
}
bool atLeastOneOutputIsBatched = false;
const OutputsDataMap outputInfo = network.getOutputsInfo();
for (const OutputsDataMap::value_type& item : outputInfo) {
auto layout = item.second->getTensorDesc().getLayout();
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
if (1 != item.second->getTensorDesc().getDims()[0]) // do not reshape/re-batch batched networks
IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
else
atLeastOneOutputIsBatched = true;
}
}
if (!atLeastOneInputIsBatched || !atLeastOneOutputIsBatched)
IE_THROW(NotImplemented)
<< "Auto-batching supports only networks featuring inputs/outputs with the batched layouts !";
if (!batched_inputs.size())
IE_THROW(NotImplemented) << "Auto-batching supports only networks with inputs featuring batched dim!";
} catch (...) {
// fallback to loading as if no Auto-Batching was involved
auto res = GetCore()->LoadNetwork(network, deviceName, deviceConfigNoAutoBatch);
_additionalSOPtrs.push_back(res._so);
return res._ptr;
metaDevice.batchForDevice = 1;
}
if (!metaDevice.batchForDevice) {
@@ -802,9 +815,8 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
// let's query the optimal batch size
std::map<std::string, InferenceEngine::Parameter> options;
options["MODEL_PTR"] = std::const_pointer_cast<ngraph::Function>(network.getFunction());
auto optBatchSize =
GetCore()->GetMetric(deviceName, METRIC_KEY(OPTIMAL_BATCH_SIZE), options).as<unsigned int>();
auto res = GetCore()->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
auto optBatchSize = core->GetMetric(deviceName, METRIC_KEY(OPTIMAL_BATCH_SIZE), options).as<unsigned int>();
auto res = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
requests = PerfHintsConfig::CheckPerformanceHintRequestValue(res);
const auto& reqs = config.find(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS));
if (reqs != config.end())
@@ -817,8 +829,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
const auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
const auto perfConfigInTargetPlugin =
GetCore()->GetConfig(deviceName, PluginConfigParams::KEY_PERF_COUNT).as<std::string>() ==
PluginConfigParams::YES;
core->GetConfig(deviceName, PluginConfigParams::KEY_PERF_COUNT).as<std::string>() == PluginConfigParams::YES;
const bool enablePerfCounters = perfConfigInTargetPlugin || ((fullConfig.end() != perfConfig) &&
(perfConfig->second == PluginConfigParams::YES));
auto report_footprint = [](std::shared_ptr<ICore> pCore, std::string device) -> size_t {
@@ -833,11 +844,11 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
size_t batch1_footprint = 0;
if (deviceName.find("GPU") != std::string::npos)
batch1_footprint = report_footprint(GetCore(), deviceName);
auto executableNetworkWithoutBatch = ctx ? GetCore()->LoadNetwork(network, ctx, deviceConfigNoAutoBatch)
: GetCore()->LoadNetwork(network, deviceName, deviceConfigNoAutoBatch);
batch1_footprint = report_footprint(core, deviceName);
auto executableNetworkWithoutBatch = ctx ? core->LoadNetwork(network, ctx, deviceConfigNoAutoBatch)
: core->LoadNetwork(network, deviceName, deviceConfigNoAutoBatch);
if (deviceName.find("GPU") != std::string::npos) {
batch1_footprint = report_footprint(GetCore(), deviceName) - batch1_footprint;
batch1_footprint = report_footprint(core, deviceName) - batch1_footprint;
if (batch1_footprint) {
const auto total_mem =
GetCore()->GetMetric(deviceName, GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)).as<uint64_t>();
@@ -855,36 +866,20 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
}
InferenceEngine::SoExecutableNetworkInternal executableNetworkWithBatch;
if (metaDevice.batchForDevice > 1) {
if (metaDevice.batchForDevice > 1 && batched_inputs.size()) {
try {
CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
const InputsDataMap inputInfo = clonedNetwork.getInputsInfo();
ICNNNetwork::InputShapes shapes = clonedNetwork.getInputShapes();
for (const InputsDataMap::value_type& item : inputInfo) {
auto layout = item.second->getTensorDesc().getLayout();
// the below code is a placeholder for the WIP (22.1) functionality
// that will check the reshaping by the batch is robust (CVS-51744)
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
assert(1 == shapes[item.first][0]); // do not reshape/re-batch originally batched networks
shapes[item.first][0] = metaDevice.batchForDevice;
}
}
clonedNetwork.reshape(shapes);
executableNetworkWithBatch =
ctx ? GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, ctx, deviceConfigNoAutoBatch)
: GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, deviceName, deviceConfigNoAutoBatch);
CNNNetwork reshaped(InferenceEngine::details::cloneNetwork(network));
ICNNNetwork::InputShapes shapes = reshaped.getInputShapes();
for (const auto& input : batched_inputs)
shapes[input][0] = metaDevice.batchForDevice;
reshaped.reshape(shapes);
executableNetworkWithBatch = ctx ? core->LoadNetwork(reshaped, ctx, deviceConfigNoAutoBatch)
: core->LoadNetwork(reshaped, deviceName, deviceConfigNoAutoBatch);
} catch (...) {
executableNetworkWithBatch = {nullptr, nullptr};
metaDevice.batchForDevice = 1;
}
}
if (!executableNetworkWithBatch) {
executableNetworkWithBatch = executableNetworkWithoutBatch;
metaDevice.batchForDevice = 1;
}
return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
executableNetworkWithoutBatch,
metaDevice,
@@ -902,13 +897,16 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadE
InferenceEngine::QueryNetworkResult AutoBatchInferencePlugin::QueryNetwork(
const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const {
auto core = GetCore();
if (!core)
return InferenceEngine::QueryNetworkResult();
auto cfg = config;
for (auto c : cfg) {
if (c.first == CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG)) {
auto val = c.second;
cfg.erase(c.first);
auto metaDevice = ParseMetaDevice(val, cfg);
return GetCore()->QueryNetwork(network, metaDevice.deviceName, cfg);
return core->QueryNetwork(network, metaDevice.deviceName, cfg);
}
}
IE_THROW() << "Value for KEY_AUTO_BATCH is not set";

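For context only (not part of the diff), here is a minimal usage sketch of how this load path is typically reached: either by targeting the BATCH device explicitly, or implicitly when the underlying plugin is configured with the THROUGHPUT performance hint, which is what the bTputInPlg/bTputInLoadCfg check above detects. The "BATCH:GPU(4)" device string, the model path, and the plain string config keys are illustrative assumptions, not taken from this commit.

// Illustrative usage sketch; device strings, model path and batch size are assumptions.
#include <map>
#include <string>
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");  // assumed model path
    // explicit: wrap the GPU plugin with auto-batching, requesting a batch of 4
    auto exec_explicit = ie.LoadNetwork(network, "BATCH:GPU(4)");
    // implicit: the THROUGHPUT hint lets the runtime insert the BATCH device automatically
    auto exec_implicit = ie.LoadNetwork(network, "GPU", {{"PERFORMANCE_HINT", "THROUGHPUT"}});
    return 0;
}

The remaining hunk below is from the plugin's header: the _additionalSOPtrs member, which was only needed by the fallback load path removed in this commit, is deleted.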

@ -173,7 +173,6 @@ protected:
const InferenceEngine::CNNNetwork& network,
const std::shared_ptr<InferenceEngine::RemoteContext> context,
const std::map<std::string, std::string>& config);
std::vector<std::shared_ptr<void>> _additionalSOPtrs;
};
} // namespace AutoBatchPlugin