avoiding layouts (#10560)

Maxim Shevtsov 2022-02-22 12:15:19 +03:00 committed by GitHub
parent 100fff83bf
commit 5247fdfcaf
2 changed files with 122 additions and 24 deletions

View File

@@ -30,19 +30,19 @@ using namespace InferenceEngine;
 std::vector<std::string> supported_configKeys = {CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), CONFIG_KEY(AUTO_BATCH_TIMEOUT)};
 template <Precision::ePrecision precision>
-Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size_t batch_id, size_t batch_num) {
+Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob,
+                                                    std::string name,
+                                                    const std::set<std::string>& batched_names,
+                                                    size_t batch_id,
+                                                    size_t batch_num) {
     typedef typename PrecisionTrait<precision>::value_type TYPE;
     typedef typename std::add_pointer<TYPE>::type TYPEPTR;
     auto ptr = batched_blob->buffer().as<TYPEPTR>();
     auto sizePerBatch = batched_blob->size() / batch_num;
-    auto layout = batched_blob->getTensorDesc().getLayout();
     SizeVector dims = batched_blob->getTensorDesc().getDims();
     // for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim
-    if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
-        layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
-        layout == InferenceEngine::Layout::NDHWC) {
+    if (batched_names.count(name)) {
         dims[0] = 1;
+        assert(batched_blob->getTensorDesc().getPrecision() == precision);
         return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
                                       ptr + sizePerBatch * batch_id,
                                       sizePerBatch);
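The reworked helper above no longer infers "batchable" from the blob layout (NC/NCHW/NHWC/...); it consults a set of tensor names computed once at load time, and still carves each request's view out of the batched allocation by a fixed offset along the 0th dimension. As a rough illustration of that arithmetic only (not the plugin code; `BatchedBuffer`, `view_for_request` and the tensor name are made up, and plain `float` stands in for the templated precision):

```cpp
#include <cassert>
#include <cstddef>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Stand-in for a blob batched along the 0th dim: batch_num equally sized chunks, contiguous.
struct BatchedBuffer {
    std::vector<float> data;
    size_t batch_num;
};

// Returns a non-owning view (pointer + element count) of the batch_id-th chunk when the tensor
// name is in the batched set (the name-based check that replaced the layout check); otherwise
// the whole buffer is shared unchanged.
std::pair<float*, size_t> view_for_request(BatchedBuffer& buf,
                                           const std::string& name,
                                           const std::set<std::string>& batched_names,
                                           size_t batch_id) {
    const size_t size_per_batch = buf.data.size() / buf.batch_num;
    if (batched_names.count(name)) {
        assert(batch_id < buf.batch_num);
        return {buf.data.data() + size_per_batch * batch_id, size_per_batch};
    }
    return {buf.data.data(), buf.data.size()};
}

int main() {
    BatchedBuffer buf{std::vector<float>(4 * 6), /*batch_num=*/4};
    auto view = view_for_request(buf, "input0", {"input0"}, /*batch_id=*/2);
    return view.second == 6 ? 0 : 1;  // view.first points at element 12 (= 2 * 6)
}
```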
@@ -57,27 +57,32 @@ AutoBatchInferRequest::AutoBatchInferRequest(const std::vector<std::shared_ptr<c
                                              const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                              AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                              int batch_id,
-                                             int num_batch)
+                                             int num_batch,
+                                             const std::set<std::string>& batchedInputs,
+                                             const std::set<std::string>& batchedOutputs)
     : IInferRequestInternal(inputs, outputs),
       _myBatchedRequestWrapper(workerRequest),
       _batchId(batch_id),
       _batchSize(num_batch) {
-    ShareBlobsWithBatchRequest();
+    ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
 }
 
 AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
                                              const OutputsDataMap& networkOutputs,
                                              AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                              int batch_id,
-                                             int num_batch)
+                                             int num_batch,
+                                             const std::set<std::string>& batchedInputs,
+                                             const std::set<std::string>& batchedOutputs)
     : IInferRequestInternal(networkInputs, networkOutputs),
       _myBatchedRequestWrapper(workerRequest),
       _batchId(batch_id),
       _batchSize(num_batch) {
-    ShareBlobsWithBatchRequest();
+    ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
 }
 
-void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
+void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::set<std::string>& batchedInputs,
+                                                       const std::set<std::string>& batchedOutputs) {
     // Allocate all input blobs
     for (const auto& it : _networkInputs) {
         auto blob = _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first);
@@ -86,78 +91,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
         case InferenceEngine::Precision::FP32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BF16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BOOL:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
@@ -174,78 +205,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
         case InferenceEngine::Precision::FP32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BF16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BOOL:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
@@ -371,12 +428,16 @@ AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(
     const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch,
     const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch,
     const DeviceInformation& networkDevice,
-    const std::unordered_map<std::string, InferenceEngine::Parameter>& config)
+    const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+    const std::set<std::string>& batchedInputs,
+    const std::set<std::string>& batchedOutputs)
     : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr,
                                                           std::make_shared<InferenceEngine::ImmediateExecutor>()),
       _network{networkWithBatch},
       _networkWithoutBatch{networkWithoutBatch},
-      _config{config} {
+      _config{config},
+      _batchedInputs(batchedInputs),
+      _batchedOutputs(batchedOutputs) {
     // WA for gcc 4.8 ( fails compilation with member init-list)
     _device = networkDevice;
     auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT));
@@ -411,7 +472,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                    networkOutputs,
                                                    workerRequestPtrAndId.first,
                                                    workerRequestPtrAndId.second,
-                                                   _device.batchForDevice);
+                                                   _device.batchForDevice,
+                                                   _batchedInputs,
+                                                   _batchedOutputs);
 }
 
 InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
@@ -427,7 +490,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                    outputs,
                                                    workerRequestPtrAndId.first,
                                                    workerRequestPtrAndId.second,
-                                                   _device.batchForDevice);
+                                                   _device.batchForDevice,
+                                                   _batchedInputs,
+                                                   _batchedOutputs);
 }
 
 std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecutableNetwork::GetWorkerInferRequest() {
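Taken together, the constructor and the two CreateInferRequestImpl hunks are plumbing: the batched tensor names discovered during LoadNetwork are stored once on the executable network and forwarded to every infer request, which only needs them while wiring up its blobs. A hypothetical, stripped-down version of that flow (all class, member, and tensor names here are invented, not the OpenVINO API) could look like:

```cpp
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <utility>

class MiniInferRequest {
public:
    MiniInferRequest(int batch_id, const std::set<std::string>& batched_inputs)
        : _batch_id(batch_id),
          // analogous to ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs): the names
          // are consulted during construction to choose shared-slice vs. whole-blob sharing
          _shares_slice(batched_inputs.count("input0") != 0) {}

    int batch_id() const { return _batch_id; }
    bool shares_slice() const { return _shares_slice; }

private:
    int _batch_id;
    bool _shares_slice;
};

class MiniExecNetwork {
public:
    explicit MiniExecNetwork(std::set<std::string> batched_inputs)
        : _batched_inputs(std::move(batched_inputs)) {}  // filled by the plugin at load time

    std::unique_ptr<MiniInferRequest> create_request(int batch_id) const {
        return std::make_unique<MiniInferRequest>(batch_id, _batched_inputs);
    }

private:
    const std::set<std::string> _batched_inputs;
};

int main() {
    MiniExecNetwork net({"input0"});
    std::cout << net.create_request(2)->shares_slice() << "\n";  // prints 1
}
```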
@@ -761,6 +826,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     deviceConfigNoAutoBatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);
 
     std::set<std::string> batched_inputs;
+    std::set<std::string> batched_outputs;
     // check that the auto-batching is applicable in general
     try {
         // if applicable, the Auto-Batching is implicitly enabled via the performance hints
@@ -768,7 +834,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
         const bool bTputInPlg = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == tput;
         const auto& mode = deviceConfig.find(CONFIG_KEY(PERFORMANCE_HINT));
         const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput);
-        // if the auto-batching is enabled implicitly, we shall check the dims carefully, to avoid outstanding failures
+        // if the auto-batching is enabled implicitly, check the dims carefully, to avoid outstanding failures
         const bool check_dims = (bTputInPlg || bTputInLoadCfg);
         CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
         auto function = clonedNetwork.getFunction();
@@ -778,7 +844,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
         m.register_pass<ov::pass::FindBatch>(true, check_dims);
         m.run_passes(function);
         // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
-        // input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl)
+        // input(s) should have the batch dim as the first dim (current limitation of the auto-batching impl)
         const auto& params = function->get_parameters();
         for (size_t input_id = 0; input_id < params.size(); input_id++) {
             const auto& input = params[input_id];
@@ -801,8 +867,28 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
                         << "Auto-batching operates only networks with inputs/outputs batched by 0th dimension";
             }
         }
-        if (!batched_inputs.size())
-            IE_THROW(NotImplemented) << "Auto-batching supports only networks with inputs featuring batched dim!";
+        const auto& results = function->get_results();
+        for (size_t output_id = 0; output_id < results.size(); output_id++) {
+            const auto& output = results[output_id];
+            const auto& shape = output->get_output_partial_shape(0);
+            // check the batch dim: either 0th (and the original batch size of 1) or none
+            if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
+                if (shape[0] != 1)
+                    IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
+                const auto& node = output->input_value(0);
+                batched_outputs.insert(ngraph::op::util::get_ie_output_name(
+                    ov::Output<const ov::Node>(node.get_node(), node.get_index())));
+            } else {
+                // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
+                for (size_t s = 1; s < shape.size(); s++)
+                    if (ov::DimensionTracker::get_label(shape[s]))
+                        IE_THROW(NotImplemented)
+                            << "Auto-batching operates only networks with outputs batched by 0th dimension";
+            }
+        }
+        if (!batched_inputs.size() || !batched_outputs.size())
+            IE_THROW(NotImplemented)
+                << "Auto-batching supports only networks with inputs/outputs featuring batched dim!";
     } catch (...) {
         metaDevice.batchForDevice = 1;
     }
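The new loop over `function->get_results()` mirrors the existing input check: an output qualifies only if the dimension tracker labels its 0th dimension as the batch and the original batch size is 1; a batch label on any other dimension, or an originally batched network, gets rejected. A simplified stand-in for that decision, using plain per-dimension labels instead of `ov::DimensionTracker` (the `TrackedOutput` type and its field names are made up):

```cpp
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// One output as seen after the FindBatch pass: a static shape plus a per-dimension label,
// where a non-zero label marks the dimension the pass considers to be the batch.
struct TrackedOutput {
    std::string name;
    std::vector<size_t> shape;
    std::vector<size_t> labels;  // same length as shape; 0 == not tracked as batch
};

// true  -> the output would be added to batched_outputs (batched by the 0th dim, batch == 1)
// false -> no batch dimension at all, the output is left untouched
// throws -> the network is rejected for auto-batching, as in the hunk above
bool is_batchable_by_0th_dim(const TrackedOutput& out) {
    if (!out.shape.empty() && out.labels[0] != 0) {
        if (out.shape[0] != 1)
            throw std::runtime_error("originally batched network: " + out.name);
        return true;
    }
    for (size_t s = 1; s < out.shape.size(); s++)
        if (out.labels[s] != 0)
            throw std::runtime_error("output batched by a non-0th dim: " + out.name);
    return false;
}
```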
@@ -878,7 +964,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
                                                         executableNetworkWithoutBatch,
                                                         metaDevice,
-                                                        networkConfig);
+                                                        networkConfig,
+                                                        batched_inputs,
+                                                        batched_outputs);
 }
 
 InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(

View File

@@ -49,7 +49,9 @@ public:
                                const InferenceEngine::SoExecutableNetworkInternal& networkForDevice,
                                const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch,
                                const DeviceInformation& networkDevices,
-                               const std::unordered_map<std::string, InferenceEngine::Parameter>& config);
+                               const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+                               const std::set<std::string>& batchedIntputs,
+                               const std::set<std::string>& batchedOutputs);
 
     void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
     InferenceEngine::Parameter GetConfig(const std::string& name) const override;
@@ -80,6 +82,9 @@ protected:
     bool _needPerfCounters = false;
     std::atomic_size_t _numRequestsCreated = {0};
    std::atomic_int _timeOut = {0}; // in ms
+    const std::set<std::string> _batchedInputs;
+    const std::set<std::string> _batchedOutputs;
 };
 
 class AutoBatchInferRequest : public InferenceEngine::IInferRequestInternal {
@@ -89,12 +94,16 @@ public:
                                    const InferenceEngine::OutputsDataMap& networkOutputs,
                                    AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
                                    int batch_id,
-                                   int num_batch);
+                                   int num_batch,
+                                   const std::set<std::string>& batchedIntputs,
+                                   const std::set<std::string>& batchedOutputs);
     explicit AutoBatchInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                    const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                    AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
                                    int batch_id,
-                                   int num_batch);
+                                   int num_batch,
+                                   const std::set<std::string>& batchedIntputs,
+                                   const std::set<std::string>& batchedOutputs);
 
     // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
     void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req);
@@ -110,7 +119,8 @@ public:
 
 protected:
     void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput);
-    void ShareBlobsWithBatchRequest();
+    void ShareBlobsWithBatchRequest(const std::set<std::string>& batchedIntputs,
+                                    const std::set<std::string>& batchedOutputs);
     size_t _batchId;
     size_t _batchSize;
 };