avoiding layouts (#10560)

Maxim Shevtsov 2022-02-22 12:15:19 +03:00 committed by GitHub
parent 100fff83bf
commit 5247fdfcaf
2 changed files with 122 additions and 24 deletions

View File

@@ -30,19 +30,19 @@ using namespace InferenceEngine;
std::vector<std::string> supported_configKeys = {CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), CONFIG_KEY(AUTO_BATCH_TIMEOUT)};
template <Precision::ePrecision precision>
Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size_t batch_id, size_t batch_num) {
Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob,
std::string name,
const std::set<std::string>& batched_names,
size_t batch_id,
size_t batch_num) {
typedef typename PrecisionTrait<precision>::value_type TYPE;
typedef typename std::add_pointer<TYPE>::type TYPEPTR;
auto ptr = batched_blob->buffer().as<TYPEPTR>();
auto sizePerBatch = batched_blob->size() / batch_num;
auto layout = batched_blob->getTensorDesc().getLayout();
SizeVector dims = batched_blob->getTensorDesc().getDims();
// for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
if (batched_names.count(name)) {
dims[0] = 1;
assert(batched_blob->getTensorDesc().getPrecision() == precision);
return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
ptr + sizePerBatch * batch_id,
sizePerBatch);
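The hunk above drops the layout-based check (NC/NCHW/NHWC/NCDHW/NDHWC) in favor of a lookup in a precomputed set of batched tensor names, so the helper no longer needs the blob's layout to decide whether to slice. The zero-copy slicing itself is unchanged: the batched buffer holds batch_num items back-to-back and each per-request view simply starts at an offset. A minimal standalone sketch of that idea, with hypothetical names and plain std::vector in place of the Blob API:

    // Hypothetical sketch: batch_num items stored contiguously in one buffer; a per-request
    // view is just data() + items_per_batch * batch_id with length items_per_batch (no copy).
    #include <cassert>
    #include <cstddef>
    #include <vector>

    template <typename T>
    struct BlobView {
        T* data;
        size_t size;
    };

    template <typename T>
    BlobView<T> slice_batched(std::vector<T>& batched, size_t batch_id, size_t batch_num) {
        assert(batch_num != 0 && batch_id < batch_num && batched.size() % batch_num == 0);
        const size_t per_batch = batched.size() / batch_num;
        return {batched.data() + per_batch * batch_id, per_batch};
    }

Only names present in batched_names get the dims[0] = 1 slice; the precision assert above guards the reinterpretation of the raw buffer as the template's element type.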
@@ -57,27 +57,32 @@ AutoBatchInferRequest::AutoBatchInferRequest(const std::vector<std::shared_ptr<c
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
int batch_id,
int num_batch)
int num_batch,
const std::set<std::string>& batchedInputs,
const std::set<std::string>& batchedOutputs)
: IInferRequestInternal(inputs, outputs),
_myBatchedRequestWrapper(workerRequest),
_batchId(batch_id),
_batchSize(num_batch) {
ShareBlobsWithBatchRequest();
ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
}
AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
int batch_id,
int num_batch)
int num_batch,
const std::set<std::string>& batchedInputs,
const std::set<std::string>& batchedOutputs)
: IInferRequestInternal(networkInputs, networkOutputs),
_myBatchedRequestWrapper(workerRequest),
_batchId(batch_id),
_batchSize(num_batch) {
ShareBlobsWithBatchRequest();
ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
}
void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::set<std::string>& batchedInputs,
const std::set<std::string>& batchedOutputs) {
// Allocate all input blobs
for (const auto& it : _networkInputs) {
auto blob = _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first);
@@ -86,78 +91,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
case InferenceEngine::Precision::FP32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::I32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::I8:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::I16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::U16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::U32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::FP64:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::FP16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::BF16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::U64:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::I64:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::U8:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::BOOL:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedInputs,
_batchId,
_batchSize);
break;
@@ -174,78 +205,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
case InferenceEngine::Precision::FP32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::I32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::I8:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::I16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::U16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::U32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::FP64:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::FP16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::BF16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::U64:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::I64:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::U8:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
case InferenceEngine::Precision::BOOL:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
_myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
it.first,
batchedOutputs,
_batchId,
_batchSize);
break;
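Both switch statements above (inputs and outputs) repeat one pattern: a runtime Precision value selects the compile-time instantiation of create_shared_blob_on_top_of_batched_blob, now with the tensor name and the corresponding batched-name set threaded through. A trimmed, hypothetical sketch of that runtime-to-compile-time dispatch (three illustrative precisions only; none of these names are OpenVINO API):

    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>

    enum class Prec { FP32, I32, U8 };  // trimmed precision list, for illustration only

    // Per-precision element type, standing in for PrecisionTrait<precision>::value_type.
    template <Prec P> struct ElemOf;
    template <> struct ElemOf<Prec::FP32> { using type = float; };
    template <> struct ElemOf<Prec::I32>  { using type = int32_t; };
    template <> struct ElemOf<Prec::U8>   { using type = uint8_t; };

    // Stand-in for the templated helper that creates the typed shared view.
    template <Prec P>
    void share_typed_view(size_t batch_id, size_t batch_size) {
        using T = typename ElemOf<P>::type;  // typed pointer arithmetic would happen here
        (void)sizeof(T); (void)batch_id; (void)batch_size;
    }

    // The dispatch each case label above performs for its precision.
    void share_view_for(Prec p, size_t batch_id, size_t batch_size) {
        switch (p) {
        case Prec::FP32: share_typed_view<Prec::FP32>(batch_id, batch_size); break;
        case Prec::I32:  share_typed_view<Prec::I32>(batch_id, batch_size);  break;
        case Prec::U8:   share_typed_view<Prec::U8>(batch_id, batch_size);   break;
        default:         throw std::runtime_error("unsupported precision");
        }
    }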
@@ -371,12 +428,16 @@ AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(
const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch,
const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch,
const DeviceInformation& networkDevice,
const std::unordered_map<std::string, InferenceEngine::Parameter>& config)
const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
const std::set<std::string>& batchedInputs,
const std::set<std::string>& batchedOutputs)
: InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr,
std::make_shared<InferenceEngine::ImmediateExecutor>()),
_network{networkWithBatch},
_networkWithoutBatch{networkWithoutBatch},
_config{config} {
_config{config},
_batchedInputs(batchedInputs),
_batchedOutputs(batchedOutputs) {
// WA for gcc 4.8 ( fails compilation with member init-list)
_device = networkDevice;
auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT));
@@ -411,7 +472,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
networkOutputs,
workerRequestPtrAndId.first,
workerRequestPtrAndId.second,
_device.batchForDevice);
_device.batchForDevice,
_batchedInputs,
_batchedOutputs);
}
InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
@@ -427,7 +490,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
outputs,
workerRequestPtrAndId.first,
workerRequestPtrAndId.second,
_device.batchForDevice);
_device.batchForDevice,
_batchedInputs,
_batchedOutputs);
}
std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecutableNetwork::GetWorkerInferRequest() {
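The two CreateInferRequestImpl hunks above forward the stored _batchedInputs/_batchedOutputs into every per-slot request, so the sets are computed once at load time and never re-derived per request. A stripped-down, hypothetical model of that plumbing (plain structs, not OpenVINO classes):

    #include <memory>
    #include <set>
    #include <string>

    // Per-slot request: receives the batched tensor names once, at construction.
    struct SlotRequest {
        SlotRequest(int batch_id,
                    int batch_size,
                    const std::set<std::string>& batched_inputs,
                    const std::set<std::string>& batched_outputs)
            : _batch_id(batch_id),
              _batch_size(batch_size),
              _batched_inputs(batched_inputs),
              _batched_outputs(batched_outputs) {}
        int _batch_id;
        int _batch_size;
        std::set<std::string> _batched_inputs;
        std::set<std::string> _batched_outputs;
    };

    // Compiled-network object: keeps the sets computed at load time and hands them
    // to every request it creates.
    struct CompiledWithBatch {
        std::set<std::string> batched_inputs;
        std::set<std::string> batched_outputs;
        int batch_size = 1;

        std::shared_ptr<SlotRequest> create_request(int batch_id) const {
            return std::make_shared<SlotRequest>(batch_id, batch_size, batched_inputs, batched_outputs);
        }
    };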
@@ -761,6 +826,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
deviceConfigNoAutoBatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);
std::set<std::string> batched_inputs;
std::set<std::string> batched_outputs;
// check that the auto-batching is applicable in general
try {
// if applicable, the Auto-Batching is implicitly enabled via the performance hints
@@ -768,7 +834,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
const bool bTputInPlg = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == tput;
const auto& mode = deviceConfig.find(CONFIG_KEY(PERFORMANCE_HINT));
const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput);
// if the auto-batching is enabled implicitly, we shall check the dims carefully, to avoid outstanding failures
// if the auto-batching is enabled implicitly, check the dims carefully, to avoid outstanding failures
const bool check_dims = (bTputInPlg || bTputInLoadCfg);
CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
auto function = clonedNetwork.getFunction();
@@ -778,7 +844,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
m.register_pass<ov::pass::FindBatch>(true, check_dims);
m.run_passes(function);
// do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
// input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl)
// input(s) should have the batch dim as the first dim (current limitation of the auto-batching impl)
const auto& params = function->get_parameters();
for (size_t input_id = 0; input_id < params.size(); input_id++) {
const auto& input = params[input_id];
@@ -801,8 +867,28 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
<< "Auto-batching operates only networks with inputs/outputs batched by 0th dimension";
}
}
if (!batched_inputs.size())
IE_THROW(NotImplemented) << "Auto-batching supports only networks with inputs featuring batched dim!";
const auto& results = function->get_results();
for (size_t output_id = 0; output_id < results.size(); output_id++) {
const auto& output = results[output_id];
const auto& shape = output->get_output_partial_shape(0);
// check the batch dim: either 0th (and the original batch size of 1) or none
if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
if (shape[0] != 1)
IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
const auto& node = output->input_value(0);
batched_outputs.insert(ngraph::op::util::get_ie_output_name(
ov::Output<const ov::Node>(node.get_node(), node.get_index())));
} else {
// if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
for (size_t s = 1; s < shape.size(); s++)
if (ov::DimensionTracker::get_label(shape[s]))
IE_THROW(NotImplemented)
<< "Auto-batching operates only networks with outputs batched by 0th dimension";
}
}
if (!batched_inputs.size() || !batched_outputs.size())
IE_THROW(NotImplemented)
<< "Auto-batching supports only networks with inputs/outputs featuring batched dim!";
} catch (...) {
metaDevice.batchForDevice = 1;
}
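The applicability check above walks the model's parameters and results and accepts a tensor only when the tracked batch label sits on dimension 0 (with an original batch of 1) or on no dimension at all; any other placement throws NotImplemented, which the catch block turns into batchForDevice = 1 (i.e. the device falls back to no batching). A standalone sketch of that rule with plain stand-in types (Dim and its label field are illustrative substitutes for ov::Dimension plus the ov::DimensionTracker labels used above):

    #include <cstddef>
    #include <vector>

    // Stand-in for one dimension of a partial shape: static value plus batch-tracking label.
    struct Dim {
        size_t value = 0;
        size_t label = 0;  // non-zero means "this dimension was marked as the batch dim"
    };

    enum class BatchCheck { BatchedByDim0, NotBatched, Unsupported };

    // The acceptance rule applied to every input and output shape in the hunks above.
    BatchCheck check_batch_dim(const std::vector<Dim>& shape) {
        if (!shape.empty() && shape[0].label)
            return shape[0].value == 1 ? BatchCheck::BatchedByDim0 : BatchCheck::Unsupported;
        for (size_t s = 1; s < shape.size(); s++)
            if (shape[s].label)
                return BatchCheck::Unsupported;  // batch tracked on a non-0th dimension
        return BatchCheck::NotBatched;
    }

Names that pass as BatchedByDim0 go into batched_inputs or batched_outputs; if either set ends up empty, the whole network is rejected for auto-batching with the NotImplemented throw shown above.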
@@ -878,7 +964,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
executableNetworkWithoutBatch,
metaDevice,
networkConfig);
networkConfig,
batched_inputs,
batched_outputs);
}
InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(

View File

@@ -49,7 +49,9 @@ public:
const InferenceEngine::SoExecutableNetworkInternal& networkForDevice,
const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch,
const DeviceInformation& networkDevices,
const std::unordered_map<std::string, InferenceEngine::Parameter>& config);
const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
const std::set<std::string>& batchedInputs,
const std::set<std::string>& batchedOutputs);
void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
InferenceEngine::Parameter GetConfig(const std::string& name) const override;
@@ -80,6 +82,9 @@ protected:
bool _needPerfCounters = false;
std::atomic_size_t _numRequestsCreated = {0};
std::atomic_int _timeOut = {0}; // in ms
const std::set<std::string> _batchedInputs;
const std::set<std::string> _batchedOutputs;
};
class AutoBatchInferRequest : public InferenceEngine::IInferRequestInternal {
@@ -89,12 +94,16 @@ public:
const InferenceEngine::OutputsDataMap& networkOutputs,
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
int batch_id,
int num_batch);
int num_batch,
const std::set<std::string>& batchedInputs,
const std::set<std::string>& batchedOutputs);
explicit AutoBatchInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
int batch_id,
int num_batch);
int num_batch,
const std::set<std::string>& batchedInputs,
const std::set<std::string>& batchedOutputs);
// Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req);
@@ -110,7 +119,8 @@ public:
protected:
void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput);
void ShareBlobsWithBatchRequest();
void ShareBlobsWithBatchRequest(const std::set<std::string>& batchedInputs,
const std::set<std::string>& batchedOutputs);
size_t _batchId;
size_t _batchSize;
};