avoiding layouts (#10560)
parent 100fff83bf
commit 5247fdfcaf
@@ -30,19 +30,19 @@ using namespace InferenceEngine;
 std::vector<std::string> supported_configKeys = {CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), CONFIG_KEY(AUTO_BATCH_TIMEOUT)};
 
 template <Precision::ePrecision precision>
-Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size_t batch_id, size_t batch_num) {
+Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob,
+                                                    std::string name,
+                                                    const std::set<std::string>& batched_names,
+                                                    size_t batch_id,
+                                                    size_t batch_num) {
     typedef typename PrecisionTrait<precision>::value_type TYPE;
     typedef typename std::add_pointer<TYPE>::type TYPEPTR;
     auto ptr = batched_blob->buffer().as<TYPEPTR>();
     auto sizePerBatch = batched_blob->size() / batch_num;
-    auto layout = batched_blob->getTensorDesc().getLayout();
     SizeVector dims = batched_blob->getTensorDesc().getDims();
     // for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim
-    if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
-        layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
-        layout == InferenceEngine::Layout::NDHWC) {
+    if (batched_names.count(name)) {
         dims[0] = 1;
-        assert(batched_blob->getTensorDesc().getPrecision() == precision);
         return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
                                       ptr + sizePerBatch * batch_id,
                                       sizePerBatch);
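The rewritten helper above decides per tensor name whether a blob takes part in batching, but the zero-copy mechanics stay the same: each request receives a view that starts sizePerBatch * batch_id elements into the shared batched buffer. A minimal standalone sketch of that idea (plain C++ with illustrative names, not the InferenceEngine API):

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

struct View {
    float* data;       // points into the batched buffer, no copy
    std::size_t size;  // number of elements owned by one batch slot
};

View view_for_batch(std::vector<float>& batched, std::size_t batch_id, std::size_t batch_num) {
    assert(batch_num != 0 && batched.size() % batch_num == 0);
    const std::size_t size_per_batch = batched.size() / batch_num;
    return {batched.data() + size_per_batch * batch_id, size_per_batch};
}

int main() {
    std::vector<float> batched(4 * 6);       // batch of 4 requests, 6 elements each
    View v = view_for_batch(batched, 2, 4);  // request #2 sees elements [12, 18)
    v.data[0] = 1.0f;                        // the write lands directly in the batched buffer
    std::cout << batched[12] << "\n";        // prints 1
    return 0;
}

Because the view aliases the batched allocation, whatever a per-request pipeline writes is already in place by the time the shared worker request runs the batched inference, which is the "copy avoidance" the comment in the diff refers to.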
@@ -57,27 +57,32 @@ AutoBatchInferRequest::AutoBatchInferRequest(const std::vector<std::shared_ptr<c
                                              const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                              AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                              int batch_id,
-                                             int num_batch)
+                                             int num_batch,
+                                             const std::set<std::string>& batchedInputs,
+                                             const std::set<std::string>& batchedOutputs)
     : IInferRequestInternal(inputs, outputs),
       _myBatchedRequestWrapper(workerRequest),
       _batchId(batch_id),
       _batchSize(num_batch) {
-    ShareBlobsWithBatchRequest();
+    ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
 }
 
 AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
                                              const OutputsDataMap& networkOutputs,
                                              AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                              int batch_id,
-                                             int num_batch)
+                                             int num_batch,
+                                             const std::set<std::string>& batchedInputs,
+                                             const std::set<std::string>& batchedOutputs)
     : IInferRequestInternal(networkInputs, networkOutputs),
       _myBatchedRequestWrapper(workerRequest),
       _batchId(batch_id),
       _batchSize(num_batch) {
-    ShareBlobsWithBatchRequest();
+    ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
 }
 
-void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
+void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::set<std::string>& batchedInputs,
+                                                       const std::set<std::string>& batchedOutputs) {
     // Allocate all input blobs
     for (const auto& it : _networkInputs) {
         auto blob = _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first);
@@ -86,78 +91,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
         case InferenceEngine::Precision::FP32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BF16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BOOL:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
@@ -174,78 +205,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
         case InferenceEngine::Precision::FP32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BF16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BOOL:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
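Both switch statements above follow the same pattern: a runtime Precision value selects a compile-time template argument so the typed helper can be instantiated per element type. A compact, self-contained sketch of that dispatch style (hypothetical enum and function names, not the plugin's own types):

#include <cstddef>
#include <cstdint>
#include <iostream>

enum class Precision { FP32, I32, U8 };  // stand-in for a runtime precision enum

// Typed helper: only reachable once the element type is fixed at compile time.
template <typename T>
std::size_t bytes_per_batch(std::size_t elements_per_batch) {
    return elements_per_batch * sizeof(T);
}

// Runtime-to-compile-time dispatch, the same shape as the switch above.
std::size_t bytes_per_batch(Precision p, std::size_t elements_per_batch) {
    switch (p) {
    case Precision::FP32:
        return bytes_per_batch<float>(elements_per_batch);
    case Precision::I32:
        return bytes_per_batch<std::int32_t>(elements_per_batch);
    case Precision::U8:
        return bytes_per_batch<std::uint8_t>(elements_per_batch);
    }
    return 0;  // unreachable for the enumerated values
}

int main() {
    std::cout << bytes_per_batch(Precision::FP32, 224 * 224 * 3) << "\n";  // 602112
    return 0;
}

The switch is the single point where the runtime enum meets the template; everything past it operates on a concrete element type.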
@@ -371,12 +428,16 @@ AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(
     const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch,
     const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch,
     const DeviceInformation& networkDevice,
-    const std::unordered_map<std::string, InferenceEngine::Parameter>& config)
+    const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+    const std::set<std::string>& batchedInputs,
+    const std::set<std::string>& batchedOutputs)
     : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr,
                                                            std::make_shared<InferenceEngine::ImmediateExecutor>()),
       _network{networkWithBatch},
       _networkWithoutBatch{networkWithoutBatch},
-      _config{config} {
+      _config{config},
+      _batchedInputs(batchedInputs),
+      _batchedOutputs(batchedOutputs) {
     // WA for gcc 4.8 ( fails compilation with member init-list)
     _device = networkDevice;
     auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT));
@@ -411,7 +472,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                    networkOutputs,
                                                    workerRequestPtrAndId.first,
                                                    workerRequestPtrAndId.second,
-                                                   _device.batchForDevice);
+                                                   _device.batchForDevice,
+                                                   _batchedInputs,
+                                                   _batchedOutputs);
 }
 
 InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
@@ -427,7 +490,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                    outputs,
                                                    workerRequestPtrAndId.first,
                                                    workerRequestPtrAndId.second,
-                                                   _device.batchForDevice);
+                                                   _device.batchForDevice,
+                                                   _batchedInputs,
+                                                   _batchedOutputs);
 }
 
 std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecutableNetwork::GetWorkerInferRequest() {
@@ -761,6 +826,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     deviceConfigNoAutoBatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);
 
     std::set<std::string> batched_inputs;
+    std::set<std::string> batched_outputs;
     // check that the auto-batching is applicable in general
     try {
         // if applicable, the Auto-Batching is implicitly enabled via the performance hints
@@ -768,7 +834,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
         const bool bTputInPlg = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == tput;
         const auto& mode = deviceConfig.find(CONFIG_KEY(PERFORMANCE_HINT));
         const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput);
-        // if the auto-batching is enabled implicitly, we shall check the dims carefully, to avoid outstanding failures
+        // if the auto-batching is enabled implicitly, check the dims carefully, to avoid outstanding failures
         const bool check_dims = (bTputInPlg || bTputInLoadCfg);
         CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
         auto function = clonedNetwork.getFunction();
@@ -778,7 +844,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
         m.register_pass<ov::pass::FindBatch>(true, check_dims);
         m.run_passes(function);
         // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
-        // input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl)
+        // input(s) should have the batch dim as the first dim (current limitation of the auto-batching impl)
         const auto& params = function->get_parameters();
         for (size_t input_id = 0; input_id < params.size(); input_id++) {
             const auto& input = params[input_id];
@@ -801,8 +867,28 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
                         << "Auto-batching operates only networks with inputs/outputs batched by 0th dimension";
             }
         }
-        if (!batched_inputs.size())
-            IE_THROW(NotImplemented) << "Auto-batching supports only networks with inputs featuring batched dim!";
+        const auto& results = function->get_results();
+        for (size_t output_id = 0; output_id < results.size(); output_id++) {
+            const auto& output = results[output_id];
+            const auto& shape = output->get_output_partial_shape(0);
+            // check the batch dim: either 0th (and the original batch size of 1) or none
+            if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
+                if (shape[0] != 1)
+                    IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
+                const auto& node = output->input_value(0);
+                batched_outputs.insert(ngraph::op::util::get_ie_output_name(
+                    ov::Output<const ov::Node>(node.get_node(), node.get_index())));
+            } else {
+                // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
+                for (size_t s = 1; s < shape.size(); s++)
+                    if (ov::DimensionTracker::get_label(shape[s]))
+                        IE_THROW(NotImplemented)
+                            << "Auto-batching operates only networks with outputs batched by 0th dimension";
+            }
+        }
+        if (!batched_inputs.size() || !batched_outputs.size())
+            IE_THROW(NotImplemented)
+                << "Auto-batching supports only networks with inputs/outputs featuring batched dim!";
     } catch (...) {
         metaDevice.batchForDevice = 1;
     }
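The new loop over function results applies the same rule the parameter loop already enforces for inputs: an output counts as batched only when its 0th dimension carries the tracked batch label (with a traced batch size of 1), and a batch label on any other dimension aborts auto-batching. A simplified stand-alone version of that decision, with the DimensionTracker label replaced by a plain flag (all names here are illustrative, not the ov API):

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

struct Dim {
    long size;      // static dimension size; 1 is expected for a trackable batch dim
    bool is_batch;  // stands in for a non-zero dimension-tracking label
};

// Returns true when the output should be recorded as batched by dim 0,
// false when it is legitimately non-batched; throws for unsupported cases.
bool output_batched_by_dim0(const std::vector<Dim>& shape) {
    if (!shape.empty() && shape[0].is_batch) {
        if (shape[0].size != 1)
            throw std::runtime_error("originally batched network: not re-batched");
        return true;
    }
    // 0th dim is not the batch: acceptable only if no other dim is the batch either
    for (std::size_t s = 1; s < shape.size(); s++)
        if (shape[s].is_batch)
            throw std::runtime_error("batched by a non-0th dimension: unsupported");
    return false;
}

int main() {
    std::vector<Dim> nchw = {{1, true}, {3, false}, {224, false}, {224, false}};
    std::cout << std::boolalpha << output_batched_by_dim0(nchw) << "\n";  // true
    return 0;
}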
@@ -878,7 +964,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
                                                          executableNetworkWithoutBatch,
                                                          metaDevice,
-                                                         networkConfig);
+                                                         networkConfig,
+                                                         batched_inputs,
+                                                         batched_outputs);
 }
 
 InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(
@@ -49,7 +49,9 @@ public:
                                const InferenceEngine::SoExecutableNetworkInternal& networkForDevice,
                                const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch,
                                const DeviceInformation& networkDevices,
-                               const std::unordered_map<std::string, InferenceEngine::Parameter>& config);
+                               const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+                               const std::set<std::string>& batchedIntputs,
+                               const std::set<std::string>& batchedOutputs);
 
     void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
     InferenceEngine::Parameter GetConfig(const std::string& name) const override;
@@ -80,6 +82,9 @@ protected:
     bool _needPerfCounters = false;
     std::atomic_size_t _numRequestsCreated = {0};
     std::atomic_int _timeOut = {0}; // in ms
+
+    const std::set<std::string> _batchedInputs;
+    const std::set<std::string> _batchedOutputs;
 };
 
 class AutoBatchInferRequest : public InferenceEngine::IInferRequestInternal {
@@ -89,12 +94,16 @@ public:
                           const InferenceEngine::OutputsDataMap& networkOutputs,
                           AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
                           int batch_id,
-                          int num_batch);
+                          int num_batch,
+                          const std::set<std::string>& batchedIntputs,
+                          const std::set<std::string>& batchedOutputs);
     explicit AutoBatchInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                    const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                    AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
                                    int batch_id,
-                                   int num_batch);
+                                   int num_batch,
+                                   const std::set<std::string>& batchedIntputs,
+                                   const std::set<std::string>& batchedOutputs);
 
     // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
     void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req);
@@ -110,7 +119,8 @@ public:
 
 protected:
     void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput);
-    void ShareBlobsWithBatchRequest();
+    void ShareBlobsWithBatchRequest(const std::set<std::string>& batchedIntputs,
+                                    const std::set<std::string>& batchedOutputs);
    size_t _batchId;
    size_t _batchSize;
 };