// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "auto_batch.hpp"

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "dimension_tracker.hpp"
#include "ie_icore.hpp"
#include "ie_ngraph_utils.hpp"
#include "ie_performance_hints.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include "transformations/common_optimizations/dimension_tracking.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"

namespace AutoBatchPlugin {
using namespace InferenceEngine;

std::vector<std::string> supported_configKeys = {CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG),
                                                 CONFIG_KEY(AUTO_BATCH_TIMEOUT),
                                                 CONFIG_KEY(CACHE_DIR)};
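// Helper: wraps (a slice of) the worker's batched blob into a new Blob without copying.
// For names listed in batched_names the returned blob views only the batch_id-th slice
// (size() / batch_num elements, with the 0th dim set to 1); for any other name the whole
// batched blob is shared as-is (e.g. constants that are identical for every request).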
template <Precision::ePrecision precision>
Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob,
                                                    std::string name,
                                                    const std::set<std::string>& batched_names,
                                                    size_t batch_id,
                                                    size_t batch_num) {
    typedef typename PrecisionTrait<precision>::value_type TYPE;
    typedef typename std::add_pointer<TYPE>::type TYPEPTR;
    auto ptr = batched_blob->buffer().as<TYPEPTR>();
    auto sizePerBatch = batched_blob->size() / batch_num;
    SizeVector dims = batched_blob->getTensorDesc().getDims();
    // for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim
    if (batched_names.count(name)) {
        dims[0] = 1;
        return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
                                      ptr + sizePerBatch * batch_id,
                                      sizePerBatch);
    } else {
        // same blob for all requests (e.g. constants)
        return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()}, ptr);
    }
}
// ------------------------------AutoBatchInferRequest----------------------------
AutoBatchInferRequest::AutoBatchInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                             const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                             AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                             int batch_id,
                                             int num_batch,
                                             const std::set<std::string>& batchedInputs,
                                             const std::set<std::string>& batchedOutputs)
    : IInferRequestInternal(inputs, outputs),
      _myBatchedRequestWrapper(workerRequest),
      _batchId(batch_id),
      _batchSize(num_batch) {
    ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
}

AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
                                             const OutputsDataMap& networkOutputs,
                                             AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                             int batch_id,
                                             int num_batch,
                                             const std::set<std::string>& batchedInputs,
                                             const std::set<std::string>& batchedOutputs)
    : IInferRequestInternal(networkInputs, networkOutputs),
      _myBatchedRequestWrapper(workerRequest),
      _batchId(batch_id),
      _batchSize(num_batch) {
    ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
}
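// Pre-creates this request's input/output blobs as zero-copy views into the worker's batched
// blobs, dispatching on the tensor precision. Each logical request thus writes its inputs
// directly into (and reads its outputs directly from) its own slice of the batched blob.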
void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::set<std::string>& batchedInputs,
                                                       const std::set<std::string>& batchedOutputs) {
    // Allocate all input blobs
    for (const auto& it : _networkInputs) {
        auto blob = _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first);
        Blob::Ptr res;
        switch (it.second->getTensorDesc().getPrecision()) {
        case InferenceEngine::Precision::FP32:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::I32:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::I8:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::I16:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::U16:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::U32:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::FP64:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::FP16:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::BF16:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::U64:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::I64:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::U8:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::BOOL:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedInputs,
                _batchId,
                _batchSize);
            break;
        default:
            IE_THROW() << "Unsupported input precision " << it.second->getTensorDesc().getPrecision();
        }
        _inputs[it.first] = res;
    }
    // Allocate all output blobs
    for (const auto& it : _networkOutputs) {
        auto blob = _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first);
        Blob::Ptr res;
        switch (it.second->getTensorDesc().getPrecision()) {
        case InferenceEngine::Precision::FP32:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::I32:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::I8:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::I16:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::U16:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::U32:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::FP64:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::FP16:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::BF16:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::U64:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::I64:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::U8:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        case InferenceEngine::Precision::BOOL:
            res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
                _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
                it.first,
                batchedOutputs,
                _batchId,
                _batchSize);
            break;
        default:
            IE_THROW(NotImplemented) << "Unsupported output precision " << it.second->getTensorDesc().getPrecision();
        }
        _outputs[it.first] = res;
    }
}
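// Re-points the given (non-batched) request to this request's blobs. Used by the timeout
// fallback path, where a request is executed individually instead of as part of a batch.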
void AutoBatchInferRequest::SetBlobsToAnotherRequest(SoIInferRequestInternal& req) {
    for (const auto& it : _networkInputs) {
        auto& name = it.first;
        // this request is already in BUSY state, so using the internal functions safely
        auto blob = GetBlob(name);
        if (req->GetBlob(name) != blob)
            req->SetBlob(name, blob);
    }
    for (const auto& it : _networkOutputs) {
        auto& name = it.first;
        // this request is already in BUSY state, so using the internal functions safely
        auto blob = GetBlob(name);
        if (req->GetBlob(name) != blob)
            req->SetBlob(name, blob);
    }
}
void AutoBatchInferRequest::CopyInputsIfNeeded() {
    for (const auto& it : _networkInputs) {
        auto& name = it.first;
        // this request is already in BUSY state, so using the internal functions safely
        CopyBlobIfNeeded(GetBlob(name), _myBatchedRequestWrapper._inferRequestBatched->GetBlob(name), true);
    }
}
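// Copies data between a request-local blob and the corresponding slice of the worker's batched
// blob, unless the two already alias the same memory (the usual zero-copy case). For inputs the
// slice offset is applied to the destination (the batched blob), for outputs to the source.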
void AutoBatchInferRequest::CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src,
                                             InferenceEngine::Blob::Ptr dst,
                                             bool bInput) {
    auto bufferDst = dst->buffer();
    auto ptrDst = bufferDst.as<char*>();
    auto bufferSrc = src->cbuffer();
    auto ptrSrc = bufferSrc.as<const char*>();
    ptrdiff_t szDst = dst->byteSize();
    ptrdiff_t szSrc = src->byteSize();
    if (bInput) {
        ptrdiff_t offset = szSrc != szDst ? _batchId * szDst / _batchSize : 0;
        if ((ptrDst + offset) == ptrSrc)
            return;
        else
            memcpy(ptrDst + offset, ptrSrc, szSrc);
    } else {
        ptrdiff_t offset = szSrc != szDst ? _batchId * szSrc / _batchSize : 0;
        if ((ptrSrc + offset) == ptrDst)
            return;
        else
            memcpy(ptrDst, ptrSrc + offset, szDst);
    }
}
void AutoBatchInferRequest::CopyOutputsIfNeeded() {
    for (const auto& it : _networkOutputs) {
        auto& name = it.first;
        // this request is already in BUSY state, so using the internal functions safely
        CopyBlobIfNeeded(_myBatchedRequestWrapper._inferRequestBatched->GetBlob(name), GetBlob(name), false);
    }
}
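// The async wrapper has a single pipeline stage run by ThisRequestExecutor: the stage pushes
// the request's completion task into the worker's queue and, once the queue holds a full batch,
// wakes the worker thread. The task itself re-throws any stored exception and, if the batched
// path was used, copies this request's outputs back out of the batched blob.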
AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest(
    const AutoBatchInferRequest::Ptr& inferRequest,
    InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch,
    const ITaskExecutor::Ptr& callbackExecutor)
    : AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor),
      _inferRequestWithoutBatch(inferRequestWithoutBatch),
      _inferRequest{inferRequest} {
    // this executor starts the inference while the task (checking the result) is passed to the next stage
    struct ThisRequestExecutor : public ITaskExecutor {
        explicit ThisRequestExecutor(AutoBatchAsyncInferRequest* _this_) : _this{_this_} {}
        void run(Task task) override {
            auto& workerInferRequest = _this->_inferRequest->_myBatchedRequestWrapper;
            std::pair<AutoBatchAsyncInferRequest*, InferenceEngine::Task> t;
            t.first = _this;
            t.second = std::move(task);
            workerInferRequest._tasks.push(t);
            // it is ok to call size() here as the queue only grows (and the bulk removal happens under the mutex)
            const int sz = static_cast<int>(workerInferRequest._tasks.size());
            if (sz == workerInferRequest._batchSize) {
                workerInferRequest._cond.notify_one();
            }
        };
        AutoBatchAsyncInferRequest* _this = nullptr;
    };
    _pipeline = {{/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
                      if (this->_inferRequest->_exceptionPtr)  // if the exception happened in the batch1 fallback
                          std::rethrow_exception(this->_inferRequest->_exceptionPtr);
                      auto& batchReq = this->_inferRequest->_myBatchedRequestWrapper;
                      if (batchReq._exceptionPtr)  // when the batchN execution failed
                          std::rethrow_exception(batchReq._exceptionPtr);
                      // in the case of non-batched execution the blobs were set explicitly
                      if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
                          this->_inferRequest->_wasBatchedRequestUsed)
                          this->_inferRequest->CopyOutputsIfNeeded();
                  }}};
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoBatchAsyncInferRequest::GetPerformanceCounts()
    const {
    CheckState();
    if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == _inferRequest->_wasBatchedRequestUsed)
        return _inferRequest->_myBatchedRequestWrapper._inferRequestBatched->GetPerformanceCounts();
    else
        return _inferRequestWithoutBatch->GetPerformanceCounts();
}

void AutoBatchAsyncInferRequest::Infer_ThreadUnsafe() {
    InferUsingAsync();
}

AutoBatchAsyncInferRequest::~AutoBatchAsyncInferRequest() {
    StopAndWait();
}
// ------------------------------AutoBatchExecutableNetwork----------------------------
AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(
    const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch,
    const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch,
    const DeviceInformation& networkDevice,
    const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
    const std::set<std::string>& batchedInputs,
    const std::set<std::string>& batchedOutputs)
    : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr,
                                                          std::make_shared<InferenceEngine::ImmediateExecutor>()),
      _network{networkWithBatch},
      _networkWithoutBatch{networkWithoutBatch},
      _config{config},
      _batchedInputs(batchedInputs),
      _batchedOutputs(batchedOutputs) {
    // WA for gcc 4.8 (fails compilation with member init-list)
    _device = networkDevice;
    auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT));
    IE_ASSERT(time_out != config.end());
    _timeOut = ParseTimeoutValue(time_out->second.as<std::string>());
}

AutoBatchExecutableNetwork::~AutoBatchExecutableNetwork() {
    _terminate = true;
    for (auto w : _workerRequests) {
        w->_thread.join();
    }
    _workerRequests.clear();
}
unsigned int AutoBatchExecutableNetwork::ParseTimeoutValue(const std::string& s) {
    auto val = std::stoi(s);
    if (val < 0)
        IE_THROW(ParameterMismatch) << "Value for the " << CONFIG_KEY(AUTO_BATCH_TIMEOUT) << " should be unsigned int";
    return val;
}

std::shared_ptr<InferenceEngine::RemoteContext> AutoBatchExecutableNetwork::GetContext() const {
    return _networkWithoutBatch->GetContext();
}
InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
    InferenceEngine::InputsDataMap networkInputs,
    InferenceEngine::OutputsDataMap networkOutputs) {
    auto workerRequestPtrAndId = GetWorkerInferRequest();
    return std::make_shared<AutoBatchInferRequest>(networkInputs,
                                                   networkOutputs,
                                                   workerRequestPtrAndId.first,
                                                   workerRequestPtrAndId.second,
                                                   _device.batchForDevice,
                                                   _batchedInputs,
                                                   _batchedOutputs);
}

InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
    const std::vector<std::shared_ptr<const ov::Node>>& inputs,
    const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
    if (!this->_plugin || !_plugin->IsNewAPI())
        return nullptr;
    auto workerRequestPtrAndId = GetWorkerInferRequest();
    return std::make_shared<AutoBatchInferRequest>(inputs,
                                                   outputs,
                                                   workerRequestPtrAndId.first,
                                                   workerRequestPtrAndId.second,
                                                   _device.batchForDevice,
                                                   _batchedInputs,
                                                   _batchedOutputs);
}
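// Returns the worker (batched) request that this logical request is assigned to, together with
// its slot (batch_id) within that worker. Every _device.batchForDevice-th request creates a new
// worker: a batched infer request on the underlying device, a completion callback that signals
// all collected per-slot tasks, and a collector thread that either starts the batched request
// when a full batch of tasks has arrived or, on timeout, falls back to executing the
// already-collected requests one by one without batching.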
std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecutableNetwork::GetWorkerInferRequest() {
    auto num = _numRequestsCreated++;
    std::lock_guard<std::mutex> lock(_workerRequestsMutex);
    auto batch_id = num % _device.batchForDevice;
    if (!batch_id) {  // need new request
        _workerRequests.push_back(std::make_shared<WorkerInferRequest>());
        auto workerRequestPtr = _workerRequests.back().get();
        workerRequestPtr->_inferRequestBatched = {_network->CreateInferRequest(), _network._so};
        workerRequestPtr->_batchSize = _device.batchForDevice;
        workerRequestPtr->_completionTasks.resize(workerRequestPtr->_batchSize);
        workerRequestPtr->_inferRequestBatched->SetCallback(
            [workerRequestPtr, this](std::exception_ptr exceptionPtr) mutable {
                if (exceptionPtr)
                    workerRequestPtr->_exceptionPtr = exceptionPtr;
                IE_ASSERT(workerRequestPtr->_completionTasks.size() == (size_t)workerRequestPtr->_batchSize);
                // notify the individual requests on the completion
                for (int c = 0; c < workerRequestPtr->_batchSize; c++) {
                    workerRequestPtr->_completionTasks[c]();
                }
                // reset the timeout
                workerRequestPtr->_cond.notify_one();
            });

        workerRequestPtr->_thread = std::thread([workerRequestPtr, this] {
            while (1) {
                std::cv_status status;
                {
                    std::unique_lock<std::mutex> lock(workerRequestPtr->_mutex);
                    status = workerRequestPtr->_cond.wait_for(lock, std::chrono::milliseconds(_timeOut));
                }
                if (_terminate) {
                    break;
                } else {
                    // as we pop the tasks from the queue only here
                    // it is ok to call size() (as the _tasks can only grow in parallel)
                    const int sz = static_cast<int>(workerRequestPtr->_tasks.size());
                    if (sz == workerRequestPtr->_batchSize) {
                        std::pair<AutoBatchAsyncInferRequest*, InferenceEngine::Task> t;
                        for (int n = 0; n < sz; n++) {
                            IE_ASSERT(workerRequestPtr->_tasks.try_pop(t));
                            workerRequestPtr->_completionTasks[n] = std::move(t.second);
                            t.first->_inferRequest->CopyInputsIfNeeded();
                            t.first->_inferRequest->_wasBatchedRequestUsed =
                                AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED;
                        }
                        workerRequestPtr->_inferRequestBatched->StartAsync();
                    } else if ((status == std::cv_status::timeout) && sz) {
                        // timeout to collect the batch is over, have to execute the requests in the batch1 mode
                        std::pair<AutoBatchAsyncInferRequest*, InferenceEngine::Task> t;
                        // popping all tasks collected by the moment of the time-out and execute each with batch1
                        std::atomic<int> arrived = {0};
                        std::promise<void> all_completed;
                        auto all_completed_future = all_completed.get_future();
                        for (int n = 0; n < sz; n++) {
                            IE_ASSERT(workerRequestPtr->_tasks.try_pop(t));
                            t.first->_inferRequestWithoutBatch->SetCallback(
                                [t, sz, &arrived, &all_completed](std::exception_ptr p) {
                                    if (p)
                                        t.first->_inferRequest->_exceptionPtr = p;
                                    t.second();
                                    if (sz == ++arrived)
                                        all_completed.set_value();
                                });
                            t.first->_inferRequest->_wasBatchedRequestUsed =
                                AutoBatchInferRequest::eExecutionFlavor::TIMEOUT_EXECUTED;
                            t.first->_inferRequest->SetBlobsToAnotherRequest(t.first->_inferRequestWithoutBatch);
                            t.first->_inferRequestWithoutBatch->StartAsync();
                        }
                        all_completed_future.get();
                        // now when all the tasks for this batch are completed, start waiting for the timeout again
                    }
                }
            }
        });
    }
    return {*_workerRequests.back(), static_cast<int>(batch_id)};
}
InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequest() {
    if (!_network) {
        auto res = _networkWithoutBatch->CreateInferRequest();
        res->setPointerToExecutableNetworkInternal(shared_from_this());
        res->setPointerToSo(_networkWithoutBatch._so);
        _so = _networkWithoutBatch._so;
        return res;
    }
    // trying to create the new API request first
    IInferRequestInternal::Ptr syncRequestImpl = CreateInferRequestImpl(_parameters, _results);
    if (!syncRequestImpl)
        syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
    syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
    InferenceEngine::SoIInferRequestInternal inferRequestWithoutBatch = {_networkWithoutBatch->CreateInferRequest(),
                                                                         _networkWithoutBatch._so};
    return std::make_shared<AutoBatchAsyncInferRequest>(
        std::static_pointer_cast<AutoBatchInferRequest>(syncRequestImpl),
        inferRequestWithoutBatch,
        _callbackExecutor);
}
std::shared_ptr<ngraph::Function> AutoBatchExecutableNetwork::GetExecGraphInfo() {
    return _network && _network->GetExecGraphInfo() ? _network->GetExecGraphInfo()
                                                    : _networkWithoutBatch->GetExecGraphInfo();
}
void AutoBatchExecutableNetwork::SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) {
    auto timeout = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT));
    if (timeout == config.end() || config.size() > 1) {
        IE_THROW() << "The only config that can be changed on the fly for AutoBatching is the "
                   << CONFIG_KEY(AUTO_BATCH_TIMEOUT);
    } else {
        _timeOut = ParseTimeoutValue(timeout->second.as<std::string>());
    }
}
InferenceEngine::Parameter AutoBatchExecutableNetwork::GetConfig(const std::string& name) const {
    auto it = _config.find(name);
    if (it != _config.end()) {
        return it->second;
    } else {
        // find config key among networks config keys
        auto param = _networkWithoutBatch->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
        for (auto&& configKey : param.as<std::vector<std::string>>()) {
            if (configKey == name) {
                return _networkWithoutBatch->GetConfig(configKey);
            }
        }
        IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config";
    }
}
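// OPTIMAL_NUMBER_OF_INFER_REQUESTS is deduced as the device's own optimal request number
// multiplied by the batch size (unless the user capped it via PERFORMANCE_HINT_NUM_REQUESTS),
// and never less than the batch size itself; other supported metrics are reported directly.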
InferenceEngine::Parameter AutoBatchExecutableNetwork::GetMetric(const std::string& name) const {
    if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
        auto reqs = 0;
        try {
            auto hint = _networkWithoutBatch->GetConfig(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
            reqs = InferenceEngine::PerfHintsConfig::CheckPerformanceHintRequestValue(hint);
            if (!reqs)  // no limitations from user, let's deduce the full blown #requests
                // (multiplied by the devices capabilities to run multiple <batched> requests for further perf)
                reqs = _device.batchForDevice *
                       _networkWithoutBatch->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
        } catch (const InferenceEngine::Exception&) {
        }
        reqs = std::max(reqs, _device.batchForDevice);  // round up to the possible user's value
        IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, reqs);
    } else if (name == METRIC_KEY(NETWORK_NAME)) {
        IE_SET_METRIC_RETURN(NETWORK_NAME, _networkWithoutBatch->GetMetric(METRIC_KEY(NETWORK_NAME)).as<std::string>());
    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
                             {METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
                              METRIC_KEY(SUPPORTED_METRICS),
                              METRIC_KEY(NETWORK_NAME),
                              METRIC_KEY(SUPPORTED_CONFIG_KEYS)});
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS,
                             {CONFIG_KEY(AUTO_BATCH_TIMEOUT)});  // only timeout can be changed on the fly
    } else {
        IE_THROW() << "Unsupported Network metric: " << name;
    }
}
// ------------------------------AutoBatchInferencePlugin----------------------------

namespace {

std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config,
                                                const std::map<std::string, std::string>& local) {
    for (auto&& kvp : local) {
        config[kvp.first] = kvp.second;
    }
    return config;
}

}  // namespace
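// Parses a device string of the form "<device>(<batch>)", e.g. "GPU(4)" (the part that follows
// "BATCH:" in the full device name). A missing "(<batch>)" suffix leaves the batch size at 0,
// which means "deduce the batch size automatically at load time".
// Illustrative application-side usage (hypothetical snippet, not part of this plugin):
//   InferenceEngine::Core core;
//   auto exec = core.LoadNetwork(network, "BATCH:GPU(4)", {{CONFIG_KEY(AUTO_BATCH_TIMEOUT), "100"}});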
DeviceInformation AutoBatchInferencePlugin::ParseBatchDevice(const std::string& deviceWithBatch) {
    auto&& d = deviceWithBatch;
    auto openingBracket = d.find_first_of('(');
    auto closingBracket = d.find_first_of(')', openingBracket);
    auto deviceName = d.substr(0, openingBracket);

    int batch = 0;
    if (closingBracket != std::string::npos && openingBracket < closingBracket) {
        batch = std::stol(d.substr(openingBracket + 1, closingBracket - 1));

        if (batch <= 0) {
            IE_THROW() << "Batch value for '" << deviceName << "' must be > 0, while " << batch << " is passed";
        }
    }
    return {deviceName, {{}}, batch};
}
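// Builds the DeviceInformation for the underlying device: resolves the device name/ID, collects
// the device-supported subset of the merged config and, if the device itself does not accept
// CACHE_DIR, forwards the CACHE_DIR setting explicitly so that model caching still reaches
// core->LoadNetwork(); any leftover key that is supported neither by this plugin nor by the
// device is rejected.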
DeviceInformation AutoBatchInferencePlugin::ParseMetaDevice(const std::string& devicesBatchCfg,
                                                            const std::map<std::string, std::string>& config) const {
    auto getDeviceConfig = [&](const DeviceName& deviceWithID) {
        DeviceIDParser deviceParser(deviceWithID);
        std::string deviceName = deviceParser.getDeviceName();
        std::map<std::string, std::string> tconfig = mergeConfigs(_config, config);

        // set device ID if any
        std::string deviceIDLocal = deviceParser.getDeviceID();
        if (!deviceIDLocal.empty()) {
            tconfig[PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal;
        }
        // passthrough the cache dir to core->LoadNetwork when the underlying device does not support cache dir
        auto deviceConfig = GetCore()->GetSupportedConfig(deviceName, tconfig);
        if (tconfig.find(CONFIG_KEY(CACHE_DIR)) != tconfig.end() &&
            deviceConfig.find(CONFIG_KEY(CACHE_DIR)) == deviceConfig.end()) {
            auto tmpiter = tconfig.find(CONFIG_KEY(CACHE_DIR));
            if (tmpiter != tconfig.end())
                deviceConfig.insert({tmpiter->first, tmpiter->second});
        }
        return deviceConfig;
    };

    auto metaDevice = ParseBatchDevice(devicesBatchCfg);
    metaDevice.config = getDeviceConfig(metaDevice.deviceName);

    auto cfg = config;
    // check that no irrelevant config-keys left
    for (auto k : config) {
        const auto& name = k.first;
        auto found_in_supported_cfg = std::find(supported_configKeys.begin(), supported_configKeys.end(), k.first);
        auto found_in_device_cfg = metaDevice.config.find(k.first);
        if (found_in_device_cfg == metaDevice.config.end() && found_in_supported_cfg == supported_configKeys.end()) {
            IE_THROW() << "Unsupported config key: " << name;
        }
    }
    return metaDevice;
}
RemoteContext::Ptr AutoBatchInferencePlugin::CreateContext(const InferenceEngine::ParamMap& config) {
    auto cfg = config;
    auto it = cfg.find(CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG));
    if (it == cfg.end())
        IE_THROW() << "Value for KEY_AUTO_BATCH_DEVICE_CONFIG is not set";

    auto val = it->second.as<std::string>();
    auto core = GetCore();
    if (!core)
        return nullptr;
    auto metaDevice = ParseMetaDevice(val, std::map<std::string, std::string>());
    cfg.erase(it);
    return core->CreateContext(metaDevice.deviceName, cfg);
}
Parameter AutoBatchInferencePlugin::GetConfig(const std::string& name,
                                              const std::map<std::string, Parameter>& options) const {
    if (supported_configKeys.end() != std::find(supported_configKeys.begin(), supported_configKeys.end(), name)) {
        auto it = _config.find(name);
        if (it == _config.end()) {
            IE_THROW() << "Value for " << name << " is not set";
        } else {
            return {it->second};
        }
    } else {
        IE_THROW() << "Unsupported config key: " << name;
    }
}
void AutoBatchInferencePlugin::CheckConfig(const std::map<std::string, std::string>& config) {
    for (auto&& kvp : config) {
        const auto name = kvp.first;
        const auto val = kvp.second;
        if (supported_configKeys.end() == std::find(supported_configKeys.begin(), supported_configKeys.end(), name))
            IE_THROW() << "Unsupported config key: " << name;
        if (name == CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG)) {
            ParseBatchDevice(val);
        } else if (name == CONFIG_KEY(AUTO_BATCH_TIMEOUT)) {
            try {
                auto t = std::stoi(val);
                if (t < 0)
                    IE_THROW(ParameterMismatch);
            } catch (const std::exception&) {
                IE_THROW(ParameterMismatch)
                    << "Expecting unsigned int value for " << CONFIG_KEY(AUTO_BATCH_TIMEOUT) << ", got " << val;
            }
        }
    }
}
void AutoBatchInferencePlugin::SetConfig(const std::map<std::string, std::string>& config) {
    CheckConfig(config);
    for (auto&& kvp : config) {
        _config[kvp.first] = kvp.second;
    }
}
static const Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoBatchPlugin"};
IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoBatchInferencePlugin, version)

AutoBatchInferencePlugin::AutoBatchInferencePlugin() {
    _pluginName = "BATCH";
    _config[CONFIG_KEY(AUTO_BATCH_TIMEOUT)] = "1000";  // default value, in ms
}
InferenceEngine::Parameter AutoBatchInferencePlugin::GetMetric(
    const std::string& name,
    const std::map<std::string, InferenceEngine::Parameter>& options) const {
    if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        std::vector<std::string> metrics;
        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
        metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
    } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, _pluginName);
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, supported_configKeys);
    } else {
        IE_THROW(NotFound) << "Unsupported metric key " << name;
    }
}
IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(
    const InferenceEngine::CNNNetwork& network,
    const std::map<std::string, std::string>& config) {
    return LoadNetworkImpl(network, nullptr, config);
}
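// Core loading routine shared by both LoadExeNetworkImpl overloads:
//  1. parse the BATCH device config and load-time options, disabling recursive auto-batching;
//  2. run the FindBatch pass on a clone of the network to discover inputs/outputs batched by the
//     0th dimension (dynamic or originally-batched networks make it fall back to batch 1);
//  3. if no explicit batch was requested, deduce it from the device's OPTIMAL_BATCH_SIZE and the
//     performance-hint request number, and for GPU additionally cap it by the estimated memory
//     footprint of a batch-1 compiled network;
//  4. reshape the batched inputs to the chosen batch size and compile both the batched and the
//     plain (batch-1 fallback) networks, falling back to batch 1 if compilation fails.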
InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadNetworkImpl(
    const InferenceEngine::CNNNetwork& network,
    const std::shared_ptr<InferenceEngine::RemoteContext> ctx,
    const std::map<std::string, std::string>& config) {
    auto core = GetCore();
    if (core == nullptr) {
        IE_THROW() << "Please work with the Auto-Batching device via the InferenceEngine::Core object";
    }
    auto fullConfig = mergeConfigs(_config, config);
    auto device_batch = fullConfig.find(CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG));
    if (device_batch == fullConfig.end()) {
        IE_THROW() << "KEY_AUTO_BATCH key is not set for BATCH device";
    }
    auto metaDevice = ParseMetaDevice(device_batch->second, fullConfig);
    const auto& deviceName = metaDevice.deviceName;
    const auto& deviceConfig = metaDevice.config;
    auto deviceConfigNoAutoBatch = deviceConfig;
    // avoid recursive auto-batching
    deviceConfigNoAutoBatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);

    std::set<std::string> batched_inputs;
    std::set<std::string> batched_outputs;
    // check that the auto-batching is applicable in general
    try {
        // if applicable, the Auto-Batching is implicitly enabled via the performance hints
        const auto tput = CONFIG_VALUE(THROUGHPUT);
        const bool bTputInPlg = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == tput;
        const auto& mode = deviceConfig.find(CONFIG_KEY(PERFORMANCE_HINT));
        const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput);
        // if the auto-batching is enabled implicitly, check the dims carefully, to avoid outstanding failures
        const bool check_dims = (bTputInPlg || bTputInLoadCfg);
        CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
        auto function = clonedNetwork.getFunction();
        // find the batch dim
        ov::pass::Manager m;
        m.register_pass<ngraph::pass::InitNodeInfo>();
        m.register_pass<ov::pass::FindBatch>(false, check_dims);
        m.run_passes(function);
        // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
        // input(s) should have the batch dim as the first dim (current limitation of the auto-batching impl)
        const auto& params = function->get_parameters();
        for (size_t input_id = 0; input_id < params.size(); input_id++) {
            const auto& input = params[input_id];
            const auto& shape = input->get_partial_shape();
            // currently no plugin supports batched execution for dynamic networks
            if (shape.is_dynamic())
                IE_THROW(NotImplemented) << "Auto-batching does not support dynamic networks!";
            // check the batch dim: either 0th (and the original batch size of 1) or none
            if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
                const auto& static_shape = input->get_shape();
                if (static_shape[0] != 1)
                    IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
                batched_inputs.insert(
                    ngraph::op::util::get_ie_output_name(params[input_id]->output(0)));  // batched dim for the input
            } else {
                // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
                for (size_t s = 1; s < shape.size(); s++)
                    if (ov::DimensionTracker::get_label(shape[s]))
                        IE_THROW(NotImplemented)
                            << "Auto-batching operates only networks with inputs/outputs batched by 0th dimension";
            }
        }
        const auto& results = function->get_results();
        for (size_t output_id = 0; output_id < results.size(); output_id++) {
            const auto& output = results[output_id];
            const auto& shape = output->get_output_partial_shape(0);
            if (shape.is_dynamic())
                IE_THROW(NotImplemented) << "Auto-batching does not support dynamic networks!";
            // check the batch dim: either 0th (and the original batch size of 1) or none
            if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
                if (shape[0] != 1)
                    IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
                const auto& node = output->input_value(0);
                batched_outputs.insert(ngraph::op::util::get_ie_output_name(
                    ov::Output<const ov::Node>(node.get_node(), node.get_index())));
            } else {
                // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
                for (size_t s = 1; s < shape.size(); s++)
                    if (ov::DimensionTracker::get_label(shape[s]))
                        IE_THROW(NotImplemented)
                            << "Auto-batching operates only networks with outputs batched by 0th dimension";
            }
        }
        if (!batched_inputs.size() || !batched_outputs.size())
            IE_THROW(NotImplemented)
                << "Auto-batching supports only networks with inputs/outputs featuring batched dim!";
    } catch (...) {
        metaDevice.batchForDevice = 1;
    }

    if (!metaDevice.batchForDevice) {
        unsigned int requests = 0;
        // batch size is not set explicitly via the device name, e.g. BATCH:GPU(4),
        // so let's query the optimal batch size
        std::map<std::string, InferenceEngine::Parameter> options;
        options["MODEL_PTR"] = std::const_pointer_cast<ngraph::Function>(network.getFunction());
        auto optBatchSize = core->GetMetric(deviceName, METRIC_KEY(OPTIMAL_BATCH_SIZE), options).as<unsigned int>();
        auto res = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
        requests = PerfHintsConfig::CheckPerformanceHintRequestValue(res);
        const auto& reqs = config.find(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS));
        if (reqs != config.end())
            requests = static_cast<unsigned int>(PerfHintsConfig::CheckPerformanceHintRequestValue(reqs->second));
        if (requests)
            optBatchSize = std::max(1u, std::min(requests, optBatchSize));
        if (optBatchSize > 2)  // batching is usually inefficient for batch<4 (as batch1 kernels are heavily optimized)
            metaDevice.batchForDevice = optBatchSize;
        else
            metaDevice.batchForDevice = 1;
    }

    auto report_footprint = [](std::shared_ptr<ICore> pCore, std::string device) -> size_t {
        size_t footprint = 0;
        // TODO: use the per-network metric (22.2) rather than plugin-level
        auto stats =
            pCore->GetMetric(device, ov::intel_gpu::memory_statistics.name()).as<std::map<std::string, uint64_t>>();
        for (auto s : stats)
            footprint += s.second;
        return footprint;
    };

    size_t batch1_footprint = 0;
    if (deviceName.find("GPU") != std::string::npos)
        batch1_footprint = report_footprint(core, deviceName);
    auto executableNetworkWithoutBatch = ctx ? core->LoadNetwork(network, ctx, deviceConfigNoAutoBatch)
                                             : core->LoadNetwork(network, deviceName, deviceConfigNoAutoBatch);
    if (deviceName.find("GPU") != std::string::npos) {
        batch1_footprint = report_footprint(core, deviceName) - batch1_footprint;
        if (batch1_footprint) {
            const auto total_mem =
                GetCore()->GetMetric(deviceName, GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)).as<uint64_t>();
            const int estimated_batch = static_cast<int>((total_mem - batch1_footprint) / batch1_footprint);
            int closest = static_cast<int>(pow(2, floor(log(estimated_batch) / log(2))));
            closest = std::max(1, closest);
            metaDevice.batchForDevice = std::min(metaDevice.batchForDevice, closest);
        }
    }
    // auto-batch settings
    std::unordered_map<std::string, InferenceEngine::Parameter> networkConfig;
    for (auto c : fullConfig) {
        if (supported_configKeys.end() != std::find(supported_configKeys.begin(), supported_configKeys.end(), c.first))
            networkConfig.insert(c);
    }

    InferenceEngine::SoExecutableNetworkInternal executableNetworkWithBatch;
    if (metaDevice.batchForDevice > 1 && batched_inputs.size()) {
        try {
            CNNNetwork reshaped(InferenceEngine::details::cloneNetwork(network));
            ICNNNetwork::InputShapes shapes = reshaped.getInputShapes();
            for (const auto& input : batched_inputs)
                shapes[input][0] = metaDevice.batchForDevice;
            reshaped.reshape(shapes);
            executableNetworkWithBatch = ctx ? core->LoadNetwork(reshaped, ctx, deviceConfigNoAutoBatch)
                                             : core->LoadNetwork(reshaped, deviceName, deviceConfigNoAutoBatch);
        } catch (...) {
            metaDevice.batchForDevice = 1;
        }
    }

    return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
                                                        executableNetworkWithoutBatch,
                                                        metaDevice,
                                                        networkConfig,
                                                        batched_inputs,
                                                        batched_outputs);
}
InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(
    const InferenceEngine::CNNNetwork& network,
    const std::shared_ptr<InferenceEngine::RemoteContext>& context,
    const std::map<std::string, std::string>& config) {
    return LoadNetworkImpl(network, context, config);
}
InferenceEngine::QueryNetworkResult AutoBatchInferencePlugin::QueryNetwork(
    const InferenceEngine::CNNNetwork& network,
    const std::map<std::string, std::string>& config) const {
    auto core = GetCore();
    if (!core)
        return InferenceEngine::QueryNetworkResult();
    auto cfg = config;
    for (auto c : cfg) {
        if (c.first == CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG)) {
            auto val = c.second;
            cfg.erase(c.first);
            auto metaDevice = ParseMetaDevice(val, cfg);
            return core->QueryNetwork(network, metaDevice.deviceName, cfg);
        }
    }
    IE_THROW() << "Value for KEY_AUTO_BATCH is not set";
}
}  // namespace AutoBatchPlugin