auto-batching POC squashed (all commits from auto-batch-2021.3 branch)

(cherry picked from commit d7742f2c747bc514a126cc9a4d5b99f0ff5cbbc7)
myshevts 2021-03-25 15:01:30 +03:00
parent f8439eeed8
commit 9357d3fd3d
8 changed files with 689 additions and 10 deletions
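For context, a minimal usage sketch of the new BATCH device (illustrative only, not part of this commit). The "AUTO_BATCH" key and the "<device>(<batch>)" value format come from the plugin code below (CONFIG_KEY(AUTO_BATCH) and ParseMetaDevice); the model path is a placeholder.

#include <inference_engine.hpp>
#include <vector>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder model
    // "AUTO_BATCH" is declared as CONFIG_KEY(AUTO_BATCH) in this commit; ParseMetaDevice
    // parses the value as "<device>(<batch>)", here batching on the GPU with batch size 4.
    auto execNet = core.LoadNetwork(network, "BATCH", {{"AUTO_BATCH", "GPU(4)"}});
    // A worker's batched request only starts once <batch> sibling requests have arrived
    // (see AutoBatchInferRequest::InferImpl), so keep at least that many requests in flight.
    auto nireq = execNet.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
    std::vector<InferenceEngine::InferRequest> requests;
    for (unsigned int i = 0; i < nireq; ++i) {
        requests.push_back(execNet.CreateInferRequest());
        requests.back().StartAsync();
    }
    for (auto& r : requests)
        r.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
    return 0;
}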

View File

@@ -666,14 +666,8 @@ int main(int argc, char* argv[]) {
next_step(ss.str());
// warming up - out of scope
auto inferRequest = inferRequestsQueue.getIdleRequest();
if (!inferRequest) {
IE_THROW() << "No idle Infer Requests!";
}
if (FLAGS_api == "sync") {
inferRequest->infer();
} else {
inferRequest->startAsync();
for (size_t i = 0; i < inferRequestsQueue.requests.size(); i++) {
inferRequestsQueue.getIdleRequest()->startAsync();
}
inferRequestsQueue.waitAll();
auto duration_ms = double_to_string(inferRequestsQueue.getLatencies()[0]);
@@ -694,7 +688,7 @@ int main(int argc, char* argv[]) {
while ((niter != 0LL && iteration < niter) ||
(duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
(FLAGS_api == "async" && iteration % nireq != 0)) {
inferRequest = inferRequestsQueue.getIdleRequest();
auto inferRequest = inferRequestsQueue.getIdleRequest();
if (!inferRequest) {
IE_THROW() << "No idle Infer Requests!";
}

View File

@@ -32,6 +32,8 @@ add_subdirectory(hetero_plugin)
add_subdirectory(multi_device)
add_subdirectory(auto_batch)
add_subdirectory(transformations)
add_subdirectory(inference_engine)

View File

@@ -0,0 +1,25 @@
# Copyright (C) 2018-2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set (TARGET_NAME "AutoBatchPlugin")
if(ENABLE_LTO)
ie_enable_lto()
endif()
file(GLOB SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
)
file(GLOB HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
)
ie_add_plugin(NAME ${TARGET_NAME}
DEVICE_NAME "BATCH"
SOURCES ${SOURCES} ${HEADERS}
VERSION_DEFINES_FOR auto_batch.cpp)
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine inference_engine_legacy)
set_ie_threading_interface_for(${TARGET_NAME})

View File

@@ -0,0 +1,470 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include <string>
#include <vector>
#include <iostream>
#include <memory>
#include <utility>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include "ie_metric_helpers.hpp"
#include <cpp_interfaces/base/ie_infer_async_request_base.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <legacy/ie_util_internal.hpp>
#include <ie_plugin_config.hpp>
#include "auto_batch.hpp"
namespace AutoBatchPlugin {
using namespace InferenceEngine;
template <Precision::ePrecision precision>
Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size_t batch_id, size_t batch_num) {
typedef typename PrecisionTrait<precision>::value_type TYPE;
typedef typename std::add_pointer<TYPE>::type TYPEPTR;
auto ptr = batched_blob->buffer().as<TYPEPTR>();
auto sizePerBatch = batched_blob->size() / batch_num;
auto layout = batched_blob->getTensorDesc().getLayout();
SizeVector dims = batched_blob->getTensorDesc().getDims();
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW
|| layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC
|| layout == InferenceEngine::Layout::NDHWC) {
dims[0] = 1;
assert(batched_blob->getTensorDesc().getPrecision() == precision);
return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
ptr + sizePerBatch * batch_id, sizePerBatch);
} else {
// same blob for all requests (e.g. constants)
return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
ptr);
}
}
// ------------------------------AutoBatchInferRequest----------------------------
AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
AutoBatchExecutableNetwork::WorkerInferRequest* workerRequestPtr,
int batch_id, int num_batch,
bool needPerfCounters)
: InferRequestInternal(networkInputs, networkOutputs), _workerInferRequest(workerRequestPtr),
_needPerfCounters(needPerfCounters) {
// Allocate all input blobs
for (const auto &it : networkInputs) {
auto blob = workerRequestPtr->_inferRequest.GetBlob(it.first);
Blob::Ptr res;
switch (it.second->getTensorDesc().getPrecision()) {
case InferenceEngine::Precision::FP32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::I32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::I8:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::U16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::I16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::U8:
case InferenceEngine::Precision::BOOL:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
default:
THROW_IE_EXCEPTION << "Unsupported input precision " << it.second->getTensorDesc().getPrecision();
}
_inputs[it.first] = res;
}
// Allocate all output blobs
for (const auto &it : networkOutputs) {
auto blob = workerRequestPtr->_inferRequest.GetBlob(it.first);
Blob::Ptr res;
switch (it.second->getTensorDesc().getPrecision()) {
case InferenceEngine::Precision::FP32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::I32:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::I8:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::U16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::I16:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
case InferenceEngine::Precision::U8:
case InferenceEngine::Precision::BOOL:
res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>
(workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
break;
default:
THROW_IE_EXCEPTION << "Unsupported input precision " << it.second->getTensorDesc().getPrecision();
}
_outputs[it.first] = res;
}
}
void AutoBatchInferRequest::SetBlobsToAnotherRequest(InferRequest& req) {
// todo call Set for REMOTE BLOB
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoBatchInferRequest::GetPerformanceCounts() const {
return _perfMap;
}
void AutoBatchInferRequest::InferImpl() {
auto _event = _workerInferRequest->_event;
auto numReady = ++_workerInferRequest->_numRequestsReady;
if (numReady == _workerInferRequest->_batchSize) {
_workerInferRequest->_numRequestsReady = 0;
_workerInferRequest->_inferRequest.StartAsync();
}
_event.get();
if (_needPerfCounters) {
_perfMap = _workerInferRequest->_inferRequest.GetPerformanceCounts();
}
}
AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest(
const AutoBatchInferRequest::Ptr& inferRequest,
const bool needPerfCounters,
const AutoBatchExecutableNetwork::Ptr& autoBatchExecutableNetwork,
const ITaskExecutor::Ptr& callbackExecutor) :
AsyncInferRequestThreadSafeDefault(inferRequest,
std::make_shared<CPUStreamsExecutor>(
IStreamsExecutor::Config{"AutoBatch", 1, 1,
IStreamsExecutor::ThreadBindingType::NONE, 1, 0, 1}),
callbackExecutor),
_AutoBatchExecutableNetwork{autoBatchExecutableNetwork},
_inferRequest{inferRequest} {
}
void AutoBatchAsyncInferRequest::Infer_ThreadUnsafe() {
InferUsingAsync();
}
AutoBatchAsyncInferRequest::~AutoBatchAsyncInferRequest() {
StopAndWait();
}
// ------------------------------AutoBatchExecutableNetwork----------------------------
AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(const InferenceEngine::ExecutableNetwork& networkForDevice,
const DeviceInformation& networkDevice,
const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
const bool needPerfCounters) :
InferenceEngine::ExecutableNetworkThreadSafeDefault(
nullptr,
std::make_shared<InferenceEngine::ImmediateExecutor>()),
_device{networkDevice},
_network{networkForDevice},
_config{config},
_needPerfCounters{needPerfCounters} {
}
AutoBatchExecutableNetwork::~AutoBatchExecutableNetwork() {
// {
// std::lock_guard<std::mutex> lock(_mutex);
// _device = {};
// }
_terminate = true;
/* NOTE: The only threads that use the `AutoBatchExecutableNetwork` context are those used by the Worker infer requests.
* But the AsyncInferRequest destructor should wait for all asynchronous tasks that are used by the request
*/
_workerRequests.clear();
}
InferenceEngine::InferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) {
// todo : guard request creation from another thread/on-the-fly
auto num = _numRequestsCreated++;
auto batch_id = num % _device.batchForDevice;
if (!batch_id) { //need new request
_workerRequests.push_back(std::make_shared<WorkerInferRequest>());
auto workerRequestPtr = _workerRequests.back();
workerRequestPtr->_inferRequest = _network.CreateInferRequest();
workerRequestPtr->_batchSize = _device.batchForDevice;
workerRequestPtr->_cond = std::promise<void>();
workerRequestPtr->_event = workerRequestPtr->_cond.get_future().share();
// _idleWorkerRequests.push(workerRequestPtr);
workerRequestPtr->_inferRequest.SetCompletionCallback<std::function<void(InferRequest, StatusCode)>>(
[workerRequestPtr, this] (InferRequest , StatusCode status) mutable {
workerRequestPtr->_status = status;
auto signal = std::move(workerRequestPtr->_cond);
// reset the promise/future for next use
workerRequestPtr->_cond = std::promise<void>();
workerRequestPtr->_event = workerRequestPtr->_cond.get_future().share();
signal.set_value();
});
}
return std::make_shared<AutoBatchInferRequest>(networkInputs, networkOutputs, _workerRequests.back().get(),
batch_id, _device.batchForDevice, _needPerfCounters);
}
InferenceEngine::IInferRequest::Ptr AutoBatchExecutableNetwork::CreateInferRequest() {
auto syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
auto asyncThreadSafeImpl = std::make_shared<AutoBatchAsyncInferRequest>(std::static_pointer_cast<AutoBatchInferRequest>(syncRequestImpl),
_needPerfCounters,
std::static_pointer_cast<AutoBatchExecutableNetwork>(shared_from_this()),
_callbackExecutor);
IInferRequest::Ptr asyncRequest;
asyncRequest.reset(new InferRequestBase(asyncThreadSafeImpl), [](IInferRequest* p) { p->Release(); });
asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
return asyncRequest;
}
void AutoBatchExecutableNetwork::SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config) {
// TODO
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
}
InferenceEngine::Parameter AutoBatchExecutableNetwork::GetConfig(const std::string &name) const {
auto res = _config.find(name);
if (res != _config.end()) {
return res->second;
} else {
THROW_IE_EXCEPTION << NOT_FOUND_str << name << " not found in the ExecutableNetwork config";
}
}
InferenceEngine::Parameter AutoBatchExecutableNetwork::GetMetric(const std::string &name) const {
if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
unsigned int res = 0u;
try {
res = _network.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
} catch (const details::InferenceEngineException &iie) {
THROW_IE_EXCEPTION
<< "Every device used with the Auto-Batching should "
<< "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
<< "Failed to query the metric for the "
<< _network.GetMetric(METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>()
<< " with error:" << iie.what();
}
IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, res * _device.batchForDevice);
} else if (name == METRIC_KEY(NETWORK_NAME)) {
IE_SET_METRIC_RETURN(NETWORK_NAME, _network.GetMetric(
METRIC_KEY(NETWORK_NAME)).as<std::string>());
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, {
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(NETWORK_NAME),
METRIC_KEY(SUPPORTED_CONFIG_KEYS)
});
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys = { CONFIG_KEY(AUTO_BATCH) };
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else {
THROW_IE_EXCEPTION << "Unsupported Network metric: " << name;
}
}
// ------------------------------AutoBatchInferencePlugin----------------------------
namespace {
std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config,
const std::map<std::string, std::string> & local) {
for (auto && kvp : local) {
config[kvp.first] = kvp.second;
}
return config;
}
} // namespace
std::map<std::string, std::string> AutoBatchInferencePlugin::GetSupportedConfig(
const std::map<std::string, std::string> & config, const std::string & deviceName) const {
std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
std::map<std::string, std::string> supportedConfig;
for (auto&& key : supportedConfigKeys) {
auto itKey = config.find(key);
if (config.end() != itKey) {
supportedConfig[key] = itKey->second;
}
}
return supportedConfig;
}
DeviceInformation AutoBatchInferencePlugin::ParseMetaDevice(const std::string& devicesBatchCfg,
const std::map<std::string, std::string> & config) const {
DeviceInformation metaDevice;
auto getDeviceConfig = [&] (const DeviceName & deviceWithID) {
DeviceIDParser deviceParser(deviceWithID);
std::string deviceName = deviceParser.getDeviceName();
std::map<std::string, std::string> tconfig = mergeConfigs(_config, config);
// set device ID if any
std::string deviceIDLocal = deviceParser.getDeviceID();
if (!deviceIDLocal.empty()) {
tconfig[PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal;
}
return GetSupportedConfig(tconfig, deviceName);
};
auto && d = devicesBatchCfg;
{
auto openingBracket = d.find_first_of('(');
auto closingBracket = d.find_first_of(')', openingBracket);
auto deviceName = d.substr(0, openingBracket);
int batch = -1;
if (closingBracket != std::string::npos && openingBracket < closingBracket) {
batch = std::stol(d.substr(openingBracket + 1, closingBracket - openingBracket - 1));
if (batch <= 0) {
THROW_IE_EXCEPTION << "Batch value for '" << deviceName << "' must be > 0, while " << batch
<< " is passed";
}
}
// create meta device
auto cfg = getDeviceConfig(deviceName);
std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
if (std::find(std::begin(supportedConfigKeys), std::end(supportedConfigKeys), CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN))
!= std::end(supportedConfigKeys)) {
cfg.emplace(CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN), "");
}
metaDevice = { deviceName, cfg, batch };
}
return metaDevice;
}
Parameter AutoBatchInferencePlugin::GetConfig(const std::string& name,
const std::map<std::string, Parameter> & options) const {
if (name == CONFIG_KEY(AUTO_BATCH)) {
auto it = _config.find(CONFIG_KEY(AUTO_BATCH));
if (it == _config.end()) {
THROW_IE_EXCEPTION << "Value for KEY_AUTO_BATCH is not set";
} else {
return { it->second };
}
} else {
THROW_IE_EXCEPTION << "Unsupported config key: " << name;
}
}
void AutoBatchInferencePlugin::SetConfig(const std::map<std::string, std::string> & config) {
for (auto && kvp : config) {
_config[kvp.first] = kvp.second;
}
}
static const Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoBatchPlugin"};
IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoBatchInferencePlugin, version)
AutoBatchInferencePlugin::AutoBatchInferencePlugin() {
_pluginName = "BATCH";
}
InferenceEngine::Parameter AutoBatchInferencePlugin::GetMetric(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter> & options) const {
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics;
metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
std::string deviceName = { "BATCH" };
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, deviceName);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys = {
CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN)};
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else {
THROW_IE_EXCEPTION << "Unsupported metric key " << name;
}
}
ExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork&network,
const std::map<std::string, std::string>& config) {
if (GetCore() == nullptr) {
THROW_IE_EXCEPTION << "Please, work with MULTI device via InferencEngine::Core object";
}
auto fullConfig = mergeConfigs(_config, config);
auto device_batch = fullConfig.find(CONFIG_KEY(AUTO_BATCH));
if (device_batch == fullConfig.end()) {
THROW_IE_EXCEPTION << "KEY_AUTO_BATCH key is not set for BATCH device";
}
auto metaDevice = ParseMetaDevice(device_batch->second, fullConfig);
// collect the settings that are applicable to the devices we are loading the network to
std::unordered_map<std::string, InferenceEngine::Parameter> networkConfig;
networkConfig.insert(*device_batch);
ExecutableNetwork executableNetworkForDevice;
auto & deviceName = metaDevice.deviceName;
auto & deviceConfig = metaDevice.config;
// network.serialize("out_orig.xml", "out_orig.bin");
CNNNetwork clonedNetwork(InferenceEngine::cloneNetwork(network));
const InputsDataMap inputInfo = clonedNetwork.getInputsInfo();
ICNNNetwork::InputShapes shapes = clonedNetwork.getInputShapes();
for (const InputsDataMap::value_type &item : inputInfo) {
auto layout = item.second->getTensorDesc().getLayout();
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW
|| layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC
|| layout == InferenceEngine::Layout::NDHWC) {
shapes[item.first][0] = metaDevice.batchForDevice;
std::cout << " reshaping the input " << item.first << " (layout " << layout << ")" << " by the batch" << std::endl;
}
}
std::cout << "Reshaped network by batch to " << metaDevice.batchForDevice << std::endl;
clonedNetwork.reshape(shapes);
// clonedNetwork.serialize("out_batch4.xml", "out_batch4.bin");
std::map<std::string, std::string> deviceConfig0 = deviceConfig;
// deviceConfig0["DO_NOT_AUTO_BATCH"] = "TRUE";
executableNetworkForDevice = GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, deviceName, deviceConfig0);
networkConfig.insert(deviceConfig.begin(), deviceConfig.end());
if ((std::shared_ptr<InferenceEngine::IExecutableNetwork>)executableNetworkForDevice == nullptr)
THROW_IE_EXCEPTION << NOT_FOUND_str << "Failed to load Executable network the device "
<< "that the BATCH device is initialized to work with";
auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
bool enablePerfCounters = (fullConfig.end() != perfConfig) && (perfConfig->second == PluginConfigParams::YES);
return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkForDevice,
metaDevice,
networkConfig,
enablePerfCounters);
}
InferenceEngine::QueryNetworkResult AutoBatchInferencePlugin::QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const {
// THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
const std::map<std::string, std::string> cfg;
return GetCore()->QueryNetwork(network, "CPU", cfg);
}
} // namespace AutoBatchPlugin
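
A standalone sketch of the zero-copy trick used by create_shared_blob_on_top_of_batched_blob above (illustrative only, simplified to the FP32 case): each per-request blob is just a view into the worker's batched blob at an offset of sizePerBatch * batch_id.

#include <ie_blob.h>

// Illustrative only: build a per-request FP32 view over one slice of a batched blob,
// mirroring the offset arithmetic the plugin uses (ptr + sizePerBatch * batch_id).
InferenceEngine::Blob::Ptr view_of_batch_slice(const InferenceEngine::Blob::Ptr& batched,
                                               size_t batch_id, size_t batch_num) {
    auto desc = batched->getTensorDesc();
    auto dims = desc.getDims();
    dims[0] = 1;                                   // the view covers a single batch element
    const size_t sizePerBatch = batched->size() / batch_num;
    auto* base = batched->buffer().as<float*>();   // FP32 case, as in the plugin's switch
    return InferenceEngine::make_shared_blob<float>(
        {InferenceEngine::Precision::FP32, dims, desc.getLayout()},
        base + sizePerBatch * batch_id, sizePerBatch);
}

Because the per-request blobs alias their slice of the batched blob, InferImpl never copies data; it only counts arrivals and fires the batched request.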

View File

@@ -0,0 +1,176 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <atomic>
#include <mutex>
#include <queue>
#include <unordered_map>
#include <map>
#include <vector>
#include <utility>
#include <memory>
#include <string>
#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
#include "ie_iinfer_request.hpp"
#include "details/ie_exception_conversion.hpp"
#include <ie_parallel.hpp>
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
# include <tbb/concurrent_queue.h>
#endif
namespace AutoBatchPlugin {
using DeviceName = std::string;
struct DeviceInformation {
DeviceName deviceName;
std::map<std::string, std::string> config;
int batchForDevice;
};
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
template <typename T>
using ThreadSafeQueue = tbb::concurrent_queue<T>;
#else
template <typename T>
class ThreadSafeQueue {
public:
void push(T value) {
std::lock_guard<std::mutex> lock(_mutex);
_queue.push(std::move(value));
}
bool try_pop(T& value) {
std::lock_guard<std::mutex> lock(_mutex);
if (!_queue.empty()) {
value = std::move(_queue.front());
_queue.pop();
return true;
} else {
return false;
}
}
bool empty() {
std::lock_guard<std::mutex> lock(_mutex);
return _queue.empty();
}
protected:
std::queue<T> _queue;
std::mutex _mutex;
};
#endif
class AutoBatchAsyncInferRequest;
class AutoBatchExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
using Ptr = std::shared_ptr<AutoBatchExecutableNetwork>;
struct WorkerInferRequest {
using Ptr = std::shared_ptr<WorkerInferRequest>;
InferenceEngine::InferRequest _inferRequest;
InferenceEngine::StatusCode _status = InferenceEngine::StatusCode::OK;
int _batchSize;
std::promise<void> _cond;
std::shared_future<void> _event;
std::atomic_int _numRequestsReady = {0};
void ReportArrival() {
_numRequestsReady++;
if (_numRequestsReady == _batchSize) {
_numRequestsReady = 0;
_inferRequest.StartAsync();
}
// workerRequestPtr->_cond.
}
};
using NotBusyWorkerRequests = ThreadSafeQueue<WorkerInferRequest*>;
explicit AutoBatchExecutableNetwork(const InferenceEngine::ExecutableNetwork& networkForDevice,
const DeviceInformation& networkDevices,
const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
const bool needPerfCounters = false);
void SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config) override;
InferenceEngine::Parameter GetConfig(const std::string &name) const override;
InferenceEngine::Parameter GetMetric(const std::string &name) const override;
InferenceEngine::IInferRequest::Ptr CreateInferRequest() override;
InferenceEngine::InferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override;
~AutoBatchExecutableNetwork() override;
std::atomic_bool _terminate = {false};
DeviceInformation _device;
InferenceEngine::ExecutableNetwork _network;
std::vector<WorkerInferRequest::Ptr> _workerRequests;
std::unordered_map<std::string, InferenceEngine::Parameter> _config;
bool _needPerfCounters = false;
std::atomic_size_t _numRequestsCreated = {0};
};
class AutoBatchInferRequest : public InferenceEngine::InferRequestInternal {
public:
using Ptr = std::shared_ptr<AutoBatchInferRequest>;
explicit AutoBatchInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
AutoBatchExecutableNetwork::WorkerInferRequest* workerRequestPtr,
int batch_id, int num_batch, bool _needPerfCounters = false);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
void InferImpl() override;
// Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
void SetBlobsToAnotherRequest(InferenceEngine::InferRequest& req);
AutoBatchExecutableNetwork::WorkerInferRequest* _workerInferRequest;
protected:
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> _perfMap;
bool _needPerfCounters = false;
};
class AutoBatchAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public:
using Ptr = std::shared_ptr<AutoBatchAsyncInferRequest>;
explicit AutoBatchAsyncInferRequest(const AutoBatchInferRequest::Ptr& inferRequest,
const bool needPerfCounters,
const AutoBatchExecutableNetwork::Ptr& AutoBatchExecutableNetwork,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
void Infer_ThreadUnsafe() override;
~AutoBatchAsyncInferRequest() override;
protected:
AutoBatchExecutableNetwork::Ptr _AutoBatchExecutableNetwork;
AutoBatchInferRequest::Ptr _inferRequest;
};
class AutoBatchInferencePlugin : public InferenceEngine::InferencePluginInternal {
public:
AutoBatchInferencePlugin();
~AutoBatchInferencePlugin() override = default;
InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) override;
void SetConfig(const std::map<std::string, std::string>& config) override;
InferenceEngine::Parameter GetConfig(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter> & options) const override;
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const override;
InferenceEngine::Parameter GetMetric(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
DeviceInformation ParseMetaDevice(const std::string & devicesBatchCfg,
const std::map<std::string, std::string> & config) const;
protected:
std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,
const DeviceName & deviceName) const;
};
} // namespace AutoBatchPlugin
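
The header above wires the batching handshake through a per-worker std::promise/std::shared_future pair. Reduced to a standalone sketch (illustrative only; a detached thread stands in for the device-side batched request and its completion callback): the caller that completes the batch triggers the work and re-arms the promise, and every caller blocks on the shared event.

#include <atomic>
#include <future>
#include <iostream>
#include <thread>
#include <vector>

// Illustrative only: N callers share one promise/shared_future; the caller that completes
// the batch "starts" the batched work, the completion path re-arms the promise, and all
// callers wake together, mirroring WorkerInferRequest and AutoBatchInferRequest::InferImpl.
struct Worker {
    explicit Worker(int n) : batchSize(n), event(cond.get_future().share()) {}
    int batchSize;
    std::atomic_int numReady{0};
    std::promise<void> cond;
    std::shared_future<void> event;
};

void infer(Worker& w) {
    auto event = w.event;                     // grab the current event before it is re-armed
    if (++w.numReady == w.batchSize) {
        w.numReady = 0;
        std::thread([&w] {                    // stand-in for StartAsync() + completion callback
            auto signal = std::move(w.cond);
            w.cond = std::promise<void>();    // re-arm for the next batch
            w.event = w.cond.get_future().share();
            signal.set_value();               // wake everyone waiting on the old event
        }).detach();
    }
    event.get();                              // every request of the batch waits here
}

int main() {
    Worker w(4);
    std::vector<std::thread> callers;
    for (int i = 0; i < 4; ++i)
        callers.emplace_back([&w] { infer(w); });
    for (auto& t : callers) t.join();
    std::cout << "batch of 4 completed together" << std::endl;
}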

View File

@@ -677,6 +677,12 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
<< static_cast<int>(device_info.gfx_ver.revision);
}
IE_SET_METRIC_RETURN(GPU_UARCH_VERSION, s.str());
} else if (name == METRIC_KEY(OPTIMAL_BATCH)) {
auto network = options.find("MODEL_ADDRESS")->second.as<InferenceEngine::CNNNetwork const*>();
// auto transformedNetwork = CloneAndTransformNetwork(*network, _impl->m_config);
unsigned int batch = 8;
std::cout << "SELECTED BATCH: " << batch << std::endl;
IE_SET_METRIC_RETURN(OPTIMAL_BATCH, batch);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
auto deviceName = StringRightTrim(device_info.dev_name, "NEO", false);
deviceName += std::string(" (") + (device_info.dev_type == cldnn::device_type::discrete_gpu ? "dGPU" : "iGPU") + ")";
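
A hedged sketch of how the new OPTIMAL_BATCH metric could be queried (illustrative only; the "MODEL_ADDRESS" option key and the CNNNetwork const* convention come from the hunk above, while a GetMetric overload that forwards an options map to the plugin is an assumption, not something shown in this diff):

#include <inference_engine.hpp>
#include <map>

// Illustrative only: ask the GPU plugin for its preferred batch size. The "MODEL_ADDRESS"
// key and the CNNNetwork const* convention mirror clDNNEngine::GetMetric above; the
// options-forwarding GetMetric overload is an assumption, not part of this diff.
unsigned int query_optimal_batch(InferenceEngine::Core& core,
                                 const InferenceEngine::CNNNetwork& network) {
    std::map<std::string, InferenceEngine::Parameter> options = {
        {"MODEL_ADDRESS", &network}
    };
    return core.GetMetric("GPU", METRIC_KEY(OPTIMAL_BATCH), options).as<unsigned int>();
}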

View File

@@ -118,6 +118,7 @@ DECLARE_METRIC_VALUE(BATCHED_BLOB);
* String value for metric name is "RANGE_FOR_STREAMS".
*/
DECLARE_METRIC_KEY(RANGE_FOR_STREAMS, std::tuple<unsigned int, unsigned int>);
DECLARE_METRIC_KEY(OPTIMAL_BATCH, unsigned int);
/**
* @brief Metric to provide a hint for a range for number of async infer requests. If device supports streams,
@@ -250,6 +251,11 @@ DECLARE_CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS);
DECLARE_CONFIG_VALUE(YES);
DECLARE_CONFIG_VALUE(NO);
/**
* @brief Auto-batching configuration: the device to batch on and the batch size, e.g. "GPU(4)".
*/
DECLARE_CONFIG_KEY(AUTO_BATCH);
/**
* @brief Limit `#threads` that are used by Inference Engine for inference on the CPU.
*/
@@ -312,7 +318,6 @@ DECLARE_CONFIG_KEY(PERF_COUNT);
* >0 - Direct value of limit. Batch size to process is min(new batch_limit, original_batch)
*/
DECLARE_CONFIG_KEY(DYN_BATCH_LIMIT);
/**
* @brief The key checks whether dynamic batch is enabled.
*/

View File

@@ -169,6 +169,7 @@ public:
static std::vector<std::string> getHeteroDevices(std::string fallbackDevice);
static std::vector<std::string> getMultiDevices(std::string devicesList);
static std::string getBatchDevice(std::string devicesList);
};
} // namespace InferenceEngine
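
The body of getBatchDevice is not part of this diff; the following is a hypothetical sketch only, assuming the enclosing class is DeviceIDParser (used elsewhere in this commit), that the helper receives the part after the "BATCH:" prefix, and that the BATCH device accepts a single "<device>(<batch>)" entry, by analogy with getHeteroDevices/getMultiDevices.

// Hypothetical sketch, not the actual implementation (the body is not in this diff).
std::string DeviceIDParser::getBatchDevice(std::string devicesList) {
    auto begin = devicesList.find_first_not_of(" \t");
    auto end = devicesList.find_last_not_of(" \t");
    if (begin == std::string::npos)
        IE_THROW() << "BATCH device expects a device to batch on, none is given";
    std::string device = devicesList.substr(begin, end - begin + 1);
    if (device.find(',') != std::string::npos)
        IE_THROW() << "BATCH device expects exactly one device, got: " << devicesList;
    return device;  // e.g. "GPU(4)", later parsed by AutoBatchInferencePlugin::ParseMetaDevice
}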