From 9357d3fd3dd4f89ad4dc86ec909836acd77451c6 Mon Sep 17 00:00:00 2001
From: myshevts
Date: Thu, 25 Mar 2021 15:01:30 +0300
Subject: [PATCH] auto-batching POC squashed (all commits from auto-batch-2021.3 branch)

(cherry picked from commit d7742f2c747bc514a126cc9a4d5b99f0ff5cbbc7)
---
 .../samples/benchmark_app/main.cpp            |  12 +-
 inference-engine/src/CMakeLists.txt           |   2 +
 .../src/auto_batch/CMakeLists.txt             |  25 +
 .../src/auto_batch/auto_batch.cpp             | 470 ++++++++++++++++++
 .../src/auto_batch/auto_batch.hpp             | 176 +++++++
 .../src/cldnn_engine/cldnn_engine.cpp         |   6 +
 .../include/ie/ie_plugin_config.hpp           |   7 +-
 inference-engine/src/plugin_api/ie_icore.hpp  |   1 +
 8 files changed, 689 insertions(+), 10 deletions(-)
 create mode 100644 inference-engine/src/auto_batch/CMakeLists.txt
 create mode 100644 inference-engine/src/auto_batch/auto_batch.cpp
 create mode 100644 inference-engine/src/auto_batch/auto_batch.hpp

diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp
index 33798d5c68f..c4a30023c70 100644
--- a/inference-engine/samples/benchmark_app/main.cpp
+++ b/inference-engine/samples/benchmark_app/main.cpp
@@ -666,14 +666,8 @@ int main(int argc, char* argv[]) {
         next_step(ss.str());
 
         // warming up - out of scope
-        auto inferRequest = inferRequestsQueue.getIdleRequest();
-        if (!inferRequest) {
-            IE_THROW() << "No idle Infer Requests!";
-        }
-        if (FLAGS_api == "sync") {
-            inferRequest->infer();
-        } else {
-            inferRequest->startAsync();
+        for (size_t i = 0; i < inferRequestsQueue.requests.size(); i++) {
+            inferRequestsQueue.getIdleRequest()->startAsync();
         }
         inferRequestsQueue.waitAll();
         auto duration_ms = double_to_string(inferRequestsQueue.getLatencies()[0]);
@@ -694,7 +688,7 @@
         while ((niter != 0LL && iteration < niter) ||
                (duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
                (FLAGS_api == "async" && iteration % nireq != 0)) {
-            inferRequest = inferRequestsQueue.getIdleRequest();
+            auto inferRequest = inferRequestsQueue.getIdleRequest();
             if (!inferRequest) {
                 IE_THROW() << "No idle Infer Requests!";
             }
diff --git a/inference-engine/src/CMakeLists.txt b/inference-engine/src/CMakeLists.txt
index 8b198bfbf28..138aab5735b 100644
--- a/inference-engine/src/CMakeLists.txt
+++ b/inference-engine/src/CMakeLists.txt
@@ -32,6 +32,8 @@ add_subdirectory(hetero_plugin)
 
 add_subdirectory(multi_device)
 
+add_subdirectory(auto_batch)
+
 add_subdirectory(transformations)
 
 add_subdirectory(inference_engine)
diff --git a/inference-engine/src/auto_batch/CMakeLists.txt b/inference-engine/src/auto_batch/CMakeLists.txt
new file mode 100644
index 00000000000..f083593fbbd
--- /dev/null
+++ b/inference-engine/src/auto_batch/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set (TARGET_NAME "AutoBatchPlugin")
+
+if(ENABLE_LTO)
+    ie_enable_lto()
+endif()
+
+file(GLOB SOURCES
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+)
+
+file(GLOB HEADERS
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
+)
+
+ie_add_plugin(NAME ${TARGET_NAME}
+              DEVICE_NAME "BATCH"
+              SOURCES ${SOURCES} ${HEADERS}
+              VERSION_DEFINES_FOR auto_batch.cpp)
+
+target_link_libraries(${TARGET_NAME} PRIVATE inference_engine inference_engine_legacy)
+set_ie_threading_interface_for(${TARGET_NAME})
diff --git a/inference-engine/src/auto_batch/auto_batch.cpp b/inference-engine/src/auto_batch/auto_batch.cpp
new file mode 100644
index 00000000000..e28f4a61027
--- /dev/null
+++ b/inference-engine/src/auto_batch/auto_batch.cpp
@@ -0,0 +1,470 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ie_metric_helpers.hpp"
+#include
+#include
+#include
+#include
+#include "auto_batch.hpp"
+
+namespace AutoBatchPlugin {
+    using namespace InferenceEngine;
+
+    template <Precision::ePrecision precision>
+    Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size_t batch_id, size_t batch_num) {
+        typedef typename PrecisionTrait<precision>::value_type TYPE;
+        typedef typename std::add_pointer<TYPE>::type TYPEPTR;
+        auto ptr = batched_blob->buffer().as<TYPEPTR>();
+        auto sizePerBatch = batched_blob->size() / batch_num;
+        auto layout = batched_blob->getTensorDesc().getLayout();
+        SizeVector dims = batched_blob->getTensorDesc().getDims();
+
+        if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW
+            || layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC
+            || layout == InferenceEngine::Layout::NDHWC) {
+            dims[0] = 1;
+            assert(batched_blob->getTensorDesc().getPrecision() == precision);
+            return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
+                                          ptr + sizePerBatch * batch_id, sizePerBatch);
+        } else {
+            // same blob for all requests (e.g. constants)
+            return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
+                                          ptr);
+        }
+    }
+
+// ------------------------------AutoBatchInferRequest----------------------------
+AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
+                                             const OutputsDataMap& networkOutputs,
+                                             AutoBatchExecutableNetwork::WorkerInferRequest* workerRequestPtr,
+                                             int batch_id, int num_batch,
+                                             bool needPerfCounters)
+        : InferRequestInternal(networkInputs, networkOutputs), _workerInferRequest(workerRequestPtr),
+          _needPerfCounters(needPerfCounters) {
+    // Allocate all input blobs
+    for (const auto &it : networkInputs) {
+        auto blob = workerRequestPtr->_inferRequest.GetBlob(it.first);
+        Blob::Ptr res;
+        switch (it.second->getTensorDesc().getPrecision()) {
+            case InferenceEngine::Precision::FP32:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::I32:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::I8:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::U16:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::I16:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::U8:
+            case InferenceEngine::Precision::BOOL:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            default:
+                THROW_IE_EXCEPTION << "Unsupported input precision " << it.second->getTensorDesc().getPrecision();
+        }
+        _inputs[it.first] = res;
+    }
+    // Allocate all output blobs
+    for (const auto &it : networkOutputs) {
+        auto blob = workerRequestPtr->_inferRequest.GetBlob(it.first);
+        Blob::Ptr res;
+        switch (it.second->getTensorDesc().getPrecision()) {
+            case InferenceEngine::Precision::FP32:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::I32:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::I8:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::U16:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::I16:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            case InferenceEngine::Precision::U8:
+            case InferenceEngine::Precision::BOOL:
+                res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>
+                        (workerRequestPtr->_inferRequest.GetBlob(it.first), batch_id, num_batch);
+                break;
+            default:
+                THROW_IE_EXCEPTION << "Unsupported output precision " << it.second->getTensorDesc().getPrecision();
+        }
+        _outputs[it.first] = res;
+    }
+}
+
+void AutoBatchInferRequest::SetBlobsToAnotherRequest(InferRequest& req) {
+    // todo: call SetBlob for REMOTE BLOBs
+}
+
+std::map<std::string, InferenceEngineProfileInfo> AutoBatchInferRequest::GetPerformanceCounts() const {
+    return _perfMap;
+}
+
+void AutoBatchInferRequest::InferImpl() {
+    auto _event = _workerInferRequest->_event;
+    auto numReady = ++_workerInferRequest->_numRequestsReady;
+    if (numReady == _workerInferRequest->_batchSize) {
+        _workerInferRequest->_numRequestsReady = 0;
+        _workerInferRequest->_inferRequest.StartAsync();
+    }
+    _event.get();
+    if (_needPerfCounters) {
+        _perfMap = _workerInferRequest->_inferRequest.GetPerformanceCounts();
+    }
+}
+
+AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest(
+    const AutoBatchInferRequest::Ptr& inferRequest,
+    const bool needPerfCounters,
+    const AutoBatchExecutableNetwork::Ptr& autoBatchExecutableNetwork,
+    const ITaskExecutor::Ptr& callbackExecutor) :
+        AsyncInferRequestThreadSafeDefault(inferRequest,
+                                           std::make_shared<CPUStreamsExecutor>(
+                                               IStreamsExecutor::Config{"AutoBatch", 1, 1,
+                                                   IStreamsExecutor::ThreadBindingType::NONE, 1, 0, 1}),
+                                           callbackExecutor),
+        _AutoBatchExecutableNetwork{autoBatchExecutableNetwork},
+        _inferRequest{inferRequest} {
+}
+
+void AutoBatchAsyncInferRequest::Infer_ThreadUnsafe() {
+    InferUsingAsync();
+}
+
+AutoBatchAsyncInferRequest::~AutoBatchAsyncInferRequest() {
+    StopAndWait();
+}
+
+// ------------------------------AutoBatchExecutableNetwork----------------------------
+AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(const InferenceEngine::ExecutableNetwork& networkForDevice,
+                                                       const DeviceInformation& networkDevice,
+                                                       const std::unordered_map<std::string, std::string>& config,
+                                                       const bool needPerfCounters) :
+    InferenceEngine::ExecutableNetworkThreadSafeDefault(
+        nullptr,
+        std::make_shared<InferenceEngine::ImmediateExecutor>()),
+    _device{networkDevice},
+    _network{networkForDevice},
+    _config{config},
+    _needPerfCounters{needPerfCounters} {
+}
+
+AutoBatchExecutableNetwork::~AutoBatchExecutableNetwork() {
+//    {
+//        std::lock_guard lock(_mutex);
+//        _device = {};
+//    }
+    _terminate = true;
+    /* NOTE: The only threads that use the `AutoBatchExecutableNetwork` context are those used by the worker infer requests.
+     * But the AsyncInferRequest destructor waits for all asynchronous tasks that are used by the request
+     */
+    _workerRequests.clear();
+}
+
+InferenceEngine::InferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
+                                                                                              InferenceEngine::OutputsDataMap networkOutputs) {
+    // todo: guard request creation from another thread/on-the-fly
+    auto num = _numRequestsCreated++;
+    auto batch_id = num % _device.batchForDevice;
+    if (!batch_id) {  // need a new worker request
+        _workerRequests.push_back(std::make_shared<WorkerInferRequest>());
+        auto workerRequestPtr = _workerRequests.back();
+        workerRequestPtr->_inferRequest = _network.CreateInferRequest();
+        workerRequestPtr->_batchSize = _device.batchForDevice;
+        workerRequestPtr->_cond = std::promise<void>();
+        workerRequestPtr->_event = workerRequestPtr->_cond.get_future().share();
+        // _idleWorkerRequests.push(workerRequestPtr);
+        workerRequestPtr->_inferRequest.SetCompletionCallback<std::function<void(InferRequest, StatusCode)>>(
+            [workerRequestPtr, this] (InferRequest, StatusCode status) mutable {
+                workerRequestPtr->_status = status;
+                auto signal = std::move(workerRequestPtr->_cond);
+                // reset the promise/future for the next use
+                workerRequestPtr->_cond = std::promise<void>();
+                workerRequestPtr->_event = workerRequestPtr->_cond.get_future().share();
+                signal.set_value();
+            });
+    }
+    return std::make_shared<AutoBatchInferRequest>(networkInputs, networkOutputs, _workerRequests.back().get(),
+                                                   batch_id, _device.batchForDevice, _needPerfCounters);
+}
+
+InferenceEngine::IInferRequest::Ptr AutoBatchExecutableNetwork::CreateInferRequest() {
+    auto syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
+    syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
+    auto asyncThreadSafeImpl = std::make_shared<AutoBatchAsyncInferRequest>(std::static_pointer_cast<AutoBatchInferRequest>(syncRequestImpl),
+                                                                            _needPerfCounters,
+                                                                            std::static_pointer_cast<AutoBatchExecutableNetwork>(shared_from_this()),
+                                                                            _callbackExecutor);
+    IInferRequest::Ptr asyncRequest;
+    asyncRequest.reset(new InferRequestBase(asyncThreadSafeImpl), [](IInferRequest* p) { p->Release(); });
+    asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
+    return asyncRequest;
+}
+
+void AutoBatchExecutableNetwork::SetConfig(const std::map<std::string, std::string> &config) {
+    // TODO
+    THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
+}
+
+InferenceEngine::Parameter AutoBatchExecutableNetwork::GetConfig(const std::string &name) const {
+    auto res = _config.find(name);
+    if (res != _config.end()) {
+        return res->second;
+    } else {
+        THROW_IE_EXCEPTION << NOT_FOUND_str << name << " not found in the ExecutableNetwork config";
+    }
+}
+
+InferenceEngine::Parameter AutoBatchExecutableNetwork::GetMetric(const std::string &name) const {
+    if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
+        unsigned int res = 0u;
+        try {
+            res = _network.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
+        } catch (const details::InferenceEngineException &iie) {
+            THROW_IE_EXCEPTION
+                << "Every device used with Auto-Batching should "
+                << "support the OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
" + << "Failed to query the metric for the " + << _network.GetMetric(METRIC_KEY(FULL_DEVICE_NAME)).as() + << " with error:" << iie.what(); + } + IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, res * _device.batchForDevice); + } else if (name == METRIC_KEY(NETWORK_NAME)) { + IE_SET_METRIC_RETURN(NETWORK_NAME, _network.GetMetric( + METRIC_KEY(NETWORK_NAME)).as()); + } else if (name == METRIC_KEY(SUPPORTED_METRICS)) { + IE_SET_METRIC_RETURN(SUPPORTED_METRICS, { + METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS), + METRIC_KEY(SUPPORTED_METRICS), + METRIC_KEY(NETWORK_NAME), + METRIC_KEY(SUPPORTED_CONFIG_KEYS) + }); + } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { + std::vector configKeys = { CONFIG_KEY(AUTO_BATCH) }; + IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); + } else { + THROW_IE_EXCEPTION << "Unsupported Network metric: " << name; + } +} + +// ------------------------------AutoBatchInferencePlugin---------------------------- + +namespace { + +std::map mergeConfigs(std::map config, + const std::map & local) { + for (auto && kvp : local) { + config[kvp.first] = kvp.second; + } + return config; +} + +} // namespace + +std::map AutoBatchInferencePlugin::GetSupportedConfig( + const std::map & config, const std::string & deviceName) const { + std::vector supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + std::map supportedConfig; + for (auto&& key : supportedConfigKeys) { + auto itKey = config.find(key); + if (config.end() != itKey) { + supportedConfig[key] = itKey->second; + } + } + return supportedConfig; +} + +DeviceInformation AutoBatchInferencePlugin::ParseMetaDevice(const std::string& devicesBatchCfg, + const std::map & config) const { + DeviceInformation metaDevice; + auto getDeviceConfig = [&] (const DeviceName & deviceWithID) { + DeviceIDParser deviceParser(deviceWithID); + std::string deviceName = deviceParser.getDeviceName(); + std::map tconfig = mergeConfigs(_config, config); + + // set device ID if any + std::string deviceIDLocal = deviceParser.getDeviceID(); + if (!deviceIDLocal.empty()) { + tconfig[PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal; + } + + return GetSupportedConfig(tconfig, deviceName); + }; + + auto && d = devicesBatchCfg; + { + auto openingBracket = d.find_first_of('('); + auto closingBracket = d.find_first_of(')', openingBracket); + auto deviceName = d.substr(0, openingBracket); + + int batch = -1; + if (closingBracket != std::string::npos && openingBracket < closingBracket) { + batch = std::stol(d.substr(openingBracket + 1, closingBracket - 1)); + + if (batch <= 0) { + THROW_IE_EXCEPTION << "Batch value for '" << deviceName << "' must be > 0, while " << batch + << "is passed"; + } + } + + // create meta device + auto cfg = getDeviceConfig(deviceName); + std::vector supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + if (std::find(std::begin(supportedConfigKeys), std::end(supportedConfigKeys), CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN)) + != std::end(supportedConfigKeys)) { + cfg.emplace(CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN), ""); + } + metaDevice = { deviceName, cfg, batch }; + } + + return metaDevice; +} + +Parameter AutoBatchInferencePlugin::GetConfig(const std::string& name, + const std::map & options) const { + if (name == CONFIG_KEY(AUTO_BATCH)) { + auto it = _config.find(CONFIG_KEY(AUTO_BATCH)); + if (it == _config.end()) { + THROW_IE_EXCEPTION << "Value for KEY_AUTO_BATCH is not set"; + } else { + return { it->second }; + } + } else { + 
THROW_IE_EXCEPTION << "Unsupported config key: " << name; + } +} + +void AutoBatchInferencePlugin::SetConfig(const std::map & config) { + for (auto && kvp : config) { + _config[kvp.first] = kvp.second; + } +} + +static const Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoBatchPlugin"}; +IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoBatchInferencePlugin, version) + +AutoBatchInferencePlugin::AutoBatchInferencePlugin() { + _pluginName = "BATCH"; +} + +InferenceEngine::Parameter AutoBatchInferencePlugin::GetMetric(const std::string& name, + const std::map & options) const { + if (name == METRIC_KEY(SUPPORTED_METRICS)) { + std::vector metrics; + metrics.push_back(METRIC_KEY(SUPPORTED_METRICS)); + metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME)); + metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); + } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) { + std::string name = { "BATCH" }; + IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, name); + } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { + std::vector configKeys = { + CONFIG_KEY_INTERNAL(AGGREGATED_PLUGIN)}; + IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); + } else { + THROW_IE_EXCEPTION << "Unsupported metric key " << name; + } +} + +ExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork&network, + const std::map& config) { + if (GetCore() == nullptr) { + THROW_IE_EXCEPTION << "Please, work with MULTI device via InferencEngine::Core object"; + } + + auto fullConfig = mergeConfigs(_config, config); + auto device_batch = fullConfig.find(CONFIG_KEY(AUTO_BATCH)); + if (device_batch == fullConfig.end()) { + THROW_IE_EXCEPTION << "KEY_AUTO_BATCH key is not set for BATCH device"; + } + + auto metaDevice = ParseMetaDevice(device_batch->second, fullConfig); + + // collect the settings that are applicable to the devices we are loading the network to + std::unordered_map networkConfig; + networkConfig.insert(*device_batch); + + ExecutableNetwork executableNetworkForDevice; + auto & deviceName = metaDevice.deviceName; + auto & deviceConfig = metaDevice.config; + // network.serialize("out_orig.xml", "out_orig.bin"); + + CNNNetwork clonedNetwork(InferenceEngine::cloneNetwork(network)); + const InputsDataMap inputInfo = clonedNetwork.getInputsInfo(); + ICNNNetwork::InputShapes shapes = clonedNetwork.getInputShapes(); + + for (const InputsDataMap::value_type &item : inputInfo) { + auto layout = item.second->getTensorDesc().getLayout(); + if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW + || layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC + || layout == InferenceEngine::Layout::NDHWC) { + shapes[item.first][0] = metaDevice.batchForDevice; + std::cout << " reshaping the input " << item.first << " (layout " << layout << ")" << " by the batch" << std::endl; + } + } + std::cout << "Reshaped network by batch to " << metaDevice.batchForDevice << std::endl; + clonedNetwork.reshape(shapes); + // clonedNetwork.serialize("out_batch4.xml", "out_batch4.bin"); + + std::map deviceConfig0 = deviceConfig; + // deviceConfig0["DO_NOT_AUTO_BATCH"] = "TRUE"; + executableNetworkForDevice = GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, deviceName, deviceConfig0); + networkConfig.insert(deviceConfig.begin(), deviceConfig.end()); + if ((std::shared_ptr)executableNetworkForDevice == nullptr) + THROW_IE_EXCEPTION << NOT_FOUND_str << "Failed to load Executable network the device " + << "that the BATCH 
device is initialized to work with"; + + auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT); + bool enablePerfCounters = (fullConfig.end() != perfConfig) && (perfConfig->second == PluginConfigParams::YES); + + return std::make_shared(executableNetworkForDevice, + metaDevice, + networkConfig, + enablePerfCounters); +} + +InferenceEngine::QueryNetworkResult AutoBatchInferencePlugin::QueryNetwork(const InferenceEngine::CNNNetwork& network, + const std::map& config) const { +// THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str; + const std::map cfg; + return GetCore()->QueryNetwork(network, "CPU", cfg); +} +} // namespace AutoBatchPlugin diff --git a/inference-engine/src/auto_batch/auto_batch.hpp b/inference-engine/src/auto_batch/auto_batch.hpp new file mode 100644 index 00000000000..a09d370b57a --- /dev/null +++ b/inference-engine/src/auto_batch/auto_batch.hpp @@ -0,0 +1,176 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "ie_iinfer_request.hpp" +#include "details/ie_exception_conversion.hpp" +#include + +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) +# include +#endif + +namespace AutoBatchPlugin { + +using DeviceName = std::string; + +struct DeviceInformation { + DeviceName deviceName; + std::map config; + int batchForDevice; +}; + +#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO)) +template +using ThreadSafeQueue = tbb::concurrent_queue; +#else +template +class ThreadSafeQueue { +public: + void push(T value) { + std::lock_guard lock(_mutex); + _queue.push(std::move(value)); + } + + bool try_pop(T& value) { + std::lock_guard lock(_mutex); + if (!_queue.empty()) { + value = std::move(_queue.front()); + _queue.pop(); + return true; + } else { + return false; + } + } + + bool empty() { + std::lock_guard lock(_mutex); + return _queue.empty(); + } + +protected: + std::queue _queue; + std::mutex _mutex; +}; +#endif + +class AutoBatchAsyncInferRequest; +class AutoBatchExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault { +public: + using Ptr = std::shared_ptr; + struct WorkerInferRequest { + using Ptr = std::shared_ptr; + InferenceEngine::InferRequest _inferRequest; + InferenceEngine::StatusCode _status = InferenceEngine::StatusCode::OK; + int _batchSize; + std::promise _cond; + std::shared_future _event; + std::atomic_int _numRequestsReady = {0}; + void ReportArrival() { + _numRequestsReady++; + if (_numRequestsReady == _batchSize) { + _numRequestsReady = 0; + _inferRequest.StartAsync(); + } + // workerRequestPtr->_cond. 
+        }
+    };
+    using NotBusyWorkerRequests = ThreadSafeQueue<WorkerInferRequest::Ptr>;
+
+    explicit AutoBatchExecutableNetwork(const InferenceEngine::ExecutableNetwork& networkForDevice,
+                                        const DeviceInformation& networkDevices,
+                                        const std::unordered_map<std::string, std::string>& config,
+                                        const bool needPerfCounters = false);
+
+    void SetConfig(const std::map<std::string, std::string> &config) override;
+    InferenceEngine::Parameter GetConfig(const std::string &name) const override;
+    InferenceEngine::Parameter GetMetric(const std::string &name) const override;
+    InferenceEngine::IInferRequest::Ptr CreateInferRequest() override;
+    InferenceEngine::InferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
+                                                                      InferenceEngine::OutputsDataMap networkOutputs) override;
+    ~AutoBatchExecutableNetwork() override;
+
+    std::atomic_bool _terminate = {false};
+    DeviceInformation _device;
+    InferenceEngine::ExecutableNetwork _network;
+    std::vector<WorkerInferRequest::Ptr> _workerRequests;
+    std::unordered_map<std::string, std::string> _config;
+    bool _needPerfCounters = false;
+    std::atomic_size_t _numRequestsCreated = {0};
+};
+
+class AutoBatchInferRequest : public InferenceEngine::InferRequestInternal {
+public:
+    using Ptr = std::shared_ptr<AutoBatchInferRequest>;
+    explicit AutoBatchInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
+                                   const InferenceEngine::OutputsDataMap& networkOutputs,
+                                   AutoBatchExecutableNetwork::WorkerInferRequest* workerRequestPtr,
+                                   int batch_id, int num_batch, bool _needPerfCounters = false);
+    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
+    void InferImpl() override;
+
+    // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
+    void SetBlobsToAnotherRequest(InferenceEngine::InferRequest& req);
+    AutoBatchExecutableNetwork::WorkerInferRequest* _workerInferRequest;
+protected:
+    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> _perfMap;
+    bool _needPerfCounters = false;
+};
+
+class AutoBatchAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
+public:
+    using Ptr = std::shared_ptr<AutoBatchAsyncInferRequest>;
+
+    explicit AutoBatchAsyncInferRequest(const AutoBatchInferRequest::Ptr& inferRequest,
+                                        const bool needPerfCounters,
+                                        const AutoBatchExecutableNetwork::Ptr& autoBatchExecutableNetwork,
+                                        const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
+    void Infer_ThreadUnsafe() override;
+    ~AutoBatchAsyncInferRequest() override;
+
+protected:
+    AutoBatchExecutableNetwork::Ptr _AutoBatchExecutableNetwork;
+    AutoBatchInferRequest::Ptr _inferRequest;
+};
+
+class AutoBatchInferencePlugin : public InferenceEngine::InferencePluginInternal {
+public:
+    AutoBatchInferencePlugin();
+    ~AutoBatchInferencePlugin() override = default;
+
+    InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
+                                                                       const std::map<std::string, std::string>& config) override;
+
+    void SetConfig(const std::map<std::string, std::string>& config) override;
+    InferenceEngine::Parameter GetConfig(const std::string& name,
+                                         const std::map<std::string, InferenceEngine::Parameter> & options) const override;
+    InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
+                                                     const std::map<std::string, std::string>& config) const override;
+    InferenceEngine::Parameter GetMetric(const std::string& name,
+                                         const std::map<std::string, InferenceEngine::Parameter>& options) const override;
+
+    DeviceInformation ParseMetaDevice(const std::string & devicesBatchCfg,
+                                      const std::map<std::string, std::string> & config) const;
+
+protected:
+    std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,
+                                                          const DeviceName & deviceName) const;
+};
+
+}  // namespace AutoBatchPlugin
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index 863c8079b6b..93fb9e59f7b 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -677,6 +677,12 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
               << static_cast<int>(device_info.gfx_ver.revision);
         }
         IE_SET_METRIC_RETURN(GPU_UARCH_VERSION, s.str());
+    } else if (name == METRIC_KEY(OPTIMAL_BATCH)) {
+        auto network = options.find("MODEL_ADDRESS")->second.as();
+        // auto transformedNetwork = CloneAndTransformNetwork(*network, _impl->m_config);
+        unsigned int batch = 8;
+        std::cout << "SELECTED BATCH: " << batch << std::endl;
+        IE_SET_METRIC_RETURN(OPTIMAL_BATCH, batch);
     } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
         auto deviceName = StringRightTrim(device_info.dev_name, "NEO", false);
         deviceName += std::string(" (") + (device_info.dev_type == cldnn::device_type::discrete_gpu ? "dGPU" : "iGPU") + ")";
diff --git a/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp
index 09f62301f7e..be65df32a28 100644
--- a/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp
+++ b/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp
@@ -118,6 +118,7 @@ DECLARE_METRIC_VALUE(BATCHED_BLOB);
  * String value for metric name is "RANGE_FOR_STREAMS".
  */
 DECLARE_METRIC_KEY(RANGE_FOR_STREAMS, std::tuple<unsigned int, unsigned int>);
+DECLARE_METRIC_KEY(OPTIMAL_BATCH, unsigned int);
 
 /**
  * @brief Metric to provide a hint for a range for number of async infer requests. If device supports streams,
@@ -250,6 +251,11 @@ DECLARE_CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS);
 DECLARE_CONFIG_VALUE(YES);
 DECLARE_CONFIG_VALUE(NO);
 
+/**
+ * @brief Auto-batching configuration: the target device with the batch size, e.g. "GPU(4)"
+ */
+DECLARE_CONFIG_KEY(AUTO_BATCH);
+
 /**
  * @brief Limit `#threads` that are used by Inference Engine for inference on the CPU.
  */
@@ -312,7 +318,6 @@ DECLARE_CONFIG_KEY(PERF_COUNT);
  * >0 - Direct value of limit. Batch size to process is min(new batch_limit, original_batch)
  */
 DECLARE_CONFIG_KEY(DYN_BATCH_LIMIT);
-
 /**
  * @brief The key checks whether dynamic batch is enabled.
  */
diff --git a/inference-engine/src/plugin_api/ie_icore.hpp b/inference-engine/src/plugin_api/ie_icore.hpp
index d863fbded9d..155577a5d2a 100644
--- a/inference-engine/src/plugin_api/ie_icore.hpp
+++ b/inference-engine/src/plugin_api/ie_icore.hpp
@@ -169,6 +169,7 @@ public:
 
     static std::vector<std::string> getHeteroDevices(std::string fallbackDevice);
     static std::vector<std::string> getMultiDevices(std::string devicesList);
+    static std::string getBatchDevice(std::string devicesList);
 };
 
 }  // namespace InferenceEngine
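-- 
Reviewer note (after the signature delimiter, so it is not applied with the patch): a minimal usage sketch
of the new BATCH device through InferenceEngine::Core, assuming only the naming introduced above (device
name "BATCH", CONFIG_KEY(AUTO_BATCH) value of the form "<device>(<batch>)", e.g. "GPU(4)"). The model path
and the batch value are hypothetical placeholders.

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>

    #include <iostream>
    #include <vector>

    int main() {
        InferenceEngine::Core core;
        // Hypothetical IR model path.
        auto network = core.ReadNetwork("model.xml");

        // "GPU(4)" is parsed by AutoBatchInferencePlugin::ParseMetaDevice():
        // load to GPU and gather 4 user requests into one batched request.
        auto execNet = core.LoadNetwork(network, "BATCH",
                                        {{CONFIG_KEY(AUTO_BATCH), "GPU(4)"}});

        // The plugin reports the underlying device's optimal request count multiplied by the batch size.
        auto nireq = execNet.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();

        // Each request maps to one slot of the shared batched blob; the batched request only starts
        // once a full batch has been submitted, so keep nireq requests in flight at the same time.
        std::vector<InferenceEngine::InferRequest> requests;
        for (unsigned int i = 0; i < nireq; ++i) {
            requests.push_back(execNet.CreateInferRequest());
            requests.back().StartAsync();
        }
        for (auto& req : requests)
            req.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);

        std::cout << "Completed " << nireq << " requests" << std::endl;
        return 0;
    }

Because AutoBatchInferRequest::InferImpl() blocks until the worker collects _batchSize requests, submitting
fewer concurrent requests than the configured batch would stall, which is why the sketch always launches the
full set reported by OPTIMAL_NUMBER_OF_INFER_REQUESTS.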