openvino/docs/template_plugin/src/template_executable_network.cpp
Anton Pankratv 18836f53cd Implemented inference in template plugin (#1308)
* Implemented inference in template plugin

* Fixed tests

* Removed thirdparty dependency

* Simplified executor configuration

* removed half

* Fixed cmake

* Fixed ngraph node check

* device blob allocation

* Fixed enum error
2020-07-28 17:25:31 +03:00


// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <atomic>
#include <set>
#include <utility>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include <ie_metric_helpers.hpp>
#include <ie_util_internal.hpp>
#include <ie_plugin_config.hpp>
#include <threading/ie_executor_manager.hpp>
#include <details/ie_cnn_network_tools.h>
#include "template/template_config.hpp"
#include "template_plugin.hpp"
#include "template_executable_network.hpp"
using namespace TemplatePlugin;
// ! [executable_network:ctor_cnnnetwork]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<ngraph::Function>& function,
                                                     const Configuration& cfg,
                                                     const Plugin::Ptr& plugin) :
    InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation
    _cfg(cfg),
    _plugin(plugin),
    _function(function) {
    // TODO: if your plugin supports device ID (more than one instance of the device can be present on the host machine),
    // you should select the proper device based on KEY_DEVICE_ID or automatic behavior.
    // In this case, _waitExecutor should also be created per device.
    try {
        CompileGraph();
        InitExecutor();
    } catch (const InferenceEngineException&) {
        throw;
    } catch (const std::exception& e) {
        THROW_IE_EXCEPTION << "Standard exception from compilation library: " << e.what();
    } catch (...) {
        THROW_IE_EXCEPTION << "Generic exception is thrown";
    }
}
// ! [executable_network:ctor_cnnnetwork]
// ! [executable_network:ctor_import_stream]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model,
                                                     const Configuration& cfg,
                                                     const Plugin::Ptr& plugin) :
    _cfg(cfg),
    _plugin(plugin) {
    // TODO: since importing a network is not mandatory functionality, this ctor can simply be removed
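    // A possible implementation (not provided here) could read a previously exported representation from `model`
    // (see ExportImpl below), restore `_function` together with the input/output index maps, and call InitExecutor().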
}
// ! [executable_network:ctor_import_stream]
// ! [executable_network:compile_graph]
void TemplatePlugin::ExecutableNetwork::CompileGraph() {
    // TODO: perform actual graph compilation taking `_cfg` into account
    // Generate backend-specific blob mappings. For example, Inference Engine uses the friendly name of the layer
    // placed before an ngraph::Result node, not the Result node itself, as the inference request output name.
    for (auto&& result : _function->get_results()) {
        auto previousOutput = result->get_input_source_output(0);
        auto outputName = previousOutput.get_node()->get_friendly_name();
        if (previousOutput.get_node()->get_output_size() > 1) {
            outputName += '.' + std::to_string(previousOutput.get_index());
        }
        _outputIndex.emplace(outputName, _function->get_result_index(result));
    }
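    // For example, a Result node fed by output 1 of a node with the friendly name "split"
    // is exposed to the user as the inference request output "split.1".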
    for (auto&& parameter : _function->get_parameters()) {
        _inputIndex.emplace(parameter->get_friendly_name(), _function->get_parameter_index(parameter));
    }
    // Perform any other steps like allocation and filling device buffers, and so on
}
// ! [executable_network:compile_graph]
// ! [executable_network:init_executor]
void TemplatePlugin::ExecutableNetwork::InitExecutor() {
    // The default multi-threaded configuration is balanced for throughput and latency cases and takes into account
    // real hardware cores and NUMA nodes.
    auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
    streamsExecutorConfig._name = "TemplateStreamsExecutor";
    // As the Inference Engine CPU Streams Executor creates some additional threads,
    // it is better to avoid thread recreation, because some OS memory allocators cannot handle such usage patterns
    // and memory consumption can become larger than expected.
    // For this reason, Inference Engine provides an executors cache.
    _taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
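    // Because executors are cached by name, networks compiled with the same executor configuration
    // reuse a single thread pool instead of creating new threads for every ExecutableNetwork instance.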
    // NOTE: the callback executor is not configured, so callbacks are called in the thread of the last stage of the inference request pipeline
    // _callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
}
// ! [executable_network:init_executor]
// ! [executable_network:create_infer_request_impl]
InferenceEngine::InferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
                                                                                                      InferenceEngine::OutputsDataMap networkOutputs) {
    return std::make_shared<TemplateInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<ExecutableNetwork>(shared_from_this()));
}
// ! [executable_network:create_infer_request_impl]
// ! [executable_network:create_infer_request]
void TemplatePlugin::ExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& asyncRequest) {
    auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
    auto asyncThreadSafeImpl = std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest),
                                                                           _taskExecutor, _plugin->_waitExecutor, _callbackExecutor);
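    // Wrap the asynchronous implementation into the public IInferRequest interface. The custom deleter calls Release()
    // instead of `delete` because IInferRequest objects are destroyed through their Release() method.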
    asyncRequest.reset(new InferenceEngine::InferRequestBase<TemplateAsyncInferRequest>(asyncThreadSafeImpl),
                       [](InferenceEngine::IInferRequest *p) { p->Release(); });
    asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
}
// ! [executable_network:create_infer_request]
// ! [executable_network:get_config]
void TemplatePlugin::ExecutableNetwork::GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const {
    result = _cfg.Get(name);
}
// ! [executable_network:get_config]
// ! [executable_network:get_metric]
void TemplatePlugin::ExecutableNetwork::GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *) const {
    // TODO: return more supported values for metrics
    if (METRIC_KEY(SUPPORTED_METRICS) == name) {
        result = IE_SET_METRIC(SUPPORTED_METRICS, std::vector<std::string>{
            METRIC_KEY(NETWORK_NAME),
            METRIC_KEY(SUPPORTED_METRICS),
            METRIC_KEY(SUPPORTED_CONFIG_KEYS),
            METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
    } else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
        std::vector<std::string> configKeys = {
            CONFIG_KEY(DEVICE_ID),
            CONFIG_KEY(PERF_COUNT),
            TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) };
        auto streamExecutorConfigKeys = IStreamsExecutor::Config{}.SupportedKeys();
        for (auto&& configKey : streamExecutorConfigKeys) {
            configKeys.emplace_back(configKey);
        }
        result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys);
    } else if (METRIC_KEY(NETWORK_NAME) == name) {
        auto networkName = _function->get_friendly_name();
        result = IE_SET_METRIC(NETWORK_NAME, networkName);
    } else if (METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS) == name) {
        unsigned int value = _cfg._streamsExecutorConfig._streams;
        result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, value);
    } else {
        THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;
    }
}
// ! [executable_network:get_metric]
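// A minimal application-side sketch of querying these metrics through the Inference Engine Core API
// (the "TEMPLATE" device name below is an assumption; use whatever name the plugin is registered under):
//   InferenceEngine::Core core;
//   auto executableNetwork = core.LoadNetwork(network, "TEMPLATE");
//   auto nireq = executableNetwork.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();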
// ! [executable_network:export_impl]
void TemplatePlugin::ExecutableNetwork::ExportImpl(std::ostream& dlaModel) {
    // TODO: Code which exports the compiled graph to the std::ostream
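    // A possible implementation (not provided here) could serialize the ngraph::Function, or a backend-specific
    // compiled blob, into `dlaModel` so that the import constructor above can restore the network without recompilation.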
}
// ! [executable_network:export_impl]