// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "template_executable_network.hpp"

#include <ie_metric_helpers.hpp>
#include <ie_plugin_config.hpp>
#include <threading/ie_executor_manager.hpp>

#include "ie_icore.hpp"
#include "template/template_config.hpp"
#include "template_itt.hpp"
#include "template_plugin.hpp"
#include "transformations/serialize.hpp"

using namespace TemplatePlugin;

// ! [executable_network:ctor_cnnnetwork]
// Compiles an ngraph function into a plugin-specific executable network.
// @param function        network to compile (not modified; a transformed copy is stored)
// @param inputInfoMap    Inference Engine input descriptions for the network
// @param outputsInfoMap  Inference Engine output descriptions for the network
// @param cfg             plugin configuration captured for this network
// @param plugin          owning plugin instance
// @throws InferenceEngine::Exception (Unexpected) if compilation fails
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
                                                     const InferenceEngine::InputsDataMap& inputInfoMap,
                                                     const InferenceEngine::OutputsDataMap& outputsInfoMap,
                                                     const Configuration& cfg, const Plugin::Ptr& plugin)
    : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr),  // Disable default threads creation
      _cfg(cfg),
      _plugin(plugin) {
    // TODO: if your plugin supports device ID (more than a single instance of device can be on host machine)
    // you should select proper device based on KEY_DEVICE_ID or automatic behavior
    // In this case, _waitExecutor should also be created per device.
    try {
        CompileNetwork(function, inputInfoMap, outputsInfoMap);
        InitExecutor();  // creates thread-based executor using for async requests
    } catch (const InferenceEngine::Exception&) {
        // IE exceptions already carry a proper status code - propagate unchanged.
        throw;
    } catch (const std::exception& e) {
        IE_THROW(Unexpected) << "Standard exception from compilation library: " << e.what();
    } catch (...) {
        IE_THROW(Unexpected) << "Generic exception is thrown";
    }
}
// ! [executable_network:ctor_cnnnetwork]

// ! [executable_network:ctor_import_stream]
// Restores an executable network from a stream previously produced by ExportImpl.
// Stream layout (must mirror ExportImpl): [uint64 xml size][xml bytes][uint64 blob size][blob bytes].
// @param model   stream positioned at the beginning of the exported representation
// @param cfg     plugin configuration for the restored network
// @param plugin  owning plugin instance
// @throws InferenceEngine::Exception if the stream is truncated or compilation fails
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream& model, const Configuration& cfg, const Plugin::Ptr& plugin): _cfg(cfg), _plugin(plugin) {
    // read XML content
    std::string xmlString;
    std::uint64_t dataSize = 0;
    model.read(reinterpret_cast<char*>(&dataSize), sizeof(dataSize));
    xmlString.resize(dataSize);
    if (0 != dataSize) {
        // Writing through &xmlString[0] is well-defined since C++11; the previous
        // const_cast<char*>(xmlString.c_str()) wrote through a pointer-to-const.
        model.read(&xmlString[0], dataSize);
    }

    // read blob content (weights); size may legitimately be zero for weightless models
    InferenceEngine::Blob::Ptr dataBlob;
    model.read(reinterpret_cast<char*>(&dataSize), sizeof(dataSize));
    if (0 != dataSize) {
        dataBlob = InferenceEngine::make_shared_blob<std::uint8_t>(
            InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, {static_cast<std::size_t>(dataSize)}, InferenceEngine::Layout::C));
        dataBlob->allocate();
        model.read(dataBlob->buffer(), dataSize);
    }

    // Fail early on a truncated/corrupted stream instead of feeding garbage to ReadNetwork.
    if (!model) {
        IE_THROW() << "Failed to read an exported network from the model stream";
    }

    // TODO: implement Import / Export of configuration options and merge with `cfg`
    // TODO: implement Import / Export of network precisions, layouts, preprocessing info
    InferenceEngine::InputsDataMap inputInfoMap;
    InferenceEngine::OutputsDataMap outputInfoMap;

    auto cnnnetwork = _plugin->GetCore()->ReadNetwork(xmlString, std::move(dataBlob));

    setNetworkInputs(cnnnetwork.getInputsInfo());
    setNetworkOutputs(cnnnetwork.getOutputsInfo());
    SetPointerToPlugin(_plugin->shared_from_this());

    try {
        CompileNetwork(cnnnetwork.getFunction(), inputInfoMap, outputInfoMap);
        InitExecutor();  // creates thread-based executor using for async requests
    } catch (const InferenceEngine::Exception&) {
        // IE exceptions already carry a proper status code - propagate unchanged.
        throw;
    } catch (const std::exception& e) {
        IE_THROW(Unexpected) << "Standard exception from compilation library: " << e.what();
    } catch (...) {
        IE_THROW(Unexpected) << "Generic exception is thrown";
    }
}
// ! [executable_network:ctor_import_stream]

// ! [executable_network:map_graph]
// forward declaration
std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const ngraph::Function>& function, const InferenceEngine::InputsDataMap& inputInfoMap,
|
|
const InferenceEngine::OutputsDataMap& outputsInfoMap);
|
|
|
|
void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr<const ngraph::Function>& function,
|
|
const InferenceEngine::InputsDataMap& inputInfoMap,
|
|
const InferenceEngine::OutputsDataMap& outputsInfoMap) {
|
|
// TODO: perform actual graph compilation / mapping to backend graph representation / kernels
|
|
|
|
// apply plugins transformations
|
|
_function = TransformNetwork(function, inputInfoMap, outputsInfoMap);
|
|
|
|
// Generate backend specific blob mappings. For example Inference Engine uses not ngraph::Result nodes friendly name
|
|
// as inference request output names but the name of the layer before.
|
|
for (auto&& result : _function->get_results()) {
|
|
auto previousOutput = result->get_input_source_output(0);
|
|
auto outputName = previousOutput.get_node()->get_friendly_name();
|
|
if (previousOutput.get_node()->get_output_size() > 1) {
|
|
outputName += '.' + std::to_string(previousOutput.get_index());
|
|
}
|
|
_outputIndex.emplace(outputName, _function->get_result_index(result));
|
|
}
|
|
for (auto&& parameter : _function->get_parameters()) {
|
|
_inputIndex.emplace(parameter->get_friendly_name(), _function->get_parameter_index(parameter));
|
|
}
|
|
|
|
// Perform any other steps like allocation and filling backend specific memory handles and so on
|
|
}
// ! [executable_network:map_graph]

// ! [executable_network:init_executor]
void TemplatePlugin::ExecutableNetwork::InitExecutor() {
|
|
// Default multi-threaded configuration is balanced for throughtput and latency cases and takes into account
|
|
// real hardware cores and NUMA nodes.
|
|
auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
|
|
streamsExecutorConfig._name = "TemplateStreamsExecutor";
|
|
// As Inference Engine CPU Streams Executor creates some additional therads
|
|
// it is better to avoid threads recreateion as some OSs memory allocator can not manage such usage cases
|
|
// and memory consumption can be larger than it is expected.
|
|
// So Inference Engone provides executors cache.
|
|
_taskExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
|
|
// NOTE: callback Executor is not configured. So callback will be called in the thread of the last stage of inference request pipeline
|
|
// _callbackExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
|
|
}
// ! [executable_network:init_executor]

// ! [executable_network:create_infer_request_impl]
// Creates a synchronous inference request bound to this executable network.
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
                                                                                                      InferenceEngine::OutputsDataMap networkOutputs) {
    // The request keeps a shared reference to this network to pin its lifetime.
    auto self = std::static_pointer_cast<ExecutableNetwork>(shared_from_this());
    return std::make_shared<TemplateInferRequest>(networkInputs, networkOutputs, self);
}
// ! [executable_network:create_infer_request_impl]

// ! [executable_network:create_infer_request]
// Creates an asynchronous inference request that wraps a synchronous one.
InferenceEngine::IInferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequest() {
    // Build the underlying synchronous request first, then wrap it with the
    // async pipeline driven by the task / wait / callback executors.
    auto syncRequest = std::static_pointer_cast<TemplateInferRequest>(CreateInferRequestImpl(_networkInputs, _networkOutputs));
    return std::make_shared<TemplateAsyncInferRequest>(syncRequest, _taskExecutor, _plugin->_waitExecutor, _callbackExecutor);
}
// ! [executable_network:create_infer_request]

// ! [executable_network:get_config]
// Returns the value of one configuration key for this executable network.
// @param name  configuration key (see the SUPPORTED_CONFIG_KEYS metric)
// @return the key's current value; behavior for unknown keys is delegated to
//         Configuration::Get (presumably throws - confirm in template_config)
InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetConfig(const std::string& name) const {
    // All settings live in the Configuration snapshot captured at load time.
    return _cfg.Get(name);
}
// ! [executable_network:get_config]

// ! [executable_network:get_metric]
// Returns one of the executable-network metrics; throws for unsupported names.
InferenceEngine::Parameter TemplatePlugin::ExecutableNetwork::GetMetric(const std::string& name) const {
    // TODO: return more supported values for metrics
    if (name == EXEC_NETWORK_METRIC_KEY(SUPPORTED_METRICS)) {
        std::vector<std::string> supportedMetrics = {METRIC_KEY(NETWORK_NAME), METRIC_KEY(SUPPORTED_METRICS),
                                                     METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)};
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, supportedMetrics);
    } else if (name == EXEC_NETWORK_METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        // Plugin-specific keys plus everything the streams executor understands.
        std::vector<std::string> configKeys = {CONFIG_KEY(DEVICE_ID), CONFIG_KEY(PERF_COUNT), TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
        const auto streamExecutorConfigKeys = InferenceEngine::IStreamsExecutor::Config {}.SupportedKeys();
        configKeys.insert(configKeys.end(), streamExecutorConfigKeys.begin(), streamExecutorConfigKeys.end());
        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
    } else if (name == EXEC_NETWORK_METRIC_KEY(NETWORK_NAME)) {
        auto networkName = _function->get_friendly_name();
        IE_SET_METRIC_RETURN(NETWORK_NAME, networkName);
    } else if (name == EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
        // One in-flight request per configured execution stream.
        const unsigned int optimalNumber = _cfg._streamsExecutorConfig._streams;
        IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, optimalNumber);
    } else {
        IE_THROW() << "Unsupported ExecutableNetwork metric: " << name;
    }
}
// ! [executable_network:get_metric]

// ! [executable_network:export_impl]
// Serializes the compiled network to a stream so it can later be re-imported
// by the stream constructor. Layout: [uint64 xml size][xml bytes][uint64 blob size][blob bytes].
// @param modelStream  destination stream
void TemplatePlugin::ExecutableNetwork::ExportImpl(std::ostream& modelStream) {
    OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "ExecutableNetwork::ExportImpl");

    // Note: custom ngraph extensions are not supported
    std::map<std::string, ngraph::OpSet> custom_opsets;
    std::stringstream xmlFile, binFile;
    ngraph::pass::Serialize serializer(xmlFile, binFile, ngraph::pass::Serialize::Version::IR_V10, custom_opsets);
    serializer.run_on_function(_function);

    const auto m_constants = binFile.str();
    const auto m_model = xmlFile.str();

    // XML part: size prefix followed by the raw bytes.
    auto dataSize = static_cast<std::uint64_t>(m_model.size());
    modelStream.write(reinterpret_cast<const char*>(&dataSize), sizeof(dataSize));
    modelStream.write(m_model.data(), dataSize);

    // Weights part: data() (unlike &s[0]) is well-defined even for an empty string,
    // which happens for weightless models.
    dataSize = static_cast<std::uint64_t>(m_constants.size());
    modelStream.write(reinterpret_cast<const char*>(&dataSize), sizeof(dataSize));
    modelStream.write(m_constants.data(), dataSize);

    // TODO: implement network precision, layout, preprocessing info serialization
}
// ! [executable_network:export_impl]