Implemented inference in template plugin (#1308)
* Implemented inference in template plugin * Fixed tests * Removed thirdparty dependency * Simplified executor configuration * removed half * Fixed cmake * Fixed ngraph node check * device blob allocation * Fixed enum error
This commit is contained in:
@@ -45,14 +45,11 @@ namespace TemplateConfigParams {
|
||||
#define DECLARE_TEMPLATE_CONFIG_KEY(name) DECLARE_CONFIG_KEY(TEMPLATE_##name)
|
||||
#define DECLARE_TEMPLATE_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(TEMPLATE_##name)
|
||||
|
||||
|
||||
/**
|
||||
* @brief The key to define the type of transformations for TEMPLATE inputs and outputs.
|
||||
* TEMPLATE use custom data layout for input and output blobs. IE TEMPLATE Plugin provides custom
|
||||
* optimized version of transformation functions that do not use OpenMP and much more faster
|
||||
* than native TEMPLATE functions. Values: "NO" - optimized plugin transformations
|
||||
* are used, "YES" - native TEMPLATE transformations are used.
|
||||
* @brief Defines the number of throughput streams used by the TEMPLATE plugin.
|
||||
*/
|
||||
DECLARE_TEMPLATE_CONFIG_KEY(ANY_CONFIG_KEY);
|
||||
DECLARE_TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS);
|
||||
|
||||
|
||||
} // namespace TemplateConfigParams
|
||||
|
||||
@@ -20,14 +20,25 @@ ie_add_plugin(NAME ${TARGET_NAME}
|
||||
VERSION_DEFINES_FOR template_plugin.cpp)
|
||||
|
||||
target_include_directories(${TARGET_NAME} PRIVATE
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
|
||||
target_include_directories(${TARGET_NAME} PRIVATE
|
||||
"${IE_MAIN_TEMPLATE_PLUGIN_SOURCE_DIR}/include")
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE IE::inference_engine IE::inference_engine_transformations ${NGRAPH_LIBRARIES} ${INTEL_ITT_LIBS})
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE
|
||||
IE::inference_engine
|
||||
IE::inference_engine_transformations
|
||||
${INTEL_ITT_LIBS}
|
||||
${NGRAPH_LIBRARIES})
|
||||
|
||||
# Link inference backend library to plugin. Here we use ngraph interpreter_backend as example
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE
|
||||
ngraph_backend
|
||||
interpreter_backend)
|
||||
|
||||
# ATTENTION: uncomment to register a plugin in the plugins.xml file
|
||||
# ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
|
||||
# POSSIBLE_PLUGINS ${TARGET_NAME})
|
||||
# POSSIBLE_PLUGINS ${TARGET_NAME})
|
||||
# [cmake:plugin]
|
||||
|
||||
# ATTENTION: uncomment to install component
|
||||
|
||||
@@ -19,21 +19,28 @@ TemplateAsyncInferRequest::TemplateAsyncInferRequest(
|
||||
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) :
|
||||
AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor),
|
||||
_inferRequest(inferRequest), _waitExecutor(waitExecutor) {
|
||||
_pipeline = {
|
||||
{cpuTaskExecutor, [this] {
|
||||
IE_PROFILING_AUTO_SCOPE(PreprocessingAndStartPipeline)
|
||||
_inferRequest->inferPreprocess();
|
||||
_inferRequest->startPipeline();
|
||||
}},
|
||||
{_waitExecutor, [this] {
|
||||
IE_PROFILING_AUTO_SCOPE(WaitPipeline)
|
||||
_inferRequest->waitPipeline();
|
||||
}},
|
||||
{cpuTaskExecutor, [this] {
|
||||
IE_PROFILING_AUTO_SCOPE(Postprocessing)
|
||||
_inferRequest->inferPostprocess();
|
||||
}}
|
||||
};
|
||||
constexpr const auto remoteDevice = false;
|
||||
// By default single stage pipeline is created.
|
||||
// This stage executes InferRequest::Infer() using cpuTaskExecutor.
|
||||
// But if a remote asynchronous device is used, the pipeline can be split into tasks that are executed by cpuTaskExecutor
|
||||
// and waiting tasks. Waiting tasks can lock execution thread so they use separate threads from other executor.
|
||||
if (remoteDevice) {
|
||||
_pipeline = {
|
||||
{cpuTaskExecutor, [this] {
|
||||
IE_PROFILING_AUTO_SCOPE(PreprocessingAndStartPipeline)
|
||||
_inferRequest->inferPreprocess();
|
||||
_inferRequest->startPipeline();
|
||||
}},
|
||||
{_waitExecutor, [this] {
|
||||
IE_PROFILING_AUTO_SCOPE(WaitPipeline)
|
||||
_inferRequest->waitPipeline();
|
||||
}},
|
||||
{cpuTaskExecutor, [this] {
|
||||
IE_PROFILING_AUTO_SCOPE(Postprocessing)
|
||||
_inferRequest->inferPostprocess();
|
||||
}}
|
||||
};
|
||||
}
|
||||
}
|
||||
// ! [async_infer_request:ctor]
|
||||
|
||||
|
||||
@@ -9,10 +9,12 @@
|
||||
|
||||
#include <ie_util_internal.hpp>
|
||||
#include <ie_plugin_config.hpp>
|
||||
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
|
||||
#include <file_utils.h>
|
||||
#include <cpp_interfaces/exception2status.hpp>
|
||||
|
||||
#include "template_config.hpp"
|
||||
#include "template/template_config.hpp"
|
||||
|
||||
using namespace TemplatePlugin;
|
||||
|
||||
@@ -20,12 +22,22 @@ Configuration::Configuration() { }
|
||||
|
||||
Configuration::Configuration(const ConfigMap& config, const Configuration & defaultCfg, bool throwOnUnsupported) {
|
||||
*this = defaultCfg;
|
||||
// If plugin needs to use InferenceEngine::StreamsExecutor it should be able to process its configuration
|
||||
auto streamExecutorConfigKeys = _streamsExecutorConfig.SupportedKeys();
|
||||
for (auto&& c : config) {
|
||||
const auto& key = c.first;
|
||||
const auto& value = c.second;
|
||||
|
||||
if (CONFIG_KEY(DEVICE_ID) == key) {
|
||||
if (TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) == key) {
|
||||
_streamsExecutorConfig.SetConfig(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value);
|
||||
} else if (streamExecutorConfigKeys.end() !=
|
||||
std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
|
||||
_streamsExecutorConfig.SetConfig(key, value);
|
||||
} else if (CONFIG_KEY(DEVICE_ID) == key) {
|
||||
deviceId = std::stoi(value);
|
||||
if (deviceId > 0) {
|
||||
THROW_IE_EXCEPTION << "Device ID " << deviceId << " is not supported";
|
||||
}
|
||||
} else if (CONFIG_KEY(PERF_COUNT) == key) {
|
||||
perfCount = (CONFIG_VALUE(YES) == value);
|
||||
} else if (throwOnUnsupported) {
|
||||
@@ -39,6 +51,14 @@ InferenceEngine::Parameter Configuration::Get(const std::string& name) const {
|
||||
return {std::to_string(deviceId)};
|
||||
} else if (name == CONFIG_KEY(PERF_COUNT)) {
|
||||
return {perfCount};
|
||||
} else if (name == TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) || name == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) {
|
||||
return {std::to_string(_streamsExecutorConfig._streams)};
|
||||
} else if (name == CONFIG_KEY(CPU_BIND_THREAD)) {
|
||||
return const_cast<InferenceEngine::IStreamsExecutor::Config&>(_streamsExecutorConfig).GetConfig(name);
|
||||
} else if (name == CONFIG_KEY(CPU_THREADS_NUM)) {
|
||||
return {std::to_string(_streamsExecutorConfig._threads)};
|
||||
} else if (name == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) {
|
||||
return {std::to_string(_streamsExecutorConfig._threadsPerStream)};
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << NOT_FOUND_str << ": " << name;
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
|
||||
#include <ie_parameter.hpp>
|
||||
|
||||
#include <threading/ie_istreams_executor.hpp>
|
||||
|
||||
namespace TemplatePlugin {
|
||||
|
||||
template<typename T>
|
||||
@@ -34,6 +36,7 @@ struct Configuration {
|
||||
|
||||
int deviceId = 0;
|
||||
bool perfCount = true;
|
||||
InferenceEngine::IStreamsExecutor::Config _streamsExecutorConfig;
|
||||
};
|
||||
// ! [configuration:header]
|
||||
|
||||
|
||||
@@ -16,40 +16,31 @@
|
||||
#include <threading/ie_executor_manager.hpp>
|
||||
#include <details/ie_cnn_network_tools.h>
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
|
||||
#include <transformations/common_optimizations/common_optimizations.hpp>
|
||||
|
||||
#include "template/template_config.hpp"
|
||||
#include "template_plugin.hpp"
|
||||
#include "template_executable_network.hpp"
|
||||
#include "template_pattern_transformation.hpp"
|
||||
|
||||
using namespace TemplatePlugin;
|
||||
|
||||
// ! [executable_network:ctor_cnnnetwork]
|
||||
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(InferenceEngine::ICNNNetwork& network,
|
||||
const Configuration& cfg):
|
||||
_name(network.getName()),
|
||||
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<ngraph::Function>& function,
|
||||
const Configuration& cfg,
|
||||
const Plugin::Ptr& plugin) :
|
||||
InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation
|
||||
_cfg(cfg),
|
||||
_waitExecutor(InferenceEngine::ExecutorManager::getInstance()->getExecutor("Template")) {
|
||||
_plugin(plugin),
|
||||
_function(function) {
|
||||
// TODO: if your plugin supports device ID (more than a single instance of the device can be on the host machine)
|
||||
// you should select proper device based on KEY_DEVICE_ID or automatic behavior
|
||||
// In this case, _waitExecutor should also be created per device.
|
||||
|
||||
try {
|
||||
if (std::shared_ptr<const ngraph::Function> ngraphFunction = network.getFunction()) {
|
||||
CompileGraph(ngraphFunction);
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << "TEMPLATE plugin can compile only IR v10 networks";
|
||||
}
|
||||
}
|
||||
catch (const InferenceEngineException & e) {
|
||||
throw e;
|
||||
}
|
||||
catch (const std::exception & e) {
|
||||
CompileGraph();
|
||||
InitExecutor();
|
||||
} catch (const InferenceEngineException&) {
|
||||
throw;
|
||||
} catch (const std::exception & e) {
|
||||
THROW_IE_EXCEPTION << "Standard exception from compilation library: " << e.what();
|
||||
}
|
||||
catch (...) {
|
||||
} catch (...) {
|
||||
THROW_IE_EXCEPTION << "Generic exception is thrown";
|
||||
}
|
||||
}
|
||||
@@ -57,53 +48,53 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(InferenceEngine::ICNNNetwor
|
||||
|
||||
// ! [executable_network:ctor_import_stream]
|
||||
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream & model,
|
||||
const Configuration& cfg) :
|
||||
_cfg(cfg) {
|
||||
const Configuration& cfg,
|
||||
const Plugin::Ptr& plugin) :
|
||||
_cfg(cfg),
|
||||
_plugin(plugin) {
|
||||
// TODO: since Import network is not a mandatory functionality, this ctor can just be removed
|
||||
}
|
||||
// ! [executable_network:ctor_import_stream]
|
||||
|
||||
// ! [executable_network:compile_graph]
|
||||
void TemplatePlugin::ExecutableNetwork::CompileGraph(const std::shared_ptr<const ngraph::Function> & ngraphFunction) {
|
||||
void TemplatePlugin::ExecutableNetwork::CompileGraph() {
|
||||
// TODO: perform actual graph compilation taking `_cfg` into account
|
||||
|
||||
// 1.Copy ngraph::Function first to apply some transformations later in
|
||||
// ExecutableNetwork::CompileGraph, which modify original ngraph::Function
|
||||
const bool shareConsts = false, constFolding = false;
|
||||
std::vector<::ngraph::element::Type> new_types;
|
||||
std::vector<::ngraph::PartialShape> new_shapes;
|
||||
|
||||
for (const auto ¶meter : ngraphFunction->get_parameters()) {
|
||||
new_shapes.emplace_back(parameter->get_partial_shape());
|
||||
new_types.emplace_back(parameter->get_element_type());
|
||||
// Generate backend specific blob mappings. For example, Inference Engine does not use the ngraph::Result node friendly name
|
||||
// as inference request output names but the name of the layer before.
|
||||
for (auto&& result : _function->get_results()) {
|
||||
auto previousOutput = result->get_input_source_output(0);
|
||||
auto outputName = previousOutput.get_node()->get_friendly_name();
|
||||
if (previousOutput.get_node()->get_output_size() > 1) {
|
||||
outputName += '.' + std::to_string(previousOutput.get_index());
|
||||
}
|
||||
_outputIndex.emplace(outputName, _function->get_result_index(result));
|
||||
}
|
||||
for (auto&& parameter : _function->get_parameters()) {
|
||||
_inputIndex.emplace(parameter->get_friendly_name(), _function->get_parameter_index(parameter));
|
||||
}
|
||||
|
||||
auto copyFunction = ngraph::specialize_function(std::const_pointer_cast<ngraph::Function>(ngraphFunction),
|
||||
new_types, new_shapes, std::vector<void *>(new_types.size(), nullptr), constFolding, shareConsts);
|
||||
|
||||
// 2. Perform common optimizations and device-specific transformations
|
||||
ngraph::pass::Manager passManager;
|
||||
// Example: register CommonOptimizations transformation from transformations library
|
||||
passManager.register_pass<ngraph::pass::CommonOptimizations>();
|
||||
// Example: register plugin specific transformation
|
||||
passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
|
||||
passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();
|
||||
// Register any other transformations
|
||||
// ..
|
||||
|
||||
// After `run_passes`, we have the transformed function, where operations match device operations,
|
||||
// and we can create device hardware-dependent graph
|
||||
passManager.run_passes(copyFunction);
|
||||
|
||||
// 3. Iterate over operations and create hardware-specific ngraph
|
||||
for (const auto& op : copyFunction->get_ordered_ops()) {
|
||||
// TODO: map ngraph `op` to device operation
|
||||
}
|
||||
|
||||
// 4. Perform any other steps like allocation and filling device buffers, and so on
|
||||
// Perform any other steps like allocation and filling device buffers, and so on
|
||||
}
|
||||
// ! [executable_network:compile_graph]
|
||||
|
||||
// ! [executable_network:init_executor]
|
||||
void TemplatePlugin::ExecutableNetwork::InitExecutor() {
|
||||
// Default multithreaded configuration is balanced for throughput and latency cases and takes into account
|
||||
// real hardware cores and NUMA nodes.
|
||||
auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
|
||||
streamsExecutorConfig._name = "TemplateStreamsExecutor";
|
||||
// As Inference Engine CPU Streams Executor creates some additional threads
|
||||
// it is better to avoid thread recreation, as some OS memory allocators cannot manage such usage cases
|
||||
// and memory consumption can be larger than it is expected.
|
||||
// So Inference Engine provides an executors cache.
|
||||
_taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
|
||||
// NOTE: callback Executor is not configured. So the callback will be called in the thread of the last stage of the inference request pipeline
|
||||
// _callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
|
||||
}
|
||||
// ! [executable_network:init_executor]
|
||||
|
||||
|
||||
// ! [executable_network:create_infer_request_impl]
|
||||
InferenceEngine::InferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs) {
|
||||
@@ -115,7 +106,7 @@ InferenceEngine::InferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::Cr
|
||||
void TemplatePlugin::ExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& asyncRequest) {
|
||||
auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
|
||||
auto asyncThreadSafeImpl = std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest),
|
||||
_taskExecutor, _waitExecutor, _callbackExecutor);
|
||||
_taskExecutor, _plugin->_waitExecutor, _callbackExecutor);
|
||||
asyncRequest.reset(new InferenceEngine::InferRequestBase<TemplateAsyncInferRequest>(asyncThreadSafeImpl),
|
||||
[](InferenceEngine::IInferRequest *p) { p->Release(); });
|
||||
asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
|
||||
@@ -124,13 +115,7 @@ void TemplatePlugin::ExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& a
|
||||
|
||||
// ! [executable_network:get_config]
|
||||
void TemplatePlugin::ExecutableNetwork::GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const {
|
||||
// TODO: return more supported values for config keys
|
||||
if (name == CONFIG_KEY(DEVICE_ID) ||
|
||||
name == CONFIG_KEY(PERF_COUNT)) {
|
||||
result = _cfg.Get(name);
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork config key: " << name;
|
||||
}
|
||||
result = _cfg.Get(name);
|
||||
}
|
||||
// ! [executable_network:get_config]
|
||||
|
||||
@@ -144,14 +129,20 @@ void TemplatePlugin::ExecutableNetwork::GetMetric(const std::string &name, Infer
|
||||
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
|
||||
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
|
||||
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
|
||||
result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, std::vector<std::string>{
|
||||
std::vector<std::string> configKeys = {
|
||||
CONFIG_KEY(DEVICE_ID),
|
||||
CONFIG_KEY(PERF_COUNT)});
|
||||
CONFIG_KEY(PERF_COUNT),
|
||||
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) };
|
||||
auto streamExecutorConfigKeys = IStreamsExecutor::Config{}.SupportedKeys();
|
||||
for (auto&& configKey : streamExecutorConfigKeys) {
|
||||
configKeys.emplace_back(configKey);
|
||||
}
|
||||
result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys);
|
||||
} else if (METRIC_KEY(NETWORK_NAME) == name) {
|
||||
result = IE_SET_METRIC(NETWORK_NAME, _name);
|
||||
auto networkName = _function->get_friendly_name();
|
||||
result = IE_SET_METRIC(NETWORK_NAME, networkName);
|
||||
} else if (METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS) == name) {
|
||||
// TODO: fill with actual number
|
||||
unsigned int value = 1;
|
||||
unsigned int value = _cfg._streamsExecutorConfig._streams;
|
||||
result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, value);
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
|
||||
namespace TemplatePlugin {
|
||||
|
||||
class Engine;
|
||||
class Plugin;
|
||||
|
||||
/**
|
||||
* @class ExecutableNetwork
|
||||
@@ -36,11 +36,13 @@ class Engine;
|
||||
// ! [executable_network:header]
|
||||
class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
|
||||
public:
|
||||
ExecutableNetwork(InferenceEngine::ICNNNetwork& network,
|
||||
const Configuration& cfg);
|
||||
ExecutableNetwork(const std::shared_ptr<ngraph::Function>& function,
|
||||
const Configuration& cfg,
|
||||
const std::shared_ptr<Plugin>& plugin);
|
||||
|
||||
ExecutableNetwork(std::istream & model,
|
||||
const Configuration& cfg);
|
||||
ExecutableNetwork(std::istream& model,
|
||||
const Configuration& cfg,
|
||||
const std::shared_ptr<Plugin>& plugin);
|
||||
|
||||
~ExecutableNetwork() override = default;
|
||||
|
||||
@@ -53,15 +55,18 @@ public:
|
||||
void GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
|
||||
void GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
|
||||
|
||||
std::atomic<std::size_t> _requestId = {0};
|
||||
std::string _name;
|
||||
Configuration _cfg;
|
||||
|
||||
private:
|
||||
void CompileGraph(const std::shared_ptr<const ngraph::Function> & ngraphFunction);
|
||||
friend class TemplateInferRequest;
|
||||
|
||||
std::shared_ptr<Engine> _plugin;
|
||||
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
|
||||
void CompileGraph();
|
||||
void InitExecutor();
|
||||
|
||||
std::atomic<std::size_t> _requestId = {0};
|
||||
Configuration _cfg;
|
||||
std::shared_ptr<Plugin> _plugin;
|
||||
std::shared_ptr<ngraph::Function> _function;
|
||||
std::map<std::string, std::size_t> _inputIndex;
|
||||
std::map<std::string, std::size_t> _outputIndex;
|
||||
};
|
||||
// ! [executable_network:header]
|
||||
|
||||
|
||||
@@ -18,17 +18,16 @@
|
||||
#include <ie_parallel.hpp>
|
||||
#include <ie_memcpy.h>
|
||||
#include <precision_utils.h>
|
||||
#include <template/template_config.hpp>
|
||||
|
||||
#include "template/template_config.hpp"
|
||||
#include "template_infer_request.hpp"
|
||||
#include "template_executable_network.hpp"
|
||||
#include "template_plugin.hpp"
|
||||
|
||||
using namespace TemplatePlugin;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
using Time = std::chrono::high_resolution_clock;
|
||||
using ns = std::chrono::nanoseconds;
|
||||
using fsec = std::chrono::duration<float>;
|
||||
|
||||
// ! [infer_request:ctor]
|
||||
TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
|
||||
@@ -38,10 +37,9 @@ TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap&
|
||||
_executableNetwork(executableNetwork) {
|
||||
// TODO: allocate infer request device and host buffers if needed, fill actual list of profiling tasks
|
||||
|
||||
auto requestID = std::to_string(_executableNetwork->_requestId);
|
||||
_executableNetwork->_requestId++;
|
||||
auto requestID = std::to_string(_executableNetwork->_requestId.fetch_add(1));
|
||||
|
||||
std::string name = _executableNetwork->_name + "_Req" + requestID;
|
||||
std::string name = _executableNetwork->_function->get_friendly_name() + "_Req" + requestID;
|
||||
_profilingTask = { {
|
||||
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Preprocess") },
|
||||
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Postprocess") },
|
||||
@@ -49,9 +47,12 @@ TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap&
|
||||
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_WaitPipline") },
|
||||
} };
|
||||
|
||||
_executable = _executableNetwork->_plugin->_backend->compile(_executableNetwork->_function);
|
||||
_parameters = _executableNetwork->_function->get_parameters();
|
||||
_results = _executableNetwork->_function->get_results();
|
||||
|
||||
allocateDeviceBuffers();
|
||||
allocateInputBlobs();
|
||||
allocateOutputBlobs();
|
||||
allocateBlobs();
|
||||
}
|
||||
// ! [infer_request:ctor]
|
||||
|
||||
@@ -62,92 +63,66 @@ TemplateInferRequest::~TemplateInferRequest() {
|
||||
// ! [infer_request:dtor]
|
||||
|
||||
void TemplateInferRequest::allocateDeviceBuffers() {
|
||||
// TODO: allocate device buffers if Template device is a remote one
|
||||
// Allocate plugin backend specific memory handles
|
||||
_inputTensors.resize(_networkInputs.size());
|
||||
_outputTensors.resize(_networkOutputs.size());
|
||||
}
|
||||
|
||||
void TemplateInferRequest::allocateInputBlobs() {
|
||||
for (auto &networkInput : _networkInputs) {
|
||||
SizeVector dims = networkInput.second->getTensorDesc().getDims();
|
||||
Precision precision = networkInput.second->getTensorDesc().getPrecision();
|
||||
Layout input_layout = networkInput.second->getInputData()->getLayout();
|
||||
Blob::Ptr inputBlob;
|
||||
Blob::Ptr inputBlobNCHW;
|
||||
template<typename BlobDataMap, typename GetNetworkPrecisionF>
|
||||
static void AllocateImpl(const BlobDataMap& blobDataMap,
|
||||
BlobMap& blobMap,
|
||||
BlobMap& networkBlobMap,
|
||||
GetNetworkPrecisionF&& GetNetworkPrecision) {
|
||||
for (auto&& blobData : blobDataMap) {
|
||||
auto& dims = blobData.second->getTensorDesc().getDims();
|
||||
auto& precision = blobData.second->getTensorDesc().getPrecision();
|
||||
auto layout = blobData.second->getTensorDesc().getLayout();
|
||||
Blob::Ptr blob;
|
||||
switch (precision) {
|
||||
case Precision::FP32 :
|
||||
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<float>({ precision, dims, input_layout });
|
||||
if (input_layout == Layout::NHWC) {
|
||||
inputBlobNCHW = InferenceEngine::make_shared_blob<float>({ precision, dims, Layout::NCHW });
|
||||
}
|
||||
break;
|
||||
case Precision::FP16 :
|
||||
case Precision::I16 :
|
||||
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, input_layout });
|
||||
if (input_layout == Layout::NHWC) {
|
||||
inputBlobNCHW = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, Layout::NCHW });
|
||||
}
|
||||
break;
|
||||
case Precision::U8 :
|
||||
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<uint8_t>({ precision, dims, input_layout });
|
||||
if (input_layout == Layout::NHWC) {
|
||||
inputBlobNCHW = InferenceEngine::make_shared_blob<uint8_t>({ precision, dims, Layout::NCHW });
|
||||
}
|
||||
break;
|
||||
default:
|
||||
THROW_IE_EXCEPTION << "Unsupported network precision: " << precision
|
||||
<< precision << "! Supported precisions are: FP32, FP16, I16, U8";
|
||||
case Precision::U8: {
|
||||
blob = InferenceEngine::make_shared_blob<std::uint8_t>({precision, dims, layout});
|
||||
} break;
|
||||
case Precision::FP32 : {
|
||||
blob = InferenceEngine::make_shared_blob<float>({precision, dims, layout});
|
||||
} break;
|
||||
default: THROW_IE_EXCEPTION << "Template Plugin: Unsupported Input/Output Presision";
|
||||
}
|
||||
// allocate the input blob
|
||||
inputBlob->allocate();
|
||||
_inputs[networkInput.first] = inputBlob;
|
||||
if (inputBlobNCHW != inputBlob) {
|
||||
inputBlobNCHW->allocate();
|
||||
blob->allocate();
|
||||
blobMap[blobData.first] = blob;
|
||||
|
||||
auto networkPresion = GetNetworkPrecision(blobData.first);
|
||||
Blob::Ptr networkBlob;
|
||||
switch (networkPresion) {
|
||||
case ngraph::element::Type_t::f32 : {
|
||||
if (precision == Precision::FP32) {
|
||||
networkBlob = blob;
|
||||
} else {
|
||||
networkBlob = InferenceEngine::make_shared_blob<float>({Precision::FP32, dims, layout});
|
||||
}
|
||||
} break;
|
||||
default: THROW_IE_EXCEPTION << "Template Plugin: Unsupported network Input/Output Presision";
|
||||
}
|
||||
_inputsNCHW[networkInput.first] = inputBlobNCHW;
|
||||
if (blob != networkBlob) {
|
||||
networkBlob->allocate();
|
||||
}
|
||||
networkBlobMap[blobData.first] = networkBlob;
|
||||
}
|
||||
}
|
||||
|
||||
void TemplateInferRequest::allocateOutputBlobs() {
|
||||
for (auto &networkOutput : _networkOutputs) {
|
||||
SizeVector dims = networkOutput.second->getTensorDesc().getDims();
|
||||
Precision precision = networkOutput.second->getPrecision();
|
||||
Blob::Ptr outputBlob;
|
||||
|
||||
// allocate the output blob
|
||||
Blob::Ptr outputBlobNCHW;
|
||||
switch (precision) {
|
||||
case Precision::FP32 :
|
||||
outputBlobNCHW = outputBlob = InferenceEngine::make_shared_blob<float>({ precision, dims, networkOutput.second->getLayout() });
|
||||
if (networkOutput.second->getLayout() == Layout::NHWC) {
|
||||
outputBlobNCHW = InferenceEngine::make_shared_blob<float>({ precision, dims, Layout::NCHW });
|
||||
}
|
||||
break;
|
||||
case Precision::FP16 :
|
||||
outputBlobNCHW = outputBlob = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, networkOutput.second->getLayout() });
|
||||
if (networkOutput.second->getLayout() == Layout::NHWC) {
|
||||
outputBlobNCHW = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, Layout::NCHW });
|
||||
}
|
||||
break;
|
||||
default:
|
||||
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported output precision: "
|
||||
<< precision << "! Supported precisions are: FP32, FP16";
|
||||
}
|
||||
// allocate the output blob
|
||||
outputBlob->allocate();
|
||||
_outputs[networkOutput.first] = outputBlob;
|
||||
if (outputBlobNCHW != outputBlob) {
|
||||
outputBlobNCHW->allocate();
|
||||
}
|
||||
_outputsNCHW[networkOutput.first] = outputBlobNCHW;
|
||||
}
|
||||
|
||||
if (_networkOutputs.empty() || _networkInputs.empty()) {
|
||||
THROW_IE_EXCEPTION << "Internal error: no information about network's output/input";
|
||||
}
|
||||
void TemplateInferRequest::allocateBlobs() {
|
||||
auto&& parameters = _executableNetwork->_function->get_parameters();
|
||||
AllocateImpl(_networkInputs, _inputs, _networkInputBlobs, [&] (const std::string& blobName) {
|
||||
return parameters.at(_executableNetwork->_inputIndex.at(blobName))->get_element_type();
|
||||
});
|
||||
auto&& results = _executableNetwork->_function->get_results();
|
||||
AllocateImpl(_networkOutputs, _outputs, _networkOutputBlobs, [&] (const std::string& blobName) {
|
||||
return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type();
|
||||
});
|
||||
}
|
||||
|
||||
// ! [infer_request:infer_impl]
|
||||
void TemplateInferRequest::InferImpl() {
|
||||
// TODO: fill with actual list of pipeline stages, which are executed syncronously for sync infer requests
|
||||
// TODO: fill with actual list of pipeline stages, which are executed synchronously for sync infer requests
|
||||
inferPreprocess();
|
||||
startPipeline();
|
||||
waitPipeline();
|
||||
@@ -155,50 +130,109 @@ void TemplateInferRequest::InferImpl() {
|
||||
}
|
||||
// ! [infer_request:infer_impl]
|
||||
|
||||
// ! [infer_request:infer_preprocess]
|
||||
void TemplateInferRequest::inferPreprocess() {
|
||||
auto prev = Time::now();
|
||||
template<typename SrcT, typename DstT>
|
||||
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
std::copy_n(InferenceEngine::as<InferenceEngine::MemoryBlob>(src)->rmap().as<const SrcT*>(),
|
||||
src->size(),
|
||||
InferenceEngine::as<InferenceEngine::MemoryBlob>(dst)->wmap().as<DstT*>());
|
||||
}
|
||||
|
||||
// execute input pre-processing.
|
||||
InferRequestInternal::execDataPreprocessing(_inputs);
|
||||
|
||||
for (auto &input : InferRequestInternal::_inputs) {
|
||||
auto& src = input.second;
|
||||
auto& dst = _inputsNCHW[input.first];
|
||||
if (src != dst) {
|
||||
if (src->getTensorDesc().getPrecision() == dst->getTensorDesc().getPrecision()
|
||||
&& src->getTensorDesc().getDims() == dst->getTensorDesc().getDims()
|
||||
&& src->getTensorDesc().getLayout() == dst->getTensorDesc().getLayout()) {
|
||||
_inputsNCHW[input.first] = input.second;
|
||||
} else { // Convert Layout to NCHW
|
||||
InferenceEngine::blob_copy(src, dst);
|
||||
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
|
||||
switch (src->getTensorDesc().getPrecision()) {
|
||||
case Precision::U8 : {
|
||||
switch (dst->getTensorDesc().getPrecision()) {
|
||||
case Precision::U8 : break;
|
||||
case Precision::FP32 : {
|
||||
blobCopy<std::uint8_t, float>(src, dst);
|
||||
} break;
|
||||
default : {
|
||||
THROW_IE_EXCEPTION << "Unsupported precision conversion from "
|
||||
<< src->getTensorDesc().getPrecision() <<" to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case Precision::FP32 : {
|
||||
switch (dst->getTensorDesc().getPrecision()) {
|
||||
case Precision::FP32 : break;
|
||||
case Precision::U8 : {
|
||||
blobCopy<float, std::uint8_t>(src, dst);
|
||||
} break;
|
||||
default : {
|
||||
THROW_IE_EXCEPTION << "Unsupported precision conversion from "
|
||||
<< src->getTensorDesc().getPrecision() <<" to " << dst->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
} break;
|
||||
default : {
|
||||
THROW_IE_EXCEPTION << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Preprocessing on inputs if needed: work _inputsNCHW
|
||||
|
||||
_inputPreprocessTime = static_cast<double>(std::chrono::duration_cast<ns>(Time::now() - prev).count());
|
||||
// ! [infer_request:infer_preprocess]
|
||||
void TemplateInferRequest::inferPreprocess() {
|
||||
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[Preprocess]);
|
||||
auto start = Time::now();
|
||||
// NOTE: After InferRequestInternal::execDataPreprocessing call
|
||||
// the input can point to a different memory region than the one allocated in the constructor.
|
||||
InferRequestInternal::execDataPreprocessing(_inputs);
|
||||
for (auto&& input : _inputs) {
|
||||
auto inputBlob = input.second;
|
||||
auto networkInput = _networkInputBlobs[input.first];
|
||||
if (inputBlob->getTensorDesc().getPrecision() == networkInput->getTensorDesc().getPrecision()) {
|
||||
networkInput = inputBlob;
|
||||
} else {
|
||||
blobCopy(inputBlob, networkInput);
|
||||
}
|
||||
auto index = _executableNetwork->_inputIndex[input.first];
|
||||
const auto& parameter = _parameters[index];
|
||||
const auto& parameterShape = parameter->get_shape();
|
||||
const auto& parameterType = parameter->get_element_type();
|
||||
_inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(parameterType, parameterShape,
|
||||
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput)->rmap().as<void*>());
|
||||
}
|
||||
for (auto&& output : _outputs) {
|
||||
auto outputBlob = output.second;
|
||||
auto networkOutput = _networkOutputBlobs[output.first];
|
||||
auto index = _executableNetwork->_outputIndex[output.first];
|
||||
if (outputBlob->getTensorDesc().getPrecision() == networkOutput->getTensorDesc().getPrecision()) {
|
||||
networkOutput = outputBlob;
|
||||
}
|
||||
const auto& result = _results[index];
|
||||
const auto& resultShape = result->get_shape();
|
||||
const auto& resultType = result->get_element_type();
|
||||
_outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(resultType, resultShape,
|
||||
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkOutput)->wmap().as<void*>());
|
||||
}
|
||||
_durations[Preprocess] = Time::now() - start;
|
||||
}
|
||||
// ! [infer_request:infer_preprocess]
|
||||
|
||||
void TemplateInferRequest::startPipeline() {
|
||||
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[StartPipeline])
|
||||
// TODO: Start pipeline and fill _inputTransferTime, _executeTime, _outputTransferTime
|
||||
auto start = Time::now();
|
||||
_executable->call(_outputTensors, _inputTensors);
|
||||
_durations[StartPipeline] = Time::now() - start;
|
||||
}
|
||||
|
||||
/**
 * @brief Waits for the pipeline started by startPipeline() to finish.
 *
 * Currently a placeholder: no waiting is performed, only the (near-zero) stage
 * duration is recorded in _durations[WaitPipeline].
 */
void TemplateInferRequest::waitPipeline() {
    IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[WaitPipeline])
    auto start = Time::now();
    // TODO: Wait pipeline using driver API or other synchronization methods
    _durations[WaitPipeline] = Time::now() - start;
}
|
||||
|
||||
void TemplateInferRequest::inferPostprocess() {
|
||||
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[Postprocess])
|
||||
auto prev = Time::now();
|
||||
// TODO: perform post-processing and convert to NHWC layout
|
||||
_outputPostProcessTime = static_cast<double>(std::chrono::duration_cast<ns>(Time::now() - prev).count());
|
||||
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[Postprocess]);
|
||||
auto start = Time::now();
|
||||
for (auto&& output : _outputs) {
|
||||
auto outputBlob = output.second;
|
||||
auto networkOutput = _networkOutputBlobs[output.first];
|
||||
if (outputBlob->getTensorDesc().getPrecision() != networkOutput->getTensorDesc().getPrecision()) {
|
||||
blobCopy(networkOutput, outputBlob);
|
||||
}
|
||||
}
|
||||
_durations[Postprocess] = Time::now() - start;
|
||||
}
|
||||
|
||||
// ! [infer_request:get_performance_counts]
|
||||
@@ -206,18 +240,19 @@ void TemplateInferRequest::GetPerformanceCounts(std::map<std::string, InferenceE
|
||||
InferenceEngineProfileInfo info;
|
||||
info.execution_index = 0;
|
||||
info.status = InferenceEngineProfileInfo::EXECUTED;
|
||||
info.cpu_uSec = info.realTime_uSec = _inputPreprocessTime / 1000;
|
||||
info.cpu_uSec = info.realTime_uSec = _durations[Preprocess].count();
|
||||
perfMap["1. input preprocessing"] = info;
|
||||
info.cpu_uSec = 0;
|
||||
info.realTime_uSec = _inputTransferTime / 1000;
|
||||
info.realTime_uSec = 0;
|
||||
perfMap["2. input transfer to a device"] = info;
|
||||
info.cpu_uSec = 0;
|
||||
info.realTime_uSec = _executeTime / 1000;
|
||||
info.status = InferenceEngineProfileInfo::EXECUTED;
|
||||
info.cpu_uSec = info.realTime_uSec = _durations[StartPipeline].count();
|
||||
perfMap["3. execution time"] = info;
|
||||
info.cpu_uSec = 0;
|
||||
info.realTime_uSec = _outputTransferTime / 1000;
|
||||
info.realTime_uSec = 0;
|
||||
perfMap["4. output transfer from a device"] = info;
|
||||
info.cpu_uSec = info.realTime_uSec = _outputPostProcessTime / 1000;
|
||||
info.cpu_uSec = info.realTime_uSec = _durations[Postprocess].count();
|
||||
perfMap["5. output postprocessing"] = info;
|
||||
}
|
||||
// ! [infer_request:get_performance_counts]
|
||||
|
||||
@@ -17,8 +17,13 @@
|
||||
#include <cpp_interfaces/impl/ie_executable_network_internal.hpp>
|
||||
#include <threading/ie_itask_executor.hpp>
|
||||
|
||||
#include <ngraph/runtime/tensor.hpp>
|
||||
#include <ngraph/runtime/tensor.hpp>
|
||||
#include <executable.hpp>
|
||||
|
||||
#include "template_config.hpp"
|
||||
|
||||
|
||||
namespace TemplatePlugin {
|
||||
|
||||
class ExecutableNetwork;
|
||||
@@ -46,8 +51,7 @@ public:
|
||||
|
||||
private:
|
||||
void allocateDeviceBuffers();
|
||||
void allocateInputBlobs();
|
||||
void allocateOutputBlobs();
|
||||
void allocateBlobs();
|
||||
|
||||
enum {
|
||||
Preprocess,
|
||||
@@ -57,17 +61,18 @@ private:
|
||||
numOfStages
|
||||
};
|
||||
|
||||
std::array<InferenceEngine::ProfilingTask, numOfStages> _profilingTask;
|
||||
std::array<InferenceEngine::ProfilingTask, numOfStages> _profilingTask;
|
||||
// for performance counters
|
||||
std::array<std::chrono::duration<float, std::micro>, numOfStages> _durations;
|
||||
|
||||
InferenceEngine::BlobMap _inputsNCHW;
|
||||
InferenceEngine::BlobMap _outputsNCHW;
|
||||
InferenceEngine::BlobMap _networkInputBlobs;
|
||||
InferenceEngine::BlobMap _networkOutputBlobs;
|
||||
ngraph::ParameterVector _parameters;
|
||||
ngraph::ResultVector _results;
|
||||
|
||||
// for performance counts
|
||||
double _inputPreprocessTime = 0.0;
|
||||
double _inputTransferTime = 0.0;
|
||||
double _executeTime = 0.0;
|
||||
double _outputTransferTime = 0.0;
|
||||
double _outputPostProcessTime = 0.0;
|
||||
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> _inputTensors;
|
||||
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> _outputTensors;
|
||||
std::shared_ptr<ngraph::runtime::Executable> _executable;
|
||||
};
|
||||
// ! [infer_request:header]
|
||||
|
||||
|
||||
@@ -24,11 +24,17 @@
|
||||
#include <ie_input_info.hpp>
|
||||
#include <ie_layouts.h>
|
||||
#include <hetero/hetero_plugin_config.hpp>
|
||||
#include <template/template_config.hpp>
|
||||
|
||||
#include <backend.hpp>
|
||||
#include <ngraph/specialize_function.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <ngraph/opsets/opset.hpp>
|
||||
#include <transformations/common_optimizations/common_optimizations.hpp>
|
||||
#include <transformations/rt_info/fused_names_attribute.hpp>
|
||||
#include "template/template_config.hpp"
|
||||
#include "template_plugin.hpp"
|
||||
#include "template_executable_network.hpp"
|
||||
#include "template_infer_request.hpp"
|
||||
#include "template_pattern_transformation.hpp"
|
||||
|
||||
using namespace TemplatePlugin;
|
||||
|
||||
@@ -36,9 +42,61 @@ using namespace TemplatePlugin;
|
||||
/**
 * @brief Sets the plugin name, creates the ngraph "INTERPRETER" backend and
 *        obtains the executor registered under the name "TemplateWaitExecutor".
 */
Plugin::Plugin() {
    // TODO: fill with actual device name
    _pluginName = "TEMPLATE";

    // The backend search directory is set (to an empty string) before the backend is created
    using ngraph::runtime::Backend;
    Backend::set_backend_shared_library_search_directory("");
    _backend = Backend::create("INTERPRETER");

    auto executorManager = ExecutorManager::getInstance();
    _waitExecutor = executorManager->getIdleCPUStreamsExecutor({"TemplateWaitExecutor"});
}
|
||||
// ! [plugin:ctor]
|
||||
|
||||
// ! [plugin:dtor]
|
||||
/**
 * @brief Removes the executors used by this plugin from the executor cache.
 */
Plugin::~Plugin() {
    // Executors must be dropped from the cache, otherwise the number of threads
    // in the whole application keeps growing.
    // NOTE: add "TemplateCallbackExecutor" to the list if the Inference Engine
    // executor cache is used to create the callback executor.
    for (const char* executorName : {"TemplateStreamsExecutor", "TemplateWaitExecutor"}) {
        ExecutorManager::getInstance()->clear(executorName);
    }
}
|
||||
// ! [plugin:dtor]
|
||||
|
||||
// ! [plugin:transform]
|
||||
/**
 * @brief Produces a transformed copy of an ngraph function; the input function is not modified.
 *
 * Steps:
 *  1. Copy the function via ngraph::specialize_function using the original parameter
 *     types and partial shapes (no constant folding, constants are not shared).
 *  2. Run CommonOptimizations and the plugin-specific example passes on the copy.
 *  3. Placeholder loop for mapping ngraph operations to device operations.
 *
 * @param function Source function
 * @return The transformed copy, carrying the friendly name of the source function
 */
std::shared_ptr<ngraph::Function> Plugin::Transform(const std::shared_ptr<const ngraph::Function>& function) {
    // 1. Copy ngraph::Function first to apply some transformations which modify original ngraph::Function
    const bool shareConsts = false, constFolding = false;
    std::vector<::ngraph::element::Type> new_types;
    std::vector<::ngraph::PartialShape> new_shapes;

    for (const auto &parameter : function->get_parameters()) {
        new_shapes.emplace_back(parameter->get_partial_shape());
        new_types.emplace_back(parameter->get_element_type());
    }

    // No parameter values are substituted: one nullptr per parameter
    auto copyFunction = ngraph::specialize_function(std::const_pointer_cast<ngraph::Function>(function),
        new_types, new_shapes, std::vector<void *>(new_types.size(), nullptr), constFolding, shareConsts);

    copyFunction->set_friendly_name(function->get_friendly_name());

    // 2. Perform common optimizations and device-specific transformations
    ngraph::pass::Manager passManager;
    // Example: register CommonOptimizations transformation from transformations library
    passManager.register_pass<ngraph::pass::CommonOptimizations>();
    // Example: register plugin specific transformation
    passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
    passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();
    // Register any other transformations
    // ..

    // After `run_passes`, we have the transformed function, where operations match device operations,
    // and we can create device hardware-dependent graph
    passManager.run_passes(copyFunction);

    // 3. Iterate over operations and create hardware-specific ngraph
    for (const auto& op : copyFunction->get_ordered_ops()) {
        // TODO: map ngraph `op` to device operation
    }
    return copyFunction;
}
|
||||
// ! [plugin:transform]
|
||||
|
||||
// ! [plugin:load_exe_network_impl]
|
||||
InferenceEngine::ExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork & network,
|
||||
const ConfigMap &config) {
|
||||
@@ -72,9 +130,12 @@ InferenceEngine::ExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const
|
||||
}
|
||||
}
|
||||
|
||||
auto clonedNetwork = cloneNet(network);
|
||||
auto function = network.getFunction();
|
||||
if (function == nullptr) {
|
||||
THROW_IE_EXCEPTION << "TEMPLATE plugin can compile only IR v10 networks";
|
||||
}
|
||||
|
||||
return std::make_shared<ExecutableNetwork>(*clonedNetwork, cfg);
|
||||
return std::make_shared<ExecutableNetwork>(Transform(function), cfg, std::static_pointer_cast<Plugin>(shared_from_this()));
|
||||
}
|
||||
// ! [plugin:load_exe_network_impl]
|
||||
|
||||
@@ -90,7 +151,7 @@ InferenceEngine::ExecutableNetwork Plugin::ImportNetworkImpl(std::istream& model
|
||||
auto cfg = Configuration(config, exportedCfg);
|
||||
|
||||
IExecutableNetwork::Ptr executableNetwork;
|
||||
auto exec_network_impl = std::make_shared<ExecutableNetwork>(model, cfg);
|
||||
auto exec_network_impl = std::make_shared<ExecutableNetwork>(model, cfg, std::static_pointer_cast<Plugin>(shared_from_this()));
|
||||
executableNetwork.reset(new ExecutableNetworkBase<ExecutableNetworkInternal>(exec_network_impl),
|
||||
[](InferenceEngine::details::IRelease *p) {p->Release(); });
|
||||
|
||||
@@ -101,19 +162,42 @@ InferenceEngine::ExecutableNetwork Plugin::ImportNetworkImpl(std::istream& model
|
||||
// ! [plugin:query_network]
|
||||
void Plugin::QueryNetwork(const ICNNNetwork &network, const ConfigMap& config, QueryNetworkResult &res) const {
|
||||
Configuration cfg{config, _cfg, false};
|
||||
res.rc = StatusCode::OK;
|
||||
|
||||
if (std::shared_ptr<const ngraph::Function> ngraphFunction = network.getFunction()) {
|
||||
auto ops = ngraphFunction->get_ordered_ops();
|
||||
for (auto&& op : ops) {
|
||||
// TODO: investigate if an op is actually supported by Template device
|
||||
bool supported = true;
|
||||
if (supported) {
|
||||
res.supportedLayersMap.insert({ op->get_friendly_name(), GetName() });
|
||||
auto function = network.getFunction();
|
||||
if (function == nullptr) {
|
||||
THROW_IE_EXCEPTION << "Template Plugin supports only ngraph cnn network representation";
|
||||
}
|
||||
// First of all we should store initial input operation set
|
||||
std::unordered_set<std::string> originalOps;
|
||||
for (auto&& node : function->get_ops()) {
|
||||
originalOps.emplace(node->get_friendly_name());
|
||||
}
|
||||
// It is needed to apply all transformations as it is done in LoadExeNetworkImpl
|
||||
auto transformedFunction = Transform(function);
|
||||
// The same input node can be transformed into supported and unsupported backend node
|
||||
// So we need store as supported ether unsupported node sets
|
||||
std::unordered_set<std::string> supported;
|
||||
std::unordered_set<std::string> unsupported;
|
||||
auto opset = ngraph::get_opset4();
|
||||
for (auto&& node : transformedFunction->get_ops()) {
|
||||
if (!ngraph::op::is_constant(node) && !ngraph::op::is_parameter(node) && !ngraph::op::is_output(node)) {
|
||||
// Extract transformation history from transformed node as list of nodes
|
||||
for (auto&& fusedLayerName : ngraph::getFusedNamesVector(node)) {
|
||||
// Filter just nodes from original operation set
|
||||
if (contains(originalOps, fusedLayerName)) {
|
||||
if (opset.contains_type_insensitive(fusedLayerName)) {
|
||||
supported.emplace(fusedLayerName);
|
||||
} else {
|
||||
unsupported.emplace(fusedLayerName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << "TEMPLATE plugin can query only IR v10 networks";
|
||||
}
|
||||
// The result set should contains just nodes from supported set
|
||||
for (auto&& layerName : supported) {
|
||||
if (!contains(unsupported, layerName)) {
|
||||
res.supportedLayersMap.emplace(layerName, GetName());
|
||||
}
|
||||
}
|
||||
}
|
||||
// ! [plugin:query_network]
|
||||
@@ -148,10 +232,17 @@ InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std:
|
||||
METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) };
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, supportedMetrics);
|
||||
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
|
||||
std::vector<std::string> confiKeys = {
|
||||
std::vector<std::string> configKeys = {
|
||||
CONFIG_KEY(DEVICE_ID),
|
||||
CONFIG_KEY(PERF_COUNT) };
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, confiKeys);
|
||||
CONFIG_KEY(PERF_COUNT),
|
||||
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
|
||||
auto streamExecutorConfigKeys = IStreamsExecutor::Config{}.SupportedKeys();
|
||||
for (auto&& configKey : streamExecutorConfigKeys) {
|
||||
if (configKey != InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) {
|
||||
configKeys.emplace_back(configKey);
|
||||
}
|
||||
}
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
|
||||
} else if (METRIC_KEY(AVAILABLE_DEVICES) == name) {
|
||||
// TODO: fill list of available devices
|
||||
std::vector<std::string> availableDevices = { "" };
|
||||
@@ -161,7 +252,7 @@ InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std:
|
||||
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, name);
|
||||
} else if (METRIC_KEY(OPTIMIZATION_CAPABILITIES) == name) {
|
||||
// TODO: fill actual list of supported capabilities: e.g. Template device supports only FP32
|
||||
std::vector<std::string> capabilities = { METRIC_VALUE(FP32), TEMPLATE_METRIC_VALUE(HARDWARE_CONVOLUTION) };
|
||||
std::vector<std::string> capabilities = { METRIC_VALUE(FP32) /*, TEMPLATE_METRIC_VALUE(HARDWARE_CONVOLUTION)*/ };
|
||||
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
|
||||
} else if (METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) == name) {
|
||||
// TODO: fill with actual values
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
#include "template_executable_network.hpp"
|
||||
#include "template_config.hpp"
|
||||
|
||||
#include "backend.hpp"
|
||||
|
||||
//! [plugin:header]
|
||||
namespace TemplatePlugin {
|
||||
|
||||
@@ -25,7 +27,7 @@ public:
|
||||
using Ptr = std::shared_ptr<Plugin>;
|
||||
|
||||
Plugin();
|
||||
~Plugin() override = default;
|
||||
~Plugin() override;
|
||||
|
||||
void SetConfig(const std::map<std::string, std::string> &config) override;
|
||||
void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
|
||||
@@ -40,7 +42,14 @@ public:
|
||||
InferenceEngine::ExecutableNetwork ImportNetworkImpl(std::istream& model, const std::map<std::string, std::string>& config) override;
|
||||
|
||||
private:
|
||||
Configuration _cfg;
|
||||
friend class ExecutableNetwork;
|
||||
friend class TemplateInferRequest;
|
||||
|
||||
static std::shared_ptr<ngraph::Function> Transform(const std::shared_ptr<const ngraph::Function>& function);
|
||||
|
||||
Configuration _cfg;
|
||||
std::shared_ptr<ngraph::runtime::Backend> _backend;
|
||||
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
|
||||
};
|
||||
|
||||
} // namespace TemplatePlugin
|
||||
|
||||
@@ -16,3 +16,5 @@ addIeTargetTest(
|
||||
LABELS
|
||||
TEMPLATE
|
||||
)
|
||||
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "multi-device/multi_device_config.hpp"
|
||||
|
||||
#include "behavior/config.hpp"
|
||||
#include <template/template_config.hpp>
|
||||
|
||||
using namespace BehaviorTestsDefinitions;
|
||||
namespace {
|
||||
@@ -14,14 +15,20 @@ namespace {
|
||||
};
|
||||
|
||||
const std::vector<std::map<std::string, std::string>> configs = {
|
||||
{}
|
||||
{{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}},
|
||||
{{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_NUMA}},
|
||||
{{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), "8"}},
|
||||
};
|
||||
|
||||
const std::vector<std::map<std::string, std::string>> inconfigs = {
|
||||
{{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), "OFF"}},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigTests,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values("TEMPLATE"),
|
||||
::testing::ValuesIn(configs)),
|
||||
::testing::ValuesIn(inconfigs)),
|
||||
IncorrectConfigTests::getTestCaseName);
|
||||
|
||||
|
||||
@@ -29,7 +36,7 @@ namespace {
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values("TEMPLATE"),
|
||||
::testing::ValuesIn(configs)),
|
||||
::testing::ValuesIn(inconfigs)),
|
||||
IncorrectConfigAPITests::getTestCaseName);
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "single_layer_tests/convolution.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
};
|
||||
|
||||
/* ============= 2D Convolution ============= */
|
||||
const std::vector<std::vector<size_t >> kernels = {{3, 3},
|
||||
{3, 5}};
|
||||
const std::vector<std::vector<size_t >> strides = {{1, 1},
|
||||
{1, 3}};
|
||||
const std::vector<std::vector<ptrdiff_t>> padBegins = {{0, 0},
|
||||
{0, 3}};
|
||||
const std::vector<std::vector<ptrdiff_t>> padEnds = {{0, 0},
|
||||
{0, 3}};
|
||||
const std::vector<std::vector<size_t >> dilations = {{1, 1},
|
||||
{3, 1}};
|
||||
const std::vector<size_t> numOutChannels = {1, 5};
|
||||
const std::vector<ngraph::op::PadType> padTypes = {
|
||||
ngraph::op::PadType::EXPLICIT,
|
||||
ngraph::op::PadType::VALID
|
||||
};
|
||||
|
||||
const auto conv2DParams_ExplicitPadding = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels),
|
||||
::testing::ValuesIn(strides),
|
||||
::testing::ValuesIn(padBegins),
|
||||
::testing::ValuesIn(padEnds),
|
||||
::testing::ValuesIn(dilations),
|
||||
::testing::ValuesIn(numOutChannels),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv2DParams_AutoPadValid = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels),
|
||||
::testing::ValuesIn(strides),
|
||||
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
|
||||
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
|
||||
::testing::ValuesIn(dilations),
|
||||
::testing::ValuesIn(numOutChannels),
|
||||
::testing::Values(ngraph::op::PadType::VALID)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Convolution2D_ExplicitPadding, ConvolutionLayerTest,
|
||||
::testing::Combine(
|
||||
conv2DParams_ExplicitPadding,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 30, 30})),
|
||||
::testing::Values("TEMPLATE")),
|
||||
ConvolutionLayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Convolution2D_AutoPadValid, ConvolutionLayerTest,
|
||||
::testing::Combine(
|
||||
conv2DParams_AutoPadValid,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 30, 30})),
|
||||
::testing::Values("TEMPLATE")),
|
||||
ConvolutionLayerTest::getTestCaseName);
|
||||
/* ============= 3D Convolution ============= */
|
||||
const std::vector<std::vector<size_t >> kernels3d = {{3, 3, 3},
|
||||
{3, 5, 3}};
|
||||
const std::vector<std::vector<ptrdiff_t>> paddings3d = {{0, 0, 0},
|
||||
{0, 2, 0}};
|
||||
|
||||
const std::vector<std::vector<size_t >> strides3d = {{1, 1, 1},
|
||||
{1, 2, 1}};
|
||||
const std::vector<std::vector<size_t >> dilations3d = {{1, 1, 1},
|
||||
{1, 2, 1}};
|
||||
|
||||
const auto conv3DParams_ExplicitPadding = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels3d),
|
||||
::testing::ValuesIn(strides3d),
|
||||
::testing::ValuesIn(paddings3d),
|
||||
::testing::ValuesIn(paddings3d),
|
||||
::testing::ValuesIn(dilations3d),
|
||||
::testing::Values(5),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv3DParams_AutoPadValid = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels3d),
|
||||
::testing::ValuesIn(strides3d),
|
||||
::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
|
||||
::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
|
||||
::testing::ValuesIn(dilations3d),
|
||||
::testing::Values(5),
|
||||
::testing::Values(ngraph::op::PadType::VALID)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Convolution3D_ExplicitPadding, ConvolutionLayerTest,
|
||||
::testing::Combine(
|
||||
conv3DParams_ExplicitPadding,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 10, 10, 10})),
|
||||
::testing::Values("TEMPLATE")),
|
||||
ConvolutionLayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Convolution3D_AutoPadValid, ConvolutionLayerTest,
|
||||
::testing::Combine(
|
||||
conv3DParams_AutoPadValid,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(std::vector<size_t >({1, 3, 10, 10, 10})),
|
||||
::testing::Values("TEMPLATE")),
|
||||
ConvolutionLayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
@@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "single_layer_tests/reshape.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ReshapeCheckDynBatch, ReshapeLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(true),
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
|
||||
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
|
||||
::testing::Values("TEMPLATE"),
|
||||
::testing::Values(std::map<std::string, std::string>({}))),
|
||||
ReshapeLayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ReshapeCheck, ReshapeLayerTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(true),
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
|
||||
::testing::Values(std::vector<size_t>({10, 0, 100})),
|
||||
::testing::Values("TEMPLATE"),
|
||||
::testing::Values(std::map<std::string, std::string>({}))),
|
||||
ReshapeLayerTest::getTestCaseName);
|
||||
} // namespace
|
||||
@@ -0,0 +1,72 @@
|
||||
// Copyright (C) 2019 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "single_layer_tests/softmax.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
};
|
||||
|
||||
const std::vector<InferenceEngine::Layout> inputLayouts2D = {
|
||||
InferenceEngine::Layout::NC,
|
||||
};
|
||||
|
||||
const std::vector<InferenceEngine::SizeVector> inputShapes2D = {
|
||||
InferenceEngine::SizeVector {1, 100},
|
||||
InferenceEngine::SizeVector {100, 1},
|
||||
InferenceEngine::SizeVector {10, 10},
|
||||
};
|
||||
|
||||
const std::vector<size_t> axis2D = {
|
||||
0, 1
|
||||
};
|
||||
|
||||
const auto params2D = testing::Combine(
|
||||
testing::ValuesIn(netPrecisions),
|
||||
testing::ValuesIn(inputLayouts2D),
|
||||
testing::ValuesIn(inputShapes2D),
|
||||
testing::ValuesIn(axis2D),
|
||||
testing::Values("TEMPLATE"),
|
||||
testing::Values(std::map<std::string, std::string>())
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SoftMax2D,
|
||||
SoftMaxLayerTest,
|
||||
params2D,
|
||||
SoftMaxLayerTest::getTestCaseName
|
||||
);
|
||||
|
||||
const std::vector<InferenceEngine::SizeVector> inputShapes4D = {
|
||||
InferenceEngine::SizeVector {1, 100, 1, 1},
|
||||
InferenceEngine::SizeVector {1, 3, 4, 3},
|
||||
InferenceEngine::SizeVector {2, 3, 4, 5},
|
||||
};
|
||||
|
||||
const std::vector<size_t> axis4D = {0, 1, 2, 3};
|
||||
|
||||
const auto params4D = testing::Combine(
|
||||
testing::ValuesIn(netPrecisions),
|
||||
testing::Values(InferenceEngine::Layout::NCHW),
|
||||
testing::ValuesIn(inputShapes4D),
|
||||
testing::ValuesIn(axis4D),
|
||||
testing::Values("TEMPLATE"),
|
||||
testing::Values(std::map<std::string, std::string>())
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SoftMax4D,
|
||||
SoftMaxLayerTest,
|
||||
params4D,
|
||||
SoftMaxLayerTest::getTestCaseName
|
||||
);
|
||||
|
||||
} // namespace
|
||||
@@ -9,5 +9,9 @@
|
||||
|
||||
/**
 * @brief Returns the regular-expression patterns of test names that are disabled
 *        for this test target.
 */
std::vector<std::string> disabledTestPatterns() {
    std::vector<std::string> skipped;
    skipped.reserve(4);
    skipped.emplace_back(".*ExclusiveAsyncRequests.*");
    skipped.emplace_back(".*reusableCPUStreamsExecutor.*");
    skipped.emplace_back(".*registerPlugin.*");
    skipped.emplace_back(".*IEClassGetAvailableDevices.*");
    return skipped;
}
|
||||
@@ -18,7 +18,7 @@
|
||||
using namespace testing;
|
||||
|
||||
// ! [transformation:test]
|
||||
TEST(TransformationTests, TemplateTest) {
|
||||
TEST(TransformationTests, DISABLED_TemplateTest) {
|
||||
std::shared_ptr<ngraph::Function> f, f_ref;
|
||||
// f - ngraph::Function for applying transformation
|
||||
// f_ref - ngraph::Function that is expected after applying transformation
|
||||
|
||||
Reference in New Issue
Block a user