add plugin template (#515)

Alexey Suhov
2020-05-22 22:34:00 +03:00
committed by GitHub
parent 2e3928071f
commit 0064c299c3
49 changed files with 2330 additions and 0 deletions

View File: CMakeLists.txt

@@ -0,0 +1,43 @@
# Copyright (C) 2020 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
# [cmake:plugin]
set(TARGET_NAME "templatePlugin")
if(ENABLE_LTO)
ie_enable_lto()
endif()
file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB_RECURSE HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
# adds a shared library with the plugin
ie_add_plugin(NAME ${TARGET_NAME}
DEVICE_NAME "TEMPLATE"
SOURCES ${SOURCES} ${HEADERS}
SKIP_INSTALL # ATTENTION: remove SKIP_INSTALL to install the component
VERSION_DEFINES_FOR template_plugin.cpp)
target_include_directories(${TARGET_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}"
"${IE_MAIN_TEMPLATE_PLUGIN_SOURCE_DIR}/include")
target_link_libraries(${TARGET_NAME} PRIVATE IE::inference_engine IE::inference_engine_transformations ${NGRAPH_LIBRARIES} ${INTEL_ITT_LIBS})
# ATTENTION: uncomment to register a plugin in the plugins.xml file
# ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
# POSSIBLE_PLUGINS ${TARGET_NAME})
# [cmake:plugin]
# ATTENTION: uncomment to install component
# install
# set(component_name template)
# ie_cpack_add_component(${component_name} REQUIRED)
# install(TARGETS ${TARGET_NAME}
# RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH}
# ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH}
# LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH}
# COMPONENT ${component_name})
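Note: ie_register_plugins() is left commented out above, so the built library is not added to plugins.xml. As a minimal sketch of the alternative, an application can register and load the plugin explicitly; this assumes the 2020-era Inference Engine Core C++ API, and the model file name is illustrative:

    #include <inference_engine.hpp>

    int main() {
        InferenceEngine::Core core;
        // Associate the "TEMPLATE" device name with the templatePlugin library built above
        // (an explicit alternative to the plugins.xml registration done by ie_register_plugins).
        core.RegisterPlugin("templatePlugin", "TEMPLATE");
        // Read an IR v10 model and compile it for the TEMPLATE device.
        auto network = core.ReadNetwork("model.xml");
        auto executableNetwork = core.LoadNetwork(network, "TEMPLATE");
        return 0;
    }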

View File: template_async_infer_request.cpp

@@ -0,0 +1,44 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <utility>
#include <ie_profiling.hpp>
#include "template_async_infer_request.hpp"
#include "template_executable_network.hpp"
using namespace TemplatePlugin;
// ! [async_infer_request:ctor]
TemplateAsyncInferRequest::TemplateAsyncInferRequest(
const TemplateInferRequest::Ptr& inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& cpuTaskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) :
AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor),
_inferRequest(inferRequest), _waitExecutor(waitExecutor) {
_pipeline = {
{cpuTaskExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(PreprocessingAndStartPipeline)
_inferRequest->inferPreprocess();
_inferRequest->startPipeline();
}},
{_waitExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(WaitPipeline)
_inferRequest->waitPipeline();
}},
{cpuTaskExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(Postprocessing)
_inferRequest->inferPostprocess();
}}
};
}
// ! [async_infer_request:ctor]
// ! [async_infer_request:dtor]
TemplateAsyncInferRequest::~TemplateAsyncInferRequest() {
InferenceEngine::AsyncInferRequestThreadSafeDefault::StopAndWait();
}
// ! [async_infer_request:dtor]
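From the application side, the three-stage _pipeline above is driven through the standard asynchronous request API. A minimal usage sketch, assuming an ExecutableNetwork already compiled for the TEMPLATE device:

    #include <inference_engine.hpp>

    // Runs one asynchronous inference; `executableNetwork` is assumed to come from Core::LoadNetwork.
    void RunAsync(InferenceEngine::ExecutableNetwork& executableNetwork) {
        auto request = executableNetwork.CreateInferRequest();
        // StartAsync() eventually executes the stages defined in the constructor above:
        // preprocessing + startPipeline on the CPU task executor, waitPipeline on the
        // dedicated wait executor, and postprocessing back on the CPU task executor.
        request.StartAsync();
        request.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
    }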

View File: template_async_infer_request.hpp

@@ -0,0 +1,30 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
#include "template_infer_request.hpp"
namespace TemplatePlugin {
// ! [async_infer_request:header]
class TemplateAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public:
TemplateAsyncInferRequest(const TemplateInferRequest::Ptr& inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
~TemplateAsyncInferRequest() override;
private:
TemplateInferRequest::Ptr _inferRequest;
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
};
// ! [async_infer_request:header]
} // namespace TemplatePlugin

View File: template_config.cpp

@@ -0,0 +1,45 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <vector>
#include <algorithm>
#include <ie_util_internal.hpp>
#include <ie_plugin_config.hpp>
#include <file_utils.h>
#include <cpp_interfaces/exception2status.hpp>
#include "template_config.hpp"
using namespace TemplatePlugin;
Configuration::Configuration() { }
Configuration::Configuration(const ConfigMap& config, const Configuration & defaultCfg, bool throwOnUnsupported) {
*this = defaultCfg;
for (auto&& c : config) {
const auto& key = c.first;
const auto& value = c.second;
if (CONFIG_KEY(DEVICE_ID) == key) {
deviceId = std::stoi(value);
} else if (CONFIG_KEY(PERF_COUNT) == key) {
perfCount = (CONFIG_VALUE(YES) == value);
} else if (throwOnUnsupported) {
THROW_IE_EXCEPTION << NOT_FOUND_str << ": " << key;
}
}
}
InferenceEngine::Parameter Configuration::Get(const std::string& name) const {
if (name == CONFIG_KEY(DEVICE_ID)) {
return {std::to_string(deviceId)};
} else if (name == CONFIG_KEY(PERF_COUNT)) {
return {perfCount};
} else {
THROW_IE_EXCEPTION << NOT_FOUND_str << ": " << name;
}
}
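The DEVICE_ID and PERF_COUNT keys parsed above arrive through the regular Core configuration calls. A sketch of setting them from an application, assuming the 2020-era Core API; the model file name and the device ID value are illustrative:

    #include <map>
    #include <string>
    #include <ie_plugin_config.hpp>
    #include <inference_engine.hpp>

    void ConfigureTemplateDevice(InferenceEngine::Core& core) {
        // Handled by the CONFIG_KEY(PERF_COUNT) branch of the Configuration constructor above.
        core.SetConfig({{CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES)}}, "TEMPLATE");
        // Per-network configuration; DEVICE_ID is parsed with std::stoi in the same constructor.
        auto network = core.ReadNetwork("model.xml");
        auto executableNetwork = core.LoadNetwork(network, "TEMPLATE",
                                                  {{CONFIG_KEY(DEVICE_ID), "0"}});
    }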

View File: template_config.hpp

@@ -0,0 +1,40 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <ie_parameter.hpp>
namespace TemplatePlugin {
template<typename T>
using IOMap = std::unordered_map<std::string, T>;
// ! [configuration:header]
using ConfigMap = std::map<std::string, std::string>;
struct Configuration {
Configuration();
Configuration(const Configuration&) = default;
Configuration(Configuration&&) = default;
Configuration& operator=(const Configuration&) = default;
Configuration& operator=(Configuration&&) = default;
explicit Configuration(const ConfigMap& config, const Configuration & defaultCfg = {}, const bool throwOnUnsupported = true);
InferenceEngine::Parameter Get(const std::string& name) const;
// Plugin configuration parameters
int deviceId = 0;
bool perfCount = true;
};
// ! [configuration:header]
} // namespace TemplatePlugin

View File: template_executable_network.cpp

@@ -0,0 +1,167 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <atomic>
#include <set>
#include <utility>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include <ie_metric_helpers.hpp>
#include <ie_util_internal.hpp>
#include <ie_plugin_config.hpp>
#include <network_serializer.h>
#include <threading/ie_executor_manager.hpp>
#include <details/ie_cnn_network_tools.h>
#include <ngraph/specialize_function.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <transformations/convert_divide.hpp>
#include "template_plugin.hpp"
#include "template_executable_network.hpp"
using namespace TemplatePlugin;
using namespace InferenceEngine;
// ! [executable_network:ctor_cnnnetwork]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(InferenceEngine::ICNNNetwork& network,
const Configuration& cfg):
_name(network.getName()),
_cfg(cfg),
_waitExecutor(InferenceEngine::ExecutorManager::getInstance()->getExecutor("Template")) {
// TODO: if your plugin supports device IDs (more than a single instance of the device can be present on the host machine),
// you should select the proper device based on KEY_DEVICE_ID or automatic behavior
// In this case, _waitExecutor should also be created per device.
try {
if (std::shared_ptr<const ngraph::Function> ngraphFunction = network.getFunction()) {
CompileGraph(ngraphFunction);
} else {
THROW_IE_EXCEPTION << "TEMPLATE plugin can compile only IR v10 networks";
}
}
catch (const InferenceEngine::details::InferenceEngineException&) {
throw;
}
catch (const std::exception & e) {
THROW_IE_EXCEPTION << "Standard exception from compilation library: " << e.what();
}
catch (...) {
THROW_IE_EXCEPTION << "Generic exception is thrown";
}
}
// ! [executable_network:ctor_cnnnetwork]
// ! [executable_network:ctor_import_stream]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream & model,
const Configuration& cfg) :
_cfg(cfg) {
// TODO: since importing a network is not mandatory functionality, this ctor can just be removed
}
// ! [executable_network:ctor_import_stream]
// ! [executable_network:compile_graph]
void TemplatePlugin::ExecutableNetwork::CompileGraph(const std::shared_ptr<const ngraph::Function> & ngraphFunction) {
// TODO: perform actual graph compilation taking `_cfg` into account
// 1. Copy the ngraph::Function first, so that transformations applied later in
// ExecutableNetwork::CompileGraph do not modify the original ngraph::Function
const bool shareConsts = false, constFolding = false;
std::vector<::ngraph::element::Type> new_types;
std::vector<::ngraph::PartialShape> new_shapes;
for (const auto &parameter : ngraphFunction->get_parameters()) {
new_shapes.emplace_back(parameter->get_partial_shape());
new_types.emplace_back(parameter->get_element_type());
}
auto copyFunction = ngraph::specialize_function(std::const_pointer_cast<ngraph::Function>(ngraphFunction),
new_types, new_shapes, std::vector<void *>(new_types.size(), nullptr), constFolding, shareConsts);
// 2. Perform common and device-specific transformations
ngraph::pass::Manager passManager;
// Example: register a standard ngraph transformation provided by the ngraph::ngraph library
passManager.register_pass<ngraph::pass::ConstantFolding>();
// Example: register an Inference Engine optimization transformation provided by IE::inference_engine_transformations
passManager.register_pass<ngraph::pass::ConvertDivide>();
// Register any other transformations
// ..
// After `run_passes`, we have the transformed function, where operations match device operations,
// and we can create device hardware-dependent graph
passManager.run_passes(copyFunction);
// 3. Iterate over operations and create hardware-specific ngraph
for (const auto& op : copyFunction->get_ordered_ops()) {
// TODO: map ngraph `op` to device operation
}
// 4. Perform any other steps like allocation and filling device buffers, and so on
}
// ! [executable_network:compile_graph]
// ! [executable_network:create_infer_request_impl]
InferenceEngine::InferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) {
return std::make_shared<TemplateInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<ExecutableNetwork>(shared_from_this()));
}
// ! [executable_network:create_infer_request_impl]
// ! [executable_network:create_infer_request]
void TemplatePlugin::ExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& asyncRequest) {
auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
auto asyncThreadSafeImpl = std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest),
_taskExecutor, _waitExecutor, _callbackExecutor);
asyncRequest.reset(new InferenceEngine::InferRequestBase<TemplateAsyncInferRequest>(asyncThreadSafeImpl),
[](InferenceEngine::IInferRequest *p) { p->Release(); });
asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
}
// ! [executable_network:create_infer_request]
// ! [executable_network:get_config]
void TemplatePlugin::ExecutableNetwork::GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const {
// TODO: return more supported values for config keys
if (name == CONFIG_KEY(DEVICE_ID) ||
name == CONFIG_KEY(PERF_COUNT)) {
result = _cfg.Get(name);
} else {
THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork config key: " << name;
}
}
// ! [executable_network:get_config]
// ! [executable_network:get_metric]
void TemplatePlugin::ExecutableNetwork::GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *) const {
// TODO: return more supported values for metrics
if (METRIC_KEY(SUPPORTED_METRICS) == name) {
result = IE_SET_METRIC(SUPPORTED_METRICS, std::vector<std::string>{
METRIC_KEY(NETWORK_NAME),
METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, std::vector<std::string>{
CONFIG_KEY(DEVICE_ID),
CONFIG_KEY(PERF_COUNT)});
} else if (METRIC_KEY(NETWORK_NAME) == name) {
result = IE_SET_METRIC(NETWORK_NAME, _name);
} else if (METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS) == name) {
// TODO: fill with actual number
unsigned int value = 1;
result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, value);
} else {
THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;
}
}
// ! [executable_network:get_metric]
// ! [executable_network:export_impl]
void TemplatePlugin::ExecutableNetwork::ExportImpl(std::ostream& dlaModel) {
// TODO: add code that exports the compiled graph to the std::ostream
}
// ! [executable_network:export_impl]
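GetConfig and GetMetric above are reachable from applications through the ExecutableNetwork wrapper. A small sketch of reading the values back, assuming a network already compiled for the TEMPLATE device:

    #include <iostream>
    #include <string>
    #include <ie_plugin_config.hpp>
    #include <inference_engine.hpp>

    void PrintExecutableNetworkInfo(InferenceEngine::ExecutableNetwork& executableNetwork) {
        // Served by the NETWORK_NAME and OPTIMAL_NUMBER_OF_INFER_REQUESTS branches of GetMetric above.
        auto name = executableNetwork.GetMetric(METRIC_KEY(NETWORK_NAME)).as<std::string>();
        auto nireq = executableNetwork.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
        // Served by the CONFIG_KEY(PERF_COUNT) branch of GetConfig above.
        auto perfCount = executableNetwork.GetConfig(CONFIG_KEY(PERF_COUNT)).as<bool>();
        std::cout << name << ": optimal number of requests = " << nireq
                  << ", performance counters = " << std::boolalpha << perfCount << std::endl;
    }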

View File: template_executable_network.hpp

@@ -0,0 +1,68 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <utility>
#include <tuple>
#include <memory>
#include <string>
#include <vector>
#include <map>
#include <unordered_map>
#include <list>
#include <ie_common.h>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include <cnn_network_impl.hpp>
#include <threading/ie_itask_executor.hpp>
#include <ngraph/function.hpp>
#include "template_config.hpp"
#include "template_infer_request.hpp"
#include "template_async_infer_request.hpp"
namespace TemplatePlugin {
class Engine;
/**
* @class ExecutableNetwork
* @brief Interface of executable network
*/
// ! [executable_network:header]
class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
ExecutableNetwork(InferenceEngine::ICNNNetwork& network,
const Configuration& cfg);
ExecutableNetwork(std::istream & model,
const Configuration& cfg);
~ExecutableNetwork() override = default;
// Methods from a base class ExecutableNetworkThreadSafeDefault
void ExportImpl(std::ostream& model) override;
InferenceEngine::InferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override;
void CreateInferRequest(InferenceEngine::IInferRequest::Ptr &asyncRequest) override;
void GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
void GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
std::atomic<std::size_t> _requestId = {0};
std::string _name;
Configuration _cfg;
private:
void CompileGraph(const std::shared_ptr<const ngraph::Function> & ngraphFunction);
std::shared_ptr<Engine> _plugin;
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
};
// ! [executable_network:header]
} // namespace TemplatePlugin

View File: template_infer_request.cpp

@@ -0,0 +1,224 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <utility>
#include <algorithm>
#include <memory>
#include <string>
#include <map>
#include <ie_blob.h>
#include <ie_plugin.hpp>
#include <description_buffer.hpp>
#include <debug.h>
#include <ie_layouts.h>
#include <threading/ie_executor_manager.hpp>
#include <blob_transform.hpp>
#include <ie_parallel.hpp>
#include <ie_memcpy.h>
#include <precision_utils.h>
#include <template/template_config.hpp>
#include "template_infer_request.hpp"
#include "template_executable_network.hpp"
#include "template_plugin.hpp"
using namespace TemplatePlugin;
using namespace InferenceEngine;
using Time = std::chrono::high_resolution_clock;
using ns = std::chrono::nanoseconds;
using fsec = std::chrono::duration<float>;
// ! [infer_request:ctor]
TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const std::shared_ptr<TemplatePlugin::ExecutableNetwork>& executableNetwork) :
InferRequestInternal(networkInputs, networkOutputs),
_executableNetwork(executableNetwork) {
// TODO: allocate infer request device and host buffers if needed, fill actual list of profiling tasks
auto requestID = std::to_string(_executableNetwork->_requestId);
_executableNetwork->_requestId++;
std::string name = _executableNetwork->_name + "_Req" + requestID;
_profilingTask = { {
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Preprocess") },
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Postprocess") },
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_StartPipline") },
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_WaitPipline") },
} };
allocateDeviceBuffers();
allocateInputBlobs();
allocateOutputBlobs();
}
// ! [infer_request:ctor]
// ! [infer_request:dtor]
TemplateInferRequest::~TemplateInferRequest() {
_executableNetwork->_requestId--;
}
// ! [infer_request:dtor]
void TemplateInferRequest::allocateDeviceBuffers() {
// TODO: allocate device buffers if Template device is a remote one
}
void TemplateInferRequest::allocateInputBlobs() {
for (auto &networkInput : _networkInputs) {
SizeVector dims = networkInput.second->getTensorDesc().getDims();
Precision precision = networkInput.second->getTensorDesc().getPrecision();
Layout input_layout = networkInput.second->getInputData()->getLayout();
Blob::Ptr inputBlob;
Blob::Ptr inputBlobNCHW;
switch (precision) {
case Precision::FP32 :
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<float>({ precision, dims, input_layout });
if (input_layout == Layout::NHWC) {
inputBlobNCHW = InferenceEngine::make_shared_blob<float>({ precision, dims, Layout::NCHW });
}
break;
case Precision::FP16 :
case Precision::I16 :
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, input_layout });
if (input_layout == Layout::NHWC) {
inputBlobNCHW = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, Layout::NCHW });
}
break;
case Precision::U8 :
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<uint8_t>({ precision, dims, input_layout });
if (input_layout == Layout::NHWC) {
inputBlobNCHW = InferenceEngine::make_shared_blob<uint8_t>({ precision, dims, Layout::NCHW });
}
break;
default:
THROW_IE_EXCEPTION << "Unsupported network precision: " << precision
<< precision << "! Supported precisions are: FP32, FP16, I16, U8";
}
// allocate the input blob
inputBlob->allocate();
_inputs[networkInput.first] = inputBlob;
if (inputBlobNCHW != inputBlob) {
inputBlobNCHW->allocate();
}
_inputsNCHW[networkInput.first] = inputBlobNCHW;
}
}
void TemplateInferRequest::allocateOutputBlobs() {
for (auto &networkOutput : _networkOutputs) {
SizeVector dims = networkOutput.second->getTensorDesc().getDims();
Precision precision = networkOutput.second->getPrecision();
Blob::Ptr outputBlob;
Blob::Ptr outputBlobNCHW;
switch (precision) {
case Precision::FP32 :
outputBlobNCHW = outputBlob = InferenceEngine::make_shared_blob<float>({ precision, dims, networkOutput.second->getLayout() });
if (networkOutput.second->getLayout() == Layout::NHWC) {
outputBlobNCHW = InferenceEngine::make_shared_blob<float>({ precision, dims, Layout::NCHW });
}
break;
case Precision::FP16 :
outputBlobNCHW = outputBlob = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, networkOutput.second->getLayout() });
if (networkOutput.second->getLayout() == Layout::NHWC) {
outputBlobNCHW = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, Layout::NCHW });
}
break;
default:
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported output precision: "
<< precision << "! Supported precisions are: FP32, FP16";
}
// allocate the output blob
outputBlob->allocate();
_outputs[networkOutput.first] = outputBlob;
if (outputBlobNCHW != outputBlob) {
outputBlobNCHW->allocate();
}
_outputsNCHW[networkOutput.first] = outputBlobNCHW;
}
if (_networkOutputs.empty() || _networkInputs.empty()) {
THROW_IE_EXCEPTION << "Internal error: no information about network's output/input";
}
}
// ! [infer_request:infer_impl]
void TemplateInferRequest::InferImpl() {
// TODO: fill with actual list of pipeline stages, which are executed synchronously for sync infer requests
inferPreprocess();
startPipeline();
waitPipeline();
inferPostprocess();
}
// ! [infer_request:infer_impl]
// ! [infer_request:infer_preprocess]
void TemplateInferRequest::inferPreprocess() {
auto prev = Time::now();
// execute input pre-processing.
InferRequestInternal::execDataPreprocessing(_inputs);
for (auto &input : InferRequestInternal::_inputs) {
auto& src = input.second;
auto& dst = _inputsNCHW[input.first];
if (src != dst) {
if (src->getTensorDesc().getPrecision() == dst->getTensorDesc().getPrecision()
&& src->getTensorDesc().getDims() == dst->getTensorDesc().getDims()
&& src->getTensorDesc().getLayout() == dst->getTensorDesc().getLayout()) {
_inputsNCHW[input.first] = input.second;
} else { // Convert Layout to NCHW
InferenceEngine::blob_copy(src, dst);
}
}
}
// TODO: perform preprocessing on inputs if needed: work with _inputsNCHW
_inputPreprocessTime = static_cast<double>(std::chrono::duration_cast<ns>(Time::now() - prev).count());
}
// ! [infer_request:infer_preprocess]
void TemplateInferRequest::startPipeline() {
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[StartPipeline])
// TODO: Start pipeline and fill _inputTransferTime, _executeTime, _outputTransferTime
}
void TemplateInferRequest::waitPipeline() {
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[WaitPipeline])
auto prev = Time::now();
// TODO: wait for the pipeline using a driver API or other synchronization methods
_inputPreprocessTime = static_cast<double>(std::chrono::duration_cast<ns>(Time::now() - prev).count());
}
void TemplateInferRequest::inferPostprocess() {
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[Postprocess])
auto prev = Time::now();
// TODO: perform post-processing and convert to NHWC layout
_outputPostProcessTime = static_cast<double>(std::chrono::duration_cast<ns>(Time::now() - prev).count());
}
// ! [infer_request:get_performance_counts]
void TemplateInferRequest::GetPerformanceCounts(std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
InferenceEngineProfileInfo info;
info.execution_index = 0;
info.status = InferenceEngineProfileInfo::EXECUTED;
info.cpu_uSec = info.realTime_uSec = _inputPreprocessTime / 1000;
perfMap["1. input preprocessing"] = info;
info.cpu_uSec = 0;
info.realTime_uSec = _inputTransferTime / 1000;
perfMap["2. input transfer to a device"] = info;
info.cpu_uSec = 0;
info.realTime_uSec = _executeTime / 1000;
perfMap["3. execution time"] = info;
info.cpu_uSec = 0;
info.realTime_uSec = _outputTransferTime / 1000;
perfMap["4. output transfer from a device"] = info;
info.cpu_uSec = info.realTime_uSec = _outputPostProcessTime / 1000;
perfMap["5. output postprocessing"] = info;
}
// ! [infer_request:get_performance_counts]
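An application retrieves the five stages filled in by GetPerformanceCounts above through InferRequest::GetPerformanceCounts, once a request has run with PERF_COUNT enabled. A minimal sketch:

    #include <iostream>
    #include <map>
    #include <string>
    #include <inference_engine.hpp>

    void PrintPerformanceCounts(InferenceEngine::InferRequest& request) {
        std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap =
            request.GetPerformanceCounts();
        // Prints the entries "1. input preprocessing" .. "5. output postprocessing" populated above.
        for (const auto& entry : perfMap) {
            std::cout << entry.first << ": " << entry.second.realTime_uSec << " us" << std::endl;
        }
    }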

View File: template_infer_request.hpp

@@ -0,0 +1,74 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <array>
#include <map>
#include <string>
#include <vector>
#include <memory>
#include <unordered_map>
#include <ie_common.h>
#include <ie_profiling.hpp>
#include <cpp_interfaces/impl/ie_infer_request_internal.hpp>
#include <cpp_interfaces/impl/ie_executable_network_internal.hpp>
#include <threading/ie_itask_executor.hpp>
#include "template_config.hpp"
namespace TemplatePlugin {
class ExecutableNetwork;
// ! [infer_request:header]
class TemplateInferRequest : public InferenceEngine::InferRequestInternal {
public:
typedef std::shared_ptr<TemplateInferRequest> Ptr;
TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const std::shared_ptr<ExecutableNetwork>& executableNetwork);
~TemplateInferRequest() override;
void InferImpl() override;
void GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& perfMap) const override;
// pipeline methods-stages which are used in async infer request implementation and assigned to particular executor
void inferPreprocess();
void startPipeline();
void waitPipeline();
void inferPostprocess();
std::shared_ptr<ExecutableNetwork> _executableNetwork;
private:
void allocateDeviceBuffers();
void allocateInputBlobs();
void allocateOutputBlobs();
enum {
Preprocess,
Postprocess,
StartPipeline,
WaitPipeline,
numOfStages
};
std::array<InferenceEngine::ProfilingTask, numOfStages> _profilingTask;
InferenceEngine::BlobMap _inputsNCHW;
InferenceEngine::BlobMap _outputsNCHW;
// for performance counts
double _inputPreprocessTime = 0.0;
double _inputTransferTime = 0.0;
double _executeTime = 0.0;
double _outputTransferTime = 0.0;
double _outputPostProcessTime = 0.0;
};
// ! [infer_request:header]
} // namespace TemplatePlugin

View File: template_plugin.cpp

@@ -0,0 +1,193 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <utility>
#include <memory>
#include <vector>
#include <sstream>
#include <regex>
#include <string>
#include <map>
#include <ie_metric_helpers.hpp>
#include <details/ie_cnn_network_tools.h>
#include <ie_plugin_config.hpp>
#include <ie_util_internal.hpp>
#include <inference_engine.hpp>
#include <file_utils.h>
#include <cpp_interfaces/base/ie_plugin_base.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <threading/ie_executor_manager.hpp>
#include <graph_tools.hpp>
#include <ie_input_info.hpp>
#include <ie_layouts.h>
#include <hetero/hetero_plugin_config.hpp>
#include <template/template_config.hpp>
#include "template_plugin.hpp"
#include "template_executable_network.hpp"
#include "template_infer_request.hpp"
using namespace TemplatePlugin;
using namespace InferenceEngine;
// ! [plugin:ctor]
Plugin::Plugin() {
// TODO: fill with actual device name
_pluginName = "TEMPLATE";
}
// ! [plugin:ctor]
// ! [plugin:load_exe_network_impl]
InferenceEngine::ExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork & network,
const ConfigMap &config) {
auto cfg = Configuration{ config, _cfg };
InferenceEngine::InputsDataMap networkInputs;
InferenceEngine::OutputsDataMap networkOutputs;
network.getInputsInfo(networkInputs);
network.getOutputsInfo(networkOutputs);
// TODO: check with precisions supported by Template device
for (auto networkOutput : networkOutputs) {
auto output_precision = networkOutput.second->getPrecision();
if (output_precision != Precision::FP32 &&
output_precision != Precision::FP16) {
THROW_IE_EXCEPTION << "Template device supports only FP16 and FP32 output precision.";
}
}
for (auto networkInput : networkInputs) {
auto input_precision = networkInput.second->getTensorDesc().getPrecision();
if (input_precision != InferenceEngine::Precision::FP32 &&
input_precision != InferenceEngine::Precision::FP16 &&
input_precision != InferenceEngine::Precision::I16 &&
input_precision != InferenceEngine::Precision::U8) {
THROW_IE_EXCEPTION << "Input image format " << input_precision << " is not supported yet.\n"
<< "Supported formats are: FP32, FP16, I16 and U8.";
}
}
auto clonedNetwork = cloneNet(network);
ConstTransformer transformator(clonedNetwork.get());
transformator.fullTrim();
return std::make_shared<ExecutableNetwork>(*clonedNetwork, cfg);
}
// ! [plugin:load_exe_network_impl]
// ! [plugin:import_network_impl]
InferenceEngine::ExecutableNetwork Plugin::ImportNetworkImpl(std::istream& model, const std::map<std::string, std::string>& config) {
// TODO: importing a network from a stream is not mandatory functionality;
// you can just throw an exception and remove the code below
Configuration exportedCfg;
// some code below which reads exportedCfg from `model` stream
// ..
auto cfg = Configuration(config, exportedCfg);
IExecutableNetwork::Ptr executableNetwork;
auto exec_network_impl = std::make_shared<ExecutableNetwork>(model, cfg);
executableNetwork.reset(new ExecutableNetworkBase<ExecutableNetworkInternal>(exec_network_impl),
[](InferenceEngine::details::IRelease *p) {p->Release(); });
return InferenceEngine::ExecutableNetwork{ executableNetwork };
}
// ! [plugin:import_network_impl]
// ! [plugin:query_network]
void Plugin::QueryNetwork(const ICNNNetwork &network, const ConfigMap& config, QueryNetworkResult &res) const {
Configuration cfg{config, _cfg, false};
res.rc = StatusCode::OK;
if (std::shared_ptr<const ngraph::Function> ngraphFunction = network.getFunction()) {
auto ops = ngraphFunction->get_ordered_ops();
for (auto&& op : ops) {
// TODO: investigate if an op is actually supported by Template device
bool supported = true;
if (supported) {
res.supportedLayersMap.insert({ op->get_friendly_name(), GetName() });
}
}
} else {
THROW_IE_EXCEPTION << "TEMPLATE plugin can query only IR v10 networks";
}
}
// ! [plugin:query_network]
// ! [plugin:add_extension]
void Plugin::AddExtension(InferenceEngine::IExtensionPtr /*extension*/) {
// TODO: add extensions if plugin supports extensions
}
// ! [plugin:add_extension]
// ! [plugin:set_config]
void Plugin::SetConfig(const ConfigMap &config) {
_cfg = Configuration{config, _cfg};
}
// ! [plugin:set_config]
// ! [plugin:get_config]
InferenceEngine::Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter> & /*options*/) const {
return _cfg.Get(name);
}
// ! [plugin:get_config]
// ! [plugin:get_metric]
InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter> & options) const {
if (METRIC_KEY(SUPPORTED_METRICS) == name) {
std::vector<std::string> supportedMetrics = {
METRIC_KEY(AVAILABLE_DEVICES),
METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
METRIC_KEY(FULL_DEVICE_NAME),
METRIC_KEY(OPTIMIZATION_CAPABILITIES),
METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) };
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, supportedMetrics);
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
std::vector<std::string> configKeys = {
CONFIG_KEY(DEVICE_ID),
CONFIG_KEY(PERF_COUNT) };
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (METRIC_KEY(AVAILABLE_DEVICES) == name) {
// TODO: fill list of available devices
std::vector<std::string> availableDevices = { "" };
IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
} else if (METRIC_KEY(FULL_DEVICE_NAME) == name) {
std::string deviceFullName = "Template Device Full Name";
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, deviceFullName);
} else if (METRIC_KEY(OPTIMIZATION_CAPABILITIES) == name) {
// TODO: fill actual list of supported capabilities: e.g. Template device supports only FP32
std::vector<std::string> capabilities = { METRIC_VALUE(FP32), TEMPLATE_METRIC_VALUE(HARDWARE_CONVOLUTION) };
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
} else if (METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) == name) {
// TODO: fill with actual values
using uint = unsigned int;
IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, std::make_tuple(uint{1}, uint{1}, uint{1}));
} else {
THROW_IE_EXCEPTION << "Unsupported device metric: " << name;
}
}
// ! [plugin:get_metric]
IE_SUPPRESS_DEPRECATED_START
// ! [plugin:create_plugin_engine]
INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept {
try {
plugin = make_ie_compatible_plugin({2, 1, CI_BUILD_NUMBER, "templatePlugin"},
std::make_shared<Plugin>());
return OK;
}
catch (std::exception &ex) {
return DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
}
}
// ! [plugin:create_plugin_engine]
IE_SUPPRESS_DEPRECATED_END
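An end-to-end sketch exercising Plugin::QueryNetwork and Plugin::GetMetric above through the Core API; the explicit plugin registration and the model file name are illustrative and assume the 2020-era Inference Engine interface:

    #include <iostream>
    #include <string>
    #include <ie_plugin_config.hpp>
    #include <inference_engine.hpp>

    int main() {
        InferenceEngine::Core core;
        core.RegisterPlugin("templatePlugin", "TEMPLATE");
        // Served by the FULL_DEVICE_NAME branch of Plugin::GetMetric above.
        auto fullName = core.GetMetric("TEMPLATE", METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
        std::cout << "Device: " << fullName << std::endl;
        // Plugin::QueryNetwork reports which operations the TEMPLATE device claims to support.
        auto network = core.ReadNetwork("model.xml");
        auto result = core.QueryNetwork(network, "TEMPLATE");
        for (const auto& layer : result.supportedLayersMap) {
            std::cout << layer.first << " -> " << layer.second << std::endl;
        }
        return 0;
    }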

View File: template_plugin.hpp

@@ -0,0 +1,47 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <inference_engine.hpp>
#include <description_buffer.hpp>
#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
#include <memory>
#include <string>
#include <map>
#include <unordered_map>
#include <vector>
#include "template_executable_network.hpp"
#include "template_config.hpp"
//! [plugin:header]
namespace TemplatePlugin {
class Plugin : public InferenceEngine::InferencePluginInternal {
public:
using Ptr = std::shared_ptr<Plugin>;
Plugin();
~Plugin() override = default;
void SetConfig(const std::map<std::string, std::string> &config) override;
void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string>& config,
InferenceEngine::QueryNetworkResult &res) const override;
InferenceEngine::ExecutableNetworkInternal::Ptr
LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) override;
void AddExtension(InferenceEngine::IExtensionPtr extension) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter> & options) const override;
InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter> & options) const override;
InferenceEngine::ExecutableNetwork ImportNetworkImpl(std::istream& model, const std::map<std::string, std::string>& config) override;
private:
Configuration _cfg;
};
} // namespace TemplatePlugin
//! [plugin:header]