From 18836f53cd6853c5fbc1f97b9de51f014a5a84cc Mon Sep 17 00:00:00 2001
From: Anton Pankratv
Date: Tue, 28 Jul 2020 17:25:31 +0300
Subject: [PATCH] Implemented inference in template plugin (#1308)

* Implemented inference in template plugin

* Fixed tests

* Removed thirdparty dependency

* Simplified executor configuration

* removed half

* Fixed cmake

* Fixed ngraph node check

* device blob allocation

* Fixed enum error
---
 .../include/template/template_config.hpp      |   9 +-
 docs/template_plugin/src/CMakeLists.txt       |  17 +-
 .../src/template_async_infer_request.cpp      |  37 ++-
 docs/template_plugin/src/template_config.cpp  |  22 +-
 docs/template_plugin/src/template_config.hpp  |   3 +
 .../src/template_executable_network.cpp       | 131 ++++-----
 .../src/template_executable_network.hpp       |  29 +-
 .../src/template_infer_request.cpp            | 263 ++++++++++--------
 .../src/template_infer_request.hpp            |  27 +-
 docs/template_plugin/src/template_plugin.cpp  | 131 +++++++--
 docs/template_plugin/src/template_plugin.hpp  |  13 +-
 .../tests/functional/CMakeLists.txt           |   2 +
 .../behavior/config.cpp                       |  13 +-
 .../single_layer_tests/convolution.cpp        | 115 ++++++++
 .../single_layer_tests/reshape.cpp            |  36 +++
 .../single_layer_tests/softmax.cpp            |  72 +++++
 .../tests/functional/skip_tests_config.cpp    |   4 +
 .../template_transformations_test.cpp         |   2 +-
 18 files changed, 668 insertions(+), 258 deletions(-)
 create mode 100644 docs/template_plugin/tests/functional/shared_tests_instances/single_layer_tests/convolution.cpp
 create mode 100644 docs/template_plugin/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp
 create mode 100644 docs/template_plugin/tests/functional/shared_tests_instances/single_layer_tests/softmax.cpp

diff --git a/docs/template_plugin/include/template/template_config.hpp b/docs/template_plugin/include/template/template_config.hpp
index 78760214c7f..62f603cbf16 100644
--- a/docs/template_plugin/include/template/template_config.hpp
+++ b/docs/template_plugin/include/template/template_config.hpp
@@ -45,14 +45,11 @@ namespace TemplateConfigParams {
 #define DECLARE_TEMPLATE_CONFIG_KEY(name) DECLARE_CONFIG_KEY(TEMPLATE_##name)
 #define DECLARE_TEMPLATE_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(TEMPLATE_##name)
+
 /**
- * @brief The key to define the type of transformations for TEMPLATE inputs and outputs.
- * TEMPLATE use custom data layout for input and output blobs. IE TEMPLATE Plugin provides custom
- * optimized version of transformation functions that do not use OpenMP and much more faster
- * than native TEMPLATE functions. Values: "NO" - optimized plugin transformations
- * are used, "YES" - native TEMPLATE transformations are used.
+ * @brief Defines the number of throughput streams used by the TEMPLATE plugin.
  */
-DECLARE_TEMPLATE_CONFIG_KEY(ANY_CONFIG_KEY);
+DECLARE_TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS);

 }  // namespace TemplateConfigParams
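Note: the new key is meant to be set from application code when loading a network. A minimal usage sketch follows; it assumes the plugin is registered under the "TEMPLATE" device name, and the model path and stream count are placeholders. The Core API calls are the standard Inference Engine public API of this revision.

```cpp
#include <inference_engine.hpp>
#include <template/template_config.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder model path
    // Request 4 throughput streams from the TEMPLATE plugin; the plugin maps this
    // key onto its internal streams-executor configuration.
    auto executableNetwork = core.LoadNetwork(network, "TEMPLATE",
        {{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), "4"}});
    executableNetwork.CreateInferRequest().Infer();
    return 0;
}
```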
diff --git a/docs/template_plugin/src/CMakeLists.txt b/docs/template_plugin/src/CMakeLists.txt
index 2b784882859..f5921e20d97 100644
--- a/docs/template_plugin/src/CMakeLists.txt
+++ b/docs/template_plugin/src/CMakeLists.txt
@@ -20,14 +20,25 @@ ie_add_plugin(NAME ${TARGET_NAME}
               VERSION_DEFINES_FOR template_plugin.cpp)

 target_include_directories(${TARGET_NAME} PRIVATE
-    "${CMAKE_CURRENT_SOURCE_DIR}"
+    "${CMAKE_CURRENT_SOURCE_DIR}")
+
+target_include_directories(${TARGET_NAME} PRIVATE
     "${IE_MAIN_TEMPLATE_PLUGIN_SOURCE_DIR}/include")

-target_link_libraries(${TARGET_NAME} PRIVATE IE::inference_engine IE::inference_engine_transformations ${NGRAPH_LIBRARIES} ${INTEL_ITT_LIBS})
+target_link_libraries(${TARGET_NAME} PRIVATE
+    IE::inference_engine
+    IE::inference_engine_transformations
+    ${INTEL_ITT_LIBS}
+    ${NGRAPH_LIBRARIES})
+
+# Link the inference backend library to the plugin. Here we use the ngraph interpreter_backend as an example
+target_link_libraries(${TARGET_NAME} PRIVATE
+    ngraph_backend
+    interpreter_backend)

 # ATTENTION: uncomment to register a plugin in the plugins.xml file
 # ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
-#                    POSSIBLE_PLUGINS ${TARGET_NAME})
+#                     POSSIBLE_PLUGINS ${TARGET_NAME})
 # [cmake:plugin]

 # ATTENTION: uncomment to install component
diff --git a/docs/template_plugin/src/template_async_infer_request.cpp b/docs/template_plugin/src/template_async_infer_request.cpp
index 0b9516bb461..f1024185c44 100644
--- a/docs/template_plugin/src/template_async_infer_request.cpp
+++ b/docs/template_plugin/src/template_async_infer_request.cpp
@@ -19,21 +19,28 @@ TemplateAsyncInferRequest::TemplateAsyncInferRequest(
     const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) :
     AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor),
     _inferRequest(inferRequest), _waitExecutor(waitExecutor) {
-    _pipeline = {
-        {cpuTaskExecutor, [this] {
-            IE_PROFILING_AUTO_SCOPE(PreprocessingAndStartPipeline)
-            _inferRequest->inferPreprocess();
-            _inferRequest->startPipeline();
-        }},
-        {_waitExecutor, [this] {
-            IE_PROFILING_AUTO_SCOPE(WaitPipeline)
-            _inferRequest->waitPipeline();
-        }},
-        {cpuTaskExecutor, [this] {
-            IE_PROFILING_AUTO_SCOPE(Postprocessing)
-            _inferRequest->inferPostprocess();
-        }}
-    };
+    constexpr const auto remoteDevice = false;
+    // By default, a single-stage pipeline is created.
+    // This stage executes InferRequest::Infer() using cpuTaskExecutor.
+    // But if a remote asynchronous device is used, the pipeline can be split into tasks executed by cpuTaskExecutor
+    // and waiting tasks. Waiting tasks can block the execution thread, so they run on separate threads from another executor.
+    if (remoteDevice) {
+        _pipeline = {
+            {cpuTaskExecutor, [this] {
+                IE_PROFILING_AUTO_SCOPE(PreprocessingAndStartPipeline)
+                _inferRequest->inferPreprocess();
+                _inferRequest->startPipeline();
+            }},
+            {_waitExecutor, [this] {
+                IE_PROFILING_AUTO_SCOPE(WaitPipeline)
+                _inferRequest->waitPipeline();
+            }},
+            {cpuTaskExecutor, [this] {
+                IE_PROFILING_AUTO_SCOPE(Postprocessing)
+                _inferRequest->inferPostprocess();
+            }}
+        };
+    }
 }
// ! [async_infer_request:ctor]
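For reference, the `_pipeline` member filled above is a sequence of {executor, task} stages that the base class runs in order, handing each task to its stage's executor. A simplified, synchronous model of that control flow (a sketch only, not the actual AsyncInferRequestThreadSafeDefault implementation):

```cpp
#include <functional>
#include <memory>
#include <utility>
#include <vector>

using Task = std::function<void()>;

// Stand-in for InferenceEngine::ITaskExecutor: something that can run a task.
struct Executor {
    virtual void run(Task task) = 0;
    virtual ~Executor() = default;
};

using Stage = std::pair<std::shared_ptr<Executor>, Task>;

// Run each stage on its own executor, in order. The real implementation chains
// the stages asynchronously so no caller thread is blocked between them.
void RunPipeline(const std::vector<Stage>& pipeline) {
    for (const auto& stage : pipeline) {
        stage.first->run(stage.second);
    }
}
```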
diff --git a/docs/template_plugin/src/template_config.cpp b/docs/template_plugin/src/template_config.cpp
index e60d0ac46fc..b01f7a6cabd 100644
--- a/docs/template_plugin/src/template_config.cpp
+++ b/docs/template_plugin/src/template_config.cpp
@@ -9,10 +9,12 @@

 #include
 #include
+#include
 #include
 #include

 #include "template_config.hpp"
+#include "template/template_config.hpp"

 using namespace TemplatePlugin;

@@ -20,12 +22,22 @@ Configuration::Configuration() { }

 Configuration::Configuration(const ConfigMap& config, const Configuration & defaultCfg, bool throwOnUnsupported) {
     *this = defaultCfg;
+    // If the plugin needs to use InferenceEngine::StreamsExecutor, it should be able to process its configuration
+    auto streamExecutorConfigKeys = _streamsExecutorConfig.SupportedKeys();
     for (auto&& c : config) {
         const auto& key = c.first;
         const auto& value = c.second;
-        if (CONFIG_KEY(DEVICE_ID) == key) {
+        if (TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) == key) {
+            _streamsExecutorConfig.SetConfig(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value);
+        } else if (streamExecutorConfigKeys.end() !=
+                   std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
+            _streamsExecutorConfig.SetConfig(key, value);
+        } else if (CONFIG_KEY(DEVICE_ID) == key) {
             deviceId = std::stoi(value);
+            if (deviceId > 0) {
+                THROW_IE_EXCEPTION << "Device ID " << deviceId << " is not supported";
+            }
         } else if (CONFIG_KEY(PERF_COUNT) == key) {
             perfCount = (CONFIG_VALUE(YES) == value);
         } else if (throwOnUnsupported) {
@@ -39,6 +51,14 @@ InferenceEngine::Parameter Configuration::Get(const std::string& name) const {
         return {std::to_string(deviceId)};
     } else if (name == CONFIG_KEY(PERF_COUNT)) {
         return {perfCount};
+    } else if (name == TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) || name == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) {
+        return {std::to_string(_streamsExecutorConfig._streams)};
+    } else if (name == CONFIG_KEY(CPU_BIND_THREAD)) {
+        return const_cast<InferenceEngine::IStreamsExecutor::Config&>(_streamsExecutorConfig).GetConfig(name);
+    } else if (name == CONFIG_KEY(CPU_THREADS_NUM)) {
+        return {std::to_string(_streamsExecutorConfig._threads)};
+    } else if (name == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) {
+        return {std::to_string(_streamsExecutorConfig._threadsPerStream)};
     } else {
         THROW_IE_EXCEPTION << NOT_FOUND_str << ": " << name;
     }
diff --git a/docs/template_plugin/src/template_config.hpp b/docs/template_plugin/src/template_config.hpp
index 6f6940aea7c..453c85a3e6d 100644
--- a/docs/template_plugin/src/template_config.hpp
+++ b/docs/template_plugin/src/template_config.hpp
@@ -11,6 +11,8 @@

 #include

+#include
+
 namespace TemplatePlugin {

 template
@@ -34,6 +36,7 @@ struct Configuration {

     int deviceId = 0;
     bool perfCount = true;
+    InferenceEngine::IStreamsExecutor::Config _streamsExecutorConfig;
 };
 // ! [configuration:header]
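A usage sketch for the updated Configuration: the plugin-specific THROUGHPUT_STREAMS key is translated to the common CPU_THROUGHPUT_STREAMS key, while native IStreamsExecutor keys pass straight through. The literal key strings below assume the usual CONFIG_KEY macro expansion, and ConfigMap is assumed to be the usual std::map<std::string, std::string> alias — both are illustrative:

```cpp
#include <map>
#include <string>
#include "template_config.hpp"

void ConfigureExample() {
    std::map<std::string, std::string> raw = {
        {"TEMPLATE_THROUGHPUT_STREAMS", "2"},  // plugin-specific key
        {"CPU_BIND_THREAD", "YES"},            // generic streams-executor key
    };
    // Unknown keys would throw because throwOnUnsupported is true.
    TemplatePlugin::Configuration cfg(raw, TemplatePlugin::Configuration{}, true);
    // cfg._streamsExecutorConfig now describes two streams with thread binding.
}
```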
diff --git a/docs/template_plugin/src/template_executable_network.cpp b/docs/template_plugin/src/template_executable_network.cpp
index db3da8876b2..b0d0e66e80e 100644
--- a/docs/template_plugin/src/template_executable_network.cpp
+++ b/docs/template_plugin/src/template_executable_network.cpp
@@ -16,40 +16,31 @@
 #include
 #include
-#include
-
-#include
-
+#include "template/template_config.hpp"
 #include "template_plugin.hpp"
 #include "template_executable_network.hpp"
-#include "template_pattern_transformation.hpp"

 using namespace TemplatePlugin;

 // ! [executable_network:ctor_cnnnetwork]
-TemplatePlugin::ExecutableNetwork::ExecutableNetwork(InferenceEngine::ICNNNetwork& network,
-                                                     const Configuration& cfg):
-    _name(network.getName()),
+TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
+                                                     const Configuration& cfg,
+                                                     const Plugin::Ptr& plugin) :
+    InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation
     _cfg(cfg),
-    _waitExecutor(InferenceEngine::ExecutorManager::getInstance()->getExecutor("Template")) {
+    _plugin(plugin),
+    _function(function) {
     // TODO: if your plugin supports device ID (more than a single instance of the device can be on the host machine),
     // you should select the proper device based on KEY_DEVICE_ID or automatic behavior.
     // In this case, _waitExecutor should also be created per device.
     try {
-        if (std::shared_ptr ngraphFunction = network.getFunction()) {
-            CompileGraph(ngraphFunction);
-        } else {
-            THROW_IE_EXCEPTION << "TEMPLATE plugin can compile only IR v10 networks";
-        }
-    }
-    catch (const InferenceEngineException & e) {
-        throw e;
-    }
-    catch (const std::exception & e) {
+        CompileGraph();
+        InitExecutor();
+    } catch (const InferenceEngineException&) {
+        throw;
+    } catch (const std::exception & e) {
         THROW_IE_EXCEPTION << "Standard exception from compilation library: " << e.what();
-    }
-    catch (...) {
+    } catch (...) {
         THROW_IE_EXCEPTION << "Generic exception is thrown";
     }
 }
 // ! [executable_network:ctor_cnnnetwork]
@@ -57,53 +48,53 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(InferenceEngine::ICNNNetwor

 // ! [executable_network:ctor_import_stream]
 TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream & model,
-                                                     const Configuration& cfg) :
-    _cfg(cfg) {
+                                                     const Configuration& cfg,
+                                                     const Plugin::Ptr& plugin) :
+    _cfg(cfg),
+    _plugin(plugin) {
     // TODO: since Import network is not a mandatory functionality, this ctor can just be removed
 }
 // ! [executable_network:ctor_import_stream]

 // ! [executable_network:compile_graph]
-void TemplatePlugin::ExecutableNetwork::CompileGraph(const std::shared_ptr & ngraphFunction) {
+void TemplatePlugin::ExecutableNetwork::CompileGraph() {
     // TODO: perform actual graph compilation taking `_cfg` into account
-    // 1.Copy ngraph::Function first to apply some transformations later in
-    // ExecutableNetwork::CompileGraph, which modify original ngraph::Function
-    const bool shareConsts = false, constFolding = false;
-    std::vector<::ngraph::element::Type> new_types;
-    std::vector<::ngraph::PartialShape> new_shapes;
-
-    for (const auto &parameter : ngraphFunction->get_parameters()) {
-        new_shapes.emplace_back(parameter->get_partial_shape());
-        new_types.emplace_back(parameter->get_element_type());
+    // Generate backend-specific blob mappings. For example, Inference Engine uses the friendly name
+    // of the layer before an ngraph::Result node as the inference request output name, not the
+    // Result node's own friendly name.
+    for (auto&& result : _function->get_results()) {
+        auto previousOutput = result->get_input_source_output(0);
+        auto outputName = previousOutput.get_node()->get_friendly_name();
+        if (previousOutput.get_node()->get_output_size() > 1) {
+            outputName += '.' + std::to_string(previousOutput.get_index());
+        }
+        _outputIndex.emplace(outputName, _function->get_result_index(result));
+    }
+    for (auto&& parameter : _function->get_parameters()) {
+        _inputIndex.emplace(parameter->get_friendly_name(), _function->get_parameter_index(parameter));
     }
-    auto copyFunction = ngraph::specialize_function(std::const_pointer_cast(ngraphFunction),
-        new_types, new_shapes, std::vector(new_types.size(), nullptr), constFolding, shareConsts);
-
-    // 2. Perform common optimizations and device-specific transformations
-    ngraph::pass::Manager passManager;
-    // Example: register CommonOptimizations transformation from transformations library
-    passManager.register_pass();
-    // Example: register plugin specific transformation
-    passManager.register_pass();
-    passManager.register_pass();
-    // Register any other transformations
-    // ..
-
-    // After `run_passes`, we have the transformed function, where operations match device operations,
-    // and we can create device hardware-dependent graph
-    passManager.run_passes(copyFunction);
-
-    // 3. Iterate over operations and create hardware-specific ngraph
-    for (const auto& op : copyFunction->get_ordered_ops()) {
-        // TODO: map ngraph `op` to device operation
-    }
-
-    // 4. Perform any other steps like allocation and filling device buffers, and so on
+    // Perform any other steps like allocation and filling device buffers, and so on
 }
 // ! [executable_network:compile_graph]
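The naming convention implemented above can be illustrated on a small function: for a two-output Split node named "split" feeding two Results, the inference request outputs become "split.0" and "split.1" rather than the Result nodes' friendly names. A sketch building such a function (ngraph opset3 class names as of this OpenVINO revision; shapes and axis are arbitrary):

```cpp
#include <memory>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset3.hpp>

std::shared_ptr<ngraph::Function> MakeSplitFunction() {
    auto param = std::make_shared<ngraph::opset3::Parameter>(
        ngraph::element::f32, ngraph::Shape{1, 4});
    auto axis = ngraph::opset3::Constant::create(ngraph::element::i64, {}, {1});
    auto split = std::make_shared<ngraph::opset3::Split>(param, axis, 2);
    split->set_friendly_name("split");
    // Two Results attached to two outputs of the same node: the mapping in
    // CompileGraph names them "split.0" and "split.1".
    auto result0 = std::make_shared<ngraph::opset3::Result>(split->output(0));
    auto result1 = std::make_shared<ngraph::opset3::Result>(split->output(1));
    return std::make_shared<ngraph::Function>(
        ngraph::ResultVector{result0, result1}, ngraph::ParameterVector{param});
}
```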
+// ! [executable_network:init_executor]
+void TemplatePlugin::ExecutableNetwork::InitExecutor() {
+    // The default multi-threaded configuration is balanced for throughput and latency cases and takes into account
+    // real hardware cores and NUMA nodes.
+    auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
+    streamsExecutorConfig._name = "TemplateStreamsExecutor";
+    // As the Inference Engine CPU Streams Executor creates some additional threads,
+    // it is better to avoid thread re-creation, because some OS memory allocators cannot handle such usage patterns
+    // and memory consumption can be larger than expected.
+    // So the Inference Engine provides an executor cache.
+    _taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
+    // NOTE: the callback executor is not configured, so callbacks are called in the thread of the last stage of the inference request pipeline
+    // _callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
+}
+// ! [executable_network:init_executor]
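The executor cache referred to above looks executors up by the `_name` field of the configuration, so a second request for "TemplateStreamsExecutor" is expected to return the executor created earlier instead of spawning a new thread pool. A sketch (the header path is an assumption; ExecutorManager and getIdleCPUStreamsExecutor are used exactly as in the patch):

```cpp
#include <threading/ie_executor_manager.hpp>  // header path is an assumption

void ExecutorCacheSketch() {
    using namespace InferenceEngine;
    // Both calls use the same configuration name, so the second is expected to
    // return the cached executor instead of creating new threads.
    auto first = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateStreamsExecutor"});
    auto second = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateStreamsExecutor"});
    // first and second refer to the same underlying executor.
}
```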
+
+
 // ! [executable_network:create_infer_request_impl]
 InferenceEngine::InferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
                                                                                                      InferenceEngine::OutputsDataMap networkOutputs) {
@@ -115,7 +106,7 @@ InferenceEngine::InferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::Cr
 void TemplatePlugin::ExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& asyncRequest) {
     auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
     auto asyncThreadSafeImpl = std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest),
-        _taskExecutor, _waitExecutor, _callbackExecutor);
+        _taskExecutor, _plugin->_waitExecutor, _callbackExecutor);
     asyncRequest.reset(new InferenceEngine::InferRequestBase(asyncThreadSafeImpl),
         [](InferenceEngine::IInferRequest *p) { p->Release(); });
     asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
@@ -124,13 +115,7 @@ void TemplatePlugin::ExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& a

 // ! [executable_network:get_config]
 void TemplatePlugin::ExecutableNetwork::GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const {
-    // TODO: return more supported values for config keys
-    if (name == CONFIG_KEY(DEVICE_ID) ||
-        name == CONFIG_KEY(PERF_COUNT)) {
-        result = _cfg.Get(name);
-    } else {
-        THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork config key: " << name;
-    }
+    result = _cfg.Get(name);
 }
 // ! [executable_network:get_config]

@@ -144,14 +129,20 @@ void TemplatePlugin::ExecutableNetwork::GetMetric(const std::string &name, Infer
             METRIC_KEY(SUPPORTED_CONFIG_KEYS),
             METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
     } else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
-        result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, std::vector<std::string>{
+        std::vector<std::string> configKeys = {
             CONFIG_KEY(DEVICE_ID),
-            CONFIG_KEY(PERF_COUNT)});
+            CONFIG_KEY(PERF_COUNT),
+            TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) };
+        auto streamExecutorConfigKeys = IStreamsExecutor::Config{}.SupportedKeys();
+        for (auto&& configKey : streamExecutorConfigKeys) {
+            configKeys.emplace_back(configKey);
+        }
+        result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys);
     } else if (METRIC_KEY(NETWORK_NAME) == name) {
-        result = IE_SET_METRIC(NETWORK_NAME, _name);
+        auto networkName = _function->get_friendly_name();
+        result = IE_SET_METRIC(NETWORK_NAME, networkName);
     } else if (METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS) == name) {
-        // TODO: fill with actual number
-        unsigned int value = 1;
+        unsigned int value = _cfg._streamsExecutorConfig._streams;
         result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, value);
     } else {
         THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;
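With OPTIMAL_NUMBER_OF_INFER_REQUESTS now reporting the configured stream count, a throughput-oriented application can size its request pool from the metric instead of hard-coding it. A minimal sketch using the standard public API (the pool-rotation logic is left out):

```cpp
#include <vector>
#include <inference_engine.hpp>

std::vector<InferenceEngine::InferRequest> MakeRequestPool(InferenceEngine::ExecutableNetwork& network) {
    // One infer request per stream keeps every stream busy in throughput mode.
    auto optimal = network.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
    std::vector<InferenceEngine::InferRequest> pool;
    for (unsigned int i = 0; i < optimal; ++i) {
        pool.push_back(network.CreateInferRequest());
    }
    return pool;
}
```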
diff --git a/docs/template_plugin/src/template_executable_network.hpp b/docs/template_plugin/src/template_executable_network.hpp
index 8b49eb68e28..b2755b39c69 100644
--- a/docs/template_plugin/src/template_executable_network.hpp
+++ b/docs/template_plugin/src/template_executable_network.hpp
@@ -27,7 +27,7 @@

 namespace TemplatePlugin {

-class Engine;
+class Plugin;

 /**
  * @class ExecutableNetwork
@@ -36,11 +36,13 @@ class Engine;
 // ! [executable_network:header]
 class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
 public:
-    ExecutableNetwork(InferenceEngine::ICNNNetwork& network,
-                      const Configuration& cfg);
+    ExecutableNetwork(const std::shared_ptr<const ngraph::Function>& function,
+                      const Configuration& cfg,
+                      const std::shared_ptr<Plugin>& plugin);

-    ExecutableNetwork(std::istream & model,
-                      const Configuration& cfg);
+    ExecutableNetwork(std::istream& model,
+                      const Configuration& cfg,
+                      const std::shared_ptr<Plugin>& plugin);

     ~ExecutableNetwork() override = default;

@@ -53,15 +55,18 @@ public:
     void GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
     void GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;

-    std::atomic _requestId = {0};
-    std::string _name;
-    Configuration _cfg;
-
 private:
-    void CompileGraph(const std::shared_ptr & ngraphFunction);
+    friend class TemplateInferRequest;

-    std::shared_ptr _plugin;
-    InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
+    void CompileGraph();
+    void InitExecutor();
+
+    std::atomic<std::size_t> _requestId = {0};
+    Configuration _cfg;
+    std::shared_ptr<Plugin> _plugin;
+    std::shared_ptr<const ngraph::Function> _function;
+    std::map<std::string, std::size_t> _inputIndex;
+    std::map<std::string, std::size_t> _outputIndex;
 };
 // ! [executable_network:header]

diff --git a/docs/template_plugin/src/template_infer_request.cpp b/docs/template_plugin/src/template_infer_request.cpp
index 7aabd2edbe8..e33e1c92927 100644
--- a/docs/template_plugin/src/template_infer_request.cpp
+++ b/docs/template_plugin/src/template_infer_request.cpp
@@ -18,17 +18,16 @@
 #include
 #include
 #include
-#include