Merge remote-tracking branch 'github/master' into auto-batch-master
# Conflicts:
#	inference-engine/src/multi_device/multi_device_exec_network.cpp
commit eca839d8ea
@ -216,12 +216,12 @@ jobs:
- script: $(INSTALL_DIR)/samples/cpp/build_samples.sh
workingDirectory: $(BUILD_SAMPLES_DIR)
displayName: 'Build cpp samples'
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
continueOnError: false

- script: $(INSTALL_DIR)/samples/c/build_samples.sh
workingDirectory: $(BUILD_SAMPLES_DIR)
displayName: 'Build c samples'
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
continueOnError: false

- script: rm -fr $(BUILD_DIR)
displayName: 'Clean build dir'
@ -241,12 +241,12 @@ jobs:
displayName: 'Model Optimizer UT'
continueOnError: false

- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
workingDirectory: $(INSTALL_TEST_DIR)
displayName: 'nGraph UT'
continueOnError: false

- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/paddlepaddle_tests --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-PaddlePaddle.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/paddlepaddle_tests --gtest_print_time=1 --gtest_output=xml:TEST-PaddlePaddle.xml
displayName: 'PaddlePaddle Frontend UT'
continueOnError: false
@ -276,6 +276,10 @@ jobs:
displayName: 'VPU UT'
continueOnError: false

- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:TEST-ieMultiPluginUnitTests.xml
displayName: 'MULTI UT'
continueOnError: false

- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml
displayName: 'ONNX Importer UT'
continueOnError: false
@ -153,6 +153,10 @@ jobs:
|
||||
displayName: 'ONNX Importer UT'
|
||||
continueOnError: false
|
||||
|
||||
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:TEST-ieMultiPluginUnitTests.xml
|
||||
displayName: 'MULTI UT'
|
||||
continueOnError: false
|
||||
|
||||
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
|
||||
displayName: 'IE FuncTests'
|
||||
continueOnError: false
|
||||
|
@ -111,6 +111,8 @@ jobs:
|
||||
python -m pip install -r $(REPO_DIR)\model-optimizer\requirements.txt
|
||||
python -m pip install -r $(REPO_DIR)\model-optimizer\requirements_dev.txt
|
||||
rem Speed up build
|
||||
certutil -urlcache -split -f https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-win64-x64.zip cmake-3.17.0-win64-x64.zip
|
||||
powershell -command "Expand-Archive -Force cmake-3.17.0-win64-x64.zip"
|
||||
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
|
||||
powershell -command "Expand-Archive -Force ninja-win.zip"
|
||||
git clone https://github.com/google/gtest-parallel.git
|
||||
@ -119,7 +121,7 @@ jobs:
|
||||
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_CLDNN=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||
call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\cmake-3.17.0-win64-x64\cmake-3.17.0-win64-x64\bin\cmake.exe -GNinja -DENABLE_ONEDNN_FOR_GPU=OFF -DENABLE_GNA=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_CLDNN=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'CMake'
|
||||
|
||||
@ -135,14 +137,14 @@ jobs:
|
||||
- script: dir $(REPO_DIR)\bin\ /s
|
||||
displayName: 'List bin files'
|
||||
|
||||
- script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
|
||||
- script: $(WORK_DIR)\cmake-3.17.0-win64-x64\cmake-3.17.0-win64-x64\bin\cmake.exe -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Install'
|
||||
|
||||
- script: dir $(INSTALL_DIR) /s
|
||||
displayName: 'List install files'
|
||||
|
||||
- script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake && xcopy $(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\* $(INSTALL_DIR)\opencv\ /e /h /y
|
||||
- script: $(WORK_DIR)\cmake-3.17.0-win64-x64\cmake-3.17.0-win64-x64\bin\cmake.exe -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake && xcopy $(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\* $(INSTALL_DIR)\opencv\ /e /h /y
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Install tests'
|
||||
|
||||
@ -152,23 +154,23 @@ jobs:
|
||||
- script: $(INSTALL_DIR)\samples\cpp\build_samples_msvc.bat
|
||||
workingDirectory: $(BUILD_SAMPLES_DIR)
|
||||
displayName: 'Build cpp samples'
|
||||
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
|
||||
continueOnError: false
|
||||
|
||||
- script: $(INSTALL_DIR)\samples\c\build_samples_msvc.bat
|
||||
workingDirectory: $(BUILD_SAMPLES_DIR)
|
||||
displayName: 'Build c samples'
|
||||
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
|
||||
continueOnError: false
|
||||
|
||||
- script: rd /Q /S $(BUILD_DIR)
|
||||
displayName: 'Clean build dir'
|
||||
continueOnError: false
|
||||
|
||||
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
|
||||
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
|
||||
workingDirectory: $(INSTALL_TEST_DIR)
|
||||
displayName: 'nGraph UT'
|
||||
continueOnError: false
|
||||
|
||||
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\paddlepaddle_tests --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-PaddlePaddle.xml
|
||||
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\paddlepaddle_tests --gtest_print_time=1 --gtest_output=xml:TEST-PaddlePaddle.xml
|
||||
displayName: 'PaddlePaddle Frontend UT'
|
||||
continueOnError: false
|
||||
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
|
||||
@ -195,6 +197,7 @@ jobs:
|
||||
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\gnaUnitTests --gtest_output=xml:TEST-gnaUnitTests.xml
|
||||
displayName: 'GNA UT'
|
||||
continueOnError: false
|
||||
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
|
||||
|
||||
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\vpuUnitTests --gtest_output=xml:TEST-vpuUnitTests.xml
|
||||
displayName: 'VPU UT'
|
||||
@ -204,6 +207,10 @@ jobs:
|
||||
displayName: 'ONNX Importer UT'
|
||||
continueOnError: false
|
||||
|
||||
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieMultiPluginUnitTests --gtest_output=xml:TEST-ieMultiPluginUnitTests.xml
|
||||
displayName: 'MULTI UT'
|
||||
continueOnError: false
|
||||
|
||||
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
|
||||
displayName: 'IE FuncTests'
|
||||
continueOnError: false
|
||||
.github/dependabot.yml
@ -11,7 +11,8 @@ updates:
time: "13:00"
open-pull-requests-limit: 10
reviewers:
- postrational
- jiwaszki
- akuporos
labels:
- "category: dependencies"
@ -2,7 +2,14 @@
# SPDX-License-Identifier: Apache-2.0
#

cmake_minimum_required(VERSION 3.13)
if(DEFINED BUILD_SHARED_LIBS AND NOT BUILD_SHARED_LIBS)
# 'target_link_libraries' does not work correctly when called from
# a different directory than where 'add_library' is called: CMake generates
# an incorrect OpenVINOConfig.cmake in this case
cmake_minimum_required(VERSION 3.17)
else()
cmake_minimum_required(VERSION 3.13)
endif()

project(OpenVINO DESCRIPTION "OpenVINO toolkit")
@ -83,9 +83,9 @@ function(ie_add_plugin)
|
||||
FILEDESCRIPTION "Inference Engine ${IE_PLUGIN_DEVICE_NAME} device plugin library")
|
||||
|
||||
if(TARGET IE::inference_engine_plugin_api)
|
||||
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE IE::inference_engine_plugin_api)
|
||||
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE IE::inference_engine IE::inference_engine_plugin_api)
|
||||
else()
|
||||
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE inference_engine_plugin_api)
|
||||
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE inference_engine inference_engine_plugin_api)
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
@ -108,8 +108,12 @@ function(ie_add_plugin)
|
||||
endif()
|
||||
|
||||
add_dependencies(ie_plugins ${IE_PLUGIN_NAME})
|
||||
if(TARGET inference_engine_preproc AND BUILD_SHARED_LIBS)
|
||||
add_dependencies(${IE_PLUGIN_NAME} inference_engine_preproc)
|
||||
if(TARGET inference_engine_preproc)
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_dependencies(${IE_PLUGIN_NAME} inference_engine_preproc)
|
||||
else()
|
||||
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE inference_engine_preproc)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# fake dependencies to build in the following order:
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <iterator>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
@ -31,7 +31,6 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
|
||||
endif()
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE clDNN_lib pugixml::static
|
||||
inference_engine
|
||||
inference_engine_transformations
|
||||
inference_engine_lp_transformations
|
||||
ngraph)
|
||||
|
@ -39,19 +39,19 @@ static ConvoltuionParameters GetConvolutionParameters(const ngraph::CoordinateDi
|
||||
switch (strides.size()) {
|
||||
case 3: {
|
||||
stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[2], strides[1], strides[0]));
|
||||
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[2], -pads_begin[1], -pads_begin[0]));
|
||||
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pads_begin[2], pads_begin[1], pads_begin[0]));
|
||||
dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[2], dilations[1], dilations[0]));
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[1], strides[0], 1));
|
||||
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[1], -pads_begin[0], 0));
|
||||
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pads_begin[1], pads_begin[0], 0));
|
||||
dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[1], dilations[0], 1));
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[0], 1, 1));
|
||||
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[0], 0, 0));
|
||||
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pads_begin[0], 0, 0));
|
||||
dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[0], 1, 1));
|
||||
break;
|
||||
}
|
||||
|
@ -33,22 +33,22 @@ static PoolingParameters GetPoolingParameters(const ngraph::Shape& kernel,
|
||||
case 3: {
|
||||
k = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(kernel[2], kernel[1], kernel[0]));
|
||||
s = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[2], strides[1], strides[0]));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pb_casted[2], -pb_casted[1], -pb_casted[0]));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pe_casted[2], -pe_casted[1], -pe_casted[0]));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pb_casted[2], pb_casted[1], pb_casted[0]));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pe_casted[2], pe_casted[1], pe_casted[0]));
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
k = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(kernel[1], kernel[0], 1));
|
||||
s = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[1], strides[0], 1));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pb_casted[1], -pb_casted[0], 0));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pe_casted[1], -pe_casted[0], 0));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pb_casted[1], pb_casted[0], 0));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pe_casted[1], pe_casted[0], 0));
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
k = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(kernel[0], 1, 1));
|
||||
s = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[0], 1, 1));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pb_casted[0], 0, 0));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pe_casted[0], 0, 0));
|
||||
pb = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pb_casted[0], 0, 0));
|
||||
pe = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pe_casted[0], 0, 0));
|
||||
break;
|
||||
}
|
||||
default: IE_THROW() << "Unsupported pooling parameters size. Only 1d, 2d, and 3d cases are supported";
|
||||
|
@ -38,7 +38,7 @@ ie_add_plugin(NAME ${TARGET_NAME}
|
||||
# Enable support of CC for the plugin
|
||||
ie_mark_target_as_cc(${TARGET_NAME})
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine inference_engine_legacy inference_engine_transformations
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_legacy inference_engine_transformations
|
||||
Threads::Threads libGNA)
|
||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
|
@ -70,6 +70,8 @@
|
||||
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
|
||||
#include "transformations/remove_single_input_concat.hpp"
|
||||
#include "transformations/broadcast_const.hpp"
|
||||
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
|
||||
#include "transformations/decompose_mvn.hpp"
|
||||
#include "transformations/substitute_softsign.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
@ -687,6 +689,8 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
fake_quantized = ngraph::op::util::has_op_with_type<ngraph::opset7::FakeQuantize>(graph);
|
||||
manager.register_pass<ngraph::pass::ConvertMVN1ToMVN6>();
|
||||
manager.register_pass<DecomposeMVN>();
|
||||
manager.register_pass<ngraph::pass::CommonOptimizations>();
|
||||
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
|
||||
manager.register_pass<ConvertDWSCToScaleShifts>();
|
||||
|
@ -0,0 +1,265 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include "transformations/decompose_mvn.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset8.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include "backend/gna_limitations.hpp"
|
||||
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
using namespace ngraph;
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(DecomposeMVN, "DecomposeMVN", 0);
|
||||
|
||||
struct MVNData {
|
||||
size_t N;
|
||||
size_t C;
|
||||
size_t H;
|
||||
size_t W;
|
||||
size_t num_parts;
|
||||
float eps;
|
||||
op::MVNEpsMode eps_mode;
|
||||
bool normalize_variance;
|
||||
element::Type element_type;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
static bool ValidateAxes(const std::shared_ptr<opset8::Constant> axes_const, const size_t& mvn_shape_size) {
|
||||
T axes_value;
|
||||
size_t axes_vector_size;
|
||||
|
||||
std::vector<T> axes_const_vector = axes_const->cast_vector<T>();
|
||||
IE_ASSERT(!axes_const_vector.empty());
|
||||
axes_value = axes_const_vector[0];
|
||||
axes_vector_size = axes_const_vector.size();
|
||||
|
||||
if (axes_vector_size != mvn_shape_size - 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify supported first axes value
|
||||
if (axes_value != 2 && axes_value != 2 - mvn_shape_size)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool GetVerifiedMVNData(const std::shared_ptr<opset8::MVN> mvn, MVNData& mvn_data) {
|
||||
const auto mvn_shape = mvn->get_output_shape(0);
|
||||
auto mvn_shape_size = mvn_shape.size();
|
||||
|
||||
// Validate axes parameter
|
||||
auto axes_const = std::dynamic_pointer_cast<opset8::Constant>(mvn->input_value(1).get_node_shared_ptr());
|
||||
IE_ASSERT(axes_const);
|
||||
auto element_type = axes_const->get_element_type();
|
||||
|
||||
if (!(element_type == element::Type_t::i64 ? ValidateAxes<int64_t>(axes_const, mvn_shape_size) :
|
||||
ValidateAxes<int32_t>(axes_const, mvn_shape_size)))
|
||||
return false;
|
||||
|
||||
if (mvn_shape_size == 4) {
|
||||
mvn_data.N = mvn_shape[0];
|
||||
mvn_data.C = mvn_shape[1];
|
||||
mvn_data.H = mvn_shape[2];
|
||||
mvn_data.W = mvn_shape[3];
|
||||
} else if (mvn_shape_size == 3) {
|
||||
mvn_data.N = 1;
|
||||
mvn_data.C = mvn_shape[0];
|
||||
mvn_data.H = mvn_shape[1];
|
||||
mvn_data.W = mvn_shape[2];
|
||||
}
|
||||
|
||||
// Check if average must be split
|
||||
mvn_data.num_parts = 1;
|
||||
while (mvn_data.W / mvn_data.num_parts > GNALimitations::convFilterMaxSize) {
|
||||
mvn_data.num_parts *= 2;
|
||||
}
|
||||
|
||||
// Abort if W is not divisible by power of 2
|
||||
if ((mvn_data.W / mvn_data.num_parts) * mvn_data.num_parts != mvn_data.W) {
|
||||
return false;
|
||||
}
|
||||
|
||||
mvn_data.eps = mvn->get_eps();
|
||||
mvn_data.eps_mode = mvn->get_eps_mode();
|
||||
mvn_data.normalize_variance = mvn->get_normalize_variance();
|
||||
mvn_data.element_type = mvn->get_element_type();
|
||||
mvn_data.name = mvn->get_friendly_name();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static std::shared_ptr<Node> NormalizeVariance(const std::shared_ptr<opset8::MVN> mvn, const MVNData& mvn_data,
|
||||
const std::shared_ptr<opset8::Add>& subtract_mean, const std::shared_ptr<opset8::Constant>& avg_broadcast_const) {
|
||||
// Prepare consts
|
||||
auto combined_C_H = mvn_data.C * mvn_data.H;
|
||||
|
||||
std::vector<float> avg_weights(8 * mvn_data.W / mvn_data.num_parts, 1.0f / mvn_data.W);
|
||||
auto avg_weights_const = opset8::Constant::create(mvn_data.element_type, Shape{8, mvn_data.W / mvn_data.num_parts, 1, 1}, avg_weights);
|
||||
std::vector<float> eps_tensor(combined_C_H * mvn_data.W, mvn_data.eps);
|
||||
auto eps_tensor_const = opset8::Constant::create(mvn_data.element_type, Shape{1, combined_C_H * mvn_data.W}, eps_tensor);
|
||||
std::vector<float> minus_half(combined_C_H * mvn_data.W, -0.5f);
|
||||
auto minus_half_const = opset8::Constant::create(mvn_data.element_type, Shape{1, combined_C_H * mvn_data.W}, minus_half);
|
||||
|
||||
// Calculate square of the difference between input and its mean
|
||||
auto squared_diff = std::make_shared<opset8::Multiply>(subtract_mean, subtract_mean);
|
||||
squared_diff->set_friendly_name(mvn_data.name + "_SqrDiff");
|
||||
|
||||
// Calculate sum of the squares
|
||||
auto squared_diff_reshape = std::make_shared<opset8::Reshape>(squared_diff,
|
||||
opset8::Constant::create(element::i32, Shape{4}, Shape{mvn_data.N, combined_C_H * mvn_data.num_parts, 1ull, mvn_data.W / mvn_data.num_parts}), false);
|
||||
auto transposed_input_3 = std::make_shared<opset8::Transpose>(squared_diff_reshape, opset8::Constant::create(element::i32, Shape{4}, {0, 3, 1, 2}));
|
||||
auto transposed_avg_conv_3 = std::make_shared<opset8::Convolution>(transposed_input_3, avg_weights_const,
|
||||
Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1}, op::PadType::VALID);
|
||||
transposed_avg_conv_3->set_friendly_name(mvn_data.name + "_Avg3");
|
||||
auto avg_conv_3 = std::make_shared<opset8::Transpose>(transposed_avg_conv_3, opset8::Constant::create(element::i32, Shape{4}, {0, 2, 3, 1}));
|
||||
auto reshape_avg_conv_3 = std::make_shared<opset8::Reshape>(avg_conv_3,
|
||||
opset8::Constant::create(element::i32, Shape{4}, Shape{mvn_data.N, 1ull, combined_C_H, 8 * mvn_data.num_parts}), false);
|
||||
auto transposed_input_4 = std::make_shared<opset8::Transpose>(reshape_avg_conv_3, opset8::Constant::create(element::i32, Shape{4}, {0, 3, 1, 2}));
|
||||
auto transposed_avg_conv_4 = std::make_shared<opset8::Convolution>(transposed_input_4,
|
||||
avg_broadcast_const, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1}, op::PadType::VALID);
|
||||
transposed_avg_conv_4->set_friendly_name(mvn_data.name + "_Avg4");
|
||||
auto avg_conv_4 = std::make_shared<opset8::Transpose>(transposed_avg_conv_4,
|
||||
opset8::Constant::create(element::i32, Shape{4}, {0, 2, 3, 1}));
|
||||
auto reshape_avg_conv_4 = std::make_shared<opset8::Reshape>(avg_conv_4,
|
||||
opset8::Constant::create(element::i32, Shape{2}, Shape{1ull, combined_C_H * mvn_data.W}), false);
|
||||
std::shared_ptr<Node> inv_stdev;
|
||||
|
||||
// Create normalization part of the graph
|
||||
// We ignore inside/outside epsilon position here and always use inside, to get better accuracy
|
||||
// even though the built-in MVN1 to MVN6 transformation enforces outside setting
|
||||
|
||||
// Add epsilon inside the square root
|
||||
auto add_epsilon = std::make_shared<opset8::Add>(eps_tensor_const, reshape_avg_conv_4);
|
||||
|
||||
// Calculate square root and inversion
|
||||
auto log_var_eps = std::make_shared<opset8::Log>(add_epsilon);
|
||||
log_var_eps->set_friendly_name(mvn_data.name + "_LogVarEps");
|
||||
auto log_inv_stdev = std::make_shared<opset8::Multiply>(log_var_eps, minus_half_const);
|
||||
log_inv_stdev->set_friendly_name(mvn_data.name + "_LogInvStdev");
|
||||
inv_stdev = std::make_shared<opset8::Exp>(log_inv_stdev);
|
||||
inv_stdev->set_friendly_name(mvn_data.name + "_InvStdev");
|
||||
copy_runtime_info(mvn, {add_epsilon, log_var_eps, log_inv_stdev, inv_stdev});
|
||||
|
||||
auto normalized_output = std::make_shared<opset8::Multiply>(subtract_mean, inv_stdev);
|
||||
normalized_output->set_friendly_name(mvn_data.name + "_Output");
|
||||
|
||||
copy_runtime_info(mvn, {squared_diff, squared_diff_reshape, transposed_input_3, transposed_avg_conv_3, avg_conv_3, reshape_avg_conv_3,
|
||||
transposed_input_4, transposed_avg_conv_4, avg_conv_4, reshape_avg_conv_4});
|
||||
|
||||
return normalized_output;
|
||||
}
|
||||
|
||||
static void Decompose(const std::shared_ptr<opset8::MVN> mvn, const MVNData& mvn_data) {
|
||||
// Prepare data
|
||||
auto combined_C_H = mvn_data.C * mvn_data.H;
|
||||
|
||||
std::vector<float> neg_avg_weights(8 * mvn_data.W / mvn_data.num_parts, -1.0f / mvn_data.W);
|
||||
auto neg_avg_weights_const = opset8::Constant::create(mvn_data.element_type, Shape{8, mvn_data.W / mvn_data.num_parts, 1, 1}, neg_avg_weights);
|
||||
|
||||
std::vector<float> avg_broadcast(8 * mvn_data.W * mvn_data.num_parts, 0.0f);
|
||||
for (size_t i = 0; i < mvn_data.W * mvn_data.num_parts; i++) {
|
||||
avg_broadcast[i * 8] = 1.0f;
|
||||
}
|
||||
auto avg_broadcast_const = opset8::Constant::create(mvn_data.element_type, Shape{mvn_data.W, 8 * mvn_data.num_parts, 1, 1}, avg_broadcast);
|
||||
|
||||
// Create average calculation part of the graph
|
||||
// We assume C = 1 case (combined channels)
|
||||
const auto input = mvn->input_value(0);
|
||||
auto reshape = std::make_shared<opset8::Reshape>(input,
|
||||
opset8::Constant::create(element::i32, Shape{4}, Shape{mvn_data.N, 1ull, combined_C_H, mvn_data.W}), false);
|
||||
auto input_4d = std::make_shared<opset8::Reshape>(reshape,
|
||||
opset8::Constant::create(element::i32, Shape{4}, Shape{mvn_data.N, combined_C_H * mvn_data.num_parts, 1ull, mvn_data.W / mvn_data.num_parts}), false);
|
||||
auto input_2d = std::make_shared<opset8::Reshape>(reshape,
|
||||
opset8::Constant::create(element::i32, Shape{2}, Shape{1ull, combined_C_H * mvn_data.W}), false);
|
||||
auto transposed_input_1 = std::make_shared<opset8::Transpose>(input_4d, opset8::Constant::create(element::i32, Shape{4}, {0, 3, 1, 2}));
|
||||
auto transposed_avg_conv_1 = std::make_shared<opset8::Convolution>(transposed_input_1, neg_avg_weights_const,
|
||||
Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1}, op::PadType::VALID);
|
||||
transposed_avg_conv_1->set_friendly_name(mvn_data.name + "_Avg1");
|
||||
auto avg_conv_1 = std::make_shared<opset8::Transpose>(transposed_avg_conv_1, opset8::Constant::create(element::i32, Shape{4}, {0, 2, 3, 1}));
|
||||
auto reshape_avg_conv_1 = std::make_shared<opset8::Reshape>(avg_conv_1,
|
||||
opset8::Constant::create(element::i32, Shape{4}, Shape{mvn_data.N, 1ull, combined_C_H, 8 * mvn_data.num_parts}), false);
|
||||
auto transposed_input_2 = std::make_shared<opset8::Transpose>(reshape_avg_conv_1, opset8::Constant::create(element::i32, Shape{4}, {0, 3, 1, 2}));
|
||||
auto transposed_avg_conv_2 = std::make_shared<opset8::Convolution>(transposed_input_2,
|
||||
avg_broadcast_const, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1}, op::PadType::VALID);
|
||||
transposed_avg_conv_2->set_friendly_name(mvn_data.name + "_Avg2");
|
||||
auto avg_conv_2 = std::make_shared<opset8::Transpose>(transposed_avg_conv_2,
|
||||
opset8::Constant::create(element::i32, Shape{4}, {0, 2, 3, 1}));
|
||||
auto avg_conv_2_2d = std::make_shared<opset8::Reshape>(avg_conv_2,
|
||||
opset8::Constant::create(element::i32, Shape{2}, Shape{1ull, combined_C_H * mvn_data.W}), false);
|
||||
auto subtract_mean = std::make_shared<opset8::Add>(input_2d, avg_conv_2_2d);
|
||||
subtract_mean->set_friendly_name(mvn_data.name + "_SubMean");
|
||||
|
||||
std::shared_ptr<Node> mvn_output, pre_output = subtract_mean;
|
||||
|
||||
// Normalize variance if required
|
||||
if (mvn_data.normalize_variance) {
|
||||
pre_output = NormalizeVariance(mvn, mvn_data, subtract_mean, avg_broadcast_const);
|
||||
}
|
||||
|
||||
// Reshape (combined channels) back to get the final output
|
||||
if (mvn->get_output_shape(0).size() == 3) {
|
||||
mvn_output = std::make_shared<opset8::Reshape>(pre_output,
|
||||
opset8::Constant::create(element::i32, Shape{3}, {mvn_data.C, mvn_data.H, mvn_data.W}), false);
|
||||
} else {
|
||||
mvn_output = std::make_shared<opset8::Reshape>(pre_output,
|
||||
opset8::Constant::create(element::i32, Shape{4}, {mvn_data.N, mvn_data.C, mvn_data.H, mvn_data.W}), false);
|
||||
}
|
||||
|
||||
copy_runtime_info(mvn, {reshape, input_4d, input_2d, transposed_input_1, transposed_avg_conv_1, avg_conv_1, reshape_avg_conv_1,
|
||||
transposed_input_2, transposed_avg_conv_2, avg_conv_2, avg_conv_2_2d, subtract_mean, mvn_output});
|
||||
|
||||
// We need to retain the MVN layer name, so its output can be used as a network result
|
||||
replace_node(mvn, mvn_output);
|
||||
mvn_output->set_friendly_name(mvn_data.name);
|
||||
}
|
||||
|
||||
static bool Convert(std::shared_ptr<Node> mvn_node) {
|
||||
const auto mvn = std::dynamic_pointer_cast<opset8::MVN>(mvn_node);
|
||||
MVNData mvn_data;
|
||||
|
||||
if (!GetVerifiedMVNData(mvn, mvn_data))
|
||||
return false;
|
||||
|
||||
Decompose(mvn, mvn_data);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static std::function<bool(Output<Node>)> verify_rank_batch() {
|
||||
return [=](Output<Node> output) -> bool {
|
||||
// Only rank 3 and 4 and batch 1 are supported for now
|
||||
auto rank = output.get_partial_shape().rank();
|
||||
if (rank != 3 && rank != 4)
|
||||
return false;
|
||||
|
||||
auto batch = (rank == 3 ? 1 : output.get_partial_shape()[0]);
|
||||
if (batch != 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
};
|
||||
}
|
||||
|
||||
DecomposeMVN::DecomposeMVN() {
|
||||
MATCHER_SCOPE(DecomposeMVN);
|
||||
|
||||
auto axes = pattern::wrap_type<opset8::Constant>();
|
||||
auto mvn = pattern::wrap_type<opset8::MVN>({pattern::any_input(), axes}, verify_rank_batch());
|
||||
|
||||
matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& pattern_map = m.get_pattern_value_map();
|
||||
return Convert(pattern_map.at(mvn).get_node_shared_ptr());
|
||||
};
|
||||
|
||||
auto m = std::make_shared<pattern::Matcher>(mvn, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
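Editor's note: the width-splitting check in GetVerifiedMVNData above doubles num_parts until each slice fits the GNA convolution filter limit, then rejects shapes whose W is not evenly divisible by that power of two. A minimal standalone sketch of that logic follows; the 768 limit is an assumed placeholder used only for illustration, the real bound comes from GNALimitations::convFilterMaxSize in backend/gna_limitations.hpp.

```cpp
#include <cstddef>
#include <iostream>

// Hypothetical stand-in for GNALimitations::convFilterMaxSize (assumption).
constexpr std::size_t kAssumedConvFilterMaxSize = 768;

// Returns the number of parts W is split into, or 0 if the split is impossible
// because W is not divisible by the required power of two.
std::size_t ComputeNumParts(std::size_t W) {
    std::size_t num_parts = 1;
    while (W / num_parts > kAssumedConvFilterMaxSize) {
        num_parts *= 2;  // only power-of-two splits are attempted
    }
    if ((W / num_parts) * num_parts != W) {
        return 0;  // abort, mirrors the early return in GetVerifiedMVNData
    }
    return num_parts;
}

int main() {
    std::cout << ComputeNumParts(1024) << "\n";  // 2: 512 fits under the assumed limit
    std::cout << ComputeNumParts(1000) << "\n";  // 2: 500 fits and 1000 is even
    std::cout << ComputeNumParts(1001) << "\n";  // 0: odd width, split rejected
    return 0;
}
```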
|
@ -0,0 +1,24 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
|
||||
/**
|
||||
* @brief Decompose MVN operation
|
||||
* See official OpenVINO documentation for the MVN formula
|
||||
* implemented partially by this decomposition:
|
||||
* https://docs.openvino.ai/latest/openvino_docs_ops_normalization_MVN_6.html
|
||||
*
|
||||
*/
|
||||
class DecomposeMVN : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
DecomposeMVN();
|
||||
};
|
||||
|
||||
} // namespace GNAPluginNS
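Editor's note: as the gna_plugin.cpp hunk earlier in this commit shows, DecomposeMVN is intended to run inside an ngraph pass manager. A minimal usage sketch under that assumption (the function pointer is illustrative; in the plugin it comes from the LoadNetwork path):

```cpp
#include <memory>

#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>

#include "transformations/decompose_mvn.hpp"

// Sketch: run the new pass on an existing ngraph::Function containing opset8 MVN nodes.
void RunDecomposeMVN(const std::shared_ptr<ngraph::Function>& function) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::InitNodeInfo>();
    manager.register_pass<GNAPluginNS::DecomposeMVN>();
    manager.run_passes(function);
}
```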
|
@ -16,7 +16,7 @@ ie_faster_build(${TARGET_NAME}
|
||||
UNITY
|
||||
)
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static inference_engine
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static
|
||||
ngraph inference_engine_transformations)
|
||||
|
||||
ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
|
||||
|
@ -18,7 +18,6 @@
|
||||
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "cpp/ie_infer_request.hpp"
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "ie_iexecutable_network.hpp"
|
||||
#include "ie_parameter.hpp"
|
||||
#include "ie_remote_context.hpp"
|
||||
@ -36,7 +35,7 @@ class IExecutableNetworkInternal;
|
||||
* @brief This is an interface of an executable network
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<void> _so;
|
||||
std::shared_ptr<IExecutableNetworkInternal> _impl;
|
||||
|
||||
/**
|
||||
@ -45,7 +44,7 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
|
||||
* object is destroyed.
|
||||
* @param impl Initialized shared pointer
|
||||
*/
|
||||
ExecutableNetwork(const details::SharedObjectLoader& so, const std::shared_ptr<IExecutableNetworkInternal>& impl);
|
||||
ExecutableNetwork(const std::shared_ptr<void>& so, const std::shared_ptr<IExecutableNetworkInternal>& impl);
|
||||
friend class Core;
|
||||
friend class ov::runtime::Core;
|
||||
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include <string>
|
||||
|
||||
#include "cpp/ie_memory_state.hpp"
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "ie_blob.h"
|
||||
#include "ie_iinfer_request.hpp"
|
||||
|
||||
@ -33,7 +32,7 @@ class ICompletionCallbackWrapper;
|
||||
* It can throw exceptions safely for the application, where it is properly handled.
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(InferRequest) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<void> _so;
|
||||
std::shared_ptr<IInferRequestInternal> _impl;
|
||||
|
||||
/**
|
||||
@ -42,7 +41,7 @@ class INFERENCE_ENGINE_API_CLASS(InferRequest) {
|
||||
* destroyed.
|
||||
* @param impl Initialized shared pointer
|
||||
*/
|
||||
InferRequest(const details::SharedObjectLoader& so, const std::shared_ptr<IInferRequestInternal>& impl);
|
||||
InferRequest(const std::shared_ptr<void>& so, const std::shared_ptr<IInferRequestInternal>& impl);
|
||||
friend class ExecutableNetwork;
|
||||
|
||||
public:
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "ie_api.h"
|
||||
#include "ie_blob.h"
|
||||
|
||||
@ -25,7 +24,7 @@ class IVariableStateInternal;
|
||||
* @brief VariableState class
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(VariableState) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<void> _so;
|
||||
std::shared_ptr<IVariableStateInternal> _impl;
|
||||
|
||||
/**
|
||||
@ -34,7 +33,7 @@ class INFERENCE_ENGINE_API_CLASS(VariableState) {
|
||||
* @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin
|
||||
* object is destroyed.
|
||||
*/
|
||||
VariableState(const details::SharedObjectLoader& so, const std::shared_ptr<IVariableStateInternal>& impl);
|
||||
VariableState(const std::shared_ptr<void>& so, const std::shared_ptr<IVariableStateInternal>& impl);
|
||||
friend class InferRequest;
|
||||
friend class ExecutableNetwork;
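Editor's note: the doc comments above explain why the wrappers keep the plugin handle — the object must keep working even after the plugin object itself is destroyed. The commit replaces details::SharedObjectLoader with an opaque std::shared_ptr<void> member for that purpose. A generic, hedged sketch of the lifetime pattern (names are illustrative, not the library's API):

```cpp
#include <memory>
#include <utility>

// The opaque library handle is stored next to the implementation pointer, so the
// shared object cannot be unloaded while the wrapper (or any copy of it) is alive.
template <typename Impl>
class SoBoundObject {
public:
    SoBoundObject(std::shared_ptr<void> so, std::shared_ptr<Impl> impl)
        : _so(std::move(so)), _impl(std::move(impl)) {}

    Impl* operator->() const { return _impl.get(); }

private:
    std::shared_ptr<void> _so;    // keeps the plugin .so/.dll mapped
    std::shared_ptr<Impl> _impl;  // object whose code lives in that library
};
```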
|
||||
|
||||
|
@ -17,9 +17,11 @@ namespace InferenceEngine {
|
||||
namespace details {
|
||||
|
||||
/**
|
||||
* @deprecated This is internal stuff. Use Inference Engine Plugin API
|
||||
* @brief This class provides an OS shared module abstraction
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(SharedObjectLoader) {
|
||||
class INFERENCE_ENGINE_DEPRECATED("This is internal stuff. Use Inference Engine Plugin API")
|
||||
INFERENCE_ENGINE_API_CLASS(SharedObjectLoader) {
|
||||
std::shared_ptr<void> _so;
|
||||
|
||||
public:
|
||||
|
@ -35,11 +35,12 @@ using enableIfSupportedChar =
|
||||
typename std::enable_if<(std::is_same<C, char>::value || std::is_same<C, wchar_t>::value)>::type;
|
||||
|
||||
/**
|
||||
* @deprecated This is internal stuff. Use Inference Engine Plugin API
|
||||
* @brief This class instantiate object using shared library
|
||||
* @tparam T An type of object SOPointer can hold
|
||||
*/
|
||||
template <class T>
|
||||
class SOPointer {
|
||||
class INFERENCE_ENGINE_DEPRECATED("This is internal stuff. Use Inference Engine Plugin API") SOPointer {
|
||||
template <class U>
|
||||
friend class SOPointer;
|
||||
|
||||
|
@ -14,27 +14,10 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "details/ie_so_pointer.hpp"
|
||||
#include "ie_iextension.h"
|
||||
#include "ngraph/opsets/opset.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace details {
|
||||
|
||||
/**
|
||||
* @brief The SOCreatorTrait class specialization for IExtension case, defines the name of the fabric method for
|
||||
* creating IExtension object in DLL
|
||||
*/
|
||||
template <>
|
||||
class SOCreatorTrait<IExtension> {
|
||||
public:
|
||||
/**
|
||||
* @brief A name of the fabric method for creating an IExtension object in DLL
|
||||
*/
|
||||
static constexpr auto name = "CreateExtension";
|
||||
};
|
||||
|
||||
} // namespace details
|
||||
|
||||
/**
|
||||
* @brief This class is a C++ helper to work with objects created using extensions.
|
||||
@ -46,8 +29,16 @@ public:
|
||||
*
|
||||
* @param name Full or relative path to extension library
|
||||
*/
|
||||
template <typename C, typename = details::enableIfSupportedChar<C>>
|
||||
explicit Extension(const std::basic_string<C>& name) : actual(name) {}
|
||||
explicit Extension(const std::string& name);
|
||||
|
||||
#ifdef ENABLE_UNICODE_PATH_SUPPORT
|
||||
/**
|
||||
* @brief Loads extension from a shared library
|
||||
*
|
||||
* @param name Full or relative path to extension library
|
||||
*/
|
||||
explicit Extension(const std::wstring& name);
|
||||
#endif // ENABLE_UNICODE_PATH_SUPPORT
|
||||
|
||||
/**
|
||||
* @brief Gets the extension version information
|
||||
@ -55,14 +46,14 @@ public:
|
||||
* @param versionInfo A pointer to version info, set by the plugin
|
||||
*/
|
||||
void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override {
|
||||
actual->GetVersion(versionInfo);
|
||||
_actual->GetVersion(versionInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Cleans the resources up
|
||||
*/
|
||||
void Unload() noexcept override {
|
||||
actual->Unload();
|
||||
_actual->Unload();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -80,7 +71,7 @@ public:
|
||||
std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override {
|
||||
if (node == nullptr)
|
||||
IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
|
||||
return actual->getImplTypes(node);
|
||||
return _actual->getImplTypes(node);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -92,14 +83,19 @@ public:
|
||||
ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) override {
|
||||
if (node == nullptr)
|
||||
IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
|
||||
return actual->getImplementation(node, implType);
|
||||
return _actual->getImplementation(node, implType);
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @brief A SOPointer instance to the loaded templated object
|
||||
* @brief A shared library
|
||||
*/
|
||||
details::SOPointer<IExtension> actual;
|
||||
std::shared_ptr<void> _so;
|
||||
|
||||
/**
|
||||
* @brief An instance of the loaded templated object
|
||||
*/
|
||||
std::shared_ptr<InferenceEngine::IExtension> _actual;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -33,7 +33,7 @@ namespace InferenceEngine {
|
||||
OPENVINO_ASSERT(false, "Unexpected exception"); \
|
||||
}
|
||||
|
||||
ExecutableNetwork::ExecutableNetwork(const details::SharedObjectLoader& so, const IExecutableNetworkInternal::Ptr& impl)
|
||||
ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<void>& so, const IExecutableNetworkInternal::Ptr& impl)
|
||||
: _so(so),
|
||||
_impl(impl) {
|
||||
IE_ASSERT(_impl != nullptr);
|
||||
|
@ -0,0 +1,77 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ie_extension.h"
|
||||
|
||||
#include "openvino/util/shared_object.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T>
|
||||
std::shared_ptr<T> CreateExtensionFromLibrary(std::shared_ptr<void> _so) {
|
||||
std::shared_ptr<T> _ptr = nullptr;
|
||||
constexpr char createFuncName[] = "CreateExtension";
|
||||
|
||||
try {
|
||||
void* create = nullptr;
|
||||
try {
|
||||
create = ov::util::get_symbol(_so, (createFuncName + std::string("Shared")).c_str());
|
||||
} catch (const std::runtime_error&) {
|
||||
}
|
||||
|
||||
if (create == nullptr) {
|
||||
create = ov::util::get_symbol(_so, createFuncName);
|
||||
using CreateF = StatusCode(T*&, ResponseDesc*);
|
||||
T* object = nullptr;
|
||||
ResponseDesc desc;
|
||||
StatusCode sts = reinterpret_cast<CreateF*>(create)(object, &desc);
|
||||
if (sts != OK) {
|
||||
IE_EXCEPTION_SWITCH(
|
||||
sts,
|
||||
ExceptionType,
|
||||
details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << desc.msg)
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
_ptr = std::shared_ptr<T>(object, [](T* ptr) {
|
||||
ptr->Release();
|
||||
});
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
} else {
|
||||
using CreateF = void(std::shared_ptr<T>&);
|
||||
reinterpret_cast<CreateF*>(create)(_ptr);
|
||||
}
|
||||
} catch (...) {
|
||||
details::Rethrow();
|
||||
}
|
||||
|
||||
return _ptr;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Extension::Extension(const std::string& name) {
|
||||
try {
|
||||
_so = ov::util::load_shared_object(name.c_str());
|
||||
} catch (const std::runtime_error&) {
|
||||
details::Rethrow();
|
||||
}
|
||||
_actual = CreateExtensionFromLibrary<IExtension>(_so);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_UNICODE_PATH_SUPPORT
|
||||
Extension::Extension(const std::wstring& name) {
|
||||
try {
|
||||
_so = ov::util::load_shared_object(name.c_str());
|
||||
} catch (const std::runtime_error&) {
|
||||
details::Rethrow();
|
||||
}
|
||||
_actual = CreateExtensionFromLibrary<IExtension>(_so);
|
||||
}
|
||||
#endif // ENABLE_UNICODE_PATH_SUPPORT
|
||||
|
||||
std::map<std::string, ngraph::OpSet> Extension::getOpSets() {
|
||||
return _actual->getOpSets();
|
||||
}
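Editor's note: the new std::string constructor above loads the shared library itself (via ov::util::load_shared_object) instead of going through SOPointer. A short hedged sketch of how it is typically consumed; the library path is a placeholder, not a file shipped with OpenVINO.

```cpp
#include <memory>
#include <string>

#include <ie_core.hpp>
#include <ie_extension.h>

int main() {
    InferenceEngine::Core core;
    // Load a custom-ops extension library and register it with the Core,
    // so its operations become visible to ReadNetwork.
    auto extension =
        std::make_shared<InferenceEngine::Extension>(std::string{"libcustom_extension.so"});
    core.AddExtension(extension);
    return 0;
}
```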
|
@ -58,7 +58,7 @@ namespace InferenceEngine {
|
||||
OPENVINO_ASSERT(false, "Unexpected exception"); \
|
||||
}
|
||||
|
||||
InferRequest::InferRequest(const details::SharedObjectLoader& so, const IInferRequestInternal::Ptr& impl)
|
||||
InferRequest::InferRequest(const std::shared_ptr<void>& so, const IInferRequestInternal::Ptr& impl)
|
||||
: _so(so),
|
||||
_impl(impl) {
|
||||
IE_ASSERT(_impl != nullptr);
|
||||
|
@ -61,17 +61,17 @@ struct InferencePlugin {
|
||||
PLUGIN_CALL_STATEMENT(_ptr->SetConfig(config));
|
||||
}
|
||||
|
||||
details::SOPointer<IExecutableNetworkInternal> LoadNetwork(const CNNNetwork& network, const std::map<std::string, std::string>& config) {
|
||||
ov::runtime::SoPtr<IExecutableNetworkInternal> LoadNetwork(const CNNNetwork& network, const std::map<std::string, std::string>& config) {
|
||||
PLUGIN_CALL_STATEMENT(return {_so, _ptr->LoadNetwork(network, config)});
|
||||
}
|
||||
|
||||
details::SOPointer<IExecutableNetworkInternal> LoadNetwork(const CNNNetwork& network,
|
||||
ov::runtime::SoPtr<IExecutableNetworkInternal> LoadNetwork(const CNNNetwork& network,
|
||||
const std::shared_ptr<RemoteContext>& context,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
PLUGIN_CALL_STATEMENT(return {_so, _ptr->LoadNetwork(network, config, context)});
|
||||
}
|
||||
|
||||
details::SOPointer<IExecutableNetworkInternal> LoadNetwork(const std::string& modelPath, const std::map<std::string, std::string>& config) {
|
||||
ov::runtime::SoPtr<IExecutableNetworkInternal> LoadNetwork(const std::string& modelPath, const std::map<std::string, std::string>& config) {
|
||||
PLUGIN_CALL_STATEMENT(return {_so, _ptr->LoadNetwork(modelPath, config)});
|
||||
}
|
||||
|
||||
@ -83,17 +83,17 @@ struct InferencePlugin {
|
||||
return res;
|
||||
}
|
||||
|
||||
details::SOPointer<IExecutableNetworkInternal> ImportNetwork(const std::string& modelFileName,
|
||||
ov::runtime::SoPtr<IExecutableNetworkInternal> ImportNetwork(const std::string& modelFileName,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
PLUGIN_CALL_STATEMENT(return {_so, _ptr->ImportNetwork(modelFileName, config)});
|
||||
}
|
||||
|
||||
details::SOPointer<IExecutableNetworkInternal> ImportNetwork(std::istream& networkModel,
|
||||
ov::runtime::SoPtr<IExecutableNetworkInternal> ImportNetwork(std::istream& networkModel,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
PLUGIN_CALL_STATEMENT(return {_so, _ptr->ImportNetwork(networkModel, config)});
|
||||
}
|
||||
|
||||
details::SOPointer<IExecutableNetworkInternal> ImportNetwork(std::istream& networkModel,
|
||||
ov::runtime::SoPtr<IExecutableNetworkInternal> ImportNetwork(std::istream& networkModel,
|
||||
const std::shared_ptr<RemoteContext>& context,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
PLUGIN_CALL_STATEMENT(return {_so, _ptr->ImportNetwork(networkModel, context, config)});
|
||||
@ -103,11 +103,11 @@ struct InferencePlugin {
|
||||
PLUGIN_CALL_STATEMENT(return _ptr->GetMetric(name, options));
|
||||
}
|
||||
|
||||
details::SOPointer<RemoteContext> CreateContext(const ParamMap& params) {
|
||||
ov::runtime::SoPtr<RemoteContext> CreateContext(const ParamMap& params) {
|
||||
PLUGIN_CALL_STATEMENT(return {_so, _ptr->CreateContext(params)});
|
||||
}
|
||||
|
||||
details::SOPointer<RemoteContext> GetDefaultContext(const ParamMap& params) {
|
||||
ov::runtime::SoPtr<RemoteContext> GetDefaultContext(const ParamMap& params) {
|
||||
PLUGIN_CALL_STATEMENT(return {_so, _ptr->GetDefaultContext(params)});
|
||||
}
|
||||
|
||||
|
@ -28,7 +28,7 @@
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
VariableState::VariableState(const details::SharedObjectLoader& so, const IVariableStateInternal::Ptr& impl)
|
||||
VariableState::VariableState(const std::shared_ptr<void>& so, const IVariableStateInternal::Ptr& impl)
|
||||
: _so(so),
|
||||
_impl(impl) {
|
||||
if (_impl == nullptr)
|
||||
|
@ -52,7 +52,7 @@ namespace InferenceEngine {
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename C, typename = InferenceEngine::details::enableIfSupportedChar<C>>
|
||||
template <typename C, typename = FileUtils::enableIfSupportedChar<C>>
|
||||
std::basic_string<C> getPathName(const std::basic_string<C>& s) {
|
||||
size_t i = s.rfind(ov::util::FileTraits<C>::file_separator, s.length());
|
||||
if (i != std::string::npos) {
|
||||
|
@ -40,12 +40,6 @@ std::map<std::string, ngraph::OpSet> IExtension::getOpSets() {
|
||||
return {};
|
||||
}
|
||||
|
||||
//
|
||||
// ie_extension.h
|
||||
//
|
||||
std::map<std::string, ngraph::OpSet> Extension::getOpSets() {
|
||||
return actual->getOpSets();
|
||||
}
|
||||
namespace details {
|
||||
|
||||
void Rethrow() {
|
||||
|
@ -1154,7 +1154,7 @@ private:
|
||||
extensions.emplace_back(extension);
|
||||
}
|
||||
|
||||
template <typename C, typename = InferenceEngine::details::enableIfSupportedChar<C>>
|
||||
template <typename C, typename = FileUtils::enableIfSupportedChar<C>>
|
||||
void TryToRegisterLibraryAsExtensionUnsafe(const std::basic_string<C>& path) const {
|
||||
try {
|
||||
const auto extension_ptr = std::make_shared<InferenceEngine::Extension>(path);
|
||||
|
@ -13,7 +13,6 @@
|
||||
|
||||
#include "cnn_network_ngraph_impl.hpp"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "details/ie_so_pointer.hpp"
|
||||
#include "file_utils.h"
|
||||
#include "frontend_manager/frontend_manager.hpp"
|
||||
#include "ie_api.h"
|
||||
@ -34,6 +33,8 @@
|
||||
#include "openvino/core/preprocess/input_tensor_info.hpp"
|
||||
#include "openvino/core/preprocess/pre_post_process.hpp"
|
||||
#include "openvino/core/type/element_type.hpp"
|
||||
#include "openvino/util/shared_object.hpp"
|
||||
#include "so_ptr.hpp"
|
||||
#include "transformations/rt_info/old_api_map_order_attribute.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
@ -77,22 +78,6 @@ namespace InferenceEngine {
|
||||
|
||||
#ifdef ENABLE_IR_V7_READER
|
||||
|
||||
namespace details {
|
||||
|
||||
/**
|
||||
* @brief This class defines the name of the fabric for creating an IReader object in DLL
|
||||
*/
|
||||
template <>
|
||||
class SOCreatorTrait<IReader> {
|
||||
public:
|
||||
/**
|
||||
* @brief A name of the fabric for creating IReader object in DLL
|
||||
*/
|
||||
static constexpr auto name = "CreateReader";
|
||||
};
|
||||
|
||||
} // namespace details
|
||||
|
||||
/**
|
||||
* @brief This class is a wrapper for reader interfaces
|
||||
*/
|
||||
@ -100,7 +85,7 @@ class Reader : public IReader {
|
||||
# ifdef OPENVINO_STATIC_LIBRARY
|
||||
using ReaderPtr = std::shared_ptr<IReader>;
|
||||
# else
|
||||
using ReaderPtr = InferenceEngine::details::SOPointer<IReader>;
|
||||
using ReaderPtr = ov::runtime::SoPtr<IReader>;
|
||||
# endif
|
||||
ReaderPtr ptr;
|
||||
std::once_flag readFlag;
|
||||
@ -123,7 +108,12 @@ class Reader : public IReader {
|
||||
<< ov::util::from_file_path(::FileUtils::makePluginLibraryName({}, libraryName)) << " is in "
|
||||
<< getIELibraryPath();
|
||||
}
|
||||
ptr = {readersLibraryPath};
|
||||
|
||||
auto so = ov::util::load_shared_object(readersLibraryPath.c_str());
|
||||
std::shared_ptr<IReader> plugin_impl;
|
||||
using createFunc = void(std::shared_ptr<IReader>&);
|
||||
reinterpret_cast<createFunc*>(ov::util::get_symbol(so, "CreateReader"))(plugin_impl);
|
||||
ptr = {so, plugin_impl};
|
||||
# endif // OPENVINO_STATIC_LIBRARY
|
||||
});
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include "openvino/util/file_util.hpp"
|
||||
#include "openvino/util/shared_object.hpp"
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace details {
|
||||
|
||||
@ -41,3 +43,5 @@ std::shared_ptr<void> SharedObjectLoader::get() const {
|
||||
|
||||
} // namespace details
|
||||
} // namespace InferenceEngine
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
@ -25,6 +25,9 @@ jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa,
|
||||
jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, InferenceEngine::Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
auto eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode*>(node);
|
||||
if (!eltwiseNode) {
|
||||
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNEltwiseNode";
|
||||
}
|
||||
kind = static_cast<mkldnn_alg_kind_t>(eltwiseNode->getMKLDNNAlgorithm());
|
||||
alpha = eltwiseNode->getAlpha();
|
||||
beta = eltwiseNode->getBeta();
|
||||
|
@ -122,6 +122,9 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
|
||||
for (auto &node : GetGraph()._graph.GetNodes()) {
|
||||
if (node->getType() == MemoryInput) {
|
||||
auto memoryNode = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
|
||||
if (!memoryNode) {
|
||||
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
|
||||
}
|
||||
auto state_store = memoryNode->getStore();
|
||||
auto state_name = memoryNode->getId();
|
||||
|
||||
|
@ -1277,6 +1277,9 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph)
|
||||
if (!childNode->getFusedWith().empty())
|
||||
return false;
|
||||
auto interpolateNode = dynamic_cast<MKLDNNInterpolateNode*>(parentNode.get());
|
||||
if (!interpolateNode) {
|
||||
IE_THROW() << "Cannot cast " << parentNode->getName() << " to MKLDNNInterpolateNode";
|
||||
}
|
||||
return interpolateNode->canFuse(childNode);
|
||||
};
|
||||
|
||||
@ -1922,8 +1925,8 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) {
|
||||
return rnnNode && !rnnNode->hasNativeOrder() && node->outputShapes[0].getRank() == 4 && node->outputShapes[0].getDims()[1] == 1;
|
||||
};
|
||||
|
||||
for (int i = 0; i < graphNodes.size(); i++) {
|
||||
auto& parentNode = graphNodes[i];
|
||||
for (size_t i = 0; i < graphNodes.size(); i++) {
|
||||
auto parentNode = graphNodes[i];
|
||||
if (!isSutableParentNode(parentNode)) {
|
||||
continue;
|
||||
}
|
||||
@ -1934,15 +1937,15 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) {
|
||||
const auto newShape = Shape(origShape);
|
||||
parentNode->outputShapes[0] = newShape;
|
||||
|
||||
for (size_t i = 0; i < childrenEdges.size(); i++) {
|
||||
auto edge = childrenEdges[i];
|
||||
for (size_t j = 0; j < childrenEdges.size(); j++) {
|
||||
auto edge = childrenEdges[j];
|
||||
auto childNode = edge->getChild();
|
||||
|
||||
const auto secondInput = std::make_shared<ngraph::opset1::Constant>(ov::element::i32, ngraph::Shape{1}, std::vector<int>{1});
|
||||
const auto unsqueeze = std::make_shared<ngraph::opset1::Unsqueeze>(
|
||||
std::make_shared<ngraph::opset1::Parameter>(details::convertPrecision(parentNode->getOriginalOutputPrecisionAtPort(0)),
|
||||
parentNode->getOutputShapeAtPort(0).toPartialShape()), secondInput);
|
||||
unsqueeze->set_friendly_name(parentNode->getName() + "_abc_a1bc_" + std::to_string(i));
|
||||
unsqueeze->set_friendly_name(parentNode->getName() + "_abc_a1bc_" + std::to_string(j));
|
||||
|
||||
const auto cpuUnsqueeze = std::make_shared<MKLDNNReshapeNode>(unsqueeze, graph.getEngine(), graph.weightsCache);
|
||||
graph.InsertNode(parentNode, childNode, cpuUnsqueeze, edge->getInputNum(), edge->getOutputNum(), false);
|
||||
@ -1958,4 +1961,4 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) {
|
||||
graph.RemoveEdge(edge);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -63,6 +63,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::CreateInferRequest() {
|
||||
for (auto& node : graph->GetNodes()) {
|
||||
if (node->getType() == MemoryInput) {
|
||||
auto memoryNode = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
|
||||
if (!memoryNode) {
|
||||
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
|
||||
}
|
||||
auto state_store = memoryNode->getStore();
|
||||
auto state_name = memoryNode->getId();
|
||||
|
||||
@ -137,6 +140,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::PushStates() {
|
||||
for (auto &node : graph->GetNodes()) {
|
||||
if (node->getType() == MemoryInput) {
|
||||
auto cur_node = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
|
||||
if (!cur_node) {
|
||||
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode";
|
||||
}
|
||||
auto cur_id = cur_node->getId();
|
||||
for (const auto& state : memoryStates) {
|
||||
if (state->GetName() == cur_id) {
|
||||
@ -271,7 +277,9 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::
|
||||
if (preProcessedInput != std::end(_networkInputs)) {
|
||||
InferenceEngine::InputInfo::Ptr foundInput;
|
||||
InferenceEngine::DataPtr foundOutput;
|
||||
findInputAndOutputBlobByName(name, foundInput, foundOutput);
|
||||
if (!findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
|
||||
IE_THROW() << "Blob with name: " << name << " absents in network inputs";
|
||||
}
|
||||
if (preProcessingRequired(foundInput, data)) {
|
||||
_preProcData.emplace(name, InferenceEngine::CreatePreprocDataHelper());
|
||||
_preProcData[name]->isApplicable(data, _inputs[name]);
|
||||
|
@ -169,7 +169,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
|
||||
|
||||
MKLDNNNode::MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache)
|
||||
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
|
||||
weightCache(w_cache), engine(eng), name(name), typeStr(type),
|
||||
weightCache(w_cache), engine(eng), fusingPort(-1), name(name), typeStr(type),
|
||||
type(TypeFromName(type)), profiling(name) {
|
||||
// TODO [NM]: What about filling inDims and outDims?
|
||||
}
|
||||
@ -1219,6 +1219,9 @@ std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts
|
||||
|
||||
const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
|
||||
auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
|
||||
if (!constInputNode) {
|
||||
IE_THROW() << "Cannot cast " << constInput->getName() << " to MKLDNNInputNode";
|
||||
}
|
||||
auto constBlob = constInputNode->getMemoryPtr();
|
||||
const auto elementsCount = constBlob->GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
|
||||
buffer.resize(elementsCount);
|
||||
@ -1372,7 +1375,7 @@ void MKLDNNNode::createShapeInferSubgraph(const std::shared_ptr<ngraph::Node>& o
|
||||
ngraph::OutputVector inputsForShapeInfer;
|
||||
for (size_t i = 0; i < inputShapes.size(); i++) {
|
||||
if (dynamic_cast<ngraph::opset1::Constant *>(op->get_input_node_ptr(i))) {
|
||||
inputsForShapeInfer.push_back(op->get_input_node_shared_ptr(i));
|
||||
inputsForShapeInfer.push_back(op->get_input_node_ptr(i)->clone_with_new_inputs(ngraph::OutputVector{}));
|
||||
} else {
|
||||
inputsForShapeInfer.push_back(std::make_shared<ngraph::opset1::Parameter>(op->get_input_element_type(i),
|
||||
op->get_input_partial_shape(i)));
|
||||
|
@ -388,7 +388,8 @@ public:
|
||||
if (srcDescs.empty() || selectedDescs.empty())
|
||||
return false;
|
||||
for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) {
|
||||
return srcDescs[i]->isCompatible(*selectedDescs[i].desc);
|
||||
if (!srcDescs[i]->isCompatible(*selectedDescs[i].desc))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
@ -27,7 +27,7 @@ public:
|
||||
ngraph::element::Type get_output_type() const { return m_output_type; }
|
||||
|
||||
private:
|
||||
float m_negative_slope;
|
||||
float m_negative_slope = 0.f;
|
||||
ngraph::element::Type m_output_type;
|
||||
};
|
||||
|
||||
|
@ -62,6 +62,7 @@ MKLDNNAdaptivePoolingNode::MKLDNNAdaptivePoolingNode(const std::shared_ptr<ngrap
|
||||
} else if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveMaxPool::get_type_info_static())) {
|
||||
algorithm = Algorithm::AdaptivePoolingMax;
|
||||
}
|
||||
spatialDimsCount = getInputShapeAtPort(0).getRank() - 2;
|
||||
}
|
||||
|
||||
void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() {
|
||||
|
@ -51,7 +51,7 @@ bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngr
|
||||
MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
|
||||
: MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false),
|
||||
isGrouped(false), dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef),
|
||||
groupNum(1lu), eltwisePrecision(Precision::FP32) {
|
||||
groupNum(1lu), IC(1), groupIC(1), groupOC(1), eltwisePrecision(Precision::FP32) {
|
||||
std::string errorMessage;
|
||||
if (!isSupportedOperation(op, errorMessage)) {
|
||||
IE_THROW(NotImplemented) << errorMessage;
|
||||
|
@ -47,7 +47,6 @@ private:
|
||||
bool isDW = false;
|
||||
bool isInt8 = false;
|
||||
size_t groupNum = 1;
|
||||
size_t outDepth;
|
||||
size_t IC;
|
||||
size_t OC;
|
||||
std::vector<ptrdiff_t> kernel;
|
||||
|
@ -29,7 +29,7 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_
|
||||
|
||||
constexpr static int sampledPointsPerPixel = MKLDNNDeformableConvolutionNode::sampledPointsPerPixel;
|
||||
|
||||
explicit jit_uni_def_conv_kernel_f32(jit_def_conv_params jcp) : jit_uni_def_conv_kernel(jcp), jit_generator() {}
|
||||
explicit jit_uni_def_conv_kernel_f32(jit_def_conv_params jcp) : jit_uni_def_conv_kernel(std::move(jcp)), jit_generator() {}
|
||||
|
||||
void create_ker() override {
|
||||
jit_generator::create_kernel();
|
||||
@ -1083,4 +1083,4 @@ InferenceEngine::Precision MKLDNNDeformableConvolutionNode::getRuntimePrecision(
|
||||
return getMaxPrecision(getInputPrecisions());
|
||||
}
|
||||
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution);
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution);
|
||||
|
@ -59,7 +59,7 @@ struct jit_uni_def_conv_kernel {
|
||||
ker_(args);
|
||||
}
|
||||
|
||||
explicit jit_uni_def_conv_kernel(jit_def_conv_params jcp) : ker_(nullptr), jcp_(jcp) {}
|
||||
explicit jit_uni_def_conv_kernel(jit_def_conv_params jcp) : ker_(nullptr), jcp_(std::move(jcp)) {}
|
||||
virtual ~jit_uni_def_conv_kernel() {}
|
||||
|
||||
virtual void create_ker() = 0;
|
||||
|
@ -79,7 +79,8 @@ template <cpu_isa_t isa>
|
||||
struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_eltwise_generic)
|
||||
|
||||
explicit jit_uni_eltwise_generic(jit_eltwise_params jep, MKLDNNEltwiseNode& eltwiseNode) : jit_uni_eltwise_kernel(jep, eltwiseNode), jit_generator() {}
|
||||
explicit jit_uni_eltwise_generic(jit_eltwise_params jep, MKLDNNEltwiseNode& eltwiseNode) :
|
||||
jit_uni_eltwise_kernel(std::move(jep), eltwiseNode), jit_generator() {}
|
||||
|
||||
void create_ker() override {
|
||||
jit_generator::create_kernel();
|
||||
@ -128,6 +129,9 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu
|
||||
post_op_emitters.push_back(create_eltwise_emitter(*eltwiseNode.getFusedWith()[i].get(), exec_prc));
|
||||
} else if (eltwiseNode.getFusedWith()[i].get()->getType() == FakeQuantize) {
|
||||
auto fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode*>(eltwiseNode.getFusedWith()[i].get());
|
||||
if (!fakeQuantizeNode) {
|
||||
IE_THROW() << "Cannot cast " << eltwiseNode.getFusedWith()[i]->getName() << " to MKLDNNFakeQuantizeNode";
|
||||
}
|
||||
fakeQuantizeNode->appendPostOps(post_ops);
|
||||
|
||||
quantization_injectors.push_back(std::make_shared<jit_uni_quantization_injector_f32<isa>>(
|
||||
@ -1390,7 +1394,7 @@ void MKLDNNEltwiseNode::prepareParams() {
|
||||
while (currentJitWorkAmount < minimalJitWorkAmount && currentJitWorkAmount < fullWorkAmount &&
|
||||
// we shouldn't collapse batch dimension in case dynamic batch is enabled
|
||||
(!isDynBatchEnabled || (currentOutBlkDims.size() - collapsedDims > 2))) {
|
||||
if (jep.dims.size() - collapsedDims - 2 < 0)
|
||||
if (static_cast<int>(jep.dims.size()) - collapsedDims - 2 < 0)
|
||||
break;
|
||||
|
||||
for (int j = 1; j < dims_in.size(); j++) {
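The cast added above matters because jep.dims.size() is unsigned: subtracting a larger value wraps around to a huge positive number, so the "< 0" test can never fire. A small stand-alone illustration of the failure mode and of the fix:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    std::vector<int> dims{2, 3};
    std::size_t collapsed = 3;
    // Unsigned arithmetic wraps around; this prints a huge value and is never negative.
    std::cout << dims.size() - collapsed - 2 << "\n";
    // Casting to a signed type first restores the intended "went below zero" check.
    std::cout << (static_cast<int>(dims.size()) - static_cast<int>(collapsed) - 2 < 0) << "\n";
}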
@ -54,7 +54,7 @@ struct jit_uni_eltwise_kernel {
|
||||
ker_(const_args, indexes);
|
||||
}
|
||||
|
||||
explicit jit_uni_eltwise_kernel(jit_eltwise_params jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(jep), eltwiseNode(node) {}
|
||||
explicit jit_uni_eltwise_kernel(jit_eltwise_params jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(std::move(jep)), eltwiseNode(node) {}
|
||||
virtual ~jit_uni_eltwise_kernel() {}
|
||||
|
||||
virtual void create_ker() = 0;
|
||||
|
@ -29,7 +29,7 @@ private:
|
||||
void initFromInputs() override;
|
||||
void getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) override;
|
||||
|
||||
const int* _indices;
|
||||
const int* _indices = nullptr;
|
||||
size_t _batch = 0;
|
||||
size_t _indicesPerBag = 0;
|
||||
};
|
||||
|
@ -34,8 +34,8 @@ private:
|
||||
|
||||
int numSegments_ = 0;
|
||||
|
||||
const int* indices_;
|
||||
const int* segmentIds_;
|
||||
const int* indices_ = nullptr;
|
||||
const int* segmentIds_ = nullptr;
|
||||
const int* defaultIndices_ = nullptr;
|
||||
|
||||
size_t indicesSize_ = 0;
|
||||
|
@ -332,21 +332,12 @@ MKLDNNExtractImagePatchesNode::MKLDNNExtractImagePatchesNode(const std::shared_p
|
||||
_ksizes.clear();
|
||||
_strides.clear();
|
||||
_rates.clear();
|
||||
for (const auto& x : ksizes) {
|
||||
if (x < 0)
|
||||
IE_THROW() << "Kernel sizes must be non-negative, got '" << x << "'.";
|
||||
_ksizes.push_back(static_cast<size_t>(x));
|
||||
}
|
||||
for (const auto& x : strides) {
|
||||
if (x < 0)
|
||||
IE_THROW() << "Strides must be non-negative, got '" << x << "'.";
|
||||
_strides.push_back(static_cast<size_t>(x));
|
||||
}
|
||||
for (const auto& x : rates) {
|
||||
if (x < 0)
|
||||
IE_THROW() << "Rates must be non-negative, got '" << x << "'.";
|
||||
_rates.push_back(static_cast<size_t>(x));
|
||||
}
|
||||
for (const auto& x : ksizes)
|
||||
_ksizes.push_back(x);
|
||||
for (const auto& x : strides)
|
||||
_strides.push_back(x);
|
||||
for (const auto& x : rates)
|
||||
_rates.push_back(x);
|
||||
|
||||
SizeVector in_dims = op->get_input_shape(0);
|
||||
_pad_left = 0;
|
||||
|
@ -45,7 +45,7 @@ template <cpu_isa_t isa>
|
||||
struct jit_uni_binarization_kernel : public jit_uni_quantize_kernel, public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_binarization_kernel)
|
||||
|
||||
explicit jit_uni_binarization_kernel(jit_quantize_params jqp) : jit_uni_quantize_kernel(jqp), jit_generator() {}
|
||||
explicit jit_uni_binarization_kernel(jit_quantize_params jqp) : jit_uni_quantize_kernel(std::move(jqp)), jit_generator() {}
|
||||
|
||||
void create_ker() override {
|
||||
jit_generator::create_kernel();
|
||||
@ -213,7 +213,7 @@ template <cpu_isa_t isa>
|
||||
struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_generator {
|
||||
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_quantization_kernel)
|
||||
|
||||
explicit jit_uni_quantization_kernel(jit_quantize_params jqp) : jit_uni_quantize_kernel(jqp), jit_generator() {}
|
||||
explicit jit_uni_quantization_kernel(jit_quantize_params jqp) : jit_uni_quantize_kernel(std::move(jqp)), jit_generator() {}
|
||||
|
||||
void create_ker() override {
|
||||
jit_generator::create_kernel();
|
||||
|
@ -56,7 +56,7 @@ struct jit_uni_quantize_kernel {
|
||||
ker_(args);
|
||||
}
|
||||
|
||||
explicit jit_uni_quantize_kernel(jit_quantize_params jqp) : ker_(nullptr), jqp_(jqp) {}
|
||||
explicit jit_uni_quantize_kernel(jit_quantize_params jqp) : ker_(nullptr), jqp_(std::move(jqp)) {}
|
||||
virtual ~jit_uni_quantize_kernel() {}
|
||||
|
||||
virtual void create_ker() = 0;
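The JIT kernel constructors above (deformable convolution, eltwise, binarization, quantization) all switch from copying the by-value parameter struct into the member to moving it. A rough sketch of that sink-by-value-and-move idiom, with an illustrative params type rather than the real jit_*_params:

#include <utility>
#include <vector>

struct jit_params {
    std::vector<long> dims;  // a potentially large payload that is worth moving
};

struct kernel {
    // Taking the struct by value and moving it into the member means callers
    // passing a temporary pay one move instead of a copy; lvalue callers still work.
    explicit kernel(jit_params p) : params_(std::move(p)) {}
    jit_params params_;
};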
@ -29,7 +29,7 @@ private:
|
||||
const size_t indicesIndex_ = 1;
|
||||
|
||||
size_t axis_;
|
||||
size_t dataTypeSize_;
|
||||
size_t dataTypeSize_ = 0;
|
||||
int strideAxDst_;
|
||||
int dstAxDim_;
|
||||
int strideAx1Diff_ = 0;
|
||||
|
@ -51,7 +51,6 @@ MKLDNNGatherNDNode::MKLDNNGatherNDNode(const std::shared_ptr<ngraph::Node>& op,
|
||||
} else {
|
||||
THROW_ERROR << "has support only opset5.";
|
||||
}
|
||||
|
||||
if (attrs.batchDims >= std::min(inputDataRank, indicesDimsRank))
|
||||
THROW_ERROR << "has invalid batch_dims attribute: " << attrs.batchDims;
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ void MKLDNNGatherTreeNode::execute(mkldnn::stream strm) {
|
||||
}
|
||||
|
||||
template<typename DATA_T>
|
||||
void MKLDNNGatherTreeNode::gatherTreeKernel() noexcept {
|
||||
void MKLDNNGatherTreeNode::gatherTreeKernel() {
|
||||
const auto *step_idx = reinterpret_cast<DATA_T *>(getParentEdgeAt(GATHER_TREE_STEP_IDX)->getMemoryPtr()->GetPtr());
|
||||
const auto * const parent_idx = reinterpret_cast<DATA_T *>(getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemoryPtr()->GetPtr());
|
||||
const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetShape().getElementsCount()
|
||||
|
@ -21,10 +21,10 @@ public:
|
||||
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
private:
|
||||
template<typename DATA_T>
|
||||
void gatherTreeKernel() noexcept;
|
||||
void gatherTreeKernel();
|
||||
|
||||
private:
|
||||
static const size_t GATHER_TREE_STEP_IDX = 0;
|
||||
static const size_t GATHER_TREE_PARENT_IDX = 1;
|
||||
static const size_t GATHER_TREE_MAX_SEQ_LEN = 2;
|
||||
|
@ -167,15 +167,16 @@ private:
|
||||
SizeVector dstDim;
|
||||
SizeVector srcDim;
|
||||
SizeVector srcDimPad;
|
||||
int spatialDimSize;
|
||||
int spatialDimSize = 1;
|
||||
|
||||
mkldnn::primitive_attr attr;
|
||||
std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
|
||||
|
||||
InferenceEngine::Precision inputPrec, outputPrec;
|
||||
size_t srcDataSize, dstDataSize;
|
||||
size_t srcDataSize = 0;
|
||||
size_t dstDataSize = 0;
|
||||
|
||||
InterpolateLayoutType configured_for_layout;
|
||||
InterpolateLayoutType configured_for_layout = InterpolateLayoutType::planar;
|
||||
|
||||
std::vector<int> indexTable;
|
||||
|
||||
|
@ -88,9 +88,9 @@ private:
|
||||
std::vector<std::vector<int64_t>> m_numPerBatchClass;
|
||||
std::vector<BoxInfo> m_filteredBoxes;
|
||||
std::vector<int> m_classOffset;
|
||||
size_t m_realNumClasses;
|
||||
size_t m_realNumBoxes;
|
||||
float (*m_decay_fn)(float, float, float);
|
||||
size_t m_realNumClasses = 0;
|
||||
size_t m_realNumBoxes = 0;
|
||||
float (*m_decay_fn)(float, float, float) = nullptr;
|
||||
void checkPrecision(const InferenceEngine::Precision prec, const std::vector<InferenceEngine::Precision> precList, const std::string name,
|
||||
const std::string type);
|
||||
|
||||
|
@ -146,7 +146,7 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) {
|
||||
|
||||
int* selected_indices = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr());
|
||||
|
||||
float* selected_outputs = selected_outputs = reinterpret_cast<float*>(getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->GetPtr());
|
||||
float* selected_outputs = reinterpret_cast<float*>(getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->GetPtr());
|
||||
|
||||
int* selected_num = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr());
|
||||
|
||||
@ -414,4 +414,4 @@ void MKLDNNMultiClassNmsNode::checkPrecision(const Precision prec, const std::ve
|
||||
IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec;
|
||||
}
|
||||
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms)
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms)
|
||||
|
@ -42,9 +42,9 @@ private:
|
||||
bool sort_result_across_batch = false;
|
||||
MulticlassNmsSortResultType sort_result_type = MulticlassNmsSortResultType::NONE;
|
||||
|
||||
size_t num_batches;
|
||||
size_t num_boxes;
|
||||
size_t num_classes;
|
||||
size_t num_batches = 0;
|
||||
size_t num_boxes = 0;
|
||||
size_t num_classes = 0;
|
||||
|
||||
int max_output_boxes_per_class = 0;
|
||||
float iou_threshold = 0.0f;
|
||||
|
@ -122,7 +122,8 @@ private:
|
||||
MVNEpsMode epsMode_;
|
||||
|
||||
InferenceEngine::Precision input_prec, output_prec;
|
||||
size_t src_data_size, dst_data_size;
|
||||
size_t src_data_size = 0;
|
||||
size_t dst_data_size = 0;
|
||||
|
||||
mkldnn::primitive_attr attr;
|
||||
|
||||
|
@ -57,21 +57,21 @@ public:
|
||||
|
||||
private:
|
||||
// input
|
||||
enum : size_t {
|
||||
enum {
|
||||
NMS_BOXES,
|
||||
NMS_SCORES,
|
||||
NMS_MAXOUTPUTBOXESPERCLASS,
|
||||
NMS_IOUTHRESHOLD,
|
||||
NMS_SCORETHRESHOLD,
|
||||
NMS_SOFTNMSSIGMA,
|
||||
} InputNumber;
|
||||
};
|
||||
|
||||
// output
|
||||
enum : size_t {
|
||||
enum {
|
||||
NMS_SELECTEDINDICES,
|
||||
NMS_SELECTEDSCORES,
|
||||
NMS_VALIDOUTPUTS
|
||||
} OutputNumber;
|
||||
};
|
||||
|
||||
|
||||
enum class boxEncoding {
|
||||
@ -81,9 +81,9 @@ private:
|
||||
boxEncoding boxEncodingType = boxEncoding::CORNER;
|
||||
bool sort_result_descending = true;
|
||||
|
||||
size_t num_batches;
|
||||
size_t num_boxes;
|
||||
size_t num_classes;
|
||||
size_t num_batches = 0;
|
||||
size_t num_boxes = 0;
|
||||
size_t num_classes = 0;
|
||||
|
||||
size_t max_output_boxes_per_class = 0lu;
|
||||
float iou_threshold = 0.0f;
|
||||
|
@ -2,16 +2,17 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <mkldnn_types.h>
|
||||
#include "ie_parallel.hpp"
|
||||
#include "utils/bfloat16.hpp"
|
||||
#include <mkldnn_selective_build.h>
|
||||
#include "mkldnn_one_hot_node.h"
|
||||
#include <nodes/common/blocked_desc_creator.h>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include "common/cpu_memcpy.h"
|
||||
|
||||
using namespace MKLDNNPlugin;
|
||||
@ -19,19 +20,11 @@ using namespace InferenceEngine;
|
||||
|
||||
bool MKLDNNOneHotNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
|
||||
try {
|
||||
if (isDynamicNgraphNode(op)) {
|
||||
errorMessage = "Doesn't support op with dynamic shapes";
|
||||
return false;
|
||||
}
|
||||
const auto oneHot = std::dynamic_pointer_cast<const ngraph::opset1::OneHot>(op);
|
||||
if (!oneHot) {
|
||||
errorMessage = "Only opset1 OneHot operation is supported";
|
||||
return false;
|
||||
}
|
||||
if (std::dynamic_pointer_cast<const ngraph::opset1::Constant>(oneHot->get_input_node_shared_ptr(DEPTH_ID)) == nullptr) {
|
||||
errorMessage = "Only const 'depth' input is supported";
|
||||
return false;
|
||||
}
|
||||
if (std::dynamic_pointer_cast<const ngraph::opset1::Constant>(oneHot->get_input_node_shared_ptr(ON_VALUE_ID)) == nullptr) {
|
||||
errorMessage = "Only const 'on_value' input is supported";
|
||||
return false;
|
||||
@ -56,20 +49,21 @@ MKLDNNOneHotNode::MKLDNNOneHotNode(const std::shared_ptr<ngraph::Node>& op, cons
|
||||
errorPrefix = "OneHot layer with name '" + op->get_friendly_name() + "'";
|
||||
const auto oneHot = std::dynamic_pointer_cast<const ngraph::opset1::OneHot>(op);
|
||||
const auto depthNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(oneHot->get_input_node_shared_ptr(DEPTH_ID));
|
||||
const auto onValueNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(oneHot->get_input_node_shared_ptr(ON_VALUE_ID));
|
||||
const auto offValueNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(oneHot->get_input_node_shared_ptr(OFF_VALUEAXES_ID));
|
||||
depth = depthNode->cast_vector<uint32_t>()[0];
|
||||
axis = oneHot->get_axis();
|
||||
src_dims = oneHot->get_input_shape(INDICES_ID);
|
||||
if (ngraph::is_scalar(src_dims)) {
|
||||
src_dims = SizeVector{1};
|
||||
if (depthNode) {
|
||||
depth = depthNode->cast_vector<uint32_t>()[0];
|
||||
}
|
||||
dst_dims = oneHot->get_output_shape(0);
|
||||
if (ngraph::is_scalar(dst_dims)) {
|
||||
dst_dims = SizeVector{1};
|
||||
axis = oneHot->get_axis();
|
||||
|
||||
VectorDims srcDims = getInputShapeAtPort(INDICES_ID).getDims();
|
||||
if (ngraph::is_scalar(srcDims)) {
|
||||
srcDims = SizeVector{1};
|
||||
}
|
||||
VectorDims dstDims = getOutputShapeAtPort(0).getDims();
|
||||
if (ngraph::is_scalar(dstDims)) {
|
||||
dstDims = SizeVector{1};
|
||||
}
|
||||
|
||||
int output_dims_size = dst_dims.size();
|
||||
int output_dims_size = dstDims.size();
|
||||
if (axis < 0) {
|
||||
axis += output_dims_size;
|
||||
}
|
||||
@ -77,11 +71,40 @@ MKLDNNOneHotNode::MKLDNNOneHotNode(const std::shared_ptr<ngraph::Node>& op, cons
|
||||
IE_THROW() << errorPrefix << " has unsupported 'axis' attribute: " << oneHot->get_axis();
|
||||
}
|
||||
|
||||
if (!( ((1 + src_dims.size()) == dst_dims.size()) ||
|
||||
(src_dims.size() == 1 && dst_dims.size() == 1 && dst_dims[0] == depth && src_dims[0] == 1)))
|
||||
if (!(((1 + srcDims.size()) == dstDims.size()) ||
|
||||
(depthNode && (srcDims.size() == 1 && dstDims.size() == 1 && dstDims[0] == depth && srcDims[0] == 1))))
|
||||
IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!";
|
||||
}
|
||||
|
||||
bool MKLDNNOneHotNode::needShapeInfer() const {
|
||||
const auto depthNodePtr = reinterpret_cast<int32_t *>(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr());
|
||||
if (depth != depthNodePtr[0])
|
||||
return true;
|
||||
return MKLDNNNode::needShapeInfer();
|
||||
}
|
||||
|
||||
std::vector<VectorDims> MKLDNNOneHotNode::shapeInfer() const {
|
||||
std::vector<ov::StaticShape> input_shapes = {
|
||||
getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims(),
|
||||
getParentEdgesAtPort(1)[0]->getMemory().GetShape().getStaticDims(),
|
||||
getParentEdgesAtPort(2)[0]->getMemory().GetShape().getStaticDims(),
|
||||
getParentEdgesAtPort(3)[0]->getMemory().GetShape().getStaticDims()
|
||||
};
|
||||
std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> input_values = {
|
||||
{1, std::make_shared<ngraph::runtime::HostTensor>(ngraph::element::Type_t::i32, VectorDims{ }, getParentEdgesAtPort(1)[0]->getMemory().GetPtr())},
|
||||
{2, std::make_shared<ngraph::runtime::HostTensor>(opToShapeInfer->get_input_node_shared_ptr(2))},
|
||||
{3, std::make_shared<ngraph::runtime::HostTensor>(opToShapeInfer->get_input_node_shared_ptr(3))}
|
||||
};
|
||||
std::vector<ov::StaticShape> output_shapes = {{}};
|
||||
shape_inference(opToShapeInfer.get(), input_shapes, output_shapes, input_values);
|
||||
|
||||
std::vector<VectorDims> result(output_shapes.size());
|
||||
std::transform(output_shapes.begin(), output_shapes.end(), result.begin(), [](const ov::StaticShape& s){ return s.to_shape(); });
|
||||
|
||||
depth = reinterpret_cast<int32_t *>(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr())[0];
|
||||
return result;
|
||||
}
|
||||
|
||||
void MKLDNNOneHotNode::initSupportedPrimitiveDescriptors() {
|
||||
if (!supportedPrimitiveDescriptors.empty())
|
||||
return;
|
||||
@ -131,7 +154,7 @@ void MKLDNNOneHotNode::execute(mkldnn::stream strm) {
|
||||
std::size_t prefix_size = 1;
|
||||
auto input_dims = getParentEdgeAt(0)->getMemory().getStaticDims();
|
||||
|
||||
std::size_t actual_axis = (axis == -1) ? src_dims.size() : axis;
|
||||
std::size_t actual_axis = (axis == -1) ? input_dims.size() : axis;
|
||||
for (size_t i = 0; i < actual_axis; ++i)
|
||||
prefix_size *= input_dims[i];
|
||||
|
||||
|
@ -23,6 +23,11 @@ public:
|
||||
void execute(mkldnn::stream strm) override;
|
||||
bool created() const override;
|
||||
|
||||
bool needShapeInfer() const override;
|
||||
std::vector<VectorDims> shapeInfer() const override;
|
||||
bool needPrepareParams() const override { return false; };
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); };
|
||||
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
private:
|
||||
@ -41,10 +46,8 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
uint32_t depth;
|
||||
mutable Dim depth = Shape::UNDEFINED_DIM;
|
||||
int32_t axis = -1;
|
||||
InferenceEngine::SizeVector src_dims;
|
||||
InferenceEngine::SizeVector dst_dims;
|
||||
|
||||
InferenceEngine::Precision output_precision;
|
||||
|
||||
|
@ -45,8 +45,8 @@ private:
|
||||
|
||||
// for Deformable PSROIPolling
|
||||
bool noTrans;
|
||||
int partSize;
|
||||
float transStd;
|
||||
int partSize = 1;
|
||||
float transStd = 1.f;
|
||||
|
||||
std::string errorPrefix;
|
||||
|
||||
|
@ -101,7 +101,7 @@ void MKLDNNRangeNode::execute(mkldnn::stream strm) {
|
||||
}
|
||||
}
|
||||
template <typename data_t>
|
||||
size_t MKLDNNRangeNode::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t *stepPtr) const noexcept {
|
||||
size_t MKLDNNRangeNode::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t *stepPtr) const {
|
||||
data_t start = 0, limit = 0, delta = 0;
|
||||
if (startPtr == nullptr)
|
||||
startPtr = &start;
|
||||
@ -123,7 +123,7 @@ size_t MKLDNNRangeNode::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t
|
||||
}
|
||||
}
|
||||
template <typename data_t>
|
||||
InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() noexcept {
|
||||
InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() {
|
||||
data_t start = 0, delta = 0;
|
||||
size_t work_amount_dst = getWorkAmount<data_t>(&start, nullptr, &delta);
|
||||
if (isDynamicNode()) {
|
||||
|
@ -26,12 +26,12 @@ public:
|
||||
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
|
||||
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
|
||||
|
||||
template <typename data_t>
|
||||
InferenceEngine::StatusCode rangeKernel() noexcept;
|
||||
template <typename data_t>
|
||||
size_t getWorkAmount(data_t *startPtr = nullptr, data_t *stopPtr = nullptr, data_t *stepPtr = nullptr) const noexcept;
|
||||
|
||||
private:
|
||||
template <typename data_t>
|
||||
InferenceEngine::StatusCode rangeKernel();
|
||||
template <typename data_t>
|
||||
size_t getWorkAmount(data_t* startPtr = nullptr, data_t* stopPtr = nullptr, data_t* stepPtr = nullptr) const;
|
||||
|
||||
static const size_t RANGE_START = 0;
|
||||
static const size_t RANGE_LIMIT = 1;
|
||||
static const size_t RANGE_DELTA = 2;
|
||||
|
@ -1825,7 +1825,8 @@ inline void MKLDNNReduceNode::reduce_kernel_process(const uint8_t *in_p, uint8_t
|
||||
}
|
||||
|
||||
inline void MKLDNNReduceNode::reduce_kernel_post_process(uint8_t *out_ptr) {
|
||||
const float divisor = static_cast<float>(IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW));
|
||||
const size_t integerDivisor = IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW);
|
||||
const float divisor = static_cast<float>(integerDivisor);
|
||||
if (planar_layout) {
|
||||
size_t parallel_amount = OB * OC * OD;
|
||||
parallel_for(parallel_amount, [&](size_t i) {
|
||||
|
@ -261,6 +261,7 @@ MKLDNNRegionYoloNode::MKLDNNRegionYoloNode(const std::shared_ptr<ngraph::Node>&
|
||||
num = regionYolo->get_num_regions();
|
||||
do_softmax = regionYolo->get_do_softmax();
|
||||
mask = regionYolo->get_mask();
|
||||
block_size = 1;
|
||||
}
|
||||
|
||||
void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() {
|
||||
|
@ -238,7 +238,7 @@ void MKLDNNROIAlignNode::executeSpecified() {
|
||||
auto samplingRatioX = samplingRatio == 0 ? static_cast<int>(ceil(binWidth)) : samplingRatio;
|
||||
auto samplingRatioY = samplingRatio == 0 ? static_cast<int>(ceil(binHeight)) : samplingRatio;
|
||||
|
||||
uint64_t numSamplesInBin = samplingRatioX * samplingRatioY;
|
||||
uint64_t numSamplesInBin = static_cast<uint64_t>(samplingRatioX) * samplingRatioY;
|
||||
|
||||
float sampleDistanceX = binWidth / samplingRatioX;
|
||||
float sampleDistanceY = binHeight / samplingRatioY;
|
||||
|
@ -25,8 +25,8 @@ struct jit_roi_pooling_params {
|
||||
|
||||
InferenceEngine::Precision src_prc;
|
||||
InferenceEngine::Precision dst_prc;
|
||||
int src_data_size;
|
||||
int dst_data_size;
|
||||
int src_data_size = 0;
|
||||
int dst_data_size = 0;
|
||||
|
||||
Algorithm alg;
|
||||
};
|
||||
|
@ -31,7 +31,7 @@ private:
|
||||
|
||||
Mode mode;
|
||||
size_t blockSize;
|
||||
size_t blockStep;
|
||||
size_t blockStep = 1;
|
||||
|
||||
std::unique_ptr<PermuteKernel> permuteKernel;
|
||||
};
|
||||
|
@ -57,7 +57,7 @@ private:
|
||||
|
||||
void optimizedNspc2Ncsp(size_t MB);
|
||||
|
||||
bool canUseOptimizedNspc2Ncsp;
|
||||
bool canUseOptimizedNspc2Ncsp = false;
|
||||
|
||||
size_t axis = 1;
|
||||
std::vector<uint8_t*> dstMemPtrs;
|
||||
|
@ -225,7 +225,7 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
|
||||
const auto& srcDims = getInputShapeAtPort(DATA_ID).getDims();
|
||||
if (srcDims[1] == Shape::UNDEFINED_DIM)
|
||||
return false;
|
||||
auto channelBeginNormalized = attrs.begin[1] > 0 ? attrs.begin[1] : attrs.begin[1] + srcDims[1];
|
||||
auto channelBeginNormalized = attrs.begin[1] > 0 ? attrs.begin[1] : attrs.begin[1] + static_cast<std::int64_t>(srcDims[1]);
|
||||
return srcDims[1] % blockSize == 0 && abs(attrs.stride[1]) == 1 &&
|
||||
(channelBeginNormalized > srcDims[1] || channelBeginNormalized % blockSize == 0 || channelBeginNormalized < 0 || attrs.beginMask[1] == 0);
|
||||
};
|
||||
|
@ -17,7 +17,7 @@ ie_add_plugin(NAME ${TARGET_NAME}
|
||||
PSEUDO_PLUGIN_FOR "MULTI"
|
||||
DEFAULT_CONFIG "MULTI_WORK_MODE_AS_AUTO:YES")
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ngraph inference_engine_transformations)
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE ngraph inference_engine_transformations)
|
||||
|
||||
set_ie_threading_interface_for(${TARGET_NAME})
|
||||
|
||||
|
@ -15,6 +15,13 @@
|
||||
#include "multi_device_infer_request.hpp"
|
||||
#include "multi_device_exec_network.hpp"
|
||||
|
||||
#ifdef MULTIUNITTEST
|
||||
#define MOCKTESTMACRO virtual
|
||||
#define MultiDevicePlugin MockMultiDevicePlugin
|
||||
#else
|
||||
#define MOCKTESTMACRO
|
||||
#endif
|
||||
|
||||
namespace MultiDevicePlugin {
|
||||
|
||||
class MultiDeviceAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
|
||||
|
@ -159,114 +159,178 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
|
||||
_core = _multiPlugin->GetCore(); // shared_ptr that holds the Core
|
||||
_config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = strDevices;
|
||||
|
||||
std::vector<DeviceInformation> needLoadDevices;
|
||||
std::string profilingTask = "MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork:AutoMode";
|
||||
// check if have cpu device
|
||||
const auto CPUIter = std::find_if(metaDevices.begin(), metaDevices.end(),
|
||||
[=](const DeviceInformation& d)->bool{return d.deviceName.find("CPU") != std::string::npos;});
|
||||
if (CPUIter != metaDevices.end()) {
|
||||
_cpuDevice = *CPUIter;
|
||||
_config.insert(_cpuDevice.config.begin(), _cpuDevice.config.end());
|
||||
needLoadDevices.push_back(_cpuDevice);
|
||||
_cpuFuture = _cpuPromise.get_future();
|
||||
profilingTask += _cpuDevice.deviceName;
|
||||
}
|
||||
|
||||
// get accelerator device, like GPU
|
||||
auto networkPrecision = GetNetworkPrecision(network);
|
||||
_acceleratorDevice = _multiPlugin->SelectDevice(metaDevices, networkPrecision);
|
||||
bool isAccelerator =
|
||||
_acceleratorDevice.deviceName.find("CPU") == std::string::npos;
|
||||
if (isAccelerator) {
|
||||
_config.insert(_acceleratorDevice.config.begin(), _acceleratorDevice.config.end());
|
||||
needLoadDevices.push_back(_acceleratorDevice);
|
||||
_acceleratorFuture = _acceleratorPromise.get_future();
|
||||
profilingTask += _acceleratorDevice.deviceName;
|
||||
}
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, openvino::itt::handle(profilingTask));
|
||||
if (needLoadDevices.size() == 0) {
|
||||
IE_THROW() << "No device set";
|
||||
}
|
||||
|
||||
// will not wait for loading accelerator network,
|
||||
// so the executor can't be destroyed before finished the task,
|
||||
// so use executor as a member of MultiDeviceExecutableNetwork.
|
||||
_executor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
|
||||
IStreamsExecutor::Config{"AutoDeviceAsyncLoad",
|
||||
static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
|
||||
0 /*default threads per stream, workaround for ticket 62376*/,
|
||||
IStreamsExecutor::ThreadBindingType::NONE});
|
||||
|
||||
for (auto& p : needLoadDevices) {
|
||||
// initialize these containers firstly to avoid insert operation in threads
|
||||
_idleWorkerRequests[p.deviceName];
|
||||
_workerRequests[p.deviceName];
|
||||
_inferPipelineTasksDeviceSpecific[p.deviceName] = NULL;
|
||||
const auto device = p.deviceName;
|
||||
auto deviceConfig = p.config;
|
||||
if (device == "GPU") {
|
||||
deviceConfig[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(YES);
|
||||
// loadContext[ACTUALDEVICE] is always enabled,
|
||||
// when there is CPU and there are more than two devices, loadContext[CPU] is enabled
|
||||
_loadContext[ACTUALDEVICE].isEnabled = true;
|
||||
_loadContext[ACTUALDEVICE].networkPrecision = GetNetworkPrecision(network);
|
||||
_loadContext[ACTUALDEVICE].metaDevices = metaDevices;
|
||||
_loadContext[ACTUALDEVICE].deviceInfo = _multiPlugin->SelectDevice(metaDevices, _loadContext[ACTUALDEVICE].networkPrecision);
|
||||
bool isActualDevCPU =
|
||||
_loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos;
|
||||
// if the actual device is CPU, disable _loadContext[CPU] and only use _loadContext[ACTUALDEVICE]
|
||||
if (isActualDevCPU) {
|
||||
_loadContext[CPU].isEnabled = false;
|
||||
} else {
|
||||
const auto CPUIter = std::find_if(metaDevices.begin(), metaDevices.end(),
|
||||
[=](const DeviceInformation& d)->bool{return d.deviceName.find("CPU") != std::string::npos;});
|
||||
// if a CPU device is present, enable _loadContext[CPU]
|
||||
if (CPUIter != metaDevices.end()) {
|
||||
_loadContext[CPU].isEnabled = true;
|
||||
_loadContext[CPU].deviceInfo = *CPUIter;
|
||||
} else {
|
||||
_loadContext[CPU].isEnabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// initialize the rest members of load context
|
||||
for (int i = 0; i < CONTEXTNUM; i++) {
|
||||
if (_loadContext[i].isEnabled) {
|
||||
_loadContext[i].future = _loadContext[i].promise.get_future();
|
||||
auto* contextPtr = &_loadContext[i];
|
||||
_loadContext[i].task = [this, contextPtr, modelPath, network]() mutable {
|
||||
TryToLoadNetWork(*contextPtr, modelPath, network);
|
||||
if (contextPtr->isLoadSuccess) {
|
||||
GenerateWorkers(contextPtr->deviceInfo.deviceName, contextPtr->executableNetwork);
|
||||
//need lock
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_confMutex);
|
||||
_config.insert(contextPtr->deviceInfo.config.begin(),
|
||||
contextPtr->deviceInfo.config.end());
|
||||
}
|
||||
contextPtr->isAlready = true;
|
||||
}
|
||||
contextPtr->promise.set_value();
|
||||
// the first load network process finished
|
||||
std::call_once(_firstLoadOC, [this] () {
|
||||
_firstLoadPromise.set_value();
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, openvino::itt::handle(profilingTask));
|
||||
if (_loadContext[CPU].isEnabled) {
|
||||
_firstLoadFuture = _firstLoadPromise.get_future();
|
||||
// will not wait for loading accelerator network,
|
||||
// so some parameters need to be transferred by value.
|
||||
_executor->run([&, modelPath, network, device, deviceConfig]() {
|
||||
std::cout << "DEVICE in AUTO:" << device << std::endl;
|
||||
SoExecutableNetworkInternal executableNetwork;
|
||||
if (!modelPath.empty()) {
|
||||
executableNetwork = _core->LoadNetwork(modelPath, device, deviceConfig);
|
||||
} else {
|
||||
executableNetwork = _core->LoadNetwork(network, device, deviceConfig);
|
||||
}
|
||||
|
||||
GenerateWorkers(device, executableNetwork);
|
||||
std::cout << "DEVICE in AUTO:" << device << " ENDED" <<std::endl;
|
||||
|
||||
if (device.find("CPU") == std::string::npos) {
|
||||
_alreadyActualNetwork = true;
|
||||
_acceleratorPromise.set_value(executableNetwork);
|
||||
} else {
|
||||
_cpuPromise.set_value(executableNetwork);
|
||||
}
|
||||
});
|
||||
// so that the executor can't be destroyed before the task finishes,
// keep the executor as a member of MultiDeviceExecutableNetwork.
|
||||
_executor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
|
||||
IStreamsExecutor::Config{"AutoDeviceAsyncLoad",
|
||||
static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
|
||||
0 /*default threads per stream, workaround for ticket 62376*/,
|
||||
IStreamsExecutor::ThreadBindingType::NONE});
|
||||
for (auto&& device : metaDevices) {
|
||||
// initialize containers before running the async tasks
|
||||
_idleWorkerRequests[device.deviceName];
|
||||
_workerRequests[device.deviceName];
|
||||
_inferPipelineTasksDeviceSpecific[device.deviceName] = nullptr;
|
||||
}
|
||||
_executor->run(_loadContext[CPU].task);
|
||||
_executor->run(_loadContext[ACTUALDEVICE].task);
|
||||
} else {
|
||||
// only one device needs to load the network, so there is no need to load it asynchronously
|
||||
_loadContext[ACTUALDEVICE].task();
|
||||
}
|
||||
|
||||
WaitFirstNetworkReady();
|
||||
}
|
||||
void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context,
|
||||
const std::string& modelPath,
|
||||
const InferenceEngine::CNNNetwork& network) {
|
||||
auto& device = context.deviceInfo.deviceName;
|
||||
auto& deviceConfig = context.deviceInfo.config;
|
||||
auto& deviceList = context.metaDevices;
|
||||
bool curDevIsCPU = (device.find("CPU") != std::string::npos);
|
||||
try {
|
||||
if (!modelPath.empty()) {
|
||||
context.executableNetwork = _core->LoadNetwork(modelPath, device, deviceConfig);
|
||||
} else {
|
||||
context.executableNetwork = _core->LoadNetwork(network, device, deviceConfig);
|
||||
}
|
||||
context.isLoadSuccess = true;
|
||||
} catch (const std::exception& e) {
|
||||
context.errMessage += device + ":" + e.what();
|
||||
context.isLoadSuccess = false;
|
||||
}
|
||||
|
||||
void MultiDeviceExecutableNetwork::WaitFirstNetworkReady() {
|
||||
if (_alreadyActualNetwork) {
|
||||
if (context.isLoadSuccess || curDevIsCPU) {
|
||||
return;
|
||||
}
|
||||
if (_cpuFuture.valid() && _acceleratorFuture.valid()) {
|
||||
try {
|
||||
_networkFirstReady = _cpuFuture.get();
|
||||
} catch (const std::exception& e) {
|
||||
printf("Warning: load network to CPU failed: %s\n", e.what());
|
||||
_networkActualNeeded = _acceleratorFuture.get();
|
||||
}
|
||||
} else if (_acceleratorFuture.valid()) { // only accelerator is valid, like AUTO:GPU
|
||||
_networkActualNeeded = _acceleratorFuture.get();
|
||||
} else if (_cpuFuture.valid()) { // only CPU is valid, like AUTO:CPU
|
||||
_networkActualNeeded = _cpuFuture.get();
|
||||
} else {
|
||||
IE_THROW() << "No device task available";
|
||||
|
||||
// remove the current device from deviceList
|
||||
auto eraseDevice = std::find_if(deviceList.begin(), deviceList.end(),
|
||||
[device](DeviceInformation& d){
|
||||
return d.deviceName == device;
|
||||
});
|
||||
deviceList.erase(eraseDevice);
|
||||
|
||||
if (deviceList.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// if there is only one device, or loading on the CPU device failed,
// the ActualNetwork is already ok now.
|
||||
if (!_acceleratorFuture.valid()) {
|
||||
_alreadyActualNetwork = true;
|
||||
// select next candidate device
|
||||
try {
|
||||
context.deviceInfo = _multiPlugin->SelectDevice(deviceList, context.networkPrecision);
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
return;
|
||||
}
|
||||
|
||||
// if the selected device is CPU, there is no need to load CPU again; context[CPU] must have already loaded it
|
||||
curDevIsCPU = (context.deviceInfo.deviceName.find("CPU") != std::string::npos);
|
||||
if (curDevIsCPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
// try to load this candidate device
|
||||
TryToLoadNetWork(context, modelPath, network);
|
||||
}
|
||||
|
||||
void MultiDeviceExecutableNetwork::WaitFirstNetworkReady() {
|
||||
if (_firstLoadFuture.valid()) {
|
||||
// wait for the first loading finished
|
||||
_firstLoadFuture.wait();
|
||||
}
|
||||
|
||||
// check if any device has loaded the network successfully
|
||||
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
|
||||
if (_loadContext[i].isEnabled && _loadContext[i].isAlready) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// the first loading failed, wait for another loading
|
||||
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
|
||||
if (_loadContext[i].isEnabled) {
|
||||
_loadContext[i].future.wait();
|
||||
// check if loading is successful
|
||||
if (_loadContext[i].isAlready) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: print the error message of the failed loads
|
||||
IE_THROW() << "[AUTO] load all devices failed";
|
||||
}
|
||||
|
||||
void MultiDeviceExecutableNetwork::WaitActualNetworkReady() const {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceExecutableNetwork::WaitActualNetworkReady");
|
||||
// Different APIs may call this function, so guard it with call_once here
// for every MultiDeviceExecutableNetwork instance
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceExecutableNetwork::WaitActualNetworkReady");
|
||||
std::call_once(_oc, [&] () {
|
||||
if (_acceleratorFuture.valid()) {
|
||||
_networkActualNeeded = _acceleratorFuture.get();
|
||||
}
|
||||
std::call_once(_oc, [this] () {
|
||||
if (_loadContext[ACTUALDEVICE].future.valid()) {
|
||||
_loadContext[ACTUALDEVICE].future.get();
|
||||
}
|
||||
// if _loadContext[ACTUALDEVICE] load failed, fall back to _loadContext[CPU]
|
||||
if (!_loadContext[ACTUALDEVICE].isAlready) {
|
||||
_loadContext[ACTUALDEVICE].executableNetwork = _loadContext[CPU].executableNetwork;
|
||||
_loadContext[ACTUALDEVICE].deviceInfo = _loadContext[CPU].deviceInfo;
|
||||
_loadContext[ACTUALDEVICE].isAlready = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@ -275,19 +339,18 @@ void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest(Task inferPipeli
|
||||
// AUTO work mode
|
||||
if (_workModeIsAUTO) {
|
||||
if (!preferred_device.empty()) {
|
||||
// the preferred_device should be the selected device in AUTO work mode
|
||||
if (preferred_device != _acceleratorDevice.deviceName) {
|
||||
IE_THROW(NotFound) << "The preferred_device should be the selected device";
|
||||
}
|
||||
// if the device needed by customer is not ready, need to wait for it
|
||||
WaitActualNetworkReady();
|
||||
devices.push_back(_acceleratorDevice);
|
||||
// the preferred_device should be the selected device in AUTO work mode
|
||||
if (preferred_device != _loadContext[ACTUALDEVICE].deviceInfo.deviceName) {
|
||||
IE_THROW(NotFound) << "The preferred_device should be the selected device";
|
||||
}
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
} else {
|
||||
// _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU
|
||||
if (_alreadyActualNetwork) {
|
||||
devices.push_back(_acceleratorDevice);
|
||||
if (_loadContext[ACTUALDEVICE].isAlready) {
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
} else {
|
||||
devices.push_back(_cpuDevice);
|
||||
devices.push_back(_loadContext[CPU].deviceInfo);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -334,7 +397,8 @@ void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
|
||||
|
||||
MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
|
||||
// this is necessary to guarantee member destroyed after getting future
|
||||
if (_workModeIsAUTO) {
|
||||
if (_workModeIsAUTO && _loadContext[CPU].isEnabled) {
|
||||
_loadContext[CPU].future.get();
|
||||
WaitActualNetworkReady();
|
||||
// it's necessary to wait for the network loading threads to finish here.
|
||||
InferenceEngine::ExecutorManager::getInstance()->clear("AutoDeviceAsyncLoad");
|
||||
@ -357,7 +421,7 @@ MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
|
||||
std::shared_ptr<InferenceEngine::RemoteContext> MultiDeviceExecutableNetwork::GetContext() const {
|
||||
if (_workModeIsAUTO) {
|
||||
WaitActualNetworkReady();
|
||||
return _networkActualNeeded->GetContext();
|
||||
return _loadContext[ACTUALDEVICE].executableNetwork->GetContext();
|
||||
}
|
||||
auto devices = [&] {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
@ -388,8 +452,8 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
|
||||
InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
|
||||
|
||||
if (_workModeIsAUTO) {
|
||||
if (!_networkFirstReady && _networkActualNeeded) {
|
||||
auto& dev_requests = _workerRequests[_acceleratorDevice.deviceName];
|
||||
if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
|
||||
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
|
||||
if (num < dev_requests.size()) {
|
||||
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
|
||||
}
|
||||
@ -418,8 +482,8 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
|
||||
InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
|
||||
|
||||
if (_workModeIsAUTO) {
|
||||
if (!_networkFirstReady && _networkActualNeeded) {
|
||||
auto& dev_requests = _workerRequests[_acceleratorDevice.deviceName];
|
||||
if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
|
||||
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
|
||||
if (num < dev_requests.size()) {
|
||||
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
|
||||
}
|
||||
@ -487,16 +551,21 @@ void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, Inferen
|
||||
_devicePriorities = metaDevices;
|
||||
|
||||
// update value in config
|
||||
_confMutex.lock();
|
||||
_config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = priorities->second;
|
||||
_confMutex.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::string &name) const {
|
||||
_confMutex.lock();
|
||||
auto it = _config.find(name);
|
||||
if (it != _config.end()) {
|
||||
_confMutex.unlock();
|
||||
return it->second;
|
||||
} else {
|
||||
_confMutex.unlock();
|
||||
// find config key among networks config keys
|
||||
for (const auto& desc : _networksPerDevice) {
|
||||
const auto& execNetwork = desc.second;
|
||||
@ -514,11 +583,10 @@ InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::st
|
||||
InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetMetric(const std::string &name) const {
|
||||
if (_workModeIsAUTO) {
|
||||
// fixme: should we wait actual device? meanwhile it will block inference, how to fix?
|
||||
if (_alreadyActualNetwork) {
|
||||
WaitActualNetworkReady();
|
||||
return _networkActualNeeded->GetMetric(name);
|
||||
if (_loadContext[ACTUALDEVICE].isAlready) {
|
||||
return _loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
|
||||
}
|
||||
return _networkFirstReady->GetMetric(name);
|
||||
return _loadContext[CPU].executableNetwork->GetMetric(name);
|
||||
}
|
||||
|
||||
if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
|
||||
|
@ -23,6 +23,12 @@
|
||||
# include <tbb/concurrent_queue.h>
|
||||
#endif
|
||||
|
||||
#ifdef MULTIUNITTEST
|
||||
#define MOCKTESTMACRO virtual
|
||||
#define MultiDevicePlugin MockMultiDevicePlugin
|
||||
#else
|
||||
#define MOCKTESTMACRO
|
||||
#endif
|
||||
|
||||
namespace MultiDevicePlugin {
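The MULTIUNITTEST block repeated in these headers makes selected plugin methods virtual only in unit-test builds so they can be overridden by mocks. A hedged sketch of the idea with a made-up Plugin class (not the real MultiDeviceInferencePlugin):

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#else
#define MOCKTESTMACRO
#endif

class Plugin {
public:
    // Non-virtual (zero overhead) in production; virtual and mockable in tests.
    MOCKTESTMACRO int SelectDevice(int hint) const { return hint; }
};

#ifdef MULTIUNITTEST
class MockPlugin : public Plugin {
public:
    int SelectDevice(int) const override { return 0; }  // deterministic test double
};
#endif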
@ -39,6 +45,26 @@ struct DeviceInformation {
std::string defaultDeviceID;
};

struct AutoLoadContext {
std::atomic<bool> isEnabled = {false};
std::atomic<bool> isAlready = {false};
std::atomic<bool> isLoadSuccess = {false};
std::future<void> future;
std::promise<void> promise;
InferenceEngine::SoExecutableNetworkInternal executableNetwork;
DeviceInformation deviceInfo;
std::vector<DeviceInformation> metaDevices;
std::string networkPrecision;
std::string errMessage;
InferenceEngine::Task task;
};

enum AutoLoadContextIndex {
CPU = 0,
ACTUALDEVICE = 1,
CONTEXTNUM = 2
};

template<typename T>
using DeviceMap = std::unordered_map<DeviceName, T>;
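The AutoLoadContext above pairs a promise with a future per device. A rough, self-contained sketch (not the plugin's actual code; all names are illustrative) of how two such contexts let the caller return as soon as the faster device has loaded while the slower one keeps loading in the background:

#include <array>
#include <future>
#include <string>
#include <thread>
#include <vector>

// Illustrative stand-ins for the plugin types; not the real OpenVINO classes.
struct LoadContext {
    bool enabled = false;
    std::string device;
    std::promise<void> done;
    std::future<void> ready;
};

int main() {
    std::array<LoadContext, 2> ctx;   // index 0 = CPU, index 1 = actual (accelerator) device
    ctx[0].enabled = true; ctx[0].device = "CPU";
    ctx[1].enabled = true; ctx[1].device = "GPU";

    std::vector<std::thread> workers;
    for (auto& c : ctx) {
        if (!c.enabled) continue;
        c.ready = c.done.get_future();
        workers.emplace_back([&c] {
            // a real implementation would compile/load the network for c.device here
            c.done.set_value();       // signal that this device finished loading
        });
    }

    ctx[0].ready.wait();              // unblock the caller as soon as the fast (CPU) load is done
    for (auto& t : workers) t.join(); // the slower device keeps loading in the background
}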
@ -163,22 +189,21 @@ private:
|
||||
static bool RunPipelineTask(InferenceEngine::Task& inferPipelineTask,
|
||||
NotBusyWorkerRequests& idleWorkerRequests,
|
||||
const DeviceName& preferred_device);
|
||||
void TryToLoadNetWork(AutoLoadContext& context,
|
||||
const std::string& modelPath,
|
||||
const InferenceEngine::CNNNetwork& network);
|
||||
|
||||
private:
|
||||
std::shared_ptr<InferenceEngine::ICore> _core;
|
||||
InferenceEngine::IStreamsExecutor::Ptr _executor;
|
||||
MultiDeviceInferencePlugin* _multiPlugin;
|
||||
InferenceEngine::SoExecutableNetworkInternal _networkFirstReady;
|
||||
mutable InferenceEngine::SoExecutableNetworkInternal _networkActualNeeded;
|
||||
NetworkFuture _cpuFuture;
|
||||
NetworkPromise _cpuPromise;
|
||||
mutable NetworkFuture _acceleratorFuture;
|
||||
mutable NetworkPromise _acceleratorPromise;
|
||||
mutable std::atomic<bool> _alreadyActualNetwork = {false};
|
||||
bool _workModeIsAUTO = {false};
|
||||
DeviceInformation _cpuDevice;
|
||||
DeviceInformation _acceleratorDevice;
|
||||
mutable std::once_flag _oc;
|
||||
std::once_flag _firstLoadOC;
|
||||
std::future<void> _firstLoadFuture;
|
||||
std::promise<void> _firstLoadPromise;
|
||||
mutable AutoLoadContext _loadContext[CONTEXTNUM];
|
||||
mutable std::mutex _confMutex;
|
||||
};
|
||||
|
||||
} // namespace MultiDevicePlugin
|
||||
|
@ -16,6 +16,13 @@
|
||||
#include <string>
|
||||
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
|
||||
|
||||
#ifdef MULTIUNITTEST
|
||||
#define MOCKTESTMACRO virtual
|
||||
#define MultiDevicePlugin MockMultiDevicePlugin
|
||||
#else
|
||||
#define MOCKTESTMACRO
|
||||
#endif
|
||||
|
||||
namespace MultiDevicePlugin {
|
||||
|
||||
class MultiDeviceInferRequest : public InferenceEngine::IInferRequestInternal {
|
||||
|
@ -13,6 +13,13 @@
|
||||
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
|
||||
#include "multi_device_exec_network.hpp"
|
||||
|
||||
#ifdef MULTIUNITTEST
|
||||
#define MOCKTESTMACRO virtual
|
||||
#define MultiDevicePlugin MockMultiDevicePlugin
|
||||
#else
|
||||
#define MOCKTESTMACRO
|
||||
#endif
|
||||
|
||||
namespace MultiDevicePlugin {
|
||||
|
||||
class MultiDeviceInferencePlugin : public InferenceEngine::IInferencePlugin {
|
||||
@ -33,11 +40,11 @@ public:
|
||||
InferenceEngine::Parameter GetMetric(const std::string& name,
|
||||
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
|
||||
|
||||
std::vector<MultiDevicePlugin::DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
|
||||
MOCKTESTMACRO std::vector<MultiDevicePlugin::DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
|
||||
const std::map<std::string, std::string> & config) const;
|
||||
|
||||
std::string GetDeviceList(const std::map<std::string, std::string>& config) const;
|
||||
DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
|
||||
MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
|
||||
|
||||
protected:
|
||||
std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,
|
||||
|
@ -261,7 +261,7 @@ private:
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief SOPointer to IInferRequestInternal.
|
||||
* @brief SoPtr to IInferRequestInternal.
|
||||
*/
|
||||
using SoIInferRequestInternal = ov::runtime::SoPtr<IInferRequestInternal>;
|
||||
|
||||
|
@ -16,7 +16,6 @@
|
||||
|
||||
#include "blob_factory.hpp"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "details/ie_so_pointer.hpp"
|
||||
#include "ie_iextension.h"
|
||||
#include "ie_input_info.hpp"
|
||||
#include "ie_parameter.hpp"
|
||||
@ -346,13 +345,6 @@ using CreateExtensionFunc = void(std::shared_ptr<IExtension>&);
|
||||
*/
|
||||
constexpr static const auto create_plugin_function = OV_PP_TOSTRING(IE_CREATE_PLUGIN);
|
||||
|
||||
namespace details {
|
||||
template <>
|
||||
class SOCreatorTrait<IInferencePlugin> {
|
||||
public:
|
||||
static constexpr auto name = create_plugin_function;
|
||||
};
|
||||
} // namespace details
|
||||
} // namespace InferenceEngine
|
||||
|
||||
/**
|
||||
|
@ -14,11 +14,18 @@
|
||||
#include <cstring>
|
||||
|
||||
#include "ie_api.h"
|
||||
#include "details/ie_so_pointer.hpp"
|
||||
#include "openvino/util/file_util.hpp"
|
||||
|
||||
namespace FileUtils {
|
||||
|
||||
/**
|
||||
* @brief Enables only `char` or `wchar_t` template specializations
|
||||
* @tparam C A char type
|
||||
*/
|
||||
template <typename C>
|
||||
using enableIfSupportedChar =
|
||||
typename std::enable_if<(std::is_same<C, char>::value || std::is_same<C, wchar_t>::value)>::type;
|
||||
|
||||
/**
|
||||
* @brief Interface function to get absolute path of file
|
||||
* @ingroup ie_dev_api_file_utils
|
||||
@ -74,7 +81,7 @@ inline long long fileSize(const wchar_t* fileName) {
|
||||
* @param f - string name of the file
|
||||
* @return size of the file
|
||||
*/
|
||||
template <typename C, typename = InferenceEngine::details::enableIfSupportedChar<C>>
|
||||
template <typename C, typename = enableIfSupportedChar<C>>
|
||||
inline long long fileSize(const std::basic_string<C> &f) {
|
||||
return fileSize(f.c_str());
|
||||
}
|
||||
@ -85,7 +92,7 @@ inline long long fileSize(const std::basic_string<C> &f) {
|
||||
* @param fileName - given filename
|
||||
* @return true is exists
|
||||
*/
|
||||
template <typename C, typename = InferenceEngine::details::enableIfSupportedChar<C>>
|
||||
template <typename C, typename = enableIfSupportedChar<C>>
|
||||
inline bool fileExist(const C * fileName) {
|
||||
return fileSize(fileName) >= 0;
|
||||
}
|
||||
@ -96,7 +103,7 @@ inline bool fileExist(const C * fileName) {
|
||||
* @param fileName - string with a given filename
|
||||
* @return true is exists
|
||||
*/
|
||||
template <typename C, typename = InferenceEngine::details::enableIfSupportedChar<C>>
|
||||
template <typename C, typename = enableIfSupportedChar<C>>
|
||||
inline bool fileExist(const std::basic_string<C> &fileName) {
|
||||
return fileExist(fileName.c_str());
|
||||
}
|
||||
@ -109,7 +116,7 @@ inline bool fileExist(const std::basic_string<C> &fileName) {
|
||||
* @return string with combination of the path and the filename divided by file separator
|
||||
*/
|
||||
|
||||
template <typename C, typename = InferenceEngine::details::enableIfSupportedChar<C>>
|
||||
template <typename C, typename = enableIfSupportedChar<C>>
|
||||
inline std::basic_string<C> makePath(const std::basic_string<C> &folder, const std::basic_string<C> &file) {
|
||||
if (folder.empty())
|
||||
return file;
|
||||
@ -122,7 +129,7 @@ inline std::basic_string<C> makePath(const std::basic_string<C> &folder, const s
|
||||
* @param filename - string with the name of the file which extension should be extracted
|
||||
* @return string with extracted file extension
|
||||
*/
|
||||
template <typename C, typename = InferenceEngine::details::enableIfSupportedChar<C>>
|
||||
template <typename C, typename = enableIfSupportedChar<C>>
|
||||
inline std::basic_string<C> fileExt(const std::basic_string<C> &filename) {
|
||||
auto pos = filename.rfind(ov::util::FileTraits<C>::dot_symbol);
|
||||
if (pos == std::string::npos)
|
||||
@ -130,7 +137,7 @@ inline std::basic_string<C> fileExt(const std::basic_string<C> &filename) {
|
||||
return filename.substr(pos + 1);
|
||||
}
|
||||
|
||||
template <typename C, typename = InferenceEngine::details::enableIfSupportedChar<C>>
|
||||
template <typename C, typename = enableIfSupportedChar<C>>
|
||||
inline std::basic_string<C> makePluginLibraryName(const std::basic_string<C> &path, const std::basic_string<C> &input) {
|
||||
std::basic_string<C> separator(1, ov::util::FileTraits<C>::file_separator);
|
||||
if (path.empty())
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "ie_parallel.hpp"
|
||||
#include "threading/ie_istreams_executor.hpp"
|
||||
|
||||
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
namespace InferenceEngine {
|
||||
/**
|
||||
* @class TBBStreamsExecutor
|
||||
@ -31,3 +32,4 @@ private:
|
||||
std::unique_ptr<Impl> _impl;
|
||||
};
|
||||
} // namespace InferenceEngine
|
||||
#endif
|
||||
|
@ -143,9 +143,9 @@ if(ENABLE_GAPI_PREPROCESSING)
|
||||
endif()
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
# for static linkage the dependencies are in opposite order
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
|
||||
else()
|
||||
# for static linkage the dependencies are in opposite order
|
||||
target_link_libraries(inference_engine PRIVATE ${TARGET_NAME})
|
||||
endif()
|
||||
|
||||
|
@ -16,8 +16,6 @@
|
||||
#include <file_utils.h>
|
||||
#include <ie_preprocess.hpp>
|
||||
|
||||
#include <details/ie_so_pointer.hpp>
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
/**
|
||||
|
@ -45,10 +45,11 @@ if(WIN32)
|
||||
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
|
||||
endif()
|
||||
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
target_link_libraries(inference_engine PRIVATE ${TARGET_NAME})
|
||||
else()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
|
||||
else()
|
||||
# for static linkage the dependencies are in opposite order
|
||||
target_link_libraries(inference_engine PRIVATE ${TARGET_NAME})
|
||||
endif()
|
||||
|
||||
# code style
|
||||
|
@ -0,0 +1,28 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <transformations_visibility.hpp>

#include <ngraph/pass/graph_rewrite.hpp>

namespace ngraph {
namespace pass {

class TRANSFORMATIONS_API ReshapeSequenceFusion;

} // namespace pass
} // namespace ngraph

/**
* @ingroup ie_transformation_common_api
* @brief ReshapeSequenceFusion fuses a sequence of Reshape operations into a single Reshape
*/

class ngraph::pass::ReshapeSequenceFusion: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ReshapeSequenceFusion();
};
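For context, a matcher pass declared this way is normally registered on an ngraph::pass::Manager; a minimal usage sketch, assuming a function `f` is already available (the include path matches the one used by MOCTransformations further below):

#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/common_optimizations/reshape_sequence_fusion.hpp>

void run_reshape_fusion(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::ReshapeSequenceFusion>();
    manager.run_passes(f);  // folds chains of Reshape ops whose patterns are static and positive
}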
@ -17,6 +17,7 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API TransposeSinking;
|
||||
class TRANSFORMATIONS_API TransposeConvert;
|
||||
class TRANSFORMATIONS_API TransposeReduction;
|
||||
class TRANSFORMATIONS_API TransposeFQReduction;
|
||||
class TRANSFORMATIONS_API TransposeFuse;
|
||||
@ -44,6 +45,16 @@ public:
|
||||
TransposeFQReduction();
|
||||
};
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief TransposeConvert transformation sinks Transpose through Convert
|
||||
*/
|
||||
class ngraph::pass::TransposeConvert : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
TransposeConvert();
|
||||
};
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief TransposeFuse transformation eliminates 2 consecutive Transposes if they result in no changes to input or fuses them
|
||||
@ -65,6 +76,7 @@ public:
|
||||
TransposeSinking() {
|
||||
add_matcher<ngraph::pass::TransposeFQReduction>();
|
||||
add_matcher<ngraph::pass::TransposeReduction>();
|
||||
add_matcher<ngraph::pass::TransposeConvert>();
|
||||
add_matcher<ngraph::pass::TransposeFuse>();
|
||||
}
|
||||
};
|
||||
|
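Editor's note: as a quick illustration of the graph the new TransposeConvert matcher targets, a small sketch built the same way as the tests added later in this diff. The shapes and the order constant are illustrative, not part of the commit.

#include <memory>

#include <ngraph/opsets/opset6.hpp>

// Before: Parameter -> Transpose(order) -> Convert(f16)
// After TransposeConvert: Parameter -> Convert(f16) -> Transpose(order),
// i.e. the Transpose is "sunk" below the Convert so the other matchers in
// TransposeSinking can keep pushing it towards the outputs.
std::shared_ptr<ngraph::Node> build_transpose_convert_example() {
    using namespace ngraph;
    auto input = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 3, 4});
    auto order = opset6::Constant::create(element::i64, Shape{3}, {2, 0, 1});
    auto transpose = std::make_shared<opset6::Transpose>(input, order);
    return std::make_shared<opset6::Convert>(transpose, element::f16);
}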
@ -114,7 +114,6 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
    common_fusions->add_matcher<ngraph::pass::ShuffleChannelsFusion>(false);
    common_fusions->add_matcher<ngraph::pass::SpaceToBatchFusion>();
    common_fusions->add_matcher<ngraph::pass::BatchToSpaceFusion>();
    common_fusions->add_matcher<ngraph::pass::TransposeToReshape>();
    common_fusions->set_name("ngraph::pass::CommonFusions");

    manager.register_pass<ngraph::pass::ConvertPadToGroupConvolution, false>();

@ -52,6 +52,7 @@
#include <transformations/op_conversions/convert_divide.hpp>
#include <transformations/common_optimizations/divide_fusion.hpp>
#include <transformations/common_optimizations/subtract_fusion.hpp>
#include <transformations/common_optimizations/reshape_sequence_fusion.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);

@ -134,6 +135,8 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::F
    common_fusions->add_matcher<ngraph::pass::SplitConcatPairToInterpolateFusion>(m_use_shapes);
    common_fusions->add_matcher<ngraph::pass::DivideFusion>();
    common_fusions->add_matcher<ngraph::pass::SubtractFusion>();
    common_fusions->add_matcher<ngraph::pass::TransposeToReshape>();
    common_fusions->add_matcher<ngraph::pass::ReshapeSequenceFusion>();
    common_fusions->set_name("ngraph::pass::CommonFusions");

    manager.register_pass<ngraph::pass::BinarizeWeights>();
@ -0,0 +1,68 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
#include "transformations/utils/utils.hpp"

#include <memory>
#include <vector>

#include <ngraph/opsets/opset8.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "itt.hpp"

NGRAPH_RTTI_DEFINITION(ngraph::pass::ReshapeSequenceFusion, "ReshapeSequenceFusion", 0);

namespace {
bool has_valid_pattern(const std::shared_ptr<ngraph::Node> & node) {
    auto const_node = std::dynamic_pointer_cast<ngraph::opset8::Constant>(node);
    if (!const_node) return false;
    const auto & values = const_node->cast_vector<int64_t>();
    // We cannot fuse Reshapes if their pattern values contain special numbers like -1 and 0
    return std::all_of(values.cbegin(), values.cend(), [](int64_t value) { return value > 0;});
}
}

ngraph::pass::ReshapeSequenceFusion::ReshapeSequenceFusion() {
    MATCHER_SCOPE(ReshapeSequenceFusion);
    auto reshape_input = pattern::any_input();
    auto reshape_a_pattern = pattern::wrap_type<opset8::Constant>();
    auto reshape_a = pattern::wrap_type<opset8::Reshape>({reshape_input, reshape_a_pattern}, pattern::consumers_count(1));
    auto reshape_b_pattern = pattern::wrap_type<opset8::Constant>();
    auto reshape_b = pattern::wrap_type<opset8::Reshape>({reshape_a, reshape_b_pattern});

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto & pattern_map = m.get_pattern_value_map();
        auto input = pattern_map.at(reshape_input);
        auto reshape = m.get_match_root();

        auto pattern_a = pattern_map.at(reshape_a_pattern).get_node_shared_ptr();
        auto pattern_b = pattern_map.at(reshape_b_pattern).get_node_shared_ptr();
        // skip reshapes whose patterns contain special numbers like -1 or 0
        if (!has_valid_pattern(pattern_a) || !has_valid_pattern(pattern_b)) {
            return false;
        }

        // vector of nodes whose runtime info must be copied
        NodeVector nodes{pattern_map.at(reshape_a).get_node_shared_ptr(), reshape};
        while (std::dynamic_pointer_cast<opset8::Reshape>(input.get_node_shared_ptr())) {
            auto node = input.get_node_shared_ptr();
            if (!has_valid_pattern(node->get_input_node_shared_ptr(1)) ||
                input.get_target_inputs().size() != 1) {
                break;
            }
            nodes.push_back(node);
            input = node->input_value(0);
        }

        reshape->input(0).replace_source_output(input);
        copy_runtime_info(nodes, reshape);
        return false;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(reshape_b, matcher_name);
    this->register_matcher(m, callback);
}
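Editor's note: to make the callback's effect concrete, a minimal sketch of the kind of chain it collapses, mirroring the unit tests added later in this diff. The helper name build_reshape_chain is illustrative and not part of the commit.

#include <memory>

#include <ngraph/function.hpp>
#include <ngraph/opsets/opset8.hpp>

using namespace ngraph;

// Builds Parameter{1,2,3} -> Reshape{3,2} -> Reshape{2,3} -> Reshape{6}.
// With all pattern values positive and each intermediate Reshape having a
// single consumer, ReshapeSequenceFusion reconnects the last Reshape{6}
// directly to the Parameter and the intermediate Reshapes become dead code.
std::shared_ptr<Function> build_reshape_chain() {
    auto data = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 2, 3});
    auto a = std::make_shared<opset8::Reshape>(
        data, opset8::Constant::create(element::i64, Shape{2}, {3, 2}), true);
    auto b = std::make_shared<opset8::Reshape>(
        a, opset8::Constant::create(element::i64, Shape{2}, {2, 3}), true);
    auto c = std::make_shared<opset8::Reshape>(
        b, opset8::Constant::create(element::i64, Shape{1}, {6}), true);
    return std::make_shared<Function>(OutputVector{c}, ParameterVector{data});
}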
@ -16,6 +16,7 @@
#include <numeric>

NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeSinking, "TransposeSinking", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeConvert, "TransposeConvert", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeReduction, "TransposeReduction", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeFQReduction, "TransposeFQReduction", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeFuse, "TransposeFuse", 0);
@ -60,6 +61,33 @@ std::shared_ptr<ngraph::opset6::Constant> get_reversed_order_constant(const std:

}  // namespace

ngraph::pass::TransposeConvert::TransposeConvert() {
    MATCHER_SCOPE(TransposeConvert);

    auto transpose_label = pattern::wrap_type<opset6::Transpose>({pattern::any_input(),
                                                                  pattern::wrap_type<opset6::Constant>()},
                                                                  pattern::consumers_count(1));
    auto convert_label = pattern::wrap_type<opset6::Convert>({transpose_label});

    matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher &m) {
        const auto &pattern_to_output = m.get_pattern_value_map();
        auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr();
        auto convert = pattern_to_output.at(convert_label).get_node_shared_ptr();

        auto new_convert = convert->clone_with_new_inputs({transpose->input_value(0)});
        auto new_transpose = transpose->clone_with_new_inputs({new_convert, transpose->input_value(1)});
        register_new_node(new_transpose);

        new_transpose->set_friendly_name(convert->get_friendly_name());
        copy_runtime_info({transpose, convert}, {new_convert, new_transpose});
        replace_node(convert, new_transpose);
        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(convert_label, matcher_name);
    register_matcher(m, matcher_pass_callback);
}

ngraph::pass::TransposeReduction::TransposeReduction() {
    MATCHER_SCOPE(TransposeReduction);

@ -165,7 +193,7 @@ ngraph::pass::TransposeFQReduction::TransposeFQReduction() {
        auto new_fq = fq->clone_with_new_inputs(fq_inputs);
        new_ops.push_back(new_fq);

        auto new_transpose = std::make_shared<ngraph::opset6::Transpose>(new_fq, transpose_order);
        auto new_transpose = register_new_node<ngraph::opset6::Transpose>(new_fq, transpose_order);
        new_ops.push_back(new_transpose);
        new_transpose->set_friendly_name(fq->get_friendly_name());

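Editor's note: presumably the switch from std::make_shared to register_new_node here (and the register_new_node call in the new TransposeConvert above) is so the freshly created Transpose is reported back to the MatcherPass, letting the enclosing GraphRewrite re-run the other transpose matchers on it instead of stopping after a single rewrite.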
@ -18,100 +18,93 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeToReshape, "TransposeToReshape", 0

using namespace ngraph;

namespace {

bool replace_transpose_with_reshape(const std::shared_ptr<Node>& transpose) {
    auto data = transpose->input_value(0);
    const auto input_shape = transpose->input(0).get_partial_shape();

    const size_t input_shape_rank = input_shape.rank().get_length();

    auto order = ov::as_type_ptr<opset6::Constant>(transpose->input_value(1).get_node_shared_ptr());
    if (!order || !ngraph::shape_size(order->get_shape())) {
        return false;
    }

    const auto order_value = order->cast_vector<int64_t>();

    // Check that the transpose order without 1 dims has an ascending order
    int64_t last_dim(-1);
    for (size_t i = 0; i < input_shape_rank; ++i) {
        if (input_shape[order_value[i]].is_dynamic() || input_shape[order_value[i]] != 1) {
            if (order_value[i] < last_dim) {
                return false;
            }
            last_dim = order_value[i];
        }
    }

    // Transpose operation can be removed if the original transpose order is sorted
    // or the dimensions that change their places are equal to 1
    using DimensionToPosition = struct {
        Dimension dim;
        size_t pos;
    };
    std::vector<DimensionToPosition> dims;
    for (size_t i = 0; i < input_shape_rank; ++i) {
        if (order_value[i] != static_cast<int64_t>(i)) {
            dims.push_back({ input_shape[order_value[i]], i });
        }
    }

    // If the number of dimensions != 1 that have to move is equal to 0, we can remove this Transpose
    if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) {
            return !(item.dim.is_static() && item.dim.get_length() == 1);
        }) == 0) {
        return replace_output_update_name(transpose->output(0), transpose->input_value(0));
    }

    // Transpose can be replaced with Reshape in two ways:
    // 1. Reshape with dims as Constant
    // 2. Reshape with dims as input (ShapeOf->Gather)
    //
    // The first case is possible only if one or fewer dynamic dimensions change their position.
    // For example: input_shape {?, 3, 1, ?} and order {0, 1, 3, 2} can be replaced with Reshape
    // with Constant {0, 3, -1, 1}, but if input_shape is {?, 1, 1, ?} and order is {1, 0, 3, 2} the transpose
    // cannot be replaced in the same way, and in this case it is only possible to use Gather(ShapeOf,
    // order)

    Output<Node> reshape_dim;
    NodeVector new_ops;

    if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) {
            return item.dim.is_dynamic();
        }) < 2) {
        std::vector<int64_t> reshape_value(input_shape_rank, 0);
        for (const auto& item : dims) {
            reshape_value[item.pos] = item.dim.is_dynamic() ? -1 : item.dim.get_length();
        }
        reshape_dim =
            opset3::Constant::create(element::i64, Shape{ reshape_value.size() }, reshape_value);
    } else {
        auto shape_of = std::make_shared<opset3::ShapeOf>(data);
        new_ops.push_back(shape_of);
        reshape_dim = std::make_shared<opset3::Gather>(
            shape_of, order, opset3::Constant::create(element::i64, Shape{ 1 }, { 0 }));
        new_ops.push_back(reshape_dim.get_node_shared_ptr());
    }

    auto reshape_op = std::make_shared<opset3::Reshape>(data, reshape_dim, true);
    new_ops.push_back(reshape_op);

    reshape_op->set_friendly_name(transpose->get_friendly_name());
    copy_runtime_info(transpose, new_ops);
    replace_node(transpose, reshape_op);
    return true;
}

}  // namespace

ngraph::pass::TransposeToReshape::TransposeToReshape() {
    MATCHER_SCOPE(TransposeToReshape);

    auto transpose_label = pattern::wrap_type<opset6::Transpose>(
        { pattern::any_input(pattern::has_static_rank()), pattern::wrap_type<opset6::Constant>() });
    ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) {
        return replace_transpose_with_reshape(m.get_match_root());
        auto transpose = m.get_match_root();
        auto data = transpose->input_value(0);
        const auto input_shape = transpose->input(0).get_partial_shape();

        const size_t input_shape_rank = input_shape.rank().get_length();

        auto order = ov::as_type_ptr<opset6::Constant>(transpose->input_value(1).get_node_shared_ptr());
        if (!order || !ngraph::shape_size(order->get_shape())) {
            return false;
        }

        const auto order_value = order->cast_vector<int64_t>();

        // Check that the transpose order without 1 dims has an ascending order
        int64_t last_dim(-1);
        for (size_t i = 0; i < input_shape_rank; ++i) {
            if (input_shape[order_value[i]].is_dynamic() || input_shape[order_value[i]] != 1) {
                if (order_value[i] < last_dim) {
                    return false;
                }
                last_dim = order_value[i];
            }
        }

        // Transpose operation can be removed if the original transpose order is sorted
        // or the dimensions that change their places are equal to 1
        using DimensionToPosition = struct {
            Dimension dim;
            size_t pos;
        };
        std::vector<DimensionToPosition> dims;
        for (size_t i = 0; i < input_shape_rank; ++i) {
            if (order_value[i] != static_cast<int64_t>(i)) {
                dims.push_back({ input_shape[order_value[i]], i });
            }
        }

        // If the number of dimensions != 1 that have to move is equal to 0, we can remove this Transpose
        if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) {
                return !(item.dim.is_static() && item.dim.get_length() == 1);
            }) == 0) {
            return replace_output_update_name(transpose->output(0), transpose->input_value(0));
        }

        // Transpose can be replaced with Reshape in two ways:
        // 1. Reshape with dims as Constant
        // 2. Reshape with dims as input (ShapeOf->Gather)
        //
        // The first case is possible only if one or fewer dynamic dimensions change their position.
        // For example: input_shape {?, 3, 1, ?} and order {0, 1, 3, 2} can be replaced with Reshape
        // with Constant {0, 3, -1, 1}, but if input_shape is {?, 1, 1, ?} and order is {1, 0, 3, 2} the transpose
        // cannot be replaced in the same way, and in this case it is only possible to use Gather(ShapeOf,
        // order)

        Output<Node> reshape_dim;
        NodeVector new_ops;

        if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) {
                return item.dim.is_dynamic();
            }) < 2) {
            std::vector<int64_t> reshape_value(input_shape_rank, 0);
            for (const auto& item : dims) {
                reshape_value[item.pos] = item.dim.is_dynamic() ? -1 : item.dim.get_length();
            }
            reshape_dim =
                opset3::Constant::create(element::i64, Shape{ reshape_value.size() }, reshape_value);
        } else {
            auto shape_of = std::make_shared<opset3::ShapeOf>(data);
            new_ops.push_back(shape_of);
            reshape_dim = std::make_shared<opset3::Gather>(
                shape_of, order, opset3::Constant::create(element::i64, Shape{ 1 }, { 0 }));
            new_ops.push_back(reshape_dim.get_node_shared_ptr());
        }

        auto reshape_op = register_new_node<opset3::Reshape>(data, reshape_dim, true);
        new_ops.push_back(reshape_op);

        reshape_op->set_friendly_name(transpose->get_friendly_name());
        copy_runtime_info(transpose, new_ops);
        replace_node(transpose, reshape_op);
        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(transpose_label, matcher_name);

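Editor's note: a small sketch of the constant-pattern case described in the comment above (only the graph construction is shown; it is illustrative and not part of the commit).

#include <memory>

#include <ngraph/opsets/opset6.hpp>
#include <ngraph/partial_shape.hpp>

// input_shape {?, 3, 1, ?} with order {0, 1, 3, 2}: only the static "1" dimension
// and one dynamic dimension swap places, so, per the comment above, TransposeToReshape
// can replace this Transpose with a Reshape driven by the constant {0, 3, -1, 1}.
std::shared_ptr<ngraph::Node> build_transpose_to_reshape_example() {
    using namespace ngraph;
    auto input = std::make_shared<opset6::Parameter>(
        element::f32, PartialShape{Dimension::dynamic(), 3, 1, Dimension::dynamic()});
    auto order = opset6::Constant::create(element::i64, Shape{4}, {0, 1, 3, 2});
    return std::make_shared<opset6::Transpose>(input, order);
}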
@ -39,7 +39,7 @@ endif()
# "mvnc" must be the first library in the link list
target_link_libraries(${TARGET_NAME}
    PRIVATE
        mvnc inference_engine inference_engine_legacy vpu_graph_transformer)
        mvnc inference_engine_legacy vpu_graph_transformer)

# MyriadPlugin is not safe to unload at runtime
if(LINUX AND LINUX_OS_NAME MATCHES "Ubuntu")

@ -14,7 +14,6 @@

#include "ie_core.hpp"
#include "ngraph/function.hpp"
#include "details/ie_so_loader.h"
#include "ie_metric_helpers.hpp"
#include "openvino/op/logical_not.hpp"

@ -168,7 +167,7 @@ public:

class CachingTest : public ::testing::TestWithParam<std::tuple<TestParam, std::string>> {
public:
    std::unique_ptr<SharedObjectLoader> sharedObjectLoader;
    std::shared_ptr<void> sharedObjectLoader;
    std::function<void(IInferencePlugin*)> injectProxyEngine;
    std::string modelName = "Caching_test.xml";
    std::string weightsName = "Caching_test.bin";
@ -270,7 +269,7 @@ public:
        mockPlugin = std::make_shared<MockCachingInferencePlugin>();
        setupMock(*mockPlugin);
        std::string libraryName = get_mock_engine_name();
        sharedObjectLoader.reset(new SharedObjectLoader(libraryName.c_str()));
        sharedObjectLoader = ov::util::load_shared_object(libraryName.c_str());
        injectProxyEngine = make_std_function<void(IInferencePlugin*)>("InjectProxyEngine");

        FuncTestUtils::TestModel::generateTestModel(modelName, weightsName);
@ -337,7 +336,8 @@ public:
private:
    template <class T>
    std::function<T> make_std_function(const std::string& functionName) {
        std::function <T> ptr(reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
        std::function <T> ptr(reinterpret_cast<T*>(
            ov::util::get_symbol(sharedObjectLoader, functionName.c_str())));
        return ptr;
    }


@ -5,7 +5,7 @@
#include <gtest/gtest.h>

#include <file_utils.h>
#include "details/ie_so_loader.h"
#include "openvino/util/shared_object.hpp"
#include <cpp/ie_plugin.hpp>

using namespace std;
@ -20,14 +20,15 @@ protected:
    }

    void loadDll(const string &libraryName) {
        sharedObjectLoader.reset(new details::SharedObjectLoader(libraryName.c_str()));
        sharedObjectLoader = ov::util::load_shared_object(libraryName.c_str());
    }
    unique_ptr<SharedObjectLoader> sharedObjectLoader;
    std::shared_ptr<void> sharedObjectLoader;

    using CreateF = void(std::shared_ptr<IInferencePlugin>&);

    std::function<CreateF> make_std_function(const std::string& functionName) {
        std::function<CreateF> ptr(reinterpret_cast<CreateF*>(sharedObjectLoader->get_symbol(functionName.c_str())));
        std::function<CreateF> ptr(reinterpret_cast<CreateF*>(
            ov::util::get_symbol(sharedObjectLoader, functionName.c_str())));
        return ptr;
    }
};

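Editor's note: both test fixtures above migrate from InferenceEngine::details::SharedObjectLoader to the ov::util helpers. A minimal sketch of the new pattern follows; the library path and the exported symbol name "CreateTestPlugin" are illustrative and not part of the commit.

#include <memory>
#include <string>

#include "openvino/util/shared_object.hpp"

// Load a plugin library and resolve an exported C entry point.
// The std::shared_ptr<void> handle keeps the library mapped; resolved
// symbols must not outlive it.
void call_plugin_entry_point(const std::string& library) {
    std::shared_ptr<void> handle = ov::util::load_shared_object(library.c_str());
    using CreateF = void(int);
    auto create = reinterpret_cast<CreateF*>(ov::util::get_symbol(handle, "CreateTestPlugin"));
    create(0);  // use the symbol while `handle` is still alive
}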
@ -1,41 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <gmock/gmock-spec-builders.h>

#include <file_utils.h>

#include <memory>
#include <common_test_utils/test_assertions.hpp>
#include <details/ie_so_pointer.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>

using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace ::testing;
using ::testing::InSequence;

namespace InferenceEngine {

namespace details {

struct UnknownPlugin : std::enable_shared_from_this<UnknownPlugin> {};

template<>
class SOCreatorTrait<InferenceEngine::details::UnknownPlugin> {
public:
    static constexpr auto name = "CreateUnknownPlugin";
};

}  // namespace details

}  // namespace InferenceEngine

class SoPointerTests : public ::testing::Test {};

TEST_F(SoPointerTests, UnknownPlugin) {
    ASSERT_THROW(SOPointer<InferenceEngine::details::UnknownPlugin>{std::string{"UnknownPlugin"}}, Exception);
}
@ -0,0 +1,105 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <string>
#include <memory>
#include <queue>

#include <ngraph/function.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <transformations/common_optimizations/reshape_sequence_fusion.hpp>

#include "common_test_utils/ngraph_test_utils.hpp"

using namespace testing;
using namespace ngraph;

namespace {
Output<Node> reshape(Output<Node> input, std::vector<int64_t> values, bool special_zero = true) {
    return std::make_shared<opset6::Reshape>(input,
            opset6::Constant::create(element::i64, Shape{values.size()}, values), special_zero);
}
}

TEST_F(TransformationTestsF, ReshapeSequenceFusion1) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 2, 3});
        auto a = reshape(data, {3, 2});
        auto b = reshape(a, {2, 3});
        auto c = reshape(b, {6});
        function = std::make_shared<Function>(OutputVector{c}, ParameterVector{data});

        manager.register_pass<pass::ReshapeSequenceFusion>();
    }

    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 2, 3});
        auto c = reshape(data, {6});
        function_ref = std::make_shared<Function>(OutputVector{c}, ParameterVector{data});
    }
}

TEST_F(TransformationTestsF, ReshapeSequenceFusion2) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 2, 3});
        auto a = reshape(data, {3, 2});
        auto b = reshape(a, {6});
        function = std::make_shared<Function>(OutputVector{b}, ParameterVector{data});

        manager.register_pass<pass::ReshapeSequenceFusion>();
    }

    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 2, 3});
        auto c = reshape(data, {6});
        function_ref = std::make_shared<Function>(OutputVector{c}, ParameterVector{data});
    }
}

TEST_F(TransformationTestsF, ReshapeSequenceFusionNeg1) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 2, 3});
        auto a = reshape(data, {-1, 2});
        auto b = reshape(a, {6});
        function = std::make_shared<Function>(OutputVector{b}, ParameterVector{data});

        manager.register_pass<pass::ReshapeSequenceFusion>();
    }
}

TEST_F(TransformationTestsF, ReshapeSequenceFusionNeg2) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 2, 3});
        auto a = reshape(data, {-1, 3});
        auto b = reshape(a, {6});
        function = std::make_shared<Function>(OutputVector{b}, ParameterVector{data});

        manager.register_pass<pass::ReshapeSequenceFusion>();
    }
}

TEST_F(TransformationTestsF, ReshapeSequenceFusionNeg3) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 2, 3});
        auto a = reshape(data, {2, 3});
        auto b = reshape(a, {6});
        function = std::make_shared<Function>(OutputVector{a, b}, ParameterVector{data});

        manager.register_pass<pass::ReshapeSequenceFusion>();
    }
}

TEST_F(TransformationTestsF, ReshapeSequenceFusionNeg4) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{1, 2, 3});
        auto a = reshape(data, {2, 3});
        auto b = reshape(a, {0, 3});
        function = std::make_shared<Function>(OutputVector{b}, ParameterVector{data});

        manager.register_pass<pass::ReshapeSequenceFusion>();
    }
}
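Editor's note: taken together, the negative cases exercise the guards in the callback above. Neg1 and Neg2 keep the chain intact because a reshape pattern contains -1, Neg4 because it contains 0 (special_zero semantics), and Neg3 because the intermediate Reshape has a second consumer, so folding it away would change the function's outputs.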
@ -13,6 +13,7 @@
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include <transformations/init_node_info.hpp>
#include <ngraph_functions/utils/ngraph_helpers.hpp>
#include <openvino/core/preprocess/pre_post_process.hpp>

#include "common_test_utils/ngraph_test_utils.hpp"

@ -278,4 +279,66 @@ TEST_F(TransformationTestsF, TransposeReduceNegative) {
        function = std::make_shared<ngraph::Function>(ngraph::NodeVector{ sub }, ngraph::ParameterVector{ input });
        manager.register_pass<ngraph::pass::TransposeReduction>();
        }
    }
}

TEST_F(TransformationTestsF, TransposeConvert) {
    {
        auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, 640, 20, 2, 2 });
        auto order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 6 }, { 0, 5, 1, 2, 3, 4 });
        auto transpose = std::make_shared<ngraph::opset6::Transpose>(input, order);
        auto convert = std::make_shared<ngraph::opset6::Convert>(transpose, element::f16);

        function = std::make_shared<ngraph::Function>(ngraph::NodeVector{ convert }, ngraph::ParameterVector{ input });
        manager.register_pass<ngraph::pass::TransposeConvert>();
    }

    {
        auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, 640, 20, 2, 2 });
        auto convert = std::make_shared<ngraph::opset6::Convert>(input, element::f16);
        auto order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 6 }, { 0, 5, 1, 2, 3, 4 });
        auto transpose = std::make_shared<ngraph::opset6::Transpose>(convert, order);

        function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ transpose }, ngraph::ParameterVector{ input });
    }
}

TEST_F(TransformationTestsF, TransposeConvertNegativeConsumers) {
    {
        auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 2, 640, 20, 2, 2 });
        auto order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 6 }, { 0, 5, 1, 2, 3, 4 });
        auto transpose = std::make_shared<ngraph::opset6::Transpose>(input, order);
        auto convert = std::make_shared<ngraph::opset6::Convert>(transpose, element::f16);

        function = std::make_shared<ngraph::Function>(ngraph::NodeVector{ convert, transpose }, ngraph::ParameterVector{ input });
        manager.register_pass<ngraph::pass::TransposeConvert>();
    }
}

TEST_F(TransformationTestsF, TransposePreProcessing) {
    {
        auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 3, 64 });
        auto order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 3 }, { 2, 1, 0 });
        auto transpose = std::make_shared<ngraph::opset6::Transpose>(input, order);
        auto relu = std::make_shared<ngraph::opset6::Relu>(transpose);

        function = std::make_shared<ngraph::Function>(ngraph::NodeVector{ relu }, ngraph::ParameterVector{ input });

        using namespace ov::preprocess;
        PrePostProcessor p(function);
        p.input(0).tensor().set_element_type(element::f16);
        p.input(0).preprocess().convert_layout({2, 0, 1});
        p.build();

        manager.register_pass<ngraph::pass::TransposeSinking>();
    }

    {
        auto input = std::make_shared<ngraph::opset6::Parameter>(ngraph::element::f16, ngraph::Shape{ 3, 64, 1 });
        auto convert = std::make_shared<ngraph::opset6::Convert>(input, element::f32);
        auto order = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{ 3 }, { 1, 0, 2 });
        auto transpose = std::make_shared<ngraph::opset6::Transpose>(convert, order);
        auto relu = std::make_shared<ngraph::opset6::Relu>(transpose);

        function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ relu }, ngraph::ParameterVector{ input });
    }
}

@ -82,7 +82,7 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*Auto.*Behavior.*ExecutableNetworkBaseTest.*canLoadCorrectNetworkToGetExecutableWithIncorrectConfig.*)",
        R"(.*(Auto|Multi).*Behavior.*CorrectConfigAPITests.*CanSetExclusiveAsyncRequests.*)",
        R"(.*(Auto|Multi).*Behavior.*IncorrectConfigTests.*CanNotLoadNetworkWithIncorrectConfig.*)",
        R"(.*OVExecutableNetworkBaseTest.*(CanGetInputsInfoAndCheck|CanSetConfigToExecNet|canLoadCorrectNetworkToGetExecutableWithIncorrectConfig).*)",
        R"(.*OVExecutableNetworkBaseTest.*(CanGetInputsInfoAndCheck|CanSetConfigToExecNet).*)",
        R"(.*Behavior.*CorrectConfigCheck.*(canSetConfigAndCheckGetConfig|canSetConfigTwiceAndCheckGetConfig).*CPU_BIND_THREAD=YES.*)",
        // TODO: 56520 Accuracy mismatch
        R"(.*ReduceOpsLayerTest.*type=Mean_.*netPRC=(I64|I32).*)",
@ -95,7 +95,6 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*(Auto|Multi).*Behavior.*ExecutableNetworkBaseTest.*CheckExecGraphInfoSerialization.*)",
        R"(.*Behavior.*ExecutableNetworkBaseTest.*canExport.*)",
        R"(.*Behavior.*ExecutableNetworkBaseTest.*canSetConfigToExecNetWithIncorrectConfig.*)",
        R"(.*OVExecutableNetworkBaseTest.*canLoadCorrectNetworkToGetExecutableWithIncorrectConfig.*)",
        R"(.*Hetero.*Behavior.*ExecutableNetworkBaseTest.*ExecGraphInfo.*)",
        R"(.*Hetero.*Behavior.*ExecutableNetworkBaseTest.*CanCreateTwoExeNetworksAndCheckFunction.*)",

@ -126,8 +126,7 @@ TEST_P(ActivationLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    run();
    // TODO: Should be uncommented after updating the CheckPluginRelatedResults() method
    // CheckPluginRelatedResults(executableNetwork, "Eltwise");
    CheckPluginRelatedResults(executableNetwork, "Eltwise");
}

@ -163,8 +162,7 @@ std::vector<std::vector<ov::Shape>> basic4D = {
};

std::vector<Precision> netPrc = {
    // TODO: Should be uncommented after PR #8339 merge
    // Precision::BF16
    Precision::BF16,
    Precision::FP32
};

Some files were not shown because too many files have changed in this diff.