Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-07-08 08:15:46 +09:00
commit 54182c03b2
220 changed files with 8946 additions and 1710 deletions

View File

@ -1,5 +1,20 @@
jobs:
- job: nGraph_ONNX_Lin
- job: OpenVINO_ONNX_CI
strategy:
matrix:
Release:
BUILD_TYPE: 'Release'
PROTOBUF_LITE: 'OFF'
TOX_COMMAND: 'tox && tox -e zoo_models'
Debug:
BUILD_TYPE: 'Debug'
PROTOBUF_LITE: 'OFF'
TOX_COMMAND: 'tox'
Protobuf_lite:
BUILD_TYPE: 'Release'
PROTOBUF_LITE: 'ON'
TOX_COMMAND: 'tox && tox -e zoo_models'
maxParallel: 3
# About 300% of total time
timeoutInMinutes: 90
@ -12,7 +27,6 @@ jobs:
VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 8
BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath)
WORK_DIR: $(Pipeline.Workspace)/_w
MODELS_DIR: /mount/cinfsshare/onnxtestdata
@ -54,31 +68,16 @@ jobs:
submodules: recursive
path: openvino
- script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile .
displayName: 'Docker build'
- script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) .
displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)"
displayName: 'Get models'
- script: |
##wget -O "$(TMP_DIR)/msft.zip" https://onnxruntimetestdata.blob.core.windows.net/models/20191107.zip
##unzip "$(TMP_DIR)/msft.zip" -d "$(MODELS_DIR)/msft"
#unzip "/mnt/onnxtestdata/models/20191107.zip" -d "$(MODELS_DIR)/msft"
#mv $(MODELS_DIR)/msft/opset9/LSTM_Seq_lens_unpacked/seq_lens_sorted $(MODELS_DIR)/msft/opset9/LSTM_Seq_lens_unpacked/test_data_set_0
#mv $(MODELS_DIR)/msft/opset9/LSTM_Seq_lens_unpacked/seq_lens_unsorted $(MODELS_DIR)/msft/opset9/LSTM_Seq_lens_unpacked/test_data_set_1
displayName: 'Get MSFT models'
enabled: false
- script: |
ls -alR $(MODELS_DIR)
ls -alR $(TMP_DIR)
displayName: 'List models'
enabled: false
condition: ne(variables['BUILD_TYPE'], 'Debug')
- script: sudo fallocate -l 48G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h
displayName: 'Create swap'
- script: |
docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "tox && tox -e zoo_models"
displayName: 'Docker run'
docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)"
displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'

.gitmodules vendored
View File

@ -37,4 +37,16 @@
[submodule "thirdparty/ocl/clhpp_headers"]
path = thirdparty/ocl/clhpp_headers
url = https://github.com/KhronosGroup/OpenCL-CLHPP.git
ignore = dirty
ignore = dirty
[submodule "thirdparty/onnx"]
path = thirdparty/onnx/onnx
url = https://github.com/openvinotoolkit/onnx.git
[submodule "thirdparty/protobuf"]
path = thirdparty/protobuf/protobuf
url = https://github.com/protocolbuffers/protobuf.git
[submodule "ngraph/python/pybind11"]
path = ngraph/python/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "thirdparty/ittapi/ittapi"]
path = thirdparty/ittapi/ittapi
url = https://github.com/intel/ittapi.git

View File

@ -63,12 +63,6 @@ function(build_ngraph)
ngraph_set(NGRAPH_PDPD_FRONTEND_ENABLE OFF)
endif()
if(ENABLE_PYTHON)
ngraph_set(NGRAPH_PYTHON_BUILD_ENABLE ON)
else()
ngraph_set(NGRAPH_PYTHON_BUILD_ENABLE OFF)
endif()
if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
ie_add_compiler_flags(-Wno-error=uninitialized -Wno-error=literal-conversion)
elseif(UNIX)

View File

@ -32,12 +32,12 @@ if(COMMAND get_linux_name)
endif()
if(CMAKE_CROSSCOMPILING AND CMAKE_HOST_SYSTEM_NAME MATCHES Linux AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(protoc_version "3.7.1")
set(protoc_version "3.9.2")
RESOLVE_DEPENDENCY(SYSTEM_PROTOC_ROOT
ARCHIVE_LIN "protoc-${protoc_version}-linux-x86_64.tar.gz"
TARGET_PATH "${TEMP}/protoc-${protoc_version}-linux-x86_64"
SHA256 "a1bedd5c05ca51e49f8f254faa3d7331e05b3a806c151fb111d582f154d0fee8"
SHA256 "1d6da1d97d0cbfcd333558afe24533eb3cb48dc1e0ab5e971aa1e50ede8bcf45"
)
debug_message(STATUS "host protoc-${protoc_version} root path = " ${SYSTEM_PROTOC_ROOT})

View File

@ -249,6 +249,25 @@ function(ie_mark_target_as_cc TARGET_NAME)
set_source_files_properties(${sources} PROPERTIES OBJECT_DEPENDS ${GENERATED_HEADER})
endfunction()
# check python package
function(ie_check_pip_package name message_type)
find_package(PythonInterp 3 REQUIRED)
execute_process(
COMMAND ${PYTHON_EXECUTABLE} -m pip show ${name}
RESULT_VARIABLE PIP_EXIT_CODE
OUTPUT_QUIET
)
if(NOT PIP_EXIT_CODE EQUAL 0)
set(${name}_FOUND OFF PARENT_SCOPE)
message(${message_type} "${name} package is not installed. Please use \"${PYTHON_EXECUTABLE} -m pip install ${name}\".")
else()
set(${name}_FOUND ON PARENT_SCOPE)
endif()
endfunction()
# Code style utils
include(cpplint/cpplint)

View File

@ -88,9 +88,12 @@ function(_ie_add_api_validator_post_build_step)
macro(api_validator_get_target_name)
get_target_property(IS_IMPORTED ${target} IMPORTED)
get_target_property(orig_target ${target} ALIASED_TARGET)
if(IS_IMPORTED)
get_target_property(target_location ${target} LOCATION)
get_filename_component(target_name "${target_location}" NAME_WE)
elseif(TARGET "${orig_target}")
set(target_name ${orig_target})
else()
set(target_name ${target})
endif()

View File

@ -5,13 +5,36 @@
include(CheckCXXCompilerFlag)
if (ENABLE_SANITIZER)
set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=address -fno-omit-frame-pointer")
CHECK_CXX_COMPILER_FLAG("-fsanitize-recover=address" SANITIZE_RECOVER_SUPPORTED)
if (SANITIZE_RECOVER_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address")
CHECK_CXX_COMPILER_FLAG("-fsanitize-recover=address" SANITIZE_RECOVER_ADDRESS_SUPPORTED)
if (SANITIZE_RECOVER_ADDRESS_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address")
endif()
set(SANITIZER_LINKER_FLAGS "-fsanitize=address")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address")
endif()
if (ENABLE_UB_SANITIZER)
# TODO: Remove -fno-sanitize=null as thirdparty/ocl/clhpp_headers UBSAN compatibility resolved:
# https://github.com/KhronosGroup/OpenCL-CLHPP/issues/17
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=undefined -fno-sanitize=null")
CHECK_CXX_COMPILER_FLAG("-fsanitize-recover=undefined" SANITIZE_RECOVER_UNDEFINED_SUPPORTED)
if (SANITIZE_RECOVER_UNDEFINED_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=undefined")
endif()
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=undefined")
endif()
if (ENABLE_THREAD_SANITIZER)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=thread")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=thread")
endif()
# common sanitizer options
if (DEFINED SANITIZER_COMPILER_FLAGS)
# ensure symbols are present
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer")
# prevent unloading libraries at runtime, so sanitizer can resolve their symbols
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")
@ -28,23 +51,4 @@ if (ENABLE_SANITIZER)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
endif()
if (ENABLE_THREAD_SANITIZER)
set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=thread -fno-omit-frame-pointer")
set(SANITIZER_LINKER_FLAGS "-fsanitize=thread")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")
if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
else()
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -static-libsan")
endif()
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
endif()
endif()

View File

@ -20,10 +20,12 @@ endif()
# FIXME: ARM cross-compiler generates several "false positive" warnings regarding __builtin_memcpy buffer overflow
ie_dependent_option (TREAT_WARNING_AS_ERROR "Treat build warnings as errors" ON "X86 OR X86_64" OFF)
ie_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF)
ie_dependent_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF "CMAKE_CXX_COMPILER_ID STREQUAL MSVC" OFF)
ie_option (ENABLE_SANITIZER "enable checking memory errors via AddressSanitizer" OFF)
ie_option (ENABLE_UB_SANITIZER "enable UndefinedBehavior sanitizer" OFF)
ie_option (ENABLE_THREAD_SANITIZER "enable checking data races via ThreadSanitizer" OFF)
ie_dependent_option (ENABLE_COVERAGE "enable code coverage" OFF "CMAKE_CXX_COMPILER_ID STREQUAL GNU" OFF)

View File

@ -13,7 +13,8 @@ set_and_check(IE_MAIN_SOURCE_DIR "@IE_MAIN_SOURCE_DIR@") # HDDL
# Variables to export in plugin's projects
set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH")
set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH;")
list(APPEND ie_options CMAKE_CXX_COMPILER_LAUNCHER CMAKE_C_COMPILER_LAUNCHER)
file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path)
message(STATUS "The following CMake options are exported from Inference Engine Developer package")

View File

@ -0,0 +1,50 @@
#
# Copyright 2020 Intel Corporation.
#
# LEGAL NOTICE: Your use of this software and any required dependent software
# (the "Software Package") is subject to the terms and conditions of
# the Intel(R) OpenVINO(TM) Distribution License for the Software Package,
# which may also include notices, disclaimers, or license terms for
# third party or open source software included in or with the Software Package,
# and your use indicates your acceptance of all such terms. Please refer
# to the "third-party-programs.txt" or other similarly-named text file
# included with the Software Package for additional details.
#
if(DEFINED OECORE_BASE_DIR)
# OECORE_BASE_DIR was passed via CMake command line, nothing to do
elseif(DEFINED ENV{OECORE_BASE_DIR})
# User sets OECORE_BASE_DIR environment variable
set(OECORE_BASE_DIR $ENV{OECORE_BASE_DIR})
elseif(DEFINED ENV{OECORE_NATIVE_SYSROOT})
# OECORE_NATIVE_SYSROOT is a default environment variable for the OECore toolchain
set(OECORE_BASE_DIR "$ENV{OECORE_NATIVE_SYSROOT}/../..")
else()
# Use default value
set(OECORE_BASE_DIR "/usr/local/oecore-x86_64")
endif()
set(OECORE_TARGET_NAME "aarch64-ese-linux")
set(OECORE_TARGET_SYSROOT "${OECORE_BASE_DIR}/sysroots/${OECORE_TARGET_NAME}")
set(OECORE_HOST_SYSROOT "${OECORE_BASE_DIR}/sysroots/x86_64-esesdk-linux")
set(OECORE_HOST_COMPILER_BIN_DIR "${OECORE_HOST_SYSROOT}/usr/bin/${OECORE_TARGET_NAME}")
set(CMAKE_SYSTEM_NAME "Linux")
set(CMAKE_SYSTEM_PROCESSOR "aarch64")
set(CMAKE_SYSROOT "${OECORE_TARGET_SYSROOT}")
set(CMAKE_C_COMPILER "${OECORE_HOST_COMPILER_BIN_DIR}/aarch64-ese-linux-gcc")
set(CMAKE_CXX_COMPILER "${OECORE_HOST_COMPILER_BIN_DIR}/aarch64-ese-linux-g++")
set(CMAKE_C_FLAGS_INIT "-mcpu=cortex-a53 -mtune=cortex-a53 --sysroot=${OECORE_TARGET_SYSROOT}")
set(CMAKE_CXX_FLAGS_INIT "-mcpu=cortex-a53 -mtune=cortex-a53 --sysroot=${OECORE_TARGET_SYSROOT}")
set(CMAKE_EXE_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}")
set(CMAKE_SHARED_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}")
set(CMAKE_MODULE_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}")
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

View File

@ -48,7 +48,6 @@ if(NOT ENABLE_DOCKER)
LIBRARY DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests EXCLUDE_FROM_ALL)
endif()
set(LINKCHECKER_PY "" CACHE FILEPATH "Path to linkchecker.py for documentation check")
set(OMZ_DOCS_DIR "" CACHE PATH "Path to open_model_zoo documentation")
set(WORKBENCH_DOCS_DIR "" CACHE PATH "Path to workbench documentation")
set(POT_DOCS_DIR "" CACHE PATH "Path to post-training-compression-tool documentation")
@ -56,18 +55,14 @@ set(GST_DOCS_DIR "" CACHE PATH "Path to gst-video-analytics documentation")
function(build_docs)
find_package(Doxygen REQUIRED dot)
find_package(PythonInterp 3 REQUIRED)
find_package(LATEX REQUIRED)
execute_process(
COMMAND ${PYTHON_EXECUTABLE} -m pip show lxml
RESULT_VARIABLE PIP_EXIT_CODE
OUTPUT_QUIET
)
ie_check_pip_package(lxml FATAL_ERROR)
ie_check_pip_package(LinkChecker WARNING)
if (NOT ${PIP_EXIT_CODE} EQUAL 0)
message(FATAL_ERROR "lxml package is not installed. Please use \"pip install lxml\".")
endif()
find_host_program(LINKCHECKER_PY
NAMES linkchecker
DOC "linkchecker tools for documentation check")
set(DOCS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(DOXYGEN_DIR "${OpenVINO_SOURCE_DIR}/docs/doxygen")
@ -357,7 +352,7 @@ function(build_docs)
if(EXISTS "${LINKCHECKER_PY}")
add_custom_target(docs_check
COMMAND ${PYTHON_EXECUTABLE} "${LINKCHECKER_PY}" -v "${DOCS_BUILD_DIR}/html/"
COMMAND "${LINKCHECKER_PY}" -v "${DOCS_BUILD_DIR}/html/"
COMMENT "Check links in generated documentation"
WORKING_DIRECTORY "${DOCS_BUILD_DIR}"
VERBATIM)

View File

@ -210,11 +210,6 @@ It's allowed to specify additional build options (e.g. to build CMake project on
### Run Your Application
> **NOTE**: Before running, make sure you completed **Set the Environment Variables** section in [OpenVINO Installation](../../inference-engine/samples/hello_nv12_input_classification/README.md) document so that the application can find the libraries.
To run compiled applications on Microsoft* Windows* OS, make sure that Microsoft* Visual C++ 2017
Redistributable and Intel® C++ Compiler 2017 Redistributable packages are installed and
`<INSTALL_DIR>/bin/intel64/Release/*.dll` files are placed to the
application folder or accessible via `%PATH%` environment variable.
Before running, make sure you completed **Set the Environment Variables** section in [OpenVINO Installation](../../inference-engine/samples/hello_nv12_input_classification/README.md) document so that the application can find the libraries.
[integration_process]: img/integration_process.png

View File

@ -1,6 +1,5 @@
openvino/inference-engine/samples/hello_reshape_ssd/README.md
openvino/docs/index.md
inference-engine/include/ie_icnn_network.hpp
openvino/docs/get_started/get_started_dl_workbench.md
openvino/docs/get_started/get_started_linux.md
openvino/docs/get_started/get_started_raspbian.md
@ -11,25 +10,14 @@ openvino/docs/install_guides/deployment-manager-tool.md
openvino/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md
openvino/docs/ovsa/ovsa_get_started.md
openvino/inference-engine/ie_bridges/c/docs/api_overview.md
inference-engine/include/cpp/ie_infer_request.hpp
inference-engine/include/ie_parallel.hpp
inference-engine/include/gpu/gpu_context_api_ocl.hpp
inference-engine/include/gpu/gpu_context_api_va.hpp
inference-engine/include/ie_plugin_config.hpp
inference-engine/include/ie_unicode.hpp
inference-engine/include/vpu/myriad_config.hpp
inference-engine/include/vpu/vpu_config.hpp
inference-engine/include/vpu/vpu_plugin_config.hpp
openvino/docs/benchmarks/performance_int8_vs_fp32.md
openvino/docs/get_started/get_started_macos.md
openvino/docs/optimization_guide/dldt_optimization_guide.md
openvino/docs/IE_DG/ShapeInference.md
inference-engine/include/details/ie_so_pointer.hpp
inference-engine/include/ie_compound_blob.h
inference-engine/include/ie_data.h
inference-engine/include/ie_blob.h
inference-engine/include/ie_precision.hpp
inference-engine/include/ie_remote_context.hpp
inference-engine/include/gpu/gpu_context_api_dx.hpp
build/docs/openvino_docs.xml
openvino/docs/install_guides/installing-openvino-linux-ivad-vpu.md
openvino/docs/install_guides/installing-openvino-linux-ivad-vpu.md
inference-engine/include/ie_parallel.hpp
inference-engine/include/ie_plugin_config.hpp
inference-engine/include/vpu/myriad_config.hpp
inference-engine/include/vpu/vpu_config.hpp
inference-engine/include/vpu/vpu_plugin_config.hpp

View File

@ -913,12 +913,14 @@ EXCLUDE_SYMBOLS = InferenceEngine::details \
DECLARE_*METRIC_KEY \
DECLARE_*METRIC_VALUE \
DECLARE_*CONFIG_KEY \
DECLARE_VPU_CONFIG \
VPU_CONFIG_KEY \
VPU_CONFIG_VALUE \
VPU_METRIC \
DECLARE_*CONFIG_VALUE \
DECLARE_PARAM_KEY_IMPL \
TBB_PREVIEW_LOCAL_OBSERVER \
PARTITIONING \
CALL_STATUS_FNC* \
CALL_FNC* \
__PRETTY_FUNCTION__ \
PRINT_COLOR_FORMAT \
PRINT_LAYOUT \
@ -943,6 +945,8 @@ EXCLUDE_SYMBOLS = InferenceEngine::details \
InferenceEngine::parallel_* \
NOMINMAX \
TBB_PREVIEW_NUMA_SUPPORT \
TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION \
_TBB_REDUCE_FUNC \
IE_THREAD_*
# The EXAMPLE_PATH tag can be used to specify one or more files or directories

View File

@ -6,31 +6,27 @@
**Short description**: *Atan* performs element-wise inverse tangent (arctangent) operation with given tensor.
**Attributes**:
No attributes available.
**Inputs**
* **1**: An tensor of type *T*. **Required.**
**Outputs**
* **1**: The result of element-wise atan operation. A tensor of type *T*.
**Types**
* *T*: any numeric type.
*atan* does the following with the input tensor *a*:
**Detailed description**: Operation takes one input tensor and performs the element-wise inverse tangent function on it, based on the following mathematical formula:
\f[
a_{i} = atan(a_{i})
\f]
**Examples**
**Attributes**: *Atan* operation has no attributes.
*Example 1*
**Inputs**
* **1**: A tensor of type *T* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of element-wise *Atan* applied to the input tensor. A tensor of type *T* and same shape as the input tensor.
**Types**
* *T*: any supported numeric type.
**Examples**
```xml
<layer ... type="Atan">

View File

@ -4,11 +4,15 @@
**Category**: Arithmetic unary operation
**Short description**: *Sinh* performs element-wise hyperbolic sine (sinh) operation with given tensor.
**Short description**: *Sinh* performs element-wise hyperbolic sine (sinh) operation on a given input tensor
**Attributes**:
**Detailed description**: *Sinh* performs element-wise hyperbolic sine (sinh) operation on a given input tensor, based on the following mathematical formula:
No attributes available.
\f[
a_{i} = sinh(a_{i})
\f]
**Attributes**: *Sinh* operation has no attributes.
**Inputs**
@ -16,21 +20,13 @@
**Outputs**
* **1**: The result of element-wise sinh operation. A tensor of type *T*.
* **1**: The result of element-wise *Sinh* operation applied to the input tensor. A tensor of type *T* and the same shape as input tensor.
**Types**
* *T*: any numeric type.
* *T*: any supported numeric type.
*sinh* does the following with the input tensor *a*:
\f[
a_{i} = sinh(a_{i})
\f]
**Examples**
*Example 1*
**Example**
```xml
<layer ... type="Sinh">

View File

@ -44,7 +44,7 @@ Output(i,j,k) = max(Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end
**Outputs**:
* **1**: Output of type *T* and shape `[N, C, H_out]`, `[N, C, H_out, W_out]` or `[N, C, D_out, H_out, W_out]`.
* **2**: Output of type specified by *index_element_type* and same shape as the first output containing indices of elements in the first output. The values of indices are computed as if input was flatten 1-D tensor, so the values are in the range `[0, N * C * H * W * D)`.
* **2**: Output of type specified by *index_element_type* and same shape as the first output containing indices of elements in the first output. The values of indices are computed as if the input spatial dimensions were flattened, so the values are in the range `[0, H * W * D)` (e.g. for a 4D input, the element at spatial position `(h, w)` maps to index `h * W + w`).
**Types**

View File

@ -235,9 +235,8 @@ IEStatusCode ie_core_create(const char *xml_config_file, ie_core_t **core) {
IEStatusCode status = IEStatusCode::OK;
try {
std::unique_ptr<ie_core_t> tmp(new ie_core_t);
tmp->object = IE::Core(xml_config_file);
*core = tmp.release();
auto object = IE::Core(xml_config_file);
*core = new ie_core_t { std::move(object) };
} CATCH_IE_EXCEPTIONS
return status;

View File

@ -68,7 +68,7 @@ if(ENABLE_WHEEL)
add_subdirectory(wheel)
endif()
if (NGRAPH_PYTHON_BUILD_ENABLE)
if(TARGET _pyngraph)
add_dependencies(ie_api _pyngraph)
endif()

View File

@ -61,11 +61,6 @@ sudo apt install patchelf
-DENABLE_PYTHON=ON
-DENABLE_WHEEL=ON
```
If you need to include other components to the package you need to enable them too.
For example, to include ngraph python API:
```shellscript
-NGRAPH_PYTHON_BUILD_ENABLE=ON
```
## Running sample

View File

@ -66,7 +66,6 @@ public:
* This method needs to be called to find out input names so they can be used later
* when calling InferenceEngine::InferRequest::SetBlob
*
* @param inputs Reference to InferenceEngine::ConstInputsDataMap object.
* @return A collection that contains string as key, and const InputInfo smart pointer as value
*/
ConstInputsDataMap GetInputsInfo() const;
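For illustration, a minimal sketch of the intended call pattern (the model path and device name are placeholders, not part of this change):

```cpp
#include <inference_engine.hpp>

InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");   // placeholder path
auto execNetwork = core.LoadNetwork(network, "CPU");
InferenceEngine::ConstInputsDataMap inputsInfo = execNetwork.GetInputsInfo();
auto request = execNetwork.CreateInferRequest();
for (const auto& item : inputsInfo) {
    // item.first is the input name accepted by SetBlob / GetBlob
    InferenceEngine::Blob::Ptr blob = request.GetBlob(item.first);
}
```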

View File

@ -235,6 +235,9 @@ public:
bool operator==(const InferRequest&) const noexcept;
};
/**
* @private
*/
template<>
struct InferRequest::SetCallback<std::function<void(InferRequest, StatusCode)>> {
void operator()(std::function<void(InferRequest, StatusCode)> f) {
@ -245,6 +248,9 @@ struct InferRequest::SetCallback<std::function<void(InferRequest, StatusCode)>>
IE_SUPPRESS_DEPRECATED_START
/**
* @private
*/
template<>
struct InferRequest::SetCallback<IInferRequest::CompletionCallback> {
void operator()(IInferRequest::CompletionCallback f) {

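These specializations back the public `InferRequest::SetCompletionCallback` template; a minimal usage sketch, assuming `execNetwork` is an existing `ExecutableNetwork`:

```cpp
auto request = execNetwork.CreateInferRequest();
request.SetCompletionCallback(
    std::function<void(InferenceEngine::InferRequest, InferenceEngine::StatusCode)>(
        [](InferenceEngine::InferRequest r, InferenceEngine::StatusCode status) {
            // invoked when the asynchronous request finishes
        }));
request.StartAsync();
request.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
```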
View File

@ -106,8 +106,8 @@ public:
}
/**
* @brief Returns plane ID of underlying video decoder surface,
* or 0 if no video surface was shared.
* @brief Returns plane ID of underlying video decoder surface, or 0 if no video surface was shared.
* @return Plane ID
*/
uint32_t plane() {
return _ObjFromParams<uint32_t, uint32_t>(getParams(),

View File

@ -39,6 +39,7 @@ public:
/**
* @brief Returns the underlying OpenCL context handle.
* @return `cl_context`
*/
cl_context get() {
return _ObjFromParams<cl_context, gpu_handle_param>(getParams(), GPU_PARAM_KEY(OCL_CONTEXT),
@ -47,7 +48,7 @@ public:
/**
* @brief OpenCL context handle conversion operator for the ClContext object.
* @return Underlying OpenCL context handle
* @return `cl_context`
*/
operator cl_context() {
return get();
@ -55,7 +56,7 @@ public:
/**
* @brief Standard Khronos cl::Context wrapper conversion operator for the ClContext object.
* @return cl::Context object
* @return `cl::Context` object
*/
operator cl::Context() {
return cl::Context(get(), true);
@ -101,6 +102,7 @@ public:
/**
* @brief Returns the underlying OpenCL memory object handle.
* @return underlying OpenCL memory object handle
*/
cl_mem get() {
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(), GPU_PARAM_KEY(MEM_HANDLE),
@ -109,6 +111,7 @@ public:
/**
* @brief OpenCL memory handle conversion operator.
* @return `cl_mem`
*/
operator cl_mem() {
return get();
@ -116,7 +119,7 @@ public:
/**
* @brief Standard Khronos cl::Buffer wrapper conversion operator.
* @return cl::Buffer object
* @return `cl::Buffer` object
*/
operator cl::Buffer() {
return cl::Buffer(get(), true);
@ -144,6 +147,7 @@ public:
/**
* @brief Returns the underlying OpenCL memory object handle.
* @return `cl_mem`
*/
cl_mem get() {
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(), GPU_PARAM_KEY(MEM_HANDLE),
@ -152,6 +156,7 @@ public:
/**
* @brief OpenCL memory handle conversion operator.
* @return `cl_mem`
*/
operator cl_mem() {
return get();
@ -159,7 +164,7 @@ public:
/**
* @brief Standard Khronos cl::Image2D wrapper conversion operator for the ClContext object.
* @return cl::Image2D object
* @return `cl::Image2D` object
*/
operator cl::Image2D() {
return cl::Image2D(get(), true);
@ -269,7 +274,7 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
* @brief This function is used to obtain remote blob object from user-supplied cl::Image2D wrapper object
* @param desc A tensor descriptor object representing remote blob configuration
* @param ctx A remote context used to create remote blob
* @param buffer A cl::Image2D object wrapped by a remote blob
* @param image A cl::Image2D object wrapped by a remote blob
* @return A remote blob instance
*/
static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, cl::Image2D& image) {
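A hedged usage sketch, assuming an existing OpenCL image `image`, a GPU remote context `ctx`, and `height`/`width` values matching the image dimensions:

```cpp
InferenceEngine::TensorDesc desc(InferenceEngine::Precision::U8,
                                 {1, 1, height, width},
                                 InferenceEngine::Layout::NHWC);
// wrap the existing cl::Image2D as a remote blob tied to the GPU context
InferenceEngine::Blob::Ptr blob = InferenceEngine::gpu::make_shared_blob(desc, ctx, image);
```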

View File

@ -36,8 +36,8 @@ public:
using Ptr = std::shared_ptr<VAContext>;
/**
* @brief VADisplay conversion operator for the VAContext object.
* @return Underlying VADisplay object handle
* @brief `VADisplay` conversion operator for the VAContext object.
* @return Underlying `VADisplay` object handle
*/
operator VADisplay() {
return _ObjFromParams<VADisplay, gpu_handle_param>(getParams(),
@ -67,7 +67,7 @@ public:
/**
* @brief VASurfaceID conversion operator for the VASurfaceBlob object.
* @return VA surface handle
* @return `VASurfaceID` handle
*/
operator VASurfaceID() {
return _ObjFromParams<VASurfaceID, uint32_t>(getParams(),
@ -77,6 +77,7 @@ public:
/**
* @brief Returns plane ID of underlying video decoder surface
* @return Plane ID
*/
uint32_t plane() {
return _ObjFromParams<uint32_t, uint32_t>(getParams(),
@ -86,11 +87,16 @@ public:
};
/**
* @brief This function is used to obtain a NV12 compound blob object from NV12 VA decoder output.
* The resulting compound contains two remote blobs for Y and UV planes of the surface.
*/
* @brief This function is used to obtain a NV12 compound blob object from NV12 VA decoder output.
* The resulting compound contains two remote blobs for Y and UV planes of the surface.
* @param height A height of Y plane
* @param width A width of Y plane
* @param ctx A remote context instance
* @param nv12_surf NV12 `VASurfaceID` to create NV12 from
* @return A remote NV12 blob wrapping `VASurfaceID`
*/
static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, VASurfaceID nv12_surf) {
// despite of layout, blob dimensions always follow in N,C,H,W order
// regardless of layout, blob dimensions always follow the N, C, H, W order
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
ParamMap blobParams = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
@ -107,8 +113,12 @@ static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, Remot
}
/**
* @brief This function is used to obtain remote context object from VA display handle
*/
* @brief This function is used to obtain remote context object from VA display handle
* @param core Inference Engine Core object
* @param deviceName A device name to create a remote context for
* @param device A `VADisplay` to create remote context from
* @return A remote context wrapping `VADisplay`
*/
static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceName, VADisplay device) {
ParamMap contextParams = {
{ GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED) },
@ -118,8 +128,13 @@ static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceN
}
/**
* @brief This function is used to obtain remote blob object from VA surface handle
*/
* @brief This function is used to obtain remote blob object from VA surface handle
* @param desc Tensor descriptor
* @param ctx A remote context instance
* @param surface A `VASurfaceID` to create remote blob from
* @param plane An index of a plane inside `VASurfaceID` to create blob from
* @return A remote blob wrapping `VASurfaceID`
*/
static inline VASurfaceBlob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, VASurfaceID surface, uint32_t plane = 0) {
auto casted = std::dynamic_pointer_cast<VAContext>(ctx);
if (nullptr == casted) {
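Taken together, a minimal sketch of the VA interop flow these helpers enable; `network`, `display`, `surface`, `height`, and `width` are assumed to come from an existing model and VA-API decode pipeline:

```cpp
InferenceEngine::Core core;
auto vaCtx = InferenceEngine::gpu::make_shared_context(core, "GPU", display);
// compile the network against the shared context
auto execNet = core.LoadNetwork(network, vaCtx);
// wrap the NV12 decoder surface (Y and UV planes) as a compound blob
auto nv12 = InferenceEngine::gpu::make_shared_blob_nv12(height, width, vaCtx, surface);
```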

View File

@ -304,6 +304,7 @@ public:
/**
* @brief Returns the tensor description
* @return A tensor description
*/
const TensorDesc& getTensorDesc() const noexcept override {
return tensorDesc;
@ -311,6 +312,7 @@ public:
/**
* @brief Returns the tensor description
* @return A tensor description
*/
TensorDesc& getTensorDesc() noexcept override {
return tensorDesc;
@ -395,7 +397,7 @@ public:
*
* @return A LockedMemory object
*/
virtual LockedMemory<void> rwmap()noexcept = 0;
virtual LockedMemory<void> rwmap() noexcept = 0;
/**
* @brief Gets read only access to the memory in virtual space of the process.
@ -419,7 +421,7 @@ public:
*
* @return A LockedMemory object
*/
virtual LockedMemory<const void> rmap()const noexcept = 0;
virtual LockedMemory<const void> rmap() const noexcept = 0;
/**
* @brief Gets "write only direction" access to the memory in virtual space of the process.
@ -446,7 +448,7 @@ public:
*
* @return A LockedMemory object
*/
virtual LockedMemory<void> wmap()noexcept = 0;
virtual LockedMemory<void> wmap() noexcept = 0;
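A minimal sketch of the mapping pattern these methods enable, assuming `blob` is an existing `Blob::Ptr` holding float data:

```cpp
auto mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
if (mblob) {
    auto mapped = mblob->rmap();               // LockedMemory<const void>
    const float* data = mapped.as<const float*>();
    // the memory stays mapped until `mapped` goes out of scope
}
```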
protected:
/**
@ -567,11 +569,6 @@ public:
*/
virtual ~TBlob();
/**
* @brief Gets the size of the given type.
*
* @return Size of the type
*/
size_t element_size() const noexcept override {
return sizeof(T);
}
@ -594,9 +591,6 @@ public:
return std::move(lockme<const T>());
}
/**
* @brief Allocates or reallocates memory
*/
void allocate() noexcept override {
const auto allocator = getAllocator();
const auto rawHandle = allocator->alloc(byteSize());
@ -612,27 +606,14 @@ public:
});
}
/**
* @brief Frees all allocated data
*/
bool deallocate() noexcept override {
return free();
}
/**
* @brief Creates a new LockedMemory instance holding void pointer.
*
* @return LockedMemory instance holding void pointer
*/
LockedMemory<void> buffer() noexcept override {
return std::move(lockme<void>());
}
/**
* @brief Creates a new LockedMemory instance holding constant void pointer.
*
* @return LockedMemory instance holding constant void pointer
*/
LockedMemory<const void> cbuffer() const noexcept override {
return std::move(lockme<const void>());
}
@ -734,6 +715,7 @@ protected:
/**
* @brief Frees handler and cleans up the stored data.
* @return `true` if memory was freed
*/
virtual bool free() {
bool bCanRelease = _handle != nullptr;
@ -753,11 +735,6 @@ protected:
// getTensorDesc().getBlockingDesc().getOffsetPadding());
}
/**
* @brief Gets an allocator or creates a default one.
*
* @return IAllocator instance
*/
const std::shared_ptr<IAllocator>& getAllocator() const noexcept override {
// in case when constructor without allocator was used
if (!_allocator) {
@ -767,9 +744,6 @@ protected:
return _allocator;
}
/**
* @brief Returns handle to the stored data.
*/
void* getHandle() const noexcept override {
return _handle.get();
}

View File

@ -73,16 +73,19 @@ public:
/**
* @brief Always returns an empty LockedMemory object
* @return Empty locked memory
*/
LockedMemory<void> buffer() noexcept override;
/**
* @brief Always returns an empty LockedMemory object
* @return Empty locked memory
*/
LockedMemory<const void> cbuffer() const noexcept override;
/**
* @brief Returns the number of underlying blobs in the compound blob
* @return A number of underlying blobs
*/
size_t size() const noexcept override;
@ -109,9 +112,6 @@ protected:
*/
std::vector<Blob::Ptr> _blobs;
/**
* @brief Returns nullptr as CompoundBlob is not allocator-based
*/
const std::shared_ptr<IAllocator>& getAllocator() const noexcept override;
};
@ -148,21 +148,25 @@ public:
/**
* @brief Returns a shared pointer to Y plane
* @return Y plane
*/
virtual Blob::Ptr& y() noexcept;
/**
* @brief Returns a shared pointer to Y plane
* @return Y plane
*/
virtual const Blob::Ptr& y() const noexcept;
/**
* @brief Returns a shared pointer to UV plane
* @return UV plane
*/
virtual Blob::Ptr& uv() noexcept;
/**
* @brief Returns a shared pointer to UV plane
* @return UV plane
*/
virtual const Blob::Ptr& uv() const noexcept;
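A short sketch of reading the planes back, assuming `blob` is an existing `Blob::Ptr` that actually wraps an NV12 compound blob:

```cpp
auto nv12 = InferenceEngine::as<InferenceEngine::NV12Blob>(blob);
if (nv12) {
    InferenceEngine::Blob::Ptr y  = nv12->y();   // luma plane
    InferenceEngine::Blob::Ptr uv = nv12->uv();  // interleaved chroma plane
}
```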

View File

@ -93,6 +93,7 @@ public:
/**
* @brief Gets the layout value for this Data instance
* @return Layout
*/
Layout getLayout() const;

View File

@ -264,9 +264,9 @@ DECLARE_CONFIG_VALUE(HYBRID_AWARE);
* (and what is the optimal number of streams)
* - finally, specifying the positive integer value creates the requested number of streams
*/
DECLARE_CONFIG_KEY(CPU_THROUGHPUT_STREAMS);
DECLARE_CONFIG_VALUE(CPU_THROUGHPUT_NUMA);
DECLARE_CONFIG_VALUE(CPU_THROUGHPUT_AUTO);
DECLARE_CONFIG_KEY(CPU_THROUGHPUT_STREAMS);
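For reference, a hedged sketch of setting this key through the `Core` API (the "CPU" device name is an assumption for the example):

```cpp
InferenceEngine::Core core;
// let the runtime pick the number of streams:
core.SetConfig({{CONFIG_KEY(CPU_THROUGHPUT_STREAMS), CONFIG_VALUE(CPU_THROUGHPUT_AUTO)}}, "CPU");
// or request an explicit number:
core.SetConfig({{CONFIG_KEY(CPU_THROUGHPUT_STREAMS), "4"}}, "CPU");
```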
/**
* @brief The name for setting performance counters option.

View File

@ -91,13 +91,21 @@ public:
precisionInfo.value = CUSTOM;
}
/** @brief Creates custom precision with specific underlined type */
/**
* @brief Creates custom precision with specific underlying type
* @param typeName A string name of precision
* @return Precision converted from string name
*/
template <class T>
static Precision fromType(const char* typeName = nullptr) {
return Precision(8 * sizeof(T), typeName == nullptr ? typeid(T).name() : typeName);
}
/** @brief checks whether given storage class T can be used to store objects of current precision */
/**
* @brief checks whether given storage class T can be used to store objects of current precision
* @param typeName A string name of precision
* @return `true` if the given storage class `T` can be used to store objects of the current precision
*/
template <class T>
bool hasStorageType(const char* typeName = nullptr) const noexcept {
try {

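A brief sketch of both helpers; the values shown are what one would expect, not asserted by this change:

```cpp
// custom precision backed by a C++ type; the name argument is optional
auto custom = InferenceEngine::Precision::fromType<short>("int16_t");
// check that a storage class fits an existing precision
InferenceEngine::Precision fp32(InferenceEngine::Precision::FP32);
bool ok = fp32.hasStorageType<float>();  // expected: true
```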
View File

@ -46,9 +46,6 @@ public:
*/
explicit RemoteBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {}
/**
* @brief Returns the number of bytes per element.
*/
size_t element_size() const noexcept override {
return tensorDesc.getPrecision().size();
}

View File

@ -13,7 +13,7 @@ source_group("src" FILES ${LIBRARY_SRC})
source_group("include" FILES ${LIBRARY_HEADERS})
# Create library file from sources.
add_library(${TARGET_NAME} SHARED EXCLUDE_FROM_ALL ${MAIN_SRC} ${LIBRARY_HEADERS})
add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})
# Find OpenCV components if exist
find_package(OpenCV COMPONENTS core imgproc imgcodecs QUIET)
@ -39,4 +39,4 @@ set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}
if(COMMAND add_clang_format_target)
add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
endif()
endif()

View File

@ -78,7 +78,6 @@ void AutoInferRequest::HotSwapRequests() {
InferenceEngine::SoExecutableNetworkInternal tempSoExecNetwork;
if (_autoExecutableNetwork->TryGetActualNetwork(tempSoExecNetwork)) {
_alreadyActualNetwork = true;
std::cout << "!!! DEBUG: HotSwapRequests !!!" << std::endl;
_inferRequest = {tempSoExecNetwork, tempSoExecNetwork->CreateInferRequest()};
_inferRequest->SetCallback(_callback);
}

View File

@ -84,14 +84,11 @@ std::shared_ptr<AutoExecutableNetwork> AutoInferencePlugin::LoadNetworkImpl(cons
[core, modelPath, network](const std::string& device)
-> IE::SoExecutableNetworkInternal {
IE::SoExecutableNetworkInternal executableNetwork;
std::cout << "!!! DEBUG: Starting Async loading to the " << device << " !!!" << std::endl;
std::cout << "!!! DEBUG: device full name: " << core->GetMetric(device, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>() << std::endl;
if (!modelPath.empty()) {
executableNetwork = core->LoadNetwork(modelPath, device, {});
} else {
executableNetwork = core->LoadNetwork(network, device, {});
}
std::cout << "!!! DEBUG: " << device << " was loaded !!!" << std::endl;
return executableNetwork;
};

View File

@ -61,6 +61,7 @@
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/convert_gather_0d.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
@ -191,6 +192,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
manager.register_pass<ngraph::pass::ConvertNMS4ToNMS5>();
manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConvertGather0D>();
manager.register_pass<ngraph::pass::ConvertDeformableConv8To1>();
static const precisions_array convert_precision_list {
{ngraph::element::i64, ngraph::element::i32},

View File

@ -208,5 +208,8 @@ REGISTER_FACTORY(v6, MVN);
// ------------------------------ Supported v7 ops ------------------------------ //
REGISTER_FACTORY(v7, Gather);
// ------------------------------ Supported v8 ops ------------------------------ //
REGISTER_FACTORY(v8, Gather);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);

View File

@ -57,51 +57,8 @@ static cldnn::gather::gather_axis GetGatherAxis(int32_t axis, cldnn::format inpu
}
}
void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather>& op) {
p.ValidateInputs(op, {2, 3});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
int32_t axis = static_cast<int32_t>(op->get_axis());
std::vector<cldnn::primitive_id> reorderedInputs;
reorderedInputs.resize(inputPrimitives.size());
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// clDNN primitive does not support i64 inputs,
// so we need additional reorders to convert them to i32
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
auto preprocessPrim = cldnn::reorder(reorderPrimName,
inputPrimitives[portIndex],
targetFormat,
cldnn::data_types::i32);
p.AddPrimitive(preprocessPrim);
p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
reorderedInputs[portIndex] = reorderPrimName;
} else {
reorderedInputs[portIndex] = inputPrimitives[portIndex];
}
}
auto outLayout = DefaultFormatForDims(op->get_output_shape(0).size());
auto gatherPrim = cldnn::gather(layerName,
reorderedInputs[0],
reorderedInputs[1],
GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size())),
outLayout,
CldnnTensorFromIEDims(op->get_output_shape(0)));
p.AddPrimitive(gatherPrim);
p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, Gather);
void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v7::Gather>& op) {
p.ValidateInputs(op, {2, 3, 4});
template <typename T>
void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t batch_dim = 0, bool support_neg_ind = false) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
@ -136,11 +93,32 @@ void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v7::Gather>& o
GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size())),
outLayout,
CldnnTensorFromIEDims(op->get_output_shape(0)),
op->get_batch_dims());
batch_dim,
support_neg_ind);
p.AddPrimitive(gatherPrim);
p.AddPrimitiveToProfiler(op);
}
void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather>& op) {
p.ValidateInputs(op, {2, 3});
CreateGatherOpBase<ngraph::op::v1::Gather>(p, op);
}
REGISTER_FACTORY_IMPL(v1, Gather);
void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v7::Gather>& op) {
p.ValidateInputs(op, {2, 3, 4});
CreateGatherOpBase<ngraph::op::v7::Gather>(p, op, op->get_batch_dims());
}
REGISTER_FACTORY_IMPL(v7, Gather);
void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v8::Gather>& op) {
p.ValidateInputs(op, {2, 3, 4});
CreateGatherOpBase<ngraph::op::v8::Gather>(p, op, op->get_batch_dims(), true);
}
REGISTER_FACTORY_IMPL(v8, Gather);
} // namespace CLDNNPlugin

View File

@ -25,6 +25,7 @@
#include "dnn_types.h"
#include "gna_types.h"
#include "gna_limitations.hpp"
#include "layers/gna_convolution_layer.hpp"
#if GNA_LIB_VER == 2
#include <gna2-model-api.h>
@ -50,6 +51,9 @@
using namespace GNAPluginNS::backend;
using GNAPluginNS::GNAConvolutionLayer::outputFromConv;
using GNAPluginNS::GNAConvolutionLayer::outputFromPooling;
using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy;
void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
dump_write_index = index;
@ -152,8 +156,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
uint32_t num_bytes_per_bias,
uint32_t num_filters,
uint32_t num_filter_coefficients,
uint32_t num_feature_map_rows,
uint32_t num_feature_map_columns,
const uint32_t convStride,
float weight_scale_factor,
float output_scale_factor,
void *&ptr_inputs,
@ -177,8 +180,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
comp.op.conv1D.num_bytes_per_bias = num_bytes_per_bias;
comp.op.conv1D.num_filters = num_filters;
comp.op.conv1D.num_filter_coefficients = num_filter_coefficients;
comp.op.conv1D.num_feature_map_rows = num_feature_map_rows;
comp.op.conv1D.num_feature_map_columns = num_feature_map_columns;
comp.op.conv1D.convStride = convStride;
comp.op.conv1D.weight_scale_factor = weight_scale_factor;
comp.output_scale_factor = output_scale_factor;
comp.input_scale_factor = output_scale_factor / weight_scale_factor;
@ -195,18 +197,17 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
ptr_outputs = &comp.ptr_outputs;
}
if (comp.num_columns_in % 8 != 0) {
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << comp.num_columns_in <<
if (num_columns_in % 8 != 0) {
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in <<
") is not a multiply by 8";
}
if (comp.op.conv1D.num_filters < GNALimitations::convMinFiltersNum ||
comp.op.conv1D.num_filters > GNALimitations::convMaxFiltersNum ||
comp.op.conv1D.num_filters % GNALimitations::convFiltersNumDivider != 0) {
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << comp.op.conv1D.num_filters;
if (num_filters < GNALimitations::convMinFiltersNum ||
num_filters > GNALimitations::convMaxFiltersNum ||
num_filters % GNALimitations::convFiltersNumDivider != 0) {
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
}
auto filter_stride_size = comp.op.conv1D.num_feature_map_columns;
auto max_number_of_out_elements = (comp.num_columns_in - comp.op.conv1D.num_filter_coefficients) / filter_stride_size + 1;
if (comp.num_columns_out / max_number_of_out_elements != comp.op.conv1D.num_filters) {
auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);
if (num_columns_out / max_number_of_out_elements != num_filters) {
THROW_GNA_EXCEPTION << "Number of outputs or feature map config is incorrect in Convolutional1DComponent";
}
}
@ -538,8 +539,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename)
auto &conv = components[k].op.conv1D;
graph << " <TR><TD> num_filters</TD><TD>" << conv.num_filters<< "</TD></TR>\n";
graph << " <TR><TD> num_filter_coefficients</TD><TD>" << conv.num_filter_coefficients<< "</TD></TR>\n";
graph << " <TR><TD> num_feature_map_rows</TD><TD>" << conv.num_feature_map_rows<< "</TD></TR>\n";
graph << " <TR><TD> num_feature_map_columns</TD><TD>" << conv.num_feature_map_columns<< "</TD></TR>\n";
graph << " <TR><TD> conv_stride</TD><TD>" << conv.convStride<< "</TD></TR>\n";
graph << " <TR><TD> wscale</TD><TD>" << conv.weight_scale_factor<< "</TD></TR>\n";
graph << " <TR><TD> wbit</TD><TD>" << conv.num_bytes_per_weight<< "</TD></TR>\n";
graph << " <TR><TD> bbit</TD><TD>" << conv.num_bytes_per_bias<< "</TD></TR>\n";
@ -936,16 +936,14 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
case kDnnConvolutional1dOp: {
uint32_t num_filters = component[i].op.conv1D.num_filters;
uint32_t num_filter_coefficients = component[i].op.conv1D.num_filter_coefficients;
uint32_t num_feature_map_rows = component[i].op.conv1D.num_feature_map_rows;
uint32_t num_feature_map_columns = component[i].op.conv1D.num_feature_map_columns;
const auto convStride = component[i].op.conv1D.convStride;
uint32_t num_bytes_per_weight = component[i].op.conv1D.num_bytes_per_weight;
uint32_t num_bytes_per_bias = component[i].op.conv1D.num_bytes_per_bias;
float weight_scale_factor = component[i].op.conv1D.weight_scale_factor;
float output_scale_factor = component[i].output_scale_factor;
out_file << "<num_filters> " << std::dec << num_filters << "\n";
out_file << "<num_filter_coefficients> " << std::dec << num_filter_coefficients << "\n";
out_file << "<num_feature_map_rows> " << std::dec << num_feature_map_rows << "\n";
out_file << "<num_feature_map_columns> " << std::dec << num_feature_map_columns << "\n";
out_file << "<conv_stride> " << std::dec << convStride << "\n";
if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
@ -1362,35 +1360,6 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
return n;
}
namespace {
uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride) {
// floor[(in - flt)/stride] + 1, GNA Spec 1.24
if (flt > in || flt == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, filter, stride) = (" << in << "," << flt << "," << stride << ")";
}
return (in - flt) / stride + 1;
}
uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride) {
// ceil[(in - window)/stride] + 1, GNA Spec 1.24
if (window > in || window == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
}
if (window == in) return 1;
return (in - window - 1) / stride + 2;
}
uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride) {
// floor[(in - 1)/stride] + 1, GNA 1.0/2.0 HW Spec
if (in == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, stride) = (" << in << "," << stride << ")";
}
return (in - 1) / stride + 1;
}
} // namespace
#if GNA_LIB_VER == 2
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel) {
Gna2Operation * gnaOperation;
@ -1593,7 +1562,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
comp.op.conv1D.ptr_biases),
nullptr,
create_shape1D_parameter(
comp.op.conv1D.num_feature_map_columns),
comp.op.conv1D.convStride),
nullptr,
nullptr);
@ -1619,11 +1588,11 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
pConvolutionalLayer->nBytesBias = component[i].op.conv1D.num_bytes_per_bias;
pConvolutionalLayer->nBytesFilterCoefficient = component[i].op.conv1D.num_bytes_per_weight;
pConvolutionalLayer->nFilters = component[i].op.conv1D.num_filters;
pConvolutionalLayer->nFilterRows = comp.op.conv1D.num_filter_coefficients / comp.op.conv1D.num_feature_map_columns;
pConvolutionalLayer->nFilterRows = comp.op.conv1D.num_filter_coefficients / comp.op.conv1D.convStride;
pConvolutionalLayer->nFilterCoefficients = component[i].op.conv1D.num_filter_coefficients;
pConvolutionalLayer->nFeatureMaps = 1;
pConvolutionalLayer->nFeatureMapRows = component[i].op.conv1D.num_feature_map_rows;
pConvolutionalLayer->nFeatureMapColumns = component[i].op.conv1D.num_feature_map_columns;
pConvolutionalLayer->nFeatureMapColumns = component[i].op.conv1D.convStride;
pConvolutionalLayer->nFeatureMapRows = pLayer->nInputColumns / pConvolutionalLayer->nFeatureMapColumns;
pConvolutionalLayer->poolType = INTEL_NO_POOLING; // will be overwritten
pConvolutionalLayer->nPoolSize = 0; // will be overwritten
pConvolutionalLayer->nPoolStride = 0; // will be overwritten
@ -1750,8 +1719,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
auto fltStrideSz = pConvolutionalLayer->nFeatureMaps * pConvolutionalLayer->nFeatureMapColumns; // always move 1 "row"
auto outFromConv = outputFromConv(pLayer->nInputColumns, nFltSize, fltStrideSz);
// FLAT input matrix, pooled outputs per filter
// TODO: Issue 50386 check why (outFromConv - 1) an not (outFromConv - nPoolSize)
pLayer->nOutputColumns = pConvolutionalLayer->nFilters * ((outFromConv - 1) / pConvolutionalLayer->nPoolStride + 1);
pLayer->nOutputColumns = pConvolutionalLayer->nFilters * outputFromPoolingLegacy(outFromConv, pConvolutionalLayer->nPoolStride);
}
#endif
} else {

View File

@ -97,8 +97,7 @@ public:
uint32_t num_bytes_per_bias,
uint32_t num_filters,
uint32_t num_filter_coefficients,
uint32_t num_feature_map_rows,
uint32_t num_feature_map_columns,
uint32_t convStride,
float weight_scale_factor,
float output_scale_factor,
A *&ptr_inputs,
@ -114,8 +113,7 @@ public:
num_bytes_per_bias,
num_filters,
num_filter_coefficients,
num_feature_map_rows,
num_feature_map_columns,
convStride,
weight_scale_factor,
output_scale_factor,
(void *&) ptr_inputs,
@ -428,8 +426,7 @@ private:
uint32_t num_bytes_per_bias,
uint32_t num_filters,
uint32_t num_filter_coefficients,
uint32_t num_feature_map_rows,
uint32_t num_feature_map_columns,
uint32_t convStride,
float weight_scale_factor,
float output_scale_factor,
void *&ptr_inputs,

View File

@ -146,8 +146,7 @@ typedef struct {
uint32_t num_bytes_per_bias;
uint32_t num_filters;
uint32_t num_filter_coefficients;
uint32_t num_feature_map_rows;
uint32_t num_feature_map_columns;
uint32_t convStride;
float weight_scale_factor;
void *ptr_filters; // filters stored one after the other
void *ptr_biases;

View File

@ -16,6 +16,7 @@ constexpr uint32_t bufferMaxSize = 65528;
constexpr uint32_t convMinFiltersNum = 4;
constexpr uint32_t convMaxFiltersNum = 65532;
constexpr uint32_t convFiltersNumDivider = 4;
constexpr uint32_t convFilterSizeDivider = 8;
constexpr uint32_t convFilterMaxSize = 768;
constexpr uint32_t convEachKernelByteAlignment = 16;
constexpr uint32_t noOfInputsDivisor = 8;

View File

@ -1138,7 +1138,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
if (conv) {
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();

View File

@ -390,6 +390,7 @@ void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGN
dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")"
<< " type: " << GetOperandType(operand.Type) <<
" shape: " << GetSimpleString(operand.Shape) <<
" data: " << operand.Data <<
" layout: ";
DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS);

View File

@ -162,7 +162,7 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
InferenceEngine::details::product(begin(dataOutput->getDims()),
end(dataOutput->getDims())) * dataOutput->getPrecision().size();
if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) {
if (LayerInfo(outFunctionalLayer.first).isConvolutionFilter()) {
size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset");
layerInfoItem.splitOutputLayers.emplace_back(
outFunctionalLayer.first,
@ -351,37 +351,33 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
}
// have to pad input to let the last kernel meet its corresponding input
uint32_t num_inputs = in_width * in_channels;
const auto num_inputs = in_width * in_channels;
uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
// convert to 2D and set GNA input feature map size
uint32_t effectiveStride = convolution._stride_x * convolution._stride_y;
auto convStride = convolution._stride_x * convolution._stride_y;
if (convolution._stride_y != 1) {
effectiveStride = convolution._stride_x;
convStride = convolution._stride_x;
} else if (in_width == 1 && convolution._stride_x != 1) {
effectiveStride = convolution._stride_y;
convStride = convolution._stride_y;
}
uint32_t num_feature_map_columns = in_channels * effectiveStride;
uint32_t num_feature_map_rows = (in_channels * in_width) / num_feature_map_columns;
const auto effectiveStride = in_channels * convStride;
uint32_t num_filters = convolution._out_depth;
uint32_t num_filter_coefficients = single_conv_kernel_size + num_conv_kernel_padding;
uint32_t num_columns_in = num_inputs + num_input_padding;
uint32_t num_columns_out = (((num_inputs - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
uint32_t num_columns_out_unpadded = (((num_inputs - single_conv_kernel_size) / num_feature_map_columns) + 1) * convolution._out_depth;
uint32_t num_columns_out = (((num_inputs - num_filter_coefficients) / effectiveStride) + 1) * convolution._out_depth;
uint32_t num_columns_out_unpadded = (((num_inputs - single_conv_kernel_size) / effectiveStride) + 1) * convolution._out_depth;
uint32_t original_num_feature_map_rows = num_feature_map_rows;
uint32_t original_input_padding = num_input_padding;
uint32_t additional_padding = 0;
// if kernel padding to a multiple of 8 will cause missed outputs, need to pad further
while (num_columns_out < out_batch * out_channels * out_width) {
num_input_padding = original_input_padding + additional_padding;
num_feature_map_rows = original_num_feature_map_rows + (num_input_padding) / num_feature_map_columns;
num_columns_in = num_inputs + num_input_padding;
num_columns_out = (((num_inputs + num_input_padding - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
num_columns_out = (((num_inputs + num_input_padding - num_filter_coefficients) / effectiveStride) + 1) * convolution._out_depth;
dnn->new_num_conv_columns = num_columns_out;
additional_padding += 8;
}
@ -427,8 +423,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
num_bytes_per_bias,
num_filters,
num_filter_coefficients,
num_feature_map_rows,
num_feature_map_columns,
effectiveStride,
weight_scale_factor,
output_scale_factor,
ptr_inputs,
@ -457,8 +452,8 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = num_feature_map_columns;
dnn->num_rotate_columns = original_num_feature_map_rows;
dnn->num_rotate_rows = effectiveStride;
dnn->num_rotate_columns = num_inputs / effectiveStride;
} else {
dnn->do_rotate_input = false;
}
@ -559,20 +554,10 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
const auto outputs = convolution.outData.front();
// have to pad input to let the last kernel meet its corresponding input
uint32_t num_inputs = in_width * in_height * in_channels;
const auto num_inputs = in_width * in_height * in_channels;
uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
// convert to 2D and set GNA input feature map size
uint32_t num_feature_map_columns = in_channels * convolution._stride_x * convolution._stride_y;
if (in_height == 1 && convolution._stride_y != 1) {
num_feature_map_columns = in_channels * convolution._stride_x;
} else if (in_width == 1 && convolution._stride_x != 1) {
num_feature_map_columns = in_channels * convolution._stride_y;
}
uint32_t num_feature_map_rows = (in_channels * in_height * in_width) / num_feature_map_columns;
const uint32_t filter_n = convolution._out_depth;
uint32_t original_num_feature_map_rows = num_feature_map_rows;
// if kernel padding to a multiple of 8 will cause missed outputs, need to pad further
if (num_input_padding == 0) {
@ -638,15 +623,17 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
// TODO: convolution might not be the first layer in sorted order but connected via split, for example - don't know how kaldi will handle that
if (!dnn->do_rotate_input) {
if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = num_feature_map_columns;
dnn->num_rotate_columns = original_num_feature_map_rows;
} else {
dnn->do_rotate_input = false;
if (!dnn->do_rotate_input && inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = in_channels;
if (in_height != 1) {
dnn->num_rotate_rows *= convolution._stride_y;
}
if (in_width != 1) {
dnn->num_rotate_rows *= convolution._stride_x;
}
dnn->num_rotate_columns = num_inputs / dnn->num_rotate_rows;
}
connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -654,7 +641,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
const auto kernelHW = convolution._kernel_y * convolution._kernel_x;
std::vector<uint8_t> transposedWeights;
const auto singleKernelSize = in_channels* kernelHW* convolution.precision.size();
const auto singleKernelSize = in_channels* kernelHW * convolution.precision.size();
const auto kernelPad = Gna2RoundUp(singleKernelSize, 16) - singleKernelSize;
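// Gna2RoundUp aligns the size up to the next multiple of 16 bytes, e.g. a
// 36-byte kernel gets kernelPad = 48 - 36 = 12 bytes of zero padding.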
for (uint32_t k = 0; k < convolution._out_depth; k++) {
uint8_t* ptr_filt_current
@ -1728,8 +1715,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
}
}
void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto filterLayer = dynamic_cast<InferenceEngine::WeightableLayer*> (layer.get());
void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto filterLayer = dynamic_cast<InferenceEngine::ConvolutionLayer*> (layer.get());
if (filterLayer == nullptr) {
return;
@ -1752,62 +1739,57 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock();
const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
const auto noOfInputsDivisor = gnaFlags->input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
uint32_t num_columns_in = GetDataDimSize(inputs, 2);
uint32_t num_rows_out = GetDataDimSize(outputs, 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
const uint32_t originalInputSize = GetDataDimSize(inputs, 1);
const uint32_t originalOutputSize = GetDataDimSize(outputs, 1);
if (originalInputSize != originalOutputSize) {
THROW_GNA_LAYER_EXCEPTION(filterLayer) << "Number of inputs (" << originalInputSize <<
") should be equal to number of outputs (" << originalOutputSize << ")!";
}
const auto numberOfFilters = filterLayer->_out_depth;
const auto convolutionStride = numberOfFilters;
const auto filterWidth = filterLayer->_kernel_x;
const auto minOutputsPerFilter = ALIGN(originalOutputSize, numberOfFilters) / numberOfFilters;
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
const auto numInputsFullyPaddedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor);
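// This inverts outputFromConv(): producing N outputs from a 1-D convolution
// needs at least (N - 1) * stride + filterWidth inputs, which is then rounded
// up to the input divisor required by GNA.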
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
auto numOutputs = GNAConvolutionLayer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
numOutputs *= numberOfFilters;
const auto& biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");
dnn->InitAffineComponent(currentComponent,
num_rows_in + num_padding,
num_columns_in,
num_rows_out,
layer->params["num_rows_for_pwl"] = std::to_string(numOutputs);
dnn->InitConvolutional1DComponent(currentComponent,
numInputsFullyPaddedAndAligned,
numOutputs,
inputs->getPrecision().size(),
outputs->getPrecision().size(),
filterLayer->_weights->getTensorDesc().getPrecision().size(),
biasPrecision.size(),
numberOfFilters,
filterWidth,
convolutionStride,
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
ptr_inputs,
ptr_outputs,
ptr_weights,
ptr_biases,
false);
ptr_biases);
size_t num_data_bytes_out =
InferenceEngine::details::product(
begin(outputs->getDims()), end(outputs->getDims())) * 4;
size_t num_data_bytes_in = num_columns_in *
ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size();
size_t num_data_bytes_in = numInputsFullyPaddedAndAligned * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
if (num_padding == 0) {
gnamem->readonly().push_ptr(ptr_weights,
filterLayer->_weights->cbuffer().as<const void*>(),
filterLayer->_weights->byteSize(),
64);
} else {
auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
auto paddedWeights = elementsIn * num_rows_out;
auto paddedWeightsSize = paddedWeights * filterLayer->precision.size();
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
size_t offset = 0;
for (uint32_t i = 0; i < num_rows_out && size >= offset; i++) {
ie_memcpy(reinterpret_cast<uint8_t*>(data) + offset, size - offset,
filterLayer->_weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * filterLayer->precision.size(),
num_rows_in* filterLayer->precision.size());
offset += (num_rows_in + num_padding) * filterLayer->precision.size();
}
}, 64);
}
gnamem->readonly().push_ptr(ptr_weights,
filterLayer->_weights->cbuffer().as<const void*>(),
filterLayer->_weights->byteSize(),
64);
if (filterLayer->_biases) {
gnamem->readonly().push_ptr(ptr_biases,
@ -1815,7 +1797,7 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
filterLayer->_biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value(ptr_biases, 0.0f, numberOfFilters, 64);
}
}
@ -1878,13 +1860,18 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
}
// TODO: solve this by layer level transformations
auto concatAlignFilter = CNNNetPrevLayer(layer, 0);
if (LayerInfo(concatAlignFilter).isConcatAlignFilter()) {
auto rowsCopiedOffset = concatAlignFilter->GetParamAsInt("rows_copied_offset");
auto prevLayer = CNNNetPrevLayer(layer, 0);
if (LayerInfo(prevLayer).isConcatAlignFilter()) {
auto rowsCopiedOffset = prevLayer->GetParamAsInt("rows_copied_offset");
if (rowsCopiedOffset != 0) {
num_rows -= rowsCopiedOffset / outputs->getPrecision().size();
layer->params["output_offset"] = std::to_string(rowsCopiedOffset);
}
} else if (LayerInfo(prevLayer).isConvolutionFilter()) {
const auto num_rows_for_pwl = prevLayer->GetParamAsInt("num_rows_for_pwl", 0);
if (num_rows_for_pwl != 0) {
num_rows = num_rows_for_pwl;
}
}
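// "num_rows_for_pwl" is stamped onto the filter layer by
// ConvolutionFilterPrimitive (numberOfFilters * outputs per filter), so the
// activation following the filter is sized to the convolution's real output
// count, including the outputs produced by padding.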
size_t num_data_bytes_out = num_columns * num_rows * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns * num_rows * inputs->getPrecision().size();
@ -2135,7 +2122,7 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
{{"FullyConnected", "InnerProduct"}, CREATE(AffinePrimitive)},
{{"Gemm"}, CREATE(GemmPrimitive)},
{{"ScaleShift"}, CREATE(DiagonalPrimitive)},
{{"AffineFilter"}, CREATE(AffineFilterPrimitive)},
{{"ConvolutionFilter"}, CREATE(ConvolutionFilterPrimitive)},
{{"ConcatAlignFilter"}, CREATE(ConcatAlignFilterPrimitive)},
{{"Const"}, CREATE(ConstPrimitive)},
{{"Eltwise"}, CREATE(EltwisePrimitive)}, // same as diagonal while weights are not taken from network, rather than from another output

View File

@ -108,7 +108,7 @@ public:
void CreateLayerPrimitive(InferenceEngine::CNNLayerPtr);
void AffinePrimitive(InferenceEngine::CNNLayerPtr, bool isDiag = false);
void AffineFilterPrimitive(InferenceEngine::CNNLayerPtr);
void ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr);
void ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr);
void DiagonalPrimitive(InferenceEngine::CNNLayerPtr);
void ConstPrimitive(InferenceEngine::CNNLayerPtr);

View File

@ -0,0 +1,79 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_convolution_layer.hpp"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <utility>
#include <vector>
#include <legacy/ie_layers.h>
#include "gna_graph_tools.hpp"
#include "gna_plugin_log.hpp"
namespace GNAPluginNS {
namespace GNAConvolutionLayer {
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
}
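// e.g. an H x W = 16 x 8 input convolved with a kernel of width 8 at stride 1:
// every kernel position spans the full input width, so the kernel only slides
// vertically and the operation degenerates to a 1-D convolution along H.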
// 3D input or 2D kernel
bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth) {
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
}
double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
using KRT = std::pair<uint32_t, double>;
// Empirically determined weights reducers for 2D Convolution
// i.e.:
// for kernelSize >= 9 -> 1.3
// for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
auto reducer = 1.0;
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
const auto kernelSize = conv._kernel_x * conv._kernel_y;
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
if (r != reducers.end())
reducer = r->second;
}
return reducer;
}
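// Note on the lookup above: `reducers` is ordered by descending kernel size,
// so std::lower_bound with the `l.first > r` comparator returns the first
// entry whose threshold is <= kernelSize, e.g. kernelSize 12 -> 1.3,
// kernelSize 8 -> 1.2, kernelSize 6 -> end(), i.e. the reducer stays 1.0.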
uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride) {
// floor[(in - flt)/stride] + 1, GNA Spec 1.24
if (flt > in || flt == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, filter, stride) = (" << in << "," << flt << "," << stride << ")";
}
return (in - flt) / stride + 1;
}
uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride) {
// ceil[(in - window)/stride] + 1, GNA Spec 1.24
if (window > in || window == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
}
if (window == in) return 1;
return (in - window - 1) / stride + 2;
}
uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride) {
// floor[(in - 1)/stride] + 1, GNA 1.0/2.0 HW Spec
// See issue 50386 for details
if (in == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, stride) = (" << in << "," << stride << ")";
}
return (in - 1) / stride + 1;
}
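// A minimal sanity check for the three helpers above; the numeric values are
// arbitrary illustrations and assume <cassert> is included.
inline void checkOutputHelpers() {
    assert(outputFromConv(10, 3, 2) == 4);        // floor((10 - 3) / 2) + 1
    assert(outputFromPooling(10, 3, 2) == 5);     // ceil((10 - 3) / 2) + 1
    assert(outputFromPooling(10, 10, 3) == 1);    // window == in -> single output
    assert(outputFromPoolingLegacy(10, 2) == 5);  // floor((10 - 1) / 2) + 1
}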
} // namespace GNAConvolutionLayer
} // namespace GNAPluginNS

View File

@ -4,46 +4,25 @@
#pragma once
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
#include <cstdint>
#include <legacy/ie_layers.h>
#include "../gna_graph_tools.hpp"
namespace GNAPluginNS {
struct GNAConvolutionLayer {
static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
}
namespace GNAConvolutionLayer {
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth);
// 3D input or 2D kernel
static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth) {
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
}
// 3D input or 2D kernel
bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth);
static double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
using KRT = std::pair<uint32_t, double>;
// Empirically determined weights reducers for 2D Convolution
// i.e.:
// for kernelSize >= 9 -> 1.3
// for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
auto reducer = 1.0;
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
const auto kernelSize = conv._kernel_x * conv._kernel_y;
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
if (r != reducers.end())
reducer = r->second;
}
return reducer;
}
};
} // namespace GNAPluginNS
double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv);
uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride);
uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride);
uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride);
} // namespace GNAConvolutionLayer
} // namespace GNAPluginNS

View File

@ -70,6 +70,7 @@ class LayerInfo {
[this]() { return isFullyConnected(); },
[this]() { return isAffineFilter(); },
[this]() { return isConcatAlignFilter(); },
[this]() { return isConvolutionFilter(); },
[this]() { return isEltwise(); },
[this]() { return isScaleShift(); },
[this]() { return isConvolution(); },
@ -157,6 +158,9 @@ class LayerInfo {
bool isAffineFilter() const noexcept {
return isOfType("AffineFilter");
}
bool isConvolutionFilter() const noexcept {
return isOfType("ConvolutionFilter");
}
bool isRelu() const noexcept {
return isOfType("relu");
}

View File

@ -41,6 +41,7 @@
#include "gna_data_types.hpp"
#include "gna_tensor_tools.hpp"
#include "gna_itt.hpp"
#include "backend/gna_limitations.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
@ -1277,35 +1278,49 @@ void InsertSplitAligningFilterPass::run() {
gnalog() << std::endl;
#endif
auto filterLayer =
std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));
std::make_shared<ConvolutionLayer>(LayerParams({filterName, "ConvolutionFilter", Precision::FP32}));
auto inputData = splitOutput;
size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
size_t
newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
/ bytesPerSplitElement;
IE_ASSERT(filterLayer != nullptr);
// encodes offset to beginning of split layer input
filterLayer->params["offset"] = std::to_string(aligned64_offset / bytesPerSplitElement);
auto dims = splitOutput->getTensorDesc().getDims();
if (dims.size() > 3) {
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
}
auto num_rows_out = dims[1] * (dims.size() != 2 ? dims[2] : 1);
std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);
const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / bytesPerSplitElement;
// TODO: consider using a different number of filters to decrease the number of trailing zeros (additionalPaddingOfFilter)
const auto numberOfFilters = GNALimitations::convMinFiltersNum;
const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, GNALimitations::convFilterSizeDivider);
auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;
for (int i = 0; i != outputSize; i++) {
filterWeights[offset] = 1.0f;
offset += newOutputSize + 1;
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + numberOfFilters)
// offsetOfUnalignment - the leading zeros in the filter
// |
// | additionalPaddingOfFilter = filterSize - offsetOfUnalignment - numberOfFilters
// ____|___ ___|___
// | | | |
// 0 0 ... 0 1 0 0 0 0 ... 0
// 0 0 ... 0 0 1 0 0 0 ... 0
// 0 0 ... 0 0 0 1 0 0 ... 0
// 0 0 ... 0 0 0 0 1 0 ... 0
std::vector<float> filterWeights(filterSize * 4, 0.f);
for (auto f = 0u; f < numberOfFilters; f++) {
filterWeights[f * filterSize + f + offsetOfUnalignment] = 1;
}
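// Worked example, assuming GNALimitations::convMinFiltersNum == 4 and
// convFilterSizeDivider == 8: offsetOfUnalignment == 2 gives
// filterSize = ALIGN(2 + 4, 8) = 8, so filter row f holds its single 1 at
// column 2 + f:
//   row 0: 0 0 1 0 0 0 0 0
//   row 1: 0 0 0 1 0 0 0 0
//   row 2: 0 0 0 0 1 0 0 0
//   row 3: 0 0 0 0 0 1 0 0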
filterLayer->_out_depth = numberOfFilters;
filterLayer->_stride_x = numberOfFilters;
filterLayer->_stride_y = 1;
filterLayer->_kernel_x = filterSize;
filterLayer->_kernel_y = 1;
filterLayer->_padding_x = 0;
filterLayer->_padding_y = 0;
filterLayer->_weights = make_shared_blob<float>(TensorDesc(
inputData->getTensorDesc().getPrecision(),
SizeVector({filterWeights.size()}),
@ -1313,6 +1328,15 @@ void InsertSplitAligningFilterPass::run() {
filterLayer->_weights->allocate();
CopyVectorToBlob(filterLayer->_weights, filterWeights);
std::vector<float> biasWeights(numberOfFilters, 0.f);
filterLayer->_biases = make_shared_blob<float>(TensorDesc(
inputData->getTensorDesc().getPrecision(),
SizeVector({ biasWeights.size() }),
Layout::C));
filterLayer->_biases->allocate();
CopyVectorToBlob(filterLayer->_biases, biasWeights);
auto outData = std::make_shared<Data>(filterName,
TensorDesc(splitOutput->getTensorDesc().getPrecision(),
splitOutput->getTensorDesc().getDims(),

View File

@ -12,7 +12,9 @@
#include "backend/dnn_types.h"
#include "backend/gna_limitations.hpp"
#include "gna_lib_ver_selector.hpp"
#include "layers/gna_convolution_layer.hpp"
using namespace GNAPluginNS::GNAConvolutionLayer;
void CNNFilter32(intel_dnn_component_t *component) {
auto filters = reinterpret_cast<float *>(component->op.conv1D.ptr_filters);
@ -20,11 +22,10 @@ void CNNFilter32(intel_dnn_component_t *component) {
auto input = reinterpret_cast<float *>(component->ptr_inputs);
auto output = reinterpret_cast<float *>(component->ptr_outputs);
const auto convolutionStride = component->op.conv1D.num_feature_map_columns;
const auto convolutionStride = component->op.conv1D.convStride;
const auto filterSize = component->op.conv1D.num_filter_coefficients;
const auto numberOfInputs = component->num_columns_in;
// TODO: reuse outputFromConv() from backend\am_intel_dnn.cpp
const auto numberOfOutputsPerFilter = (numberOfInputs - filterSize) / convolutionStride + 1;
const auto numberOfOutputsPerFilter = outputFromConv(numberOfInputs, filterSize, convolutionStride);
const auto numberOfFilters = component->op.conv1D.num_filters;
std::string layer_name;

View File

@ -775,7 +775,7 @@ public:
}
/**
* @brief Porvides a list of plugin names in registry; physically such plugins may not be created
* @brief Provides a list of plugin names in registry; physically such plugins may not be created
* @return A list of plugin names
*/
std::vector<std::string> GetListOfDevicesInRegistry() const {

View File

@ -98,7 +98,7 @@ class SharedObjectLoader::Impl {
// Exclude current directory from DLL search path process wise.
// If application specific path was configured before then
// current directory is already excluded.
// GetDLLDirectory does not distinguish if aplication specific
// GetDLLDirectory does not distinguish if application specific
// path was set to "" or NULL so reset it to "" to keep
// application safe.
void ExcludeCurrentDirectoryA() {

View File

@ -40,6 +40,7 @@ ngraph::pass::InitConstMask::InitConstMask(const ngraph::AxisSet & dims,
end[dim] = value + 1;
bool skip_dim_value = false;
NGRAPH_SUPPRESS_DEPRECATED_START
CoordinateTransform iter(shape, begin, end);
for (const Coordinate & coord : iter) {
if (!condition(values.at(iter.index(coord)))) {
@ -47,6 +48,7 @@ ngraph::pass::InitConstMask::InitConstMask(const ngraph::AxisSet & dims,
break;
}
}
NGRAPH_SUPPRESS_DEPRECATED_END
if (!skip_dim_value) {
mask->at(dim).insert(value);
}

View File

@ -16,7 +16,7 @@ source_group("src" FILES ${LIBRARY_SRC})
# Create module library
add_library(${TARGET_NAME} MODULE EXCLUDE_FROM_ALL ${LIBRARY_SRC})
add_library(${TARGET_NAME} MODULE ${LIBRARY_SRC})
ie_faster_build(${TARGET_NAME}
UNITY

View File

@ -14,6 +14,7 @@ namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API EliminateUnsqueezeGather;
class TRANSFORMATIONS_API EliminateGatherUnsqueeze;
} // namespace pass
} // namespace ngraph
@ -29,3 +30,15 @@ public:
NGRAPH_RTTI_DECLARATION;
EliminateUnsqueezeGather();
};
/**
* @ingroup ie_transformation_common_api
* @brief Remove Gather -> Unsqueeze pair, if Gather takes a scalar and
* Unsqueeze makes it a 1D tensor
*/
class ngraph::pass::EliminateGatherUnsqueeze : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
EliminateGatherUnsqueeze();
};

View File

@ -0,0 +1,60 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <memory>
#include <transformations_visibility.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/util.hpp>
namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API SimplifyShapeOfSubGraph;
class TRANSFORMATIONS_API SharedShapeOf;
class TRANSFORMATIONS_API GroupedGatherElimination;
} // namespace pass
} // namespace ngraph
/**
* @ingroup ie_transformation_common_api
* @brief SharedShapeOf transformation replaces a group of ShapeOf
* operations with the first ShapeOf in the group. All ShapeOfs in the group
* must be equal and consume the same output port.
*/
class ngraph::pass::SharedShapeOf: public ngraph::pass::FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
};
/**
* @ingroup ie_transformation_common_api
* @brief GroupedGatherElimination transformation replaces a group of Gather
* operations with the first Gather in the group, with an updated indices input,
* provided all Gathers in the group are consumed by the same Concat in incremental order.
*/
class ngraph::pass::GroupedGatherElimination: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
GroupedGatherElimination();
};
/**
* @ingroup ie_transformation_common_api
* @brief SimplifyShapeOfSubGraph transformation runs specific optimizations of shape sub-graphs
*/
class ngraph::pass::SimplifyShapeOfSubGraph: public ngraph::pass::FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
};

View File

@ -0,0 +1,27 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API ConvertDeformableConv8To1;
} // namespace pass
} // namespace ngraph
/**
* @ingroup ie_transformation_common_api
* @brief ConvertDeformableConv8To1 converts v8::DeformableConvolution into v1::DeformableConvolution.
*/
class ngraph::pass::ConvertDeformableConv8To1 : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertDeformableConv8To1();
};

View File

@ -76,6 +76,7 @@
#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::CommonOptimizations, "CommonOptimizations", 0);
@ -85,6 +86,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
// This pass must be called first in pipeline
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::SimplifyShapeOfSubGraph>();
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.register_pass<ngraph::pass::RemoveFilteringBoxesBySize>(); // Resolves dynamism (replaces NonZero), CF needed

View File

@ -7,6 +7,7 @@
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <transformations/utils/utils.hpp>
#include "itt.hpp"
NGRAPH_RTTI_DEFINITION(ngraph::pass::EliminateUnsqueezeGather, "EliminateUnsqueezeGather", 0);
@ -58,3 +59,36 @@ ngraph::pass::EliminateUnsqueezeGather::EliminateUnsqueezeGather() {
auto m = std::make_shared<ngraph::pattern::Matcher>(gather, "EliminateUnsqueezeGather");
register_matcher(m, callback);
}
NGRAPH_RTTI_DEFINITION(ngraph::pass::EliminateGatherUnsqueeze, "EliminateGatherUnsqueeze", 0);
ngraph::pass::EliminateGatherUnsqueeze::EliminateGatherUnsqueeze() {
MATCHER_SCOPE(EliminateGatherUnsqueeze);
const auto gather_indices_label = ngraph::pattern::wrap_type<ngraph::op::Constant>(pattern::rank_equals(0));
const auto gather_axis_label = ngraph::pattern::wrap_type<ngraph::op::Constant>();
const auto gather_label = ngraph::pattern::wrap_type<ngraph::op::util::GatherBase>(
{ngraph::pattern::any_input(), gather_indices_label, gather_axis_label}, pattern::rank_equals(0));
const auto unsqueeze_label = ngraph::pattern::wrap_type<ngraph::opset6::Unsqueeze>(
{gather_label, ngraph::pattern::any_input()}, pattern::rank_equals(1));
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
auto pattern_nodes = m.get_pattern_map();
auto& gather_indices = pattern_nodes.at(gather_indices_label);
auto& gather = pattern_nodes.at(gather_label);
auto& unsqueeze = pattern_nodes.at(unsqueeze_label);
auto new_indices = ngraph::op::util::make_try_fold<ngraph::opset6::Reshape>(gather_indices, opset6::Constant::create(element::i32, {1}, {1}), false);
auto new_gather = gather->clone_with_new_inputs({gather->input_value(0), new_indices, gather->input_value(2)});
new_gather->set_friendly_name(gather->get_friendly_name());
ngraph::copy_runtime_info({unsqueeze, gather}, {new_gather, new_indices});
ngraph::replace_node(unsqueeze, new_gather);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(unsqueeze_label, "EliminateGatherUnsqueeze");
register_matcher(m, callback);
}
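// Schematic effect of the matcher above (output ranks in brackets):
//   before: data -> Gather(indices = scalar Const, axis) [rank 0] -> Unsqueeze(0) [rank 1]
//   after:  data -> Gather(indices = Const of shape {1}, axis) [rank 1]
// The scalar indices are reshaped into a one-element 1-D constant, so the
// Gather itself yields the 1-D tensor and the Unsqueeze becomes redundant.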

View File

@ -0,0 +1,101 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <vector>
#include "itt.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
#include <transformations/common_optimizations/eliminate_unsqueeze_gather.hpp>
#include <transformations/utils/utils.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::SharedShapeOf, "SharedShapeOf", 0);
bool ngraph::pass::SharedShapeOf::run_on_function(std::shared_ptr<ngraph::Function> f) {
RUN_ON_FUNCTION_SCOPE(SharedShapeOf);
bool graph_rewritten = false;
std::map<ngraph::Output<Node>, std::vector<std::shared_ptr<ngraph::Node>>> source_to_shape_of;
for (const auto & node : f->get_ordered_ops()) {
// Recursively apply transformation for sub-graph based operations
if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::SubGraphOp>(node))
if (auto sub_graph = sub_graph_node->get_function())
graph_rewritten |= run_on_function(sub_graph);
if (is_type<ngraph::opset1::ShapeOf>(node) || is_type<ngraph::opset3::ShapeOf>(node))
source_to_shape_of[node->input_value(0)].push_back(node);
}
for (const auto& pair : source_to_shape_of) {
if (pair.second.size() < 2)
continue;
const auto& root_ss = pair.second[0];
for (const auto& child_ss : pair.second)
if (root_ss->get_instance_id() != child_ss->get_instance_id() && root_ss->get_output_element_type(0) == child_ss->get_output_element_type(0))
graph_rewritten |= replace_output_update_name(child_ss->output(0), root_ss->output(0));
}
return graph_rewritten;
}
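// Example: two ShapeOf nodes (opset1 or opset3) consuming the same output port
// collapse into one, provided their output element types match; consumers of
// the duplicate are re-wired to the surviving ShapeOf.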
NGRAPH_RTTI_DEFINITION(ngraph::pass::GroupedGatherElimination, "GroupedGatherElimination", 0);
ngraph::pass::GroupedGatherElimination::GroupedGatherElimination() {
MATCHER_SCOPE(GroupedGatherElimination);
auto concat_label = ngraph::pattern::wrap_type<ngraph::opset1::Concat>(pattern::rank_equals(1));
ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) {
auto concat = m.get_match_root();
OutputVector inputs = concat->input_values();
NodeVector new_ops;
size_t i = 0, original_inputs_size = inputs.size();
while (inputs.size() > i + 1) {
auto curr = inputs[i].get_node_shared_ptr(), next = inputs[i + 1].get_node_shared_ptr();
if (curr->get_type_info() != next->get_type_info() ||
(!is_type<opset1::Gather>(curr) && !is_type<opset7::Gather>(curr)) ||
(curr->input_value(0) != next->input_value(0))) {
++i;
continue;
} // curr and next are the same type of gather which takes data from the same source
auto joint_indices = ngraph::op::util::make_try_fold<opset1::Concat>(OutputVector{curr->input_value(1), next->input_value(1)}, 0);
auto new_gather = curr->clone_with_new_inputs(
{curr->input_value(0), joint_indices, ngraph::opset1::Constant::create(element::i64, {}, {0})});
new_ops.push_back(joint_indices);
new_ops.push_back(new_gather);
inputs.erase(inputs.begin() + i);
inputs[i] = new_gather->output(0);
}
if (original_inputs_size > inputs.size()) {
auto new_concat = std::make_shared<opset1::Concat>(inputs, 0);
new_ops.push_back(new_concat);
new_concat->set_friendly_name(concat->get_friendly_name());
ngraph::copy_runtime_info(concat, new_ops);
ngraph::replace_node(concat, new_concat);
return true;
}
return false;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(concat_label, matcher_name);
this->register_matcher(m, callback);
}
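// Schematic effect, with every Gather reading the same `shape` along axis 0:
//   before: Concat(Gather(shape, {0}), Gather(shape, {1}), Gather(shape, {3}))
//   after:  Concat(Gather(shape, {0, 1, 3}))
// Neighbouring Gathers are merged pairwise, their indices concatenated and
// constant-folded where possible, until no adjacent pair shares a data source.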
NGRAPH_RTTI_DEFINITION(ngraph::pass::SimplifyShapeOfSubGraph, "SimplifyShapeOfSubGraph", 0);
bool ngraph::pass::SimplifyShapeOfSubGraph::run_on_function(std::shared_ptr<ngraph::Function> f) {
RUN_ON_FUNCTION_SCOPE(SimplifyShapeOfSubGraph);
ngraph::pass::Manager manager;
manager.set_per_pass_validation(false);
manager.register_pass<ngraph::pass::EliminateGatherUnsqueeze>();
manager.register_pass<ngraph::pass::SharedShapeOf>();
manager.register_pass<ngraph::pass::GroupedGatherElimination>();
manager.register_pass<ngraph::pass::Validate>();
manager.run_passes(f);
return false;
}

View File

@ -0,0 +1,52 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "itt.hpp"
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertDeformableConv8To1, "ConvertDeformableConv8To1", 0);
ngraph::pass::ConvertDeformableConv8To1::ConvertDeformableConv8To1() {
MATCHER_SCOPE(ConvertDeformableConv8To1);
auto deformable_conv_v8 = pattern::wrap_type<ngraph::opset8::DeformableConvolution>();
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
auto deformable_conv_v8_node = std::dynamic_pointer_cast<ngraph::opset8::DeformableConvolution>(m.get_match_root());
if (!deformable_conv_v8_node)
return false;
if (deformable_conv_v8_node->get_input_size() != 3
|| deformable_conv_v8_node->get_bilinear_interpolation_pad())
return false;
auto arg = deformable_conv_v8_node->input_value(0);
auto offsets = deformable_conv_v8_node->input_value(1);
auto filters = deformable_conv_v8_node->input_value(2);
auto deformable_conv_v1 =
std::make_shared<ngraph::opset1::DeformableConvolution>(arg,
offsets,
filters,
deformable_conv_v8_node->get_strides(),
deformable_conv_v8_node->get_pads_begin(),
deformable_conv_v8_node->get_pads_end(),
deformable_conv_v8_node->get_dilations(),
deformable_conv_v8_node->get_auto_pad(),
deformable_conv_v8_node->get_group(),
deformable_conv_v8_node->get_deformable_group());
deformable_conv_v1->set_friendly_name(deformable_conv_v8_node->get_friendly_name());
ngraph::copy_runtime_info(deformable_conv_v8_node, deformable_conv_v1);
ngraph::replace_node(deformable_conv_v8_node, deformable_conv_v1);
return true;
};
auto m = std::make_shared<pattern::Matcher>(deformable_conv_v8, matcher_name);
register_matcher(m, callback);
}

View File

@ -27,7 +27,6 @@ IeParsedNetwork parseNetwork(const ie::CNNNetwork& network) {
out.networkOutputs = network.getOutputsInfo();
env.log->trace("Got %d inputs and %d outputs", out.networkInputs.size(), out.networkOutputs.size());
IE_ASSERT(!out.networkInputs.empty());
IE_ASSERT(!out.networkOutputs.empty());
env.log->trace("Perform topological sort");

View File

@ -7,6 +7,7 @@
#include <ie_metric_helpers.hpp>
#include <legacy/cnn_network_impl.hpp>
#include <legacy/convert_function_to_cnn_network.hpp>
#include "exec_graph_info.hpp"
#include <myriad_executable_network.h>
#include <vpu/blob_reader.hpp>
@ -25,7 +26,6 @@ namespace MyriadPlugin {
ExecutableNetwork::ExecutableNetwork(
std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr>& devicePool,
const MyriadConfiguration& config,
const std::shared_ptr<ie::ICore> core) :
_config(config),
@ -40,10 +40,6 @@ ExecutableNetwork::ExecutableNetwork(
defaultOutput(_config.pluginLogFilePath()));
_executor = std::make_shared<MyriadExecutor>(_config.forceReset(), std::move(mvnc), logLevel, _log);
_device = _executor->openDevice(devicePool, _config);
const auto& revision = _device->revision();
_actualNumExecutors = config.compileConfig().numExecutors != -1 ? config.compileConfig().numExecutors : DefaultAllocation::numStreams(revision, config);
_supportedMetrics = {
METRIC_KEY(NETWORK_NAME),
@ -54,13 +50,19 @@ ExecutableNetwork::ExecutableNetwork(
};
}
void ExecutableNetwork::openDevice(std::vector<DevicePtr>& devicePool) {
_device = _executor->openDevice(devicePool, _config);
const auto& revision = _device->revision();
_actualNumExecutors = _config.compileConfig().numExecutors != -1 ? _config.compileConfig().numExecutors : DefaultAllocation::numStreams(revision, _config);
}
ExecutableNetwork::ExecutableNetwork(
const ie::CNNNetwork& network,
std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr>& devicePool,
const MyriadConfiguration& config,
const std::shared_ptr<ie::ICore> core) :
ExecutableNetwork(std::move(mvnc), devicePool, config, core) {
ExecutableNetwork(std::move(mvnc), config, core) {
VPU_PROFILE(ExecutableNetwork);
const auto compilerLog = std::make_shared<Logger>(
@ -68,11 +70,9 @@ ExecutableNetwork::ExecutableNetwork(
_config.get<LogLevelOption>(),
defaultOutput(_config.compilerLogFilePath()));
if (_device == nullptr)
IE_THROW() << "No device was detected";
auto compiledGraph = compileNetwork(
network,
_device->_platform,
NC_MYRIAD_X,
_config,
compilerLog,
_core);
@ -84,12 +84,7 @@ ExecutableNetwork::ExecutableNetwork(
_inputInfo = std::move(compiledGraph->inputInfo);
_outputInfo = std::move(compiledGraph->outputInfo);
if (!_device->isBooted()) {
return;
}
const auto& networkName = network.getName();
_executor->allocateGraph(_device, _graphDesc, _graphBlob, compiledGraph->blobHeader, compiledGraph->numActiveStages, networkName, _actualNumExecutors);
if (_config.exclusiveAsyncRequests()) {
ExecutorManager *executorManager = ExecutorManager::getInstance();
_taskExecutor = executorManager->getExecutor("MYRIAD");
@ -100,6 +95,21 @@ ExecutableNetwork::ExecutableNetwork(
idStream << networkName << "_TaskExecutorGetResult" << i;
_taskExecutorGetResultIds.emplace(idStream.str());
}
if (_inputInfo.totalSize == 0) {
_isNetworkConstant = true;
const auto& nGraphFunc = network.getFunction();
const auto& sortedLayers = nGraphFunc->get_ordered_ops();
for (const auto& layer : sortedLayers) {
if (strcmp(layer->get_type_info().name, "Constant") == 0) {
const auto& constOp = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(layer);
auto name = constOp->get_friendly_name();
_constDatas[name] = ie::details::shareWeights(constOp);
}
}
return;
}
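// A compiled input size of zero means the network takes no runtime inputs and
// only re-emits constants; in that case the device is never opened and infer
// requests are served straight from _constDatas (see MyriadInferRequest::GetResult).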
openDevice(devicePool);
_executor->allocateGraph(_device, _graphDesc, _graphBlob, compiledGraph->blobHeader, compiledGraph->numActiveStages, networkName, _actualNumExecutors);
}
void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devicePool, const MyriadConfiguration& configuration) {
@ -110,10 +120,6 @@ void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devic
strm.seekg(currentPos, strm.beg);
strm.read(&_graphBlob[0], blobSize);
if (!_device->isBooted()) {
return;
}
std::string networkName = importedNetworkName;
BlobReader blobReader;
@ -126,9 +132,8 @@ void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devic
_inputInfo = blobReader.getInputInfo();
_outputInfo = blobReader.getOutputInfo();
openDevice(devicePool);
_executor->allocateGraph(_device, _graphDesc, _graphBlob, blobHeader, numStages, networkName, _actualNumExecutors);
_graphMetaData.stagesMeta.resize(numStages);
for (auto &meta : _graphMetaData.stagesMeta) {
meta.stageName = meta.stageType = meta.layerName = meta.layerType = "UNKNOWN";
@ -147,9 +152,12 @@ void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devic
}
}
ExecutableNetwork::ExecutableNetwork(std::istream& strm, std::shared_ptr<IMvnc> mvnc, std::vector<DevicePtr> &devicePool,
const MyriadConfiguration& config, const std::shared_ptr<ie::ICore> core) :
ExecutableNetwork(std::move(mvnc), devicePool, config, core) {
ExecutableNetwork::ExecutableNetwork(std::istream& strm,
std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr> &devicePool,
const MyriadConfiguration& config,
const std::shared_ptr<ie::ICore> core) :
ExecutableNetwork(std::move(mvnc), config, core) {
VPU_PROFILE(ExecutableNetwork);
Import(strm, devicePool, config);
}
@ -160,7 +168,7 @@ ExecutableNetwork::ExecutableNetwork(
std::vector<DevicePtr>& devicePool,
const MyriadConfiguration& config,
const std::shared_ptr<ie::ICore> core) :
ExecutableNetwork(std::move(mvnc), devicePool, config, core) {
ExecutableNetwork(std::move(mvnc), config, core) {
VPU_PROFILE(ExecutableNetwork);
std::ifstream blobFile{blobFilename, std::ios::binary};
Import(blobFile, devicePool, config);

View File

@ -44,7 +44,9 @@ public:
virtual ~ExecutableNetwork() {
try {
_executor->deallocateGraph(_device, _graphDesc);
if (_device != nullptr) {
_executor->deallocateGraph(_device, _graphDesc);
}
}
catch (...) {
std::cerr << "ERROR ~ExecutableNetwork():\n"
@ -54,18 +56,19 @@ public:
ie::IInferRequestInternal::Ptr CreateInferRequestImpl(ie::InputsDataMap networkInputs,
ie::OutputsDataMap networkOutputs) override {
if (_device == nullptr || !_device->isBooted()) {
if (!_isNetworkConstant && (_device == nullptr || !_device->isBooted())) {
IE_THROW() << "Can not create infer request: there is no available devices with platform "
<< _device->_platform;
}
return std::make_shared<MyriadInferRequest>(_graphDesc, networkInputs, networkOutputs,
_inputInfo, _outputInfo,
_graphMetaData.stagesMeta, _config, _log, _executor);
_graphMetaData.stagesMeta, _config, _log, _executor,
_constDatas, _isNetworkConstant);
}
ie::IInferRequestInternal::Ptr CreateInferRequest() override {
if (_device == nullptr || !_device->isBooted()) {
if (!_isNetworkConstant && (_device == nullptr || !_device->isBooted())) {
IE_THROW() << "Can not create infer request: there is no available devices with platform "
<< _device->_platform;
}
@ -73,7 +76,7 @@ public:
auto syncRequestImpl = std::make_shared<MyriadInferRequest>(_graphDesc, _networkInputs, _networkOutputs,
_inputInfo, _outputInfo,
_graphMetaData.stagesMeta, _config, _log,
_executor);
_executor, _constDatas, _isNetworkConstant);
syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
auto taskExecutorGetResult = getNextTaskExecutor();
return std::make_shared<MyriadAsyncInferRequest>(
@ -84,6 +87,16 @@ public:
model.write(_graphBlob.data(), _graphBlob.size());
}
void Export(const std::string &modelFileName) override {
std::ofstream modelFile(modelFileName, std::ios::out | std::ios::binary);
if (modelFile.is_open()) {
Export(modelFile);
} else {
IE_THROW() << "The " << modelFileName << " file can not be opened for export";
}
}
ie::Parameter GetMetric(const std::string &name) const override;
ie::CNNNetwork GetExecGraphInfo() override;
@ -98,9 +111,11 @@ private:
DevicePtr _device;
GraphMetaInfo _graphMetaData;
MyriadConfiguration _config;
bool _isNetworkConstant = false;
const std::shared_ptr<ie::ICore> _core = nullptr;
int _actualNumExecutors = 0;
std::vector<std::string> _supportedMetrics;
std::map<std::string, ie::Blob::Ptr> _constDatas;
DataInfo _inputInfo;
DataInfo _outputInfo;
@ -109,9 +124,8 @@ private:
std::queue<std::string> _taskExecutorGetResultIds;
ExecutableNetwork(std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr> &devicePool,
const MyriadConfiguration& config,
const std::shared_ptr<ie::ICore> core);
const MyriadConfiguration& config,
const std::shared_ptr<ie::ICore> core);
ie::ITaskExecutor::Ptr getNextTaskExecutor() {
std::string id = _taskExecutorGetResultIds.front();
@ -124,6 +138,8 @@ private:
return taskExecutor;
}
void openDevice(std::vector<DevicePtr>& devicePool);
};
} // namespace MyriadPlugin

View File

@ -33,11 +33,13 @@ MyriadInferRequest::MyriadInferRequest(GraphDesc &graphDesc,
const std::vector<StageMetaInfo> &blobMetaData,
const MyriadConfig& myriadConfig,
const Logger::Ptr &log,
const MyriadExecutorPtr &executor) :
const MyriadExecutorPtr &executor,
std::map<std::string, ie::Blob::Ptr> constDatas,
bool isNetworkConstant = true) :
IInferRequestInternal(networkInputs, networkOutputs), _executor(executor),
_log(log), _stagesMetaData(blobMetaData), _config(myriadConfig),
_inputInfo(compilerInputsInfo), _outputInfo(compilerOutputsInfo),
_graphDesc(graphDesc) {
_graphDesc(graphDesc), _constDatas(constDatas), _isNetworkConstant(isNetworkConstant) {
VPU_PROFILE(MyriadInferRequest);
const auto& ioStrides = _config.compileConfig().ioStrides;
@ -83,7 +85,7 @@ MyriadInferRequest::MyriadInferRequest(GraphDesc &graphDesc,
resultBuffer.resize(compilerOutputsInfo.totalSize);
VPU_THROW_UNLESS(
!_networkOutputs.empty() && !_networkInputs.empty(),
!_networkOutputs.empty() && !(_networkInputs.empty() && !_isNetworkConstant),
"No information about network's output/input");
}
@ -93,6 +95,9 @@ void MyriadInferRequest::InferImpl() {
}
void MyriadInferRequest::InferAsync() {
if (_isNetworkConstant) {
return;
}
VPU_PROFILE(InferAsync);
// execute input pre-processing
@ -104,7 +109,7 @@ void MyriadInferRequest::InferAsync() {
auto getOffset = [&inputInfo] (const std::string& name) {
const auto offsetIt = inputInfo.offset.find(name);
IE_ASSERT(offsetIt != inputInfo.offset.end()) << "MyriadInferRequest::InferAsync()\n"
<< "Input offset [" << name << "] is not provided.";
<< "Input offset [" << name << "] is not provided.";
return offsetIt->second;
};
@ -123,9 +128,9 @@ void MyriadInferRequest::InferAsync() {
const auto byteSize = blob->byteSize();
const auto requiredSize = vpu::checked_cast<size_t>(offset) + byteSize;
IE_ASSERT(requiredSize <= inputBuffer.size()) << "MyriadInferRequest::InferAsync()\n"
<< "Input offset is too big. "
<< "Required size: " << requiredSize
<< ", Input buffer size: " << inputBuffer.size();
<< "Input offset is too big. "
<< "Required size: " << requiredSize
<< ", Input buffer size: " << inputBuffer.size();
const auto foundBlob = getNetInputInfo(name);
const auto vpuLayout = foundBlob->second->getTensorDesc().getLayout();
@ -139,9 +144,8 @@ void MyriadInferRequest::InferAsync() {
}
_executor->queueInference(_graphDesc, inputBuffer.data(),
_inputInfo.totalSize, nullptr, 0);
_inputInfo.totalSize, nullptr, 0);
}
static void copyBlobAccordingUpperBound(
const Blob::Ptr& in,
const Blob::Ptr& out) {
@ -199,10 +203,22 @@ void MyriadInferRequest::GetResult() {
const auto getVpuLayout = [&networkOutputs] (const std::string& name){
const auto foundBlob = networkOutputs.find(name);
IE_ASSERT(foundBlob != networkOutputs.end()) << "MyriadInferRequest::GetResult()\n"
<< "Output [" << name << "] is not provided.";
<< "Output [" << name << "] is not provided.";
return foundBlob->second->getTensorDesc().getLayout();
};
if (_isNetworkConstant) {
for (const auto& output : _outputs) {
const auto& ieBlobName = output.first;
const auto& ieBlob = output.second;
IE_ASSERT(_constDatas.find(ieBlobName) != _constDatas.end()) <<
"Input [" << ieBlobName << "] is not provided.";
std::copy_n(
_constDatas[ieBlobName]->cbuffer().as<uint8_t *>(),
_constDatas[ieBlobName]->byteSize(),
ieBlob->buffer().as<uint8_t *>());
}
return;
}
// For networks with only one output
if (_outputInfo.offset.size() == 1) {
const auto& it = _outputs.begin();
@ -224,12 +240,12 @@ void MyriadInferRequest::GetResult() {
const auto resultOffset = [&](const std::string& name) {
const auto offset_it = _outputInfo.offset.find(name);
IE_ASSERT(offset_it != _outputInfo.offset.end()) << "MyriadInferRequest::GetResult()\n"
<< "Output offset [" << name << "] error.";
<< "Output offset [" << name << "] error.";
const auto offset = vpu::checked_cast<size_t>(offset_it->second);
IE_ASSERT(offset <= resultBuffer.size()) << "MyriadInferRequest::GetResult()\n"
<< "Input offset is too big."
<< "Required offset: " << offset
<< "Result buffer size: " << resultBuffer.size();
<< "Input offset is too big."
<< "Required offset: " << offset
<< "Result buffer size: " << resultBuffer.size();
return offset;
};

View File

@ -34,6 +34,8 @@ class MyriadInferRequest : public InferenceEngine::IInferRequestInternal {
GraphDesc _graphDesc;
std::vector<uint8_t> resultBuffer;
std::vector<uint8_t> inputBuffer;
std::map<std::string, ie::Blob::Ptr> _constDatas;
bool _isNetworkConstant;
public:
typedef std::shared_ptr<MyriadInferRequest> Ptr;
@ -46,7 +48,9 @@ public:
const std::vector<StageMetaInfo> &blobMetaData,
const MyriadConfig &myriadConfig,
const Logger::Ptr &log,
const MyriadExecutorPtr &executor);
const MyriadExecutorPtr &executor,
std::map<std::string, ie::Blob::Ptr> constDatas,
bool isNetworkConstant);
void InferImpl() override;
void InferAsync();

View File

@ -0,0 +1,98 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "shared_test_classes/single_layer/prior_box.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestDefinitions;
namespace {
TEST_P(PriorBoxLayerTest, Serialize) {
Serialize();
}
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::I32,
InferenceEngine::Precision::U16
};
const std::vector<std::vector<float>> min_sizes = {
{16.f, 32.f}
};
const std::vector<std::vector<float>> max_sizes = {
{256.f, 512.f}
};
const std::vector<std::vector<float>> aspect_ratios = {
{0.66f, 1.56f}
};
const std::vector<std::vector<float>> densities = {
{0.55f}
};
const std::vector<std::vector<float>> fixed_ratios = {
{0.88f}
};
const std::vector<std::vector<float>> fixed_sizes = {
{1.25f}
};
const std::vector<bool> clips = {
true, false
};
const std::vector<bool> flips = {
true, false
};
const std::vector<float> steps = {
1.0f, 2.0f
};
const std::vector<float> offsets = {
0.0f, 0.5f
};
const std::vector<std::vector<float>> variances = {
{2.22f, 3.14f}
};
const std::vector<bool> scale_all_sizes = {
true, false
};
const std::vector<size_t> inputShape = {128, 128};
const std::vector<size_t> imageShape = {50, 50};
const auto layerSpecificParams = ::testing::Combine(
::testing::ValuesIn(min_sizes),
::testing::ValuesIn(max_sizes),
::testing::ValuesIn(aspect_ratios),
::testing::ValuesIn(densities),
::testing::ValuesIn(fixed_ratios),
::testing::ValuesIn(fixed_sizes),
::testing::ValuesIn(clips),
::testing::ValuesIn(flips),
::testing::ValuesIn(steps),
::testing::ValuesIn(offsets),
::testing::ValuesIn(variances),
::testing::ValuesIn(scale_all_sizes));
INSTANTIATE_TEST_SUITE_P(smoke_PriorBox_Basic, PriorBoxLayerTest,
::testing::Combine(
layerSpecificParams,
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(inputShape),
::testing::Values(imageShape),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
PriorBoxLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,45 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/space_to_depth.hpp"
#include <ngraph/opsets/opset3.hpp>
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace ngraph::opset3;
namespace {
TEST_P(SpaceToDepthLayerTest, Serialize) {
Serialize();
}
const std::vector<InferenceEngine::Precision> inputPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::U8,
InferenceEngine::Precision::I16,
};
const std::vector<SpaceToDepth::SpaceToDepthMode> modes = {
SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST};
const std::vector<std::vector<size_t>> inputShapesBS2 = {
{1, 1, 2, 2}, {1, 1, 4, 4}, {1, 1, 6, 6}, {2, 8, 6, 6},
{2, 4, 10, 8}, {1, 1, 2, 2, 2}, {1, 1, 4, 4, 4}, {1, 1, 6, 6, 6},
{2, 8, 6, 6, 6}, {2, 4, 10, 8, 12}};
const auto SpaceToDepthBS2 = ::testing::Combine(
::testing::ValuesIn(inputShapesBS2), ::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(modes), ::testing::Values(1, 2),
::testing::Values(CommonTestUtils::DEVICE_CPU));
INSTANTIATE_TEST_CASE_P(
smoke_SpaceToDepthSerialization, SpaceToDepthLayerTest,
::testing::Combine(::testing::ValuesIn(inputShapesBS2),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(modes), ::testing::Values(1, 2),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
SpaceToDepthLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,160 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
TEST(TransformationTests, ConvertDeformableConv8to1) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
const Strides strides{1, 1};
const CoordinateDiff padding{0, 0};
const Strides dilations{1, 1};
const Shape input_shape{1, 1, 4, 4};
const Shape filter_shape{1, 1, 2, 2};
const Shape offsets_shape{1, 8, 3, 3};
auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
auto filter = std::make_shared<opset8::Parameter>(element::f32, filter_shape);
auto offsets = std::make_shared<opset8::Parameter>(element::f32, offsets_shape);
auto deformable_conv = std::make_shared<opset8::DeformableConvolution>(data,
offsets,
filter,
strides,
padding,
padding,
dilations);
f = std::make_shared<Function>(NodeVector{deformable_conv}, ParameterVector{data, filter, offsets});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::ConvertDeformableConv8To1>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
const Strides strides{1, 1};
const CoordinateDiff padding{0, 0};
const Strides dilations{1, 1};
const Shape input_shape{1, 1, 4, 4};
const Shape filter_shape{1, 1, 2, 2};
const Shape offsets_shape{1, 8, 3, 3};
auto data = std::make_shared<opset1::Parameter>(element::f32, input_shape);
auto filter = std::make_shared<opset1::Parameter>(element::f32, filter_shape);
auto offsets = std::make_shared<opset1::Parameter>(element::f32, offsets_shape);
auto deformable_conv = std::make_shared<opset1::DeformableConvolution>(data,
offsets,
filter,
strides,
padding,
padding,
dilations);
f_ref = std::make_shared<Function>(NodeVector{deformable_conv}, ParameterVector{data, filter, offsets});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, ConvertDeformableConv8to1_mask) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
const Strides strides{1, 1};
const CoordinateDiff padding{0, 0};
const Strides dilations{1, 1};
const Shape input_shape{1, 1, 4, 4};
const Shape filter_shape{1, 1, 2, 2};
const Shape offsets_shape{1, 8, 3, 3};
const Shape mask_shape{1, 4, 3, 3};
auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
auto filter = std::make_shared<opset8::Parameter>(element::f32, filter_shape);
auto offsets = std::make_shared<opset8::Parameter>(element::f32, offsets_shape);
auto mask = std::make_shared<opset8::Parameter>(element::f32, mask_shape);
auto deformable_conv = std::make_shared<opset8::DeformableConvolution>(data,
offsets,
filter,
mask,
strides,
padding,
padding,
dilations);
f = std::make_shared<Function>(NodeVector{deformable_conv}, ParameterVector{data, filter,
mask, offsets});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::ConvertDeformableConv8To1>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
// mask input is provided, DeformableConvolution-8 must remain
ASSERT_EQ(count_ops_of_type<opset1::DeformableConvolution>(f), 0);
ASSERT_EQ(count_ops_of_type<opset8::DeformableConvolution>(f), 1);
}
TEST(TransformationTests, ConvertDeformableConv8to1_bilinear_interpolation_padding) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
const Strides strides{1, 1};
const CoordinateDiff padding{0, 0};
const Strides dilations{1, 1};
const Shape input_shape{1, 1, 4, 4};
const Shape filter_shape{1, 1, 2, 2};
const Shape offsets_shape{1, 8, 3, 3};
auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
auto filter = std::make_shared<opset8::Parameter>(element::f32, filter_shape);
auto offsets = std::make_shared<opset8::Parameter>(element::f32, offsets_shape);
auto deformable_conv = std::make_shared<opset8::DeformableConvolution>(data,
offsets,
filter,
strides,
padding,
padding,
dilations,
op::PadType::EXPLICIT,
1,
1,
true);
f = std::make_shared<Function>(NodeVector{deformable_conv}, ParameterVector{data, filter, offsets});
pass::Manager manager;
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::ConvertDeformableConv8To1>();
manager.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
// use_bilinear_interpolation_padding is true, DeformableConvolution-8 must remain
ASSERT_EQ(count_ops_of_type<opset1::DeformableConvolution>(f), 0);
ASSERT_EQ(count_ops_of_type<opset8::DeformableConvolution>(f), 1);
}

View File

@ -35,10 +35,12 @@ Output<Node> create_constant_with_zeros(const Shape & shape, const Mask & mask)
Coordinate coord_end(shape);
coord_end[dim] = dim_value + 1;
NGRAPH_SUPPRESS_DEPRECATED_START
CoordinateTransform iter(shape, coord_begin, coord_end);
for (const Coordinate & coord : iter) {
values[iter.index(coord)] = 0;
}
NGRAPH_SUPPRESS_DEPRECATED_END
}
}
return std::make_shared<opset5::Constant>(element::f32, shape, values);
@ -57,10 +59,12 @@ TEST(TransformationTests, InitMasksOutputChannel) {
Shape weights_shape{6, 3, 3, 3};
std::vector<double> values(shape_size(weights_shape), 1);
NGRAPH_SUPPRESS_DEPRECATED_START
CoordinateTransform iter(weights_shape, {0, 1, 0, 0}, {6, 2, 3, 3});
for (const Coordinate & coord : iter) {
values[iter.index(coord)] = 0;
}
NGRAPH_SUPPRESS_DEPRECATED_END
auto weights = std::make_shared<opset5::Constant>(element::f32, weights_shape, values);
pass::InitConstMask({1}).apply(weights);

View File

@ -0,0 +1,81 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <queue>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
#include <transformations/init_node_info.hpp>
#include <ngraph/pass/manager.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
auto gather = [](const std::shared_ptr<Node> input, std::vector<int64_t> indices, bool scalar = false) -> Output<Node> {
std::shared_ptr<Node> indices_node;
if (scalar)
indices_node = opset7::Constant::create(element::i64, {}, indices);
else
indices_node = opset7::Constant::create(element::i64, {indices.size()}, indices);
return std::make_shared<ngraph::opset7::Gather>(
input, indices_node, opset7::Constant::create(element::i64, {}, {0}));
};
TEST(TransformationTests, ShapeSubGraphTest) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
Shape data_shape{1, 2, 3, 4};
{
auto data = std::make_shared<opset7::Parameter>(element::f32, data_shape);
auto shape_op_1 = std::make_shared<opset7::ShapeOf>(data);
auto gather_1 = gather(shape_op_1, {1}, true);
auto unsqueeze_1 = std::make_shared<opset7::Unsqueeze>(
gather_1, opset7::Constant::create(element::i64, {1}, {0}));
auto shape_op_2 = std::make_shared<opset7::ShapeOf>(data);
auto gather_2 = gather(shape_op_2, {2}, true);
auto unsqueeze_2 = std::make_shared<opset7::Unsqueeze>(
gather_2, opset7::Constant::create(element::i64, {1}, {0}));
auto const_1 = opset7::Constant::create(element::i64, Shape{1}, {2});
auto const_2 = opset7::Constant::create(element::i64, Shape{1}, {2});
auto concat = std::make_shared<opset7::Concat>(OutputVector{unsqueeze_1, unsqueeze_2, const_1, const_2}, 0);
auto reshape = std::make_shared<opset7::Reshape>(data, concat, false);
f = std::make_shared<Function>(NodeVector{reshape}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<pass::SimplifyShapeOfSubGraph>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
ASSERT_EQ(reshape->get_output_partial_shape(0), PartialShape({2, 3, 2, 2}));
}
{
auto data = std::make_shared<opset7::Parameter>(element::f32, data_shape);
auto shape_op_1 = std::make_shared<opset7::ShapeOf>(data);
auto gather_1 = gather(shape_op_1, {1, 2});
auto const_1 = opset7::Constant::create(element::i64, Shape{1}, {2});
auto const_2 = opset7::Constant::create(element::i64, Shape{1}, {2});
auto concat = std::make_shared<opset7::Concat>(OutputVector{gather_1, const_1, const_2}, 0);
auto reshape = std::make_shared<opset7::Reshape>(data, concat, false);
f_ref = std::make_shared<Function>(NodeVector{reshape}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref, true);
ASSERT_TRUE(res.first) << res.second;
}

View File

@ -65,9 +65,11 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
// List of operations that should be tested also with integer precision
const std::map<ActivationTypes, std::vector<std::vector<float>>> intActivationTypes = {
{Atan, {}},
{Negative, {}},
{Ceiling, {}},
{Cos, {}},
{Sinh, {}},
{Sqrt, {}},
{Tanh, {}},
};

View File

@ -89,4 +89,30 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(std::vector<size_t>({1, 4, 224, 224})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
DeformableConvolutionLayerTest::getTestCaseName);
/* ============= Single Test Case ============= */
const std::vector<std::vector<size_t>> single_deform_vals = {{1, 54, 28, 28}};
const std::vector<std::vector<size_t>> single_kernel = {{1, 3, 3, 3}};
const std::vector<size_t> single_deform_groups = {3};
const auto deformableConv2DParams_SingleTestCase = ::testing::Combine(
::testing::ValuesIn(single_deform_vals),
::testing::ValuesIn(single_kernel), ::testing::ValuesIn(strides),
::testing::ValuesIn(padBegins), ::testing::ValuesIn(padEnds),
::testing::ValuesIn(dilations), ::testing::ValuesIn(groups),
::testing::ValuesIn(single_deform_groups), ::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::EXPLICIT));
INSTANTIATE_TEST_SUITE_P(
smoke_DeformableConvolution2D_SingleTestCase, DeformableConvolutionLayerTest,
::testing::Combine(
deformableConv2DParams_SingleTestCase, ::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({1, 3, 30, 30})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
DeformableConvolutionLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,81 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/prior_box.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestDefinitions;
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::I32,
InferenceEngine::Precision::U16};
const std::vector<std::vector<float>> min_sizes = {
{256.0f}};
const std::vector<std::vector<float>> max_sizes = {
{315.0f}};
const std::vector<std::vector<float>> aspect_ratios = {
{2.0f}};
const std::vector<std::vector<float>> densities = {
{1.0f}};
const std::vector<std::vector<float>> fixed_ratios = {
{}};
const std::vector<std::vector<float>> fixed_sizes = {
{}};
const std::vector<bool> clips = {
false, true};
const std::vector<bool> flips = {
false, true};
const std::vector<float> steps = {
1.0f,
};
const std::vector<float> offsets = {
0.0f,
};
const std::vector<std::vector<float>> variances = {
{}};
const std::vector<bool> scale_all_sizes = {
false, true};
const std::vector<size_t> inputShape = {300, 300};
const std::vector<size_t> imageShape = {32, 32};
const auto layerSpecificParams = ::testing::Combine(
::testing::ValuesIn(min_sizes),
::testing::ValuesIn(max_sizes),
::testing::ValuesIn(aspect_ratios),
::testing::ValuesIn(densities),
::testing::ValuesIn(fixed_ratios),
::testing::ValuesIn(fixed_sizes),
::testing::ValuesIn(clips),
::testing::ValuesIn(flips),
::testing::ValuesIn(steps),
::testing::ValuesIn(offsets),
::testing::ValuesIn(variances),
::testing::ValuesIn(scale_all_sizes));
INSTANTIATE_TEST_SUITE_P(smoke_PriorBox_Basic, PriorBoxLayerTest,
::testing::Combine(
layerSpecificParams,
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(inputShape),
::testing::Values(imageShape),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
PriorBoxLayerTest::getTestCaseName);

View File

@ -156,4 +156,4 @@ const auto basicCases5D = ::testing::Combine(
INSTANTIATE_TEST_SUITE_P(smoke_Activation5D_Eltwise_CPU_BF16, ActivationLayerCPUTest, basicCases5D, ActivationLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
} // namespace CPULayerTestsDefinitions

View File

@ -9,28 +9,12 @@
std::vector<std::string> disabledTestPatterns() {
return {
".*TensorNamesTest\\.CheckAddOutput.*",
// TODO: FIX BUG 31661
// TODO: support InferRequest in GNAPlugin
".*InferRequestTests\\.canRun3AsyncRequestsConsistentlyFromThreadsWithoutWait.*",
// TODO: FIX BUG 23740
".*InferRequestTests\\.CanCreateTwoExeNetworks.*",
// TODO: FIX BUG 26702
".*InferRequestTests\\.FailedAsyncInferWithNegativeTimeForWait.*",
// TODO: FIX BUG 23741
".*InferRequestTests\\.canRun3SyncRequestsConsistentlyFromThreads.*",
// TODO: FIX BUG 23742
".*InferRequestTests\\.canWaitWithotStartAsync.*",
// TODO: FIX BUG 23743
".*InferRequestTests\\.returnDeviceBusyOnSetBlobAfterAsyncInfer.*",
".*InferRequestTests\\.returnDeviceBusyOnGetBlobAfterAsyncInfer.*",
".*InferRequestTests\\.returnDeviceBusyOnGetPerformanceCountAfterAsyncInfer.*",
".*InferRequestTests\\.returnDeviceBusyOnStartInferAfterAsyncInfer.*",
".*InferRequestTests\\.returnDeviceBusyOnGetUserDataAfterAsyncInfer.*",
".*InferRequestTests\\.returnDeviceBusyOnSetUserDataAfterAsyncInfer.*",
// TODO: FIX BUG 31661
".*InferRequestTests\\.canStartSeveralAsyncInsideCompletionCallbackNoSafeDtorWithoutWait.*",
// TODO: FIX BUG 31661
// TODO: FIX BUG 59041
".*Behavior.*CallbackThrowException.*",
// TODO: FIX BUG 32210
R"(.*ActivationLayerTest.CompareWithRefs/(Sigmoid|Tanh|Exp|Log).*)",

View File

@ -154,6 +154,34 @@ INSTANTIATE_TEST_SUITE_P(
Gather7LayerTest::getTestCaseName
);
INSTANTIATE_TEST_SUITE_P(
smoke_Gather8Axes4i4b1,
Gather8LayerTest,
GatherAxes4i4b1,
Gather8LayerTest::getTestCaseName
);
INSTANTIATE_TEST_SUITE_P(
smoke_Gather8Axes4i4b2,
Gather8LayerTest,
GatherAxes4i4b2,
Gather8LayerTest::getTestCaseName
);
INSTANTIATE_TEST_SUITE_P(
smoke_Gather8Axes4i8b1,
Gather8LayerTest,
GatherAxes4i8b1,
Gather8LayerTest::getTestCaseName
);
INSTANTIATE_TEST_SUITE_P(
smoke_Gather8Axes4i8b2,
Gather8LayerTest,
GatherAxes4i8b2,
Gather8LayerTest::getTestCaseName
);
const std::vector<std::vector<int>> indices = {
std::vector<int>{0, 3, 2, 1},
};

View File

@ -74,7 +74,6 @@ protected:
const auto tensorWithTargetShapeParam = std::make_shared<ngraph::opset3::Parameter>(tensorType, targetShape);
const auto shapeOfNode = std::make_shared<ngraph::opset3::ShapeOf>(tensorWithTargetShapeParam, shapeType);
shapeOfNode->set_is_foldable(false);
ngraph::ParameterVector params{tensorParam, tensorWithTargetShapeParam};
@ -197,7 +196,6 @@ protected:
const auto tensorWithTargetShapeParam = std::make_shared<ngraph::opset5::Parameter>(shapeType, targetShape);
const auto shapeOfNode = std::make_shared<ngraph::opset5::ShapeOf>(tensorWithTargetShapeParam, shapeType);
shapeOfNode->set_is_foldable(false);
ngraph::ParameterVector params{tensorParam, tensorWithTargetShapeParam};

View File

@ -23,8 +23,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*IEClassGetAvailableDevices.*)",
// TODO: Issue: 40473
R"(.*TopKLayerTest.*mode=min.*sort=index.*)",
// TODO: Issue: 40961
R"(.*(ConstantResultSubgraphTest).*)",
// TODO: Issue: 42828
R"(.*DSR_NonMaxSuppression.*NBoxes=(5|20|200).*)",
// TODO: Issue: 42721

View File

@ -23,15 +23,7 @@ const std::vector<SizeVector> shapes = {
};
const std::vector<Precision> precisions = {
Precision::U8,
Precision::I8,
Precision::U16,
Precision::I16,
Precision::I32,
Precision::U64,
Precision::I64,
Precision::FP32,
Precision::BOOL
Precision::FP32
};
INSTANTIATE_TEST_SUITE_P(smoke_Check, ConstantResultSubgraphTest,

View File

@ -16,4 +16,8 @@ TEST_P(Gather7LayerTest, CompareWithRefs) {
Run();
};
TEST_P(Gather8LayerTest, CompareWithRefs) {
Run();
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,15 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "shared_test_classes/single_layer/prior_box.hpp"
namespace LayerTestDefinitions {
TEST_P(PriorBoxLayerTest, CompareWithRefs) {
Run();
}
} // namespace LayerTestDefinitions

View File

@ -63,4 +63,13 @@ protected:
void SetUp() override;
};
class Gather8LayerTest : public testing::WithParamInterface<gather7ParamsTuple>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<gather7ParamsTuple>& obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,80 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <tuple>
#include <string>
#include <map>
#include <memory>
#include <set>
#include <functional>
#include <gtest/gtest.h>
#include "ie_core.hpp"
#include "ie_precision.hpp"
#include "ngraph/opsets/opset1.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "common_test_utils/common_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
namespace LayerTestDefinitions {
using priorBoxSpecificParams = std::tuple<
std::vector<float>, // min_size
std::vector<float>, // max_size
std::vector<float>, // aspect_ratio
std::vector<float>, // density
std::vector<float>, // fixed_ratio
std::vector<float>, // fixed_size
bool, // clip
bool, // flip
float, // step
float, // offset
std::vector<float>, // variance
bool>; // scale_all_sizes
typedef std::tuple<
priorBoxSpecificParams,
InferenceEngine::Precision, // net precision
InferenceEngine::Precision, // Input precision
InferenceEngine::Precision, // Output precision
InferenceEngine::Layout, // Input layout
InferenceEngine::Layout, // Output layout
InferenceEngine::SizeVector, // input shape
InferenceEngine::SizeVector, // image shape
std::string> priorBoxLayerParams;
class PriorBoxLayerTest
: public testing::WithParamInterface<priorBoxLayerParams>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<priorBoxLayerParams>& obj);
protected:
InferenceEngine::SizeVector inputShapes;
InferenceEngine::SizeVector imageShapes;
InferenceEngine::Precision netPrecision;
std::vector<float> min_size;
std::vector<float> max_size;
std::vector<float> aspect_ratio;
std::vector<float> density;
std::vector<float> fixed_ratio;
std::vector<float> fixed_size;
std::vector<float> variance;
float step;
float offset;
bool clip;
bool flip;
bool scale_all_sizes;
void SetUp() override;
};
} // namespace LayerTestDefinitions

View File

@ -93,4 +93,47 @@ void Gather7LayerTest::SetUp() {
function = std::make_shared<ngraph::Function>(results, functionParams, "gather");
}
std::string Gather8LayerTest::getTestCaseName(const testing::TestParamInfo<gather7ParamsTuple>& obj) {
std::tuple<int, int> axis_batchIdx;
std::vector<int> indices;
std::vector<size_t> indicesShape, inputShape;
InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout;
std::string targetName;
std::tie(inputShape, indicesShape, axis_batchIdx, netPrecision, inPrc, outPrc, inLayout, outLayout, targetName) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
result << "axis=" << std::get<0>(axis_batchIdx) << "_";
result << "batchIdx=" << std::get<1>(axis_batchIdx) << "_";
result << "indicesShape=" << CommonTestUtils::vec2str(indicesShape) << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "inPRC=" << inPrc.name() << "_";
result << "outPRC=" << outPrc.name() << "_";
result << "inL=" << inLayout << "_";
result << "outL=" << outLayout << "_";
result << "trgDev=" << targetName << "_";
return result.str();
}
void Gather8LayerTest::SetUp() {
std::tuple<int, int> axis_batchIdx;
std::vector<size_t> indicesShape;
std::vector<size_t> inputShape;
InferenceEngine::Precision netPrecision;
std::tie(inputShape, indicesShape, axis_batchIdx, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = GetParam();
int axis = std::get<0>(axis_batchIdx);
int batchIdx = std::get<1>(axis_batchIdx);
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto functionParams = ngraph::builder::makeParams(ngPrc, { inputShape });
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(functionParams));
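// Random indices are drawn from [1 - dim, dim - 1] along the gather axis, so negative values occur and Gather-8's negative-index handling is exercised.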
auto indicesNode = ngraph::builder::makeConstant<int>(ngraph::element::i64, indicesShape, {}, true,
inputShape[axis < 0 ? axis + inputShape.size() : axis] - 1,
1 - static_cast<int>(inputShape[axis < 0 ? axis + inputShape.size() : axis]));
auto axisNode = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape({}), { axis });
auto gather = std::make_shared<ngraph::opset8::Gather>(paramOuts[0], indicesNode, axisNode, batchIdx);
ngraph::ResultVector results{ std::make_shared<ngraph::opset8::Result>(gather) };
function = std::make_shared<ngraph::Function>(results, functionParams, "gather");
}
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,91 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/prior_box.hpp"
namespace LayerTestDefinitions {
std::string PriorBoxLayerTest::getTestCaseName(const testing::TestParamInfo<priorBoxLayerParams>& obj) {
InferenceEngine::Precision netPrecision;
InferenceEngine::Precision inPrc, outPrc;
InferenceEngine::Layout inLayout, outLayout;
InferenceEngine::SizeVector inputShapes, imageShapes;
std::string targetDevice;
priorBoxSpecificParams specParams;
std::tie(specParams,
netPrecision,
inPrc, outPrc, inLayout, outLayout,
inputShapes,
imageShapes,
targetDevice) = obj.param;
std::vector<float> min_size, max_size, aspect_ratio, density, fixed_ratio, fixed_size, variance;
float step, offset;
bool clip, flip, scale_all_sizes;
std::tie(min_size, max_size, aspect_ratio,
density, fixed_ratio, fixed_size, clip,
flip, step, offset, variance, scale_all_sizes) = specParams;
std::ostringstream result;
const char separator = '_';
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << separator;
result << "imageS=" << CommonTestUtils::vec2str(imageShapes) << separator;
result << "netPRC=" << netPrecision.name() << separator;
result << "inPRC=" << inPrc.name() << separator;
result << "outPRC=" << outPrc.name() << separator;
result << "inL=" << inLayout << separator;
result << "outL=" << outLayout << separator;
result << "min_s=" << CommonTestUtils::vec2str(min_size) << separator;
result << "max_s=" << CommonTestUtils::vec2str(max_size)<< separator;
result << "asp_r=" << CommonTestUtils::vec2str(aspect_ratio)<< separator;
result << "dens=" << CommonTestUtils::vec2str(density)<< separator;
result << "fix_r=" << CommonTestUtils::vec2str(fixed_ratio)<< separator;
result << "fix_s=" << CommonTestUtils::vec2str(fixed_size)<< separator;
result << "var=" << CommonTestUtils::vec2str(variance)<< separator;
result << "step=" << step << separator;
result << "off=" << offset << separator;
result << "clip=" << clip << separator;
result << "flip=" << flip<< separator;
result << "scale_all=" << scale_all_sizes << separator;
result << "trgDev=" << targetDevice;
return result.str();
}
void PriorBoxLayerTest::SetUp() {
priorBoxSpecificParams specParams;
std::tie(specParams, netPrecision,
inPrc, outPrc, inLayout, outLayout,
inputShapes, imageShapes, targetDevice) = GetParam();
std::tie(min_size, max_size, aspect_ratio,
density, fixed_ratio, fixed_size, clip,
flip, step, offset, variance, scale_all_sizes) = specParams;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes, imageShapes});
ngraph::op::PriorBoxAttrs attributes;
attributes.min_size = min_size;
attributes.max_size = max_size;
attributes.aspect_ratio = aspect_ratio;
attributes.density = density;
attributes.fixed_ratio = fixed_ratio;
attributes.fixed_size = fixed_size;
attributes.variance = variance;
attributes.step = step;
attributes.offset = offset;
attributes.clip = clip;
attributes.flip = flip;
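// PriorBox consumes the layer size and the image size as 1-D shape tensors, hence the ShapeOf nodes over the two parameters.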
auto shape_of_1 = std::make_shared<ngraph::opset3::ShapeOf>(params[0]);
auto shape_of_2 = std::make_shared<ngraph::opset3::ShapeOf>(params[1]);
auto priorBox = std::make_shared<ngraph::op::PriorBox>(
shape_of_1,
shape_of_2,
attributes);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(priorBox)};
function = std::make_shared <ngraph::Function>(results, params, "PriorBoxFunction");
}
} // namespace LayerTestDefinitions

View File

@ -60,9 +60,10 @@ VERIFIED_OP_REFERENCES = [
'NonMaxSuppression-4',
'NonMaxSuppression-5',
'NonZero-3',
'PSROIPooling-1',
'PriorBox-1',
'Proposal-1',
'Proposal-4',
'PSROIPooling-1',
'RNNSequence-5',
'ROIAlign-3',
'ROIPooling-2',
@ -83,11 +84,13 @@ VERIFIED_OP_REFERENCES = [
'ReorgYOLO-2',
'Result-1',
'Round-5',
'ScatterNDUpdate-4',
'ShapeOf-1',
'ShapeOf-3',
'Sigmoid-1',
'Sin-1',
'Sinh-1',
'SoftPlus-4',
'Softmax-1',
'SpaceToDepth-1',
'Split-1',

View File

@ -14,6 +14,7 @@
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/opsets/opset8.hpp>
#include "ngraph_functions/utils/data_utils.hpp"

View File

@ -0,0 +1,89 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "transformations/remove_extra_reshapes.hpp"
#include "common_test_utils/ngraph_test_utils.hpp"
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>
namespace testing {
TEST(TransformationTests, RemoveExtraReshapesTestReshapeNotEqualInputOutput) {
std::shared_ptr<ngraph::Function> func(nullptr), reference_func(nullptr);
const ngraph::Shape data_shape{1, 3, 64, 64};
{
auto input_params = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, data_shape);
auto new_shape = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {1, 3, 64 * 64});
auto reshape_operation = std::make_shared<ngraph::opset7::Reshape>(input_params, new_shape, true);
auto max_pool_operation = std::make_shared<ngraph::opset7::MaxPool>(reshape_operation,
ngraph::Strides{1},
ngraph::Shape{0},
ngraph::Shape{0},
ngraph::Shape{3});
auto result = std::make_shared<ngraph::opset7::Result>(max_pool_operation);
func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
ngraph::ParameterVector{input_params});
reference_func = ngraph::clone_function(*func);
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<GNAPluginNS::RemoveExtraReshapes>();
m.run_passes(func);
ASSERT_NO_THROW(check_rt_info(func));
}
const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES);
const FunctionsComparator::Result result = func_comparator(func, reference_func);
ASSERT_TRUE(result.valid);
}
TEST(TransformationTests, RemoveExtraReshapesTestReshapeEqualInputOutput) {
std::shared_ptr<ngraph::Function> func(nullptr), reference_func(nullptr);
const ngraph::Shape data_shape{1, 3, 64, 64};
{
auto input_params = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, data_shape);
auto new_shape = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1, 3, 64, 64});
auto reshape_operation = std::make_shared<ngraph::opset7::Reshape>(input_params, new_shape, true);
auto max_pool_operation = std::make_shared<ngraph::opset7::MaxPool>(reshape_operation,
ngraph::Strides{1, 1},
ngraph::Shape{0, 0},
ngraph::Shape{0, 0},
ngraph::Shape{3, 3});
auto result = std::make_shared<ngraph::opset7::Result>(max_pool_operation);
func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
ngraph::ParameterVector{input_params});
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<GNAPluginNS::RemoveExtraReshapes>();
m.run_passes(func);
ASSERT_NO_THROW(check_rt_info(func));
}
{
auto input_params = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, data_shape);
auto max_pool_operation = std::make_shared<ngraph::opset7::MaxPool>(input_params,
ngraph::Strides{1, 1},
ngraph::Shape{0, 0},
ngraph::Shape{1, 1},
ngraph::Shape{4, 4});
auto result = std::make_shared<ngraph::opset7::Result>(max_pool_operation);
reference_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
ngraph::ParameterVector{input_params});
}
const FunctionsComparator func_comparator = FunctionsComparator::with_default();
const FunctionsComparator::Result result = func_comparator(func, reference_func);
ASSERT_TRUE(result.valid);
}
} // namespace testing

View File

@ -35,6 +35,7 @@ struct gather : public primitive_base<gather> {
/// @param axis Gathering axis.
/// @param output_shape Output shape.
/// @param batch_dim Batch_dim
/// @param support_neg_ind Support negative indexes
gather(const primitive_id& id,
const primitive_id& dict,
const primitive_id& idx,
@ -42,8 +43,11 @@ struct gather : public primitive_base<gather> {
const format& output_format,
const tensor& output_shape,
const int64_t batch_dim = 0,
const padding& output_padding = padding())
: primitive_base(id, {dict, idx}, output_padding), axis(axis), output_format(output_format), output_shape(output_shape), batch_dim(batch_dim) {}
const bool support_neg_ind = false,
const padding& output_padding = padding()
)
: primitive_base(id, {dict, idx}, output_padding), axis(axis), output_format(output_format),
output_shape(output_shape), batch_dim(batch_dim), support_neg_ind(support_neg_ind) {}
/// @brief Gathering axis
gather_axis axis;
@ -53,6 +57,8 @@ struct gather : public primitive_base<gather> {
tensor output_shape;
/// @brief Gathering batch_dim
int64_t batch_dim;
/// @brief Support negative indexes
bool support_neg_ind;
};
/// @}
/// @}
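Because the new flag is inserted before output_padding, call sites that passed a padding positionally must be updated. A minimal construction sketch (the primitive ids and output shape are hypothetical):
topology.add(gather("gather_neg",                          // primitive id
                    "dict_id",                             // data input
                    "indices_id",                          // indices input
                    cldnn::gather::gather_axis::along_y,
                    format::bfyx,
                    tensor(1, 2, 3, 4),                    // output shape (b, f, x, y)
                    0,                                     // batch_dim
                    true));                                // support_neg_ind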

View File

@ -79,6 +79,10 @@ static int64_t GetGatherBatchDim(const gather_params& params) {
return params.batch_dim;
}
static inline std::string GetGatherMaxIndexDim(const gather_params& params) {
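// Size of the gathered axis; exported to the kernel as INDEX_DIM so a negative index can be wrapped by adding it.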
return std::to_string(params.inputs[0].GetDims().at(params.inputs[0].GetDims().size() - GetGatherChannelIndex(params) - 1).v);
}
static inline std::string GetOrderString(std::vector<std::string>& order) {
std::string order_str = order[0];
for (size_t i = 1; i < order.size(); i++)
@ -168,6 +172,8 @@ JitConstants GatherKernelRef::GetJitConstants(const gather_params& params) const
jit.AddConstant(MakeJitConstant("DICTIONARY_INDEX_ORDER", GetDictionaryIndexOrder(params, GetGatherChannelIndex(params))));
jit.AddConstant(MakeJitConstant("INDICES_INDEX_ORDER", GetIndecesIdxOrder(params, GetGatherChannelIndex(params), GetGatherBatchDim(params))));
if (params.support_neg_ind)
jit.AddConstant(MakeJitConstant("INDEX_DIM", GetGatherMaxIndexDim(params)));
if (!params.fused_ops.empty()) {
std::vector<std::string> idx_order = GetOrder(params.inputs[0].GetDims().size());

View File

@ -11,10 +11,11 @@ namespace kernel_selector {
// gather_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct gather_params : public base_params {
gather_params() : base_params(KernelType::GATHER), axis(GatherAxis::BATCH), batch_dim(0) {}
gather_params() : base_params(KernelType::GATHER), axis(GatherAxis::BATCH), batch_dim(0), support_neg_ind(false) {}
GatherAxis axis;
int64_t batch_dim;
bool support_neg_ind;
virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
};

View File

@ -5,7 +5,19 @@
#include "include/data_types.cl"
#include "include/fetch_data.cl"
#define INPUT_AXIS_INDEX (uint)indices[indices_idx]
#ifdef INDEX_DIM
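// INDEX_DIM is the size of the gathered axis; a negative index i in [-INDEX_DIM, -1] maps to the equivalent positive index i + INDEX_DIM.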
inline uint FUNC(get_positive_index)(int in)
{
if(in < 0)
return in + INDEX_DIM;
else
return in;
}
#define INPUT_AXIS_INDEX (uint)FUNC_CALL(get_positive_index)(indices[indices_idx])
#else
#define INPUT_AXIS_INDEX (uint)(indices[indices_idx])
#endif
#define GET_DICTIONARY_INDEX(idx_order) INPUT0_GET_INDEX(idx_order)
#define GET_INDICES_INDEX(idx_order) INPUT1_GET_INDEX(idx_order)
#define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _GET_INDEX)(idx_order)

View File

@ -16,23 +16,29 @@ const char *debug_configuration::prefix = "GPU_Debug: ";
static void print_option(std::string option_name, std::string option_value) {
GPU_DEBUG_COUT << "Config " << option_name << " = " << option_value << std::endl;
}
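// Parses an integer debug option from the environment; 'val' keeps its previous value when the variable is unset.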
static void get_int_env(const std::string &var, int &val) {
if (const auto env_var = std::getenv(var.c_str())) {
val = std::stoi(env_var);
print_option(var, std::to_string(val));
}
}
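// Reads a string debug option from the environment; 'val' keeps its previous value when the variable is unset.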
static void get_str_env(const std::string &var, std::string &val) {
if (const auto env_var = std::getenv(var.c_str())) {
val = env_var;
print_option(var, val);
}
}
#endif
debug_configuration::debug_configuration()
: verbose(0)
, dump_graphs(std::string()) {
#ifdef GPU_DEBUG_CONFIG
const std::string OV_GPU_VERBOSE("OV_GPU_Verbose");
const std::string OV_GPU_DUMP_GRAPHS("OV_GPU_DumpGraphs");
if (const auto env_var = std::getenv(OV_GPU_VERBOSE.c_str())) {
verbose = std::stoi(env_var);
print_option(OV_GPU_VERBOSE, std::to_string(verbose));
}
if (const auto env_var = std::getenv(OV_GPU_DUMP_GRAPHS.c_str())) {
dump_graphs = env_var;
print_option(OV_GPU_DUMP_GRAPHS, dump_graphs);
}
get_int_env("OV_GPU_Verbose", verbose);
get_str_env("OV_GPU_DumpGraphs", dump_graphs);
#endif
}

View File

@ -5,6 +5,7 @@
#include "kernels_factory.hpp"
#include "kernels_cache.hpp"
#include "ocl/ocl_engine.hpp"
#include "cldnn/runtime/debug_configuration.hpp"
#include <algorithm>
#include <cassert>
@ -372,6 +373,10 @@ void kernels_cache::build_batch(const engine& build_engine, const batch_program&
dump_file << "*/\n";
}
if (!err_log.empty()) {
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->verbose) {
std::cout << err_log << std::endl;
}
throw std::runtime_error("Program build failed. You may enable OCL source dump to see the error log.\n");
}
}

View File

@ -49,6 +49,7 @@ public:
gather_params.axis = convert_axis(arg.get_primitive()->axis);
gather_params.batch_dim = size_t(arg.get_primitive()->batch_dim);
gather_params.support_neg_ind = arg.get_primitive()->support_neg_ind;
gather_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));

View File

@ -12,6 +12,136 @@
using namespace cldnn;
using namespace ::tests;
TEST(gather8_gpu_fp16, d323_axisY_bdim_m1) {
// Dictionary : 3x2x3x4x2
// Indexes : 3x2x3x1
// Axis : 3
// batch_dim : -1
// Output : 3x2x3x3x2
// Input values in fp16
// Indexes:
// 0.f, 0.f, 0.f, 3.f, -3.f, 0.f, 1.f, -3.f, 1.f, -2.f, 0.f, 3.f, -1.f, 1.f, 0.f, 2.f, 0.f, 1.f
//
// Dictionary:
// 1.f 2.f 3.f 4.f 5.f 6.f 7.f 8.f 9.f 10.f 11.f 12.f 13.f 14.f 15.f 16.f 17.f 18.f
// 19.f 20.f 21.f 22.f 23.f 24.f 25.f 26.f 27.f 28.f 29.f 30.f 31.f 32.f 33.f 34.f 35.f 36.f
// 37.f 38.f 39.f 40.f 41.f 42.f 43.f 44.f 45.f 46.f 47.f 48.f 49.f 50.f 51.f 52.f 53.f 54.f
// 55.f 56.f 57.f 58.f 59.f 60.f 61.f 62.f 63.f 64.f 65.f 66.f 67.f 68.f 69.f 70.f 71.f 72.f
// 73.f 74.f 75.f 76.f 77.f 78.f 79.f 80.f 81.f 82.f 83.f 84.f 85.f 86.f 87.f 88.f 89.f 90.f
// 91.f 92.f 93.f 94.f 95.f 96.f 97.f 98.f 99.f 100.f 101.f 102.f 103.f 104.f 105.f 106.f 107.f 108.f
// 109.f 110.f 111.f 112.f 113.f 114.f 115.f 116.f 117.f 118.f 119.f 120.f 121.f 122.f 123.f 124.f 125.f 126.f
// 127.f 128.f 129.f 130.f 131.f 132.f 133.f 134.f 135.f 136.f 137.f 138.f 139.f 140.f 141.f 142.f 143.f 144.f
//
// Output:
// 1.f 2.f 1.f 2.f 1.f 2.f 9.f 10.f 9.f 10.f 9.f 10.f
// 17.f 18.f 17.f 18.f 17.f 18.f 31.f 32.f 27.f 28.f 25.f 26.f
// 39.f 40.f 35.f 36.f 33.f 34.f 47.f 48.f 43.f 44.f 41.f 42.f
// 51.f 52.f 51.f 52.f 51.f 52.f 59.f 60.f 59.f 60.f 59.f 60.f
// 67.f 68.f 67.f 68.f 67.f 68.f 77.f 78.f 73.f 74.f 79.f 80.f
// 85.f 86.f 81.f 82.f 87.f 88.f 93.f 94.f 89.f 90.f 95.f 96.f
// 103.f 104.f 99.f 100.f 97.f 98.f 111.f 112.f 107.f 108.f 105.f 106.f
// 119.f 120.f 115.f 116.f 113.f 114.f 125.f 126.f 121.f 122.f 123.f 124.f
// 133.f 134.f 129.f 130.f 131.f 132.f 141.f 142.f 137.f 138.f 139.f 140.f
auto& engine = get_test_engine();
auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 2, 4, 3} }); // Dictionary
auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 3 } }); // Indexes
auto axis = cldnn::gather::gather_axis::along_y;
int64_t batch_dim = -1;
bool negative_indexes = true;
set_values(input1, {
FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f), FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f), FLOAT16(7.f), FLOAT16(8.f),
FLOAT16(9.f), FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f), FLOAT16(13.f), FLOAT16(14.f), FLOAT16(15.f), FLOAT16(16.f),
FLOAT16(17.f), FLOAT16(18.f), FLOAT16(19.f), FLOAT16(20.f), FLOAT16(21.f), FLOAT16(22.f), FLOAT16(23.f), FLOAT16(24.f),
FLOAT16(25.f), FLOAT16(26.f), FLOAT16(27.f), FLOAT16(28.f), FLOAT16(29.f), FLOAT16(30.f), FLOAT16(31.f), FLOAT16(32.f),
FLOAT16(33.f), FLOAT16(34.f), FLOAT16(35.f), FLOAT16(36.f), FLOAT16(37.f), FLOAT16(38.f), FLOAT16(39.f), FLOAT16(40.f),
FLOAT16(41.f), FLOAT16(42.f), FLOAT16(43.f), FLOAT16(44.f), FLOAT16(45.f), FLOAT16(46.f), FLOAT16(47.f), FLOAT16(48.f),
FLOAT16(49.f), FLOAT16(50.f), FLOAT16(51.f), FLOAT16(52.f), FLOAT16(53.f), FLOAT16(54.f), FLOAT16(55.f), FLOAT16(56.f),
FLOAT16(57.f), FLOAT16(58.f), FLOAT16(59.f), FLOAT16(60.f), FLOAT16(61.f), FLOAT16(62.f), FLOAT16(63.f), FLOAT16(64.f),
FLOAT16(65.f), FLOAT16(66.f), FLOAT16(67.f), FLOAT16(68.f), FLOAT16(69.f), FLOAT16(70.f), FLOAT16(71.f), FLOAT16(72.f),
FLOAT16(73.f), FLOAT16(74.f), FLOAT16(75.f), FLOAT16(76.f), FLOAT16(77.f), FLOAT16(78.f), FLOAT16(79.f), FLOAT16(80.f),
FLOAT16(81.f), FLOAT16(82.f), FLOAT16(83.f), FLOAT16(84.f), FLOAT16(85.f), FLOAT16(86.f), FLOAT16(87.f), FLOAT16(88.f),
FLOAT16(89.f), FLOAT16(90.f), FLOAT16(91.f), FLOAT16(92.f), FLOAT16(93.f), FLOAT16(94.f), FLOAT16(95.f), FLOAT16(96.f),
FLOAT16(97.f), FLOAT16(98.f), FLOAT16(99.f), FLOAT16(100.f), FLOAT16(101.f), FLOAT16(102.f), FLOAT16(103.f), FLOAT16(104.f),
FLOAT16(105.f), FLOAT16(106.f), FLOAT16(107.f), FLOAT16(108.f), FLOAT16(109.f), FLOAT16(110.f), FLOAT16(111.f), FLOAT16(112.f),
FLOAT16(113.f), FLOAT16(114.f), FLOAT16(115.f), FLOAT16(116.f), FLOAT16(117.f), FLOAT16(118.f), FLOAT16(119.f), FLOAT16(120.f),
FLOAT16(121.f), FLOAT16(122.f), FLOAT16(123.f), FLOAT16(124.f), FLOAT16(125.f), FLOAT16(126.f), FLOAT16(127.f), FLOAT16(128.f),
FLOAT16(129.f), FLOAT16(130.f), FLOAT16(131.f), FLOAT16(132.f), FLOAT16(133.f), FLOAT16(134.f), FLOAT16(135.f), FLOAT16(136.f),
FLOAT16(137.f), FLOAT16(138.f), FLOAT16(139.f), FLOAT16(140.f), FLOAT16(141.f), FLOAT16(142.f), FLOAT16(143.f), FLOAT16(144.f)
});
set_values(input2, {
0.f, 0.f, 0.f,
3.f, -3.f, 0.f,
1.f, -3.f, 1.f,
-2.f, 0.f, 3.f,
-1.f, 1.f, 0.f,
2.f, 0.f, 1.f
});
topology topology;
topology.add(input_layout("InputDictionary", input1->get_layout()));
topology.add(input_layout("InputText", input2->get_layout()));
topology.add(
gather("gather", "InputDictionary", "InputText", axis, format::bfzyx, tensor(3, 2, 2, 3, 3), batch_dim, negative_indexes)
);
network network(engine, topology);
network.set_input_data("InputDictionary", input1);
network.set_input_data("InputText", input2);
auto outputs = network.execute();
auto output = outputs.at("gather").get_memory();
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
std::vector<float> expected_results = {
1.f, 2.f, 1.f, 2.f, 1.f, 2.f,
9.f, 10.f, 9.f, 10.f, 9.f, 10.f,
17.f, 18.f, 17.f, 18.f, 17.f, 18.f,
31.f, 32.f, 27.f, 28.f, 25.f, 26.f,
39.f, 40.f, 35.f, 36.f, 33.f, 34.f,
47.f, 48.f, 43.f, 44.f, 41.f, 42.f,
51.f, 52.f, 51.f, 52.f, 51.f, 52.f,
59.f, 60.f, 59.f, 60.f, 59.f, 60.f,
67.f, 68.f, 67.f, 68.f, 67.f, 68.f,
77.f, 78.f, 73.f, 74.f, 79.f, 80.f,
85.f, 86.f, 81.f, 82.f, 87.f, 88.f,
93.f, 94.f, 89.f, 90.f, 95.f, 96.f,
103.f, 104.f, 99.f, 100.f, 97.f, 98.f,
111.f, 112.f, 107.f, 108.f, 105.f, 106.f,
119.f, 120.f, 115.f, 116.f, 113.f, 114.f,
125.f, 126.f, 121.f, 122.f, 123.f, 124.f,
133.f, 134.f, 129.f, 130.f, 131.f, 132.f,
141.f, 142.f, 137.f, 138.f, 139.f, 140.f
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
}
}
TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) {
// Dictionary : 2x2x2x2x2x2
// Indexes : 2x2x2x1

View File

@ -1,10 +1,8 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
if (NOT NGRAPH_PYTHON_BUILD_ENABLE)
message(WARNING "Please enable nGraph Python API (_pyngraph) target to enable Model Optimizer target")
elseif(NOT ENABLE_PYTHON)
message(WARNING "Please enable IE Python API (ie_api and offline_transformations_api) targets to enable Model Optimizer target")
if(NOT ENABLE_PYTHON)
message(WARNING "Please enable IE & nGraph Python API (ie_api and offline_transformations_api) targets to enable Model Optimizer target")
else()
add_custom_target(model_optimizer DEPENDS ie_api offline_transformations_api inference_engine_ir_reader)
if(ENABLE_TESTS)

View File

@ -154,7 +154,7 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
size_splits.append(l - prev_r)
shape[split_channel_dim] = l - prev_r
data_node = Op._create_data_node(graph, 'fake_data_'+out_nodes[0].name, {'shape': shape})
add_opoutput(graph, data_node.id, 0, False)
add_opoutput(graph, data_node.id, 0, False, keep_output_port=True)
final_data_nodes_list.append(data_node)
prev_r = r
@ -167,7 +167,7 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
shape[split_channel_dim] = input_shape[split_channel_dim] - prev_r
size_splits.append(input_shape[split_channel_dim] - prev_r)
data_node = Op._create_data_node(graph, 'fake_data_'+out_nodes[0].name, {'shape': shape})
add_opoutput(graph, data_node.id, 0, False)
add_opoutput(graph, data_node.id, 0, False, keep_output_port=True)
final_data_nodes_list.append(data_node)
for node in out_nodes:

View File

@ -1032,21 +1032,24 @@ def dict_includes(big: dict, sub_dict: dict, skip_attr_names=[]):
)
def add_opoutput(graph: Graph, node_name: str, port: int, cut: bool = True):
def add_opoutput(graph: Graph, node_name: str, port: int, cut: bool = True, keep_output_port: bool = False):
"""
Creates and connects Result node to node_name port. Cuts existing port if requested.
:param graph: graph to operate with
:param node_name: name of existing node in the graph that we want to add Result to
:param port: output port of node to connect Result to
:param cut: determines way of operating with edge specified by node_name and port
:param keep_output_port: special attribute that determines whether this Result operation is kept in the resulting IR
"""
# we import it here because Op imports add_attrs_props and update_ie_fields from this file
from mo.ops.result import Result
node = Node(graph, node_name)
if cut and len(node.out_edges()) != 0:
opoutput_node = Result(graph).create_node_on_port(node, port, {'name': node_name + '/sink_port_' + str(port)})
opoutput_node = Result(graph).create_node_on_port(node, port, {'name': node_name + '/sink_port_' + str(port),
'keep_output_port': keep_output_port})
else:
opoutput_node = Result(graph).create_node([(node, port)], {'name': node_name + '/sink_port_' + str(port)})
opoutput_node = Result(graph).create_node([(node, port)], {'name': node_name + '/sink_port_' + str(port),
'keep_output_port': keep_output_port})
opoutput_node.in_edge()['data_attrs'] = ['fw_tensor_debug_info']
log.debug('Sink: {} for node {}'.format(opoutput_node.id, node_name))

Some files were not shown because too many files have changed in this diff