Merge remote-tracking branch 'upstream/master'
commit 54182c03b2

@@ -1,5 +1,20 @@
jobs:
- job: nGraph_ONNX_Lin
- job: OpenVINO_ONNX_CI
strategy:
matrix:
Release:
BUILD_TYPE: 'Release'
PROTOBUF_LITE: 'OFF'
TOX_COMMAND: 'tox && tox -e zoo_models'
Debug:
BUILD_TYPE: 'Debug'
PROTOBUF_LITE: 'OFF'
TOX_COMMAND: 'tox'
Protobuf_lite:
BUILD_TYPE: 'Release'
PROTOBUF_LITE: 'ON'
TOX_COMMAND: 'tox && tox -e zoo_models'
maxParallel: 3

# About 300% of total time
timeoutInMinutes: 90
@@ -12,7 +27,6 @@ jobs:
VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 8
BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath)
WORK_DIR: $(Pipeline.Workspace)/_w
MODELS_DIR: /mount/cinfsshare/onnxtestdata
@@ -54,31 +68,16 @@ jobs:
submodules: recursive
path: openvino

- script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile .
displayName: 'Docker build'
- script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) .
displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'

- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)"
displayName: 'Get models'

- script: |
##wget -O "$(TMP_DIR)/msft.zip" https://onnxruntimetestdata.blob.core.windows.net/models/20191107.zip
##unzip "$(TMP_DIR)/msft.zip" -d "$(MODELS_DIR)/msft"
#unzip "/mnt/onnxtestdata/models/20191107.zip" -d "$(MODELS_DIR)/msft"
#mv $(MODELS_DIR)/msft/opset9/LSTM_Seq_lens_unpacked/seq_lens_sorted $(MODELS_DIR)/msft/opset9/LSTM_Seq_lens_unpacked/test_data_set_0
#mv $(MODELS_DIR)/msft/opset9/LSTM_Seq_lens_unpacked/seq_lens_unsorted $(MODELS_DIR)/msft/opset9/LSTM_Seq_lens_unpacked/test_data_set_1
displayName: 'Get MSFT models'
enabled: false

- script: |
ls -alR $(MODELS_DIR)
ls -alR $(TMP_DIR)
displayName: 'List models'
enabled: false
condition: ne(variables['BUILD_TYPE'], 'Debug')

- script: sudo fallocate -l 48G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h
displayName: 'Create swap'

- script: |
docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "tox && tox -e zoo_models"
displayName: 'Docker run'

docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)"
displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'
12 .gitmodules (vendored)
@@ -38,3 +38,15 @@
path = thirdparty/ocl/clhpp_headers
url = https://github.com/KhronosGroup/OpenCL-CLHPP.git
ignore = dirty
[submodule "thirdparty/onnx"]
path = thirdparty/onnx/onnx
url = https://github.com/openvinotoolkit/onnx.git
[submodule "thirdparty/protobuf"]
path = thirdparty/protobuf/protobuf
url = https://github.com/protocolbuffers/protobuf.git
[submodule "ngraph/python/pybind11"]
path = ngraph/python/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "thirdparty/ittapi/ittapi"]
path = thirdparty/ittapi/ittapi
url = https://github.com/intel/ittapi.git
@@ -63,12 +63,6 @@ function(build_ngraph)
ngraph_set(NGRAPH_PDPD_FRONTEND_ENABLE OFF)
endif()

if(ENABLE_PYTHON)
ngraph_set(NGRAPH_PYTHON_BUILD_ENABLE ON)
else()
ngraph_set(NGRAPH_PYTHON_BUILD_ENABLE OFF)
endif()

if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
ie_add_compiler_flags(-Wno-error=uninitialized -Wno-error=literal-conversion)
elseif(UNIX)
@@ -32,12 +32,12 @@ if(COMMAND get_linux_name)
endif()

if(CMAKE_CROSSCOMPILING AND CMAKE_HOST_SYSTEM_NAME MATCHES Linux AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(protoc_version "3.7.1")
set(protoc_version "3.9.2")

RESOLVE_DEPENDENCY(SYSTEM_PROTOC_ROOT
ARCHIVE_LIN "protoc-${protoc_version}-linux-x86_64.tar.gz"
TARGET_PATH "${TEMP}/protoc-${protoc_version}-linux-x86_64"
SHA256 "a1bedd5c05ca51e49f8f254faa3d7331e05b3a806c151fb111d582f154d0fee8"
SHA256 "1d6da1d97d0cbfcd333558afe24533eb3cb48dc1e0ab5e971aa1e50ede8bcf45"
)
debug_message(STATUS "host protoc-${protoc_version} root path = " ${SYSTEM_PROTOC_ROOT})
@@ -249,6 +249,25 @@ function(ie_mark_target_as_cc TARGET_NAME)
set_source_files_properties(${sources} PROPERTIES OBJECT_DEPENDS ${GENERATED_HEADER})
endfunction()

# check python package

function(ie_check_pip_package name message_type)
find_package(PythonInterp 3 REQUIRED)

execute_process(
COMMAND ${PYTHON_EXECUTABLE} -m pip show ${name}
RESULT_VARIABLE PIP_EXIT_CODE
OUTPUT_QUIET
)

if(NOT PIP_EXIT_CODE EQUAL 0)
set(${name}_FOUND OFF PARENT_SCOPE)
message(${message_type} "${name} package is not installed. Please use \"${PYTHON_EXECUTABLE} -m pip install ${name}\".")
else()
set(${name}_FOUND ON PARENT_SCOPE)
endif()
endfunction()

# Code style utils

include(cpplint/cpplint)
@@ -88,9 +88,12 @@ function(_ie_add_api_validator_post_build_step)

macro(api_validator_get_target_name)
get_target_property(IS_IMPORTED ${target} IMPORTED)
get_target_property(orig_target ${target} ALIASED_TARGET)
if(IS_IMPORTED)
get_target_property(target_location ${target} LOCATION)
get_filename_component(target_name "${target_location}" NAME_WE)
elseif(TARGET "${orig_target}")
set(target_name ${orig_target})
else()
set(target_name ${target})
endif()
@@ -5,13 +5,36 @@
include(CheckCXXCompilerFlag)

if (ENABLE_SANITIZER)
set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=address -fno-omit-frame-pointer")
CHECK_CXX_COMPILER_FLAG("-fsanitize-recover=address" SANITIZE_RECOVER_SUPPORTED)
if (SANITIZE_RECOVER_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=address")
CHECK_CXX_COMPILER_FLAG("-fsanitize-recover=address" SANITIZE_RECOVER_ADDRESS_SUPPORTED)
if (SANITIZE_RECOVER_ADDRESS_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address")
endif()

set(SANITIZER_LINKER_FLAGS "-fsanitize=address")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=address")
endif()

if (ENABLE_UB_SANITIZER)
# TODO: Remove -fno-sanitize=null as thirdparty/ocl/clhpp_headers UBSAN compatibility resolved:
# https://github.com/KhronosGroup/OpenCL-CLHPP/issues/17
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=undefined -fno-sanitize=null")
CHECK_CXX_COMPILER_FLAG("-fsanitize-recover=undefined" SANITIZE_RECOVER_UNDEFINED_SUPPORTED)
if (SANITIZE_RECOVER_UNDEFINED_SUPPORTED)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=undefined")
endif()

set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=undefined")
endif()

if (ENABLE_THREAD_SANITIZER)
set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=thread")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fsanitize=thread")
endif()

# common sanitizer options
if (DEFINED SANITIZER_COMPILER_FLAGS)
# ensure sumbols are present
set(SANITIZER_COMPILER_FLAGS "-g -fno-omit-frame-pointer")
# prevent unloading libraries at runtime, so sanitizer can resolve their symbols
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")

@@ -29,22 +52,3 @@ if (ENABLE_SANITIZER)
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
endif()

if (ENABLE_THREAD_SANITIZER)
set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=thread -fno-omit-frame-pointer")
set(SANITIZER_LINKER_FLAGS "-fsanitize=thread")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete")

if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32)
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
else()
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -static-libsan")
endif()
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SANITIZER_LINKER_FLAGS}")
endif()
@@ -20,10 +20,12 @@ endif()
# FIXME: ARM cross-compiler generates several "false positive" warnings regarding __builtin_memcpy buffer overflow
ie_dependent_option (TREAT_WARNING_AS_ERROR "Treat build warnings as errors" ON "X86 OR X86_64" OFF)

ie_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF)
ie_dependent_option (ENABLE_INTEGRITYCHECK "build DLLs with /INTEGRITYCHECK flag" OFF "CMAKE_CXX_COMPILER_ID STREQUAL MSVC" OFF)

ie_option (ENABLE_SANITIZER "enable checking memory errors via AddressSanitizer" OFF)

ie_option (ENABLE_UB_SANITIZER "enable UndefinedBahavior sanitizer" OFF)

ie_option (ENABLE_THREAD_SANITIZER "enable checking data races via ThreadSanitizer" OFF)

ie_dependent_option (ENABLE_COVERAGE "enable code coverage" OFF "CMAKE_CXX_COMPILER_ID STREQUAL GNU" OFF)
@@ -13,7 +13,8 @@ set_and_check(IE_MAIN_SOURCE_DIR "@IE_MAIN_SOURCE_DIR@") # HDDL

# Variables to export in plugin's projects

set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH")
set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH;")
list(APPEND ie_options CMAKE_CXX_COMPILER_LAUNCHER CMAKE_C_COMPILER_LAUNCHER)
file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path)

message(STATUS "The following CMake options are exported from Inference Engine Developer package")
50 cmake/toolchains/oecore.arm64.toolchain.cmake (new file)
@@ -0,0 +1,50 @@
#
# Copyright 2020 Intel Corporation.
#
# LEGAL NOTICE: Your use of this software and any required dependent software
# (the "Software Package") is subject to the terms and conditions of
# the Intel(R) OpenVINO(TM) Distribution License for the Software Package,
# which may also include notices, disclaimers, or license terms for
# third party or open source software included in or with the Software Package,
# and your use indicates your acceptance of all such terms. Please refer
# to the "third-party-programs.txt" or other similarly-named text file
# included with the Software Package for additional details.
#

if(DEFINED OECORE_BASE_DIR)
# OECORE_BASE_DIR was passed via CMake command line, nothing to do
elseif(DEFINED ENV{OECORE_BASE_DIR})
# User sets OECORE_BASE_DIR environment variable
set(OECORE_BASE_DIR $ENV{OECORE_BASE_DIR})
elseif(DEFINED ENV{OECORE_NATIVE_SYSROOT})
# OECORE_NATIVE_SYSROOT is a default environment variable for the OECore toolchain
set(OECORE_BASE_DIR "$ENV{OECORE_NATIVE_SYSROOT}/../..")
else()
# Use default value
set(OECORE_BASE_DIR "/usr/local/oecore-x86_64")
endif()

set(OECORE_TARGET_NAME "aarch64-ese-linux")
set(OECORE_TARGET_SYSROOT "${OECORE_BASE_DIR}/sysroots/${OECORE_TARGET_NAME}")
set(OECORE_HOST_SYSROOT "${OECORE_BASE_DIR}/sysroots/x86_64-esesdk-linux")
set(OECORE_HOST_COMPILER_BIN_DIR "${OECORE_HOST_SYSROOT}/usr/bin/${OECORE_TARGET_NAME}")

set(CMAKE_SYSTEM_NAME "Linux")
set(CMAKE_SYSTEM_PROCESSOR "aarch64")

set(CMAKE_SYSROOT "${OECORE_TARGET_SYSROOT}")

set(CMAKE_C_COMPILER "${OECORE_HOST_COMPILER_BIN_DIR}/aarch64-ese-linux-gcc")
set(CMAKE_CXX_COMPILER "${OECORE_HOST_COMPILER_BIN_DIR}/aarch64-ese-linux-g++")

set(CMAKE_C_FLAGS_INIT "-mcpu=cortex-a53 -mtune=cortex-a53 --sysroot=${OECORE_TARGET_SYSROOT}")
set(CMAKE_CXX_FLAGS_INIT "-mcpu=cortex-a53 -mtune=cortex-a53 --sysroot=${OECORE_TARGET_SYSROOT}")

set(CMAKE_EXE_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}")
set(CMAKE_SHARED_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}")
set(CMAKE_MODULE_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}")

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
@@ -48,7 +48,6 @@ if(NOT ENABLE_DOCKER)
LIBRARY DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests EXCLUDE_FROM_ALL)
endif()

set(LINKCHECKER_PY "" CACHE FILEPATH "Path to linkchecker.py for documentation check")
set(OMZ_DOCS_DIR "" CACHE PATH "Path to open_model_zoo documentation")
set(WORKBENCH_DOCS_DIR "" CACHE PATH "Path to workbench documentation")
set(POT_DOCS_DIR "" CACHE PATH "Path to post-training-compression-tool documentation")
@@ -56,18 +55,14 @@ set(GST_DOCS_DIR "" CACHE PATH "Path to gst-video-analytics documentation")

function(build_docs)
find_package(Doxygen REQUIRED dot)
find_package(PythonInterp 3 REQUIRED)
find_package(LATEX REQUIRED)

execute_process(
COMMAND ${PYTHON_EXECUTABLE} -m pip show lxml
RESULT_VARIABLE PIP_EXIT_CODE
OUTPUT_QUIET
)
ie_check_pip_package(lxml FATAL_ERROR)
ie_check_pip_package(LinkChecker WARNING)

if (NOT ${PIP_EXIT_CODE} EQUAL 0)
message(FATAL_ERROR "lxml package is not installed. Please use \"pip install lxml\".")
endif()
find_host_program(LINKCHECKER_PY
NAMES linkchecker
DOC "linkchecker tools for documentation check")

set(DOCS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(DOXYGEN_DIR "${OpenVINO_SOURCE_DIR}/docs/doxygen")
@@ -357,7 +352,7 @@ function(build_docs)

if(EXISTS "${LINKCHECKER_PY}")
add_custom_target(docs_check
COMMAND ${PYTHON_EXECUTABLE} "${LINKCHECKER_PY}" -v "${DOCS_BUILD_DIR}/html/"
COMMAND "${LINKCHECKER_PY}" -v "${DOCS_BUILD_DIR}/html/"
COMMENT "Check links in generated documentation"
WORKING_DIRECTORY "${DOCS_BUILD_DIR}"
VERBATIM)
@@ -210,11 +210,6 @@ It's allowed to specify additional build options (e.g. to build CMake project on

### Run Your Application

> **NOTE**: Before running, make sure you completed **Set the Environment Variables** section in [OpenVINO Installation](../../inference-engine/samples/hello_nv12_input_classification/README.md) document so that the application can find the libraries.

To run compiled applications on Microsoft* Windows* OS, make sure that Microsoft* Visual C++ 2017
Redistributable and Intel® C++ Compiler 2017 Redistributable packages are installed and
`<INSTALL_DIR>/bin/intel64/Release/*.dll` files are placed to the
application folder or accessible via `%PATH%` environment variable.
Before running, make sure you completed **Set the Environment Variables** section in [OpenVINO Installation](../../inference-engine/samples/hello_nv12_input_classification/README.md) document so that the application can find the libraries.

[integration_process]: img/integration_process.png
@@ -1,6 +1,5 @@
openvino/inference-engine/samples/hello_reshape_ssd/README.md
openvino/docs/index.md
inference-engine/include/ie_icnn_network.hpp
openvino/docs/get_started/get_started_dl_workbench.md
openvino/docs/get_started/get_started_linux.md
openvino/docs/get_started/get_started_raspbian.md
@@ -11,25 +10,14 @@ openvino/docs/install_guides/deployment-manager-tool.md
openvino/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md
openvino/docs/ovsa/ovsa_get_started.md
openvino/inference-engine/ie_bridges/c/docs/api_overview.md
inference-engine/include/cpp/ie_infer_request.hpp
inference-engine/include/ie_parallel.hpp
inference-engine/include/gpu/gpu_context_api_ocl.hpp
inference-engine/include/gpu/gpu_context_api_va.hpp
inference-engine/include/ie_plugin_config.hpp
inference-engine/include/ie_unicode.hpp
inference-engine/include/vpu/myriad_config.hpp
inference-engine/include/vpu/vpu_config.hpp
inference-engine/include/vpu/vpu_plugin_config.hpp
openvino/docs/benchmarks/performance_int8_vs_fp32.md
openvino/docs/get_started/get_started_macos.md
openvino/docs/optimization_guide/dldt_optimization_guide.md
openvino/docs/IE_DG/ShapeInference.md
inference-engine/include/details/ie_so_pointer.hpp
inference-engine/include/ie_compound_blob.h
inference-engine/include/ie_data.h
inference-engine/include/ie_blob.h
inference-engine/include/ie_precision.hpp
inference-engine/include/ie_remote_context.hpp
inference-engine/include/gpu/gpu_context_api_dx.hpp
build/docs/openvino_docs.xml
openvino/docs/install_guides/installing-openvino-linux-ivad-vpu.md
inference-engine/include/ie_parallel.hpp
inference-engine/include/ie_plugin_config.hpp
inference-engine/include/vpu/myriad_config.hpp
inference-engine/include/vpu/vpu_config.hpp
inference-engine/include/vpu/vpu_plugin_config.hpp
@@ -913,12 +913,14 @@ EXCLUDE_SYMBOLS = InferenceEngine::details \
DECLARE_*METRIC_KEY \
DECLARE_*METRIC_VALUE \
DECLARE_*CONFIG_KEY \
DECLARE_VPU_CONFIG \
VPU_CONFIG_KEY \
VPU_CONFIG_VALUE \
VPU_METRIC \
DECLARE_*CONFIG_VALUE \
DECLARE_PARAM_KEY_IMPL \
TBB_PREVIEW_LOCAL_OBSERVER \
PARTITIONING \
CALL_STATUS_FNC* \
CALL_FNC* \
__PRETTY_FUNCTION__ \
PRINT_COLOR_FORMAT \
PRINT_LAYOUT \
@@ -943,6 +945,8 @@ EXCLUDE_SYMBOLS = InferenceEngine::details \
InferenceEngine::parallel_* \
NOMINMAX \
TBB_PREVIEW_NUMA_SUPPORT \
TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION \
_TBB_REDUCE_FUNC \
IE_THREAD_*

# The EXAMPLE_PATH tag can be used to specify one or more files or directories
@@ -6,31 +6,27 @@

**Short description**: *Atan* performs element-wise inverse tangent (arctangent) operation with given tensor.

**Attributes**:

No attributes available.

**Inputs**

* **1**: An tensor of type *T*. **Required.**

**Outputs**

* **1**: The result of element-wise atan operation. A tensor of type *T*.

**Types**

* *T*: any numeric type.

*atan* does the following with the input tensor *a*:
**Detailed description**: Operation takes one input tensor and performs the element-wise inverse tangent function on a given input tensor, based on the following mathematical formula:

\f[
a_{i} = atan(a_{i})
\f]

**Examples**
**Attributes**: *Atan* operation has no attributes.

*Example 1*
**Inputs**

* **1**: A tensor of type *T* and arbitrary shape. **Required.**

**Outputs**

* **1**: The result of element-wise *Atan* applied to the input tensor. A tensor of type *T* and same shape as the input tensor.

**Types**

* *T*: any supported numeric type.

**Examples**

```xml
<layer ... type="Atan">
@@ -4,11 +4,15 @@

**Category**: Arithmetic unary operation

**Short description**: *Sinh* performs element-wise hyperbolic sine (sinh) operation with given tensor.
**Short description**: *Sinh* performs element-wise hyperbolic sine (sinh) operation on a given input tensor

**Attributes**:
**Detailed description**: *Sinh* performs element-wise hyperbolic sine (sinh) operation on a given input tensor, based on the following mathematical formula:

No attributes available.
\f[
a_{i} = sinh(a_{i})
\f]

**Attributes**: *Sinh* operation has no attributes.

**Inputs**

@@ -16,21 +20,13 @@

**Outputs**

* **1**: The result of element-wise sinh operation. A tensor of type *T*.
* **1**: The result of element-wise *Sinh* operation applied to the input tensor. A tensor of type *T* and the same shape as input tensor.

**Types**

* *T*: any numeric type.
* *T*: any supported numeric type.

*sinh* does the following with the input tensor *a*:

\f[
a_{i} = sinh(a_{i})
\f]

**Examples**

*Example 1*
**Example**

```xml
<layer ... type="Sinh">
@@ -44,7 +44,7 @@ Output(i,j,k) = max(Input[d_{start}:d_{end}, h_{start}:h_{end}, w_{start}:w_{end
**Outputs**:

* **1**: Output of type *T* and shape `[N, C, H_out]`, `[N, C, H_out, W_out]` or `[N, C, D_out, H_out, W_out]`.
* **2**: Output of type specified by *index_element_type* and same shape as the first output containing indices of elements in the first output. The values of indices are computed as if input was flatten 1-D tensor, so the values are in the range `[0, N * C * H * W * D)`.
* **2**: Output of type specified by *index_element_type* and same shape as the first output containing indices of elements in the first output. The values of indices are computed as if input spatial dimensions were flatten, so the values are in the range `[0, H * W * D)`.

**Types**
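Note on the revised second output above: the index range shrinks from `[0, N * C * H * W * D)` to `[0, H * W * D)` because indices are now computed per channel over the flattened spatial dimensions only. A minimal illustrative sketch (not OpenVINO code; row-major layout assumed):

```cpp
#include <cstdint>

// Flatten a spatial coordinate (d, h, w) of a [N, C, D, H, W] input into the
// per-channel index range [0, D * H * W), as the revised MaxPool second
// output describes.
int64_t flatten_spatial_index(int64_t d, int64_t h, int64_t w,
                              int64_t H, int64_t W) {
    return (d * H + h) * W + w;
}
```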
@@ -235,9 +235,8 @@ IEStatusCode ie_core_create(const char *xml_config_file, ie_core_t **core) {

IEStatusCode status = IEStatusCode::OK;
try {
std::unique_ptr<ie_core_t> tmp(new ie_core_t);
tmp->object = IE::Core(xml_config_file);
*core = tmp.release();
auto object = IE::Core(xml_config_file);
*core = new ie_core_t { std::move(object) };
} CATCH_IE_EXCEPTIONS

return status;
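The rewritten `ie_core_create` drops the `std::unique_ptr` staging: the `IE::Core` is constructed first and the handle is aggregate-initialized from it, so a throwing constructor is still caught by `CATCH_IE_EXCEPTIONS` and no handle is allocated on failure. A minimal sketch of the pattern with hypothetical names:

```cpp
#include <utility>

struct Core { /* construction may throw in the real code */ };
struct core_handle_t { Core object; };

core_handle_t* create_handle() {
    auto object = Core{};                          // construct first; may throw
    return new core_handle_t{ std::move(object) }; // allocate handle only on success
}
```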
@@ -68,7 +68,7 @@ if(ENABLE_WHEEL)
add_subdirectory(wheel)
endif()

if (NGRAPH_PYTHON_BUILD_ENABLE)
if(TARGET _pyngraph)
add_dependencies(ie_api _pyngraph)
endif()
@@ -61,11 +61,6 @@ sudo apt install patchelf
-DENABLE_PYTHON=ON
-DENABLE_WHEEL=ON
```
If you need to include other components to the package you need to enable them too.
For example, to include ngraph python API:
```shellscript
-NGRAPH_PYTHON_BUILD_ENABLE=ON
```

## Running sample
@@ -66,7 +66,6 @@ public:
* This method need to be called to find out input names for using them later
* when calling InferenceEngine::InferRequest::SetBlob
*
* @param inputs Reference to InferenceEngine::ConstInputsDataMap object.
* @return A collection that contains string as key, and const InputInfo smart pointer as value
*/
ConstInputsDataMap GetInputsInfo() const;
@@ -235,6 +235,9 @@ public:
bool operator==(const InferRequest&) const noexcept;
};

/**
* @private
*/
template<>
struct InferRequest::SetCallback<std::function<void(InferRequest, StatusCode)>> {
void operator()(std::function<void(InferRequest, StatusCode)> f) {
@@ -245,6 +248,9 @@ struct InferRequest::SetCallback<std::function<void(InferRequest, StatusCode)>>

IE_SUPPRESS_DEPRECATED_START

/**
* @private
*/
template<>
struct InferRequest::SetCallback<IInferRequest::CompletionCallback> {
void operator()(IInferRequest::CompletionCallback f) {
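These two `@private` specializations let the completion-callback setter accept both the modern `(InferRequest, StatusCode)` functor and the deprecated `IInferRequest::CompletionCallback`. A hedged usage sketch; the exact `SetCompletionCallback` call shape is an assumption, so verify it against the installed header:

```cpp
#include <inference_engine.hpp>
#include <functional>

void attach_callback(InferenceEngine::InferRequest& request) {
    request.SetCompletionCallback(
        std::function<void(InferenceEngine::InferRequest, InferenceEngine::StatusCode)>(
            [](InferenceEngine::InferRequest r, InferenceEngine::StatusCode status) {
                // inspect status and read output blobs from r here
            }));
    request.StartAsync();  // the callback fires when inference completes
}
```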
@@ -106,8 +106,8 @@ public:
}

/**
* @brief Returns plane ID of underlying video decoder surface,
* or 0 if no video surface was shared.
* @brief Returns plane ID of underlying video decoder surface, or 0 if no video surface was shared.
* @return Plane ID
*/
uint32_t plane() {
return _ObjFromParams<uint32_t, uint32_t>(getParams(),
@@ -39,6 +39,7 @@ public:

/**
* @brief Returns the underlying OpenCL context handle.
* @return `cl_context`
*/
cl_context get() {
return _ObjFromParams<cl_context, gpu_handle_param>(getParams(), GPU_PARAM_KEY(OCL_CONTEXT),
@@ -47,7 +48,7 @@ public:

/**
* @brief OpenCL context handle conversion operator for the ClContext object.
* @return Underlying OpenCL context handle
* @return `cl_context`
*/
operator cl_context() {
return get();
@@ -55,7 +56,7 @@ public:

/**
* @brief Standard Khronos cl::Context wrapper conversion operator for the ClContext object.
* @return cl::Context object
* @return `cl::Context` object
*/
operator cl::Context() {
return cl::Context(get(), true);
@@ -101,6 +102,7 @@ public:

/**
* @brief Returns the underlying OpenCL memory object handle.
* @return underlying OpenCL memory object handle
*/
cl_mem get() {
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(), GPU_PARAM_KEY(MEM_HANDLE),
@@ -109,6 +111,7 @@ public:

/**
* @brief OpenCL memory handle conversion operator.
* @return `cl_mem`
*/
operator cl_mem() {
return get();
@@ -116,7 +119,7 @@ public:

/**
* @brief Standard Khronos cl::Buffer wrapper conversion operator.
* @return cl::Buffer object
* @return `cl::Buffer` object
*/
operator cl::Buffer() {
return cl::Buffer(get(), true);
@@ -144,6 +147,7 @@ public:

/**
* @brief Returns the underlying OpenCL memory object handle.
* @return `cl_mem`
*/
cl_mem get() {
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(), GPU_PARAM_KEY(MEM_HANDLE),
@@ -152,6 +156,7 @@ public:

/**
* @brief OpenCL memory handle conversion operator.
* @return `cl_mem`
*/
operator cl_mem() {
return get();
@@ -159,7 +164,7 @@ public:

/**
* @brief Standard Khronos cl::Image2D wrapper conversion operator for the ClContext object.
* @return cl::Image2D object
* @return `cl::Image2D` object
*/
operator cl::Image2D() {
return cl::Image2D(get(), true);
@@ -269,7 +274,7 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
* @brief This function is used to obtain remote blob object from user-supplied cl::Image2D wrapper object
* @param desc A tensor descriptor object representing remote blob configuration
* @param ctx A remote context used to create remote blob
* @param buffer A cl::Image2D object wrapped by a remote blob
* @param image A cl::Image2D object wrapped by a remote blob
* @return A remote blob instance
*/
static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, cl::Image2D& image) {
@@ -36,8 +36,8 @@ public:
using Ptr = std::shared_ptr<VAContext>;

/**
* @brief VADisplay conversion operator for the VAContext object.
* @return Underlying VADisplay object handle
* @brief `VADisplay` conversion operator for the VAContext object.
* @return Underlying `VADisplay` object handle
*/
operator VADisplay() {
return _ObjFromParams<VADisplay, gpu_handle_param>(getParams(),
@@ -67,7 +67,7 @@ public:

/**
* @brief VASurfaceID conversion operator for the VASurfaceBlob object.
* @return VA surface handle
* @return `VASurfaceID` handle
*/
operator VASurfaceID() {
return _ObjFromParams<VASurfaceID, uint32_t>(getParams(),
@@ -77,6 +77,7 @@ public:

/**
* @brief Returns plane ID of underlying video decoder surface
* @return Plane ID
*/
uint32_t plane() {
return _ObjFromParams<uint32_t, uint32_t>(getParams(),
@@ -86,11 +87,16 @@ public:
};

/**
* @brief This function is used to obtain a NV12 compound blob object from NV12 VA decoder output.
* The resulting compound contains two remote blobs for Y and UV planes of the surface.
*/
* @brief This function is used to obtain a NV12 compound blob object from NV12 VA decoder output.
* The resulting compound contains two remote blobs for Y and UV planes of the surface.
* @param height A height of Y plane
* @param width A width of Y plane
* @param ctx A remote context instance
* @param nv12_surf NV12 `VASurfaceID` to create NV12 from
* @return A remote NV12 blob wrapping `VASurfaceID`
*/
static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, VASurfaceID nv12_surf) {
// despite of layout, blob dimensions always follow in N,C,H,W order
// despite of layout, blob dimensions always follow in N, C, H, W order
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
ParamMap blobParams = {
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
@@ -107,8 +113,12 @@ static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, Remot
}

/**
* @brief This function is used to obtain remote context object from VA display handle
*/
* @brief This function is used to obtain remote context object from VA display handle
* @param core Inference Engine Core object
* @param deviceName A device name to create a remote context for
* @param device A `VADisplay` to create remote context from
* @return A remote context wrapping `VADisplay`
*/
static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceName, VADisplay device) {
ParamMap contextParams = {
{ GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED) },
@@ -118,8 +128,13 @@ static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceN
}

/**
* @brief This function is used to obtain remote blob object from VA surface handle
*/
* @brief This function is used to obtain remote blob object from VA surface handle
* @param desc Tensor descriptor
* @param ctx A remote context instance
* @param surface A `VASurfaceID` to create remote blob from
* @param plane An index of a plane inside `VASurfaceID` to create blob from
* @return A remote blob wrapping `VASurfaceID`
*/
static inline VASurfaceBlob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, VASurfaceID surface, uint32_t plane = 0) {
auto casted = std::dynamic_pointer_cast<VAContext>(ctx);
if (nullptr == casted) {
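Taken together, the VA helpers documented above compose as follows. A hedged sketch only: the `"GPU"` device name, the tensor shape, and the `InferenceEngine::gpu` namespace placement are assumptions to check against the installed headers:

```cpp
#include <gpu/gpu_context_api_va.hpp>
#include <inference_engine.hpp>

using namespace InferenceEngine;

Blob::Ptr wrap_decoder_surface(Core& core, VADisplay display, VASurfaceID surface) {
    // Share the VA display with the plugin, then wrap plane 0 of the decoder
    // surface as a remote blob of the corresponding Y-plane shape.
    auto va_ctx = gpu::make_shared_context(core, "GPU", display);
    TensorDesc ydesc(Precision::U8, {1, 1, 720, 1280}, Layout::NHWC);
    return gpu::make_shared_blob(ydesc, va_ctx, surface, /*plane=*/0);
}
```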
@@ -304,6 +304,7 @@ public:

/**
* @brief Returns the tensor description
* @return A tensor description
*/
const TensorDesc& getTensorDesc() const noexcept override {
return tensorDesc;
@@ -311,6 +312,7 @@ public:

/**
* @brief Returns the tensor description
* @return A tensor description
*/
TensorDesc& getTensorDesc() noexcept override {
return tensorDesc;
@@ -395,7 +397,7 @@ public:
*
* @return A LockedMemory object
*/
virtual LockedMemory<void> rwmap()noexcept = 0;
virtual LockedMemory<void> rwmap() noexcept = 0;

/**
* @brief Gets read only access to the memory in virtual space of the process.
@@ -419,7 +421,7 @@ public:
*
* @return A LockedMemory object
*/
virtual LockedMemory<const void> rmap()const noexcept = 0;
virtual LockedMemory<const void> rmap() const noexcept = 0;

/**
* @brief Gets "write only direction" access to the memory in virtual space of the process.
@@ -446,7 +448,7 @@ public:
*
* @return A LockedMemory object
*/
virtual LockedMemory<void> wmap()noexcept = 0;
virtual LockedMemory<void> wmap() noexcept = 0;
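For context, a hedged sketch of how the `rwmap()` family shown above is typically used through `MemoryBlob`; the `as` helper and `LockedMemory::as` are taken from the same headers, but treat the exact spelling as an assumption:

```cpp
#include <inference_engine.hpp>

void fill_first_element(const InferenceEngine::Blob::Ptr& blob) {
    if (auto mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob)) {
        auto mapped = mblob->rwmap();        // read/write mapping of the memory
        float* data = mapped.as<float*>();   // typed view of the mapped region
        data[0] = 1.0f;
    }                                        // unmapped when `mapped` leaves scope
}
```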
protected:
/**
@@ -567,11 +569,6 @@ public:
*/
virtual ~TBlob();

/**
* @brief Gets the size of the given type.
*
* @return Size of the type
*/
size_t element_size() const noexcept override {
return sizeof(T);
}
@@ -594,9 +591,6 @@ public:
return std::move(lockme<const T>());
}

/**
* @brief Allocates or reallocates memory
*/
void allocate() noexcept override {
const auto allocator = getAllocator();
const auto rawHandle = allocator->alloc(byteSize());
@@ -612,27 +606,14 @@ public:
});
}

/**
* @brief Frees all allocated data
*/
bool deallocate() noexcept override {
return free();
}

/**
* @brief Creates a new LockedMemory instance holding void pointer.
*
* @return LockedMemory instance holding void pointer
*/
LockedMemory<void> buffer() noexcept override {
return std::move(lockme<void>());
}

/**
* @brief Creates a new LockedMemory instance holding constant void pointer.
*
* @return LockedMemory instance holding constant void pointer
*/
LockedMemory<const void> cbuffer() const noexcept override {
return std::move(lockme<const void>());
}
@@ -734,6 +715,7 @@ protected:

/**
* @brief Frees handler and cleans up the stored data.
* @return `true` if memory was freed
*/
virtual bool free() {
bool bCanRelease = _handle != nullptr;
@@ -753,11 +735,6 @@ protected:
// getTensorDesc().getBlockingDesc().getOffsetPadding());
}

/**
* @brief Gets an allocator or creates a default one.
*
* @return IAllocator instance
*/
const std::shared_ptr<IAllocator>& getAllocator() const noexcept override {
// in case when constructor without allocator was used
if (!_allocator) {
@@ -767,9 +744,6 @@ protected:
return _allocator;
}

/**
* @brief Returns handle to the stored data.
*/
void* getHandle() const noexcept override {
return _handle.get();
}
@@ -73,16 +73,19 @@ public:

/**
* @brief Always returns an empty LockedMemory object
* @return Empty locked memory
*/
LockedMemory<void> buffer() noexcept override;

/**
* @brief Always returns an empty LockedMemory object
* @return Empty locked memory
*/
LockedMemory<const void> cbuffer() const noexcept override;

/**
* @brief Returns the number of underlying blobs in the compound blob
* @return A number of underlying blobs
*/
size_t size() const noexcept override;

@@ -109,9 +112,6 @@ protected:
*/
std::vector<Blob::Ptr> _blobs;

/**
* @brief Returns nullptr as CompoundBlob is not allocator-based
*/
const std::shared_ptr<IAllocator>& getAllocator() const noexcept override;
};

@@ -148,21 +148,25 @@ public:

/**
* @brief Returns a shared pointer to Y plane
* @return Y plane
*/
virtual Blob::Ptr& y() noexcept;

/**
* @brief Returns a shared pointer to Y plane
* @return Y plane
*/
virtual const Blob::Ptr& y() const noexcept;

/**
* @brief Returns a shared pointer to UV plane
* @return UV plane
*/
virtual Blob::Ptr& uv() noexcept;

/**
* @brief Returns a shared pointer to UV plane
* @return UV plane
*/
virtual const Blob::Ptr& uv() const noexcept;
@@ -93,6 +93,7 @@ public:

/**
* @brief Gets the layout value for this Data instance
* @return Layout
*/
Layout getLayout() const;

@@ -264,9 +264,9 @@ DECLARE_CONFIG_VALUE(HYBRID_AWARE);
* (and what is the optimal number of streams)
* - finally, specifying the positive integer value creates the requested number of streams
*/
DECLARE_CONFIG_KEY(CPU_THROUGHPUT_STREAMS);
DECLARE_CONFIG_VALUE(CPU_THROUGHPUT_NUMA);
DECLARE_CONFIG_VALUE(CPU_THROUGHPUT_AUTO);
DECLARE_CONFIG_KEY(CPU_THROUGHPUT_STREAMS);

/**
* @brief The name for setting performance counters option.
@@ -91,13 +91,21 @@ public:
precisionInfo.value = CUSTOM;
}

/** @brief Creates custom precision with specific underlined type */
/**
* @brief Creates custom precision with specific underlined type
* @param typeName A string name of precision
* @return Precision converted from string name
*/
template <class T>
static Precision fromType(const char* typeName = nullptr) {
return Precision(8 * sizeof(T), typeName == nullptr ? typeid(T).name() : typeName);
}

/** @brief checks whether given storage class T can be used to store objects of current precision */
/**
* @brief checks whether given storage class T can be used to store objects of current precision
* @param typeName A string name of precision
* @return `true` if `typeName` has underlaying storage type
*/
template <class T>
bool hasStorageType(const char* typeName = nullptr) const noexcept {
try {
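A hedged usage sketch of the two templates documented above (`fromType` and `hasStorageType`); the custom precision name is illustrative:

```cpp
#include <ie_precision.hpp>

void precision_example() {
    using InferenceEngine::Precision;
    // 8 * sizeof(float) = 32-bit custom precision, named after the type
    // unless an explicit name is supplied.
    Precision p = Precision::fromType<float>("fp32_custom");
    bool ok = p.hasStorageType<float>();  // true when float can back this precision
    (void)ok;
}
```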
@@ -46,9 +46,6 @@ public:
*/
explicit RemoteBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {}

/**
* @brief Returns the number of bytes per element.
*/
size_t element_size() const noexcept override {
return tensorDesc.getPrecision().size();
}
@@ -13,7 +13,7 @@ source_group("src" FILES ${LIBRARY_SRC})
source_group("include" FILES ${LIBRARY_HEADERS})

# Create library file from sources.
add_library(${TARGET_NAME} SHARED EXCLUDE_FROM_ALL ${MAIN_SRC} ${LIBRARY_HEADERS})
add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})

# Find OpenCV components if exist
find_package(OpenCV COMPONENTS core imgproc imgcodecs QUIET)
@@ -78,7 +78,6 @@ void AutoInferRequest::HotSwapRequests() {
InferenceEngine::SoExecutableNetworkInternal tempSoExecNetwork;
if (_autoExecutableNetwork->TryGetActualNetwork(tempSoExecNetwork)) {
_alreadyActualNetwork = true;
std::cout << "!!! DEBUG: HotSwapRequests !!!" << std::endl;
_inferRequest = {tempSoExecNetwork, tempSoExecNetwork->CreateInferRequest()};
_inferRequest->SetCallback(_callback);
}

@@ -84,14 +84,11 @@ std::shared_ptr<AutoExecutableNetwork> AutoInferencePlugin::LoadNetworkImpl(cons
[core, modelPath, network](const std::string& device)
-> IE::SoExecutableNetworkInternal {
IE::SoExecutableNetworkInternal executableNetwork;
std::cout << "!!! DEBUG: Starting Async loading to the " << device << " !!!" << std::endl;
std::cout << "!!! DEBUG: device full name: " << core->GetMetric(device, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>() << std::endl;
if (!modelPath.empty()) {
executableNetwork = core->LoadNetwork(modelPath, device, {});
} else {
executableNetwork = core->LoadNetwork(network, device, {});
}
std::cout << "!!! DEBUG: " << device << " was loaded !!!" << std::endl;
return executableNetwork;
};
@@ -61,6 +61,7 @@
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/convert_gather_0d.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
@@ -191,6 +192,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
manager.register_pass<ngraph::pass::ConvertNMS4ToNMS5>();
manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConvertGather0D>();
manager.register_pass<ngraph::pass::ConvertDeformableConv8To1>();

static const precisions_array convert_precision_list {
{ngraph::element::i64, ngraph::element::i32},
@@ -208,5 +208,8 @@ REGISTER_FACTORY(v6, MVN);
// ------------------------------ Supported v7 ops ------------------------------ //
REGISTER_FACTORY(v7, Gather);

// ------------------------------ Supported v8 ops ------------------------------ //
REGISTER_FACTORY(v8, Gather);

// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
@@ -57,51 +57,8 @@ static cldnn::gather::gather_axis GetGatherAxis(int32_t axis, cldnn::format inpu
}
}

void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather>& op) {
p.ValidateInputs(op, {2, 3});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);

int32_t axis = static_cast<int32_t>(op->get_axis());

std::vector<cldnn::primitive_id> reorderedInputs;
reorderedInputs.resize(inputPrimitives.size());

for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// clDNN primitive does not support i64 inputs,
// so we need additional reorders to convert them to i32
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
auto preprocessPrim = cldnn::reorder(reorderPrimName,
inputPrimitives[portIndex],
targetFormat,
cldnn::data_types::i32);
p.AddPrimitive(preprocessPrim);
p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
reorderedInputs[portIndex] = reorderPrimName;
} else {
reorderedInputs[portIndex] = inputPrimitives[portIndex];
}
}

auto outLayout = DefaultFormatForDims(op->get_output_shape(0).size());
auto gatherPrim = cldnn::gather(layerName,
reorderedInputs[0],
reorderedInputs[1],
GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size())),
outLayout,
CldnnTensorFromIEDims(op->get_output_shape(0)));

p.AddPrimitive(gatherPrim);
p.AddPrimitiveToProfiler(op);
}

REGISTER_FACTORY_IMPL(v1, Gather);

void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v7::Gather>& op) {
p.ValidateInputs(op, {2, 3, 4});
template <typename T>
void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t batch_dim = 0, bool support_neg_ind = false) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);

@@ -136,11 +93,32 @@ void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v7::Gather>& o
GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size())),
outLayout,
CldnnTensorFromIEDims(op->get_output_shape(0)),
op->get_batch_dims());
batch_dim,
support_neg_ind);

p.AddPrimitive(gatherPrim);
p.AddPrimitiveToProfiler(op);
}

void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather>& op) {
p.ValidateInputs(op, {2, 3});
CreateGatherOpBase<ngraph::op::v1::Gather>(p, op);
}

REGISTER_FACTORY_IMPL(v1, Gather);

void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v7::Gather>& op) {
p.ValidateInputs(op, {2, 3, 4});
CreateGatherOpBase<ngraph::op::v7::Gather>(p, op, op->get_batch_dims());
}

REGISTER_FACTORY_IMPL(v7, Gather);

void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v8::Gather>& op) {
p.ValidateInputs(op, {2, 3, 4});
CreateGatherOpBase<ngraph::op::v8::Gather>(p, op, op->get_batch_dims(), true);
}

REGISTER_FACTORY_IMPL(v8, Gather);

} // namespace CLDNNPlugin
@@ -25,6 +25,7 @@
#include "dnn_types.h"
#include "gna_types.h"
#include "gna_limitations.hpp"
#include "layers/gna_convolution_layer.hpp"

#if GNA_LIB_VER == 2
#include <gna2-model-api.h>
@@ -50,6 +51,9 @@

using namespace GNAPluginNS::backend;

using GNAPluginNS::GNAConvolutionLayer::outputFromConv;
using GNAPluginNS::GNAConvolutionLayer::outputFromPooling;
using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy;

void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
dump_write_index = index;
@@ -152,8 +156,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
uint32_t num_bytes_per_bias,
uint32_t num_filters,
uint32_t num_filter_coefficients,
uint32_t num_feature_map_rows,
uint32_t num_feature_map_columns,
const uint32_t convStride,
float weight_scale_factor,
float output_scale_factor,
void *&ptr_inputs,
@@ -177,8 +180,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
comp.op.conv1D.num_bytes_per_bias = num_bytes_per_bias;
comp.op.conv1D.num_filters = num_filters;
comp.op.conv1D.num_filter_coefficients = num_filter_coefficients;
comp.op.conv1D.num_feature_map_rows = num_feature_map_rows;
comp.op.conv1D.num_feature_map_columns = num_feature_map_columns;
comp.op.conv1D.convStride = convStride;
comp.op.conv1D.weight_scale_factor = weight_scale_factor;
comp.output_scale_factor = output_scale_factor;
comp.input_scale_factor = output_scale_factor / weight_scale_factor;
@@ -195,18 +197,17 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
ptr_outputs = &comp.ptr_outputs;
}

if (comp.num_columns_in % 8 != 0) {
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << comp.num_columns_in <<
if (num_columns_in % 8 != 0) {
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in <<
") is not a multiply by 8";
}
if (comp.op.conv1D.num_filters < GNALimitations::convMinFiltersNum ||
comp.op.conv1D.num_filters > GNALimitations::convMaxFiltersNum ||
comp.op.conv1D.num_filters % GNALimitations::convFiltersNumDivider != 0) {
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << comp.op.conv1D.num_filters;
if (num_filters < GNALimitations::convMinFiltersNum ||
num_filters > GNALimitations::convMaxFiltersNum ||
num_filters % GNALimitations::convFiltersNumDivider != 0) {
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
}
auto filter_stride_size = comp.op.conv1D.num_feature_map_columns;
auto max_number_of_out_elements = (comp.num_columns_in - comp.op.conv1D.num_filter_coefficients) / filter_stride_size + 1;
if (comp.num_columns_out / max_number_of_out_elements != comp.op.conv1D.num_filters) {
auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);
if (num_columns_out / max_number_of_out_elements != num_filters) {
THROW_GNA_EXCEPTION << "Number of outputs or feature map config is incorrect in Convolutional1DComponent";
}
}
@@ -538,8 +539,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename)
auto &conv = components[k].op.conv1D;
graph << "  <TR><TD> num_filters</TD><TD>" << conv.num_filters<< "</TD></TR>\n";
graph << "  <TR><TD> num_filter_coefficients</TD><TD>" << conv.num_filter_coefficients<< "</TD></TR>\n";
graph << "  <TR><TD> num_feature_map_rows</TD><TD>" << conv.num_feature_map_rows<< "</TD></TR>\n";
graph << "  <TR><TD> num_feature_map_columns</TD><TD>" << conv.num_feature_map_columns<< "</TD></TR>\n";
graph << "  <TR><TD> conv_stride</TD><TD>" << conv.convStride<< "</TD></TR>\n";
graph << "  <TR><TD> wscale</TD><TD>" << conv.weight_scale_factor<< "</TD></TR>\n";
graph << "  <TR><TD> wbit</TD><TD>" << conv.num_bytes_per_weight<< "</TD></TR>\n";
graph << "  <TR><TD> bbit</TD><TD>" << conv.num_bytes_per_bias<< "</TD></TR>\n";
@@ -936,16 +936,14 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
case kDnnConvolutional1dOp: {
uint32_t num_filters = component[i].op.conv1D.num_filters;
uint32_t num_filter_coefficients = component[i].op.conv1D.num_filter_coefficients;
uint32_t num_feature_map_rows = component[i].op.conv1D.num_feature_map_rows;
uint32_t num_feature_map_columns = component[i].op.conv1D.num_feature_map_columns;
const auto convStride = component[i].op.conv1D.convStride;
uint32_t num_bytes_per_weight = component[i].op.conv1D.num_bytes_per_weight;
uint32_t num_bytes_per_bias = component[i].op.conv1D.num_bytes_per_bias;
float weight_scale_factor = component[i].op.conv1D.weight_scale_factor;
float output_scale_factor = component[i].output_scale_factor;
out_file << "<num_filters> " << std::dec << num_filters << "\n";
out_file << "<num_filter_coefficients> " << std::dec << num_filter_coefficients << "\n";
out_file << "<num_feature_map_rows> " << std::dec << num_feature_map_rows << "\n";
out_file << "<num_feature_map_columns> " << std::dec << num_feature_map_columns << "\n";
out_file << "<conv_stride> " << std::dec << convStride << "\n";
if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
@@ -1362,35 +1360,6 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
return n;
}

namespace {
uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride) {
// floor[(in - flt)/stride] + 1, GNA Spec 1.24
if (flt > in || flt == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, filter, stride) = (" << in << "," << flt << "," << stride << ")";
}
return (in - flt) / stride + 1;
}

uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride) {
// ceil[(in - window)/stride] + 1, GNA Spec 1.24
if (window > in || window == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
}
if (window == in) return 1;

return (in - window - 1) / stride + 2;
}

uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride) {
// floor[(in - 1)/stride] + 1, GNA 1.0/2.0 HW Spec
if (in == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, stride) = (" << in << "," << stride << ")";
}
return (in - 1) / stride + 1;
}

} // namespace
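The three helpers removed from the anonymous namespace above now come from `GNAPluginNS::GNAConvolutionLayer` (see the `using` declarations added earlier in this diff). Restated from their comments, the output-size formulas are:

```latex
\begin{align*}
\mathrm{outputFromConv}(in, flt, s) &= \left\lfloor \frac{in - flt}{s} \right\rfloor + 1
  && \text{(GNA Spec 1.24)}\\
\mathrm{outputFromPooling}(in, w, s) &= \left\lceil \frac{in - w}{s} \right\rceil + 1
  && \text{(GNA Spec 1.24; returns } 1 \text{ when } w = in\text{)}\\
\mathrm{outputFromPoolingLegacy}(in, s) &= \left\lfloor \frac{in - 1}{s} \right\rfloor + 1
  && \text{(GNA 1.0/2.0 HW Spec)}
\end{align*}
```

For example, outputFromConv(16, 8, 4) = floor((16 - 8) / 4) + 1 = 3 output elements per filter.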
|
||||
|
||||
#if GNA_LIB_VER == 2
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel) {
|
||||
Gna2Operation * gnaOperation;
|
||||
@ -1593,7 +1562,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
comp.op.conv1D.ptr_biases),
|
||||
nullptr,
|
||||
create_shape1D_parameter(
|
||||
comp.op.conv1D.num_feature_map_columns),
|
||||
comp.op.conv1D.convStride),
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
@ -1619,11 +1588,11 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
pConvolutionalLayer->nBytesBias = component[i].op.conv1D.num_bytes_per_bias;
|
||||
pConvolutionalLayer->nBytesFilterCoefficient = component[i].op.conv1D.num_bytes_per_weight;
|
||||
pConvolutionalLayer->nFilters = component[i].op.conv1D.num_filters;
|
||||
pConvolutionalLayer->nFilterRows = comp.op.conv1D.num_filter_coefficients / comp.op.conv1D.num_feature_map_columns;
|
||||
pConvolutionalLayer->nFilterRows = comp.op.conv1D.num_filter_coefficients / comp.op.conv1D.convStride;
|
||||
pConvolutionalLayer->nFilterCoefficients = component[i].op.conv1D.num_filter_coefficients;
|
||||
pConvolutionalLayer->nFeatureMaps = 1;
|
||||
pConvolutionalLayer->nFeatureMapRows = component[i].op.conv1D.num_feature_map_rows;
|
||||
pConvolutionalLayer->nFeatureMapColumns = component[i].op.conv1D.num_feature_map_columns;
|
||||
pConvolutionalLayer->nFeatureMapColumns = component[i].op.conv1D.convStride;
|
||||
pConvolutionalLayer->nFeatureMapRows = pLayer->nInputColumns / pConvolutionalLayer->nFeatureMapColumns;
|
||||
pConvolutionalLayer->poolType = INTEL_NO_POOLING; // will be overwritten
|
||||
pConvolutionalLayer->nPoolSize = 0; // will be overwritten
|
||||
pConvolutionalLayer->nPoolStride = 0; // will be overwritten
|
||||
@ -1750,8 +1719,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
auto fltStrideSz = pConvolutionalLayer->nFeatureMaps * pConvolutionalLayer->nFeatureMapColumns; // always move 1 "row"
|
||||
auto outFromConv = outputFromConv(pLayer->nInputColumns, nFltSize, fltStrideSz);
|
||||
// FLAT input matrix, pooled outputs per filter
|
||||
// TODO: Issue 50386 check why (outFromConv - 1) an not (outFromConv - nPoolSize)
|
||||
pLayer->nOutputColumns = pConvolutionalLayer->nFilters * ((outFromConv - 1) / pConvolutionalLayer->nPoolStride + 1);
|
||||
pLayer->nOutputColumns = pConvolutionalLayer->nFilters * outputFromPoolingLegacy(outFromConv, pConvolutionalLayer->nPoolStride);
|
||||
}
|
||||
#endif
|
||||
} else {
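The swap in the last hunk is behavior-preserving: for outFromConv >= 1 and nPoolStride >= 1, the inline `(outFromConv - 1) / nPoolStride + 1` is exactly floor[(in - 1)/stride] + 1, which is what outputFromPoolingLegacy() computes. A minimal standalone check with made-up numbers (not taken from this commit):

    #include <cassert>
    #include <cstdint>

    // floor[(in - 1)/stride] + 1, mirroring outputFromPoolingLegacy()
    static uint32_t legacyPoolOut(uint32_t in, uint32_t stride) {
        return (in - 1) / stride + 1;
    }

    int main() {
        const uint32_t outFromConv = 10, nPoolStride = 3;  // hypothetical sizes
        assert((outFromConv - 1) / nPoolStride + 1 == legacyPoolOut(outFromConv, nPoolStride));  // both give 4
        return 0;
    }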
@ -97,8 +97,7 @@ public:
uint32_t num_bytes_per_bias,
uint32_t num_filters,
uint32_t num_filter_coefficients,
uint32_t num_feature_map_rows,
uint32_t num_feature_map_columns,
uint32_t convStride,
float weight_scale_factor,
float output_scale_factor,
A *&ptr_inputs,
@ -114,8 +113,7 @@ public:
num_bytes_per_bias,
num_filters,
num_filter_coefficients,
num_feature_map_rows,
num_feature_map_columns,
convStride,
weight_scale_factor,
output_scale_factor,
(void *&) ptr_inputs,
@ -428,8 +426,7 @@ private:
uint32_t num_bytes_per_bias,
uint32_t num_filters,
uint32_t num_filter_coefficients,
uint32_t num_feature_map_rows,
uint32_t num_feature_map_columns,
uint32_t convStride,
float weight_scale_factor,
float output_scale_factor,
void *&ptr_inputs,

@ -146,8 +146,7 @@ typedef struct {
uint32_t num_bytes_per_bias;
uint32_t num_filters;
uint32_t num_filter_coefficients;
uint32_t num_feature_map_rows;
uint32_t num_feature_map_columns;
uint32_t convStride;
float weight_scale_factor;
void *ptr_filters; // filters stored one after the other
void *ptr_biases;

@ -16,6 +16,7 @@ constexpr uint32_t bufferMaxSize = 65528;
constexpr uint32_t convMinFiltersNum = 4;
constexpr uint32_t convMaxFiltersNum = 65532;
constexpr uint32_t convFiltersNumDivider = 4;
constexpr uint32_t convFilterSizeDivider = 8;
constexpr uint32_t convFilterMaxSize = 768;
constexpr uint32_t convEachKernelByteAlignment = 16;
constexpr uint32_t noOfInputsDivisor = 8;

@ -1138,7 +1138,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {

double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
if (conv) {
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();

@ -390,6 +390,7 @@ void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGN
dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")"
<< " type: " << GetOperandType(operand.Type) <<
" shape: " << GetSimpleString(operand.Shape) <<
" data: " << operand.Data <<
" layout: ";

DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS);

@ -162,7 +162,7 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
InferenceEngine::details::product(begin(dataOutput->getDims()),
end(dataOutput->getDims())) * dataOutput->getPrecision().size();

if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) {
if (LayerInfo(outFunctionalLayer.first).isConvolutionFilter()) {
size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset");
layerInfoItem.splitOutputLayers.emplace_back(
outFunctionalLayer.first,
@ -351,37 +351,33 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
}

// have to pad input to let the last kernel meet its corresponding input
uint32_t num_inputs = in_width * in_channels;
const auto num_inputs = in_width * in_channels;
uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;

// convert to 2D and set GNA input feature map size
uint32_t effectiveStride = convolution._stride_x * convolution._stride_y;
auto convStride = convolution._stride_x * convolution._stride_y;
if (convolution._stride_y != 1) {
effectiveStride = convolution._stride_x;
convStride = convolution._stride_x;
} else if (in_width == 1 && convolution._stride_x != 1) {
effectiveStride = convolution._stride_y;
convStride = convolution._stride_y;
}
uint32_t num_feature_map_columns = in_channels * effectiveStride;

uint32_t num_feature_map_rows = (in_channels * in_width) / num_feature_map_columns;
const auto effectiveStride = in_channels * convStride;

uint32_t num_filters = convolution._out_depth;
uint32_t num_filter_coefficients = single_conv_kernel_size + num_conv_kernel_padding;
uint32_t num_columns_in = num_inputs + num_input_padding;

uint32_t num_columns_out = (((num_inputs - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
uint32_t num_columns_out_unpadded = (((num_inputs - single_conv_kernel_size) / num_feature_map_columns) + 1) * convolution._out_depth;
uint32_t num_columns_out = (((num_inputs - num_filter_coefficients) / effectiveStride) + 1) * convolution._out_depth;
uint32_t num_columns_out_unpadded = (((num_inputs - single_conv_kernel_size) / effectiveStride) + 1) * convolution._out_depth;

uint32_t original_num_feature_map_rows = num_feature_map_rows;
uint32_t original_input_padding = num_input_padding;
uint32_t additional_padding = 0;

// if kernel padding to multiple of 8 will cause missed outputs, need to pad further
while (num_columns_out < out_batch * out_channels * out_width) {
num_input_padding = original_input_padding + additional_padding;
num_feature_map_rows = original_num_feature_map_rows + (num_input_padding) / num_feature_map_columns;
num_columns_in = num_inputs + num_input_padding;
num_columns_out = (((num_inputs + num_input_padding - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
num_columns_out = (((num_inputs + num_input_padding - num_filter_coefficients) / effectiveStride) + 1) * convolution._out_depth;
dnn->new_num_conv_columns = num_columns_out;
additional_padding += 8;
}
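To make the padding loop concrete, here is the same arithmetic on made-up sizes (none of these numbers come from the commit): with num_inputs = 42, ALIGN(42, 8) gives 6 elements of base padding; if the result still yields fewer columns than out_batch * out_channels * out_width, the padding grows by 8 per iteration until the convolution covers the expected output volume. A reduced, runnable sketch:

    #include <cstdint>
    #include <cstdio>

    #define ALIGN(v, n) (((v) + (n) - 1) / (n) * (n))

    int main() {
        // hypothetical sizes, for illustration only
        const uint32_t num_inputs = 42, num_filter_coefficients = 16, effectiveStride = 8, out_depth = 4;
        const uint32_t required_outputs = 24;  // stands in for out_batch * out_channels * out_width
        const uint32_t original_input_padding = ALIGN(num_inputs, 8) - num_inputs;  // 48 - 42 = 6
        uint32_t num_input_padding = original_input_padding, additional_padding = 0;
        uint32_t num_columns_out = ((num_inputs - num_filter_coefficients) / effectiveStride + 1) * out_depth;  // 16
        while (num_columns_out < required_outputs) {
            num_input_padding = original_input_padding + additional_padding;
            num_columns_out = ((num_inputs + num_input_padding - num_filter_coefficients) / effectiveStride + 1) * out_depth;
            additional_padding += 8;
        }
        std::printf("padding=%u columns_out=%u\n", num_input_padding, num_columns_out);  // padding=14 columns_out=24
        return 0;
    }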
@ -427,8 +423,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
num_bytes_per_bias,
num_filters,
num_filter_coefficients,
num_feature_map_rows,
num_feature_map_columns,
effectiveStride,
weight_scale_factor,
output_scale_factor,
ptr_inputs,
@ -457,8 +452,8 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = num_feature_map_columns;
dnn->num_rotate_columns = original_num_feature_map_rows;
dnn->num_rotate_rows = effectiveStride;
dnn->num_rotate_columns = num_inputs / effectiveStride;
} else {
dnn->do_rotate_input = false;
}
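The new rotate parameters follow directly from the 2D reshaping: the flat input of num_inputs elements is viewed as a (num_rotate_columns x num_rotate_rows) matrix and transposed, with num_rotate_rows = effectiveStride = in_channels * convStride. A small sketch of that transpose with hypothetical dimensions:

    #include <cstdint>
    #include <vector>

    // View a flat buffer as (cols x rows) and transpose it, as requested via do_rotate_input.
    static std::vector<float> rotateInput(const std::vector<float>& in, uint32_t rows, uint32_t cols) {
        std::vector<float> out(in.size());
        for (uint32_t c = 0; c < cols; ++c)
            for (uint32_t r = 0; r < rows; ++r)
                out[r * cols + c] = in[c * rows + r];
        return out;
    }

    int main() {
        const uint32_t in_channels = 3, convStride = 2;              // hypothetical
        const uint32_t effectiveStride = in_channels * convStride;   // num_rotate_rows = 6
        const uint32_t num_inputs = 24;                              // num_rotate_columns = 4
        const std::vector<float> input(num_inputs, 0.f);
        return rotateInput(input, effectiveStride, num_inputs / effectiveStride).size() == num_inputs ? 0 : 1;
    }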
@ -559,20 +554,10 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
const auto outputs = convolution.outData.front();

// have to pad input to let the last kernel meet its corresponding input
uint32_t num_inputs = in_width * in_height * in_channels;
const auto num_inputs = in_width * in_height * in_channels;
uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;

// convert to 2D and set GNA input feature map size
uint32_t num_feature_map_columns = in_channels * convolution._stride_x * convolution._stride_y;
if (in_height == 1 && convolution._stride_y != 1) {
num_feature_map_columns = in_channels * convolution._stride_x;
} else if (in_width == 1 && convolution._stride_x != 1) {
num_feature_map_columns = in_channels * convolution._stride_y;
}
uint32_t num_feature_map_rows = (in_channels * in_height * in_width) / num_feature_map_columns;

const uint32_t filter_n = convolution._out_depth;
uint32_t original_num_feature_map_rows = num_feature_map_rows;

// if kernel padding to multiple of 8 will cause missed outputs, need to pad further
if (num_input_padding == 0) {
@ -638,15 +623,17 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;

// TODO: convolution might not be the first layer in sorted order but connected via split, for example - don't know how Kaldi will handle that
if (!dnn->do_rotate_input) {
if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = num_feature_map_columns;
dnn->num_rotate_columns = original_num_feature_map_rows;
} else {
dnn->do_rotate_input = false;
if (!dnn->do_rotate_input && inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = in_channels;
if (in_height != 1) {
dnn->num_rotate_rows *= convolution._stride_y;
}
if (in_width != 1) {
dnn->num_rotate_rows *= convolution._stride_x;
}
dnn->num_rotate_columns = num_inputs / dnn->num_rotate_rows;
}

connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -654,7 +641,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
const auto kernelHW = convolution._kernel_y * convolution._kernel_x;

std::vector<uint8_t> transposedWeights;
const auto singleKernelSize = in_channels* kernelHW* convolution.precision.size();
const auto singleKernelSize = in_channels* kernelHW * convolution.precision.size();
const auto kernelPad = Gna2RoundUp(singleKernelSize, 16) - singleKernelSize;
for (uint32_t k = 0; k < convolution._out_depth; k++) {
uint8_t* ptr_filt_current
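kernelPad above is the round-up remainder to GNA's 16-byte kernel alignment (convEachKernelByteAlignment). With made-up numbers, in_channels = 3, a 3x3 kernel and 2-byte precision give singleKernelSize = 3 * 9 * 2 = 54 and kernelPad = 64 - 54 = 10. A minimal check of the same round-up arithmetic:

    #include <cstdint>

    // Round n up to the nearest multiple of m; Gna2RoundUp(n, 16) used above behaves this way for m = 16.
    static uint32_t roundUp(uint32_t n, uint32_t m) {
        return (n + m - 1) / m * m;
    }

    int main() {
        const uint32_t singleKernelSize = 3 * 3 * 3 * 2;                              // 54 bytes, hypothetical
        const uint32_t kernelPad = roundUp(singleKernelSize, 16) - singleKernelSize;  // 64 - 54 = 10
        return kernelPad == 10 ? 0 : 1;
    }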
@ -1728,8 +1715,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
}
}

void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto filterLayer = dynamic_cast<InferenceEngine::WeightableLayer*> (layer.get());
void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto filterLayer = dynamic_cast<InferenceEngine::ConvolutionLayer*> (layer.get());

if (filterLayer == nullptr) {
return;
@ -1752,62 +1739,57 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock();

const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
const auto noOfInputsDivisor = gnaFlags->input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
uint32_t num_columns_in = GetDataDimSize(inputs, 2);
uint32_t num_rows_out = GetDataDimSize(outputs, 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
const uint32_t orginalInputSize = GetDataDimSize(inputs, 1);
const uint32_t orginalOutputSize = GetDataDimSize(outputs, 1);
if (orginalInputSize != orginalOutputSize) {
THROW_GNA_LAYER_EXCEPTION(filterLayer) << "Number of inputs (" << orginalInputSize <<
") should be equal to number of outputs (" << orginalOutputSize << ")!";
}
const auto numberOfFilters = filterLayer->_out_depth;
const auto convolutionStride = numberOfFilters;
const auto filterWidth = filterLayer->_kernel_x;
const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters;
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor);

uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
auto numOutputs = GNAConvolutionLayer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
numOutputs *= numberOfFilters;
const auto& biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");

dnn->InitAffineComponent(currentComponent,
num_rows_in + num_padding,
num_columns_in,
num_rows_out,
layer->params["num_rows_for_pwl"] = std::to_string(numOutputs);
dnn->InitConvolutional1DComponent(currentComponent,
numInputsFullyPadedAndAligned,
numOutputs,
inputs->getPrecision().size(),
outputs->getPrecision().size(),
filterLayer->_weights->getTensorDesc().getPrecision().size(),
biasPrecision.size(),
numberOfFilters,
filterWidth,
convolutionStride,
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
ptr_inputs,
ptr_outputs,
ptr_weights,
ptr_biases,
false);
ptr_biases);

size_t num_data_bytes_out =
InferenceEngine::details::product(
begin(outputs->getDims()), end(outputs->getDims())) * 4;

size_t num_data_bytes_in = num_columns_in *
ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size();
size_t num_data_bytes_in = numInputsFullyPadedAndAligned * inputs->getPrecision().size();

connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);

if (num_padding == 0) {
gnamem->readonly().push_ptr(ptr_weights,
filterLayer->_weights->cbuffer().as<const void*>(),
filterLayer->_weights->byteSize(),
64);
} else {
auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
auto paddedWeights = elementsIn * num_rows_out;
auto paddedWeightsSize = paddedWeights * filterLayer->precision.size();

gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
size_t offset = 0;
for (uint32_t i = 0; i < num_rows_out && size >= offset; i++) {
ie_memcpy(reinterpret_cast<uint8_t*>(data) + offset, size - offset,
filterLayer->_weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * filterLayer->precision.size(),
num_rows_in* filterLayer->precision.size());
offset += (num_rows_in + num_padding) * filterLayer->precision.size();
}
}, 64);
}
gnamem->readonly().push_ptr(ptr_weights,
filterLayer->_weights->cbuffer().as<const void*>(),
filterLayer->_weights->byteSize(),
64);

if (filterLayer->_biases) {
gnamem->readonly().push_ptr(ptr_biases,
@ -1815,7 +1797,7 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
filterLayer->_biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value(ptr_biases, 0.0f, numberOfFilters, 64);
}
}
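Worked numbers for the sizing above (hypothetical, not from this commit): with orginalOutputSize = 10, numberOfFilters = 4 (hence convolutionStride = 4), filterWidth = 4 and noOfInputsDivisor = 8, one gets minOutputsPerFilter = ALIGN(10, 4) / 4 = 3, minInputsNeeded = (3 - 1) * 4 + 4 = 12, numInputsFullyPadedAndAligned = ALIGN(12, 8) = 16, and numOutputs = outputFromConv(16, 4, 4) * 4 = 16. A compact check:

    #include <cstdint>

    #define ALIGN(v, n) (((v) + (n) - 1) / (n) * (n))

    // floor[(in - flt)/stride] + 1, same formula as GNAConvolutionLayer::outputFromConv()
    static uint32_t outFromConv(uint32_t in, uint32_t flt, uint32_t stride) {
        return (in - flt) / stride + 1;
    }

    int main() {
        const uint32_t outputSize = 10, numberOfFilters = 4, filterWidth = 4, divisor = 8;  // hypothetical
        const uint32_t convolutionStride = numberOfFilters;
        const uint32_t minOutputsPerFilter = ALIGN(outputSize, numberOfFilters) / numberOfFilters;    // 3
        const uint32_t minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth; // 12
        const uint32_t alignedInputs = ALIGN(minInputsNeeded, divisor);                               // 16
        const uint32_t numOutputs = outFromConv(alignedInputs, filterWidth, convolutionStride) * numberOfFilters;
        return numOutputs == 16 ? 0 : 1;
    }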
@ -1878,13 +1860,18 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
}

// TODO: solve this by layer level transformations
auto concatAlignFilter = CNNNetPrevLayer(layer, 0);
if (LayerInfo(concatAlignFilter).isConcatAlignFilter()) {
auto rowsCopiedOffset = concatAlignFilter->GetParamAsInt("rows_copied_offset");
auto prevLayer = CNNNetPrevLayer(layer, 0);
if (LayerInfo(prevLayer).isConcatAlignFilter()) {
auto rowsCopiedOffset = prevLayer->GetParamAsInt("rows_copied_offset");
if (rowsCopiedOffset != 0) {
num_rows -= rowsCopiedOffset / outputs->getPrecision().size();
layer->params["output_offset"] = std::to_string(rowsCopiedOffset);
}
} else if (LayerInfo(prevLayer).isConvolutionFilter()) {
const auto num_rows_for_pwl = prevLayer->GetParamAsInt("num_rows_for_pwl", 0);
if (num_rows_for_pwl != 0) {
num_rows = num_rows_for_pwl;
}
}
size_t num_data_bytes_out = num_columns * num_rows * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns * num_rows * inputs->getPrecision().size();
@ -2135,7 +2122,7 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
{{"FullyConnected", "InnerProduct"}, CREATE(AffinePrimitive)},
{{"Gemm"}, CREATE(GemmPrimitive)},
{{"ScaleShift"}, CREATE(DiagonalPrimitive)},
{{"AffineFilter"}, CREATE(AffineFilterPrimitive)},
{{"ConvolutionFilter"}, CREATE(ConvolutionFilterPrimitive)},
{{"ConcatAlignFilter"}, CREATE(ConcatAlignFilterPrimitive)},
{{"Const"}, CREATE(ConstPrimitive)},
{{"Eltwise"}, CREATE(EltwisePrimitive)}, // same as diagonal while weights are not taken from network, rather than from another output

@ -108,7 +108,7 @@ public:
void CreateLayerPrimitive(InferenceEngine::CNNLayerPtr);

void AffinePrimitive(InferenceEngine::CNNLayerPtr, bool isDiag = false);
void AffineFilterPrimitive(InferenceEngine::CNNLayerPtr);
void ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr);
void ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr);
void DiagonalPrimitive(InferenceEngine::CNNLayerPtr);
void ConstPrimitive(InferenceEngine::CNNLayerPtr);

@ -0,0 +1,79 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "gna_convolution_layer.hpp"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <utility>
#include <vector>

#include <legacy/ie_layers.h>
#include "gna_graph_tools.hpp"
#include "gna_plugin_log.hpp"

namespace GNAPluginNS {
namespace GNAConvolutionLayer {
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
}

// 3D input or 2D kernel
bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth) {
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
}

double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
using KRT = std::pair<uint32_t, double>;
// Empirically determined weights reducers for 2D Convolution
// i.e.:
// for kernelSize >= 9 -> 1.3
// for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
auto reducer = 1.0;
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
const auto kernelSize = conv._kernel_x * conv._kernel_y;
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
if (r != reducers.end())
reducer = r->second;
}
return reducer;
}

uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride) {
// floor[(in - flt)/stride] + 1, GNA Spec 1.24
if (flt > in || flt == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, filter, stride) = (" << in << "," << flt << "," << stride << ")";
}
return (in - flt) / stride + 1;
}

uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride) {
// ceil[(in - window)/stride] + 1, GNA Spec 1.24
if (window > in || window == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
}
if (window == in) return 1;

return (in - window - 1) / stride + 2;
}

uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride) {
// floor[(in - 1)/stride] + 1, GNA 1.0/2.0 HW Spec
// See issue 50386 for details
if (in == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, stride) = (" << in << "," << stride << ")";
}
return (in - 1) / stride + 1;
}

} // namespace GNAConvolutionLayer
} // namespace GNAPluginNS
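Quick sanity numbers for the three output-size helpers in this new file (illustrative, not from the commit): outputFromConv(16, 4, 2) = (16 - 4)/2 + 1 = 7; outputFromPooling(7, 3, 2) = ceil[(7 - 3)/2] + 1 = 3, computed branch-free as (7 - 3 - 1)/2 + 2; and outputFromPoolingLegacy(7, 2) = (7 - 1)/2 + 1 = 4, one column more than the spec formula, which appears to be the discrepancy issue 50386 tracks. Note also that getWeightsReducer() relies on the reducers vector being sorted by descending kernel size, which is why the std::lower_bound comparator is l.first > r. A minimal check of the ceiling identity:

    #include <cstdint>

    // ceil[(in - window)/stride] + 1 via integer ops, as in outputFromPooling()
    static uint32_t poolOut(uint32_t in, uint32_t window, uint32_t stride) {
        if (window == in) return 1;
        return (in - window - 1) / stride + 2;
    }

    int main() {
        // (7 - 3 - 1)/2 + 2 == 3 == ceil((7 - 3)/2.0) + 1
        return poolOut(7, 3, 2) == 3 ? 0 : 1;
    }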
@ -4,46 +4,25 @@

#pragma once

#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
#include <cstdint>

#include <legacy/ie_layers.h>
#include "../gna_graph_tools.hpp"

namespace GNAPluginNS {
struct GNAConvolutionLayer {
static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
}
namespace GNAConvolutionLayer {
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth);

// 3D input or 2D kernel
static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth) {
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
}
// 3D input or 2D kernel
bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth);

static double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
using KRT = std::pair<uint32_t, double>;
// Empirically determined weights reducers for 2D Convolution
// i.e.:
// for kernelSize >= 9 -> 1.3
// for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
auto reducer = 1.0;
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
const auto kernelSize = conv._kernel_x * conv._kernel_y;
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
if (r != reducers.end())
reducer = r->second;
}
return reducer;
}
};
} // namespace GNAPluginNS
double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv);

uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride);

uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride);

uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride);

} // namespace GNAConvolutionLayer
} // namespace GNAPluginNS

@ -70,6 +70,7 @@ class LayerInfo {
[this]() { return isFullyConnected(); },
[this]() { return isAffineFilter(); },
[this]() { return isConcatAlignFilter(); },
[this]() { return isConvolutionFilter(); },
[this]() { return isEltwise(); },
[this]() { return isScaleShift(); },
[this]() { return isConvolution(); },
@ -157,6 +158,9 @@ class LayerInfo {
bool isAffineFilter() const noexcept {
return isOfType("AffineFilter");
}
bool isConvolutionFilter() const noexcept {
return isOfType("ConvolutionFilter");
}
bool isRelu() const noexcept {
return isOfType("relu");
}

@ -41,6 +41,7 @@
#include "gna_data_types.hpp"
#include "gna_tensor_tools.hpp"
#include "gna_itt.hpp"
#include "backend/gna_limitations.hpp"

using namespace InferenceEngine;
using namespace InferenceEngine::details;
@ -1277,35 +1278,49 @@ void InsertSplitAligningFilterPass::run() {
gnalog() << std::endl;
#endif
auto filterLayer =
std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));
std::make_shared<ConvolutionLayer>(LayerParams({filterName, "ConvolutionFilter", Precision::FP32}));

auto inputData = splitOutput;

size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
size_t
newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
/ bytesPerSplitElement;

IE_ASSERT(filterLayer != nullptr);

// encodes offset to beginning of split layer input
filterLayer->params["offset"] = std::to_string(aligned64_offset / bytesPerSplitElement);

auto dims = splitOutput->getTensorDesc().getDims();
if (dims.size() > 3) {
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
}

auto num_rows_out = dims[1] * (dims.size() != 2 ? dims[2] : 1);
std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);
const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / bytesPerSplitElement;
// TODO: consider using a different number of filters to decrease the number of trailing zeros (additionalPaddingOfFilter)
const auto numberOfFilters = GNALimitations::convMinFiltersNum;
const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, GNALimitations::convFilterSizeDivider);

auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;

for (int i = 0; i != outputSize; i++) {
filterWeights[offset] = 1.0f;
offset += newOutputSize + 1;
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + numberOfFilters)
// offsetOfUnalignment - the leading zeros in the filter
// |
// | additionalPaddingOfFilter = filterSize - offsetOfUnalignment - numberOfFilters
// ____|___ ___|___
// | | | |
// 0 0 ... 0 1 0 0 0 0 ... 0
// 0 0 ... 0 0 1 0 0 0 ... 0
// 0 0 ... 0 0 0 1 0 0 ... 0
// 0 0 ... 0 0 0 0 1 0 ... 0
std::vector<float> filterWeights(filterSize * 4, 0.f);
for (auto f = 0u; f < numberOfFilters; f++) {
filterWeights[f * filterSize + f + offsetOfUnalignment] = 1;
}

filterLayer->_out_depth = numberOfFilters;
filterLayer->_stride_x = numberOfFilters;
filterLayer->_stride_y = 1;
filterLayer->_kernel_x = filterSize;
filterLayer->_kernel_y = 1;
filterLayer->_padding_x = 0;
filterLayer->_padding_y = 0;

filterLayer->_weights = make_shared_blob<float>(TensorDesc(
inputData->getTensorDesc().getPrecision(),
SizeVector({filterWeights.size()}),
@ -1313,6 +1328,15 @@ void InsertSplitAligningFilterPass::run() {
filterLayer->_weights->allocate();
CopyVectorToBlob(filterLayer->_weights, filterWeights);

std::vector<float> biasWeights(numberOfFilters, 0.f);

filterLayer->_biases = make_shared_blob<float>(TensorDesc(
inputData->getTensorDesc().getPrecision(),
SizeVector({ biasWeights.size() }),
Layout::C));
filterLayer->_biases->allocate();
CopyVectorToBlob(filterLayer->_biases, biasWeights);

auto outData = std::make_shared<Data>(filterName,
TensorDesc(splitOutput->getTensorDesc().getPrecision(),
splitOutput->getTensorDesc().getDims(),
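The filter constructed in the hunk above is a shifted identity: with convMinFiltersNum = 4 filters of width filterSize and stride 4, filter f reads input[offsetOfUnalignment + f], so the group copies the split output out of the 64-byte-aligned buffer while discarding the unaligned prefix, per the diagram in the hunk. A standalone sketch of the weight layout (offset value hypothetical):

    #include <cstdint>
    #include <vector>

    #define ALIGN(v, n) (((v) + (n) - 1) / (n) * (n))

    int main() {
        const uint32_t numberOfFilters = 4;      // GNALimitations::convMinFiltersNum
        const uint32_t offsetOfUnalignment = 5;  // hypothetical distance to the aligned base
        const uint32_t filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, 8);  // convFilterSizeDivider -> 16
        std::vector<float> filterWeights(filterSize * numberOfFilters, 0.f);
        for (uint32_t f = 0; f < numberOfFilters; ++f)
            filterWeights[f * filterSize + f + offsetOfUnalignment] = 1.f;  // one 1 per row, shifted right
        return filterWeights[5] == 1.f ? 0 : 1;
    }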
@ -12,7 +12,9 @@
#include "backend/dnn_types.h"
#include "backend/gna_limitations.hpp"
#include "gna_lib_ver_selector.hpp"
#include "layers/gna_convolution_layer.hpp"

using namespace GNAPluginNS::GNAConvolutionLayer;

void CNNFilter32(intel_dnn_component_t *component) {
auto filters = reinterpret_cast<float *>(component->op.conv1D.ptr_filters);
@ -20,11 +22,10 @@ void CNNFilter32(intel_dnn_component_t *component) {
auto input = reinterpret_cast<float *>(component->ptr_inputs);
auto output = reinterpret_cast<float *>(component->ptr_outputs);

const auto convolutionStride = component->op.conv1D.num_feature_map_columns;
const auto convolutionStride = component->op.conv1D.convStride;
const auto filterSize = component->op.conv1D.num_filter_coefficients;
const auto numberOfInputs = component->num_columns_in;
// TODO: reuse outputFromConv() from backend\am_intel_dnn.cpp
const auto numberOfOutputsPerFilter = (numberOfInputs - filterSize) / convolutionStride + 1;
const auto numberOfOutputsPerFilter = outputFromConv(numberOfInputs, filterSize, convolutionStride);
const auto numberOfFilters = component->op.conv1D.num_filters;

std::string layer_name;

@ -775,7 +775,7 @@ public:
}

/**
* @brief Porvides a list of plugin names in registry; physically such plugins may not be created
* @brief Provides a list of plugin names in registry; physically such plugins may not be created
* @return A list of plugin names
*/
std::vector<std::string> GetListOfDevicesInRegistry() const {

@ -98,8 +98,7 @@ class SharedObjectLoader::Impl {
// Exclude current directory from DLL search path process wise.
// If application specific path was configured before then
// current directory is already excluded.
// GetDLLDirectory does not distinguish if aplication specific
// GetDLLDirectory does not distinguish if application specific
// path was set to "" or NULL so reset it to "" to keep
// application safe.
void ExcludeCurrentDirectoryA() {

@ -40,6 +40,7 @@ ngraph::pass::InitConstMask::InitConstMask(const ngraph::AxisSet & dims,
end[dim] = value + 1;

bool skip_dim_value = false;
NGRAPH_SUPPRESS_DEPRECATED_START
CoordinateTransform iter(shape, begin, end);
for (const Coordinate & coord : iter) {
if (!condition(values.at(iter.index(coord)))) {
@ -47,6 +48,7 @@ ngraph::pass::InitConstMask::InitConstMask(const ngraph::AxisSet & dims,
break;
}
}
NGRAPH_SUPPRESS_DEPRECATED_END
if (!skip_dim_value) {
mask->at(dim).insert(value);
}

@ -16,7 +16,7 @@ source_group("src" FILES ${LIBRARY_SRC})

# Create module library

add_library(${TARGET_NAME} MODULE EXCLUDE_FROM_ALL ${LIBRARY_SRC})
add_library(${TARGET_NAME} MODULE ${LIBRARY_SRC})

ie_faster_build(${TARGET_NAME}
UNITY

@ -14,6 +14,7 @@ namespace ngraph {
namespace pass {

class TRANSFORMATIONS_API EliminateUnsqueezeGather;
class TRANSFORMATIONS_API EliminateGatherUnsqueeze;

} // namespace pass
} // namespace ngraph
@ -29,3 +30,15 @@ public:
NGRAPH_RTTI_DECLARATION;
EliminateUnsqueezeGather();
};

/**
* @ingroup ie_transformation_common_api
* @brief Remove Gather -> Unsqueeze pair, if Gather takes a scalar and
* Unsqueeze makes it a 1D tensor
*/

class ngraph::pass::EliminateGatherUnsqueeze : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
EliminateGatherUnsqueeze();
};

@ -0,0 +1,60 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <vector>
#include <memory>

#include <transformations_visibility.hpp>

#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/util.hpp>

namespace ngraph {
namespace pass {

class TRANSFORMATIONS_API SimplifyShapeOfSubGraph;
class TRANSFORMATIONS_API SharedShapeOf;
class TRANSFORMATIONS_API GroupedGatherElimination;

} // namespace pass
} // namespace ngraph


/**
* @ingroup ie_transformation_common_api
* @brief SharedShapeOf transformation replaces group of ShapeOf
* operations with the first ShapeOf in this group. All ShapeOfs in this group
* must be equal and consume the same output port.
*/
class ngraph::pass::SharedShapeOf: public ngraph::pass::FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
};

/**
* @ingroup ie_transformation_common_api
* @brief GroupedGatherElimination transformation replaces group of Gather
* operations with the first Gather in this group and updated indices input
* in case all Gathers in the group are consumed by the same Concat in incremental order.
*/
class ngraph::pass::GroupedGatherElimination: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
GroupedGatherElimination();
};

/**
* @ingroup ie_transformation_common_api
* @brief SimplifyShapeOfSubGraph transformation runs specific optimizations of shape sub-graphs
*/
class ngraph::pass::SimplifyShapeOfSubGraph: public ngraph::pass::FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
};

@ -0,0 +1,27 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <transformations_visibility.hpp>

#include <ngraph/pass/graph_rewrite.hpp>

namespace ngraph {
namespace pass {

class TRANSFORMATIONS_API ConvertDeformableConv8To1;

} // namespace pass
} // namespace ngraph

/**
* @ingroup ie_transformation_common_api
* @brief ConvertDeformableConv8To1 converts v8::DeformableConvolution into v1::DeformableConvolution.
*/
class ngraph::pass::ConvertDeformableConv8To1 : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertDeformableConv8To1();
};

@ -76,6 +76,7 @@
#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::CommonOptimizations, "CommonOptimizations", 0);

@ -85,6 +86,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::

// This pass must be called first in pipeline
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::SimplifyShapeOfSubGraph>();
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.register_pass<ngraph::pass::RemoveFilteringBoxesBySize>(); // Resolves dynamism (replaces NonZero), CF needed

@ -7,6 +7,7 @@
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <transformations/utils/utils.hpp>
#include "itt.hpp"

NGRAPH_RTTI_DEFINITION(ngraph::pass::EliminateUnsqueezeGather, "EliminateUnsqueezeGather", 0);
@ -58,3 +59,36 @@ ngraph::pass::EliminateUnsqueezeGather::EliminateUnsqueezeGather() {
auto m = std::make_shared<ngraph::pattern::Matcher>(gather, "EliminateUnsqueezeGather");
register_matcher(m, callback);
}

NGRAPH_RTTI_DEFINITION(ngraph::pass::EliminateGatherUnsqueeze, "EliminateGatherUnsqueeze", 0);

ngraph::pass::EliminateGatherUnsqueeze::EliminateGatherUnsqueeze() {
MATCHER_SCOPE(EliminateGatherUnsqueeze);

const auto gather_indices_label = ngraph::pattern::wrap_type<ngraph::op::Constant>(pattern::rank_equals(0));
const auto gather_axis_label = ngraph::pattern::wrap_type<ngraph::op::Constant>();
const auto gather_label = ngraph::pattern::wrap_type<ngraph::op::util::GatherBase>(
{ngraph::pattern::any_input(), gather_indices_label, gather_axis_label}, pattern::rank_equals(0));

const auto unsqueeze_label = ngraph::pattern::wrap_type<ngraph::opset6::Unsqueeze>(
{gather_label, ngraph::pattern::any_input()}, pattern::rank_equals(1));

ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
auto pattern_nodes = m.get_pattern_map();

auto& gather_indices = pattern_nodes.at(gather_indices_label);
auto& gather = pattern_nodes.at(gather_label);
auto& unsqueeze = pattern_nodes.at(unsqueeze_label);

auto new_indices = ngraph::op::util::make_try_fold<ngraph::opset6::Reshape>(gather_indices, opset6::Constant::create(element::i32, {1}, {1}), false);
auto new_gather = gather->clone_with_new_inputs({gather->input_value(0), new_indices, gather->input_value(2)});

new_gather->set_friendly_name(gather->get_friendly_name());
ngraph::copy_runtime_info({unsqueeze, gather}, {new_gather, new_indices});
ngraph::replace_node(unsqueeze, new_gather);
return true;
};

auto m = std::make_shared<ngraph::pattern::Matcher>(unsqueeze_label, "EliminateGatherUnsqueeze");
register_matcher(m, callback);
}
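On a concrete (hypothetical) graph this matcher turns Gather(data, scalar 2, axis 0), whose output is rank 0, followed by Unsqueeze(..., 0) into a single rank-1 Gather whose indices constant is reshaped to shape {1}. A sketch building the matched sub-graph with the same opset:

    #include <ngraph/opsets/opset6.hpp>

    using namespace ngraph;

    // Builds the Gather(rank-0) -> Unsqueeze(rank-1) pair the pass rewrites; illustrative only.
    std::shared_ptr<Node> buildGatherUnsqueeze(const Output<Node>& data) {
        auto indices = opset6::Constant::create(element::i32, Shape{}, {2});  // scalar indices
        auto axis = opset6::Constant::create(element::i32, Shape{}, {0});
        auto gather = std::make_shared<opset6::Gather>(data, indices, axis);  // rank-0 result
        auto unsqueeze_axis = opset6::Constant::create(element::i32, Shape{1}, {0});
        return std::make_shared<opset6::Unsqueeze>(gather, unsqueeze_axis);   // rank-1 result
    }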
@ -0,0 +1,101 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <memory>
#include <vector>

#include "itt.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
#include <transformations/common_optimizations/eliminate_unsqueeze_gather.hpp>
#include <transformations/utils/utils.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::SharedShapeOf, "SharedShapeOf", 0);

bool ngraph::pass::SharedShapeOf::run_on_function(std::shared_ptr<ngraph::Function> f) {
RUN_ON_FUNCTION_SCOPE(SharedShapeOf);
bool graph_rewritten = false;

std::map<ngraph::Output<Node>, std::vector<std::shared_ptr<ngraph::Node>>> source_to_shape_of;
for (const auto & node : f->get_ordered_ops()) {
// Recursively apply transformation for sub-graph based operations
if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::SubGraphOp>(node))
if (auto sub_graph = sub_graph_node->get_function())
graph_rewritten |= run_on_function(sub_graph);

if (is_type<ngraph::opset1::ShapeOf>(node) || is_type<ngraph::opset3::ShapeOf>(node))
source_to_shape_of[node->input_value(0)].push_back(node);
}

for (const auto& pair : source_to_shape_of) {
if (pair.second.size() < 2)
continue;
const auto& root_ss = pair.second[0];
for (const auto& child_ss : pair.second)
if (root_ss->get_instance_id() != child_ss->get_instance_id() && root_ss->get_output_element_type(0) == child_ss->get_output_element_type(0))
graph_rewritten |= replace_output_update_name(child_ss->output(0), root_ss->output(0));
}
return graph_rewritten;
}

NGRAPH_RTTI_DEFINITION(ngraph::pass::GroupedGatherElimination, "GroupedGatherElimination", 0);

ngraph::pass::GroupedGatherElimination::GroupedGatherElimination() {
MATCHER_SCOPE(GroupedGatherElimination);
auto concat_label = ngraph::pattern::wrap_type<ngraph::opset1::Concat>(pattern::rank_equals(1));

ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) {
auto concat = m.get_match_root();
OutputVector inputs = concat->input_values();
NodeVector new_ops;
size_t i = 0, original_inputs_size = inputs.size();
while (inputs.size() > i + 1) {
auto curr = inputs[i].get_node_shared_ptr(), next = inputs[i + 1].get_node_shared_ptr();
if (curr->get_type_info() != next->get_type_info() ||
(!is_type<opset1::Gather>(curr) && !is_type<opset7::Gather>(curr)) ||
(curr->input_value(0) != next->input_value(0))) {
++i;
continue;
} // curr and next are the same type of gather which takes data from the same source
auto joint_indices = ngraph::op::util::make_try_fold<opset1::Concat>(OutputVector{curr->input_value(1), next->input_value(1)}, 0);
auto new_gather = curr->clone_with_new_inputs(
{curr->input_value(0), joint_indices, ngraph::opset1::Constant::create(element::i64, {}, {0})});
new_ops.push_back(joint_indices);
new_ops.push_back(new_gather);
inputs.erase(inputs.begin() + i);
inputs[i] = new_gather->output(0);
}
if (original_inputs_size > inputs.size()) {
auto new_concat = std::make_shared<opset1::Concat>(inputs, 0);
new_ops.push_back(new_concat);
new_concat->set_friendly_name(concat->get_friendly_name());
ngraph::copy_runtime_info(concat, new_ops);
ngraph::replace_node(concat, new_concat);
return true;
}
return false;
};

auto m = std::make_shared<ngraph::pattern::Matcher>(concat_label, matcher_name);
this->register_matcher(m, callback);
}


NGRAPH_RTTI_DEFINITION(ngraph::pass::SimplifyShapeOfSubGraph, "SimplifyShapeOfSubGraph", 0);

bool ngraph::pass::SimplifyShapeOfSubGraph::run_on_function(std::shared_ptr<ngraph::Function> f) {
RUN_ON_FUNCTION_SCOPE(GroupedGatherElimination);
ngraph::pass::Manager manager;
manager.set_per_pass_validation(false);
manager.register_pass<ngraph::pass::EliminateGatherUnsqueeze>();
manager.register_pass<ngraph::pass::SharedShapeOf>();
manager.register_pass<ngraph::pass::GroupedGatherElimination>();
manager.register_pass<ngraph::pass::Validate>();
manager.run_passes(f);
return false;
}
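Concretely (hypothetical shapes): Concat(Gather(s, {0}, 0), Gather(s, {2}, 0)) along axis 0 over the same 1-D source s collapses to Gather(s, {0, 2}, 0); make_try_fold constant-folds the joint index Concat whenever both index inputs are constants. A sketch of driving the new pipeline over a function:

    #include <ngraph/pass/manager.hpp>
    #include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>

    // Minimal driver for the new passes; 'f' is any ngraph::Function owned by the caller.
    void simplifyShapeSubGraphs(const std::shared_ptr<ngraph::Function>& f) {
        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::SimplifyShapeOfSubGraph>();  // runs the three passes defined above
        manager.run_passes(f);
    }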
@ -0,0 +1,52 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

#include "itt.hpp"

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertDeformableConv8To1, "ConvertDeformableConv8To1", 0);

ngraph::pass::ConvertDeformableConv8To1::ConvertDeformableConv8To1() {
MATCHER_SCOPE(ConvertDeformableConv8To1);

auto deformable_conv_v8 = pattern::wrap_type<ngraph::opset8::DeformableConvolution>();

ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
auto deformable_conv_v8_node = std::dynamic_pointer_cast<ngraph::opset8::DeformableConvolution>(m.get_match_root());
if (!deformable_conv_v8_node)
return false;

if (deformable_conv_v8_node->get_input_size() != 3
|| deformable_conv_v8_node->get_bilinear_interpolation_pad())
return false;

auto arg = deformable_conv_v8_node->input_value(0);
auto offsets = deformable_conv_v8_node->input_value(1);
auto filters = deformable_conv_v8_node->input_value(2);

auto deformable_conv_v1 =
std::make_shared<ngraph::opset1::DeformableConvolution>(arg,
offsets,
filters,
deformable_conv_v8_node->get_strides(),
deformable_conv_v8_node->get_pads_begin(),
deformable_conv_v8_node->get_pads_end(),
deformable_conv_v8_node->get_dilations(),
deformable_conv_v8_node->get_auto_pad(),
deformable_conv_v8_node->get_group(),
deformable_conv_v8_node->get_deformable_group());
deformable_conv_v1->set_friendly_name(deformable_conv_v8_node->get_friendly_name());
ngraph::copy_runtime_info(deformable_conv_v8_node, deformable_conv_v1);
ngraph::replace_node(deformable_conv_v8_node, deformable_conv_v1);
return true;
};

auto m = std::make_shared<pattern::Matcher>(deformable_conv_v8, matcher_name);
register_matcher(m, callback);
}
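The conversion only fires when the v8 node has no mask input (get_input_size() == 3) and bilinear_interpolation_pad is off; otherwise there is no v1 equivalent and the callback bails out. A sketch of wiring it into a pass pipeline:

    #include <ngraph/pass/manager.hpp>
    #include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp"

    // Downgrades v8::DeformableConvolution to v1 where legal (illustrative driver).
    void downgradeDeformableConv(const std::shared_ptr<ngraph::Function>& f) {
        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::ConvertDeformableConv8To1>();
        manager.run_passes(f);
    }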
@ -27,7 +27,6 @@ IeParsedNetwork parseNetwork(const ie::CNNNetwork& network) {
out.networkOutputs = network.getOutputsInfo();

env.log->trace("Got %d inputs and %d outputs", out.networkInputs.size(), out.networkOutputs.size());
IE_ASSERT(!out.networkInputs.empty());
IE_ASSERT(!out.networkOutputs.empty());

env.log->trace("Perform topological sort");

@ -7,6 +7,7 @@

#include <ie_metric_helpers.hpp>
#include <legacy/cnn_network_impl.hpp>
#include <legacy/convert_function_to_cnn_network.hpp>
#include "exec_graph_info.hpp"
#include <myriad_executable_network.h>
#include <vpu/blob_reader.hpp>
@ -25,7 +26,6 @@ namespace MyriadPlugin {

ExecutableNetwork::ExecutableNetwork(
std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr>& devicePool,
const MyriadConfiguration& config,
const std::shared_ptr<ie::ICore> core) :
_config(config),
@ -40,10 +40,6 @@ ExecutableNetwork::ExecutableNetwork(
defaultOutput(_config.pluginLogFilePath()));

_executor = std::make_shared<MyriadExecutor>(_config.forceReset(), std::move(mvnc), logLevel, _log);
_device = _executor->openDevice(devicePool, _config);

const auto& revision = _device->revision();
_actualNumExecutors = config.compileConfig().numExecutors != -1 ? config.compileConfig().numExecutors : DefaultAllocation::numStreams(revision, config);

_supportedMetrics = {
METRIC_KEY(NETWORK_NAME),
@ -54,13 +50,19 @@ ExecutableNetwork::ExecutableNetwork(
};
}

void ExecutableNetwork::openDevice(std::vector<DevicePtr>& devicePool) {
_device = _executor->openDevice(devicePool, _config);
const auto& revision = _device->revision();
_actualNumExecutors = _config.compileConfig().numExecutors != -1 ? _config.compileConfig().numExecutors : DefaultAllocation::numStreams(revision, _config);
}

ExecutableNetwork::ExecutableNetwork(
const ie::CNNNetwork& network,
std::shared_ptr<IMvnc> mvnc,
std::vector<DevicePtr>& devicePool,
const MyriadConfiguration& config,
const std::shared_ptr<ie::ICore> core) :
ExecutableNetwork(std::move(mvnc), devicePool, config, core) {
ExecutableNetwork(std::move(mvnc), config, core) {
VPU_PROFILE(ExecutableNetwork);

const auto compilerLog = std::make_shared<Logger>(
@ -68,11 +70,9 @@ ExecutableNetwork::ExecutableNetwork(
_config.get<LogLevelOption>(),
defaultOutput(_config.compilerLogFilePath()));

if (_device == nullptr)
IE_THROW() << "No device was detected";
auto compiledGraph = compileNetwork(
network,
_device->_platform,
NC_MYRIAD_X,
_config,
compilerLog,
_core);
@ -84,12 +84,7 @@ ExecutableNetwork::ExecutableNetwork(
_inputInfo = std::move(compiledGraph->inputInfo);
_outputInfo = std::move(compiledGraph->outputInfo);

if (!_device->isBooted()) {
return;
}

const auto& networkName = network.getName();
_executor->allocateGraph(_device, _graphDesc, _graphBlob, compiledGraph->blobHeader, compiledGraph->numActiveStages, networkName, _actualNumExecutors);
if (_config.exclusiveAsyncRequests()) {
ExecutorManager *executorManager = ExecutorManager::getInstance();
_taskExecutor = executorManager->getExecutor("MYRIAD");
@ -100,6 +95,21 @@ ExecutableNetwork::ExecutableNetwork(
idStream << networkName << "_TaskExecutorGetResult" << i;
_taskExecutorGetResultIds.emplace(idStream.str());
}
if (_inputInfo.totalSize == 0) {
_isNetworkConstant = true;
const auto& nGraphFunc = network.getFunction();
const auto& sortedLayers = nGraphFunc->get_ordered_ops();
for (const auto& layer : sortedLayers) {
if (strcmp(layer->get_type_info().name, "Constant") == 0) {
const auto& constOp = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(layer);
auto name = constOp->get_friendly_name();
_constDatas[name] = ie::details::shareWeights(constOp);
}
}
return;
}
openDevice(devicePool);
_executor->allocateGraph(_device, _graphDesc, _graphBlob, compiledGraph->blobHeader, compiledGraph->numActiveStages, networkName, _actualNumExecutors);
}
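The refactor above defers opening a device until openDevice() is called, and the new branch treats a network whose compiled input size is zero as fully constant: Constant blobs are cached in _constDatas and no device is touched, so inference can be answered host-side. A reduced sketch of the constant-collection step (the ie::Blob conversion is elided, an assumption):

    #include <map>
    #include <memory>
    #include <string>
    #include <ngraph/function.hpp>
    #include <ngraph/op/constant.hpp>

    // Collect Constant ops by friendly name, as the new _inputInfo.totalSize == 0 branch does.
    std::map<std::string, std::shared_ptr<ngraph::op::v0::Constant>>
    collectConstants(const std::shared_ptr<ngraph::Function>& f) {
        std::map<std::string, std::shared_ptr<ngraph::op::v0::Constant>> consts;
        for (const auto& op : f->get_ordered_ops())
            if (auto c = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op))
                consts[c->get_friendly_name()] = c;
        return consts;
    }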
|
||||
|
||||
void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devicePool, const MyriadConfiguration& configuration) {
|
||||
@ -110,10 +120,6 @@ void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devic
|
||||
strm.seekg(currentPos, strm.beg);
|
||||
strm.read(&_graphBlob[0], blobSize);
|
||||
|
||||
if (!_device->isBooted()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::string networkName = importedNetworkName;
|
||||
|
||||
BlobReader blobReader;
|
||||
@ -126,9 +132,8 @@ void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devic
|
||||
|
||||
_inputInfo = blobReader.getInputInfo();
|
||||
_outputInfo = blobReader.getOutputInfo();
|
||||
|
||||
openDevice(devicePool);
|
||||
_executor->allocateGraph(_device, _graphDesc, _graphBlob, blobHeader, numStages, networkName, _actualNumExecutors);
|
||||
|
||||
_graphMetaData.stagesMeta.resize(numStages);
|
||||
for (auto &meta : _graphMetaData.stagesMeta) {
|
||||
meta.stageName = meta.stageType = meta.layerName = meta.layerType = "UNKNOWN";
|
||||
@ -147,9 +152,12 @@ void ExecutableNetwork::Import(std::istream& strm, std::vector<DevicePtr> &devic
|
||||
}
|
||||
}
|
||||
|
||||
ExecutableNetwork::ExecutableNetwork(std::istream& strm, std::shared_ptr<IMvnc> mvnc, std::vector<DevicePtr> &devicePool,
|
||||
const MyriadConfiguration& config, const std::shared_ptr<ie::ICore> core) :
|
||||
ExecutableNetwork(std::move(mvnc), devicePool, config, core) {
|
||||
ExecutableNetwork::ExecutableNetwork(std::istream& strm,
|
||||
std::shared_ptr<IMvnc> mvnc,
|
||||
std::vector<DevicePtr> &devicePool,
|
||||
const MyriadConfiguration& config,
|
||||
const std::shared_ptr<ie::ICore> core) :
|
||||
ExecutableNetwork(std::move(mvnc), config, core) {
|
||||
VPU_PROFILE(ExecutableNetwork);
|
||||
Import(strm, devicePool, config);
|
||||
}
|
||||
@ -160,7 +168,7 @@ ExecutableNetwork::ExecutableNetwork(
|
||||
std::vector<DevicePtr>& devicePool,
|
||||
        const MyriadConfiguration& config,
        const std::shared_ptr<ie::ICore> core) :
    ExecutableNetwork(std::move(mvnc), devicePool, config, core) {
    ExecutableNetwork(std::move(mvnc), config, core) {
    VPU_PROFILE(ExecutableNetwork);
    std::ifstream blobFile{blobFilename, std::ios::binary};
    Import(blobFile, devicePool, config);
@ -44,7 +44,9 @@ public:

    virtual ~ExecutableNetwork() {
        try {
            _executor->deallocateGraph(_device, _graphDesc);
            if (_device != nullptr) {
                _executor->deallocateGraph(_device, _graphDesc);
            }
        }
        catch (...) {
            std::cerr << "ERROR ~ExecutableNetwork():\n"
@ -54,18 +56,19 @@ public:

    ie::IInferRequestInternal::Ptr CreateInferRequestImpl(ie::InputsDataMap networkInputs,
                                                          ie::OutputsDataMap networkOutputs) override {
        if (_device == nullptr || !_device->isBooted()) {
        if (!_isNetworkConstant && (_device == nullptr || !_device->isBooted())) {
            IE_THROW() << "Can not create infer request: there is no available devices with platform "
                       << _device->_platform;
        }

        return std::make_shared<MyriadInferRequest>(_graphDesc, networkInputs, networkOutputs,
                                                    _inputInfo, _outputInfo,
                                                    _graphMetaData.stagesMeta, _config, _log, _executor);
                                                    _graphMetaData.stagesMeta, _config, _log, _executor,
                                                    _constDatas, _isNetworkConstant);
    }

    ie::IInferRequestInternal::Ptr CreateInferRequest() override {
        if (_device == nullptr || !_device->isBooted()) {
        if (!_isNetworkConstant && (_device == nullptr || !_device->isBooted())) {
            IE_THROW() << "Can not create infer request: there is no available devices with platform "
                       << _device->_platform;
        }
@ -73,7 +76,7 @@ public:
        auto syncRequestImpl = std::make_shared<MyriadInferRequest>(_graphDesc, _networkInputs, _networkOutputs,
                                                                    _inputInfo, _outputInfo,
                                                                    _graphMetaData.stagesMeta, _config, _log,
                                                                    _executor);
                                                                    _executor, _constDatas, _isNetworkConstant);
        syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
        auto taskExecutorGetResult = getNextTaskExecutor();
        return std::make_shared<MyriadAsyncInferRequest>(
@ -84,6 +87,16 @@ public:
        model.write(_graphBlob.data(), _graphBlob.size());
    }

    void Export(const std::string &modelFileName) override {
        std::ofstream modelFile(modelFileName, std::ios::out | std::ios::binary);

        if (modelFile.is_open()) {
            Export(modelFile);
        } else {
            IE_THROW() << "The " << modelFileName << " file can not be opened for export";
        }
    }

    ie::Parameter GetMetric(const std::string &name) const override;

    ie::CNNNetwork GetExecGraphInfo() override;
@ -98,9 +111,11 @@ private:
    DevicePtr _device;
    GraphMetaInfo _graphMetaData;
    MyriadConfiguration _config;
    bool _isNetworkConstant = false;
    const std::shared_ptr<ie::ICore> _core = nullptr;
    int _actualNumExecutors = 0;
    std::vector<std::string> _supportedMetrics;
    std::map<std::string, ie::Blob::Ptr> _constDatas;

    DataInfo _inputInfo;
    DataInfo _outputInfo;
@ -109,9 +124,8 @@ private:
    std::queue<std::string> _taskExecutorGetResultIds;

    ExecutableNetwork(std::shared_ptr<IMvnc> mvnc,
                      std::vector<DevicePtr> &devicePool,
                      const MyriadConfiguration& config,
                      const std::shared_ptr<ie::ICore> core);
                      const MyriadConfiguration& config,
                      const std::shared_ptr<ie::ICore> core);

    ie::ITaskExecutor::Ptr getNextTaskExecutor() {
        std::string id = _taskExecutorGetResultIds.front();
@ -124,6 +138,8 @@ private:

        return taskExecutor;
    }

    void openDevice(std::vector<DevicePtr>& devicePool);
};

} // namespace MyriadPlugin
@ -33,11 +33,13 @@ MyriadInferRequest::MyriadInferRequest(GraphDesc &graphDesc,
                                       const std::vector<StageMetaInfo> &blobMetaData,
                                       const MyriadConfig& myriadConfig,
                                       const Logger::Ptr &log,
                                       const MyriadExecutorPtr &executor) :
                                       const MyriadExecutorPtr &executor,
                                       std::map<std::string, ie::Blob::Ptr> constDatas,
                                       bool isNetworkConstant = true) :
    IInferRequestInternal(networkInputs, networkOutputs), _executor(executor),
    _log(log), _stagesMetaData(blobMetaData), _config(myriadConfig),
    _inputInfo(compilerInputsInfo), _outputInfo(compilerOutputsInfo),
    _graphDesc(graphDesc) {
    _graphDesc(graphDesc), _constDatas(constDatas), _isNetworkConstant(isNetworkConstant) {
    VPU_PROFILE(MyriadInferRequest);

    const auto& ioStrides = _config.compileConfig().ioStrides;
@ -83,7 +85,7 @@ MyriadInferRequest::MyriadInferRequest(GraphDesc &graphDesc,
    resultBuffer.resize(compilerOutputsInfo.totalSize);

    VPU_THROW_UNLESS(
        !_networkOutputs.empty() && !_networkInputs.empty(),
        !_networkOutputs.empty() && !(_networkInputs.empty() && !_isNetworkConstant),
        "No information about network's output/input");
}

@ -93,6 +95,9 @@ void MyriadInferRequest::InferImpl() {
}

void MyriadInferRequest::InferAsync() {
    if (_isNetworkConstant) {
        return;
    }
    VPU_PROFILE(InferAsync);

    // execute input pre-processing
@ -104,7 +109,7 @@ void MyriadInferRequest::InferAsync() {
    auto getOffset = [&inputInfo] (const std::string& name) {
        const auto offsetIt = inputInfo.offset.find(name);
        IE_ASSERT(offsetIt != inputInfo.offset.end()) << "MyriadInferRequest::InferAsync()\n"
                                                      << "Input offset [" << name << "] is not provided.";
            << "Input offset [" << name << "] is not provided.";
        return offsetIt->second;
    };

@ -123,9 +128,9 @@ void MyriadInferRequest::InferAsync() {
        const auto byteSize = blob->byteSize();
        const auto requiredSize = vpu::checked_cast<size_t>(offset) + byteSize;
        IE_ASSERT(requiredSize <= inputBuffer.size()) << "MyriadInferRequest::InferAsync()\n"
                                                      << "Input offset is too big. "
                                                      << "Required size: " << requiredSize
                                                      << ", Input buffer size: " << inputBuffer.size();
            << "Input offset is too big. "
            << "Required size: " << requiredSize
            << ", Input buffer size: " << inputBuffer.size();

        const auto foundBlob = getNetInputInfo(name);
        const auto vpuLayout = foundBlob->second->getTensorDesc().getLayout();
@ -139,9 +144,8 @@ void MyriadInferRequest::InferAsync() {
    }

    _executor->queueInference(_graphDesc, inputBuffer.data(),
                              _inputInfo.totalSize, nullptr, 0);
        _inputInfo.totalSize, nullptr, 0);
}
static void copyBlobAccordingUpperBound(
    const Blob::Ptr& in,
    const Blob::Ptr& out) {
@ -199,10 +203,22 @@ void MyriadInferRequest::GetResult() {
    const auto getVpuLayout = [&networkOutputs] (const std::string& name){
        const auto foundBlob = networkOutputs.find(name);
        IE_ASSERT(foundBlob != networkOutputs.end()) << "MyriadInferRequest::InferAsync()\n"
                                                     << "Output [" << name << "] is not provided.";
            << "Output [" << name << "] is not provided.";
        return foundBlob->second->getTensorDesc().getLayout();
    };

    if (_isNetworkConstant) {
        for (const auto& output : _outputs) {
            const auto& ieBlobName = output.first;
            const auto& ieBlob = output.second;
            IE_ASSERT(_constDatas.find(ieBlobName) != _constDatas.end()) <<
                "Input [" << ieBlobName << "] is not provided.";
            std::copy_n(
                _constDatas[ieBlobName]->cbuffer().as<uint8_t *>(),
                _constDatas[ieBlobName]->byteSize(),
                ieBlob->buffer().as<uint8_t *>());
        }
        return;
    }
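// The constant-network branch above answers GetResult() straight from cached
// constant blobs, with no device round-trip. Below is a minimal standalone
// sketch of the same copy pattern, assuming plain byte buffers in place of
// ie::Blob (every name in the sketch is illustrative, not a plugin helper).
#include <cstdint>
#include <cstring>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using Buffer = std::vector<uint8_t>;

// Copy every cached constant buffer into its matching output buffer,
// mirroring the std::copy_n loop in the patch above.
void fillOutputsFromConstants(const std::map<std::string, Buffer>& constDatas,
                              std::map<std::string, Buffer>& outputs) {
    for (auto& output : outputs) {
        const auto it = constDatas.find(output.first);
        if (it == constDatas.end())
            throw std::runtime_error("Input [" + output.first + "] is not provided.");
        output.second.resize(it->second.size());
        std::memcpy(output.second.data(), it->second.data(), it->second.size());
    }
}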
    // For networks with only one output
    if (_outputInfo.offset.size() == 1) {
        const auto& it = _outputs.begin();
@ -224,12 +240,12 @@ void MyriadInferRequest::GetResult() {
    const auto resultOffset = [&](const std::string& name) {
        const auto offset_it = _outputInfo.offset.find(name);
        IE_ASSERT(offset_it != _outputInfo.offset.end()) << "MyriadInferRequest::InferAsync()\n"
                                                         << "Output offset [" << name << "] error.";
            << "Output offset [" << name << "] error.";
        const auto offset = vpu::checked_cast<size_t>(offset_it->second);
        IE_ASSERT(offset <= resultBuffer.size()) << "MyriadInferRequest::InferAsync()\n"
                                                 << "Input offset is too big."
                                                 << "Required offset: " << offset
                                                 << "Result buffer size: " << resultBuffer.size();
            << "Input offset is too big."
            << "Required offset: " << offset
            << "Result buffer size: " << resultBuffer.size();
        return offset;
    };
@ -34,6 +34,8 @@ class MyriadInferRequest : public InferenceEngine::IInferRequestInternal {
    GraphDesc _graphDesc;
    std::vector<uint8_t> resultBuffer;
    std::vector<uint8_t> inputBuffer;
    std::map<std::string, ie::Blob::Ptr> _constDatas;
    bool _isNetworkConstant;

public:
    typedef std::shared_ptr<MyriadInferRequest> Ptr;
@ -46,7 +48,9 @@ public:
                      const std::vector<StageMetaInfo> &blobMetaData,
                      const MyriadConfig &myriadConfig,
                      const Logger::Ptr &log,
                      const MyriadExecutorPtr &executor);
                      const MyriadExecutorPtr &executor,
                      std::map<std::string, ie::Blob::Ptr> constDatas,
                      bool isNetworkConstant);

    void InferImpl() override;
    void InferAsync();
@ -0,0 +1,98 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "shared_test_classes/single_layer/prior_box.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestDefinitions;

namespace {
TEST_P(PriorBoxLayerTest, Serialize) {
    Serialize();
}

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::I32,
    InferenceEngine::Precision::U16
};
const std::vector<std::vector<float>> min_sizes = {
    {16.f, 32.f}
};

const std::vector<std::vector<float>> max_sizes = {
    {256.f, 512.f}
};

const std::vector<std::vector<float>> aspect_ratios = {
    {0.66f, 1.56f}
};

const std::vector<std::vector<float>> densities = {
    {0.55f}
};

const std::vector<std::vector<float>> fixed_ratios = {
    {0.88f}
};

const std::vector<std::vector<float>> fixed_sizes = {
    {1.25f}
};

const std::vector<bool> clips = {
    true, false
};

const std::vector<bool> flips = {
    true, false
};

const std::vector<float> steps = {
    1.0f, 2.0f
};

const std::vector<float> offsets = {
    0.0f, 0.5f
};

const std::vector<std::vector<float>> variances = {
    {2.22f, 3.14f}
};

const std::vector<bool> scale_all_sizes = {
    true, false
};

const std::vector<size_t> inputShape = {128, 128};
const std::vector<size_t> imageShape = {50, 50};

const auto layerSpecificParams = ::testing::Combine(
    ::testing::ValuesIn(min_sizes),
    ::testing::ValuesIn(max_sizes),
    ::testing::ValuesIn(aspect_ratios),
    ::testing::ValuesIn(densities),
    ::testing::ValuesIn(fixed_ratios),
    ::testing::ValuesIn(fixed_sizes),
    ::testing::ValuesIn(clips),
    ::testing::ValuesIn(flips),
    ::testing::ValuesIn(steps),
    ::testing::ValuesIn(offsets),
    ::testing::ValuesIn(variances),
    ::testing::ValuesIn(scale_all_sizes));

INSTANTIATE_TEST_SUITE_P(smoke_PriorBox_Basic, PriorBoxLayerTest,
                         ::testing::Combine(
                             layerSpecificParams,
                             ::testing::ValuesIn(netPrecisions),
                             ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                             ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                             ::testing::Values(InferenceEngine::Layout::ANY),
                             ::testing::Values(InferenceEngine::Layout::ANY),
                             ::testing::Values(inputShape),
                             ::testing::Values(imageShape),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         PriorBoxLayerTest::getTestCaseName);
} // namespace
@ -0,0 +1,45 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "shared_test_classes/single_layer/space_to_depth.hpp"

#include <ngraph/opsets/opset3.hpp>

#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;
using namespace ngraph::opset3;

namespace {
TEST_P(SpaceToDepthLayerTest, Serialize) {
    Serialize();
}
const std::vector<InferenceEngine::Precision> inputPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::U8,
    InferenceEngine::Precision::I16,
};

const std::vector<SpaceToDepth::SpaceToDepthMode> modes = {
    SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST,
    SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST};

const std::vector<std::vector<size_t>> inputShapesBS2 = {
    {1, 1, 2, 2}, {1, 1, 4, 4}, {1, 1, 6, 6}, {2, 8, 6, 6},
    {2, 4, 10, 8}, {1, 1, 2, 2, 2}, {1, 1, 4, 4, 4}, {1, 1, 6, 6, 6},
    {2, 8, 6, 6, 6}, {2, 4, 10, 8, 12}};

const auto SpaceToDepthBS2 = ::testing::Combine(
    ::testing::ValuesIn(inputShapesBS2), ::testing::ValuesIn(inputPrecisions),
    ::testing::ValuesIn(modes), ::testing::Values(1, 2),
    ::testing::Values(CommonTestUtils::DEVICE_CPU));

INSTANTIATE_TEST_CASE_P(
    smoke_SpaceToDepthSerialization, SpaceToDepthLayerTest,
    ::testing::Combine(::testing::ValuesIn(inputShapesBS2),
                       ::testing::ValuesIn(inputPrecisions),
                       ::testing::ValuesIn(modes), ::testing::Values(1, 2),
                       ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    SpaceToDepthLayerTest::getTestCaseName);
} // namespace
@ -0,0 +1,160 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <string>
#include <memory>

#include <ngraph/function.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/init_node_info.hpp>

#include "common_test_utils/ngraph_test_utils.hpp"

using namespace testing;
using namespace ngraph;

TEST(TransformationTests, ConvertDeformableConv8to1) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        const Strides strides{1, 1};
        const CoordinateDiff padding{0, 0};
        const Strides dilations{1, 1};

        const Shape input_shape{1, 1, 4, 4};
        const Shape filter_shape{1, 1, 2, 2};
        const Shape offsets_shape{1, 8, 3, 3};

        auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
        auto filter = std::make_shared<opset8::Parameter>(element::f32, filter_shape);
        auto offsets = std::make_shared<opset8::Parameter>(element::f32, offsets_shape);

        auto deformable_conv = std::make_shared<opset8::DeformableConvolution>(data,
                                                                               offsets,
                                                                               filter,
                                                                               strides,
                                                                               padding,
                                                                               padding,
                                                                               dilations);

        f = std::make_shared<Function>(NodeVector{deformable_conv}, ParameterVector{data, filter, offsets});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::ConvertDeformableConv8To1>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        const Strides strides{1, 1};
        const CoordinateDiff padding{0, 0};
        const Strides dilations{1, 1};

        const Shape input_shape{1, 1, 4, 4};
        const Shape filter_shape{1, 1, 2, 2};
        const Shape offsets_shape{1, 8, 3, 3};

        auto data = std::make_shared<opset1::Parameter>(element::f32, input_shape);
        auto filter = std::make_shared<opset1::Parameter>(element::f32, filter_shape);
        auto offsets = std::make_shared<opset1::Parameter>(element::f32, offsets_shape);

        auto deformable_conv = std::make_shared<opset1::DeformableConvolution>(data,
                                                                               offsets,
                                                                               filter,
                                                                               strides,
                                                                               padding,
                                                                               padding,
                                                                               dilations);

        f_ref = std::make_shared<Function>(NodeVector{deformable_conv}, ParameterVector{data, filter, offsets});
    }

    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}

TEST(TransformationTests, ConvertDeformableConv8to1_mask) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        const Strides strides{1, 1};
        const CoordinateDiff padding{0, 0};
        const Strides dilations{1, 1};

        const Shape input_shape{1, 1, 4, 4};
        const Shape filter_shape{1, 1, 2, 2};
        const Shape offsets_shape{1, 8, 3, 3};
        const Shape mask_shape{1, 4, 3, 3};

        auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
        auto filter = std::make_shared<opset8::Parameter>(element::f32, filter_shape);
        auto offsets = std::make_shared<opset8::Parameter>(element::f32, offsets_shape);
        auto mask = std::make_shared<opset8::Parameter>(element::f32, mask_shape);

        auto deformable_conv = std::make_shared<opset8::DeformableConvolution>(data,
                                                                               offsets,
                                                                               filter,
                                                                               mask,
                                                                               strides,
                                                                               padding,
                                                                               padding,
                                                                               dilations);

        f = std::make_shared<Function>(NodeVector{deformable_conv}, ParameterVector{data, filter,
                                                                                    mask, offsets});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::ConvertDeformableConv8To1>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    // mask input is provided, DeformableConvolution-8 must remain
    ASSERT_EQ(count_ops_of_type<opset1::DeformableConvolution>(f), 0);
    ASSERT_EQ(count_ops_of_type<opset8::DeformableConvolution>(f), 1);
}

TEST(TransformationTests, ConvertDeformableConv8to1_bilinear_interpolation_padding) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        const Strides strides{1, 1};
        const CoordinateDiff padding{0, 0};
        const Strides dilations{1, 1};

        const Shape input_shape{1, 1, 4, 4};
        const Shape filter_shape{1, 1, 2, 2};
        const Shape offsets_shape{1, 8, 3, 3};

        auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
        auto filter = std::make_shared<opset8::Parameter>(element::f32, filter_shape);
        auto offsets = std::make_shared<opset8::Parameter>(element::f32, offsets_shape);

        auto deformable_conv = std::make_shared<opset8::DeformableConvolution>(data,
                                                                               offsets,
                                                                               filter,
                                                                               strides,
                                                                               padding,
                                                                               padding,
                                                                               dilations,
                                                                               op::PadType::EXPLICIT,
                                                                               1,
                                                                               1,
                                                                               true);

        f = std::make_shared<Function>(NodeVector{deformable_conv}, ParameterVector{data, filter, offsets});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<pass::ConvertDeformableConv8To1>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }
    // use_bilinear_interpolation_padding is true, DeformableConvolution-8 must remain
    ASSERT_EQ(count_ops_of_type<opset1::DeformableConvolution>(f), 0);
    ASSERT_EQ(count_ops_of_type<opset8::DeformableConvolution>(f), 1);
}
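// A hedged note on the three tests above: ConvertDeformableConv8To1 downgrades
// opset8::DeformableConvolution to opset1 only when no v8-only feature is used;
// with a mask input or bilinear interpolation padding the v8 node must remain.
// Sketch of the implied eligibility check (an assumption about the pass's
// condition, not its actual source):
#include <ngraph/opsets/opset8.hpp>

bool isConvertibleToV1(const ngraph::opset8::DeformableConvolution& node) {
    return node.get_input_size() == 3 &&           // data, offsets, filter - no mask
           !node.get_bilinear_interpolation_pad(); // v1 has no such attribute
}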
@ -35,10 +35,12 @@ Output<Node> create_constant_with_zeros(const Shape & shape, const Mask & mask)
            Coordinate coord_end(shape);
            coord_end[dim] = dim_value + 1;

            NGRAPH_SUPPRESS_DEPRECATED_START
            CoordinateTransform iter(shape, coord_begin, coord_end);
            for (const Coordinate & coord : iter) {
                values[iter.index(coord)] = 0;
            }
            NGRAPH_SUPPRESS_DEPRECATED_END
        }
    }
    return std::make_shared<opset5::Constant>(element::f32, shape, values);
@ -57,10 +59,12 @@ TEST(TransformationTests, InitMasksOutputChannel) {
    Shape weights_shape{6, 3, 3, 3};

    std::vector<double> values(shape_size(weights_shape), 1);
    NGRAPH_SUPPRESS_DEPRECATED_START
    CoordinateTransform iter(weights_shape, {0, 1, 0, 0}, {6, 2, 3, 3});
    for (const Coordinate & coord : iter) {
        values[iter.index(coord)] = 0;
    }
    NGRAPH_SUPPRESS_DEPRECATED_END

    auto weights = std::make_shared<opset5::Constant>(element::f32, weights_shape, values);
    pass::InitConstMask({1}).apply(weights);
@ -0,0 +1,81 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <string>
#include <memory>
#include <queue>

#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
#include <transformations/init_node_info.hpp>
#include <ngraph/pass/manager.hpp>

#include "common_test_utils/ngraph_test_utils.hpp"


using namespace testing;
using namespace ngraph;

auto gather = [](const std::shared_ptr<Node> input, std::vector<int64_t> indices, bool scalar = false) -> Output<Node> {
    std::shared_ptr<Node> indices_node;
    if (scalar)
        indices_node = opset7::Constant::create(element::i64, {}, indices);
    else
        indices_node = opset7::Constant::create(element::i64, {indices.size()}, indices);
    return std::make_shared<ngraph::opset7::Gather>(
        input, indices_node, opset7::Constant::create(element::i64, {}, {0}));
};

TEST(TransformationTests, ShapeSubGraphTest) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);

    Shape data_shape{1, 2, 3, 4};
    {
        auto data = std::make_shared<opset7::Parameter>(element::f32, data_shape);

        auto shape_op_1 = std::make_shared<opset7::ShapeOf>(data);
        auto gather_1 = gather(shape_op_1, {1}, true);
        auto unsqueeze_1 = std::make_shared<opset7::Unsqueeze>(
            gather_1, opset7::Constant::create(element::i64, {1}, {0}));

        auto shape_op_2 = std::make_shared<opset7::ShapeOf>(data);
        auto gather_2 = gather(shape_op_2, {2}, true);
        auto unsqueeze_2 = std::make_shared<opset7::Unsqueeze>(
            gather_2, opset7::Constant::create(element::i64, {1}, {0}));

        auto const_1 = opset7::Constant::create(element::i64, Shape{1}, {2});
        auto const_2 = opset7::Constant::create(element::i64, Shape{1}, {2});

        auto concat = std::make_shared<opset7::Concat>(OutputVector{unsqueeze_1, unsqueeze_2, const_1, const_2}, 0);

        auto reshape = std::make_shared<opset7::Reshape>(data, concat, false);
        f = std::make_shared<Function>(NodeVector{reshape}, ParameterVector{data});
        pass::Manager m;
        m.register_pass<pass::InitNodeInfo>();
        m.register_pass<pass::SimplifyShapeOfSubGraph>();
        m.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
        ASSERT_EQ(reshape->get_output_partial_shape(0), PartialShape({2, 3, 2, 2}));
    }
    {
        auto data = std::make_shared<opset7::Parameter>(element::f32, data_shape);

        auto shape_op_1 = std::make_shared<opset7::ShapeOf>(data);
        auto gather_1 = gather(shape_op_1, {1, 2});

        auto const_1 = opset7::Constant::create(element::i64, Shape{1}, {2});
        auto const_2 = opset7::Constant::create(element::i64, Shape{1}, {2});

        auto concat = std::make_shared<opset7::Concat>(OutputVector{gather_1, const_1, const_2}, 0);

        auto reshape = std::make_shared<opset7::Reshape>(data, concat, false);
        f_ref = std::make_shared<Function>(NodeVector{reshape}, ParameterVector{data});
    }

    auto res = compare_functions(f, f_ref, true);
    ASSERT_TRUE(res.first) << res.second;
}
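// Why the test above expects PartialShape({2, 3, 2, 2}): the pass folds the two
// ShapeOf -> Gather(scalar) -> Unsqueeze chains into a single Gather({1, 2}) on
// one ShapeOf, so the concat yields dims 1 and 2 of the {1, 2, 3, 4} input
// (= 2 and 3) followed by two constants equal to 2. A one-line worked check:
static_assert(1 * 2 * 3 * 4 == 2 * 3 * 2 * 2,
              "the reshape target preserves the element count");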
@ -65,9 +65,11 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes

// List of operations that should be tested also with integer precision
const std::map<ActivationTypes, std::vector<std::vector<float>>> intActivationTypes = {
        {Atan, {}},
        {Negative, {}},
        {Ceiling, {}},
        {Cos, {}},
        {Sinh, {}},
        {Sqrt, {}},
        {Tanh, {}},
};

@ -89,4 +89,30 @@ INSTANTIATE_TEST_SUITE_P(
        ::testing::Values(std::vector<size_t>({1, 4, 224, 224})),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    DeformableConvolutionLayerTest::getTestCaseName);

/* ============= Single Test Case ============= */
const std::vector<std::vector<size_t>> single_deform_vals = {{1, 54, 28, 28}};
const std::vector<std::vector<size_t>> single_kernel = {{1, 3, 3, 3}};
const std::vector<size_t> single_deform_groups = {3};

const auto deformableConv2DParams_SingleTestCase = ::testing::Combine(
    ::testing::ValuesIn(single_deform_vals),
    ::testing::ValuesIn(single_kernel), ::testing::ValuesIn(strides),
    ::testing::ValuesIn(padBegins), ::testing::ValuesIn(padEnds),
    ::testing::ValuesIn(dilations), ::testing::ValuesIn(groups),
    ::testing::ValuesIn(single_deform_groups), ::testing::ValuesIn(numOutChannels),
    ::testing::Values(ngraph::op::PadType::EXPLICIT));

INSTANTIATE_TEST_SUITE_P(
    smoke_DeformableConvolution2D_SingleTestCase, DeformableConvolutionLayerTest,
    ::testing::Combine(
        deformableConv2DParams_SingleTestCase, ::testing::ValuesIn(netPrecisions),
        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
        ::testing::Values(InferenceEngine::Layout::ANY),
        ::testing::Values(InferenceEngine::Layout::ANY),
        ::testing::Values(std::vector<size_t>({1, 3, 30, 30})),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    DeformableConvolutionLayerTest::getTestCaseName);

} // namespace
@ -0,0 +1,81 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "single_layer_tests/prior_box.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestDefinitions;

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::I32,
    InferenceEngine::Precision::U16};
const std::vector<std::vector<float>> min_sizes = {
    {256.0f}};

const std::vector<std::vector<float>> max_sizes = {
    {315.0f}};

const std::vector<std::vector<float>> aspect_ratios = {
    {2.0f}};

const std::vector<std::vector<float>> densities = {
    {1.0f}};

const std::vector<std::vector<float>> fixed_ratios = {
    {}};

const std::vector<std::vector<float>> fixed_sizes = {
    {}};

const std::vector<bool> clips = {
    false, true};

const std::vector<bool> flips = {
    false, true};

const std::vector<float> steps = {
    1.0f,
};

const std::vector<float> offsets = {
    0.0f,
};

const std::vector<std::vector<float>> variances = {
    {}};

const std::vector<bool> scale_all_sizes = {
    false, true};

const std::vector<size_t> inputShape = {300, 300};
const std::vector<size_t> imageShape = {32, 32};

const auto layerSpecificParams = ::testing::Combine(
    ::testing::ValuesIn(min_sizes),
    ::testing::ValuesIn(max_sizes),
    ::testing::ValuesIn(aspect_ratios),
    ::testing::ValuesIn(densities),
    ::testing::ValuesIn(fixed_ratios),
    ::testing::ValuesIn(fixed_sizes),
    ::testing::ValuesIn(clips),
    ::testing::ValuesIn(flips),
    ::testing::ValuesIn(steps),
    ::testing::ValuesIn(offsets),
    ::testing::ValuesIn(variances),
    ::testing::ValuesIn(scale_all_sizes));

INSTANTIATE_TEST_SUITE_P(smoke_PriorBox_Basic, PriorBoxLayerTest,
                         ::testing::Combine(
                             layerSpecificParams,
                             ::testing::ValuesIn(netPrecisions),
                             ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                             ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                             ::testing::Values(InferenceEngine::Layout::ANY),
                             ::testing::Values(InferenceEngine::Layout::ANY),
                             ::testing::Values(inputShape),
                             ::testing::Values(imageShape),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         PriorBoxLayerTest::getTestCaseName);
@ -9,28 +9,12 @@

std::vector<std::string> disabledTestPatterns() {
    return {
        ".*TensorNamesTest\\.CheckAddOutput.*",
        // TODO: FIX BUG 31661
        // TODO: support InferRequest in GNAPlugin
        ".*InferRequestTests\\.canRun3AsyncRequestsConsistentlyFromThreadsWithoutWait.*",
        // TODO: FIX BUG 23740
        ".*InferRequestTests\\.CanCreateTwoExeNetworks.*",
        // TODO: FIX BUG 26702
        ".*InferRequestTests\\.FailedAsyncInferWithNegativeTimeForWait.*",
        // TODO: FIX BUG 23741
        ".*InferRequestTests\\.canRun3SyncRequestsConsistentlyFromThreads.*",
        // TODO: FIX BUG 23742
        ".*InferRequestTests\\.canWaitWithotStartAsync.*",
        // TODO: FIX BUG 23743
        ".*InferRequestTests\\.returnDeviceBusyOnSetBlobAfterAsyncInfer.*",
        ".*InferRequestTests\\.returnDeviceBusyOnGetBlobAfterAsyncInfer.*",
        ".*InferRequestTests\\.returnDeviceBusyOnGetPerformanceCountAfterAsyncInfer.*",
        ".*InferRequestTests\\.returnDeviceBusyOnStartInferAfterAsyncInfer.*",
        ".*InferRequestTests\\.returnDeviceBusyOnGetUserDataAfterAsyncInfer.*",
        ".*InferRequestTests\\.returnDeviceBusyOnSetUserDataAfterAsyncInfer.*",
        // TODO: FIX BUG 31661
        ".*InferRequestTests\\.canStartSeveralAsyncInsideCompletionCallbackNoSafeDtorWithoutWait.*",
        // TODO: FIX BUG 31661
        // TODO: FIX BUG 59041
        ".*Behavior.*CallbackThrowException.*",
        // TODO: FIX BUG 32210
        R"(.*ActivationLayerTest.CompareWithRefs/(Sigmoid|Tanh|Exp|Log).*)",
@ -154,6 +154,34 @@ INSTANTIATE_TEST_SUITE_P(
    Gather7LayerTest::getTestCaseName
);

INSTANTIATE_TEST_SUITE_P(
    smoke_Gather7Axes4i4b1,
    Gather8LayerTest,
    GatherAxes4i4b1,
    Gather8LayerTest::getTestCaseName
);

INSTANTIATE_TEST_SUITE_P(
    smoke_Gather7Axes4i4b2,
    Gather8LayerTest,
    GatherAxes4i4b1,
    Gather8LayerTest::getTestCaseName
);

INSTANTIATE_TEST_SUITE_P(
    smoke_Gather7Axes4i8b1,
    Gather8LayerTest,
    GatherAxes4i8b1,
    Gather8LayerTest::getTestCaseName
);

INSTANTIATE_TEST_SUITE_P(
    smoke_Gather7Axes4i8b2,
    Gather8LayerTest,
    GatherAxes4i8b2,
    Gather8LayerTest::getTestCaseName
);

const std::vector<std::vector<int>> indices = {
    std::vector<int>{0, 3, 2, 1},
};
@ -74,7 +74,6 @@ protected:
        const auto tensorWithTargetShapeParam = std::make_shared<ngraph::opset3::Parameter>(tensorType, targetShape);

        const auto shapeOfNode = std::make_shared<ngraph::opset3::ShapeOf>(tensorWithTargetShapeParam, shapeType);
        shapeOfNode->set_is_foldable(false);

        ngraph::ParameterVector params{tensorParam, tensorWithTargetShapeParam};

@ -197,7 +196,6 @@ protected:
        const auto tensorWithTargetShapeParam = std::make_shared<ngraph::opset5::Parameter>(shapeType, targetShape);

        const auto shapeOfNode = std::make_shared<ngraph::opset5::ShapeOf>(tensorWithTargetShapeParam, shapeType);
        shapeOfNode->set_is_foldable(false);

        ngraph::ParameterVector params{tensorParam, tensorWithTargetShapeParam};

@ -23,8 +23,6 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*IEClassGetAvailableDevices.*)",
        // TODO: Issue: 40473
        R"(.*TopKLayerTest.*mode=min.*sort=index.*)",
        // TODO: Issue: 40961
        R"(.*(ConstantResultSubgraphTest).*)",
        // TODO: Issue: 42828
        R"(.*DSR_NonMaxSuppression.*NBoxes=(5|20|200).*)",
        // TODO: Issue: 42721

@ -23,15 +23,7 @@ const std::vector<SizeVector> shapes = {
};

const std::vector<Precision> precisions = {
    Precision::U8,
    Precision::I8,
    Precision::U16,
    Precision::I16,
    Precision::I32,
    Precision::U64,
    Precision::I64,
    Precision::FP32,
    Precision::BOOL
    Precision::FP32
};

INSTANTIATE_TEST_SUITE_P(smoke_Check, ConstantResultSubgraphTest,

@ -16,4 +16,8 @@ TEST_P(Gather7LayerTest, CompareWithRefs) {
    Run();
};

TEST_P(Gather8LayerTest, CompareWithRefs) {
    Run();
};

} // namespace LayerTestsDefinitions
@ -0,0 +1,15 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "shared_test_classes/single_layer/prior_box.hpp"

namespace LayerTestDefinitions {

TEST_P(PriorBoxLayerTest, CompareWithRefs) {
    Run();
}

} // namespace LayerTestDefinitions
@ -63,4 +63,13 @@ protected:
    void SetUp() override;
};

class Gather8LayerTest : public testing::WithParamInterface<gather7ParamsTuple>,
                         virtual public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(const testing::TestParamInfo<gather7ParamsTuple>& obj);

protected:
    void SetUp() override;
};

} // namespace LayerTestsDefinitions
@ -0,0 +1,80 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <vector>
#include <tuple>
#include <string>
#include <map>
#include <memory>
#include <set>
#include <functional>
#include <gtest/gtest.h>


#include "ie_core.hpp"
#include "ie_precision.hpp"

#include "ngraph/opsets/opset1.hpp"

#include "functional_test_utils/blob_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"

namespace LayerTestDefinitions {
using priorBoxSpecificParams = std::tuple<
    std::vector<float>, // min_size
    std::vector<float>, // max_size
    std::vector<float>, // aspect_ratio
    std::vector<float>, // density
    std::vector<float>, // fixed_ratio
    std::vector<float>, // fixed_size
    bool,               // clip
    bool,               // flip
    float,              // step
    float,              // offset
    std::vector<float>, // variance
    bool>;              // scale_all_sizes

typedef std::tuple<
    priorBoxSpecificParams,
    InferenceEngine::Precision,  // net precision
    InferenceEngine::Precision,  // Input precision
    InferenceEngine::Precision,  // Output precision
    InferenceEngine::Layout,     // Input layout
    InferenceEngine::Layout,     // Output layout
    InferenceEngine::SizeVector, // input shape
    InferenceEngine::SizeVector, // image shape
    std::string> priorBoxLayerParams;

class PriorBoxLayerTest
    : public testing::WithParamInterface<priorBoxLayerParams>,
      virtual public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(const testing::TestParamInfo<priorBoxLayerParams>& obj);
protected:
    InferenceEngine::SizeVector inputShapes;
    InferenceEngine::SizeVector imageShapes;
    InferenceEngine::Precision netPrecision;
    std::vector<float> min_size;
    std::vector<float> max_size;
    std::vector<float> aspect_ratio;
    std::vector<float> density;
    std::vector<float> fixed_ratio;
    std::vector<float> fixed_size;
    std::vector<float> variance;
    float step;
    float offset;
    bool clip;
    bool flip;
    bool scale_all_sizes;

    void SetUp() override;
};

} // namespace LayerTestDefinitions
@ -93,4 +93,47 @@ void Gather7LayerTest::SetUp() {
    function = std::make_shared<ngraph::Function>(results, functionParams, "gather");
}

std::string Gather8LayerTest::getTestCaseName(const testing::TestParamInfo<gather7ParamsTuple>& obj) {
    std::tuple<int, int> axis_batchIdx;
    std::vector<int> indices;
    std::vector<size_t> indicesShape, inputShape;
    InferenceEngine::Precision netPrecision;
    InferenceEngine::Precision inPrc, outPrc;
    InferenceEngine::Layout inLayout, outLayout;
    std::string targetName;
    std::tie(inputShape, indicesShape, axis_batchIdx, netPrecision, inPrc, outPrc, inLayout, outLayout, targetName) = obj.param;
    std::ostringstream result;
    result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
    result << "axis=" << std::get<0>(axis_batchIdx) << "_";
    result << "batchIdx=" << std::get<1>(axis_batchIdx) << "_";
    result << "indicesShape=" << CommonTestUtils::vec2str(indicesShape) << "_";
    result << "netPRC=" << netPrecision.name() << "_";
    result << "inPRC=" << inPrc.name() << "_";
    result << "outPRC=" << outPrc.name() << "_";
    result << "inL=" << inLayout << "_";
    result << "outL=" << outLayout << "_";
    result << "trgDev=" << targetName << "_";
    return result.str();
}

void Gather8LayerTest::SetUp() {
    std::tuple<int, int> axis_batchIdx;
    std::vector<size_t> indicesShape;
    std::vector<size_t> inputShape;
    InferenceEngine::Precision netPrecision;
    std::tie(inputShape, indicesShape, axis_batchIdx, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = GetParam();
    int axis = std::get<0>(axis_batchIdx);
    int batchIdx = std::get<1>(axis_batchIdx);
    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
    auto functionParams = ngraph::builder::makeParams(ngPrc, { inputShape });
    auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(functionParams));
    auto indicesNode = ngraph::builder::makeConstant<int>(ngraph::element::i64, indicesShape, {}, true,
                                                          inputShape[axis < 0 ? axis + inputShape.size() : axis] - 1,
                                                          1 - static_cast<int>(inputShape[axis < 0 ? axis + inputShape.size() : axis]));
    auto axisNode = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape({}), { axis });
    auto gather = std::make_shared<ngraph::opset8::Gather>(paramOuts[0], indicesNode, axisNode, batchIdx);
    ngraph::ResultVector results{ std::make_shared<ngraph::opset8::Result>(gather) };
    function = std::make_shared<ngraph::Function>(results, functionParams, "gather");
}

} // namespace LayerTestsDefinitions
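// In Gather8LayerTest::SetUp above, random indices are drawn from
// [1 - dim, dim - 1] along the gathered axis, so Gather-8's negative-index
// support is exercised. A hedged sketch of the normalization Gather-8 is
// expected to apply (an illustrative helper, not a test utility):
#include <cstdint>

int64_t normalize_gather_index(int64_t idx, int64_t dim) {
    return idx < 0 ? idx + dim : idx;  // e.g. dim = 4 maps idx -1 to 3
}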
@ -0,0 +1,91 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "shared_test_classes/single_layer/prior_box.hpp"

namespace LayerTestDefinitions {
std::string PriorBoxLayerTest::getTestCaseName(const testing::TestParamInfo<priorBoxLayerParams>& obj) {
    InferenceEngine::Precision netPrecision;
    InferenceEngine::Precision inPrc, outPrc;
    InferenceEngine::Layout inLayout, outLayout;
    InferenceEngine::SizeVector inputShapes, imageShapes;
    std::string targetDevice;
    priorBoxSpecificParams specParams;
    std::tie(specParams,
             netPrecision,
             inPrc, outPrc, inLayout, outLayout,
             inputShapes,
             imageShapes,
             targetDevice) = obj.param;

    std::vector<float> min_size, max_size, aspect_ratio, density, fixed_ratio, fixed_size, variance;
    float step, offset;
    bool clip, flip, scale_all_sizes;
    std::tie(min_size, max_size, aspect_ratio,
             density, fixed_ratio, fixed_size, clip,
             flip, step, offset, variance, scale_all_sizes) = specParams;

    std::ostringstream result;
    const char separator = '_';
    result << "IS=" << CommonTestUtils::vec2str(inputShapes) << separator;
    result << "imageS=" << CommonTestUtils::vec2str(imageShapes) << separator;
    result << "netPRC=" << netPrecision.name() << separator;
    result << "inPRC=" << inPrc.name() << separator;
    result << "outPRC=" << outPrc.name() << separator;
    result << "inL=" << inLayout << separator;
    result << "outL=" << outLayout << separator;
    result << "min_s=" << CommonTestUtils::vec2str(min_size) << separator;
    result << "max_s=" << CommonTestUtils::vec2str(max_size) << separator;
    result << "asp_r=" << CommonTestUtils::vec2str(aspect_ratio) << separator;
    result << "dens=" << CommonTestUtils::vec2str(density) << separator;
    result << "fix_r=" << CommonTestUtils::vec2str(fixed_ratio) << separator;
    result << "fix_s=" << CommonTestUtils::vec2str(fixed_size) << separator;
    result << "var=" << CommonTestUtils::vec2str(variance) << separator;
    result << "step=" << step << separator;
    result << "off=" << offset << separator;
    result << "clip=" << clip << separator;
    result << "flip=" << flip << separator;
    result << "scale_all=" << scale_all_sizes << separator;
    result << "trgDev=" << targetDevice;

    return result.str();
}

void PriorBoxLayerTest::SetUp() {
    priorBoxSpecificParams specParams;
    std::tie(specParams, netPrecision,
             inPrc, outPrc, inLayout, outLayout,
             inputShapes, imageShapes, targetDevice) = GetParam();

    std::tie(min_size, max_size, aspect_ratio,
             density, fixed_ratio, fixed_size, clip,
             flip, step, offset, variance, scale_all_sizes) = specParams;

    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
    auto params = ngraph::builder::makeParams(ngPrc, {inputShapes, imageShapes});

    ngraph::op::PriorBoxAttrs attributes;
    attributes.min_size = min_size;
    attributes.max_size = max_size;
    attributes.aspect_ratio = aspect_ratio;
    attributes.density = density;
    attributes.fixed_ratio = fixed_ratio;
    attributes.fixed_size = fixed_size;
    attributes.variance = variance;
    attributes.step = step;
    attributes.offset = offset;
    attributes.clip = clip;
    attributes.flip = flip;

    auto shape_of_1 = std::make_shared<ngraph::opset3::ShapeOf>(params[0]);
    auto shape_of_2 = std::make_shared<ngraph::opset3::ShapeOf>(params[1]);
    auto priorBox = std::make_shared<ngraph::op::PriorBox>(
        shape_of_1,
        shape_of_2,
        attributes);

    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(priorBox)};
    function = std::make_shared<ngraph::Function>(results, params, "PriorBoxFunction");
}
} // namespace LayerTestDefinitions
@ -60,9 +60,10 @@ VERIFIED_OP_REFERENCES = [
    'NonMaxSuppression-4',
    'NonMaxSuppression-5',
    'NonZero-3',
    'PSROIPooling-1',
    'PriorBox-1',
    'Proposal-1',
    'Proposal-4',
    'PSROIPooling-1',
    'RNNSequence-5',
    'ROIAlign-3',
    'ROIPooling-2',
@ -83,11 +84,13 @@ VERIFIED_OP_REFERENCES = [
    'ReorgYOLO-2',
    'Result-1'
    'Round-5',
    'SpaceToDepth-1',
    'ScatterNDUpdate-4',
    'ShapeOf-1',
    'ShapeOf-3',
    'Sigmoid-1',
    'Sin-1',
    'Sinh-1'
    'SoftPlus-4',
    'Softmax-1',
    'Split-1',
@ -14,6 +14,7 @@
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/opsets/opset8.hpp>

#include "ngraph_functions/utils/data_utils.hpp"
@ -0,0 +1,89 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "transformations/remove_extra_reshapes.hpp"

#include "common_test_utils/ngraph_test_utils.hpp"
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>

namespace testing {

TEST(TransformationTests, RemoveExtraReshapesTestReshapeNotEqualInputOutput) {
    std::shared_ptr<ngraph::Function> func(nullptr), reference_func(nullptr);
    const ngraph::Shape data_shape{1, 3, 64, 64};

    {
        auto input_params = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, data_shape);
        auto new_shape = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {1, 3, 64 * 64});
        auto reshape_operation = std::make_shared<ngraph::opset7::Reshape>(input_params, new_shape, true);
        auto max_pool_operation = std::make_shared<ngraph::opset7::MaxPool>(reshape_operation,
                                                                            ngraph::Strides{1},
                                                                            ngraph::Shape{0},
                                                                            ngraph::Shape{0},
                                                                            ngraph::Shape{3});
        auto result = std::make_shared<ngraph::opset7::Result>(max_pool_operation);
        func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                  ngraph::ParameterVector{input_params});

        reference_func = ngraph::clone_function(*func);

        ngraph::pass::Manager m;
        m.register_pass<ngraph::pass::InitNodeInfo>();
        m.register_pass<GNAPluginNS::RemoveExtraReshapes>();
        m.run_passes(func);
        ASSERT_NO_THROW(check_rt_info(func));
    }

    const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES);
    const FunctionsComparator::Result result = func_comparator(func, reference_func);
    ASSERT_TRUE(result.valid);
}

TEST(TransformationTests, RemoveExtraReshapesTestReshapeEqualInputOutput) {
    std::shared_ptr<ngraph::Function> func(nullptr), reference_func(nullptr);
    const ngraph::Shape data_shape{1, 3, 64, 64};

    {
        auto input_params = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, data_shape);
        auto new_shape = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1, 3, 64, 64});
        auto reshape_operation = std::make_shared<ngraph::opset7::Reshape>(input_params, new_shape, true);
        auto max_pool_operation = std::make_shared<ngraph::opset7::MaxPool>(reshape_operation,
                                                                            ngraph::Strides{1, 1},
                                                                            ngraph::Shape{0, 0},
                                                                            ngraph::Shape{0, 0},
                                                                            ngraph::Shape{3, 3});
        auto result = std::make_shared<ngraph::opset7::Result>(max_pool_operation);
        func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                  ngraph::ParameterVector{input_params});

        ngraph::pass::Manager m;
        m.register_pass<ngraph::pass::InitNodeInfo>();
        m.register_pass<GNAPluginNS::RemoveExtraReshapes>();
        m.run_passes(func);
        ASSERT_NO_THROW(check_rt_info(func));
    }

    {
        auto input_params = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, data_shape);
        auto max_pool_operation = std::make_shared<ngraph::opset7::MaxPool>(input_params,
                                                                            ngraph::Strides{1, 1},
                                                                            ngraph::Shape{0, 0},
                                                                            ngraph::Shape{1, 1},
                                                                            ngraph::Shape{4, 4});
        auto result = std::make_shared<ngraph::opset7::Result>(max_pool_operation);
        reference_func = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                            ngraph::ParameterVector{input_params});
    }

    const FunctionsComparator func_comparator = FunctionsComparator::with_default();
    const FunctionsComparator::Result result = func_comparator(func, reference_func);
    ASSERT_TRUE(result.valid);
}

} // namespace testing
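// The pair of tests above pins down the contract: RemoveExtraReshapes only
// drops a Reshape in front of a MaxPool when the reshape is a no-op. A hedged
// sketch of the implied predicate (an assumption, not GNA's actual matcher):
#include <ngraph/opsets/opset7.hpp>

bool isRemovableReshape(const ngraph::opset7::Reshape& reshape) {
    // Removable only when the input and output shapes already match.
    return reshape.get_input_shape(0) == reshape.get_output_shape(0);
}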
@ -35,10 +35,12 @@ struct gather : public primitive_base<gather> {
    /// @param axis Gathering axis.
    /// @param output_shape Output shape.
    /// @param batch_dim Batch_dim
    /// @param support_neg_ind Support negative indexes
    gather(const primitive_id& id,
           const primitive_id& dict,
           const primitive_id& idx,
@ -42,8 +43,11 @@ struct gather : public primitive_base<gather> {
           const format& output_format,
           const tensor& output_shape,
           const int64_t batch_dim = 0,
           const padding& output_padding = padding())
        : primitive_base(id, {dict, idx}, output_padding), axis(axis), output_format(output_format), output_shape(output_shape), batch_dim(batch_dim) {}
           const bool support_neg_ind = false,
           const padding& output_padding = padding()
           )
        : primitive_base(id, {dict, idx}, output_padding), axis(axis), output_format(output_format),
          output_shape(output_shape), batch_dim(batch_dim), support_neg_ind(support_neg_ind) {}

    /// @brief Gathering axis
    gather_axis axis;
@ -53,6 +57,8 @@ struct gather : public primitive_base<gather> {
    tensor output_shape;
    /// @brief Gathering batch_dim
    int64_t batch_dim;
    /// @brief Support negative indexes
    bool support_neg_ind;
};
/// @}
/// @}
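// Illustrative construction of the extended primitive with negative indices
// enabled; the ids "dict" and "indices" are placeholders, only the gather
// primitive and its new support_neg_ind argument come from the patch above:
cldnn::gather g("gather", "dict", "indices",
                cldnn::gather::gather_axis::along_y, cldnn::format::bfzyx,
                cldnn::tensor(3, 2, 2, 3, 3),
                /*batch_dim=*/-1, /*support_neg_ind=*/true);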
@ -79,6 +79,10 @@ static int64_t GetGatherBatchDim(const gather_params& params) {
        return params.batch_dim;
}

static inline std::string GetGatherMaxIndexDim(const gather_params& params) {
    return std::to_string(params.inputs[0].GetDims().at(params.inputs[0].GetDims().size() - GetGatherChannelIndex(params) - 1).v);
}

static inline std::string GetOrderString(std::vector<std::string>& order) {
    std::string order_str = order[0];
    for (size_t i = 1; i < order.size(); i++)
@ -168,6 +172,8 @@ JitConstants GatherKernelRef::GetJitConstants(const gather_params& params) const

    jit.AddConstant(MakeJitConstant("DICTIONARY_INDEX_ORDER", GetDictionaryIndexOrder(params, GetGatherChannelIndex(params))));
    jit.AddConstant(MakeJitConstant("INDICES_INDEX_ORDER", GetIndecesIdxOrder(params, GetGatherChannelIndex(params), GetGatherBatchDim(params))));
    if (params.support_neg_ind)
        jit.AddConstant(MakeJitConstant("INDEX_DIM", GetGatherMaxIndexDim(params)));

    if (!params.fused_ops.empty()) {
        std::vector<std::string> idx_order = GetOrder(params.inputs[0].GetDims().size());
@ -11,10 +11,11 @@ namespace kernel_selector {
// gather_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
struct gather_params : public base_params {
    gather_params() : base_params(KernelType::GATHER), axis(GatherAxis::BATCH), batch_dim(0) {}
    gather_params() : base_params(KernelType::GATHER), axis(GatherAxis::BATCH), batch_dim(0), support_neg_ind(false) {}

    GatherAxis axis;
    int64_t batch_dim;
    bool support_neg_ind;
    virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
};
@ -5,7 +5,19 @@
#include "include/data_types.cl"
#include "include/fetch_data.cl"

#define INPUT_AXIS_INDEX (uint)indices[indices_idx]
#ifdef INDEX_DIM
inline uint FUNC(get_positive_index)(int in)
{
    if(in < 0)
        return in + INDEX_DIM;
    else
        return in;
}
#define INPUT_AXIS_INDEX (uint)FUNC_CALL(get_positive_index)(indices[indices_idx])
#else
#define INPUT_AXIS_INDEX (uint)(indices[indices_idx])
#endif

#define GET_DICTIONARY_INDEX(idx_order) INPUT0_GET_INDEX(idx_order)
#define GET_INDICES_INDEX(idx_order) INPUT1_GET_INDEX(idx_order)
#define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _GET_INDEX)(idx_order)
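// The OpenCL helper above wraps a negative index by adding the gathered axis
// size (INDEX_DIM, emitted as a JIT constant only when negative indices are
// enabled). The same logic in plain C++ for reference (a sketch, not part of
// the kernel):
inline unsigned get_positive_index(int in, int index_dim) {
    return in < 0 ? static_cast<unsigned>(in + index_dim)
                  : static_cast<unsigned>(in);
}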
@ -16,23 +16,29 @@ const char *debug_configuration::prefix = "GPU_Debug: ";
|
||||
static void print_option(std::string option_name, std::string option_value) {
|
||||
GPU_DEBUG_COUT << "Config " << option_name << " = " << option_value << std::endl;
|
||||
}
|
||||
|
||||
static void get_int_env(const std::string &var, int &val) {
|
||||
if (const auto env_var = std::getenv(var.c_str())) {
|
||||
val = std::stoi(env_var);
|
||||
print_option(var, std::to_string(val));
|
||||
}
|
||||
}
|
||||
|
||||
static void get_str_env(const std::string &var, std::string &val) {
|
||||
if (const auto env_var = std::getenv(var.c_str())) {
|
||||
val = env_var;
|
||||
print_option(var, val);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
debug_configuration::debug_configuration()
|
||||
: verbose(0)
|
||||
, dump_graphs(std::string()) {
|
||||
#ifdef GPU_DEBUG_CONFIG
|
||||
const std::string OV_GPU_VERBOSE("OV_GPU_Verbose");
|
||||
const std::string OV_GPU_DUMP_GRAPHS("OV_GPU_DumpGraphs");
|
||||
if (const auto env_var = std::getenv(OV_GPU_VERBOSE.c_str())) {
|
||||
verbose = std::stoi(env_var);
|
||||
print_option(OV_GPU_VERBOSE, std::to_string(verbose));
|
||||
}
|
||||
|
||||
if (const auto env_var = std::getenv(OV_GPU_DUMP_GRAPHS.c_str())) {
|
||||
dump_graphs = env_var;
|
||||
print_option(OV_GPU_DUMP_GRAPHS, dump_graphs);
|
||||
}
|
||||
get_int_env("OV_GPU_Verbose", verbose);
|
||||
get_str_env("OV_GPU_DumpGraphs", dump_graphs);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
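// Usage sketch for the refactored helpers above, assuming get_int_env and
// get_str_env are in scope (the environment values are illustrative):
//   $ OV_GPU_Verbose=1 OV_GPU_DumpGraphs=/tmp/graphs ./app
#include <string>

int main() {
    int verbose = 0;
    std::string dump_graphs;
    get_int_env("OV_GPU_Verbose", verbose);        // sets 1 and prints the option
    get_str_env("OV_GPU_DumpGraphs", dump_graphs); // sets the path and prints it
    return 0;
}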
@ -5,6 +5,7 @@
#include "kernels_factory.hpp"
#include "kernels_cache.hpp"
#include "ocl/ocl_engine.hpp"
#include "cldnn/runtime/debug_configuration.hpp"

#include <algorithm>
#include <cassert>
@ -372,6 +373,10 @@ void kernels_cache::build_batch(const engine& build_engine, const batch_program&
            dump_file << "*/\n";
        }
        if (!err_log.empty()) {
            GPU_DEBUG_GET_INSTANCE(debug_config);
            GPU_DEBUG_IF(debug_config->verbose) {
                std::cout << err_log << std::endl;
            }
            throw std::runtime_error("Program build failed. You may enable OCL source dump to see the error log.\n");
        }
    }
@ -49,6 +49,7 @@ public:

        gather_params.axis = convert_axis(arg.get_primitive()->axis);
        gather_params.batch_dim = size_t(arg.get_primitive()->batch_dim);
        gather_params.support_neg_ind = arg.get_primitive()->support_neg_ind;

        gather_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
TEST(gather8_gpu_fp16, d323_axisY_bdim_m1) {
|
||||
// Dictionary : 3x2x3x4x2
|
||||
// Indexes : 3x2x3x1
|
||||
// Axis : 3
|
||||
// batch_dim : -1
|
||||
// Output : 3x2x3x3x2
|
||||
// Input values in fp16
|
||||
|
||||
// Indexes:
|
||||
// 0.f, 0.f, 0.f, 3.f, -3.f, 0.f, 1.f, -3.f, 1.f, -2.f, 0.f, 3.f, -1.f, 1.f, 0.f, 2.f, 0.f, 1.f
|
||||
//
|
||||
// Dictionary:
|
||||
// 1.f 2.f 3.f 4.f 5.f 6.f 7.f 8.f 9.f 10.f 11.f 12.f 13.f 14.f 15.f 16.f 17.f 18.f
|
||||
// 19.f 20.f 21.f 22.f 23.f 24.f 25.f 26.f 27.f 28.f 29.f 30.f 31.f 32.f 33.f 34.f 35.f 36.f
|
||||
// 37.f 38.f 39.f 40.f 41.f 42.f 43.f 44.f 45.f 46.f 47.f 48.f 49.f 50.f 51.f 52.f 53.f 54.f
|
||||
// 55.f 56.f 57.f 58.f 59.f 60.f 61.f 62.f 63.f 64.f 65.f 66.f 67.f 68.f 69.f 70.f 71.f 72.f
|
||||
// 73.f 74.f 75.f 76.f 77.f 78.f 79.f 80.f 81.f 82.f 83.f 84.f 85.f 86.f 87.f 88.f 89.f 90.f
|
||||
// 91.f 92.f 93.f 94.f 95.f 96.f 97.f 98.f 99.f 100.f 101.f 102.f 103.f 104.f 105.f 106.f 107.f 108.f
|
||||
// 109.f 110.f 111.f 112.f 113.f 114.f 115.f 116.f 117.f 118.f 119.f 120.f 121.f 122.f 123.f 124.f 125.f 126.f
|
||||
// 127.f 128.f 129.f 130.f 131.f 132.f 133.f 134.f 135.f 136.f 137.f 138.f 139.f 140.f 141.f 142.f 143.f 144.f
|
||||
//
|
||||
// Output:
|
||||
// 1.f 2.f 1.f 2.f 1.f 2.f 9.f 10.f 9.f 10.f 9.f 10.f
|
||||
// 17.f 18.f 17.f 18.f 17.f 18.f 31.f 32.f 27.f 28.f 25.f 26.f
|
||||
// 39.f 40.f 35.f 6.f 33.f 34.f 47.f 48.f 43.f 44.f 41.f 42.f
|
||||
    // 51.f 52.f 51.f 52.f 51.f 52.f 59.f 60.f 59.f 60.f 59.f 60.f
    // 67.f 68.f 67.f 68.f 67.f 68.f 77.f 78.f 73.f 74.f 79.f 80.f
    // 85.f 86.f 81.f 82.f 87.f 88.f 93.f 94.f 89.f 90.f 95.f 96.f
    // 103.f 104.f 99.f 100.f 97.f 98.f 111.f 112.f 107.f 108.f 105.f 106.f
    // 119.f 120.f 115.f 116.f 113.f 114.f 125.f 126.f 121.f 122.f 123.f 124.f
    // 133.f 134.f 129.f 130.f 131.f 132.f 141.f 142.f 137.f 138.f 139.f 140.f

    auto& engine = get_test_engine();

    auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 2, 4, 3 } }); // Dictionary
    auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 3 } }); // Indexes
    auto axis = cldnn::gather::gather_axis::along_y;
    int64_t batch_dim = -1;
    bool negative_indexes = true;

    set_values(input1, {
        FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f), FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f), FLOAT16(7.f), FLOAT16(8.f),
        FLOAT16(9.f), FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f), FLOAT16(13.f), FLOAT16(14.f), FLOAT16(15.f), FLOAT16(16.f),
        FLOAT16(17.f), FLOAT16(18.f), FLOAT16(19.f), FLOAT16(20.f), FLOAT16(21.f), FLOAT16(22.f), FLOAT16(23.f), FLOAT16(24.f),

        FLOAT16(25.f), FLOAT16(26.f), FLOAT16(27.f), FLOAT16(28.f), FLOAT16(29.f), FLOAT16(30.f), FLOAT16(31.f), FLOAT16(32.f),
        FLOAT16(33.f), FLOAT16(34.f), FLOAT16(35.f), FLOAT16(36.f), FLOAT16(37.f), FLOAT16(38.f), FLOAT16(39.f), FLOAT16(40.f),
        FLOAT16(41.f), FLOAT16(42.f), FLOAT16(43.f), FLOAT16(44.f), FLOAT16(45.f), FLOAT16(46.f), FLOAT16(47.f), FLOAT16(48.f),


        FLOAT16(49.f), FLOAT16(50.f), FLOAT16(51.f), FLOAT16(52.f), FLOAT16(53.f), FLOAT16(54.f), FLOAT16(55.f), FLOAT16(56.f),
        FLOAT16(57.f), FLOAT16(58.f), FLOAT16(59.f), FLOAT16(60.f), FLOAT16(61.f), FLOAT16(62.f), FLOAT16(63.f), FLOAT16(64.f),
        FLOAT16(65.f), FLOAT16(66.f), FLOAT16(67.f), FLOAT16(68.f), FLOAT16(69.f), FLOAT16(70.f), FLOAT16(71.f), FLOAT16(72.f),

        FLOAT16(73.f), FLOAT16(74.f), FLOAT16(75.f), FLOAT16(76.f), FLOAT16(77.f), FLOAT16(78.f), FLOAT16(79.f), FLOAT16(80.f),
        FLOAT16(81.f), FLOAT16(82.f), FLOAT16(83.f), FLOAT16(84.f), FLOAT16(85.f), FLOAT16(86.f), FLOAT16(87.f), FLOAT16(88.f),
        FLOAT16(89.f), FLOAT16(90.f), FLOAT16(91.f), FLOAT16(92.f), FLOAT16(93.f), FLOAT16(94.f), FLOAT16(95.f), FLOAT16(96.f),


        FLOAT16(97.f), FLOAT16(98.f), FLOAT16(99.f), FLOAT16(100.f), FLOAT16(101.f), FLOAT16(102.f), FLOAT16(103.f), FLOAT16(104.f),
        FLOAT16(105.f), FLOAT16(106.f), FLOAT16(107.f), FLOAT16(108.f), FLOAT16(109.f), FLOAT16(110.f), FLOAT16(111.f), FLOAT16(112.f),
        FLOAT16(113.f), FLOAT16(114.f), FLOAT16(115.f), FLOAT16(116.f), FLOAT16(117.f), FLOAT16(118.f), FLOAT16(119.f), FLOAT16(120.f),

        FLOAT16(121.f), FLOAT16(122.f), FLOAT16(123.f), FLOAT16(124.f), FLOAT16(125.f), FLOAT16(126.f), FLOAT16(127.f), FLOAT16(128.f),
        FLOAT16(129.f), FLOAT16(130.f), FLOAT16(131.f), FLOAT16(132.f), FLOAT16(133.f), FLOAT16(134.f), FLOAT16(135.f), FLOAT16(136.f),
        FLOAT16(137.f), FLOAT16(138.f), FLOAT16(139.f), FLOAT16(140.f), FLOAT16(141.f), FLOAT16(142.f), FLOAT16(143.f), FLOAT16(144.f)
    });

    set_values(input2, {
        0.f, 0.f, 0.f,
        3.f, -3.f, 0.f,

        1.f, -3.f, 1.f,
        -2.f, 0.f, 3.f,

        -1.f, 1.f, 0.f,
        2.f, 0.f, 1.f
    });

    topology topology;
    topology.add(input_layout("InputDictionary", input1->get_layout()));
    topology.add(input_layout("InputText", input2->get_layout()));
    topology.add(
        gather("gather", "InputDictionary", "InputText", axis, format::bfzyx, tensor(3, 2, 2, 3, 3), batch_dim, negative_indexes)
    );

    network network(engine, topology);

    network.set_input_data("InputDictionary", input1);
    network.set_input_data("InputText", input2);

    auto outputs = network.execute();

    auto output = outputs.at("gather").get_memory();
    cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());

    std::vector<float> expected_results = {
        1.f, 2.f, 1.f, 2.f, 1.f, 2.f,
        9.f, 10.f, 9.f, 10.f, 9.f, 10.f,
        17.f, 18.f, 17.f, 18.f, 17.f, 18.f,

        31.f, 32.f, 27.f, 28.f, 25.f, 26.f,
        39.f, 40.f, 35.f, 36.f, 33.f, 34.f,
        47.f, 48.f, 43.f, 44.f, 41.f, 42.f,


        51.f, 52.f, 51.f, 52.f, 51.f, 52.f,
        59.f, 60.f, 59.f, 60.f, 59.f, 60.f,
        67.f, 68.f, 67.f, 68.f, 67.f, 68.f,

        77.f, 78.f, 73.f, 74.f, 79.f, 80.f,
        85.f, 86.f, 81.f, 82.f, 87.f, 88.f,
        93.f, 94.f, 89.f, 90.f, 95.f, 96.f,


        103.f, 104.f, 99.f, 100.f, 97.f, 98.f,
        111.f, 112.f, 107.f, 108.f, 105.f, 106.f,
        119.f, 120.f, 115.f, 116.f, 113.f, 114.f,

        125.f, 126.f, 121.f, 122.f, 123.f, 124.f,
        133.f, 134.f, 129.f, 130.f, 131.f, 132.f,
        141.f, 142.f, 137.f, 138.f, 139.f, 140.f
    };

    for (size_t i = 0; i < expected_results.size(); ++i) {
        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
    }
}
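For reference, the negative_indexes flag only changes how indices are resolved: a negative index is wrapped by the length of the gathered axis, so -3 on an axis of size 4 selects row 1. A minimal NumPy sketch (toy data lifted from one slice of the dictionary above; this is plain NumPy, not the cldnn API) reproduces the second expected output row:

    import numpy as np

    # One 4x2 slice of the dictionary and one index triple from input2.
    data = np.array([[25., 26.],
                     [27., 28.],
                     [29., 30.],
                     [31., 32.]])
    indices = np.array([3, -3, 0])
    wrapped = np.mod(indices, data.shape[0])   # [3, 1, 0]: -3 wraps to 4 - 3 = 1
    print(data[wrapped])                       # rows (31, 32), (27, 28), (25, 26)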

TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) {
    // Dictionary : 2x2x2x2x2x2
    // Indexes : 2x2x2x1

@ -1,10 +1,8 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

if (NOT NGRAPH_PYTHON_BUILD_ENABLE)
    message(WARNING "Please enable nGraph Python API (_pyngraph) target to enable Model Optimizer target")
elseif(NOT ENABLE_PYTHON)
    message(WARNING "Please enable IE Python API (ie_api and offline_transformations_api) targets to enable Model Optimizer target")
if(NOT ENABLE_PYTHON)
    message(WARNING "Please enable IE & nGraph Python API (ie_api and offline_transformations_api) targets to enable Model Optimizer target")
else()
    add_custom_target(model_optimizer DEPENDS ie_api offline_transformations_api inference_engine_ir_reader)
    if(ENABLE_TESTS)
@ -154,7 +154,7 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
                size_splits.append(l - prev_r)
                shape[split_channel_dim] = l - prev_r
                data_node = Op._create_data_node(graph, 'fake_data_'+out_nodes[0].name, {'shape': shape})
                add_opoutput(graph, data_node.id, 0, False)
                add_opoutput(graph, data_node.id, 0, False, keep_output_port=True)
                final_data_nodes_list.append(data_node)

            prev_r = r
@ -167,7 +167,7 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
            shape[split_channel_dim] = input_shape[split_channel_dim] - prev_r
            size_splits.append(input_shape[split_channel_dim] - prev_r)
            data_node = Op._create_data_node(graph, 'fake_data_'+out_nodes[0].name, {'shape': shape})
            add_opoutput(graph, data_node.id, 0, False)
            add_opoutput(graph, data_node.id, 0, False, keep_output_port=True)
            final_data_nodes_list.append(data_node)

        for node in out_nodes:
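The pattern behind these two hunks: intervals covered by the grouped StridedSlice ops become real split outputs, while the gaps between them (and any trailing gap) become "fake" data nodes, whose Result is now marked keep_output_port=True so it survives in the graph without being serialized. A standalone toy sketch of the size bookkeeping (interval values and channel size are made up for illustration, not taken from this diff):

    # Sorted (l, r) slice intervals along the split channel; gaps become
    # fake outputs, mirroring the size_splits logic in the hunks above.
    intervals = [(0, 2), (4, 7), (7, 10)]
    channel_size = 12
    size_splits = []
    prev_r = 0
    for l, r in intervals:
        if l > prev_r:                    # uncovered gap -> fake output
            size_splits.append(l - prev_r)
        size_splits.append(r - l)         # the real slice
        prev_r = r
    if channel_size > prev_r:             # trailing gap -> fake output
        size_splits.append(channel_size - prev_r)
    print(size_splits)                    # [2, 2, 3, 3, 2]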
@ -1032,21 +1032,24 @@ def dict_includes(big: dict, sub_dict: dict, skip_attr_names=[]):
    )


def add_opoutput(graph: Graph, node_name: str, port: int, cut: bool = True):
def add_opoutput(graph: Graph, node_name: str, port: int, cut: bool = True, keep_output_port: bool = False):
    """
    Creates and connects Result node to node_name port. Cuts existing port if requested.
    :param graph: graph to operate with
    :param node_name: name of existing node in the graph that we want to add Result to
    :param port: output port of node to connect Result to
    :param cut: determines way of operating with edge specified by node_name and port
    :param keep_output_port: special attribute determines if this operation is saved in IR or not
    """
    # we import it here because Op imports add_attrs_props and update_ie_fields from this file
    from mo.ops.result import Result
    node = Node(graph, node_name)
    if cut and len(node.out_edges()) != 0:
        opoutput_node = Result(graph).create_node_on_port(node, port, {'name': node_name + '/sink_port_' + str(port)})
        opoutput_node = Result(graph).create_node_on_port(node, port, {'name': node_name + '/sink_port_' + str(port),
                                                                       'keep_output_port': keep_output_port})
    else:
        opoutput_node = Result(graph).create_node([(node, port)], {'name': node_name + '/sink_port_' + str(port)})
        opoutput_node = Result(graph).create_node([(node, port)], {'name': node_name + '/sink_port_' + str(port),
                                                                   'keep_output_port': keep_output_port})
    opoutput_node.in_edge()['data_attrs'] = ['fw_tensor_debug_info']

    log.debug('Sink: {} for node {}'.format(opoutput_node.id, node_name))
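In this reading, keep_output_port is just an attribute stored on the Result node; whatever serializes the graph to IR can filter on it. A self-contained sketch of that filtering (plain Python, not the actual MO serializer, whose internals are not shown in this diff):

    # Result nodes created by add_opoutput, reduced to their attrs dicts.
    results = [
        {'name': 'real_out/sink_port_0', 'keep_output_port': False},
        {'name': 'fake_data/sink_port_0', 'keep_output_port': True},
    ]
    # Only Results without the flag would end up as IR outputs.
    ir_outputs = [r['name'] for r in results if not r.get('keep_output_port', False)]
    print(ir_outputs)   # ['real_out/sink_port_0']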
@ -60,8 +60,8 @@ nodes_attributes = {
    'concat_1_data': {'value': None, 'shape': None, 'kind': 'data'},

    'op_output': {'kind': 'op', 'op': 'Result'},
    'op_output_1': {'kind': 'op', 'op': 'Result'},
    'op_output_2': {'kind': 'op', 'op': 'Result'},
    'op_output_1': {'kind': 'op', 'op': 'Result', 'keep_output_port': True},
    'op_output_2': {'kind': 'op', 'op': 'Result', 'keep_output_port': True},

    # Squeeze layers
    'sslice_1/Squeeze_shrink': {'type': None, 'value': None, 'kind': 'op', 'op': 'Squeeze'},