Merge remote-tracking branch 'upstream/master' into itikhono/ts/fix_performance_issues
Commit: fa9fe34c16
@@ -39,6 +39,9 @@ resources:
      name: openvinotoolkit/openvino_contrib
      ref: master

variables:
  - group: github

jobs:
- job: android_arm64
  # About 150% of total time
@@ -62,8 +65,23 @@ jobs:
    TMP_DIR: /mnt/tmp
    SHARE_DIR: /mount/cinfsshare/onnxtestdata
    CCACHE_DIR: $(SHARE_DIR)/ccache/master/android_arm64
    LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
    OV_PYTHON_VERSION: 3.10.10 # The full Python version is required for LD_LIBRARY_PATH. More details: https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache

  steps:
  - task: UsePythonVersion@0
    inputs:
      versionSpec: '$(OV_PYTHON_VERSION)' # Setting only the major & minor version downloads the latest release from the GH repo, e.g. 3.10 resolves to 3.10.10.
      addToPath: true
      disableDownloadFromRegistry: false
      architecture: 'x64'
      githubToken: $(auth_token)
    displayName: Setup Python 3.10
    name: setupPython
  - bash: |
      #!/bin/bash
      python -V

  - script: |
      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
      whoami

@@ -40,6 +40,9 @@ resources:
      name: openvinotoolkit/testdata
      ref: master

variables:
  - group: github

jobs:
- job: Lin
  strategy:
@@ -68,7 +71,7 @@ jobs:
    maxParallel: '2'

  # About 150% of total time
  timeoutInMinutes: '120'
  timeoutInMinutes: '180'

  pool:
    name: LIN_VMSS_VENV_F16S_U20_WU2
@@ -96,8 +99,23 @@ jobs:
    CMAKE_VERSION: 3.24.0
    BUILD_PYTHON: $(WORK_DIR)/build_python
    INSTALL_PYTHON: $(INSTALL_OPENVINO)/extras/python
    LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
    OV_PYTHON_VERSION: 3.10.10 # The full Python version is required for LD_LIBRARY_PATH. More details: https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache

  steps:
  - task: UsePythonVersion@0
    inputs:
      versionSpec: '$(OV_PYTHON_VERSION)' # Setting only the major & minor version downloads the latest release from the GH repo, e.g. 3.10 resolves to 3.10.10.
      addToPath: true
      disableDownloadFromRegistry: false
      architecture: 'x64'
      githubToken: $(auth_token)
    displayName: Setup Python 3.10
    name: setupPython
  - bash: |
      #!/bin/bash
      python -V

  - script: |
      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
      whoami
@@ -285,34 +303,28 @@ jobs:

  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
  - script: |
      export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
      python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph $(PYTHON_STATIC_ARGS) \
        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py
    env:
      # because of the static build, libgna is needed for the python binary
      LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64
    displayName: 'nGraph and IE Python Bindings Tests'

  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
  - script: |
      # For python imports to import pybind_mock_frontend
      export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
      export PYTHONPATH=$(INSTALL_TEST_DIR):$(INSTALL_DIR)/python/python3.8:$PYTHONPATH
      python3 -m pytest -sv $(INSTALL_TEST_DIR)/pyopenvino $(PYTHON_STATIC_ARGS) \
        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
        --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py \
        --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_zoo_models.py \
        --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_backend.py
    env:
      # because of the static build, libgna is needed for the python binary and the mock_py frontend library
      LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(INSTALL_TEST_DIR)
    displayName: 'Python API 2.0 Tests'

  - script: |
      export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64:$(LD_LIBRARY_PATH)
      python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
    env:
      # because of the static build, libgna is needed for the python binary
      LD_LIBRARY_PATH: $(REPO_DIR)/temp/gna_03.00.00.1910/linux/x64
    displayName: 'Model Optimizer UT'

  - script: |

@@ -39,6 +39,9 @@ resources:
      name: openvinotoolkit/openvino_contrib
      ref: master

variables:
  - group: github

jobs:
- job: linux_arm64
  # About 150% of total time
@@ -77,8 +80,23 @@ jobs:
    OPENVINO_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64
    OPENCV_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64_opencv
    ONETBB_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64_onetbb
    LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
    OV_PYTHON_VERSION: 3.10.10 # The full Python version is required for LD_LIBRARY_PATH. More details: https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache

  steps:
  - task: UsePythonVersion@0
    inputs:
      versionSpec: '$(OV_PYTHON_VERSION)' # Setting only the major & minor version downloads the latest release from the GH repo, e.g. 3.10 resolves to 3.10.10.
      addToPath: true
      disableDownloadFromRegistry: false
      architecture: 'x64'
      githubToken: $(auth_token)
    displayName: Setup Python 3.10
    name: setupPython
  - bash: |
      #!/bin/bash
      python -V

  - script: |
      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
      whoami

@@ -36,6 +36,9 @@ resources:
      endpoint: openvinotoolkit
      name: openvinotoolkit/testdata

variables:
  - group: github

jobs:
- job: LinCC
  # About 150% of total time
@@ -55,8 +58,23 @@ jobs:
    BUILD_DIR: $(WORK_DIR)/build
    INSTALL_DIR: $(WORK_DIR)/install_pkg
    SETUPVARS: $(INSTALL_DIR)/setupvars.sh
    LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
    OV_PYTHON_VERSION: 3.10.10 # The full Python version is required for LD_LIBRARY_PATH. More details: https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache

  steps:
  - task: UsePythonVersion@0
    inputs:
      versionSpec: '$(OV_PYTHON_VERSION)' # Setting only the major & minor version downloads the latest release from the GH repo, e.g. 3.10 resolves to 3.10.10.
      addToPath: true
      disableDownloadFromRegistry: false
      architecture: 'x64'
      githubToken: $(auth_token)
    displayName: Setup Python 3.10
    name: setupPython
  - bash: |
      #!/bin/bash
      python -V

  - script: |
      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
      whoami

@@ -32,17 +32,22 @@ jobs:
    TMP_DIR: /mnt/tmp
    SHARE_DIR: /mount/cinfsshare/onnxtestdata
    CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_coverity
    LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
    OV_PYTHON_VERSION: 3.10.10 # The full Python version is required for LD_LIBRARY_PATH. More details: https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache

  steps:

  - task: UsePythonVersion@0
    inputs:
      versionSpec: '3.10'
      versionSpec: '$(OV_PYTHON_VERSION)' # Setting only the major & minor version downloads the latest release from the GH repo, e.g. 3.10 resolves to 3.10.10.
      addToPath: true
      disableDownloadFromRegistry: false
      architecture: 'x64'
      githubToken: $(auth_token)
    displayName: Setup Python 3.10
    name: setupPython
  - bash: |
      #!/bin/bash
      python -V

  - script: |
      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"

@@ -109,7 +109,7 @@ jobs:
  - script: |
      set -e
      sudo -E $(REPO_DIR)/install_build_dependencies.sh
      # 'clang' compiler is to check that samples can be built using it
      # 'clang' is used as a default compiler
      sudo apt --assume-yes install clang
      sudo apt --assume-yes install --no-install-recommends libopencv-imgproc-dev libopencv-imgcodecs-dev
      # For opencv-python: python3-setuptools and pip upgrade
@@ -165,6 +165,8 @@ jobs:
      -DCMAKE_C_COMPILER_LAUNCHER=ccache
      -DCMAKE_CXX_LINKER_LAUNCHER=ccache
      -DCMAKE_C_LINKER_LAUNCHER=ccache
      -DCMAKE_CXX_COMPILER=clang++
      -DCMAKE_C_COMPILER=clang
      -DCPACK_GENERATOR=DEB
      -S $(REPO_DIR)
      -B $(BUILD_DIR)
@@ -229,6 +231,8 @@ jobs:
        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py
    env:
      LD_LIBRARY_PATH: $(INSTALL_TEST_DIR)
    displayName: 'nGraph and IE Python Bindings Tests'

  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
@@ -246,7 +250,8 @@ jobs:
        --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_onnx/test_backend.py -v
    displayName: 'Python API 2.0 Tests'

  - script: python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
  - script: |
      python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
    displayName: 'Model Optimizer UT'

  - script: |
@@ -282,12 +287,6 @@ jobs:
  - script: $(SAMPLES_INSTALL_DIR)/cpp/build_samples.sh -i $(INSTALL_DIR)
    displayName: 'Build cpp samples'

  - script: $(SAMPLES_INSTALL_DIR)/cpp/build_samples.sh -i $(INSTALL_DIR)
    env:
      CC: clang
      CXX: clang++
    displayName: 'Build cpp samples - clang'

  - script: $(SAMPLES_INSTALL_DIR)/c/build_samples.sh -i $(INSTALL_DIR)
    displayName: 'Build c samples'

@@ -31,6 +31,9 @@ pr:
    - 'tools/*'
    - 'tests/layer_tests/*'

variables:
  - group: github

jobs:
- job: onnxruntime
  timeoutInMinutes: '90'
@@ -52,8 +55,23 @@ jobs:
    BUILD_DIR: $(WORK_DIR)/build
    ONNXRUNTIME_UTILS: $(REPO_DIR)/.ci/azure/ci_utils/onnxruntime
    ONNXRUNTIME_BUILD_DIR: $(ONNXRUNTIME_REPO_DIR)/build
    LD_LIBRARY_PATH: $(Agent.ToolsDirectory)/Python/$(OV_PYTHON_VERSION)/x64/lib
    OV_PYTHON_VERSION: 3.10.10 # The full Python version is required for LD_LIBRARY_PATH. More details: https://github.com/microsoft/azure-pipelines-tool-lib/blob/master/docs/overview.md#tool-cache

  steps:
  - task: UsePythonVersion@0
    inputs:
      versionSpec: '$(OV_PYTHON_VERSION)' # Setting only the major & minor version downloads the latest release from the GH repo, e.g. 3.10 resolves to 3.10.10.
      addToPath: true
      disableDownloadFromRegistry: false
      architecture: 'x64'
      githubToken: $(auth_token)
    displayName: Setup Python 3.10
    name: setupPython
  - bash: |
      #!/bin/bash
      python -V

  - script: |
      curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
      whoami

@@ -143,9 +143,6 @@ jobs:
      -DBUILD_nvidia_plugin=OFF \
      -S $(REPO_DIR) \
      -B $(BUILD_DIR)
    env:
      CC: gcc
      CXX: g++
    displayName: 'CMake OpenVINO'

  - script: ls -alR $(REPO_DIR)/temp/

.gitignore (vendored, 2 changes)
@@ -57,3 +57,5 @@ __pycache__
/tools/mo/*.mapping
/tools/mo/*.dat
/tools/mo/*.svg
/src/plugins/intel_cpu/tools/commit_slider/*.json
/src/plugins/intel_cpu/tools/commit_slider/slider_cache/*

@@ -412,11 +412,6 @@ else()
    # Warn if an undefined identifier is evaluated in an #if directive. Such identifiers are replaced with zero.
    ie_add_compiler_flags(-Wundef)

    check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED)
    if(SUGGEST_OVERRIDE_SUPPORTED)
        set(CMAKE_CXX_FLAGS "-Wsuggest-override ${CMAKE_CXX_FLAGS}")
    endif()

    #
    # Warnings as errors
    #
@@ -468,6 +463,13 @@ if(OV_COMPILER_IS_CLANG)
    ie_add_compiler_flags(-Wno-delete-non-abstract-non-virtual-dtor)
endif()

check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED)
if(SUGGEST_OVERRIDE_SUPPORTED)
    set(CMAKE_CXX_FLAGS "-Wsuggest-override ${CMAKE_CXX_FLAGS}")
endif()

check_cxx_compiler_flag("-Wunused-but-set-variable" UNUSED_BUT_SET_VARIABLE_SUPPORTED)

#
# link_system_libraries(target <PUBLIC | PRIVATE | INTERFACE> <lib1 [lib2 lib3 ...]>)
#
@@ -499,6 +501,11 @@ endfunction()
# Tries to use gold linker in current scope (directory, function)
#
function(ov_try_use_gold_linker)
    # don't use the gold linker, if the mold linker is set
    if(CMAKE_EXE_LINKER_FLAGS MATCHES "mold" OR CMAKE_MODULE_LINKER_FLAGS MATCHES "mold" OR CMAKE_SHARED_LINKER_FLAGS MATCHES "mold")
        return()
    endif()

    # gold linker on ubuntu20.04 may fail to link binaries built with sanitizer
    if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_SANITIZER AND NOT CMAKE_CROSSCOMPILING)
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fuse-ld=gold" PARENT_SCOPE)

@@ -10,6 +10,24 @@ endif()

set(rpmlint_passed ON)

execute_process(COMMAND "${rpmlint_PROGRAM}" --version
                RESULT_VARIABLE rpmlint_exit_code
                OUTPUT_VARIABLE rpmlint_version)

if(NOT rpmlint_exit_code EQUAL 0)
    message(FATAL_ERROR "Failed to get ${rpmlint_PROGRAM} version. Output is '${rpmlint_version}'")
endif()

if(rpmlint_version MATCHES "([0-9]+)\.([0-9]+)")
    set(rpmlint_version "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}")
else()
    message(FATAL_ERROR "Failed to parse rpmlint version '${rpmlint_version}'")
endif()

if(rpmlint_version VERSION_GREATER_EQUAL 2.0)
    set(rpmlint_has_strict_option ON)
endif()

foreach(rpm_file IN LISTS CPACK_PACKAGE_FILES)
    get_filename_component(rpm_name "${rpm_file}" NAME)
    get_filename_component(dir_name "${rpm_file}" DIRECTORY)
@@ -17,20 +35,25 @@ foreach(rpm_file IN LISTS CPACK_PACKAGE_FILES)

    set(rpmlint_overrides "${dir_name}/${rpm_name}.rpmlintrc")
    if(EXISTS "${rpmlint_overrides}")
        set(file_option --file "${rpmlint_overrides}")
        set(rpmlint_options --file "${rpmlint_overrides}")
    endif()
    if(rpmlint_has_strict_option)
        list(APPEND rpmlint_options --strict)
    endif()

    execute_process(COMMAND "${rpmlint_PROGRAM}" --strict ${file_option} ${rpm_file}
    execute_process(COMMAND "${rpmlint_PROGRAM}" ${rpmlint_options} ${rpm_file}
                    RESULT_VARIABLE rpmlint_exit_code
                    OUTPUT_VARIABLE rpmlint_output)

    if(NOT rpmlint_exit_code EQUAL 0)
    if(NOT rpmlint_exit_code EQUAL 0 OR NOT rpmlint_has_strict_option)
        message("Package ${rpm_name}:")
        message("${rpmlint_output}")
        set(rpmlint_passed OFF)
        if(rpmlint_has_strict_option)
            set(rpmlint_passed OFF)
        endif()
    endif()

    unset(file_option)
    unset(rpmlint_options)
endforeach()

if(NOT rpmlint_passed)

@@ -6,7 +6,6 @@

#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"

namespace {
@IE_PLUGINS_DECLARATIONS@

struct Value {
@@ -18,9 +17,7 @@ struct Value {
using Key = std::string;
using PluginsStaticRegistry = std::map<Key, Value>;

const std::map<Key, Value> getStaticPluginsRegistry() {
inline const std::map<Key, Value> getStaticPluginsRegistry() {
@IE_PLUGINS_MAP_DEFINITION@
    return plugins_hpp;
}

}  // namespace

@@ -1,15 +0,0 @@
# OpenVINO™ Deep Learning Workbench Overview {#workbench_docs_Workbench_DG_Introduction}

@sphinxdirective
.. toctree::
   :maxdepth: 1
   :hidden:

   workbench_docs_Workbench_DG_Install
   workbench_docs_Workbench_DG_Work_with_Models_and_Sample_Datasets
   Tutorials <workbench_docs_Workbench_DG_Tutorials>
   User Guide <workbench_docs_Workbench_DG_User_Guide>
   workbench_docs_Workbench_DG_Troubleshooting

@endsphinxdirective

@@ -7,7 +7,6 @@
   :hidden:

   ovtf_integration
   ote_documentation
   ovsa_get_started
   openvino_inference_engine_tools_compile_tool_README
   openvino_docs_tuning_utilities
@@ -16,7 +15,6 @@
@endsphinxdirective


OpenVINO™ is not just one tool. It is an expansive ecosystem of utilities, providing a comprehensive workflow for deep learning solution development. Learn more about each of them to reach the full potential of OpenVINO™ Toolkit.

### Neural Network Compression Framework (NNCF)
@@ -51,14 +49,14 @@ More resources:
* [Installation Guide on GitHub](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/Install-Guide)

### DL Workbench
A web-based tool for deploying deep learning models. Built on the core of OpenVINO and equipped with a graphics user interface, DL Workbench is a great way to explore the possibilities of the OpenVINO workflow, import, analyze, optimize, and build your pre-trained models. You can do all that by visiting [Intel® DevCloud for the Edge](https://software.intel.com/content/www/us/en/develop/tools/devcloud.html) and launching DL Workbench on-line.
A web-based tool for deploying deep learning models. Built on the core of OpenVINO and equipped with a graphical user interface, DL Workbench is a great way to explore the possibilities of the OpenVINO workflow, import, analyze, optimize, and build your pre-trained models. You can do all that by visiting [Intel® Developer Cloud](https://software.intel.com/content/www/us/en/develop/tools/devcloud.html) and launching DL Workbench online.

More resources:
* [documentation](dl_workbench_overview.md)
* [Documentation](https://docs.openvino.ai/2022.3/workbench_docs_Workbench_DG_Introduction.html)
* [Docker Hub](https://hub.docker.com/r/openvino/workbench)
* [PyPI](https://pypi.org/project/openvino-workbench/)

### OpenVINO™ Training Extensions (OTE)
### OpenVINO™ Training Extensions (OTX)
A convenient environment to train Deep Learning models and convert them using the OpenVINO™ toolkit for optimized inference.

More resources:

@@ -12,7 +12,7 @@ OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequest

OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class for a custom asynchronous inference request implementation:

@snippet src/template_async_infer_request.hpp async_infer_request:header
@snippet src/async_infer_request.hpp async_infer_request:header

#### Class Fields

@@ -30,7 +30,7 @@ The main goal of the `AsyncInferRequest` constructor is to define a device pipel
- `waitPipeline` is a CPU non-compute task that waits for a response from a remote device.
- `inferPostprocess` is a CPU compute task.

@snippet src/template_async_infer_request.cpp async_infer_request:ctor
@snippet src/async_infer_request.cpp async_infer_request:ctor

The stages are distributed among two task executors in the following way:

@@ -46,4 +46,4 @@ Inference request stages are also profiled using IE_PROFILING_AUTO_SCOPE, which

In the asynchronous request destructor, it is necessary to wait for a pipeline to finish. It can be done using the InferenceEngine::AsyncInferRequestThreadSafeDefault::StopAndWait method of the base class.

@snippet src/template_async_infer_request.cpp async_infer_request:dtor
@snippet src/async_infer_request.cpp async_infer_request:dtor
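For orientation, the pipeline described above is what an application triggers through the public OpenVINO 2.0 API. A minimal sketch, assuming the template plugin is registered under the "TEMPLATE" device name and using a placeholder model path:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // "model.xml" is a placeholder path; "TEMPLATE" is the assumed device name
    ov::CompiledModel compiled = core.compile_model("model.xml", "TEMPLATE");
    ov::InferRequest request = compiled.create_infer_request();

    // start_async() schedules the preprocess / start-pipeline / wait /
    // postprocess stages on the plugin's task executors; wait() blocks
    // until the whole pipeline has finished.
    request.start_async();
    request.wait();
    return 0;
}
```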
@@ -12,7 +12,7 @@ Inference Engine Plugin API provides the helper InferenceEngine::IInferRequestIn
to use as a base class for a synchronous inference request implementation. Based on that, a declaration
of a synchronous request class can look as follows:

@snippet src/template_infer_request.hpp infer_request:header
@snippet src/infer_request.hpp infer_request:header

#### Class Fields

@@ -34,7 +34,7 @@ The example class has several fields:

The constructor initializes helper fields and calls methods which allocate blobs:

@snippet src/template_infer_request.cpp infer_request:ctor
@snippet src/infer_request.cpp infer_request:ctor

> **NOTE**: Call InferenceEngine::CNNNetwork::getInputsInfo and InferenceEngine::CNNNetwork::getOutputsInfo to specify both layout and precision of blobs, which you can set with InferenceEngine::InferRequest::SetBlob and get with InferenceEngine::InferRequest::GetBlob. A plugin uses these hints to determine its internal layouts and precisions for input and output blobs if needed.

@@ -42,7 +42,7 @@ The constructor initializes helper fields and calls methods which allocate blobs

Decrements a number of created inference requests:

@snippet src/template_infer_request.cpp infer_request:dtor
@snippet src/infer_request.cpp infer_request:dtor

### `InferImpl()`

@@ -50,13 +50,13 @@ Decrements a number of created inference requests:
- Checks blobs set by users
- Calls the `InferImpl` method defined in a derived class to call actual pipeline stages synchronously

@snippet src/template_infer_request.cpp infer_request:infer_impl
@snippet src/infer_request.cpp infer_request:infer_impl

#### 1. `inferPreprocess`

Below is the code of the `inferPreprocess` method to demonstrate Inference Engine common preprocessing step handling:

@snippet src/template_infer_request.cpp infer_request:infer_preprocess
@snippet src/infer_request.cpp infer_request:infer_preprocess

**Details:**
* `InferImpl` must call the InferenceEngine::IInferRequestInternal::execDataPreprocessing function, which executes the common Inference Engine preprocessing step (for example, applies resize or color conversion operations) if it is set by the user. The output dimensions, layout and precision match the input information set via InferenceEngine::CNNNetwork::getInputsInfo.
@@ -66,18 +66,18 @@ Below is the code of the `inferPreprocess` method to demonstrate Inference Engin

Executes a pipeline synchronously using `_executable` object:

@snippet src/template_infer_request.cpp infer_request:start_pipeline
@snippet src/infer_request.cpp infer_request:start_pipeline

#### 3. `inferPostprocess`

Converts output blobs if precisions of backend output blobs and blobs passed by user are different:

@snippet src/template_infer_request.cpp infer_request:infer_postprocess
@snippet src/infer_request.cpp infer_request:infer_postprocess

### `GetPerformanceCounts()`

The method sets performance counters which were measured during pipeline stages execution:

@snippet src/template_infer_request.cpp infer_request:get_performance_counts
@snippet src/infer_request.cpp infer_request:get_performance_counts

The next step in the plugin library implementation is the [Asynchronous Inference Request](@ref openvino_docs_ie_plugin_dg_async_infer_request) class.
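For contrast with the asynchronous case, a synchronous call from the application side collapses into a single `infer()`. A sketch against the public 2.0 API, with the same placeholder model path and device name as above:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    ov::CompiledModel compiled = core.compile_model("model.xml", "TEMPLATE");  // placeholders
    ov::InferRequest request = compiled.create_infer_request();
    // infer() runs inferPreprocess, startPipeline and inferPostprocess
    // back to back on the calling thread.
    request.infer();
    return 0;
}
```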
@@ -1,17 +1,13 @@
# Compressing a Model to FP16 {#openvino_docs_MO_DG_FP16_Compression}

Model Optimizer can convert all floating-point weights to `FP16` data type. The resulting IR is called
Model Optimizer by default converts all floating-point weights to `FP16` data type. The resulting IR is called
a compressed `FP16` model. The resulting model occupies about half the space in the file system,
but it may have some accuracy drop. For most models, the accuracy drop is negligible.

To compress the model, use the `--compress_to_fp16` option:
> **NOTE**: Starting from the 2022.3 release, the --data_type option is deprecated.
> Instead of --data_type FP16 use --compress_to_fp16.
> Using `--data_type FP32` will give no result and will not force `FP32` precision in
> the model. If the model has `FP16` constants, such constants will have `FP16` precision in IR as well.
but it may have some accuracy drop. For most models, the accuracy drop is negligible.
If the accuracy drop is significant, the user can disable compression explicitly.

By default, models are compressed to `FP16`, but you can disable compression by specifying `--compress_to_fp16=False`:
```
mo --input_model INPUT_MODEL --compress_to_fp16
mo --input_model INPUT_MODEL --compress_to_fp16=False
```

For details on how plugins handle compressed `FP16` models, see [Working with devices](../../OV_Runtime_UG/supported_plugins/Device_Plugins.md).

@@ -44,12 +44,12 @@ The logic behind the choice is as follows:
@endsphinxdirective

To put it simply, if loading the model to the first device on the list fails, AUTO will try to load it to the next device in line, until one of them succeeds.
What is important, **AUTO always starts inference with the CPU of the system**, as it provides very low latency and can start inference with no additional delays.
What is important, **AUTO starts inference with the CPU of the system by default**, as it provides very low latency and can start inference with no additional delays.
While the CPU is performing inference, AUTO continues to load the model to the device best suited for the purpose and transfers the task to it when ready.
This way, the devices which are much slower in compiling models, GPU being the best example, do not impede inference at its initial stages.
For example, if you use a CPU and a GPU, the first-inference latency of AUTO will be better than that of using GPU alone.

Note that if you choose to exclude CPU from the priority list, it will be unable to support the initial model compilation stage.
Note that if you choose to exclude CPU from the priority list or disable the initial CPU acceleration feature via `ov::intel_auto::enable_startup_fallback`, it will be unable to support the initial model compilation stage.

![autoplugin_accelerate]

@@ -76,41 +76,56 @@ Following the OpenVINO™ naming convention, the Automatic Device Selection mode

@sphinxdirective

+--------------------------------+----------------------------------------------------------------------+
| | Property                     | | Values and Description                                             |
+================================+======================================================================+
| | <device candidate list>      | | **Values**:                                                        |
| |                              | | empty                                                              |
| |                              | | `AUTO`                                                             |
| |                              | | `AUTO: <device names>` (comma-separated, no spaces)                |
| |                              | |                                                                    |
| |                              | | Lists the devices available for selection.                         |
| |                              | | The device sequence will be taken as priority from high to low.    |
| |                              | | If not specified, `AUTO` will be used as default,                  |
| |                              | | and all devices will be "viewed" as candidates.                    |
+--------------------------------+----------------------------------------------------------------------+
| | `ov::device:priorities`      | | **Values**:                                                        |
| |                              | | `<device names>` (comma-separated, no spaces)                      |
| |                              | |                                                                    |
| |                              | | Specifies the devices for AUTO to select.                          |
| |                              | | The device sequence will be taken as priority from high to low.    |
| |                              | | This configuration is optional.                                    |
+--------------------------------+----------------------------------------------------------------------+
| | `ov::hint::performance_mode` | | **Values**:                                                        |
| |                              | | `ov::hint::PerformanceMode::LATENCY`                               |
| |                              | | `ov::hint::PerformanceMode::THROUGHPUT`                            |
| |                              | | `ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT`                 |
| |                              | |                                                                    |
| |                              | | Specifies the performance option preferred by the application.     |
+--------------------------------+----------------------------------------------------------------------+
| | `ov::hint::model_priority`   | | **Values**:                                                        |
| |                              | | `ov::hint::Priority::HIGH`                                         |
| |                              | | `ov::hint::Priority::MEDIUM`                                       |
| |                              | | `ov::hint::Priority::LOW`                                          |
| |                              | |                                                                    |
| |                              | | Indicates the priority for a model.                                |
| |                              | | IMPORTANT: This property is not fully supported yet.               |
+--------------------------------+----------------------------------------------------------------------+
+---------------------------------------------+----------------------------------------------------------------------+
| | Property                                  | | Values and Description                                             |
+=============================================+======================================================================+
| | <device candidate list>                   | | **Values**:                                                        |
| |                                           | | empty                                                              |
| |                                           | | `AUTO`                                                             |
| |                                           | | `AUTO: <device names>` (comma-separated, no spaces)                |
| |                                           | |                                                                    |
| |                                           | | Lists the devices available for selection.                         |
| |                                           | | The device sequence will be taken as priority from high to low.    |
| |                                           | | If not specified, `AUTO` will be used as default,                  |
| |                                           | | and all devices will be "viewed" as candidates.                    |
+---------------------------------------------+----------------------------------------------------------------------+
| | `ov::device::priorities`                  | | **Values**:                                                        |
| |                                           | | `<device names>` (comma-separated, no spaces)                      |
| |                                           | |                                                                    |
| |                                           | | Specifies the devices for AUTO to select.                          |
| |                                           | | The device sequence will be taken as priority from high to low.    |
| |                                           | | This configuration is optional.                                    |
+---------------------------------------------+----------------------------------------------------------------------+
| | `ov::hint::performance_mode`              | | **Values**:                                                        |
| |                                           | | `ov::hint::PerformanceMode::LATENCY`                               |
| |                                           | | `ov::hint::PerformanceMode::THROUGHPUT`                            |
| |                                           | | `ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT`                 |
| |                                           | |                                                                    |
| |                                           | | Specifies the performance option preferred by the application.     |
+---------------------------------------------+----------------------------------------------------------------------+
| | `ov::hint::model_priority`                | | **Values**:                                                        |
| |                                           | | `ov::hint::Priority::HIGH`                                         |
| |                                           | | `ov::hint::Priority::MEDIUM`                                       |
| |                                           | | `ov::hint::Priority::LOW`                                          |
| |                                           | |                                                                    |
| |                                           | | Indicates the priority for a model.                                |
| |                                           | | IMPORTANT: This property is not fully supported yet.               |
+---------------------------------------------+----------------------------------------------------------------------+
| | `ov::execution_devices`                   | | Lists the runtime target devices on which the inferences are being |
| |                                           | | executed.                                                          |
| |                                           | | Examples of returning results could be `(CPU)` (`(CPU)` is a       |
| |                                           | | temporary device, indicating that CPU is used for acceleration at  |
| |                                           | | the model compilation stage), `CPU`, `GPU`, `CPU GPU`, `GPU.0`,    |
| |                                           | | etc.                                                               |
+---------------------------------------------+----------------------------------------------------------------------+
| | `ov::intel_auto::enable_startup_fallback` | | **Values**:                                                        |
| |                                           | | `true`                                                             |
| |                                           | | `false`                                                            |
| |                                           | |                                                                    |
| |                                           | | Enables/disables CPU as acceleration (or the helper device) in the |
| |                                           | | beginning. The default value is `true`, indicating that CPU is used|
| |                                           | | as acceleration by default.                                        |
+---------------------------------------------+----------------------------------------------------------------------+

@endsphinxdirective

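As context for the table above, a minimal sketch of how these properties combine in one `compile_model` call; the device list and model path are illustrative, and the header location of the `ov::intel_auto` properties is an assumption that may differ between releases:

```cpp
#include <openvino/openvino.hpp>
#include <openvino/runtime/auto/properties.hpp>  // assumed location of ov::intel_auto properties

int main() {
    ov::Core core;
    std::shared_ptr<ov::Model> model = core.read_model("sample.xml");  // placeholder model
    // Prefer GPU over CPU, and opt out of the CPU startup acceleration
    // that ov::intel_auto::enable_startup_fallback controls.
    ov::CompiledModel compiled = core.compile_model(model, "AUTO",
        ov::device::priorities("GPU,CPU"),
        ov::intel_auto::enable_startup_fallback(false));
    return 0;
}
```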
@@ -122,7 +137,7 @@ The device candidate list enables you to customize the priority and limit the ch
- If <device candidate list> is not specified, AUTO assumes all the devices present in the system can be used.
- If `AUTO` without any device names is specified, AUTO assumes all the devices present in the system can be used, and will load the network to all devices and run inference based on their default priorities, from high to low.

To specify the priority of devices, enter the device names in the priority order (from high to low) in `AUTO: <device names>`, or use the `ov::device:priorities` property.
To specify the priority of devices, enter the device names in the priority order (from high to low) in `AUTO: <device names>`, or use the `ov::device::priorities` property.

See the following code for using AUTO and specifying devices:

@@ -192,25 +207,43 @@ AUTO will then query all available devices and remove CPU from the candidate lis

Note that if you choose to exclude CPU from device candidate list, CPU will not be able to support the initial model compilation stage. See more information in [How AUTO Works](#how-auto-works).

### Checking Target Runtime Devices

To query the runtime target devices on which the inferences are being executed using AUTO, you can use the `ov::execution_devices` property. It must be used with `get_property`, for example:

@sphinxdirective

.. tab:: C++

   .. doxygensnippet:: docs/snippets/AUTO7.cpp
      :language: cpp
      :fragment: [part7]

.. tab:: Python

   .. doxygensnippet:: docs/snippets/ov_auto.py
      :language: python
      :fragment: [part7]

@endsphinxdirective

### Performance Hints for AUTO
The `ov::hint::performance_mode` property enables you to specify a performance option for AUTO to be more efficient for particular use cases.

> **NOTE**: Currently, the `ov::hint` property is supported by CPU and GPU devices only.

#### THROUGHPUT
This option prioritizes high throughput, balancing between latency and power. It is best suited for tasks involving multiple jobs, such as inference of video feeds or large numbers of images.

> **NOTE**: If no performance hint is set explicitly, AUTO will set THROUGHPUT for devices that have not set `ov::device::properties`. For example, if you have both a CPU and a GPU in the system, this command `core.compile_model("AUTO", ov::device::properties("CPU", ov::enable_profiling(true)))` will set THROUGHPUT for the GPU only. No hint will be set for the CPU although it's the selected device.
The `ov::hint::performance_mode` property enables you to specify a performance option for AUTO to be more efficient for particular use cases. The default hint for AUTO is `LATENCY`.

#### LATENCY
This option prioritizes low latency, providing short response time for each inference job. It performs best for tasks where inference is required for a single input image, e.g. a medical analysis of an ultrasound scan image. It also fits the tasks of real-time or nearly real-time applications, such as an industrial robot's response to actions in its environment or obstacle avoidance for autonomous vehicles.

> **NOTE**: If no performance hint is set explicitly, AUTO will set LATENCY for devices that have not set `ov::device::properties`, for example, `ov::device::properties(<DEVICE_NAME>, ov::hint::performance_mode(ov::hint::LATENCY))`.

@sphinxdirective

.. _cumulative throughput:

@endsphinxdirective

#### THROUGHPUT
This option prioritizes high throughput, balancing between latency and power. It is best suited for tasks involving multiple jobs, such as inference of video feeds or large numbers of images.

#### CUMULATIVE_THROUGHPUT
While `LATENCY` and `THROUGHPUT` can select one target device with your preferred performance option, the `CUMULATIVE_THROUGHPUT` option enables running inference on multiple devices for higher throughput. With `CUMULATIVE_THROUGHPUT`, AUTO loads the network model to all available devices in the candidate list, and then runs inference on them based on the default or specified priority.

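Tying the hints together, a sketch (illustrative device list and model path) of requesting `CUMULATIVE_THROUGHPUT` through the same `compile_model` call used elsewhere in these docs:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    std::shared_ptr<ov::Model> model = core.read_model("sample.xml");  // placeholder model
    // AUTO loads the model to every device in the candidate list and
    // schedules inference across all of them for aggregate throughput.
    ov::CompiledModel compiled = core.compile_model(model, "AUTO:GPU,CPU",
        ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
    return 0;
}
```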
@@ -46,7 +46,7 @@ Some of the OpenVINO Development Tools also support both OpenVINO IR v10 and v11
- Accuracy checker uses API 2.0 for model accuracy measurement by default. It also supports switching to the old API by using the `--use_new_api False` command-line parameter. Both launchers accept OpenVINO IR v10 and v11, but in some cases configuration files should be updated. For more details, see the [Accuracy Checker documentation](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/openvino_launcher_readme.md).
- [Compile tool](../../../tools/compile_tool/README.md) compiles the model to be used in API 2.0 by default. To use the resulting compiled blob under the Inference Engine API, the additional `ov_api_1_0` option should be passed.

However, Post-Training Optimization Tool and Deep Learning Workbench of OpenVINO 2022.1 do not support OpenVINO IR v10. They require the latest version of Model Optimizer to generate OpenVINO IR v11 files.
However, Post-Training Optimization Tool of OpenVINO 2022.1 does not support OpenVINO IR v10. It requires the latest version of Model Optimizer to generate OpenVINO IR v11 files.

> **NOTE**: To quantize your OpenVINO IR v10 models to run with OpenVINO 2022.1, download and use Post-Training Optimization Tool of OpenVINO 2021.4.

@@ -30,7 +30,7 @@ Users in China might encounter errors while downloading sources via PIP during O

### <a name="proxy-issues"></a>Proxy Issues

If you encounter proxy issues during the installation with Docker, you need to set up proxy settings for Docker. See the [Set Proxy section in DL Workbench Installation](https://docs.openvino.ai/latest/workbench_docs_Workbench_DG_Prerequisites.html#set-proxy) for more details.
If you encounter proxy issues during the installation with Docker, you need to set up proxy settings for Docker. See the [Docker guide](https://docs.docker.com/network/proxy/) for more details.


@anchor yocto-install-issues

@@ -45,7 +45,6 @@ Similarly, different devices require a different number of execution streams to
In some cases, a combination of streams and batching may be required to maximize the throughput.

One possible throughput optimization strategy is to **set an upper bound for latency and then increase the batch size and/or number of the streams until that tail latency is met (or the throughput is not growing anymore)**.
Consider [OpenVINO Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) that builds handy latency vs throughput charts, iterating over possible values of the batch size and number of streams.

> **NOTE**: When playing with [dynamically-shaped inputs](../OV_Runtime_UG/ov_dynamic_shapes.md), use only the streams (no batching), as they tolerate individual requests having different shapes.
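A sketch of the bound-the-latency-then-scale strategy from the paragraph above, assuming a CPU target; `ov::num_streams` and `ov::optimal_number_of_infer_requests` are existing 2.0 properties, while the model path and the stream count are placeholders a real sweep would vary:

```cpp
#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;
    std::shared_ptr<ov::Model> model = core.read_model("sample.xml");  // placeholder model
    // Request several execution streams; a real sweep would keep raising
    // this value until the tail-latency budget is exhausted.
    ov::CompiledModel compiled = core.compile_model(model, "CPU", ov::num_streams(4));
    // The device reports how many parallel requests keep those streams busy.
    uint32_t n = compiled.get_property(ov::optimal_number_of_infer_requests);
    std::cout << "optimal number of infer requests: " << n << "\n";
    return 0;
}
```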
@@ -6,7 +6,6 @@
   :maxdepth: 1
   :hidden:

   openvino_docs_security_guide_workbench
   openvino_docs_OV_UG_protecting_model_guide

@endsphinxdirective

@@ -1,27 +0,0 @@
# Deep Learning Workbench Security {#openvino_docs_security_guide_workbench}

Deep Learning Workbench (DL Workbench) is a web application running within a Docker\* container.

## Run DL Workbench

Unless necessary, limit the connections to the DL Workbench to `localhost` (127.0.0.1), so that it
is only accessible from the machine the Docker container is built on.

When using `docker run` to [start the DL Workbench from Docker Hub](@ref workbench_docs_Workbench_DG_Run_Locally), limit connections for the host IP 127.0.0.1.
For example, limit the connections for the host IP to the port `5665` with the `-p 127.0.0.1:5665:5665` command. Refer to [Container networking](https://docs.docker.com/config/containers/container-networking/#published-ports) for details.

## Authentication Security

DL Workbench uses [authentication tokens](@ref workbench_docs_Workbench_DG_Authentication) to access the
application. The script starting the DL Workbench creates an authentication token each time the DL
Workbench starts. Anyone who has the authentication token can use the DL Workbench.

When you finish working with the DL Workbench, log out to prevent the use of the DL Workbench from
the same browser session without authentication.

To invalidate the authentication token completely, [restart the DL Workbench](@ref workbench_docs_Workbench_DG_Docker_Container).

## Use TLS to Protect Communications

[Configure Transport Layer Security (TLS)](@ref workbench_docs_Workbench_DG_Configure_TLS) to keep the
authentication token encrypted.

docs/snippets/AUTO7.cpp (new file, 18 lines)
@@ -0,0 +1,18 @@
#include <openvino/openvino.hpp>

int auto7() {
    {
        //! [part7]
        ov::Core core;

        // read a network in IR, PaddlePaddle, or ONNX format
        std::shared_ptr<ov::Model> model = core.read_model("sample.xml");

        // compile a model on AUTO
        ov::CompiledModel compiled_model = core.compile_model(model, "AUTO");
        // query the runtime target devices on which the inferences are being executed
        ov::Any execution_devices = compiled_model.get_property(ov::execution_devices);
        //! [part7]
    }
    return 0;
}

@@ -10,9 +10,9 @@ endif()

if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
    ie_add_compiler_flags(-Wno-unused-variable)
    if(CMAKE_COMPILER_IS_GNUCXX)
        ie_add_compiler_flags(-Wno-unused-variable -Wno-unused-but-set-variable)
    endif()
endif()
if(UNUSED_BUT_SET_VARIABLE_SUPPORTED)
    ie_add_compiler_flags(-Wno-unused-but-set-variable)
endif()

file(GLOB SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"

@@ -108,6 +108,17 @@ def part6():
    compiled_model = core.compile_model(model=model, device_name="AUTO");
    #! [part6]

def part7():
    #! [part7]
    core = Core()
    # read a network in IR, PaddlePaddle, or ONNX format
    model = core.read_model(model_path)
    # compile a model on AUTO
    compiled_model = core.compile_model(model=model, device_name="AUTO")
    # query the runtime target devices on which the inferences are being executed
    execution_devices = compiled_model.get_property("EXECUTION_DEVICES")
    #! [part7]

def main():
    part0()
    part1()
@@ -115,6 +126,7 @@ def main():
    part4()
    part5()
    part6()
    part7()

if __name__ == '__main__':
    sys.exit(main())

@@ -8,6 +8,5 @@

   omz_tools_accuracy_checker
   omz_data_datasets
   openvino_inference_engine_tools_cross_check_tool_README

@endsphinxdirective
@@ -29,6 +29,8 @@ if [ -f /etc/lsb-release ] || [ -f /etc/debian_version ] ; then
        file \
        `# build tools` \
        build-essential \
        ninja-build \
        scons \
        ccache \
        "${cmake_packages[@]}" \
        "${x86_64_specific_packages[@]}" \
@@ -93,6 +95,8 @@ elif [ -f /etc/redhat-release ] || grep -q "rhel" /etc/os-release ; then
        `# build tools` \
        cmake3 \
        ccache \
        ninja-build \
        scons \
        gcc \
        gcc-c++ \
        make \
@@ -129,6 +133,8 @@ elif [ -f /etc/os-release ] && grep -q "raspbian" /etc/os-release; then
        `# build tools` \
        build-essential \
        ccache \
        ninja-build \
        scons \
        `# to find dependencies` \
        pkg-config \
        `# to determine product version via git` \

@@ -8,7 +8,7 @@ add_subdirectory(c)
# used by tests_deprecated
if(TARGET format_reader)
    install(TARGETS format_reader
            RUNTIME DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL
            ARCHIVE DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL
            LIBRARY DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL)
endif()

@@ -1,3 +1,5 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

add_subdirectory(opencv_c_wrapper)
@@ -3,13 +3,16 @@
#

project(opencv_c_wrapper)
set(TARGET_NAME opencv_c_wrapper)
set(TARGET_NAME ${PROJECT_NAME})

file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.c)
file(GLOB HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp
                  ${CMAKE_CURRENT_SOURCE_DIR}/src/*.h
                  ${CMAKE_CURRENT_SOURCE_DIR}/src/*.c)
file(GLOB HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h)

# create library
add_library(${TARGET_NAME} SHARED ${HEADERS} ${SOURCES})
add_library(${TARGET_NAME} STATIC ${HEADERS} ${SOURCES})

# Find OpenCV components if exist
find_package(OpenCV QUIET COMPONENTS core imgproc imgcodecs)
@@ -21,16 +24,11 @@ endif()

target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES})

target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include"
                                          PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")

set_target_properties(${TARGET_NAME} PROPERTIES FOLDER c_samples)

if(COMMAND add_clang_format_target AND NOT IE_SAMPLE_EXCLUDE_CLANG_FORMAT)
    add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
endif()

install(
    TARGETS ${TARGET_NAME}
    RUNTIME DESTINATION samples_bin/ COMPONENT samples_bin EXCLUDE_FROM_ALL
    LIBRARY DESTINATION samples_bin/ COMPONENT samples_bin EXCLUDE_FROM_ALL
)

@@ -6,23 +6,7 @@
#include <stdio.h>

#ifdef __cplusplus
# define OPENCV_C_EXTERN extern "C"
#else
# define OPENCV_C_EXTERN
#endif

#if defined(__GNUC__) && (__GNUC__ < 4)
# define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __VA_ARGS__
#else
# if defined(_WIN32)
#  ifdef opencv_c_wrapper_EXPORTS
#   define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllexport) __VA_ARGS__ __cdecl
#  else
#   define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __declspec(dllimport) __VA_ARGS__ __cdecl
#  endif
# else
#  define OPENCV_C_WRAPPER(...) OPENCV_C_EXTERN __attribute__((visibility("default"))) __VA_ARGS__
# endif
extern "C" {
#endif

/**
@@ -65,7 +49,7 @@ typedef struct color {
 * @param img A pointer to the newly created c_mat_t.
 * @return Status of the operation: 0 for success, -1 for fail.
 */
OPENCV_C_WRAPPER(int) image_read(const char* img_path, c_mat_t* img);
int image_read(const char* img_path, c_mat_t* img);

/**
 * @brief Resizes an image.
@@ -75,8 +59,7 @@ OPENCV_C_WRAPPER(int) image_read(const char* img_path, c_mat_t* img);
 * @param height The height of dst_img.
 * @return Status of the operation: 0 for success, -1 for fail.
 */
OPENCV_C_WRAPPER(int)
image_resize(const c_mat_t* src_img, c_mat_t* dst_img, const int width, const int height);
int image_resize(const c_mat_t* src_img, c_mat_t* dst_img, const int width, const int height);

/**
 * @brief Saves an image to a specified file. The image format is chosen based on the filename
@@ -85,14 +68,14 @@ image_resize(const c_mat_t* src_img, c_mat_t* dst_img, const int width, const in
 * @param img Image to be saved.
 * @return Status of the operation: 0 for success, -1 for fail.
 */
OPENCV_C_WRAPPER(int) image_save(const char* img_path, c_mat_t* img);
int image_save(const char* img_path, c_mat_t* img);

/**
 * @brief Releases memory occupied by a c_mat_t instance.
 * @param img A pointer to the c_mat_t instance to free memory.
 * @return Status of the operation: 0 for success, -1 for fail.
 */
OPENCV_C_WRAPPER(int) image_free(c_mat_t* img);
int image_free(c_mat_t* img);

/**
 * @brief Adds colored rectangles to the image
@@ -103,5 +86,8 @@ OPENCV_C_WRAPPER(int) image_free(c_mat_t* img);
 * @param thickness - thickness of a line (in pixels) to be used for bounding boxes
 * @return Status of the operation: 0 for success, -1 for fail.
 */
OPENCV_C_WRAPPER(int)
image_add_rectangles(c_mat_t* img, rectangle_t rects[], int classes[], int num, int thickness);
int image_add_rectangles(c_mat_t* img, rectangle_t rects[], int classes[], int num, int thickness);

#ifdef __cplusplus
}
#endif
@@ -1,3 +1,7 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bmp_reader.h"

#include <memory.h>
@@ -1,3 +1,7 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

typedef struct BmpHeaderType {
@@ -70,10 +70,11 @@ if(APPLE)
endif()

if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)")
    set(AARCH64 ON)
    set(AARCH64 ON)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
    set(ARM ON)
    set(ARM ON)
endif()

if(ARM AND NOT (CMAKE_CROSSCOMPILING OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"))
    add_compile_options(-march=armv7-a+fp)
endif()
@@ -133,19 +134,10 @@ elseif(gflags_required)
    endif()
endif()

if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/utils")
    add_subdirectory(common/utils)
endif()
# include common utils
add_subdirectory(common)

# format reader must be added after find_package(OpenVINO) to get
# exactly the same OpenCV_DIR path which was used for the OpenVINO build
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/format_reader")
    add_subdirectory(common/format_reader)
elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/opencv_c_wrapper")
    add_subdirectory(common/opencv_c_wrapper)
endif()

# samples build can be switched off during whole IE build
# samples build can be switched off during whole OpenVINO build
if (DEFINED OpenVINO_SOURCE_DIR AND NOT ENABLE_SAMPLES)
    return()
endif()
@@ -247,8 +239,11 @@ endmacro()

# collect all samples subdirectories
file(GLOB samples_dirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)

# skip building of unnecessary subdirectories
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty")
    list(REMOVE_ITEM samples_dirs common thirdparty)
    list(REMOVE_ITEM samples_dirs thirdparty)
endif()
list(REMOVE_ITEM samples_dirs common)

add_samples_to_build(${samples_dirs})

@@ -125,7 +125,7 @@ Options:
    'throughput' or 'tput': device performance mode will be set to THROUGHPUT.
    'cumulative_throughput' or 'ctput': device performance mode will be set to CUMULATIVE_THROUGHPUT.
    'latency': device performance mode will be set to LATENCY.
    'none': no device performance mode will be set.
    'none': device performance mode will be set to UNDEFINED.
    Using explicit 'nstreams' or other device-specific options, please set hint to 'none'
  -niter <integer>  Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
  -t                Optional. Time in seconds to execute topology.

@@ -44,7 +44,7 @@ static const char hint_message[] =
    "                'cumulative_throughput' or 'ctput': device performance mode will be set to "
    "CUMULATIVE_THROUGHPUT.\n"
    "                'latency': device performance mode will be set to LATENCY.\n"
    "                'none': no device performance mode will be set.\n"
    "                'none': device performance mode will be set to UNDEFINED.\n"
    "                Using explicit 'nstreams' or other device-specific options, please set hint to "
    "'none'";

@@ -14,7 +14,6 @@
#include <vector>

#include "format_reader_ptr.h"
#include "npy.h"
#include "samples/slog.hpp"
#include "shared_tensor_allocator.hpp"
#include "utils.hpp"

@@ -606,11 +606,14 @@ int main(int argc, char* argv[]) {
            device_nstreams.erase(device);
        }
    }

    for (auto&& item : config) {
        core.set_property(item.first, item.second);
    }

    auto result = std::find_if(config.begin(), config.end(), [&](const std::pair<std::string, ov::AnyMap>& item) {
        if (device_name.find(item.first) == 0)
            return true;
        return false;
    });
    ov::AnyMap device_config = {};
    if (result != config.end())
        device_config = result->second;
    size_t batchSize = FLAGS_b;
    ov::element::Type type = ov::element::undefined;
    std::string topology_name = "";
@@ -642,7 +645,7 @@ int main(int argc, char* argv[]) {
        next_step();
        slog::info << "Skipping the step for loading model from file" << slog::endl;
        auto startTime = Time::now();
        compiledModel = core.compile_model(FLAGS_m, device_name);
        compiledModel = core.compile_model(FLAGS_m, device_name, device_config);
        auto duration_ms = get_duration_ms_till_now(startTime);
        slog::info << "Compile model took " << double_to_string(duration_ms) << " ms" << slog::endl;
        slog::info << "Original model I/O parameters:" << slog::endl;
@@ -821,7 +824,7 @@ int main(int argc, char* argv[]) {
        // --------------------------------------------------------
        next_step();
        startTime = Time::now();
        compiledModel = core.compile_model(model, device_name);
        compiledModel = core.compile_model(model, device_name, device_config);
        duration_ms = get_duration_ms_till_now(startTime);
        slog::info << "Compile model took " << double_to_string(duration_ms) << " ms" << slog::endl;
        if (statistics)

@@ -1,3 +1,6 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

add_subdirectory(utils)
add_subdirectory(format_reader)
@ -4,16 +4,17 @@

set (TARGET_NAME "format_reader")

file (GLOB MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file (GLOB LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
file (GLOB MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
file (GLOB LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src/*.h)
file (GLOB LIBRARY_PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h)

# Create named folders for the sources within the .vcproj
# Empty name lists them directly under the .vcproj
source_group("src" FILES ${LIBRARY_SRC})
source_group("include" FILES ${LIBRARY_HEADERS})
source_group("src" FILES ${LIBRARY_SRC} ${LIBRARY_HEADERS})
source_group("include" FILES ${LIBRARY_PUBLIC_HEADERS})

# Create library file from sources.
add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})
add_library(${TARGET_NAME} STATIC ${MAIN_SRC} ${LIBRARY_HEADERS} ${LIBRARY_PUBLIC_HEADERS})

# Find OpenCV components if exist
find_package(OpenCV QUIET COMPONENTS core imgproc imgcodecs)
@ -29,20 +30,11 @@ else()
    target_compile_definitions(${TARGET_NAME} PRIVATE USE_OPENCV)
endif()

target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_FORMAT_READER)
target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include"
                                          PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")

target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}"
                                                 "${CMAKE_CURRENT_SOURCE_DIR}/..")

set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}
                                                FOLDER cpp_samples)
set_target_properties(${TARGET_NAME} PROPERTIES FOLDER cpp_samples)

if(COMMAND add_clang_format_target)
    add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME})
endif()

install(
    TARGETS ${TARGET_NAME}
    RUNTIME DESTINATION samples_bin/ COMPONENT samples_bin EXCLUDE_FROM_ALL
    LIBRARY DESTINATION samples_bin/ COMPONENT samples_bin EXCLUDE_FROM_ALL
)
@ -13,22 +13,6 @@
#include <string>
#include <vector>

#if defined(_WIN32)
# ifdef IMPLEMENT_FORMAT_READER
# define FORMAT_READER_API(type) extern "C" __declspec(dllexport) type
# else
# define FORMAT_READER_API(type) extern "C" type
# endif
#elif (__GNUC__ >= 4)
# ifdef IMPLEMENT_FORMAT_READER
# define FORMAT_READER_API(type) extern "C" __attribute__((visibility("default"))) type
# else
# define FORMAT_READER_API(type) extern "C" type
# endif
#else
# define FORMAT_READER_API(TYPE) extern "C" TYPE
#endif

namespace FormatReader {
/**
 * \class FormatReader
@ -85,10 +69,11 @@ public:
     */
    virtual size_t size() const = 0;
};
} // namespace FormatReader

/**
 * \brief Function for create reader
 * @return FormatReader pointer
 * @return Reader pointer
 */
FORMAT_READER_API(FormatReader::Reader*) CreateFormatReader(const char* filename);
Reader* CreateFormatReader(const char* filename);

} // namespace FormatReader
@ -42,6 +42,6 @@ void Registry::RegisterReader(CreatorFunction f) {
    _data.push_back(f);
}

FORMAT_READER_API(Reader*) CreateFormatReader(const char* filename) {
Reader* FormatReader::CreateFormatReader(const char* filename) {
    return Registry::CreateReader(filename);
}
@ -1,6 +1,7 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

set(TARGET_NAME "ie_samples_utils")

file(GLOB_RECURSE SOURCES "*.cpp" "*.hpp" "*.h")
@ -148,7 +148,7 @@ public:
        std::cout << std::endl << std::endl;
        printHeader();

        for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) {
        for (size_t id = image_id * _nTop; id < (image_id + 1) * _nTop; ++id) {
            std::cout.precision(7);
            // Getting probability for resulting class
            const auto index = _results.at(id) + image_id * (_outTensor.get_size() / _batchSize);
@ -179,7 +179,7 @@ public:
        std::cout << std::endl << std::endl;
        printHeader();

        for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) {
        for (size_t id = image_id * _nTop; id < (image_id + 1) * _nTop; ++id) {
            std::cout.precision(7);
            // Getting probability for resulting class
            const auto result = _outTensor.data<float>();
@ -196,7 +196,7 @@ std::map<IE::ColorFormat, colorformat_e> colorformat_map = {{IE::ColorFormat::RA
    CATCH_IE_EXCEPTION(INFER_NOT_STARTED, InferNotStarted) \
    CATCH_IE_EXCEPTION(NETWORK_NOT_READ, NetworkNotRead) \
    CATCH_IE_EXCEPTION(INFER_CANCELLED, InferCancelled) \
    catch (...) { \
    catch (const std::exception&) { \
        return IEStatusCode::UNEXPECTED; \
    }
@ -21,7 +21,7 @@ endif()
#

set(ov_python_req "${OpenVINOPython_SOURCE_DIR}/requirements.txt")
set(ie_python_req "cython>=0.29.32")
set(ie_python_req "${OpenVINOPython_SOURCE_DIR}/src/compatibility/openvino/requirements-dev.txt")

function(ov_check_python_build_conditions)
    # user explicitly specified ENABLE_PYTHON=ON
@ -72,10 +72,10 @@ function(ov_check_python_build_conditions)
    set(ov_python_req_FOUND ON)

    # check for Cython requirement for build IE API 1.0
    ov_check_pip_package(REQUIREMENT ${ie_python_req}
                         RESULT_VAR ie_python_req_FOUND
                         WARNING_MESSAGE "install python3 -m install ${ie_python_req} for IE API 1.0 requirements"
                         MESSAGE_MODE TRACE)
    ov_check_pip_packages(REQUIREMENTS_FILE ${ie_python_req}
                          RESULT_VAR ie_python_req_FOUND
                          WARNING_MESSAGE "install python3 -m pip install -r ${ie_python_req} for IE API 1.0 requirements"
                          MESSAGE_MODE TRACE)

    # cython can be installed as a debian package, so pip requirements can be unsatisfied
    # so, let's check to find cython anyway
@ -87,7 +87,7 @@ function(ov_check_python_build_conditions)
    if(CYTHON_VERSION VERSION_GREATER_EQUAL 0.29)
        set(ie_python_req_FOUND ON)
    else()
        message(${message_mode} "Python module '${ie_python_req}' is missed, IE Python API 1.0 will not be built (ENABLE_PYTHON is OFF)")
        message(${message_mode} "Python requirements '${ie_python_req}' are missed, IE Python API 1.0 will not be built (ENABLE_PYTHON is OFF)")
    endif()
endif()
@ -21,7 +21,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    # 'argument': conversion from 'size_t' to 'int', possible loss of data
    ie_add_compiler_flags(/wd4267)
    ie_add_compiler_flags(/wd4244)
elseif(CMAKE_COMPILER_IS_GNUCXX)
endif()
if(UNUSED_BUT_SET_VARIABLE_SUPPORTED)
    ie_add_compiler_flags(-Wno-unused-but-set-variable)
endif()
@ -13,7 +13,6 @@
#include "ngraph/op/divide.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/variant.hpp"
#include "pyngraph/node.hpp"
#include "pyngraph/rt_map.hpp"
#include "pyngraph/variant.hpp"
@ -14,13 +14,12 @@
#include "ngraph/op/divide.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/variant.hpp"
#include "pyngraph/node.hpp"
#include "pyngraph/variant.hpp"

namespace py = pybind11;

using PyRTMap = ngraph::RTMap;
using PyRTMap = ov::RTMap;

PYBIND11_MAKE_OPAQUE(PyRTMap);
@ -36,6 +35,6 @@ void regclass_pyngraph_PyRTMap(py::module m) {
        m[k] = v;
    });
    py_map.def("__getitem__", [](PyRTMap& m, const std::string& k) {
        return m.at(k).as<std::shared_ptr<ngraph::Variant>>();
        return m.at(k).as<std::shared_ptr<ov::RuntimeAttribute>>();
    });
}
@ -1,11 +1,6 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "ngraph/variant.hpp" // ngraph::Variant

#include <pybind11/pybind11.h>

#include "pyngraph/variant.hpp"

namespace py = pybind11;
@ -115,6 +115,9 @@ class TorchScriptPythonDecoder (Decoder):
    def get_input(self, index: int):
        return self.inputs()[index]

    def get_input_debug_name(self, index: int) -> str:
        return self._raw_input(index).debugName()

    def get_input_shape(self, index: int):
        raw_input = self._raw_input(index)
        return self.get_shape_for_value(raw_input)
@ -123,6 +126,9 @@ class TorchScriptPythonDecoder (Decoder):
        raw_input = self._raw_input(index)
        return self.get_type_for_value(raw_input)

    def get_output_debug_name(self, index: int) -> str:
        return self._raw_output(index).debugName()

    def get_output_shape(self, index: int):
        output = self._raw_output(index)
        return self.get_shape_for_value(output)
@ -22,6 +22,10 @@ class PyDecoder : public ov::frontend::pytorch::TorchDecoder {
        PYBIND11_OVERRIDE_PURE(const std::vector<size_t>&, TorchDecoder, inputs);
    }

    const std::string& get_input_debug_name(size_t index) const override {
        PYBIND11_OVERRIDE_PURE(const std::string&, TorchDecoder, get_input_debug_name, index);
    }

    ov::PartialShape get_input_shape(size_t index) const override {
        PYBIND11_OVERRIDE_PURE(ov::PartialShape, TorchDecoder, get_input_shape, index);
    }
@ -34,8 +38,8 @@ class PyDecoder : public ov::frontend::pytorch::TorchDecoder {
        PYBIND11_OVERRIDE_PURE(const std::vector<size_t>&, TorchDecoder, get_input_transpose_order, index);
    }

    const std::vector<size_t>& get_output_transpose_order(size_t index) const override {
        PYBIND11_OVERRIDE_PURE(const std::vector<size_t>&, TorchDecoder, get_output_transpose_order, index);
    const std::string& get_output_debug_name(size_t index) const override {
        PYBIND11_OVERRIDE_PURE(const std::string&, TorchDecoder, get_output_debug_name, index);
    }

    ov::PartialShape get_output_shape(size_t index) const override {
@ -46,6 +50,10 @@ class PyDecoder : public ov::frontend::pytorch::TorchDecoder {
        PYBIND11_OVERRIDE_PURE(ov::Any, TorchDecoder, get_output_type, index);
    }

    const std::vector<size_t>& get_output_transpose_order(size_t index) const override {
        PYBIND11_OVERRIDE_PURE(const std::vector<size_t>&, TorchDecoder, get_output_transpose_order, index);
    }

    bool input_is_none(size_t index) const override {
        PYBIND11_OVERRIDE_PURE(bool, TorchDecoder, input_is_none, index);
    }
@ -1,3 +1,3 @@
setuptools>=53.0.0
wheel>=0.38.1
patchelf; sys_platform == 'linux' and platform_machine == 'x86_64'
patchelf; sys_platform == 'linux' and platform_machine == 'x86_64' or sys_platform == 'linux' and platform_machine == 'aarch64'
@ -33,9 +33,9 @@ elseif(SELECTIVE_BUILD STREQUAL "ON")
        -Wno-unused-variable
        -Wno-unused-parameter
        -Wno-unused-local-typedefs)
    if(CMAKE_COMPILER_IS_GNUCXX)
        target_compile_options(${TARGET_NAME} INTERFACE -Wno-unused-but-set-variable)
    endif()
endif()
if(UNUSED_BUT_SET_VARIABLE_SUPPORTED)
    target_compile_options(${TARGET_NAME} INTERFACE -Wno-unused-but-set-variable)
endif()

set(GENERATED_HEADER ${CMAKE_CURRENT_BINARY_DIR}/conditional_compilation_gen.h CACHE FILEPATH "")
@ -10,7 +10,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>

@ -7,7 +7,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>

@ -10,7 +10,6 @@

#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/variant.hpp>
#include "low_precision/lpt_visibility.hpp"
#include "low_precision/base_matcher_pass.hpp"
#include "low_precision/lpt_itt.hpp"

@ -8,7 +8,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

#include <low_precision/lpt_visibility.hpp>

@ -8,7 +8,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <low_precision/lpt_visibility.hpp>
#include "low_precision/rt_info/attribute_parameters.hpp"

@ -9,7 +9,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>

@ -10,7 +10,6 @@
#include <ngraph/node.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/variant.hpp>

#include "low_precision/lpt_visibility.hpp"
#include "low_precision/network_helper.hpp"

@ -8,7 +8,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

#include <low_precision/lpt_visibility.hpp>

@ -8,7 +8,6 @@
#include <string>
#include <vector>
#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include "low_precision/lpt_visibility.hpp"
#include "low_precision/rt_info/precision_preserved_attribute.hpp"

@ -8,7 +8,6 @@
#include <string>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include "low_precision/rt_info/shared_value_attribute.hpp"
#include "low_precision/rt_info/attribute_parameters.hpp"

@ -8,7 +8,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include "low_precision/lpt_visibility.hpp"
#include "low_precision/rt_info/shared_value_attribute.hpp"

@ -11,7 +11,6 @@

#include <ngraph/node.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/variant.hpp>

#include "low_precision/lpt_visibility.hpp"
#include "low_precision/rt_info/attribute_parameters.hpp"

@ -10,7 +10,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>

@ -5,7 +5,6 @@
#pragma once

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>

@ -5,8 +5,7 @@
#pragma once

#include <string>

#include <ngraph/variant.hpp>
#include "openvino/core/runtime_attribute.hpp"
#include <low_precision/lpt_visibility.hpp>

namespace ngraph {

@ -9,7 +9,6 @@
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>

@ -8,7 +8,6 @@
#include <vector>

#include <ngraph/pass/pass.hpp>
#include <ngraph/variant.hpp>

#include "low_precision/network_helper.hpp"
#include "low_precision/lpt_itt.hpp"

@ -6,7 +6,6 @@

#include <memory>
#include <vector>
#include <ngraph/variant.hpp>

using namespace ngraph;
using namespace ov;

@ -4,6 +4,7 @@

#include "low_precision/rt_info/quantization_mode_attribute.hpp"
#include <assert.h>
#include <sstream>

using namespace ngraph;
using namespace ov;

@ -6,7 +6,6 @@

#include <functional>
#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include <ostream>

namespace ngraph {
@ -33,6 +33,7 @@ static bool is_static_reshape_op(std::shared_ptr<ov::Node> node) {

    const auto input = reshape_node->input_value(0);
    const auto shape = reshape_node->input_value(1);

    if (input.get_partial_shape().is_dynamic() || shape.get_partial_shape().is_dynamic())
        return false;

@ -41,15 +42,13 @@ static bool is_static_reshape_op(std::shared_ptr<ov::Node> node) {
        return false;

    const auto& input_shape = input.get_shape();
    const auto output_shape = output_shape_const_op->cast_vector<int64_t>();
    const auto& output_shape = output_shape_const_op->cast_vector<int64_t>();
    // below casts are needed due to VC warning C4244, literals are not enough in this case
    const int64_t input_elems =
        std::accumulate(input_shape.begin(), input_shape.end(), static_cast<int64_t>(1), std::multiplies<int64_t>());
    const auto output_elems =
        std::accumulate(output_shape.begin(), output_shape.end(), static_cast<int64_t>(1), std::multiplies<int64_t>());
    if (output_elems <= 0 || input_elems == output_elems)
        return false;
    return true;
    return input_elems != output_elems;
}

static bool maybe_adopt_reshape_node(std::shared_ptr<ov::Node> reshape, ngraph::Mask::Ptr mask) {
@ -60,13 +59,30 @@ static bool maybe_adopt_reshape_node(std::shared_ptr<ov::Node> reshape, ngraph::
        return false;
    }

    auto sub_const_vector = std::vector<int64_t>();
    for (auto& dim : *mask.get())
        sub_const_vector.push_back(dim.size());
    const auto constant = get_constant_from_source(shape);
    if (!constant) {
        return false;
    }
    const auto new_shape = constant->cast_vector<int64_t>();
    std::vector<int64_t> sub_const_vector;
    sub_const_vector.reserve(mask->size());
    bool all_zeros = true;
    for (size_t i = 0; i < mask->size(); i++) {
        if (new_shape[i] <= 0) {
            sub_const_vector.push_back(0);
        } else {
            all_zeros = all_zeros && mask->at(i).size() == 0;
            sub_const_vector.push_back(mask->at(i).size());
        }
    }

    if (all_zeros)
        return true;

    const auto sub_const = ngraph::opset6::Constant::create(shape.get_element_type(), {mask->size()}, sub_const_vector);
    const auto sub = std::make_shared<ngraph::opset6::Subtract>(shape, sub_const);
    consumers.begin()->replace_source_output(sub);
    copy_runtime_info(shape.get_node_shared_ptr(), {sub_const, sub});

    NGRAPH_DEBUG << "Adopting values in (" << shape.get_node()->get_friendly_name() << ")"
                 << " by substracting " << vec_to_str(sub_const_vector);
@ -182,7 +198,8 @@ bool ngraph::pass::ShrinkWeights::run_on_model(const std::shared_ptr<ngraph::Fun
        if (!mask && init_mask)
            NGRAPH_DEBUG << "Mask was ruined for node:" << node->get_friendly_name() << "\nInit mask: " << *init_mask;
#endif
        if (is_static_reshape_op(node) && not_empty_mask(mask))
        if (is_static_reshape_op(node) && not_empty_mask(mask) &&
            !ov::op::util::is_constant(node->get_input_node_ptr(1)))
            if (!maybe_adopt_reshape_node(node, mask))
                continue;
src/common/snippets/README.md (new file, 13 lines)
@ -0,0 +1,13 @@
# SnippetS

## Key Contacts

Please contact a member of the [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group for assistance regarding snippets.

* [SnippetS design guide](./docs/snippets_design_guide.md)
* [CPU target for SnippetS code generator](./docs/snippets_cpu_target.md)

## See also
* [OpenVINO™ README](../../../README.md)
* [OpenVINO Core Components](../../README.md)
* [Developer documentation](../../../docs/dev/index.md)
src/common/snippets/docs/snippets_cpu_target.md (new file, 57 lines)
@ -0,0 +1,57 @@
# CPU target for SnippetS code generator

Snippets in their first generation can be seen as a generalization over a generic eltwise node. The first generation of snippets lacks integration with oneDNN, so the patterns it supports should be kept orthogonal to what is fused with post-ops.

A POC CPU implementation can be found [here](https://github.com/openvinotoolkit/openvino/pull/2824).

The first 8 kernel parameters are passed via a structure which is unpacked inside the kernel into registers. The rest are passed through the stack.

The loop trip count and the work amount should be placed in general-purpose (GP) registers. Moreover, all the parameters need to be loaded into GP registers. If we assume that we have enough registers, this can be done before the loop body.

```
auto param0 = abi_params[0];
auto param1 = abi_params[1];
auto result = abi_params[2];

auto work_amount = abi_params[3];
```

## Memory operations

A Load can be Vector, Scalar or Broadcast. Only the native vector size for an architecture is supported (e.g. 16 single-precision lanes on AVX-512).

A memory operation also generates post-increments for the pointer it uses (a scalar model of this behavior is sketched after the list below).

- `MemoryEmitter`
- `StoreEmitter`
- `ScalarStoreEmitter`
- `LoadEmitter` (post increment)
- `BroadcastLoadEmitter`
- `ScalarLoadEmitter` (post increment)
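A minimal scalar model of the post-increment behavior, assuming a native vector width of 16 floats (AVX-512); the real emitters JIT-generate this pattern rather than calling a function:

```cpp
// Scalar model of a post-incrementing Load, assuming 16 float lanes (AVX-512).
// Real LoadEmitters emit JIT code with the same pointer-advance semantics.
#include <array>
#include <cstddef>

constexpr std::size_t kLanes = 16;

std::array<float, kLanes> load_postinc(const float*& ptr) {
    std::array<float, kLanes> v{};
    for (std::size_t i = 0; i < kLanes; ++i)
        v[i] = ptr[i];   // the "vector" read
    ptr += kLanes;       // post-increment: the next Load reads the next chunk
    return v;
}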

## Tensor blocking

All inputs and outputs should have the same layout. Re-layout operations are not included in the snippets dialect. Since the current scope is limited to layout-oblivious operations, no specific handling for blocking is required. Extending the dialect with re-layout operations is subject to further benchmarking. The following memory representation is assumed.

```
  offset             domain                margin
+-------+-------------------------------+----------+
|       |                               |          |
|       |                               |          |
|       |                               |          |
|       |                               |          |
+-------+-------------------------------+----------+
```

Tensor data can be passed with strides.

## Data section

`Data` corresponds to a constant table and wraps this entity for the CPU.

## See also
* [OpenVINO™ README](../../../../README.md)
* [OpenVINO SnippetS](../README.md)
* [OpenVINO Core Components](../../../README.md)
* [Developer documentation](../../../../docs/dev/index.md)
src/common/snippets/docs/snippets_design_guide.md (new file, 301 lines)
@ -0,0 +1,301 @@
# SnippetS design guide

This document describes the design and rationale for the snippets code generator. The implementation is located [here](https://github.com/openvinotoolkit/openvino/tree/master/src/common/snippets). The proposal for CPU backend integration is [here](https://github.com/openvinotoolkit/openvino/pull/2824).

## Rationale

We believe that core **CNN operators (convolution, gemm, fully connected) are limited by compute; the rest are memory bound**. Math approximations (like transcendental functions) are rare in emerging workloads and could be treated with the same machinery. **Snippets are designed to optimize a topology for memory**, while leaving compute-intensive kernels to backend developers.

We believe the **potential speedup is proportional to the shrink in memory-walked bytes**. So we can transform the problem into a task to optimize for memory walks, whatever pattern the snippet has and whatever operations it contains. The number of memory walks should be less than or equal to that of handcrafted optimizations. This guarantees performance improvements over the previous approach (excluding corner cases caused by cache effects). *The shrinkage factor might be encoded into some cost function in a future evolution of the code generator.* The snippets generator provides diagnostics to estimate this shrinkage factor with the `ngraph::snippets::op::Subgraph::print_statistics(bool verbose)` member.

We design the SnippetS generator for back-end developers. The main purpose of inventing the snippets code generator is the decomposition of **operator fusion**, **register allocation** and **target kernel generation**. This allows modifications (like new fusion support) and feature extensions (like new operation support) to be done at a single point of modification and avoids a combinatorial explosion across fusions/types/architectures etc.

We believe that creating a full-fledged compiler or using existing compiler infrastructure (like LLVM & MLIR) is superfluous at this point of evolution. We aim to provide a **flexible and performant framework for operation fusions**, leaving micro-optimizations (e.g. instruction scheduling) to the backend H/W.

We do not aim to invent a DSL for SnippetS and would like to keep it this way. A DSL gives users more flexibility to express uncommon operations. However, the shift towards an approach that encodes topologies with elementary operations followed by smart enough fusions is already expressive and performant enough.

A **snippet** is a compiled compute **kernel** generated from a subgraph using the SnippetS code generator for a specific architecture with a **scheduling domain**. Using this scheduling domain and the calling convention, a backend can execute the generated compute kernels. For the first generation, snippets are **statically scheduled towards the output domain**. Multi-output snippets are supported if all outputs are broadcast-compatible, in the sense that the domains for all outputs can be broadcast from one root domain which defines the snippet schedule. It’s a subject of extension for future generations.

We use nGraph as the highest-level IR for subgraph representation and lowering transformations. **Opset1** is the base operation set for code generation. We aim to **keep the minimal possible and sufficient operation set** (or ISA) and keep it **RISC-like** (memory and compute decomposed).

**One subgraph corresponds to one snippet**. Operations which cannot be scheduled by a single schedule should not be placed in the same subgraph. A snippet is conceptually somewhat close to an OpenCL kernel, without the restriction to express only embarrassingly parallel tasks.
A **subgraph**, once extracted from the full topology IR, is **treated as an operation and data flow descriptor in scalar notation** (similar to OpenCL/CUDA). Tensor sizes are used for defining the scheduling domain and for detecting broadcasts/reductions.

We split operations into 3 groups: **layout-oblivious (LOO), layout-aware(-tolerant) and layout-dependent**. The semantics and implementation of a **layout-oblivious** operation are completely agnostic to the specific layout in which tensors are placed in memory; for example, element-wise math and ReLU fall into this category. The implementation of a **layout-aware** operation depends on the layout of its input/output tensors; for example, convolutions and other block-wise kernels or layout repacks. For a **layout-dependent** operation, both semantics and implementation depend on the layout; for example, the Yolo region. Patterns to fuse are constructed in terms of the taxonomy above.

## Design

Code generation is split into 2 phases: **tokenization** and **lowering**.

### Tokenization

Tokenization runs on the full topology nGraph function inside a specific plugin at the stage of common transformations. The input of tokenization is a topology graph. The output is a modified topology graph with `ngraph::snippets::op::Subgraph` operations installed. Each subgraph contains an nGraph function (called the **body**) which holds a part of the original topology legal for snippet generation (i.e. it can be scheduled with a single schedule).

The procedure of finding subgraphs suitable for code generation is called **tokenization**, meaning that we split the topology tree into subgraphs with the same greedy approach which is used for parsing an input stream of characters into tokens. It could also be seen as, and modified into, a basic block construction problem, since we also find a leader and potentially terminators. The implementation can be found [here](https://github.com/openvinotoolkit/openvino/blob/master/src/common/snippets/src/pass/collapse_subgraph.cpp).

Tokenization has an advantage over the pattern-matching approach (used in traditional and MLIR-based compilers) since it can handle arbitrary patterns of operations. Pattern matching deduces a specific configuration of operations to translate into another one, more suitable for the target machine or for further lowering. This means that the relations between operations are fixed. Tokenization, on the other hand, is limited only by the specific operation types which are **suitable and profitable** to fuse, with respect to original topology correctness (keeping it a directed acyclic graph).

The extracted body comes to a plug-in wrapped as a composite `Subgraph` operation which is seen as a black box from the plugin standpoint and can participate in any plugin-specific subroutines (e.g. layout assignment, memory allocation, etc.).

### Supported subgraph patterns

A subgraph accepts an arbitrary number of inputs and outputs. There is a 1:1 mapping between external (subgraph node’s) and internal (body) parameter indexes.

A pattern here is an exact subgraph configuration (nodes and the edges between them). **The first generation of snippets supports only layout-oblivious operations which may have broadcast on inputs and broadcast-compatible outputs**. For example, shapes `<1, 42, 17, 31>`, `<1, 42, 17, 1>` and `<1, 42, 1, 31>` are considered broadcast-compatible (a toy predicate for this rule is sketched after the diagram below). A layout-oblivious operation with multiple outputs serves as a snippet leader and forms a new subgraph. The most beneficial patterns are subgraphs with complex control flow but a minimal number of inputs and outputs. For example, GeLU has a 5x shrinkage factor in the number of bytes walked compared to the original unfused subgraph. The subgraph below could be considered an example of such a subgraph. The leader detection procedure aims to find such subgraphs.

```mermaid
flowchart LR
nodeA1(...) --> nodeA2(Add)
nodeA2(Add) --> nodeA3(Add)
nodeA2(Add) --> nodeA5(Multiply)
nodeA3(Add) --> nodeA4(Clamp)
nodeA4(Clamp) --> nodeA5(Multiply)
nodeA5(Multiply) --> nodeA6(...)
classDef no-bg-color fill:none,stroke-width:0px
classDef steel1 fill:#B9D6E5, stroke: #86B3CA, color: #262626
classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626
class nodeA1,nodeA6 no-bg-color
class nodeA2,nodeA3 daisy1
class nodeA4,nodeA5 steel1
class nodeA3 steel1
```
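The broadcast-compatibility rule above can be written as a small standalone predicate; a toy sketch (ranks assumed equal, with 1 matching any size):

```cpp
// Toy predicate for the broadcast-compatibility rule above: dimensions are
// compatible when they match or one of them is 1 (ranks assumed equal here).
#include <cassert>
#include <cstddef>
#include <vector>

static bool broadcast_compatible(const std::vector<int>& a, const std::vector<int>& b) {
    if (a.size() != b.size())
        return false;
    for (std::size_t i = 0; i < a.size(); ++i)
        if (a[i] != b[i] && a[i] != 1 && b[i] != 1)
            return false;
    return true;
}

int main() {
    assert(broadcast_compatible({1, 42, 17, 31}, {1, 42, 17, 1}));
    assert(broadcast_compatible({1, 42, 17, 31}, {1, 42, 1, 31}));
    assert(!broadcast_compatible({1, 42, 17, 31}, {1, 42, 16, 31}));
}
```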

Operations are greedily added to the subgraph until:
1. A new operation doesn’t introduce a loop in the topology function.
1. The number of inputs and outputs satisfies the target criteria.
1. The operation is not a predecessor of the topology output.
1. The resulting subgraph can be scheduled (all outputs are broadcast-compatible).

If a potential subgraph doesn’t meet the criteria above, the procedure continues to find a new leader.

### Lowering

Lowering is a sequence of subgraph (snippet body) traversal passes to generate a compute kernel out of the subgraphs of operations extracted by tokenization.

1. Common optimizations
1. Canonicalization
1. Domain normalization
1. Conversion to snippets dialect
1. Target-specific optimizations
1. Register allocation
1. Schedule generation
1. Target code emission

#### Common optimizations

Constants are treated as inputs for a subgraph, with an exception for scalar cases (since we don’t need to schedule them). `snippets::op::Scalar` is used to represent this kind of constant.

If such a Scalar comes as the second input of a Power operation, it’s replaced with `snippets::op::PowerStatic`.

#### Canonicalization

The goal of this step is to apply target-independent and schedule-related optimizations and to make the snippet **schedulable**.

##### Domain normalization

All input and output shapes are normalized to 6D for future schedule generation. If shape propagation fails or leads to inconsistent output shapes, an exception is raised.

The layout assigned by user code and passed to the `generate` function is propagated through the subgraph on this step as well. The layout is passed to the `generate` function as a `BlockedShapeVector`, which is a `std::vector<BlockedShape>`, while `BlockedShape` is `std::tuple<ngraph::Shape, ngraph::AxisVector, ngraph::element::Type>`. For example, if the backend supports the `NCHW16c` layout and a tensor has a size of `<1, 42, 17, 31>` and holds single-precision floating point, this structure should be `std::make_tuple(ngraph::Shape {1, 3, 17, 31, 16}, ngraph::AxisVector {0, 1, 2, 3, 1}, ngraph::element::f32);`. This allows a generic layout representation.
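The same descriptor, spelled out as compilable declarations (the type aliases are the ones named in the paragraph above; 42 channels blocked by 16 give 3 blocks, hence the shape):

```cpp
// The layout descriptor from the paragraph above: Shape {1, 3, 17, 31, 16}
// with axis order {0, 1, 2, 3, 1}, where the trailing 1 marks the split
// channel axis of the NCHW16c blocking.
#include <tuple>
#include <vector>
#include <ngraph/axis_vector.hpp>
#include <ngraph/shape.hpp>
#include <ngraph/type/element_type.hpp>

using BlockedShape = std::tuple<ngraph::Shape, ngraph::AxisVector, ngraph::element::Type>;
using BlockedShapeVector = std::vector<BlockedShape>;

const BlockedShape nchw16c_f32 = std::make_tuple(ngraph::Shape{1, 3, 17, 31, 16},
                                                 ngraph::AxisVector{0, 1, 2, 3, 1},
                                                 ngraph::element::f32);
```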

##### Dialect conversion

The goal of this step is to transform a subgraph (body function) into a form amenable to code generation. The input of this step is a subgraph in canonical form; the output is a subgraph in the snippets dialect.

A snippet or kernel is formed around the subgraph body in a sequence of traversal steps. Let’s walk through these steps with the smallest possible subgraph, which consists of a single `[Add]` operation.

When we extract subgraphs in the tokenization part, we explicitly insert Parameters and Results into the body to form a complete nGraph Function.

```mermaid
flowchart LR
nodeA1(Parameter) --> nodeA2(Add)
nodeA3(Parameter) --> nodeA2(Add)
nodeA2(Add) --> nodeA5(Result)
classDef moss1 fill:#D7F3A2, stroke: #B1D272, color: #262626
classDef steel1 fill:#B9D6E5, stroke: #86B3CA, color: #262626
classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626
class nodeA2 daisy1
class nodeA5 moss1
class nodeA8 steel1
class nodeA1,nodeA3 steel1
```

This function represents operation dependencies in scalar (similar to OpenCL) notation, while the shapes of the tensors are used to generate schedules. At this point the kernel-schedule decomposition is made (similar to Halide/OpenCL/TVM).

###### Explicit memory operations

As a next step, explicit memory operations are placed for each input and output. The `InsertLoad` and `InsertStore` passes are derived from `MatcherPass`.

```mermaid
flowchart LR
nodeA1(Parameter) --> nodeA6(Load)
nodeA6(Load) --> nodeA2(Add)
nodeA3(Parameter) --> nodeA7(Load)
nodeA7(Load) --> nodeA2(Add)
nodeA2(Add) --> nodeA8(Store)
nodeA8(Store) --> nodeA5(Result)
classDef carbon1 fill:#E9E9E9, stroke: #AEAEAE, color: #262626
classDef moss1 fill:#D7F3A2, stroke: #B1D272, color: #262626
classDef steel1 fill:#B9D6E5, stroke: #86B3CA, color: #262626
classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626
class nodeA2 daisy1
class nodeA5 moss1
class nodeA8 carbon1
class nodeA1,nodeA3,nodeA6,nodeA7 steel1
```

By default, memory operations assume vector memory access; if scalar access is needed, the special passes `ReplaceLoadsWithScalarLoads` and `ReplaceStoresWithScalarStores` should be executed.

###### Explicit broadcast

For each operation in the body function, inputs are checked against broadcasting. If parameters need to be broadcast, an explicit broadcast operation is generated. For example, if for the subgraph above we have `<1, 42, 17, 31>` and `<1, 42, 17, 1>`, the resulting subgraph is going to be:

```mermaid
flowchart LR
nodeA1("Parameter\n<1, 42, 17, 1>") --> node6("Load\n<1, 42, 17, 1>")
node6("Load\n<1, 42, 17, 1>") --> nodeA9("BroadcastMove\n<1, 42, 17, 31>")
nodeA9("BroadcastMove\n<1, 42, 17, 31>") --> nodeA2(Add)
nodeA3("Parameter\n<1, 42, 17, 31>") --> nodeA7("Load\n<1, 42, 17, 31>")
nodeA7("Load\n<1, 42, 17, 31>") ---> nodeA2(Add)
nodeA2(Add) --> nodeA8("Store\n<1, 42, 17, 31>")
nodeA8("Store\n<1, 42, 17, 31>") --> nodeA5("Result\n<1, 42, 17, 31>")
classDef carbon1 fill:#E9E9E9, stroke: #AEAEAE, color: #262626
classDef moss1 fill:#D7F3A2, stroke: #B1D272, color: #262626
classDef steel1 fill:#B9D6E5, stroke: #86B3CA, color: #262626
classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626
class nodeA2 daisy1
class nodeA5 moss1
class nodeA8,nodeA9 carbon1
class nodeA1,nodeA3,node6,nodeA7 steel1
```

If a load followed by a broadcast is detected, this pair is replaced by a single broadcast load instruction, like the following:

```mermaid
flowchart LR
nodeA1(Parameter) --> nodeA6(BroadcastLoad)
nodeA6(BroadcastLoad) --> nodeA2(Add)
nodeA3(Parameter) --> nodeA7(Load)
nodeA7(Load) --> nodeA2(Add)
nodeA2(Add) --> nodeA8(Store)
nodeA8(Store) --> nodeA5(Result)
classDef carbon1 fill:#E9E9E9, stroke: #AEAEAE, color: #262626
classDef moss1 fill:#D7F3A2, stroke: #B1D272, color: #262626
classDef steel1 fill:#B9D6E5, stroke: #86B3CA, color: #262626
classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626
class nodeA2 daisy1
class nodeA5 moss1
class nodeA8 carbon1
class nodeA1,nodeA3,nodeA6,nodeA7 steel1
```

Broadcast and regular streaming vector loads are possible from the same pointer. A broadcast load should always go before a streaming load. A broadcast load for a dimension other than the most varying one is not generated; however, it affects the generated schedule.

#### Target-specific optimizations

Target developers can plug specific optimizations into the code generation pipeline by passing an `ngraph::pass::Manager` into the `generate` function of the `subgraph`. **Passes are executed on the subgraph in canonical form converted to the snippets dialect.**

*It might also be extended to provide an interface for target-independent optimizations in the future.*

#### Register allocation

A canonicalized subgraph in the snippets dialect forms a basic block or region inside a snippet (kernel). Registers are allocated globally for the whole subgraph. Since all operations for a subgraph are assumed to be vector, only vector registers are allocated for the first generation of SnippetS. The linear scan register allocation algorithm is used. The register allocator is implemented as the function pass `ngraph::snippets::pass::AssignRegisters`, which stores the allocated registers for each node in `rt_info`. `rt_info` for a node holds the register for the node's output. *However, this part should be refactored better, either to become target-independent or to use a target-specific abstraction to acquire a new register.*
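A toy illustration of the linear scan strategy over `[start, end)` live intervals; a sketch only, since the real `AssignRegisters` pass works on nGraph nodes and writes to `rt_info`:

```cpp
// Toy linear scan over [start, end) live intervals. Intervals that cannot get
// one of the (pretend) two registers would have to be spilled.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Interval { int start, end, reg; };

int main() {
    std::vector<Interval> ivs = {{0, 4, -1}, {1, 3, -1}, {2, 6, -1}};
    const int num_regs = 2;                    // pretend the target has 2 vector regs
    std::vector<int> free_from(num_regs, 0);   // point where each register frees up
    std::sort(ivs.begin(), ivs.end(),
              [](const Interval& a, const Interval& b) { return a.start < b.start; });
    for (auto& iv : ivs)
        for (int r = 0; r < num_regs; ++r)
            if (free_from[r] <= iv.start) {    // register free for the whole interval
                iv.reg = r;
                free_from[r] = iv.end;
                break;
            }
    for (const auto& iv : ivs)
        if (iv.reg >= 0)
            std::printf("[%d,%d) -> v%d\n", iv.start, iv.end, iv.reg);
        else
            std::printf("[%d,%d) -> spill\n", iv.start, iv.end);
}
```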

#### Schedule generation

The goal of this step is to transform subgraphs in scalar notation into kernel functions callable from user code. `Kernel` and `Tile` operations are introduced for this purpose. Each of these operations has a constructor from a code region described as a collection of operation-and-operands pairs: `Kernel(const std::vector<std::pair<std::shared_ptr<ngraph::snippets::Emitter>, ngraph::snippets::RegInfo>>& region);`.

If we return to the example above, this comes to the following hierarchical IR. If we limit the scope to layout-oblivious operations with broadcasting support, a tile can be generated as a single loop over the most varying dimension. The second `Tile` is generated to handle tails and can be omitted if not needed. A special pass replaces vector memory operations with scalar versions for the tail subgraph.

```mermaid
graph LR
subgraph subgraphD1[ ]
nodeD1(Data)
end
subgraph subgraphC1[Kernel]
direction LR
subgraph subgraphA1[Tile]
nodeA1(Parameter) --> nodeA6(Load)
nodeA6(Load) --> nodeA2(Add)
nodeA3(Parameter) --> nodeA7(Load)
nodeA7(Load) --> nodeA2(Add)
nodeA2(Add) --> nodeA8(Store)
nodeA8(Store) --> nodeA5(Result)
end
subgraph subgraphB1[Tile]
nodeB1(Parameter) --> nodeB6(ScalarLoad)
nodeB6(ScalarLoad) --> nodeB2(Add)
nodeB3(Parameter) --> nodeB7(ScalarLoad)
nodeB7(ScalarLoad) --> nodeB2(Add)
nodeB2(Add) --> nodeB8(ScalarStore)
nodeB8(ScalarStore) --> nodeB5(Result)
end
end
classDef no-stroke fill:none,stroke-width:0px
classDef no-bg-color fill:none,stroke-width:1px,stroke:#86B3CA
classDef carbon1 fill:#E9E9E9, stroke: #AEAEAE, color: #262626
classDef moss1 fill:#D7F3A2, stroke: #B1D272, color: #262626
classDef steel1 fill:#B9D6E5, stroke: #86B3CA, color: #262626
classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626
class subgraphC1,subgraphA1,subgraphB1,subgraphD1 no-bg-color
class nodeA2,nodeB2 daisy1
class nodeA5,nodeB5 moss1
class nodeA8,nodeB8 carbon1
class nodeA1,nodeA3,nodeA6,nodeA7,nodeB1,nodeB3,nodeB6,nodeB7 steel1
class nodeD1 no-stroke
```

Where:
* `Kernel` contains the collection of tiles, corresponds to a `Subgraph` node, is responsible for function signature generation, and calls the generators for all tiles and data sections
* `Tile` contains a single subgraph body, vector or scalar
* `Data` corresponds to the data section aggregated for all nodes in all Tiles' subgraphs

#### Target code emission

Target code emission is table-based. The target is responsible for filling the `jitters` table field in the `Generator` class.

```
std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>> jitters;
```

##### Interface with a target

An OpenVINO plugin is treated as a target for snippets.

Each nGraph node is mapped to a converter function which creates an `Emitter` from this node. Each specific emitter should extend `Emitter`. It is used to map this node to target code and has `emit_code` and `emit_data` methods. `emit_data` is used during data section generation. All operations from the snippets dialect which are legal for code generation should be expressed both as operations derived from nGraph Op and as classes derived from `snippets::Emitter` which know how to translate the Op to the target-specific ISA (e.g. xbyak is a JIT backend for the CPU plugin).

For minimal code generator support, a target should provide emitters for the following operations:

* `Kernel`
* `Tile`
* `Data`
* `Load`
* `ScalarLoad`
* `BroadcastLoad`
* `Store`
* `ScalarStore`

Once a schedule is generated, target code is emitted from a kernel in the `Generator::generate` method by executing the `Kernel::emit_code` function. Since `Kernel` and `Tile` represent a hierarchical IR, emission proceeds hierarchically as well: the `Kernel` emitter invokes emission for the tiles it contains.

##### Dialect extensibility

A target can potentially extend the snippets dialect with target-specific operations for code emission. It should implement:

* an nGraph operation (ex. `class FMA : public ngraph::op::Op`)
* an Emitter for this operation (ex. `class FmaEmitter : public Emitter`)
* registration of this pair in the `jitters` map

A self-contained sketch of this registration pattern is shown below.
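A minimal, self-contained model of the table-based registration, using simplified stand-in types; the real table maps `ngraph::DiscreteTypeInfo` to emitter factories as shown in the `jitters` declaration above, and `FMA`/`FmaEmitter` are the hypothetical names from the list:

```cpp
// Simplified stand-ins: the actual jitters table maps DiscreteTypeInfo to
// factories producing target-specific Emitters; strings stand in for type info.
#include <functional>
#include <map>
#include <memory>
#include <string>

struct Node { std::string type; };                 // stand-in for ngraph::Node
struct Emitter { virtual ~Emitter() = default; };  // stand-in for snippets::Emitter
struct FmaEmitter : Emitter {};                    // hypothetical target emitter

using EmitterFactory = std::function<std::shared_ptr<Emitter>(const std::shared_ptr<Node>&)>;

int main() {
    std::map<std::string, EmitterFactory> jitters;
    jitters["FMA"] = [](const std::shared_ptr<Node>&) { return std::make_shared<FmaEmitter>(); };

    auto node = std::make_shared<Node>(Node{"FMA"});
    auto emitter = jitters.at(node->type)(node);   // table-based lookup and creation
    return emitter ? 0 : 1;
}
```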

### Calling convention

Parameters for a generated snippet are split into schedule-invariant and schedule-dependent. Schedule-invariant parameters include pointers to the input/output tensors and strides for each of them, with the same rank as the scheduling domain.
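A sketch of what the schedule-invariant part could look like, assuming the 6D-normalized domain described earlier; the names here are illustrative, not the actual ABI:

```cpp
// Illustrative only: per-tensor base pointer plus strides whose rank matches
// the scheduling domain (6D after domain normalization); not the actual ABI.
#include <array>
#include <cstddef>

constexpr std::size_t kDomainRank = 6;

struct TensorArg {
    void* data;                                        // base pointer of the tensor
    std::array<std::ptrdiff_t, kDomainRank> strides;   // one stride per domain axis
};
```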

### Diagnostics

#### Reference mode

A subgraph can be executed with nGraph references if no generator is present.

## See also
* [OpenVINO™ README](../../../../README.md)
* [OpenVINO SnippetS](../README.md)
* [OpenVINO Core Components](../../../README.md)
* [Developer documentation](../../../../docs/dev/index.md)
@ -23,3 +23,8 @@ ie_faster_build(${TARGET_NAME}
    UNITY
    PCH PRIVATE "src/precomp.hpp"
)

add_library(snippets_test_utils STATIC ${CMAKE_CURRENT_SOURCE_DIR}/include/lowering_utils.hpp
                                       ${CMAKE_CURRENT_SOURCE_DIR}/src/lowering_utils.cpp)
target_include_directories(snippets_test_utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(snippets_test_utils PRIVATE commonTestUtils snippetsNgraphFunctions)
@ -26,7 +26,7 @@ public:

class DummyTargetMachine : public ngraph::snippets::TargetMachine {
public:
    DummyTargetMachine();
    DummyTargetMachine(const std::vector<ov::Node::type_info_t>& custom_opset = {});
    bool is_supported() const override { return true; }
    ngraph::snippets::code get_snippet() const override { return nullptr; }
    size_t get_lanes() const override { return 10; }
@ -35,6 +35,7 @@ public:
class DummyGenerator : public ngraph::snippets::Generator {
public:
    DummyGenerator() : ngraph::snippets::Generator(std::make_shared<DummyTargetMachine>()) {}
    DummyGenerator(const std::shared_ptr<ngraph::snippets::TargetMachine>& t) : ngraph::snippets::Generator(t) {}
};

class LoweringTests : public TransformationTestsF {
@ -47,7 +48,9 @@ public:
protected:
    static std::shared_ptr<ngraph::snippets::op::Subgraph> getSubgraph(const std::shared_ptr<Model>& f);
    static std::shared_ptr<ngraph::snippets::op::Subgraph> getLoweredSubgraph(const std::shared_ptr<Model>& f,
                                                                              const ov::PartialShape& master_shape);
                                                                              const ov::PartialShape& master_shape,
                                                                              ov::pass::Manager target_optimizations = {},
                                                                              const std::shared_ptr<ngraph::snippets::Generator> generator = nullptr);
    static std::shared_ptr<ngraph::snippets::op::Subgraph> getTokenizedSubgraph(const std::shared_ptr<Model>& f);
    ov::PartialShape master_shape{};
};
@ -11,7 +11,7 @@ namespace ov {
namespace test {
namespace snippets {

DummyTargetMachine::DummyTargetMachine() {
DummyTargetMachine::DummyTargetMachine(const std::vector<ov::Node::type_info_t>& custom_opset) {
    auto dummy_functor = [](const std::shared_ptr<ngraph::Node>& n) {
        return std::make_shared<DummyEmitter>();
    };
@ -41,6 +41,10 @@ DummyTargetMachine::DummyTargetMachine() {
    jitters[ngraph::snippets::op::Buffer::get_type_info_static()] = dummy_functor;
    jitters[ngraph::snippets::op::VectorBuffer::get_type_info_static()] = dummy_functor;
    jitters[ngraph::snippets::op::Fill::get_type_info_static()] = dummy_functor;

    for (const auto& elem : custom_opset) {
        jitters[elem] = dummy_functor;
    }
}

LoweringTests::LoweringTests() : TransformationTestsF() {
@ -92,9 +96,11 @@ std::shared_ptr<ngraph::snippets::op::Subgraph> LoweringTests::getSubgraph(const
}

std::shared_ptr<ngraph::snippets::op::Subgraph> LoweringTests::getLoweredSubgraph(const std::shared_ptr<Model> &f,
                                                                                  const ov::PartialShape& master_shape) {
                                                                                  const ov::PartialShape& master_shape,
                                                                                  ov::pass::Manager target_optimizations,
                                                                                  const std::shared_ptr<ngraph::snippets::Generator> generator) {
    auto subgraph = getTokenizedSubgraph(f);
    subgraph->set_generator(std::make_shared<DummyGenerator>());
    subgraph->set_generator(generator == nullptr ? std::make_shared<DummyGenerator>() : generator);
    subgraph->set_master_shape(master_shape);
    const auto& body = subgraph->body_ptr();
    auto& body_rt_info = body->get_rt_info();
@ -103,19 +109,17 @@ std::shared_ptr<ngraph::snippets::op::Subgraph> LoweringTests::getLoweredSubgrap
    std::vector<std::vector<size_t>> new_shapes;
    for (const auto& p : body->get_parameters()) {
        const auto pshape = p->get_output_partial_shape(0);
        if (pshape.is_dynamic())
            IE_THROW() << "getLoweredSubgraph supports only static shapes";
        OPENVINO_ASSERT(pshape.is_static(), "getLoweredSubgraph supports only static shapes");
        new_shapes.push_back(pshape.get_shape());
    }
    for (const auto& r : body->get_results()) {
        const auto pshape = r->get_input_partial_shape(0);
        if (pshape.is_dynamic())
            IE_THROW() << "getLoweredSubgraph supports only static shapes";
        OPENVINO_ASSERT(pshape.is_static(), "getLoweredSubgraph supports only static shapes");
        new_shapes.push_back(pshape.get_shape());
    }
    body_rt_info["PluginShapesOverride"] = new_shapes;
    subgraph->set_tile_rank(2);
    subgraph->generate();
    subgraph->generate(target_optimizations);
    return subgraph;
}
@ -6,7 +6,6 @@

#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/variant.hpp>

#include <snippets/snippets_isa.hpp>
#include <snippets/pass/assign_registers.hpp>
@ -27,8 +27,8 @@ std::shared_ptr<ov::Node> change_constant_precision_to_fp16(std::shared_ptr<ov::

    int num_out_of_range = 0;
    for (size_t i = 0; i < size; ++i) {
        // if it is smaller than the smallest positive normal fp16 but not zero
        if (std::abs(src_data[i]) <= ov::float16::from_bits(0x0400) && src_data[i] != 0.0f) {
        // if abs value is smaller than the smallest positive fp16, but not zero
        if (std::abs(src_data[i]) < ov::float16::from_bits(0x0001) && src_data[i] != 0.0f) {
            num_out_of_range++;
        } else if (src_data[i] > std::numeric_limits<ov::float16>::max()) {
            dst_data[i] = std::numeric_limits<ov::float16>::max();
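For context on the two bit patterns compared above (standard IEEE fp16 values, computed here with `ldexp` rather than OpenVINO types): `0x0400` encodes the smallest positive normal and `0x0001` the smallest positive subnormal, so the new check only counts values that fall below the entire subnormal range.

```cpp
// IEEE fp16 reference points for the thresholds above: 0x0400 is the smallest
// positive normal (2^-14), 0x0001 the smallest positive subnormal (2^-24).
#include <cmath>
#include <cstdio>

int main() {
    std::printf("smallest normal fp16    (0x0400) = %g\n", std::ldexp(1.0, -14));  // ~6.1035e-05
    std::printf("smallest subnormal fp16 (0x0001) = %g\n", std::ldexp(1.0, -24));  // ~5.9605e-08
}
```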
@ -273,13 +273,13 @@ static bool eliminate_unsqueeze(const shared_ptr<Node>& node) {

#define ECHO(NAME) #NAME
#define STR(NAME) ECHO(NAME)
#define SIMPLE_MATCHER_PASS_DEFINITION(NAME, OP, FUNC) \
#define SIMPLE_MATCHER_PASS_DEFINITION(NAME, FUNC, ...) \
    class NAME : public ov::pass::MatcherPass { \
    public: \
        OPENVINO_RTTI(STR(NAME), "0"); \
        NAME() { \
            MATCHER_SCOPE(NAME); \
            auto match_node = ov::pass::pattern::wrap_type<OP>(); \
            auto match_node = ov::pass::pattern::wrap_type<__VA_ARGS__>(); \
            ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { \
                return FUNC(m.get_match_root()); \
            }; \
@ -288,10 +288,10 @@ static bool eliminate_unsqueeze(const shared_ptr<Node>& node) {
        } \
    };

SIMPLE_MATCHER_PASS_DEFINITION(EliminateReshape, opset3::Reshape, eliminate_reshape_v1);
SIMPLE_MATCHER_PASS_DEFINITION(EliminateUnsqueeze, opset3::Unsqueeze, eliminate_unsqueeze);
SIMPLE_MATCHER_PASS_DEFINITION(EliminateBroadcast, op::v1::Broadcast, eliminate_nop);
SIMPLE_MATCHER_PASS_DEFINITION(EliminateGather, opset3::Gather, simplify_gather);
SIMPLE_MATCHER_PASS_DEFINITION(EliminateReshape, eliminate_reshape_v1, opset3::Reshape);
SIMPLE_MATCHER_PASS_DEFINITION(EliminateUnsqueeze, eliminate_unsqueeze, opset3::Unsqueeze);
SIMPLE_MATCHER_PASS_DEFINITION(EliminateBroadcast, eliminate_nop, op::v1::Broadcast, op::v3::Broadcast);
SIMPLE_MATCHER_PASS_DEFINITION(EliminateGather, simplify_gather, opset3::Gather);

pass::EliminatePad::EliminatePad() {
    MATCHER_SCOPE(EliminatePad);
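The macro change above moves the op type(s) to a trailing `__VA_ARGS__` so a single pass can match several op types at once (as `EliminateBroadcast` now does for v1 and v3). A standalone model of that forwarding, using toy types rather than the OpenVINO pattern API:

```cpp
// Toy model of the __VA_ARGS__ forwarding used above: trailing macro arguments
// become a template argument pack, so one or many types work equally well.
#include <cstddef>
#include <cstdio>

template <typename... Ops>
constexpr std::size_t matched_type_count() { return sizeof...(Ops); }

#define COUNT_MATCHED(...) matched_type_count<__VA_ARGS__>()

struct BroadcastV1 {};
struct BroadcastV3 {};

int main() {
    std::printf("%zu\n", COUNT_MATCHED(BroadcastV1));               // prints 1
    std::printf("%zu\n", COUNT_MATCHED(BroadcastV1, BroadcastV3));  // prints 2
}
```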
@ -46,12 +46,12 @@ static void replace_body_parameter(const std::shared_ptr<ov::Model>& body,
}

static void update_multi_sub_graph_op_inputs(const std::shared_ptr<MultiSubGraphOp>& multi_sub_graph_op,
                                             int remove_inputs_mask) {
                                             const std::vector<bool>& remove_inputs_mask) {
    int num_subgraphs = static_cast<int>(multi_sub_graph_op->get_internal_subgraphs_size());
    auto inputs = multi_sub_graph_op->input_values();
    for (size_t i = multi_sub_graph_op->get_input_size(); i > 0; i--) {
        const auto input_index = i - 1;
        if ((remove_inputs_mask & (1 << input_index)) != 0) {
        if (remove_inputs_mask[input_index]) {
            // remove MultiSubGraphOp's input if it was marked to be removed
            // (meaning it was constfolded and pushed to inner subgraph)
            inputs.erase(inputs.begin() + input_index);
@ -83,7 +83,7 @@ bool ov::pass::PushConstantToSubgraph::run_on_model(const std::shared_ptr<Model>
    // cache for already constant folded inputs
    std::unordered_map<size_t, std::shared_ptr<op::v0::Constant>> cache;
    // bitmask describing which MultiSubGraphOp's input to remove
    int remove_inputs_mask = 0;
    std::vector<bool> remove_inputs_mask(multi_sub_graph_op->get_input_size(), false);
    int num_subgraphs = static_cast<int>(multi_sub_graph_op->get_internal_subgraphs_size());

    for (int body_idx = 0; body_idx < num_subgraphs; body_idx++) {
@ -95,7 +95,7 @@ bool ov::pass::PushConstantToSubgraph::run_on_model(const std::shared_ptr<Model>
        const auto input_index = desc->m_input_index;
        const auto constant = try_constantfold_input(multi_sub_graph_op, desc, cache);
        if (!constant) {
            remove_inputs_mask &= ~(1 << input_index);
            remove_inputs_mask[input_index] = false;
            desc_it++;
            continue;
        }
@ -103,12 +103,12 @@ bool ov::pass::PushConstantToSubgraph::run_on_model(const std::shared_ptr<Model>
        desc_it = descriptions.erase(desc_it);
        auto& body_param = body_params[body_parameter_index];
        replace_body_parameter(body, body_param, body_parameter_index, constant, descriptions);
        remove_inputs_mask |= 1 << input_index;
        remove_inputs_mask[input_index] = true;
        result = true;
    }
}

if (remove_inputs_mask > 0) {
if (result) {
    update_multi_sub_graph_op_inputs(multi_sub_graph_op, remove_inputs_mask);
}
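The switch from an `int` bitmask to `std::vector<bool>` above removes the hard cap on the number of tracked inputs; a minimal illustration of the failure mode being avoided (the index value is hypothetical):

```cpp
// Why a plain int bitmask does not scale: shifting by >= the bit width of int
// is undefined behavior, so ops with more than ~31 inputs cannot be tracked.
#include <vector>

int main() {
    const int input_index = 40;          // hypothetical op with many inputs
    // int mask = 0;
    // mask |= 1 << input_index;         // UB for 32-bit int; silently wrong
    std::vector<bool> mask(64, false);   // sized from get_input_size() in the pass
    mask[input_index] = true;            // works for any number of inputs
    return mask[input_index] ? 0 : 1;
}
```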
@ -5,6 +5,7 @@
#include "transformations/common_optimizations/reverse_shape_and_type_infer.hpp"

#include "itt.hpp"
#include "openvino/core/validation_util.hpp"
#include "openvino/opsets/opset10.hpp"

using namespace ov::opset10;
@ -15,7 +16,7 @@ bool ov::pass::ReverseShapeAndTypeInfer::inherit_output_shape(const std::shared_
    auto output_shape = node->get_output_partial_shape(0);

    for (auto idx : input_idxs) {
        if (node->get_input_partial_shape(idx).rank().is_dynamic()) {
        if (idx < node->get_input_size() && node->get_input_partial_shape(idx).rank().is_dynamic()) {
            node->get_input_tensor(idx).m_partial_shape = output_shape;
            is_changed = true;
        }
@ -43,7 +44,7 @@ bool ov::pass::ReverseShapeAndTypeInfer::inherit_output_type(const std::shared_p
    auto output_type = node->get_output_element_type(0);

    for (auto idx : input_idxs) {
        if (node->get_input_element_type(idx).is_dynamic()) {
        if (idx < node->get_input_size() && node->get_input_element_type(idx).is_dynamic()) {
            node->get_input_tensor(idx).m_element_type = output_type;
            is_changed = true;
        }
@ -68,11 +69,41 @@ bool ov::pass::ReverseShapeAndTypeInfer::run_on_model(const std::shared_ptr<ov::
            param->set_element_type(output_type);
            is_changed = true;
        }
    } else if (std::dynamic_pointer_cast<Convolution>(op) ||
               std::dynamic_pointer_cast<GroupConvolutionBackpropData>(op) ||
               std::dynamic_pointer_cast<ConvolutionBackpropData>(op) ||
               std::dynamic_pointer_cast<GroupConvolution>(op)) {
    } else if (std::dynamic_pointer_cast<Convolution>(op)) {
        is_changed |= inherit_output_rank(op, {0, 1});
        // Inherit channels from weights
        const auto& weigths_pshape = op->get_input_partial_shape(1);
        if (weigths_pshape.rank().is_static() && op->get_input_partial_shape(1).rank().is_static() &&
            weigths_pshape[1] != 1) {
            op->get_input_tensor(0).m_partial_shape[1] = weigths_pshape[1];
        }
        is_changed |= inherit_output_type(op, {0, 1});
    } else if (std::dynamic_pointer_cast<GroupConvolution>(op)) {
        is_changed |= inherit_output_rank(op, {0, 1});
        // Inherit channels from weights
        const auto& weigths_pshape = op->get_input_partial_shape(1);
        if (weigths_pshape.rank().is_static() && op->get_input_partial_shape(1).rank().is_static() &&
            weigths_pshape[2] != 1) {
            op->get_input_tensor(0).m_partial_shape[1] = weigths_pshape[0] * weigths_pshape[2];
        }
        is_changed |= inherit_output_type(op, {0, 1});
    } else if (std::dynamic_pointer_cast<ConvolutionBackpropData>(op)) {
        is_changed |= inherit_output_rank(op, {0, 1});
        // Inherit channels from weights
        const auto& weigths_pshape = op->get_input_partial_shape(1);
        if (weigths_pshape.rank().is_static() && op->get_input_partial_shape(1).rank().is_static() &&
            weigths_pshape[0] != 1) {
            op->get_input_tensor(0).m_partial_shape[1] = weigths_pshape[0];
        }
        is_changed |= inherit_output_type(op, {0, 1});
    } else if (std::dynamic_pointer_cast<GroupConvolutionBackpropData>(op)) {
        is_changed |= inherit_output_rank(op, {0, 1});
        // Inherit channels from weights
        const auto& weigths_pshape = op->get_input_partial_shape(1);
        if (weigths_pshape.rank().is_static() && op->get_input_partial_shape(1).rank().is_static() &&
            weigths_pshape[1] != 1) {
            op->get_input_tensor(0).m_partial_shape[1] = weigths_pshape[0] * weigths_pshape[1];
        }
        is_changed |= inherit_output_type(op, {0, 1});
} else if (std::dynamic_pointer_cast<DeformableConvolution>(op)) {
|
||||
is_changed |= inherit_output_rank(op, {0, 1, 2, 3});
|
||||
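Note on the hunk above: splitting the combined branch lets each convolution flavour apply its own weights-layout arithmetic when reconstructing the data input's channel count. A standalone sketch of that arithmetic with made-up shapes (the weights layouts assumed here follow the OpenVINO opset conventions):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        // Convolution weights [C_out, C_in, k_h, k_w] -> data channels = w[1]
        std::vector<int64_t> conv_w = {64, 3, 3, 3};
        std::cout << "Convolution: " << conv_w[1] << "\n";  // 3

        // GroupConvolution weights [G, C_out/G, C_in/G, k_h, k_w]
        // -> data channels = G * (C_in/G) = w[0] * w[2]
        std::vector<int64_t> gconv_w = {2, 32, 8, 3, 3};
        std::cout << "GroupConvolution: " << gconv_w[0] * gconv_w[2] << "\n";  // 16

        // ConvolutionBackpropData weights [C_in, C_out, k_h, k_w] -> w[0]
        std::vector<int64_t> deconv_w = {64, 32, 2, 2};
        std::cout << "ConvolutionBackpropData: " << deconv_w[0] << "\n";  // 64

        // GroupConvolutionBackpropData weights [G, C_in/G, C_out/G, k_h, k_w]
        // -> w[0] * w[1]
        std::vector<int64_t> gdeconv_w = {2, 32, 16, 2, 2};
        std::cout << "GroupConvolutionBackpropData: " << gdeconv_w[0] * gdeconv_w[1] << "\n";  // 64
        return 0;
    }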
@ -111,6 +142,140 @@ bool ov::pass::ReverseShapeAndTypeInfer::run_on_model(const std::shared_ptr<ov::
                }
            }
            is_changed |= inherit_output_type(op, {0, 1});
        } else if (const auto& concat = std::dynamic_pointer_cast<Concat>(op)) {
            std::vector<size_t> input_idxs(op->get_input_size());
            std::iota(input_idxs.begin(), input_idxs.end(), 0);

            auto axis = concat->get_axis();
            if (output_shape.rank().is_static()) {
                if (axis < 0) {
                    axis = output_shape.rank().get_length() + axis;
                }
                auto input_pshape = output_shape;
                input_pshape[axis] = Dimension::dynamic();
                for (auto idx : input_idxs) {
                    if (idx < op->get_input_size() && op->get_input_partial_shape(idx).rank().is_dynamic()) {
                        op->get_input_tensor(idx).m_partial_shape = input_pshape;
                        is_changed = true;
                    }
                }
            }
            is_changed |= inherit_output_type(op, input_idxs);
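Note on the Concat branch above: every input shares the output shape except along the concatenation axis, which stays unknown per input, and the axis is first normalized from its negative form. A standalone sketch with plain vectors (hypothetical shapes, -1 standing in for a dynamic dimension):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<int64_t> output_shape = {1, 48, 224, 224};  // known Concat output
        int64_t axis = -3;                                      // channel axis, negative form

        const int64_t rank = static_cast<int64_t>(output_shape.size());
        if (axis < 0) {
            axis += rank;  // normalize: -3 -> 1
        }

        std::vector<int64_t> input_pshape = output_shape;
        input_pshape[static_cast<std::size_t>(axis)] = -1;  // only the concatenated dim is unknown

        for (int64_t d : input_pshape) {
            std::cout << d << " ";  // prints: 1 -1 224 224
        }
        std::cout << "\n";
        return 0;
    }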
        } else if (std::dynamic_pointer_cast<Slice>(op)) {
            is_changed |= inherit_output_rank(op, {0});
            is_changed |= inherit_output_type(op, {0});
        } else if (std::dynamic_pointer_cast<Squeeze>(op)) {
            auto in0_rank = op->get_input_partial_shape(0).rank();
            if (output_shape.rank().is_static() && in0_rank.is_dynamic() && op->get_input_size() > 1) {
                auto in1_pshape = op->get_input_partial_shape(1);
                if (in1_pshape.is_static()) {
                    auto num_dims = in1_pshape.size() == 0 ? 1 : in1_pshape[0].get_length();
                    op->get_input_tensor(0).m_partial_shape =
                        PartialShape::dynamic(output_shape.rank().get_length() + num_dims);
                }
            }
            is_changed |= inherit_output_type(op, {0});
        } else if (std::dynamic_pointer_cast<Unsqueeze>(op)) {
            auto in0_rank = op->get_input_partial_shape(0).rank();
            auto in1_pshape = op->get_input_partial_shape(1);
            if (output_shape.rank().is_static() && in0_rank.is_dynamic() && in1_pshape.is_static()) {
                auto num_dims = in1_pshape.size() == 0 ? 1 : in1_pshape[0].get_length();
                op->get_input_tensor(0).m_partial_shape =
                    PartialShape::dynamic(output_shape.rank().get_length() - num_dims);
            }
            is_changed |= inherit_output_type(op, {0});
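Note on the Squeeze/Unsqueeze branches above: they invert the forward rank change. Going backward, a Squeeze that dropped `num_dims` axes implies an input rank of output rank plus `num_dims`, and Unsqueeze is the mirror case; a rank-0 (scalar) axes input counts as one axis. A standalone sketch of the arithmetic (hypothetical values):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        const std::size_t output_rank = 4;

        // shape of the axes input: an empty vector models a scalar -> one axis
        std::vector<std::size_t> axes_shape = {};
        const std::size_t num_dims = axes_shape.empty() ? 1 : axes_shape[0];

        std::cout << "Squeeze input rank:   " << output_rank + num_dims << "\n";  // 5
        std::cout << "Unsqueeze input rank: " << output_rank - num_dims << "\n";  // 3
        return 0;
    }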
        } else if (const auto& if_op = std::dynamic_pointer_cast<If>(op)) {
            auto then_body = if_op->get_then_body();
            auto else_body = if_op->get_else_body();
            // First set types and shapes to Result nodes
            const auto& then_body_results = then_body->get_results();
            const auto& else_body_results = else_body->get_results();
            const auto& then_out_desc = if_op->get_output_descriptions(If::THEN_BODY_INDEX);
            const auto& else_out_desc = if_op->get_output_descriptions(If::ELSE_BODY_INDEX);
            for (const auto& out_desc : then_out_desc) {
                const auto& out_indx = out_desc->m_output_index;
                const auto& body_indx = out_desc->m_body_value_index;
                then_body_results[body_indx]->get_input_tensor(0).m_partial_shape =
                    if_op->get_output_partial_shape(out_indx);
                then_body_results[body_indx]->get_input_tensor(0).m_element_type =
                    if_op->get_output_element_type(out_indx);
            }
            for (const auto& out_desc : else_out_desc) {
                const auto& out_indx = out_desc->m_output_index;
                const auto& body_indx = out_desc->m_body_value_index;
                else_body_results[body_indx]->get_input_tensor(0).m_partial_shape =
                    if_op->get_output_partial_shape(out_indx);
                else_body_results[body_indx]->get_input_tensor(0).m_element_type =
                    if_op->get_output_element_type(out_indx);
            }
            is_changed |= run_on_model(then_body);
            is_changed |= run_on_model(else_body);
            auto then_body_params = then_body->get_parameters();
            auto else_body_params = else_body->get_parameters();
            const auto& then_in_desc = if_op->get_input_descriptions(If::THEN_BODY_INDEX);
            const auto& else_in_desc = if_op->get_input_descriptions(If::ELSE_BODY_INDEX);
            for (const auto& in_desc : then_in_desc) {
                const auto& in_indx = in_desc->m_input_index;
                const auto& body_indx = in_desc->m_body_parameter_index;
                if (if_op->get_input_tensor(in_indx).get_partial_shape().rank().is_dynamic()) {
                    if_op->get_input_tensor(in_indx).m_partial_shape =
                        then_body_params.at(body_indx)->get_partial_shape();
                    is_changed = true;
                }
                if (if_op->get_input_tensor(in_indx).get_element_type().is_dynamic()) {
                    if_op->get_input_tensor(in_indx).m_element_type =
                        then_body_params.at(body_indx)->get_element_type();
                    is_changed = true;
                }
            }
            for (const auto& in_desc : else_in_desc) {
                const auto& in_indx = in_desc->m_input_index;
                const auto& body_indx = in_desc->m_body_parameter_index;
                if (if_op->get_input_tensor(in_indx).get_partial_shape().rank().is_dynamic()) {
                    if_op->get_input_tensor(in_indx).m_partial_shape =
                        else_body_params.at(body_indx)->get_partial_shape();
                    is_changed = true;
                }
                if (if_op->get_input_tensor(in_indx).get_element_type().is_dynamic()) {
                    if_op->get_input_tensor(in_indx).m_element_type =
                        else_body_params.at(body_indx)->get_element_type();
                    is_changed = true;
                }
            }
            // Set type for If condition
            if (if_op->get_input_element_type(0).is_dynamic()) {
                if_op->get_input_tensor(0).m_element_type = element::boolean;
                is_changed = true;
            }
        } else if (std::dynamic_pointer_cast<ConvertLike>(op)) {
            is_changed |= inherit_output_shape(op, {0});
            is_changed |= inherit_output_type(op, {1});
        } else if (std::dynamic_pointer_cast<Transpose>(op)) {
            auto transpose_order = get_constant_from_source(op->input_value(1));
            if (output_shape.rank().is_static()) {
                if (transpose_order) {
                    // set more precise dimensions during reverse infer
                    // if transpose order is known
                    int64_t rank_length = output_shape.rank().get_length();
                    op->get_input_tensor(0).m_partial_shape = PartialShape::dynamic(output_shape.rank());
                    auto order_value = transpose_order->cast_vector<int64_t>();
                    OPENVINO_ASSERT(order_value.size() == static_cast<size_t>(rank_length),
                                    "The length of Transpose order and the input rank mismatch");
                    for (int64_t dim_idx = 0; dim_idx < rank_length; ++dim_idx) {
                        OPENVINO_ASSERT(0 <= order_value[dim_idx] && order_value[dim_idx] < rank_length,
                                        "Transpose order is out-of-range");
                        op->get_input_tensor(0).m_partial_shape[order_value[dim_idx]] = output_shape[dim_idx];
                    }
                    is_changed = true;
                } else {
                    is_changed |= inherit_output_rank(op, {0});
                }
            } else if (transpose_order) {
                auto order_value = transpose_order->cast_vector<int64_t>();
                op->get_input_tensor(0).m_partial_shape = PartialShape::dynamic(order_value.size());
                is_changed = true;
            }
            is_changed |= inherit_output_type(op, {0});
        }
    }
    return is_changed;
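Note on the Transpose branch above: when the order is a known constant, output dimension `i` came from input dimension `order[i]`, so known output dims can be scattered back through the permutation. A standalone sketch with plain vectors (hypothetical shapes, -1 = dynamic):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<int64_t> output_shape = {1, 224, 224, 3};  // NHWC result
        std::vector<int64_t> order = {0, 2, 3, 1};             // NCHW -> NHWC

        std::vector<int64_t> input_shape(output_shape.size(), -1);
        for (std::size_t i = 0; i < order.size(); ++i) {
            input_shape[order[i]] = output_shape[i];  // scatter back through the permutation
        }

        for (int64_t d : input_shape) {
            std::cout << d << " ";  // prints: 1 3 224 224 (NCHW recovered)
        }
        std::cout << "\n";
        return 0;
    }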