Merge branch 'master' into river/cpu_plugin_api_2.0

River.Li 2023-07-02 09:25:02 +08:00
commit 91fe9fa5df
292 changed files with 14774 additions and 6355 deletions


@ -449,6 +449,10 @@ jobs:
python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
displayName: 'Model Optimizer UT'
- script: |
python3 -m pytest -s $(REPO_DIR)/tools/ovc/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-OpenVinoConversion.xml
displayName: 'OpenVino Conversion UT'
- script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_cpu_func_tests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_func_tests.xml
displayName: 'CPU FuncTests'
condition: and(succeeded(), eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'OFF'))

.github/CODEOWNERS

@ -99,6 +99,7 @@
/tools/legacy/ @openvinotoolkit/openvino-samples-maintainers
/tools/openvino_dev/ @openvinotoolkit/openvino-tools-maintainers @openvinotoolkit/openvino-ie-python-api-maintainers
/tools/mo/ @openvinotoolkit/openvino-mo-maintainers
/tools/ovc/ @openvinotoolkit/openvino-mo-maintainers
/tools/pot/ @openvinotoolkit/openvino-pot-maintainers
/thirdparty/open_model_zoo/ @openvinotoolkit/omz-maintainers @openvinotoolkit/openvino-pot-maintainers

.github/labeler.yml

@ -87,6 +87,7 @@
'category: MO':
- 'tools/mo/**/*'
- 'tools/ovc/**/*'
'category: ONNX FE':
- 'src/frontends/onnx/**/*'


@ -87,7 +87,7 @@ macro(ov_cpack_settings)
# - 2022.1.1, 2022.2 do not have debian packages enabled, distributed only as archives
# - 2022.3 is the first release where Debian updated packages are introduced, others 2022.3.X are LTS
2022.3.0 2022.3.1 2022.3.2 2022.3.3 2022.3.4 2022.3.5
2023.0.0
2023.0.0 2023.0.1
)
#


@ -73,7 +73,7 @@ macro(ov_cpack_settings)
# - 2022.1.1, 2022.2 do not have rpm packages enabled, distributed only as archives
# - 2022.3 is the first release where RPM updated packages are introduced, others 2022.3.X are LTS
2022.3.0 2022.3.1 2022.3.2 2022.3.3 2022.3.4 2022.3.5
2023.0.0
2023.0.0 2023.0.1
)
find_host_program(rpmlint_PROGRAM NAMES rpmlint DOC "Path to rpmlint")


@ -70,6 +70,7 @@ ul#navbar-main-elements li:first-of-type {
ul#navbar-main-elements > li:hover {
text-decoration: underline;
color: #fff;
}


@ -3,13 +3,13 @@
#openvino-documentation > h1 {
display: none;
}
img {
cursor: default;
}
h1 {
font-size: var(--pst-font-size-h2);
margin-bottom: 3rem;
/*font-size: var(--pst-font-size-h2);*/
/*margin-bottom: 3rem;*/
display: none!important;
}
#ov-homepage-banner, .openvino-diagram, .ov-homepage-higlight-grid {
margin-bottom: 90px!important;
}


@ -1 +0,0 @@
const e=document.getElementById("selector");if(!e)throw new Error("cannot find selector document");window.addEventListener("message",i=>{e.style.height=i.data.height+"px"});var o,n;const t=(n=(o=e.contentDocument)==null?void 0:o.body)==null?void 0:n.offsetHeight;t&&(e.style.height=`${t}px`);


@ -0,0 +1 @@
const e=document.getElementById("selector");if(!e)throw new Error("cannot find selector document");window.addEventListener("message",t=>{t.origin===window.origin&&(t.data.type!=="size"||!t.data.height||(e.style.height=t.data.height+"px"))});var n,o;const i=(o=(n=e.contentDocument)==null?void 0:n.body)==null?void 0:o.offsetHeight;i&&(e.style.height=`${i}px`);


@ -1,7 +1,7 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="version" content="0290a24" />
<meta name="version" content="8db148d" />
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Download Intel® Distribution of OpenVINO™ Toolkit</title>
@ -9,14 +9,11 @@
name="description"
content="Download a version of the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows, or macOS."
/>
<script
type="module"
crossorigin
src="./assets/selector-363359f4.js"
></script>
<link rel="stylesheet" href="./assets/selector-5c3f26d1.css" />
<script type="module" crossorigin src="./assets/selector-860516f5.js"></script>
<link rel="stylesheet" href="./assets/selector-5c3f26d1.css">
</head>
<body>
<div id="root"></div>
</body>
</html>


@ -28,6 +28,7 @@ copyright = '2023, Intel®'
author = 'Intel®'
language = 'en'
version_name = 'nightly'
# -- General configuration ---------------------------------------------------
@ -48,7 +49,13 @@ extensions = [
html_baseurl = 'https://docs.openvino.ai/canonical/'
# -- Sitemap configuration ---------------------------
sitemap_url_scheme = "{link}"
site_url = f'https://docs.openvino.ai/{version_name}/'
# ----------------------------------------------------
html_favicon = '_static/favicon.ico'
autodoc_default_flags = ['members']


@ -1,7 +1,6 @@
.. OpenVINO Toolkit documentation master file, created by
sphinx-quickstart on Wed Jul 7 10:46:56 2021.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
============================
OpenVINO 2023.0
============================
.. meta::
:google-site-verification: _YqumYQ98cmXUTwtzM_0WIIadtDc6r_TMYGbmGgNvrk
@ -34,8 +33,6 @@
:align: center
.. grid:: 2 2 3 3
:class-container: ov-homepage-higlight-grid
@ -75,11 +72,8 @@
Reach for performance with post-training and training-time compression with NNCF
Feature Overview
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
##############################
.. grid:: 1 2 2 2
:class-container: ov-homepage-feature-grid
@ -109,9 +103,6 @@ Feature Overview
.. toctree::
:maxdepth: 2
:hidden:
@ -122,4 +113,5 @@ Feature Overview
DOCUMENTATION <documentation>
MODEL ZOO <model_zoo>
RESOURCES <resources>
RELEASE NOTES <https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html>
RELEASE NOTES <release_notes>


@ -7,5 +7,5 @@ OpenVINO™ Documentation
Install <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>
Blog <https://blog.openvino.ai/>
Forum <https://community.intel.com/t5/Intel-Distribution-of-OpenVINO/bd-p/distribution-openvino-toolkit>
Training <https://www.intel.com/content/www/us/en/developer/tools/devcloud/edge/learn/certification.html>
Support <https://www.intel.com/content/www/us/en/support/products/96066/software/development-software/openvino-toolkit.html>
GitHub <https://github.com/openvinotoolkit>


@ -20,8 +20,6 @@ To use a GPU device for OpenVINO inference, you must meet the following prerequi
- `Intel® Graphics Compiler for OpenCL™ <https://github.com/intel/intel-graphics-compiler>`__
- `OpenCL ICD loader package <https://github.com/KhronosGroup/OpenCL-ICD-Loader>`__
.. _wsl-instal:
Depending on your operating system, there may be different methods to install the above packages. Below are the instructions on how to install the packages on supported Linux distributions.
.. tab-set::
@ -92,6 +90,8 @@ To check if the driver has been installed:
Your device driver has been updated and is now ready to use your GPU.
.. _wsl-install:
Windows Subsystem for Linux (WSL)
#################################


@ -7,22 +7,27 @@ Supported operating systems for the Docker Base image:
- Ubuntu 22.04 LTS
- Ubuntu 20.04 LTS
- RedHat UBI 8
- Windows (WSL2)
.. important::
While Windows is listed as a supported system, there is no dedicated Docker Image for it. To work with Windows, use Windows Subsystem for Linux (WSL2).
The `Docker CI framework <https://github.com/openvinotoolkit/docker_ci/>`__ can generate a Dockerfile, build, test, and deploy an image using the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the OpenVINO™ image to your needs. You can get started easily with pre-built and published docker images. Details on how to get started can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__.
To start using them, the following conditions must be met:
- Linux OS or Windows Subsystem for Linux (WSL2)
- Linux OS or Windows (under :ref:`Windows Subsystem for Linux (WSL2) <wsl-install>`)
- Installed docker engine or compatible container engine
- Permissions to run containers (sudo or docker group membership)
OpenVINO's `Docker <https://docs.docker.com/>`__ and `Bare Metal <https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html#doxid-ovms-docs-deploying-server>` distributions are identical, so the documentation applies to both.
OpenVINO's `Docker <https://docs.docker.com/>`__ and `Bare Metal <https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html#doxid-ovms-docs-deploying-server>`__ distributions are identical, so the documentation applies to both.
.. note::
The OpenVINO development environment in a docker container is also available in the `notebook repository <https://github.com/openvinotoolkit/openvino_notebooks>`__ . It can be implemented in `OpenShift RedHat OpenData Science (RHODS) <https://github.com/openvinotoolkit/operator/blob/main/docs/notebook_in_rhods.md>`__.
ore information about Docker CI for Intel® Distribution of OpenVINO™ toolset can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
More information about Docker CI for Intel® Distribution of OpenVINO™ toolset can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
* `Docker CI framework for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
* `Get Started with DockerHub CI for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__


@ -75,7 +75,7 @@ Step 1: Download and Install OpenVINO Core Components
``C:\Program Files (x86)\Intel`` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer.
2. Download the `OpenVINO Runtime archive file for Windows <https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/>`__ to your local ``Downloads`` folder.
2. Download the `OpenVINO Runtime archive file for Windows <https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/windows/>`__ to your local ``Downloads`` folder.
If you prefer using command-lines, run the following commands in the command prompt window you opened:


@ -24,9 +24,9 @@ Install OpenVINO
.. raw:: html
<script type="module" crossorigin src="_static/selector-tool/assets/index-89e3365b.js"></script>
<script type="module" crossorigin src="_static/selector-tool/assets/index-f34d1fad.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<iframe id="selector" src="_static/selector-tool/selector-0290a24.html" style="width: 100%; border: none" title="Download Intel® Distribution of OpenVINO™ Toolkit"></iframe>
<iframe id="selector" src="_static/selector-tool/selector-8db148d.html" style="width: 100%; border: none" title="Download Intel® Distribution of OpenVINO™ Toolkit"></iframe>
OpenVINO installation package is distributed in two parts: OpenVINO Runtime and OpenVINO Development Tools.


@ -9,6 +9,7 @@
Use Archive <openvino_docs_install_guides_installing_openvino_from_archive_windows>
Use PyPI <openvino_docs_install_guides_installing_openvino_pip>
Use Conda Forge <openvino_docs_install_guides_installing_openvino_conda>
Use Docker <openvino_docs_install_guides_installing_openvino_docker_linux>
If you want to install OpenVINO™ Runtime on Windows, you have the following options:
@ -16,6 +17,7 @@ If you want to install OpenVINO™ Runtime on Windows, you have the following op
* :doc:`Install OpenVINO Runtime from an Archive File <openvino_docs_install_guides_installing_openvino_from_archive_windows>`
* :doc:`Install OpenVINO Runtime using PyPI <openvino_docs_install_guides_installing_openvino_pip>`
* :doc:`Install OpenVINO Runtime using Conda Forge <openvino_docs_install_guides_installing_openvino_conda>`
* :doc:`Install OpenVINO using Docker <openvino_docs_install_guides_installing_openvino_docker_linux>`
For a full selection of distribution channels,
see the `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__


@ -17,13 +17,23 @@ Please file a github Issue on these with the label “pre-release” so we can g
* NOT subject to official support.
* Subject to change in the future.
* Introduced to allow early testing and get early feedback from the community.
.. dropdown:: OpenVINO Toolkit 2023.0.0.dev20230427
.. dropdown:: OpenVINO Toolkit 2023.1.0.dev20230623
:open:
:animate: fade-in-slide-down
:color: primary
The first pre-release for OpenVINO 2023.1, focused on fixing bugs and performance issues.
`Check on GitHub <https://github.com/openvinotoolkit/openvino/releases/tag/2023.1.0.dev20230623>`__
.. dropdown:: OpenVINO Toolkit 2023.0.0.dev20230407
:animate: fade-in-slide-down
:color: secondary
Note that a new distribution channel has been introduced for C++ developers: `Conda Forge <https://anaconda.org/conda-forge/openvino>`__
(the 2022.3.0 release is available there now).


@ -0,0 +1,18 @@
# Release Notes {#release_notes}
@sphinxdirective
.. raw:: html
<meta http-equiv="Refresh" content="0; url='https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html'" />
.. toctree::
:hidden:
prerelease_information
The official OpenVINO Release Notes are published at `intel.com <https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html>`__
@endsphinxdirective


@ -8,7 +8,6 @@
:hidden:
openvino_docs_performance_benchmarks
prerelease_information
.. toctree::
:maxdepth: 1


@ -6,12 +6,14 @@ The OpenVINO team continues the effort to support as many models out-of-the-box
Based on our research and user feedback, we prioritize the most common models and test them
before every release. These models are considered officially supported.
.. button-link:: _static/download/OV_2023_models_supported.pdf
:color: primary
:outline:
:material-regular:`download;1.5em` Click for supported models [PDF]
The list is based on release 2023.0, as of June 01, 2023
| Note that the list provided here does not include all models supported by OpenVINO.
| If your model is not included but is similar to those that are, it is still very likely to work.
@ -22,30 +24,6 @@ before every release. These models are considered officially supported.
* As OpenVINO™ is open source you can enhance it with your own contribution to the GitHub repository. To learn more, see the articles on :doc:`OpenVINO Extensibility <openvino_docs_Extensibility_UG_Intro>`.
The following table summarizes the number of models supported by OpenVINO™ in different categories:
=========================================== ====================
Model Categories: Number of Models:
=========================================== ====================
Object Detection 149
Instance Segmentation 3
Semantic Segmentation 19
Image Processing, Enhancement 16
Monodepth 2
Colorization 2
Behavior / Decision Prediction 1
Action Recognition 2
Time Series Forecasting 1
Image Classification 68
Image Classification, Dual Path Network 1
Image Classification, Emotion 1
Image Translation 1
Natural language Processing 35
Text Detection 18
Audio Enhancement 3
Sound Classification 2
=========================================== ====================
@endsphinxdirective


@ -202,25 +202,25 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
elif [ "$os" == "centos8" ] || [ "$os" == "rhel8" ] || [ "$os" == "almalinux8.7" ] ; then
pkgs_core+=(
"https://vault.centos.org/centos/8/AppStream/$arch/os/Packages/tbb-2018.2-9.el8.$arch.rpm"
"https://download-ib01.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm"
"https://dl.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm"
"https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-2.1.2-6.el8.$arch.rpm"
)
pkgs_gpu+=("http://mirror.centos.org/centos/8-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.12-1.el8.$arch.rpm")
pkgs_python+=(python38 python38-pip)
pkgs_dev+=(
"https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-devel-2.1.2-6.el8.$arch.rpm"
"https://download-ib01.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/j/json-devel-3.6.1-2.el8.$arch.rpm"
"https://dl.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/j/json-devel-3.6.1-2.el8.$arch.rpm"
)
extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm")
elif [ "$os" == "rhel9.1" ] ; then
pkgs_core=(
"http://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/tbb-2020.3-8.el9.$arch.rpm"
"https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/p/pugixml-1.13-1.el9.$arch.rpm"
"https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-2.2.2-9.el9.$arch.rpm"
"https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/p/pugixml-1.13-1.el9.$arch.rpm"
"https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-2.2.2-9.el9.$arch.rpm"
)
pkgs_gpu+=("https://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.13-4.el9.$arch.rpm")
pkgs_python=(python3 python3-pip)
pkgs_dev+=("https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm")
pkgs_dev+=("https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm")
extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm")
fi
elif [ "$os" == "opensuse-leap15.3" ] ; then


@ -1,2 +1,3 @@
numpy>=1.16.6
singledispatchmethod; python_version<'3.8'
openvino-telemetry>=2023.0.0


@ -511,10 +511,8 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests)
if (code != InferenceEngine::StatusCode::OK) {
IE_EXCEPTION_SWITCH(code,
ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
std::stringstream{}
<< IE_LOCATION
<< InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<=
std::stringstream{});
}
auto end_time = Time::now();


@ -5,9 +5,9 @@
# mypy: ignore-errors
from openvino.tools.mo.moc_frontend.shape_utils import get_static_shape
from openvino.tools.mo.utils.versions_checker import get_environment_setup # pylint: disable=no-name-in-module
from openvino.tools.mo.utils.error import Error
from openvino.tools.ovc.moc_frontend.shape_utils import get_static_shape
from openvino.tools.ovc.environment_setup_utils import get_environment_setup # pylint: disable=no-name-in-module
from openvino.tools.ovc.error import Error
from distutils.version import LooseVersion
import logging as log


@ -67,6 +67,13 @@ from openvino.runtime.ie_api import tensor_from_file
from openvino.runtime.ie_api import compile_model
# Model Conversion API
try:
from openvino.tools.ovc import convert_model, InputCutInfo, LayoutMap
except ImportError:
pass
# Extend Node class to support binary operators
Node.__add__ = opset11.add
Node.__sub__ = opset11.subtract
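For readers of this change: the guarded import above exposes the new ovc conversion entry points directly from the `openvino` package when the tool is installed. A minimal usage sketch, assuming a local `model.onnx` and the pre-release `convert_model(path)` call style (both illustrative, not part of this diff):

import openvino.runtime as ov

try:
    from openvino.tools.ovc import convert_model  # made available by this change
except ImportError:
    convert_model = None  # ovc not installed

if convert_model is not None:
    ov_model = convert_model("model.onnx")           # hypothetical input file
    ov.serialize(ov_model, "model.xml")              # persist the converted IR
    compiled = ov.Core().compile_model(ov_model, "CPU")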


@ -58,7 +58,7 @@ class InferRequest(_InferRequestWrapper):
Tensors for every input in form of:
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
Data that is going to be copied:
* `numpy.ndarray` which are not C contiguous
* `numpy.ndarray` which are not C contiguous and/or not writable (WRITEABLE flag is set to False)
* inputs which data types are mismatched from Infer Request's inputs
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
@ -118,7 +118,7 @@ class InferRequest(_InferRequestWrapper):
Tensors for every input in form of:
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
Data that is going to be copied:
* `numpy.ndarray` which are not C contiguous
* `numpy.ndarray` which are not C contiguous and/or not writable (WRITEABLE flag is set to False)
* inputs which data types are mismatched from Infer Request's inputs
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
@ -246,7 +246,7 @@ class CompiledModel(CompiledModelBase):
Tensors for every input in form of:
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
Data that is going to be copied:
* `numpy.ndarray` which are not C contiguous
* `numpy.ndarray` which are not C contiguous and/or not writable (WRITEABLE flag is set to False)
* inputs which data types are mismatched from Infer Request's inputs
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
@ -340,7 +340,7 @@ class AsyncInferQueue(AsyncInferQueueBase):
Tensors for every input in form of:
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
Data that is going to be copied:
* `numpy.ndarray` which are not C contiguous
* `numpy.ndarray` which are not C contiguous and/or not writable (WRITEABLE flag is set to False)
* inputs which data types are mismatched from Infer Request's inputs
* inputs that should be in `BF16` data type
* scalar inputs (i.e. `np.float_`/`int`/`float`)
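A small illustration of the copy rule documented above, mirroring the behaviour exercised by the new test in this change: read-only NumPy arrays are always copied into the request, even when `shared_memory=True`. This is a sketch that assumes an already available single-input model at `model.xml` (hypothetical path).

import numpy as np
from openvino.runtime import Core

compiled = Core().compile_model("model.xml", "CPU")        # hypothetical model
request = compiled.create_infer_request()

data = np.frombuffer(b"\x01\x02\x03\x04", dtype=np.uint8)  # WRITEABLE flag is False
results = request.infer({0: data}, shared_memory=True)

# Despite shared_memory=True, a non-writable input gets its own copy.
assert not np.shares_memory(data, request.get_input_tensor(0).data)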


@ -70,6 +70,11 @@ def _(
tensor = Tensor(tensor_type, value.shape)
tensor.data[:] = value.view(tensor_dtype)
return tensor
# WA for "not writeable" edge-case, always copy.
if value.flags["WRITEABLE"] is False:
tensor = Tensor(tensor_type, value.shape)
tensor.data[:] = value.astype(tensor_dtype) if tensor_dtype != value.dtype else value
return tensor
# If types are mismatched, convert and always copy.
if tensor_dtype != value.dtype:
return Tensor(value.astype(tensor_dtype), shared_memory=False)


@ -1112,3 +1112,33 @@ def test_mixed_dynamic_infer(device, shared_flag, input_data):
else:
assert not np.shares_memory(input_data[0], input_tensor0.data)
assert not np.shares_memory(input_data[1], input_tensor1.data)
@pytest.mark.parametrize("shared_flag", [True, False])
@pytest.mark.parametrize(("input_data", "change_flags"), [
({0: np.frombuffer(b"\x01\x02\x03\x04", np.uint8)}, False),
({0: np.array([1, 2, 3, 4], dtype=np.uint8)}, True),
])
def test_not_writable_inputs_infer(device, shared_flag, input_data, change_flags):
if change_flags is True:
input_data[0].setflags(write=0)
# identity model
input_shape = [4]
param_node = ops.parameter(input_shape, np.uint8, name="data0")
core = Core()
model = Model(param_node, [param_node])
compiled = core.compile_model(model, "CPU")
results = compiled(input_data, shared_memory=shared_flag)
assert np.array_equal(results[0], input_data[0])
request = compiled.create_infer_request()
results = request.infer(input_data, shared_memory=shared_flag)
assert np.array_equal(results[0], input_data[0])
input_tensor = request.get_input_tensor(0)
# Not writable inputs should always be copied.
assert not np.shares_memory(input_data[0], input_tensor.data)


@ -175,6 +175,18 @@ PY_INSTALL_CFG = {
"install_dir": PY_PACKAGES_DIR,
"binary_dir": OPENVINO_PYTHON_BINARY_DIR,
},
"ovc": {
"entry_point": {
"console_scripts": [
"ovc = openvino.tools.ovc.main:main",
],
},
"name": f"pyopenvino_{PYTHON_VERSION}",
"prefix": f"{BUILD_BASE}/site-packages",
"source_dir": f"{OPENVINO_SOURCE_DIR}/tools/ovc",
"install_dir": PY_PACKAGES_DIR,
"binary_dir": "ovc",
},
# "benchmark_app": { # noqa: E731
# "entry_point": { # noqa: E731
# "console_scripts": [ # noqa: E731
@ -187,18 +199,6 @@ PY_INSTALL_CFG = {
# "install_dir": PY_PACKAGES_DIR, # noqa: E731
# "binary_dir": "benchmark_app", # noqa: E731
# }, # noqa: E731
# "model_optimizer": { # noqa: E731
# "entry_point": { # noqa: E731
# "console_scripts": [ # noqa: E731
# "mo = openvino.tools.mo.main:main", # noqa: E731
# ], # noqa: E731
# }, # noqa: E731
# "name": f"pyopenvino_{PYTHON_VERSION}", # noqa: E731
# "prefix": f"{BUILD_BASE}/site-packages", # noqa: E731
# "source_dir": f"{OPENVINO_SOURCE_DIR}/tools/mo", # noqa: E731
# "install_dir": PY_PACKAGES_DIR, # noqa: E731
# "binary_dir": "model_optimizer", # noqa: E731
# }, # noqa: E731
}


@ -76,11 +76,19 @@ public:
LinearIR::constExprIt loop_end_pos,
size_t loop_depth, size_t vector_size);
// Return Loop ID
template <typename T>
size_t mark_loop(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
size_t work_amount, size_t work_amount_increment, size_t dim_idx,
const std::vector<ExpressionPort>& entries,
const std::vector<ExpressionPort>& exits);
const std::vector<T>& entries,
const std::vector<T>& exits) {
const auto loop_info = std::make_shared<LoopManager::LoopInfo>(work_amount, work_amount_increment, dim_idx, entries, exits);
const auto loop_id = this->add_loop_info(loop_info);
for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
insert_loop_id(*expr_it, loop_id);
}
return loop_id;
}
void fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper = true);
void fuse_loops(LinearIR::constExprIt loop_begin_target, LinearIR::constExprIt loop_end_target,
@ -123,6 +131,8 @@ public:
LinearIR::constExprIt& loop_end_pos,
size_t loop_id, bool loop_ops_inserted = false);
LoopPort get_loop_port_by_expr_port(const ExpressionPort& expr_port, const size_t loop_id);
private:
static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,


@ -42,6 +42,12 @@ public:
FuseLoops();
bool run(LinearIR& linear_ir) override;
// This method checks that all ports which connect lower and upper loops are incremented.
// This helps to avoid fusing for ports with incomplete data
static bool loop_ports_are_compatible(const LinearIR::LoopManagerPtr& loop_manager,
const size_t loop_lower_id,
const size_t loop_upper_id);
private:
static bool can_be_fused(const LinearIR::LoopManager::LoopInfoPtr& loop_current,
const LinearIR::LoopManager::LoopInfoPtr& loop_target);


@ -0,0 +1,46 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "pass.hpp"
#include "snippets/lowered/loop_manager.hpp"
namespace ov {
namespace snippets {
namespace lowered {
namespace pass {
/**
* @interface SplitLoops
* @brief If loop_1 has a larger increment but the same work amount as loop_2, which follows loop_1, then split loop_2
* into two loops so that the outermost of the split loops can be fused with loop_1 by the `FuseLoops` pass.
* Example:
* Loop_1_begin Loop_1_begin Loop_1_begin
* ... ... ...
* Loop_1_end (wa = 128, inc = 32) Loop_1_end (wa = 128, inc = 32) Split_loop_2_begin
* ... Splitting ... Fusing ...
* Loop_2_begin => Split_loop_1_begin => Split_loop_2_end (wa = 32, inc = 1)
* ... Split_loop_2_begin ...
* Loop_2_end (wa = 128, inc = 1) ... Loop_1_end (wa = 128, inc = 32)
* Split_loop_2_end (wa = 32, inc = 1)
* Split_loop_1_end (wa = 128, inc = 32)
* @ingroup snippets
*/
class SplitLoops : public Pass {
public:
OPENVINO_RTTI("SplitLoops", "Pass")
SplitLoops();
bool run(LinearIR& linear_ir) override;
private:
static bool can_be_split(const LinearIR::LoopManager::LoopInfoPtr& current,
const LinearIR::LoopManager::LoopInfoPtr& target);
};
} // namespace pass
} // namespace lowered
} // namespace snippets
} // namespace ov


@ -104,12 +104,14 @@ public:
ov::pass::Manager& pre_common,
ov::pass::Manager& post_common,
ov::pass::Manager& post_precision,
lowered::pass::PassPipeline& target_lowered_markup_pipeline,
lowered::pass::PassPipeline& target_lowered_pipeline,
const void* compile_params = nullptr);
snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr);
snippets::Schedule generate(ov::pass::Manager& pre_common,
ov::pass::Manager& post_common,
ov::pass::Manager& post_precision,
lowered::pass::PassPipeline& target_lowered_markup_pipeline,
lowered::pass::PassPipeline& target_lowered_pipeline,
const void* compile_params = nullptr);
snippets::Schedule generate(const void* compile_params = nullptr);
@ -144,7 +146,9 @@ public:
private:
void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
void data_flow_transformations(ov::pass::Manager& pre_common, ov::pass::Manager& post_common, ov::pass::Manager& post_precision);
void control_flow_transformations(lowered::LinearIR& linear_ir, lowered::pass::PassPipeline& target_pipeline);
void control_flow_transformations(lowered::LinearIR& linear_ir,
lowered::pass::PassPipeline& target_markup_pipeline,
lowered::pass::PassPipeline& target_pipeline);
void init_config();
// Count of Subgraph virtual ports:
// - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition)


@ -113,6 +113,18 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir,
}
}
LinearIR::LoopManager::LoopPort LinearIR::LoopManager::get_loop_port_by_expr_port(const ExpressionPort& expr_port, const size_t loop_id) {
auto get_loop_port = [&](const std::vector<LinearIR::LoopManager::LoopPort>& ports) {
auto it = std::find_if(ports.cbegin(), ports.cend(), [&](const LinearIR::LoopManager::LoopPort& p) { return *p.expr_port == expr_port; });
if (it == ports.cend())
OPENVINO_THROW("Expression has not been found among loop ports. Loop id: " + std::to_string(loop_id));
return *it;
};
const auto& loop_info = get_loop_info(loop_id);
return expr_port.get_type() == ExpressionPort::Input ? get_loop_port(loop_info->entry_points)
: get_loop_port(loop_info->exit_points);
}
void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
std::vector<ExpressionPort> &entries,
@ -211,18 +223,6 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos,
}
}
size_t LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
size_t work_amount, size_t work_amount_increment, size_t dim_idx,
const std::vector<ExpressionPort>& entries,
const std::vector<ExpressionPort>& exits) {
const auto loop_info = std::make_shared<LoopManager::LoopInfo>(work_amount, work_amount_increment, dim_idx, entries, exits);
const auto loop_id = this->add_loop_info(loop_info);
for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
insert_loop_id(*expr_it, loop_id);
}
return loop_id;
}
void LinearIR::LoopManager::fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper) {
LinearIR::constExprIt loop_begin_target, loop_end_target;
get_loop_bounds(linear_ir, fuse_into_upper ? loop_id_lower : loop_id_upper, loop_begin_target, loop_end_target);


@ -24,6 +24,23 @@ using LoopInfoPtr = LoopManager::LoopInfoPtr;
FuseLoops::FuseLoops() : Pass() {}
bool FuseLoops::loop_ports_are_compatible(const LinearIR::LoopManagerPtr& loop_manager,
const size_t loop_lower_id,
const size_t loop_upper_id) {
const auto loop_lower = loop_manager->get_loop_info(loop_lower_id);
for (const auto& entry : loop_lower->entry_points) {
const auto& src_port = entry.expr_port->get_port_connector_ptr()->get_source();
if (is_loop_id_found(src_port.get_expr()->get_loop_ids(), loop_upper_id)) {
if (!entry.is_incremented)
return false;
auto src_loop_port = loop_manager->get_loop_port_by_expr_port(src_port, loop_upper_id);
if (!src_loop_port.is_incremented)
return false;
}
}
return true;
}
bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr& loop_target) {
auto current_work_amount = loop_current->work_amount;
auto target_work_amount = loop_target->work_amount;
@ -79,7 +96,7 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo
LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) {
const auto& loop_current = loop_manager->get_loop_info(current_loop_id);
const auto& loop_target = loop_manager->get_loop_info(target_loop_id);
if (!can_be_fused(loop_current, loop_target))
if (!can_be_fused(loop_current, loop_target) || !loop_ports_are_compatible(loop_manager, current_loop_id, target_loop_id))
return false;
// We can fuse Loop_up to Loop_down only in cases when other consumers of Loop_up are after Loop_down
@ -129,7 +146,7 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo
LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) {
const auto& loop_current = loop_manager->get_loop_info(current_loop_id);
const auto& loop_target = loop_manager->get_loop_info(target_loop_id);
if (!can_be_fused(loop_current, loop_target))
if (!can_be_fused(loop_current, loop_target) || !loop_ports_are_compatible(loop_manager, target_loop_id, current_loop_id))
return false;
// We can fuse Loop_down to Loop_up only in cases when other parents of Loop_down are before Loop_up


@ -51,10 +51,15 @@ void InitLoops::init_ptr_increments(std::vector<LoopPort>& loop_inputs, std::vec
const auto& layout = port->get_descriptor_ptr()->get_layout();
const auto& shape = port->get_descriptor_ptr()->get_shape();
const auto& dim = *(layout.rbegin() + dim_idx);
// Ticket: 113106
// WA: the current logic doesn't support the case with transposed output shape for brgemm layer
// but for all existing cases planar layout can be used
std::vector<size_t> planar(layout.size());
std::iota(planar.begin(), planar.end(), 0);
loop_output.ptr_increment = 0;
// If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout
if (loop_output.is_incremented && !(shape[dim] == 1 && work_amount != 1)) {
loop_output.ptr_increment = get_dim_stride(dim, layout, shape);
loop_output.ptr_increment = get_dim_stride(dim, planar, shape);
}
}
}


@ -4,9 +4,10 @@
#include "snippets/lowered/pass/insert_buffers.hpp"
#include "snippets/itt.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/snippets_isa.hpp"
#include "snippets/itt.hpp"
#include "snippets/utils.hpp"
namespace ov {
@ -28,6 +29,49 @@ std::vector<size_t> get_buffer_loop_ids(const std::vector<size_t>& lhs, const st
}
return buffer_loop_ids;
}
// Ticket: 113744
// TODO: This logic covers only several specific cases so it should be generalized.
ov::Shape compute_allocation_shape(const LinearIR::LoopManagerPtr& loop_manager,
const std::vector<size_t>& buffer_loop_ids,
const std::vector<size_t>& parent_loop_ids,
const ov::Output<ov::Node>& parent_output,
const int allocation_rank) {
const size_t rank = allocation_rank >= 0 ? allocation_rank : parent_output.get_shape().size();
ov::Shape allocation_shape(rank);
const auto port = lowered::PortDescriptorUtils::get_port_descriptor_ptr(parent_output);
const auto planar_shape = utils::get_reordered_planar_shape(ov::Shape{port->get_shape()}, port->get_layout());
for (size_t i = 0; i < rank; ++i) {
*(allocation_shape.rbegin() + i) = (planar_shape.rbegin() + i)->get_length();
}
if (buffer_loop_ids.empty() || parent_loop_ids.empty()) {
return allocation_shape;
}
auto set_rest_dims_to_ones = [&](const int filled_dims_count) {
for (int i = 0; i < static_cast<int>(allocation_shape.size()) - filled_dims_count; ++i) {
allocation_shape[i] = 1;
}
};
// In some cases it's possible to allocate a smaller shape
// 1. Buffer and its parent are in the same loop: allocation size for the outer dimension can be extracted from loop increment
// 2. Buffer is outside the parent's loops: allocation size can be extracted from the corresponding loop work amount
// TODO: Use general logic with the help of memory counts for allocation shape computation
if (buffer_loop_ids.back() == parent_loop_ids.back()) {
const auto buffer_loop = loop_manager->get_loop_info(buffer_loop_ids.back());
*(allocation_shape.rbegin() + 1) = buffer_loop->increment;
set_rest_dims_to_ones(2);
} else {
for (size_t i = 0; i < std::min(rank, parent_loop_ids.size()); ++i) {
const auto loop = loop_manager->get_loop_info(*(parent_loop_ids.rbegin() + i));
*(allocation_shape.rbegin() + i) = loop->work_amount;
}
set_rest_dims_to_ones(static_cast<int>(parent_loop_ids.size()));
}
return allocation_shape;
}
} // namespace
InsertBuffers::InsertBuffers(int32_t buffer_allocation_rank)
@ -110,7 +154,12 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt
// Current expr Loop identifies: 3, 4, 6
// Need to insert between 2nd and 4th Loops - after 2nd Loop
const auto pos = insertion_position(linear_ir, loop_manager, parent_expr, expr);
const auto buffer = std::make_shared<op::Buffer>(parent->output(parent_port), m_buffer_allocation_rank);
const auto allocation_shape = compute_allocation_shape(loop_manager,
buffer_loop_ids,
parent_loops,
parent->output(parent_port),
m_buffer_allocation_rank);
const auto buffer = std::make_shared<op::Buffer>(parent->output(parent_port), allocation_shape);
PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), parent_expr_output.get_descriptor_ptr()->clone());
// Output connector is automatically filled from PortDescriptor
const auto buffer_expr = linear_ir.create_expression(buffer, {input_connector});
@ -183,7 +232,12 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt
// Note: All potential consumers must have the same count of first equal Loop identifies and the same count of different last identifies
const auto pos = insertion_position(linear_ir, loop_manager, expr, (*potential_consumers.begin()).get_expr());
auto buffer = std::make_shared<op::Buffer>(node->output(port), m_buffer_allocation_rank);
const auto allocation_shape = compute_allocation_shape(loop_manager,
buffer_loop_ids,
current_loops,
node->output(port),
m_buffer_allocation_rank);
auto buffer = std::make_shared<op::Buffer>(node->output(port), allocation_shape);
PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), exit_port->get_descriptor_ptr()->clone());
// We cannot insert Node output connector on Buffer output because not all consumers of Node needs Buffer
// Example:


@ -0,0 +1,96 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/lowered/pass/split_loops.hpp"
#include "snippets/lowered/pass/fuse_loops.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/loop_manager.hpp"
#include "snippets/snippets_isa.hpp"
#include "snippets/itt.hpp"
namespace ov {
namespace snippets {
namespace lowered {
namespace pass {
using LoopManager = LinearIR::LoopManager;
using LoopInfoPtr = LoopManager::LoopInfoPtr;
SplitLoops::SplitLoops() : Pass() {}
bool SplitLoops::can_be_split(const LoopInfoPtr& current, const LoopInfoPtr& parent) {
return current->work_amount == parent->work_amount && current->dim_idx == parent->dim_idx &&
current->increment != parent->increment;
}
bool SplitLoops::run(LinearIR& linear_ir) {
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::SplitLoops")
if (linear_ir.empty())
return false;
const auto& loop_manager = linear_ir.get_loop_manager();
bool loop_was_split = false;
for (const auto& expr : linear_ir) {
const auto& loop_ids = expr->get_loop_ids();
if (loop_ids.empty())
continue;
// Ticket: 113755
// Note: we currently consider only the outermost loops for splitting
// Splitting could also be done in a more general case, but the split loop and its parent must always
// be in the same set of outer loops. Otherwise they won't be fused.
const auto& loop_id = loop_ids.front();
const auto loop = loop_manager->get_loop_info(loop_id);
for (const auto& entry_point : loop->entry_points) {
const auto& parent_port = entry_point.expr_port->get_port_connector_ptr()->get_source();
const auto& parent_expr = parent_port.get_expr();
const auto parent_loop_ids = parent_expr->get_loop_ids();
if (parent_loop_ids.empty())
continue;
const auto& parent_loop_id = parent_loop_ids.front();
const auto parent_loop_port = loop_manager->get_loop_port_by_expr_port(parent_port, parent_loop_id);
// We don't split loops that are not compatible with the parent loop because such loops will not be fused
if (!FuseLoops::loop_ports_are_compatible(loop_manager, loop_id, parent_loop_id))
continue;
const auto parent_loop = loop_manager->get_loop_info(parent_loop_id);
if (can_be_split(loop, parent_loop)) {
loop_was_split = true;
const bool split_parent = parent_loop->increment < loop->increment;
const auto& loop_to_split = split_parent ? parent_loop : loop;
const auto& loop_to_split_id = split_parent ? parent_loop_id : loop_id;
const auto& loop_to_fuse = !split_parent ? parent_loop : loop;
loop_to_split->work_amount = loop_to_fuse->increment;
LinearIR::constExprIt loop_begin_pos, loop_end_pos;
LoopManager::get_loop_bounds(linear_ir,
loop_to_split->entry_points,
loop_to_split->exit_points,
loop_begin_pos,
loop_end_pos,
loop_to_split_id);
const auto split_loop_id = loop_manager->mark_loop(loop_begin_pos,
loop_end_pos,
loop_to_fuse->work_amount,
loop_to_fuse->increment,
loop_to_split->dim_idx,
loop_to_split->entry_points,
loop_to_split->exit_points);
loop_manager->get_loop_info(split_loop_id)->outer_splited_loop = true;
break;
}
}
}
// Ticket: 113666
// FuseLoops pass is explicitly run here in order to avoid unnecessary computations
// in case if loops are not split but FuseLoops is registered in pass manager after SplitLoops
if (loop_was_split)
FuseLoops().run(linear_ir);
return loop_was_split;
}
} // namespace pass
} // namespace lowered
} // namespace snippets
} // namespace ov


@ -24,6 +24,7 @@
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/pass/assign_registers.hpp"
#include "snippets/lowered/pass/mark_loops.hpp"
#include "snippets/lowered/pass/split_loops.hpp"
#include "snippets/lowered/pass/fuse_loops.hpp"
#include "snippets/lowered/pass/init_loops.hpp"
#include "snippets/lowered/pass/insert_buffers.hpp"
@ -507,6 +508,7 @@ void snippets::op::Subgraph::data_flow_transformations(ov::pass::Manager& pre_co
}
void snippets::op::Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir,
lowered::pass::PassPipeline& target_markup_pipeline,
lowered::pass::PassPipeline& target_pipeline) {
INTERNAL_OP_SCOPE(Subgraph);
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::control_flow_transformations")
@ -514,10 +516,15 @@ void snippets::op::Subgraph::control_flow_transformations(lowered::LinearIR& lin
const size_t vector_size = get_generator()->get_target_machine()->get_lanes();
const int32_t buffer_allocation_rank = static_cast<int32_t>(linear_ir.get_config().m_loop_depth);
// Ticket: 113666
// TODO: Make pass pipeline with backend passes more flexible
target_markup_pipeline.run(linear_ir);
lowered::pass::PassPipeline common_pipeline;
common_pipeline.register_pass<lowered::pass::MarkLoops>(vector_size);
common_pipeline.register_pass<lowered::pass::SoftmaxDecomposition>(vector_size);
common_pipeline.register_pass<lowered::pass::FuseLoops>();
common_pipeline.register_pass<lowered::pass::SplitLoops>();
common_pipeline.register_pass<lowered::pass::MoveResultOutOfLoop>();
common_pipeline.register_pass<lowered::pass::InsertBuffers>(buffer_allocation_rank);
common_pipeline.register_pass<lowered::pass::InsertLoadStore>(vector_size);
@ -557,22 +564,24 @@ snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& ou
ov::pass::Manager& pre_common,
ov::pass::Manager& post_common,
ov::pass::Manager& post_precision,
lowered::pass::PassPipeline& target_lowered_markup_pipeline,
lowered::pass::PassPipeline& target_lowered_pipeline,
const void* compile_params) {
canonicalize(output_shapes, input_shapes);
return generate(pre_common, post_common, post_precision, target_lowered_pipeline, compile_params);
return generate(pre_common, post_common, post_precision, target_lowered_markup_pipeline, target_lowered_pipeline, compile_params);
}
snippets::Schedule snippets::op::Subgraph::generate(const void* compile_params) {
auto mngr = ov::pass::Manager();
auto lowered = lowered::pass::PassPipeline();
return generate(mngr, mngr, mngr, lowered, compile_params);
return generate(mngr, mngr, mngr, lowered, lowered, compile_params);
}
snippets::Schedule snippets::op::Subgraph::generate(
ov::pass::Manager& pre_common,
ov::pass::Manager& post_common,
ov::pass::Manager& post_precision,
lowered::pass::PassPipeline& target_lowered_markup_pipeline,
lowered::pass::PassPipeline& target_lowered_pipeline,
const void* compile_params) {
INTERNAL_OP_SCOPE(Subgraph);
@ -587,7 +596,7 @@ snippets::Schedule snippets::op::Subgraph::generate(
lowering_config.m_loop_depth = tileRank;
lowered::LinearIR linear_ir = lowered::LinearIR(body_ptr(), lowering_config);
control_flow_transformations(linear_ir, target_lowered_pipeline);
control_flow_transformations(linear_ir, target_lowered_markup_pipeline, target_lowered_pipeline);
// actual code emission
const auto& lowering_result = m_generator->generate(linear_ir, lowering_config, compile_params);


@ -126,7 +126,8 @@ std::shared_ptr<ov::snippets::op::Subgraph> LoweringTests::getLoweredSubgraph(co
}
body_rt_info["PluginShapesOverride"] = new_shapes;
subgraph->set_tile_rank(2);
subgraph->generate(pre_dialect, post_precision, post_precision, lowered_pipeline);
ov::snippets::lowered::pass::PassPipeline empty_pipeline;
subgraph->generate(pre_dialect, post_precision, post_precision, empty_pipeline, lowered_pipeline);
return subgraph;
}


@ -0,0 +1,23 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>
namespace ov {
namespace pass {
/**
* @ingroup ie_transformation_common_api
* @brief Converts Pad v12 to Pad v1
*/
class TRANSFORMATIONS_API ConvertPad12ToPad1 : public MatcherPass {
public:
OPENVINO_RTTI("ConvertPad12ToPad1", "0");
ConvertPad12ToPad1();
};
} // namespace pass
} // namespace ov


@ -0,0 +1,23 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/pass/graph_rewrite.hpp>
#include <transformations_visibility.hpp>
namespace ov {
namespace pass {
/**
* @ingroup ie_transformation_common_api
* @brief Converts Pad v12 to Pad v1
*/
class TRANSFORMATIONS_API ConvertScatterElementsUpdate12ToScatterElementsUpdate3 : public MatcherPass {
public:
OPENVINO_RTTI("ConvertScatterElementsUpdate12ToScatterElementsUpdate3", "0");
ConvertScatterElementsUpdate12ToScatterElementsUpdate3();
};
} // namespace pass
} // namespace ov


@ -83,12 +83,13 @@
#include "transformations/op_conversions/convert_minimum_to_power_and_max.hpp"
#include "transformations/op_conversions/convert_mod.hpp"
#include "transformations/op_conversions/convert_multiclass_nms_upgrade.hpp"
#include "transformations/op_conversions/convert_pad12_downgrade.hpp"
#include "transformations/op_conversions/convert_pad_to_group_conv.hpp"
#include "transformations/op_conversions/convert_prior_box_v8_to_v0.hpp"
#include "transformations/op_conversions/convert_reduce_to_pooling.hpp"
#include "transformations/op_conversions/convert_roi_align_v3_to_v9.hpp"
#include "transformations/op_conversions/convert_roi_align_v9_to_v3.hpp"
#include "transformations/op_conversions/convert_scatter_elements_to_scatter.hpp"
#include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp"
#include "transformations/op_conversions/convert_softmax_downgrade.hpp"
#include "transformations/op_conversions/convert_softmax_upgrade.hpp"
#include "transformations/op_conversions/convert_space_to_depth.hpp"
@ -213,6 +214,8 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model
REGISTER_PASS(manager, ConvertXorToLogicalXor)
REGISTER_PASS(manager, ConvertTopK11ToTopK3)
REGISTER_PASS(manager, ConvertInterpolate11ToInterpolate4)
REGISTER_PASS(manager, ConvertPad12ToPad1)
REGISTER_PASS(manager, ConvertScatterElementsUpdate12ToScatterElementsUpdate3)
auto fq_fusions = manager.register_pass<GraphRewrite>();
ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion)


@ -0,0 +1,50 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/op_conversions/convert_pad12_downgrade.hpp"
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <openvino/op/pad.hpp>
#include "itt.hpp"
ov::pass::ConvertPad12ToPad1::ConvertPad12ToPad1() {
MATCHER_SCOPE(ConvertPad12ToPad1);
const auto pad_v12_pattern = pattern::wrap_type<ov::op::v12::Pad>();
const matcher_pass_callback callback = [=](pattern::Matcher& m) {
const auto pad_v12 = std::dynamic_pointer_cast<ov::op::v12::Pad>(m.get_match_root());
if (!pad_v12 || transformation_callback(pad_v12)) {
return false;
}
std::shared_ptr<ov::Node> pad_v1;
if (pad_v12->get_input_size() == 4) {
pad_v1 = std::make_shared<ov::op::v1::Pad>(pad_v12->input_value(0),
pad_v12->input_value(1),
pad_v12->input_value(2),
pad_v12->input_value(3),
pad_v12->get_pad_mode());
} else {
const auto pad_value =
ov::op::v0::Constant::create(pad_v12->input_value(0).get_element_type(), ov::Shape{}, {0});
pad_v1 = std::make_shared<ov::op::v1::Pad>(pad_v12->input_value(0),
pad_v12->input_value(1),
pad_v12->input_value(2),
pad_value,
pad_v12->get_pad_mode());
}
pad_v1->set_friendly_name(pad_v12->get_friendly_name());
copy_runtime_info(pad_v12, pad_v1);
replace_node(pad_v12, pad_v1);
return true;
};
auto m = std::make_shared<pattern::Matcher>(pad_v12_pattern, matcher_name);
register_matcher(m, callback);
}


@ -0,0 +1,40 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp"
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <openvino/op/scatter_elements_update.hpp>
#include "itt.hpp"
ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3::
ConvertScatterElementsUpdate12ToScatterElementsUpdate3() {
MATCHER_SCOPE(ConvertScatterElementsUpdate12ToScatterElementsUpdate3);
const auto seu_v12_pattern = pattern::wrap_type<ov::op::v12::ScatterElementsUpdate>();
const matcher_pass_callback callback = [=](pattern::Matcher& m) {
const auto seu_v12 = std::dynamic_pointer_cast<ov::op::v12::ScatterElementsUpdate>(m.get_match_root());
if (!seu_v12 || transformation_callback(seu_v12) ||
seu_v12->get_reduction() != ov::op::v12::ScatterElementsUpdate::Reduction::NONE) {
return false;
}
const auto seu_v3 = std::make_shared<ov::op::v3::ScatterElementsUpdate>(seu_v12->input_value(0),
seu_v12->input_value(1),
seu_v12->input_value(2),
seu_v12->input_value(3));
seu_v3->set_friendly_name(seu_v12->get_friendly_name());
copy_runtime_info(seu_v12, seu_v3);
replace_node(seu_v12, seu_v3);
return true;
};
auto m = std::make_shared<pattern::Matcher>(seu_v12_pattern, matcher_name);
register_matcher(m, callback);
}


@ -0,0 +1,86 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <memory>
#include <openvino/opsets/opset1.hpp>
#include <openvino/opsets/opset12.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/op_conversions/convert_pad12_downgrade.hpp>
#include <transformations/utils/utils.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
namespace {
std::shared_ptr<ov::Model> create_v12_model(const ov::op::PadMode pad_mode, const int16_t pad_v = -1) {
const auto input = std::make_shared<ov::opset12::Parameter>(ov::element::i16, ov::Shape{1, 3, 100, 100});
const auto pads_begin =
std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 2, 1, 0});
const auto pads_end =
std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 1, 1, 0});
std::shared_ptr<ov::opset12::Pad> pad;
if (pad_v != -1) {
const auto pad_value =
std::make_shared<ov::op::v0::Constant>(ov::element::i16, ov::Shape{}, std::vector<int16_t>{pad_v});
pad = std::make_shared<ov::opset12::Pad>(input, pads_begin, pads_end, pad_value, pad_mode);
} else {
pad = std::make_shared<ov::opset12::Pad>(input, pads_begin, pads_end, pad_mode);
}
pad->set_friendly_name("pad12");
return std::make_shared<ov::Model>(pad->outputs(), ov::ParameterVector{input});
}
std::shared_ptr<ov::Model> create_v1_model(const ov::op::PadMode pad_mode, const int16_t pad_v) {
const auto input = std::make_shared<ov::opset1::Parameter>(ov::element::i16, ov::Shape{1, 3, 100, 100});
const auto pads_begin =
std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 2, 1, 0});
const auto pads_end =
std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 1, 1, 0});
const auto pad_value =
std::make_shared<ov::op::v0::Constant>(ov::element::i16, ov::Shape{}, std::vector<int16_t>{pad_v});
const auto pad = std::make_shared<ov::opset1::Pad>(input, pads_begin, pads_end, pad_value, pad_mode);
pad->set_friendly_name("pad1");
return std::make_shared<ov::Model>(pad->outputs(), ov::ParameterVector{input});
}
} // namespace
TEST_F(TransformationTestsF, ConvertPad12ToPad1) {
manager.register_pass<ov::pass::ConvertPad12ToPad1>();
function = create_v12_model(ov::op::PadMode::CONSTANT);
function_ref = create_v1_model(ov::op::PadMode::CONSTANT, 0);
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
TEST_F(TransformationTestsF, ConvertPad12ToPad1_explicit_pad_value) {
manager.register_pass<ov::pass::ConvertPad12ToPad1>();
function = create_v12_model(ov::op::PadMode::CONSTANT, 5);
function_ref = create_v1_model(ov::op::PadMode::CONSTANT, 5);
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
TEST_F(TransformationTestsF, ConvertPad12ToPad1_symmetric) {
manager.register_pass<ov::pass::ConvertPad12ToPad1>();
function = create_v12_model(ov::op::PadMode::SYMMETRIC);
function_ref = create_v1_model(ov::op::PadMode::SYMMETRIC, 0);
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
TEST_F(TransformationTestsF, ConvertPad12ToPad1_symmetric_explicit_pad_value) {
manager.register_pass<ov::pass::ConvertPad12ToPad1>();
function = create_v12_model(ov::op::PadMode::SYMMETRIC, 5);
function_ref = create_v1_model(ov::op::PadMode::SYMMETRIC, 5);
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}


@ -0,0 +1,78 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <memory>
#include <openvino/opsets/opset12.hpp>
#include <openvino/opsets/opset3.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp>
#include <transformations/utils/utils.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
namespace {
using Reduction = ov::opset12::ScatterElementsUpdate::Reduction;
std::shared_ptr<ov::Model> create_v12_model(const Reduction reduction_type, const bool use_init_value) {
const auto input = std::make_shared<ov::opset12::Parameter>(ov::element::f32, ov::Shape{1, 3, 100, 100});
const auto indices = std::make_shared<ov::opset12::Parameter>(ov::element::i32, ov::Shape{1, 1, 5, 5});
const auto updates = std::make_shared<ov::opset12::Parameter>(ov::element::f32, ov::Shape{1, 1, 5, 5});
const auto axis = std::make_shared<ov::opset12::Parameter>(ov::element::i64, ov::Shape{});
const auto seu = std::make_shared<ov::opset12::ScatterElementsUpdate>(input,
indices,
updates,
axis,
reduction_type,
use_init_value);
seu->set_friendly_name("seu12");
return std::make_shared<ov::Model>(seu->outputs(), ov::ParameterVector{input, indices, updates, axis});
}
std::shared_ptr<ov::Model> create_v3_model() {
const auto input = std::make_shared<ov::opset3::Parameter>(ov::element::f32, ov::Shape{1, 3, 100, 100});
const auto indices = std::make_shared<ov::opset3::Parameter>(ov::element::i32, ov::Shape{1, 1, 5, 5});
const auto updates = std::make_shared<ov::opset3::Parameter>(ov::element::f32, ov::Shape{1, 1, 5, 5});
const auto axis = std::make_shared<ov::opset3::Parameter>(ov::element::i64, ov::Shape{});
const auto seu = std::make_shared<ov::opset3::ScatterElementsUpdate>(input, indices, updates, axis);
seu->set_friendly_name("seu3");
return std::make_shared<ov::Model>(seu->outputs(), ov::ParameterVector{input, indices, updates, axis});
}
} // namespace
TEST_F(TransformationTestsF, ConvertScatterElementsUpdate12ToScatterElementsUpdate3_no_reduction_use_init_value) {
manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();
function = create_v12_model(Reduction::NONE, true);
function_ref = create_v3_model();
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
TEST_F(TransformationTestsF, ConvertScatterElementsUpdate12ToScatterElementsUpdate3_no_reduction) {
manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();
function = create_v12_model(Reduction::NONE, false);
function_ref = create_v3_model();
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
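// The two cases below deliberately leave function_ref unset: v3::ScatterElementsUpdate has no reduction
// semantics, so the downgrade pass is expected to keep the v12 op as-is whenever a reduction other than
// NONE is requested (with no reference model provided, the test fixture presumably verifies that the
// model is left unchanged).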
TEST_F(TransformationTestsF, ConvertScatterElementsUpdate12ToScatterElementsUpdate3_reduction_use_init_value) {
manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();
function = create_v12_model(Reduction::MEAN, true);
}
TEST_F(TransformationTestsF, ConvertScatterElementsUpdate12ToScatterElementsUpdate3_reduction) {
manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();
function = create_v12_model(Reduction::PROD, false);
}

View File

@ -1,15 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/op/group_normalization.hpp"
namespace ngraph {
namespace op {
namespace v12 {
using ov::op::v12::GroupNormalization;
} // namespace v12
} // namespace op
} // namespace ngraph

View File

@ -82,7 +82,6 @@
#include "ngraph/op/grid_sample.hpp"
#include "ngraph/op/grn.hpp"
#include "ngraph/op/group_conv.hpp"
#include "ngraph/op/group_normalization.hpp"
#include "ngraph/op/gru_cell.hpp"
#include "ngraph/op/gru_sequence.hpp"
#include "ngraph/op/hard_sigmoid.hpp"

View File

@ -67,7 +67,6 @@ const NGRAPH_API OpSet& get_opset8();
const NGRAPH_API OpSet& get_opset9();
const NGRAPH_API OpSet& get_opset10();
const NGRAPH_API OpSet& get_opset11();
const NGRAPH_API OpSet& get_opset12();
const NGRAPH_API std::map<std::string, std::function<const ngraph::OpSet&()>>& get_available_opsets();
} // namespace ngraph
NGRAPH_SUPPRESS_DEPRECATED_END

View File

@ -1,15 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "ngraph/ops.hpp"
namespace ngraph {
namespace opset12 {
#define NGRAPH_OP(a, b) using b::a;
#include "ngraph/opsets/opset12_tbl.hpp"
#undef NGRAPH_OP
} // namespace opset12
} // namespace ngraph

View File

@ -1,12 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifndef NGRAPH_OP
# warning "NGRAPH_OP not defined"
# define NGRAPH_OP(x, y)
#endif
#define _OPENVINO_OP_REG NGRAPH_OP
#include "openvino/opsets/opset12_tbl.hpp"
#undef _OPENVINO_OP_REG

View File

@ -31,6 +31,13 @@ public:
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_END
private:
bool evaluate_scatter_elements_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
};
} // namespace v3
namespace v12 {
@ -80,7 +87,12 @@ public:
bool has_evaluate() const override;
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_END
private:
bool evaluate_scatter_elements_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
Reduction m_reduction = Reduction::NONE;
bool m_use_init_val = true;
};

View File

@ -33,12 +33,9 @@ public:
bool evaluate_upper(TensorVector& output_values) const override;
bool evaluate_label(TensorLabelVector& output_labels) const override;
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_END
private:
bool evaluate_scatter_element_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
protected:
bool is_supported_index_input_element_type() const;
int64_t get_normalized_axis(const HostTensorVector& inputs) const;
};
} // namespace util
} // namespace op

View File

@ -4,26 +4,57 @@
#pragma once
#include <cfenv>
#include <cstring>
#include <iterator>
#include "ngraph/check.hpp"
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/shape.hpp"
#include "openvino/op/scatter_elements_update.hpp"
namespace ngraph {
namespace runtime {
namespace reference {
using Reduction = ov::op::v12::ScatterElementsUpdate::Reduction;
template <typename DataType, typename IndicesType>
void scatter_elem_update_with_reduction(const DataType* input_data,
const IndicesType* indices,
const DataType* updates,
const int64_t axis,
DataType* out_buf,
const Shape& data_shape,
const Shape& indices_shape,
const ov::op::v12::ScatterElementsUpdate::Reduction reduction_type,
const bool use_init_val);
template <typename DataType, typename IndicesType>
void scatter_elem_update(const DataType* input_data,
const IndicesType* indices,
const DataType* updates,
const int64_t& axis,
const int64_t axis,
DataType* out_buf,
const Shape& data_shape,
const Shape& indices_shape) {
const Shape& indices_shape,
const Reduction reduction_type = Reduction::NONE,
const bool use_init_val = true) {
// Copy inputs to out
std::memcpy(out_buf, input_data, sizeof(DataType) * shape_size(data_shape));
if (reduction_type != Reduction::NONE) {
scatter_elem_update_with_reduction(input_data,
indices,
updates,
axis,
out_buf,
data_shape,
indices_shape,
reduction_type,
use_init_val);
return;
}
// 3D example
// output[indices[i][j][k]][j][k] = updates[i][j][k] if axis = 0,
// output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1,
@ -43,6 +74,160 @@ void scatter_elem_update(const DataType* input_data,
out_buf[out_idx] = updates[indices_idx];
}
}
template <typename T>
T reduction_neutral_value(const Reduction reduction_type) {
switch (reduction_type) {
case Reduction::MAX:
return std::numeric_limits<T>::min();
case Reduction::MIN:
return std::numeric_limits<T>::max();
case Reduction::PROD:
return T{1};
case Reduction::SUM:
case Reduction::MEAN:
return T{0};
default:
OPENVINO_THROW("Neutral value not available for this type of reduction");
return 0;
}
}
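// These neutral values seed the accumulators when use_init_val == false: PROD starts from 1 and SUM/MEAN
// from 0, so the first real update fully determines the accumulated value. Note that for MAX the seed is
// std::numeric_limits<T>::min(), which for floating-point types is the smallest positive normal value
// rather than lowest().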
template <typename T>
std::function<T(const T, const T)> reduction_functor_for(const Reduction reduction_type) {
switch (reduction_type) {
case Reduction::MAX:
return [](const T a, const T b) {
return a > b ? a : b;
};
case Reduction::MIN:
return [](const T a, const T b) {
return a < b ? a : b;
};
case Reduction::PROD:
return std::multiplies<T>{};
case Reduction::SUM:
case Reduction::MEAN:
return std::plus<T>{};
default:
OPENVINO_THROW("No functor available for this type of reduction");
return 0;
}
}
template <>
std::function<char(const char, const char)> reduction_functor_for<char>(const Reduction reduction_type) {
switch (reduction_type) {
case Reduction::MAX:
return [](const char a, const char b) {
return a > b ? a : b;
};
case Reduction::MIN:
return [](const char a, const char b) {
return a < b ? a : b;
};
case Reduction::PROD:
return [](const char a, const char b) {
return static_cast<bool>(a) && static_cast<bool>(b);
};
case Reduction::SUM:
return [](const char a, const char b) {
return static_cast<bool>(a) || static_cast<bool>(b);
};
default:
OPENVINO_THROW("No functor available for this type of reduction");
return 0;
}
}
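// The char specialization above covers element::boolean tensors: PROD maps to logical AND and SUM to
// logical OR, so boolean scatter reductions behave like element-wise set operations rather than integer
// arithmetic.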
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value || std::is_class<T>::value, T>::type arithmetic_mean(
const T accumulator,
const int32_t N) {
return accumulator / N;
}
template <typename T>
typename std::enable_if<std::is_integral<T>::value, T>::type arithmetic_mean(const T accumulator, const int32_t N) {
const auto old_mode = std::fegetround();
std::fesetround(FE_DOWNWARD);
const T value = static_cast<T>(std::nearbyint(static_cast<double>(accumulator) / N));
std::fesetround(old_mode);
return value;
}
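// Worked example of the integer MEAN rounding above (illustrative, not part of the change):
// arithmetic_mean<int32_t>(-5, 2) computes nearbyint(-2.5) under FE_DOWNWARD, i.e. -3; the division
// rounds towards -infinity, which matches the disabled integer-MEAN evaluate test later in this commit,
// where the accumulated value (1 + -6) / 2 is expected to give -3.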
template <typename T>
size_t normalize_index(const T idx, const size_t dim_value) {
if (idx < 0) {
return static_cast<size_t>(idx + dim_value);
} else {
return static_cast<size_t>(idx);
}
}
template <typename DataType, typename IndicesType>
void scatter_elem_update_with_reduction(const DataType* input_data,
const IndicesType* indices,
const DataType* updates,
const int64_t axis,
DataType* out_buf,
const Shape& data_shape,
const Shape& indices_shape,
const Reduction reduction_type,
const bool use_init_val) {
CoordinateTransformBasic indices_transform{indices_shape};
CoordinateTransformBasic data_transform{data_shape};
const auto indices_strides = row_major_strides(indices_shape);
const auto data_strides = row_major_strides(data_shape);
struct Offsets {
size_t idx_offset;
size_t out_offset;
};
std::vector<Offsets> idx_to_output_element;
idx_to_output_element.reserve(shape_size(indices_shape));
for (const Coordinate& indices_cord : indices_transform) {
const size_t indices_offset =
std::inner_product(indices_cord.begin(), indices_cord.end(), indices_strides.begin(), uint64_t(0));
Coordinate out_cord(indices_cord);
out_cord.at(axis) = normalize_index(indices[indices_offset], data_shape[axis]);
const auto out_offset = std::inner_product(out_cord.begin(), out_cord.end(), data_strides.begin(), uint64_t(0));
idx_to_output_element.push_back({indices_offset, out_offset});
}
// When use_init_val is false, the values already copied to the target locations have to be replaced
// with values that do not affect the given reduction. Effectively this sets the initial value
// of the reduction accumulators.
if (!use_init_val) {
const auto value = reduction_neutral_value<DataType>(reduction_type);
for (const auto& offsets : idx_to_output_element) {
out_buf[offsets.out_offset] = value;
}
}
// Counts how many values were accumulated into each output element (used only by the MEAN reduction).
// These counts later divide the accumulated sums to produce the final means;
// the key is the output tensor's element index and the value is the count.
std::unordered_map<size_t, int32_t> mean_reduction_counters;
const auto reduce = reduction_functor_for<DataType>(reduction_type);
for (const auto& offsets : idx_to_output_element) {
out_buf[offsets.out_offset] = reduce(out_buf[offsets.out_offset], updates[offsets.idx_offset]);
if (reduction_type == Reduction::MEAN) {
mean_reduction_counters[offsets.out_offset] += 1;
}
}
if (reduction_type == Reduction::MEAN) {
for (const auto& counter : mean_reduction_counters) {
// include the initial value in the arithmetic mean divisor (if needed)
const auto N = counter.second + static_cast<int32_t>(use_init_val);
out_buf[counter.first] = arithmetic_mean<DataType>(out_buf[counter.first], N);
}
}
}
} // namespace reference
} // namespace runtime
} // namespace ngraph
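A minimal usage sketch of the extended reference kernel above (illustrative only, not part of the change; it assumes the header is available as in the OpenVINO source tree). With a reduction requested, duplicate indices accumulate into the same output slot:
#include <iostream>
#include <vector>

#include "ngraph/runtime/reference/scatter_elements_update.hpp"

int main() {
    using ngraph::runtime::reference::scatter_elem_update;
    using Reduction = ov::op::v12::ScatterElementsUpdate::Reduction;

    const ngraph::Shape data_shape{4};
    const ngraph::Shape indices_shape{3};
    const std::vector<float> data{1.f, 2.f, 3.f, 4.f};
    const std::vector<int32_t> indices{0, 2, 0};  // index 0 appears twice
    const std::vector<float> updates{10.f, 20.f, 30.f};
    std::vector<float> out(data.size());

    // SUM reduction with use_init_val = true: the original data takes part in the accumulation.
    scatter_elem_update(data.data(),
                        indices.data(),
                        updates.data(),
                        /*axis=*/0,
                        out.data(),
                        data_shape,
                        indices_shape,
                        Reduction::SUM,
                        /*use_init_val=*/true);

    for (const auto v : out)
        std::cout << v << ' ';  // expected: 41 2 23 4 (out[0] = 1 + 10 + 30, out[2] = 3 + 20)
    std::cout << '\n';
    return 0;
}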

View File

@ -7,6 +7,7 @@
#include <scatter_elements_update_shape_inference.hpp>
#include "itt.hpp"
#include "ngraph/runtime/reference/scatter_elements_update.hpp"
#include "openvino/core/validation_util.hpp"
using namespace std;
@ -86,11 +87,246 @@ shared_ptr<Node> op::v12::ScatterElementsUpdate::clone_with_new_inputs(const Out
}
bool op::v12::ScatterElementsUpdate::has_evaluate() const {
if (m_reduction != Reduction::NONE) {
return false;
} else {
return ScatterElementsUpdateBase::has_evaluate();
return ScatterElementsUpdateBase::has_evaluate() ||
(get_output_element_type(0) == element::boolean && is_supported_index_input_element_type());
}
namespace scatter_elements_update {
namespace {
template <element::Type_t DT, element::Type_t IT, element::Type_t AT>
bool evaluate(const HostTensorPtr& data,
const HostTensorPtr& indices,
const HostTensorPtr& updates,
const HostTensorPtr& axis,
const HostTensorPtr& out,
const int64_t normalized_axis,
const op::v12::ScatterElementsUpdate::Reduction reduction_type,
const bool use_init_value) {
using DataType = typename element_type_traits<DT>::value_type;
using IndicesType = typename element_type_traits<IT>::value_type;
out->set_shape(data->get_shape());
ngraph::runtime::reference::scatter_elem_update<DataType, IndicesType>(data->get_data_ptr<DT>(),
indices->get_data_ptr<IT>(),
updates->get_data_ptr<DT>(),
normalized_axis,
out->get_data_ptr<DT>(),
data->get_shape(),
indices->get_shape(),
reduction_type,
use_init_value);
return true;
}
#define TYPE_AXS_CASE(a, ...) \
case element::Type_t::a: { \
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_axs, _, a)); \
rc = evaluate<DT, IT, element::Type_t::a>(__VA_ARGS__); \
} break;
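// Illustrative expansion (assumption): TYPE_AXS_CASE(i32, args...) becomes
//   case element::Type_t::i32: { OV_OP_SCOPE(scatter_element_update_axs_i32); rc = evaluate<DT, IT, element::Type_t::i32>(args...); } break;
// Together with TYPE_IND_CASE and NGRAPH_TYPE_CASE further down, evaluation is dispatched over the
// output/data, indices and axis element types before reaching the reference kernel.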
template <element::Type_t DT, element::Type_t IT>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis,
const op::v12::ScatterElementsUpdate::Reduction reduction_type,
const bool use_init_value) {
auto axis_type = arg3->get_element_type();
// Dispatch specialization based on axis data type.
bool rc = true;
switch (axis_type) {
TYPE_AXS_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_AXS_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_AXS_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_AXS_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_AXS_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_AXS_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_AXS_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_AXS_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
default:
rc = false;
break;
}
return rc;
}
#define TYPE_IND_CASE(a, ...) \
case element::Type_t::a: { \
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_ind, _, a)); \
rc = evaluate<DT, element::Type_t::a>(__VA_ARGS__); \
} break;
template <element::Type_t DT>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis,
const op::v12::ScatterElementsUpdate::Reduction reduction_type,
const bool use_init_value) {
auto indices_type = arg1->get_element_type();
// Dispatch specialization based on indices data type.
bool rc = true;
switch (indices_type) {
TYPE_IND_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_IND_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_IND_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_IND_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_IND_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_IND_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_IND_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
TYPE_IND_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
default:
rc = false;
break;
}
return rc;
}
bool evaluate_scatter_elements_update(
const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis,
const op::v12::ScatterElementsUpdate::Reduction reduction_type = op::v12::ScatterElementsUpdate::Reduction::NONE,
const bool use_init_value = false) {
bool rc = true;
switch (out->get_element_type()) {
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
i16,
arg0,
arg1,
arg2,
arg3,
out,
normalized_axis,
reduction_type,
use_init_value);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
i32,
arg0,
arg1,
arg2,
arg3,
out,
normalized_axis,
reduction_type,
use_init_value);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
i64,
arg0,
arg1,
arg2,
arg3,
out,
normalized_axis,
reduction_type,
use_init_value);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
u32,
arg0,
arg1,
arg2,
arg3,
out,
normalized_axis,
reduction_type,
use_init_value);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
u64,
arg0,
arg1,
arg2,
arg3,
out,
normalized_axis,
reduction_type,
use_init_value);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
f16,
arg0,
arg1,
arg2,
arg3,
out,
normalized_axis,
reduction_type,
use_init_value);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
f32,
arg0,
arg1,
arg2,
arg3,
out,
normalized_axis,
reduction_type,
use_init_value);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
boolean,
arg0,
arg1,
arg2,
arg3,
out,
normalized_axis,
reduction_type,
use_init_value);
default:
rc = false;
break;
}
return rc;
}
} // namespace
} // namespace scatter_elements_update
bool op::v3::ScatterElementsUpdate::evaluate_scatter_elements_update(const HostTensorVector& outputs,
const HostTensorVector& inputs) const {
const auto normalized_axis = get_normalized_axis(inputs);
return scatter_elements_update::evaluate_scatter_elements_update(inputs[0],
inputs[1],
inputs[2],
inputs[3],
outputs[0],
normalized_axis);
}
bool op::v12::ScatterElementsUpdate::evaluate_scatter_elements_update(const HostTensorVector& outputs,
const HostTensorVector& inputs) const {
const auto normalized_axis = get_normalized_axis(inputs);
return scatter_elements_update::evaluate_scatter_elements_update(inputs[0],
inputs[1],
inputs[2],
inputs[3],
outputs[0],
normalized_axis,
m_reduction,
m_use_init_val);
}
bool op::v3::ScatterElementsUpdate::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
OV_OP_SCOPE(v3_ScatterElementsUpdate_evaluate);
return evaluate_scatter_elements_update(outputs, inputs);
}
bool op::v12::ScatterElementsUpdate::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
OV_OP_SCOPE(v12_ScatterElementsUpdate_evaluate);
return evaluate_scatter_elements_update(outputs, inputs);
}
template <>

View File

@ -8,7 +8,6 @@
#include "bound_evaluate.hpp"
#include "itt.hpp"
#include "ngraph/runtime/reference/scatter_elements_update.hpp"
#include "openvino/core/validation_util.hpp"
namespace ov {
@ -59,31 +58,35 @@ bool op::util::ScatterElementsUpdateBase::has_evaluate() const {
OV_OP_SCOPE(util_ScatterElementsUpdateBase_has_evaluate);
switch (get_output_element_type(0)) {
case ngraph::element::i16:
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u32:
case ngraph::element::u64:
case ngraph::element::f16:
case ngraph::element::f32:
case element::i16:
case element::i32:
case element::i64:
case element::u32:
case element::u64:
case element::f16:
case element::f32:
break;
default:
return false;
}
return is_supported_index_input_element_type();
}
bool op::util::ScatterElementsUpdateBase::is_supported_index_input_element_type() const {
switch (get_input_element_type(1)) {
case ngraph::element::i8:
case ngraph::element::i16:
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u8:
case ngraph::element::u16:
case ngraph::element::u32:
case ngraph::element::u64:
break;
case element::i8:
case element::i16:
case element::i32:
case element::i64:
case element::u8:
case element::u16:
case element::u32:
case element::u64:
return true;
default:
return false;
}
return true;
}
bool op::util::ScatterElementsUpdateBase::evaluate_lower(ov::TensorVector& output_values) const {
@ -104,126 +107,7 @@ bool op::util::ScatterElementsUpdateBase::evaluate_label(TensorLabelVector& outp
OPENVINO_SUPPRESS_DEPRECATED_END
}
namespace scatter_element_update {
namespace {
template <element::Type_t DT, element::Type_t IT, element::Type_t AT>
bool evaluate(const HostTensorPtr& data,
const HostTensorPtr& indices,
const HostTensorPtr& updates,
const HostTensorPtr& axis,
const HostTensorPtr& out,
const int64_t normalized_axis) {
using DataType = typename element_type_traits<DT>::value_type;
using IndicesType = typename element_type_traits<IT>::value_type;
out->set_shape(data->get_shape());
ngraph::runtime::reference::scatter_elem_update<DataType, IndicesType>(data->get_data_ptr<DT>(),
indices->get_data_ptr<IT>(),
updates->get_data_ptr<DT>(),
normalized_axis,
out->get_data_ptr<DT>(),
data->get_shape(),
indices->get_shape());
return true;
}
#define TYPE_AXS_CASE(a, ...) \
case element::Type_t::a: { \
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_axs, _, a)); \
rc = evaluate<DT, IT, element::Type_t::a>(__VA_ARGS__); \
} break;
template <element::Type_t DT, element::Type_t IT>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
auto axis_type = arg3->get_element_type();
// Dispatch specialization based on axis data type.
bool rc = true;
switch (axis_type) {
TYPE_AXS_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
}
return rc;
}
#define TYPE_IND_CASE(a, ...) \
case element::Type_t::a: { \
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_ind, _, a)); \
rc = evaluate<DT, element::Type_t::a>(__VA_ARGS__); \
} break;
template <element::Type_t DT>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
auto indices_type = arg1->get_element_type();
// Dispatch specialization based on indices data type.
bool rc = true;
switch (indices_type) {
TYPE_IND_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
}
return rc;
}
bool evaluate_scatter_element_update(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
bool rc = true;
switch (out->get_element_type()) {
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i16, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i32, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i64, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u32, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u64, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f16, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f32, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
}
return rc;
}
} // namespace
} // namespace scatter_element_update
bool op::util::ScatterElementsUpdateBase::evaluate_scatter_element_update(const HostTensorVector& outputs,
const HostTensorVector& inputs) const {
int64_t op::util::ScatterElementsUpdateBase::get_normalized_axis(const HostTensorVector& inputs) const {
NGRAPH_CHECK(inputs[3]->get_element_type().is_integral_number(), "axis element type is not integral data type");
OPENVINO_SUPPRESS_DEPRECATED_START
@ -243,20 +127,7 @@ bool op::util::ScatterElementsUpdateBase::evaluate_scatter_element_update(const
OPENVINO_SUPPRESS_DEPRECATED_END
}
}
return scatter_element_update::evaluate_scatter_element_update(inputs[0],
inputs[1],
inputs[2],
inputs[3],
outputs[0],
normalized_axis);
return normalized_axis;
}
bool op::util::ScatterElementsUpdateBase::evaluate(const HostTensorVector& outputs,
const HostTensorVector& inputs) const {
OV_OP_SCOPE(util_ScatterElementsUpdate_evaluate);
return evaluate_scatter_element_update(outputs, inputs);
}
} // namespace op
} // namespace ov
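For context, a rough sketch of the axis handling that the derived evaluate() methods now rely on (an assumption drawn from the surrounding code and the negative-axis evaluate test below, not the actual implementation; the real get_normalized_axis() additionally validates that the axis input has an integral element type, as checked in the hunk above):
// Illustrative only: negative axes wrap around the rank of the data tensor.
#include <cstdint>

int64_t normalize_axis_sketch(int64_t axis, int64_t rank) {
    return axis < 0 ? axis + rank : axis;  // e.g. axis = -1 with rank-2 data -> 1
}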

View File

@ -7,7 +7,7 @@
#include "itt.hpp"
#include "ngraph/deprecated.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ops.hpp"
#include "openvino/op/ops.hpp"
#include "openvino/opsets/opset.hpp"
#include "openvino/util/log.hpp"
@ -64,8 +64,7 @@ const std::map<std::string, std::function<const ngraph::OpSet&()>>& ngraph::get_
_NGRAPH_REG_OPSET(opset8),
_NGRAPH_REG_OPSET(opset9),
_NGRAPH_REG_OPSET(opset10),
_NGRAPH_REG_OPSET(opset11),
_NGRAPH_REG_OPSET(opset12)};
_NGRAPH_REG_OPSET(opset11)};
#undef _NGRAPH_REG_OPSET
return opset_map;
}
@ -275,8 +274,3 @@ const ngraph::OpSet& ngraph::get_opset11() {
static OpSet opset(ov::get_opset11());
return opset;
}
const ngraph::OpSet& ngraph::get_opset12() {
static OpSet opset(ov::get_opset12());
return opset;
}

View File

@ -1218,14 +1218,18 @@ TEST(eval, max_pool_v1_dynamic) {
vector<float> out{1, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 0};
}
TEST(eval, evaluate_static_scatter_elements_update_basic) {
template <class T>
class ScatterElementsUpdateEvalTest : public ::testing::Test {};
TYPED_TEST_SUITE_P(ScatterElementsUpdateEvalTest);
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_static_scatter_elements_update_basic) {
const Shape data_shape{3, 3};
const Shape indices_shape{2, 3};
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
@ -1241,7 +1245,7 @@ TEST(eval, evaluate_static_scatter_elements_update_basic) {
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_dynamic_scatter_elements_update_basic) {
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_dynamic_scatter_elements_update_basic) {
const Shape data_shape{3, 3};
const Shape indices_shape{2, 3};
@ -1250,7 +1254,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_basic) {
auto arg3 = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
@ -1267,7 +1271,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_basic) {
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_dynamic_scatter_elements_update_negative_axis) {
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_dynamic_scatter_elements_update_negative_axis) {
const Shape data_shape{3, 3};
const Shape indices_shape{2, 3};
const Shape axis_shape{};
@ -1277,7 +1281,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_negative_axis) {
auto arg3 = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
@ -1294,7 +1298,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_negative_axis) {
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_dynamic_scatter_elements_update_1d_axis) {
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_dynamic_scatter_elements_update_1d_axis) {
const Shape data_shape{3, 3};
const Shape indices_shape{2, 3};
@ -1303,7 +1307,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_1d_axis) {
auto arg3 = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
@ -1321,7 +1325,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_1d_axis) {
}
// Disabled test for disabled reference implementation
TEST(eval, DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16) {
TYPED_TEST_P(ScatterElementsUpdateEvalTest, DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16) {
const Shape data_shape{3, 3, 3};
const Shape indices_shape{2, 2, 3};
@ -1330,7 +1334,7 @@ TEST(eval, DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16) {
auto arg3 = make_shared<op::Parameter>(element::i16, PartialShape::dynamic());
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(
@ -1348,7 +1352,7 @@ TEST(eval, DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16) {
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_dynamic_scatter_elements_update_one_elem_i32) {
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_dynamic_scatter_elements_update_one_elem_i32) {
const Shape data_shape{3, 3, 3};
const Shape indices_shape{1, 1, 1};
@ -1357,7 +1361,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_one_elem_i32) {
auto arg3 = make_shared<op::Parameter>(element::i32, PartialShape::dynamic());
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(
@ -1375,6 +1379,505 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_one_elem_i32) {
ASSERT_EQ(cval, out);
}
REGISTER_TYPED_TEST_SUITE_P(ScatterElementsUpdateEvalTest,
evaluate_dynamic_scatter_elements_update_one_elem_i32,
DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16,
evaluate_dynamic_scatter_elements_update_1d_axis,
evaluate_dynamic_scatter_elements_update_negative_axis,
evaluate_dynamic_scatter_elements_update_basic,
evaluate_static_scatter_elements_update_basic);
using OpVersions = ::testing::Types<ov::op::v3::ScatterElementsUpdate, ov::op::v12::ScatterElementsUpdate>;
INSTANTIATE_TYPED_TEST_SUITE_P(eval, ScatterElementsUpdateEvalTest, OpVersions);
TEST(eval, evaluate_static_scatter_elements_update_reduction_sum) {
const Shape data_shape{10};
const Shape indices_shape{4};
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::SUM);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
{result_tensor},
{make_host_tensor<element::Type_t::f32>(data_shape,
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
make_host_tensor<element::Type_t::i32>(indices_shape, {5, 0, 7, 5}),
make_host_tensor<element::Type_t::f32>(indices_shape, {5.0f, 6.0f, 1.5f, -5.0f}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<float>(result_tensor);
const vector<float> out{6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 8.5f, 8.0f, 9.0f};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_reduction_prod_exclusive) {
const Shape data_shape{10};
const Shape indices_shape{4};
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::PROD,
false);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
{result_tensor},
{make_host_tensor<element::Type_t::f32>(data_shape,
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
make_host_tensor<element::Type_t::i32>(indices_shape, {1, 9, 4, 9}),
make_host_tensor<element::Type_t::f32>(indices_shape, {5.0f, 6.0f, 1.5f, -2.0f}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<float>(result_tensor);
const vector<float> out{0.0f, 5.0f, 2.0f, 3.0f, 1.5f, 5.0f, 6.0f, 7.0f, 8.0f, -12.0f};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_reduction_mean) {
const Shape data_shape{3, 3};
const Shape indices_shape{2, 2};
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MEAN,
true);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
{result_tensor},
{make_host_tensor<element::Type_t::f32>(data_shape, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
make_host_tensor<element::Type_t::i32>(indices_shape, {2, 2, 0, 1}),
make_host_tensor<element::Type_t::f32>(indices_shape, {10.f, 21.f, 25.f, 38.f}),
make_host_tensor<element::Type_t::i64>({}, {1})}));
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<float>(result_tensor);
const vector<float> out{1.0f, 2.0f, 11.33333f, 14.5f, 21.5f, 6.0f, 7.0f, 8.0f, 9.0f};
for (size_t i = 0; i < cval.size(); ++i)
EXPECT_NEAR(cval[i], out[i], 1e-5f);
}
TEST(eval, evaluate_static_scatter_elements_update_reduction_mean_exclusive) {
const Shape data_shape{3, 3};
const Shape indices_shape{2, 2};
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MEAN,
false);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
{result_tensor},
{make_host_tensor<element::Type_t::f32>(data_shape, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
make_host_tensor<element::Type_t::i32>(indices_shape, {2, 2, 0, 1}),
make_host_tensor<element::Type_t::f32>(indices_shape, {10.f, 21.f, 25.f, 38.f}),
make_host_tensor<element::Type_t::i64>({}, {1})}));
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<float>(result_tensor);
const vector<float> out{1.0f, 2.0f, 15.5f, 25.f, 38.f, 6.0f, 7.0f, 8.0f, 9.0f};
for (size_t i = 0; i < cval.size(); ++i)
EXPECT_NEAR(cval[i], out[i], 1e-5f);
}
TEST(eval, DISABLED_evaluate_static_scatter_elements_update_reduction_mean_ints) {
// on macOS rounding towards -infinity doesn't work as expected, to be investigated
const Shape data_shape{3, 3};
const Shape indices_shape{2, 2};
auto arg1 = make_shared<op::Parameter>(element::i32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MEAN,
true);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::i32>(data_shape, {1, 2, 3, 4, -5, 6, 7, 8, 9}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 1}),
make_host_tensor<element::Type_t::i32>(indices_shape, {-6, -2, 600, -120}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::i32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<int32_t>(result_tensor);
const vector<int32_t> out{-3, 2, 3, 4, -43, 6, 303, 8, 9};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_reduction_min) {
const Shape data_shape{9};
const Shape indices_shape{9};
auto arg1 = make_shared<op::Parameter>(element::i32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MIN,
true);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(
fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::i32>(data_shape, {-1000, 2, 3, 4, -5, 6, 7, -2, 8}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 5, 6, 7, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {-999, 1, 3, 5, -4, 6, 8, 9, -1001}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::i32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<int32_t>(result_tensor);
const vector<int32_t> out{-1001, 1, 3, 4, -5, 6, 7, -2, 8};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_reduction_max) {
const Shape data_shape{9};
const Shape indices_shape{9};
auto arg1 = make_shared<op::Parameter>(element::i32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MAX,
true);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(
fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::i32>(data_shape, {-1000, 2, 3, 4, -5, 6, 7, -2, 8}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 5, 6, 7, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {-999, 1, 3, 5, -4, 6, 8, 9, -1001}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::i32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<int32_t>(result_tensor);
const vector<int32_t> out{-999, 2, 3, 5, -4, 6, 8, 9, 8};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_reduction_max_exclusive) {
const Shape data_shape{9};
const Shape indices_shape{9};
auto arg1 = make_shared<op::Parameter>(element::i32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MAX,
false);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(
fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::i32>(data_shape, {1000, 2, 3, 4, -5, 6, 7, -2, 8}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 2, 1, 3, 7, 5, 6, 7, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {999, 10, 20, 30, -40, 6, 8, 9, 555}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::i32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<int32_t>(result_tensor);
const vector<int32_t> out{999, 20, 10, 30, -5, 6, 8, 9, 8};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_boolean_sum) {
const Shape data_shape{5};
const Shape indices_shape{6};
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::SUM,
true);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 0, 1, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 1}),
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 0, 1, 1, 1}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<char>(result_tensor);
const vector<char> out{1, 1, 0, 1, 1};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_boolean_sum_exclusive) {
const Shape data_shape{5};
const Shape indices_shape{6};
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::SUM,
false);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 1, 1, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 4, 4, 0}),
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 1, 0, 1, 1, 1}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<char>(result_tensor);
const vector<char> out{1, 1, 0, 1, 1};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_boolean_prod) {
const Shape data_shape{5};
const Shape indices_shape{6};
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::PROD,
true);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 0, 1, 1}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 1}),
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 1, 1, 0, 1}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<char>(result_tensor);
const vector<char> out{0, 0, 0, 1, 0};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_boolean_prod_exclusive) {
const Shape data_shape{5};
const Shape indices_shape{6};
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::PROD,
false);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 1, 1, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 4, 4, 0}),
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 1, 1, 1, 1}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<char>(result_tensor);
const vector<char> out{0, 0, 1, 1, 1};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_boolean_min) {
const Shape data_shape{6};
const Shape indices_shape{8};
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MIN,
true);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 0, 1, 1, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 4, 5, 5}),
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 0, 1, 0, 1, 1, 0}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<char>(result_tensor);
const vector<char> out{0, 0, 0, 1, 0, 0};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_boolean_min_exclusive) {
const Shape data_shape{6};
const Shape indices_shape{8};
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MIN,
false);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 1, 0, 1, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 4, 5, 5}),
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 1, 1, 0, 1, 1, 1}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<char>(result_tensor);
const vector<char> out{0, 0, 1, 1, 0, 1};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_boolean_max) {
const Shape data_shape{6};
const Shape indices_shape{8};
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MAX,
true);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 0, 1, 1, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 4, 5, 5}),
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 1, 0, 1, 0, 1, 0, 0}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<char>(result_tensor);
const vector<char> out{1, 1, 0, 1, 1, 0};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_boolean_max_exclusive) {
const Shape data_shape{6};
const Shape indices_shape{8};
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::MAX,
false);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate({result_tensor},
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 1, 0, 1, 0}),
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 4, 5, 5}),
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 1, 1, 0, 0, 1, 0, 0}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<char>(result_tensor);
const vector<char> out{0, 1, 1, 0, 1, 0};
ASSERT_EQ(cval, out);
}
TEST(eval, evaluate_static_scatter_elements_update_reduction_sum_negative_idx) {
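// With a data length of 10, the negative indices {-5, 0, -3, -5} used below normalize to {5, 0, 7, 5},
// so this case is expected to produce the same result as evaluate_static_scatter_elements_update_reduction_sum above.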
const Shape data_shape{10};
const Shape indices_shape{4};
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
auto scatter_elements_update =
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
arg2,
arg3,
arg4,
ov::op::v12::ScatterElementsUpdate::Reduction::SUM);
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
auto result_tensor = make_shared<HostTensor>();
ASSERT_TRUE(fun->evaluate(
{result_tensor},
{make_host_tensor<element::Type_t::f32>(data_shape,
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
make_host_tensor<element::Type_t::i32>(indices_shape, {-5, 0, -3, -5}),
make_host_tensor<element::Type_t::f32>(indices_shape, {5.0f, 6.0f, 1.5f, -5.0f}),
make_host_tensor<element::Type_t::i64>({}, {0})}));
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
EXPECT_EQ(result_tensor->get_shape(), data_shape);
const auto cval = read_vector<float>(result_tensor);
const vector<float> out{6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 8.5f, 8.0f, 9.0f};
ASSERT_EQ(cval, out);
}
TEST(eval, topk_v1) {
Shape shape{2, 3, 2};
Shape rshape{2, 2, 2};

View File

@ -67,7 +67,7 @@ _OPENVINO_OP_REG(Greater, ngraph::op::v1)
_OPENVINO_OP_REG(GreaterEqual, ngraph::op::v1)
_OPENVINO_OP_REG(GroupConvolution, ngraph::op::v1)
_OPENVINO_OP_REG(GroupConvolutionBackpropData, ngraph::op::v1)
_OPENVINO_OP_REG(GroupNormalization, ngraph::op::v12)
_OPENVINO_OP_REG(GroupNormalization, ov::op::v12)
_OPENVINO_OP_REG(HardSigmoid, ngraph::op::v0)
_OPENVINO_OP_REG(Interpolate, ngraph::op::v0)
_OPENVINO_OP_REG(Interpolate, ngraph::op::v4)

View File

@ -164,8 +164,8 @@ protected:
if (sts != OK) {
IE_EXCEPTION_SWITCH(sts,
ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
std::stringstream{} << IE_LOCATION << desc.msg)
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<=
std::stringstream{} << desc.msg)
}
IE_SUPPRESS_DEPRECATED_START
_ptr = std::shared_ptr<T>(object, [](T* ptr) {

View File

@ -328,7 +328,14 @@ IE_SUPPRESS_DEPRECATED_END
namespace details {
template <typename ExceptionType>
struct ExceptionTraits;
}
template <>
struct INFERENCE_ENGINE_1_0_DEPRECATED ExceptionTraits<InferenceEngineException> {
static const char* string() {
return "";
}
};
} // namespace details
#define INFERENCE_ENGINE_DECLARE_EXCEPTION(ExceptionType, statusCode) \
struct INFERENCE_ENGINE_1_0_DEPRECATED INFERENCE_ENGINE_API_CLASS(ExceptionType) final \
@ -400,20 +407,45 @@ namespace details {
/**
* @brief Tag struct used to throw exception
*/
#ifndef NDEBUG
template <typename ExceptionType>
struct INFERENCE_ENGINE_1_0_DEPRECATED ThrowNow final {
[[noreturn]] void operator<<=(const std::ostream& ostream) {
std::ostringstream stream;
stream << ostream.rdbuf();
const char* const file;
const int line;
[[noreturn]] static void create(const std::ostream& ostream, const char* file, int line) {
std::stringstream stream;
stream << '\n' << file << ':' << line << ' ';
stream << ExceptionTraits<ExceptionType>::string() << ' ' << ostream.rdbuf();
throw ExceptionType{stream.str()};
}
[[noreturn]] void operator<<=(const std::ostream& ostream) {
create(ostream, file, line);
}
};
#else
template <typename ExceptionType>
struct INFERENCE_ENGINE_1_0_DEPRECATED ThrowNow final {
[[noreturn]] static void create(const std::ostream& ostream) {
std::stringstream stream;
stream << ExceptionTraits<ExceptionType>::string() << ' ' << ostream.rdbuf();
throw ExceptionType{stream.str()};
}
[[noreturn]] void operator<<=(const std::ostream& ostream) {
create(ostream);
}
};
#endif
/// @cond
#ifndef NDEBUG
# define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__ << ' '
# define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__ << ' '
# define IE_LOCATION_PARAM __FILE__, __LINE__
#else
# define IE_LOCATION ""
# define IE_LOCATION_PARAM
#endif // NDEBUG
// WARNING: DO NOT USE THIS MACRO! Use openvino/util/pp.hpp macro library
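As an illustration of the reworked machinery (a sketch of the intended expansion, not code from the patch): in a debug build

    IE_THROW() << "something went wrong";

expands, per IE_THROW_0 below, to roughly

    (InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError>{__FILE__, __LINE__}) <<=
        std::stringstream{} << "something went wrong";

and ThrowNow::create() prepends the location and the exception tag, so what() has the form "\n<file>:<line> [ GENERAL_ERROR ] something went wrong", which is the format the updated ExceptionTests further down check. In a release build IE_LOCATION_PARAM expands to nothing, so only the tag and the message remain.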
@ -430,13 +462,10 @@ struct INFERENCE_ENGINE_1_0_DEPRECATED ThrowNow final {
// ENDWARNING
#define IE_THROW_0() \
InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError>{} <<= std::stringstream{} << IE_LOCATION
(InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError>{IE_LOCATION_PARAM}) <<= std::stringstream {}
#define IE_THROW_1(ExceptionType) \
InferenceEngine::details::ThrowNow<InferenceEngine::ExceptionType>{} <<= \
std::stringstream{} << IE_LOCATION \
<< InferenceEngine::details::ExceptionTraits<InferenceEngine::ExceptionType>::string() \
<< ' '
#define IE_THROW_1(ExceptionType) \
(InferenceEngine::details::ThrowNow<InferenceEngine::ExceptionType>{IE_LOCATION_PARAM}) <<= std::stringstream {}
/// @endcond
/**
@ -452,7 +481,7 @@ struct INFERENCE_ENGINE_1_0_DEPRECATED ThrowNow final {
#ifdef NDEBUG
# define IE_ASSERT(EXPRESSION) \
if (!(EXPRESSION)) \
IE_THROW(GeneralError) << " AssertionFailed: " << #EXPRESSION
IE_THROW(GeneralError) << " AssertionError " #EXPRESSION
#else
/**
* @private
@ -470,9 +499,9 @@ struct NullStream {
#endif // NDEBUG
/// @cond
#define THROW_IE_EXCEPTION \
InferenceEngine::details::ThrowNow<InferenceEngine::details::InferenceEngineException>{} <<= std::stringstream{} \
<< IE_LOCATION
#define THROW_IE_EXCEPTION \
(InferenceEngine::details::ThrowNow<InferenceEngine::details::InferenceEngineException>{IE_LOCATION_PARAM}) <<= \
std::stringstream {}
#define IE_EXCEPTION_CASE(TYPE_ALIAS, STATUS_CODE, EXCEPTION_TYPE, ...) \
case InferenceEngine::STATUS_CODE: { \

View File

@ -59,15 +59,16 @@ namespace InferenceEngine {
}
#define CALL_STATUS_FNC(function, ...) \
if (!actual) \
if (!actual) { \
IE_THROW() << "Wrapper used was not initialized."; \
} \
ResponseDesc resp; \
auto res = actual->function(__VA_ARGS__, &resp); \
if (res != OK) \
IE_EXCEPTION_SWITCH( \
res, \
ExceptionType, \
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << resp.msg)
(InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM}) <<= std::stringstream{} << resp.msg)
#define CALL_STATUS_FNC_NO_ARGS(function) \
if (!actual) \
@ -75,8 +76,9 @@ namespace InferenceEngine {
ResponseDesc resp; \
auto res = actual->function(&resp); \
if (res != OK) \
IE_EXCEPTION_SWITCH(res, \
ExceptionType, \
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION)
IE_EXCEPTION_SWITCH( \
res, \
ExceptionType, \
(InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM}) <<= std::stringstream{})
} // namespace InferenceEngine

View File

@ -31,10 +31,9 @@ std::shared_ptr<T> CreateExtensionFromLibrary(std::shared_ptr<void> _so) {
ResponseDesc desc;
StatusCode sts = reinterpret_cast<CreateF*>(create)(object, &desc);
if (sts != OK) {
IE_EXCEPTION_SWITCH(
sts,
ExceptionType,
details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << desc.msg)
IE_EXCEPTION_SWITCH(sts,
ExceptionType,
details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << desc.msg)
}
IE_SUPPRESS_DEPRECATED_START
_ptr = std::shared_ptr<T>(object, [](T* ptr) {

View File

@ -18,14 +18,14 @@ public:
TO_STATUS(IE_EXCEPTION_SWITCH(
statusCode,
ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION))
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<= std::stringstream{}))
}
static InferenceEngine::StatusCode toStatusWrapperMsg(std::string& msg, InferenceEngine::ResponseDesc* resp) {
TO_STATUS(IE_EXCEPTION_SWITCH(
statusCode,
ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << msg))
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<= std::stringstream{} << msg))
}
};
@ -72,7 +72,7 @@ TEST_F(ExceptionTests, throwAfterConvertStatusToClassContainMessage) {
std::string refMessage = "Exception message!";
auto actual = std::make_shared<WrapperClass<StatusCode::NOT_ALLOCATED>>();
try {
CALL_STATUS_FNC(toStatusWrapperMsg, refMessage)
CALL_STATUS_FNC(toStatusWrapperMsg, refMessage);
} catch (const NotAllocated& iex) {
std::string actualMessage = iex.what();
ASSERT_TRUE(actualMessage.find(refMessage) != std::string::npos);

View File

@ -36,7 +36,8 @@ TEST(ExceptionTests, ExceptionShowsCorrectMessageDebugVersion) {
lineNum = __LINE__ + 1;
IE_THROW() << message;
} catch (InferenceEngine::Exception& iex) {
std::string ref_message = std::string{"\n"} + __FILE__ + ":" + std::to_string(lineNum) + " " + message;
std::string ref_message =
std::string{"\n"} + __FILE__ + ":" + std::to_string(lineNum) + " [ GENERAL_ERROR ] " + message;
ASSERT_STREQ(iex.what(), ref_message.c_str());
}
}
@ -46,7 +47,7 @@ TEST(ExceptionTests, ExceptionShowsCorrectMessageReleaseVersion) {
try {
IE_THROW() << message;
} catch (InferenceEngine::Exception& iex) {
std::string ref_message = message;
std::string ref_message = "[ GENERAL_ERROR ] " + message;
ASSERT_STREQ(iex.what(), ref_message.c_str());
}
}

View File

@ -92,9 +92,9 @@ struct DeviceInformation {
DeviceName unique_name;
unsigned int device_priority;
DeviceInformation(DeviceName dn = {}, ov::AnyMap conf = {},
int nReq = -1, std::string defaultID = {}, DeviceName uName = {}, unsigned int priority = 0)
: device_name(dn), config(conf),
num_requests_per_devices(nReq), default_device_id(defaultID), unique_name(uName), device_priority(priority)
int n_req = -1, std::string default_id = {}, DeviceName name = {}, unsigned int priority = 0)
: device_name(std::move(dn)), config(std::move(conf)),
num_requests_per_devices(n_req), default_device_id(std::move(default_id)), unique_name(std::move(name)), device_priority(priority)
{}
};

View File

@ -282,8 +282,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
auto ret = m_plugin_config.supported_properties(get_device_name());
return ret;
} else if (name == ov::device::full_name) {
std::string device_name = { get_device_name() };
return decltype(ov::device::full_name)::value_type {device_name};
return decltype(ov::device::full_name)::value_type {get_device_name()};
} else if (name == ov::device::capabilities.name()) {
auto device_list = get_core()->get_available_devices();
std::vector<std::string> capabilities;
@ -538,7 +537,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
queryconfig.apply_user_properties();
auto full_property = queryconfig.get_full_properties();
auto priorities = full_property.find(ov::device::priorities.name());
if (!priorities->second.empty()) {
if (priorities != full_property.end() && !priorities->second.empty()) {
auto meta_devices = parse_meta_devices(priorities->second.as<std::string>(), full_property);
std::unordered_set<std::string> supported_layers;
for (auto&& value : meta_devices) {

View File

@ -701,8 +701,8 @@ void StoreConvertEmitter::emit_isa(const std::vector<size_t> &in, const std::vec
void StoreConvertEmitter::emit_data() const {
store_emitter->emit_data();
}
size_t BrgemmEmitter::getBrgIdx(size_t mIdx, size_t kIdx, size_t nIdx) const {
return mIdx * 4 + kIdx * 2 + nIdx;
size_t BrgemmEmitter::getBrgIdx(size_t kIdx, size_t nIdx) const {
return kIdx * 2 + nIdx;
}
BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa,
const std::shared_ptr<ov::Node>& node) : jit_emitter(h, isa, node) {
@ -758,10 +758,8 @@ BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl:
return std::distance(layout.begin(), std::find(layout.begin(), layout.end(), idx));
};
m_M = C_shape[get_ordered_idx(C_layout, C_layout.size() - 2)];
m_K = A_shape[get_ordered_idx(A_layout, A_layout.size() - 1)];
m_M_blk = matmulOptimalM;
m_M_tail = m_M % m_M_blk;
m_M = brgemm_node->get_input_count(0);
m_N = C_shape[get_ordered_idx(C_layout, C_layout.size() - 1)];
auto brg0Prc = InferenceEngine::details::convertPrecision(brgemm_node->get_input_element_type(0));
@ -780,34 +778,28 @@ BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl:
: m_K;
m_K_tail = m_K % m_K_blk;
size_t brg0BaseIdx = std::numeric_limits<size_t>::max();
for (size_t m = 0; m < 2; m++) {
for (size_t k = 0; k < 2; k++) {
for (size_t n = 0; n < 2; n++) {
auto& brgemmCtx = m_brgCtxs0[getBrgIdx(m, k, n)];
for (size_t k = 0; k < 2; k++) {
for (size_t n = 0; n < 2; n++) {
auto& brgemmCtx = m_brgCtxs0[getBrgIdx(k, n)];
auto M_ = m ? m_M_tail
: m_M < m_M_blk ? 0 : m_M_blk;
auto N_ = n ? m_N_tail : m_N - m_N_tail;
auto K_ = k ? m_K_tail : m_K - m_K_tail;
auto beta = k && m_brgCtxs0[getBrgIdx(m, 0, n)].K != 0 ? 1.0f : 0.0f;
auto M_ = m_M;
auto N_ = n ? m_N_tail : m_N - m_N_tail;
auto K_ = k ? m_K_tail : m_K - m_K_tail;
auto beta = k && m_brgCtxs0[getBrgIdx(0, n)].K != 0 ? 1.0f : 0.0f;
brgemmCtx.M = M_;
brgemmCtx.N = N_;
brgemmCtx.K = K_;
brgemmCtx.LDA = leading_dimensions[0];
brgemmCtx.LDB = brgemm_node->is_with_data_repacking() ? rnd_up(m_N, m_N_blk) : leading_dimensions[1];
brgemmCtx.LDC = leading_dimensions[2];
brgemmCtx.dt_in0 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg0Prc));
brgemmCtx.dt_in1 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg1Prc));
brgemmCtx.beta = beta;
brgemmCtx.M = M_;
brgemmCtx.N = N_;
brgemmCtx.K = K_;
brgemmCtx.LDA = leading_dimensions[0];
brgemmCtx.LDB = brgemm_node->is_with_data_repacking() ? rnd_up(m_N, m_N_blk) : leading_dimensions[1];
brgemmCtx.LDC = leading_dimensions[2];
brgemmCtx.dt_in0 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg0Prc));
brgemmCtx.dt_in1 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg1Prc));
brgemmCtx.beta = beta;
// don't create brgemm kernels for empty tiles
if (M_ != 0 && K_ != 0 && N_ != 0) {
if (brg0BaseIdx == std::numeric_limits<size_t>::max())
brg0BaseIdx = getBrgIdx(m, k, n);
initBrgemm(brgemmCtx, m_brgKernels0[getBrgIdx(m, k, n)], brgWithAMX);
}
// don't create brgemm kernels for empty tiles
if (M_ != 0 && K_ != 0 && N_ != 0) {
initBrgemm(brgemmCtx, m_brgKernels0[getBrgIdx(k, n)], brgWithAMX);
}
}
}
@ -878,36 +870,31 @@ void BrgemmEmitter::emit_impl(const std::vector<size_t>& in,
}
Xbyak::Reg64 output_0(static_cast<int>(out[0]));
for (size_t mb = 0; mb < div_up(m_M, m_M_blk); mb++) {
const bool is_M_tail = (m_M - mb * m_M_blk < m_M_blk);
size_t brgIdx0 = getBrgIdx(0, 0);
size_t K0_step0 = m_brgCtxs0[brgIdx0].K;
size_t K0_step1 = m_brgCtxs0[brgIdx0].K * m_brgCtxs0[brgIdx0].LDB;
size_t N0_step0 = m_brgCtxs0[brgIdx0].N * m_brg0VnniFactor;
size_t N0_step1 = m_brgCtxs0[brgIdx0].N;
for (size_t n = 0; n < 2; n++) {
for (size_t k = 0; k < 2; k++) {
auto& brgemmCtx = m_brgCtxs0[getBrgIdx(k, n)];
size_t brgIdx0 = getBrgIdx(0, 0, 0);
size_t K0_step0 = m_brgCtxs0[brgIdx0].K;
size_t K0_step1 = m_brgCtxs0[brgIdx0].K * m_brgCtxs0[brgIdx0].LDB;
size_t N0_step0 = m_brgCtxs0[brgIdx0].N * m_brg0VnniFactor;
size_t N0_step1 = m_brgCtxs0[brgIdx0].N;
for (size_t n = 0; n < 2; n++) {
for (size_t k = 0; k < 2; k++) {
size_t mIdx = is_M_tail ? 1 : 0;
auto& brgemmCtx = m_brgCtxs0[getBrgIdx(mIdx, k, n)];
if (brgemmCtx.K != 0 && brgemmCtx.N != 0) {
const size_t in0_offset = m_load_offset_a + k * K0_step0 * io_data_size[0];
const size_t in1_offset = m_load_offset_b + (k * K0_step1 + n * N0_step0) * io_data_size[1];
const size_t in2_offset = m_load_offset_scratch + (m_with_comp ? n * N0_step1 * sizeof(int32_t) : 0);
const size_t out0_offset = m_store_offset_c + n * N0_step1 * io_data_size[2];
if (brgemmCtx.K != 0 && brgemmCtx.N != 0) {
const size_t in0_offset = m_load_offset_a + (k * K0_step0 + mb * m_M_blk * brgemmCtx.LDA) * io_data_size[0];
const size_t in1_offset = m_load_offset_b + (k * K0_step1 + n * N0_step0) * io_data_size[1];
const size_t in2_offset = m_load_offset_scratch + (m_with_comp ? n * N0_step1 * sizeof(int32_t) : 0);
const size_t out0_offset = m_store_offset_c + (n * N0_step1 + mb * m_M_blk * brgemmCtx.LDC) * io_data_size[2];
emit_brgemm_kernel_call(m_brgKernels0[getBrgIdx(mIdx, k, n)].get(),
brgemmCtx,
input_0,
input_1,
input_2,
output_0,
in0_offset,
in1_offset,
in2_offset,
out0_offset);
}
emit_brgemm_kernel_call(m_brgKernels0[getBrgIdx(k, n)].get(),
brgemmCtx,
input_0,
input_1,
input_2,
output_0,
in0_offset,
in1_offset,
in2_offset,
out0_offset);
}
}
}

View File

@ -353,7 +353,7 @@ private:
float beta;
};
void initBrgemm(brgemmCtx& ctx, std::unique_ptr<dnnl::impl::cpu::x64::brgemm_kernel_t>& brgKernel, bool use_amx) const;
size_t getBrgIdx(size_t mIdx, size_t kIdx, size_t nIdx) const;
size_t getBrgIdx(size_t kIdx, size_t nIdx) const;
void emit_brgemm_kernel_call(const dnnl::impl::cpu::x64::brgemm_kernel_t* brg_kernel, const brgemmCtx& ctx,
Xbyak::Reg64 addr_A, Xbyak::Reg64 addr_B, Xbyak::Reg64 scratch, Xbyak::Reg64 addr_C,
@ -362,11 +362,10 @@ private:
static void kernel_execute(const dnnl::impl::cpu::x64::brgemm_kernel_t *brg_kernel, const void *A, const void *B, void *C, void *scratch, int with_comp);
static constexpr size_t BRGEMM_KERNELS_NUM = 8;
static constexpr size_t matmulOptimalM = 32;
brgemmCtx m_brgCtxs0[BRGEMM_KERNELS_NUM];
std::unique_ptr<dnnl::impl::cpu::x64::brgemm_kernel_t> m_brgKernels0[BRGEMM_KERNELS_NUM];
size_t m_M, m_M_blk, m_M_tail;
size_t m_M;
size_t m_K, m_K_blk, m_K_tail;
size_t m_N, m_N_blk, m_N_tail;
size_t m_brg0VnniFactor;

View File

@ -1996,7 +1996,15 @@ void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph &graph) {
const auto &outputShape = child->getOutputShapeAtPort(0);
VectorDims outputDims = outputShape.getDims();
const auto channelPos = parent->getParentEdgeAt(0)->getParent()->getFusingAxis();
// We need to compute the port of the non-constant (unfolded) parent explicitly,
// because there is no guarantee that the operand order stays invariant
// (i.e. that this port remains zero) after all transformations, which may cause a wrong
// channel dim in a [Const-Shift -> Add <- Mul] topology with a constant-folded shift
// (a Const node returns 1 as the channel dim by default).
// See the FQScaleshiftWithConstantShift test.
const auto nonConstPort = (parent->getParentEdgeAt(0)->getParent()->isConstant() ? 1 : 0);
const auto channelPos = parent->getParentEdgeAt(nonConstPort)->getParent()->getFusingAxis();
if (outputShape.isDynamic()) {
if (outputDims[channelPos] == Shape::UNDEFINED_DIM) {

View File

@ -25,6 +25,7 @@
#include "utils/cpu_utils.hpp"
#include "emitters/x64/cpu_generator.hpp"
#include "transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.hpp"
#include "transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp"
#include "transformations/snippets/x64/pass/mul_add_to_fma.hpp"
#include "transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.hpp"
#include "transformations/snippets/x64/pass/remove_converts.hpp"
@ -564,6 +565,9 @@ void Snippet::generate(const jit_snippets_compile_args* jcp) {
CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::RemoveConverts);
CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::MulAddToFMA);
ov::snippets::lowered::pass::PassPipeline control_flow_markup_pipeline;
CPU_REGISTER_PASS_X64(control_flow_markup_pipeline, ov::intel_cpu::pass::BrgemmBlocking);
ov::snippets::lowered::pass::PassPipeline control_flow_pipeline;
CPU_REGISTER_PASS_X64(control_flow_pipeline, ov::intel_cpu::pass::FuseLoadStoreConvert);
@ -571,6 +575,7 @@ void Snippet::generate(const jit_snippets_compile_args* jcp) {
pre_dialect,
post_dialect,
post_precision,
control_flow_markup_pipeline,
control_flow_pipeline,
reinterpret_cast<const void*>(jcp));
}

View File

@ -0,0 +1,80 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "brgemm_blocking.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "snippets/itt.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/loop_manager.hpp"
#include "snippets/snippets_isa.hpp"
#include "transformations/snippets/x64/op/brgemm_cpu.hpp"
namespace ov {
namespace intel_cpu {
namespace pass {
using LoopManager = snippets::lowered::LinearIR::LoopManager;
using LoopInfoPtr = LoopManager::LoopInfoPtr;
using LoopPort = LoopManager::LoopPort;
BrgemmBlocking::BrgemmBlocking() : Pass() {}
bool BrgemmBlocking::run(snippets::lowered::LinearIR& linear_ir) {
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmBlocking")
if (linear_ir.empty())
return false;
// Ticket: 113745
// TODO: make the block size configurable
const auto block_size = 32;
const auto dim_idx = 1;
const auto& loop_manager = linear_ir.get_loop_manager();
auto blocking_loop_exists = [&](const ov::snippets::lowered::ExpressionPtr& expr,
const std::shared_ptr<ov::intel_cpu::BrgemmCPU>& brgemm) {
const auto& loop_ids = expr->get_loop_ids();
for (const auto& id : loop_ids) {
const auto loop = loop_manager->get_loop_info(id);
if (loop->dim_idx == dim_idx) {
OPENVINO_ASSERT(brgemm->get_input_count(0) == loop->increment,
"Brgemm ", brgemm, " has input count (", brgemm->get_input_count(0),
") which doesn't match the increment(", loop->increment, ") of loop by M");
return true;
}
}
return false;
};
bool modified = false;
for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) {
const auto& expr = *expr_it;
const auto brgemm = ov::as_type_ptr<ov::intel_cpu::BrgemmCPU>(expr->get_node());
if (!brgemm || blocking_loop_exists(expr, brgemm))
continue;
const auto& input_shape_0 = expr->get_input_port_descriptor(0)->get_shape();
const auto& input_layout_0 = expr->get_input_port_descriptor(0)->get_layout();
const auto& dim = *(input_layout_0.rbegin() + dim_idx);
const auto& m = input_shape_0[dim];
brgemm->set_input_count(block_size);
const auto work_amount = m;
const auto increment = block_size;
std::vector<LoopPort> entries{LoopPort(expr->get_input_port(0), true), LoopPort(expr->get_input_port(1), false)};
if (brgemm->is_with_scratchpad())
entries.emplace_back(expr->get_input_port(2), false);
std::vector<LoopPort> exits{LoopPort(expr->get_output_port(0), true)};
loop_manager->mark_loop(expr_it, std::next(expr_it), work_amount, increment, dim_idx, entries, exits);
}
return modified;
}
} // namespace pass
} // namespace intel_cpu
} // namespace ov
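To make the division of work between this pass and the emitter easier to follow, here is a minimal standalone sketch (illustrative only; the block size of 32 and the 2x2 kernel table mirror the code above and the BrgemmEmitter changes, everything else is made up for the example):

#include <cstddef>
#include <cstdio>

// The M dimension is now blocked by the loop that BrgemmBlocking marks in the
// LinearIR (work_amount = M, increment = block_size = 32); the emitter keeps only
// a 2x2 table of kernels for the K/N body-vs-tail combinations.
static std::size_t getBrgIdx(std::size_t kIdx, std::size_t nIdx) {
    return kIdx * 2 + nIdx;
}

int main() {
    const std::size_t M = 70;
    const std::size_t block_size = 32;
    for (std::size_t m = 0; m < M; m += block_size) {           // loop injected by the pass
        const std::size_t rows = (M - m < block_size) ? M - m : block_size;
        for (std::size_t k = 0; k < 2; ++k)                      // 0 = body, 1 = tail
            for (std::size_t n = 0; n < 2; ++n)
                std::printf("block at m=%zu (%zu rows) uses kernel %zu\n", m, rows, getBrgIdx(k, n));
    }
    return 0;
}

Read together with the emitter changes above: m_M is now taken from brgemm_node->get_input_count(0), and the OPENVINO_ASSERT in blocking_loop_exists checks that this value matches the increment of the loop marked here.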

View File

@ -0,0 +1,28 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "snippets/lowered/pass/pass.hpp"
namespace ov {
namespace intel_cpu {
namespace pass {
/**
* @interface BrgemmBlocking
* @brief Covers BrgemmCPU with blocking loop by M
* @ingroup snippets
*/
class BrgemmBlocking : public snippets::lowered::pass::Pass {
public:
OPENVINO_RTTI("BrgemmBlocking", "Pass")
BrgemmBlocking();
bool run(snippets::lowered::LinearIR& linear_ir) override;
};
} // namespace pass
} // namespace intel_cpu
} // namespace ov

View File

@ -21,7 +21,7 @@ namespace pass {
class FuseLoadStoreConvert: public snippets::lowered::pass::Pass {
public:
FuseLoadStoreConvert() = default;
OPENVINO_RTTI("FuseLoadStoreConvert", "LinearIRTransformation");
OPENVINO_RTTI("FuseLoadStoreConvert", "Pass");
bool run(snippets::lowered::LinearIR& linear_ir) override;
private:

View File

@ -0,0 +1,89 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace SubgraphTestsDefinitions {
using FQScaleshiftWithConstantShiftTestParams = Precision;
class FQScaleshiftWithConstantShiftTest : public testing::WithParamInterface<FQScaleshiftWithConstantShiftTestParams>,
public CPUTestsBase,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<FQScaleshiftWithConstantShiftTestParams> obj) {
Precision netPrecision;
netPrecision = obj.param;
std::ostringstream result;
result << "netPRC=" << netPrecision.name() << "_";
return result.str();
}
protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
Precision netPrecision;
netPrecision = this->GetParam();
const auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::vector<SizeVector> mmShape{{25, 14, 14, 768}};
SizeVector mmShape2{768, 2304};
SizeVector sumShape{1, 1, 1, 2304};
// avoid eliminations
std::vector<int> mmInData(768 * 2304);
std::fill(mmInData.begin(), mmInData.end(), 2);
mmInData[0] = 1;
std::vector<int> sumConstData(1 * 1 * 1 * 2304);
std::iota(sumConstData.begin(), sumConstData.end(), 0);
auto constShift = ngraph::opset5::Constant::create(ngraph::element::f32, sumShape, sumConstData);
auto mmConst = ngraph::opset5::Constant::create(ngraph::element::f32, mmShape2, mmInData);
auto mmParams = builder::makeParams(ngPrec, {mmShape});
const auto mmOutputNodes = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(mmParams));
const auto mm = builder::makeMatMul(mmOutputNodes[0], mmConst, false, false);
auto sum = ngraph::builder::makeEltwise(constShift, mm, ngraph::helpers::EltwiseTypes::ADD);
auto fq = ngraph::builder::makeFakeQuantize(sum, ngraph::element::f32, 256, {}, {-8.0f}, {7.0f}, {-8.0f}, {7.0f});
ngraph::ParameterVector inputParams = {mmParams[0]};
function = makeNgraphFunction(ngPrec, inputParams, fq, "FQScaleshiftWithConstantShift");
}
};
/* Network with SS subgraph and FQ node. Shift in SS is constant-folded.
* Test that FQ-SS fusing works correctly while comparing SS and FQ channel dims.
Input Const
\ /
\ /
\ /
MatMul Const
\ /
\ /
\ /
Add
|
|
FQ
|
|
Output
*/
TEST_P(FQScaleshiftWithConstantShiftTest, CompareWithRefs) {
Run();
}
namespace {
INSTANTIATE_TEST_SUITE_P(smoke_Check, FQScaleshiftWithConstantShiftTest,
::testing::Values(Precision::FP32),
FQScaleshiftWithConstantShiftTest::getTestCaseName);
} // namespace
} // namespace SubgraphTestsDefinitions

View File

@ -18,6 +18,7 @@
#include "gna_lib_ver_selector.hpp"
#include "ie_ngraph_utils.hpp"
#include "log/log.hpp"
#include "openvino/opsets/opset12.hpp"
namespace std {
inline std::ostream& operator<<(std::ostream& os, const std::set<ov::element::Type>& t) {
@ -35,6 +36,7 @@ inline std::ostream& operator<<(std::ostream& os, const std::set<ov::element::Ty
namespace ov {
namespace intel_gna {
using namespace target;
using namespace opset12;
namespace limitations {
class SupportedElementTypes {
@ -689,22 +691,6 @@ void Limitations::init(const DeviceVersion& compile_target) {
k_instance = std::shared_ptr<Limitations>(new Limitations(compile_target));
}
bool Limitations::is_transpose_2d(const std::vector<size_t>& shape) {
return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
return dim != 1;
}) == 2;
}
bool Limitations::is_transpose_supported(const std::vector<size_t>& shape) {
if (!is_transpose_2d(shape))
return false;
auto shape_no_1 = shape;
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
size_t min, max;
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize;
}
size_t Limitations::get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input) {
auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
return total_size / kBufferMaxSize + 1;
@ -753,31 +739,38 @@ bool SupportedElementTypes::IsConstantTypeSupported(ov::element::Type elem_type,
return true;
}
bool Limitations::is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Transpose node is empty!");
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
bool Limitations::is_transpose_supported(const ov::Shape& shape) {
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(shape);
// GNA transpose limitations:
// - supports 2d transposes only
// - smaller dimension should be less or equal to 8
// - bigger dimension should be a multiple of Limitations::kNoOfInputsDivisor
if (squeezed_shape.size() == 2 && min_input_dim <= 8 && ALIGN(max_input_dim, kNoOfInputsDivisor) == max_input_dim) {
return true;
// - bigger dimension should be a multiple of Limitations::kNoOfInputsDivisor
if (squeezed_shape.size() == 2) {
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
if (min_input_dim <= 8 && max_input_dim % Limitations::kNoOfInputsDivisor == 0 &&
max_input_dim <= kTransposeMaxSize) {
return true;
}
}
return false;
}
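A quick illustration of the rule above, as a standalone sketch rather than patch code. It assumes kNoOfInputsDivisor is 8 (as in the replaced implementation), that graph_utils::squeeze_shape drops every dimension equal to 1, and it ignores the kTransposeMaxSize upper bound:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Standalone sketch of the check above; not part of the patch.
static bool transpose_supported(std::vector<std::size_t> shape) {
    // "squeeze": drop every dimension equal to 1 (assumed behaviour of graph_utils::squeeze_shape)
    shape.erase(std::remove(shape.begin(), shape.end(), std::size_t{1}), shape.end());
    if (shape.size() != 2)
        return false;
    const std::size_t min_dim = std::min(shape[0], shape[1]);
    const std::size_t max_dim = std::max(shape[0], shape[1]);
    return min_dim <= 8 && max_dim % 8 == 0;  // kNoOfInputsDivisor assumed to be 8
}

int main() {
    std::printf("{1, 8, 64} -> %d\n", transpose_supported({1, 8, 64}));  // 1: 2-D after squeeze, 8 <= 8, 64 % 8 == 0
    std::printf("{3, 20}    -> %d\n", transpose_supported({3, 20}));     // 0: 20 is not a multiple of 8
    std::printf("{2, 3, 4}  -> %d\n", transpose_supported({2, 3, 4}));   // 0: still 3-D after squeezing
    return 0;
}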
bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
bool Limitations::is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Transpose node is empty!");
return is_transpose_supported(node->get_input_shape(0));
}
bool Limitations::is_conv_supported(const std::shared_ptr<ov::intel_gna::op::GNAConvolution>& conv_gna,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
OPENVINO_ASSERT(conv_gna, "GNAConvolution node is empty!");
size_t batch_size = conv_gna->input_value(0).get_shape()[0];
if (batch_size != 1) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
", type: " + conv_ie->get_type_name() + ", and batch size(" +
THROW_GNA_EXCEPTION << "topology with layer: " + conv_gna->get_friendly_name() +
", type: " + conv_gna->get_type_name() + ", and batch size(" +
std::to_string(batch_size) + ") != 1 not supported";
}
return false;
@ -789,15 +782,15 @@ bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::Convolutio
static_cast<uint32_t>(filter_stride_width));
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
error,
conv_ie->get_friendly_name(),
conv_ie->get_type_name());
conv_gna->get_friendly_name(),
conv_gna->get_type_name());
};
auto input_shape = conv_ie->input_value(0).get_shape();
auto filter_shape = conv_ie->input_value(1).get_shape();
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
auto input_shape = conv_gna->input_value(0).get_shape();
auto filter_shape = conv_gna->input_value(1).get_shape();
if ((4 == filter_shape.size() && filter_shape[1] > 1 && filter_shape[2] > 1) ||
(4 == input_shape.size() && input_shape[1] > 1 && input_shape[2] > 1)) {
pass::helper::ConvData conv_data;
pass::helper::GetConvData(conv_ie, conv_data);
pass::helper::GetConvData(conv_gna, conv_data);
if (gna_convolution_layer::isMappableFrom2DTo1D(static_cast<uint32_t>(conv_data.input_height),
static_cast<uint32_t>(conv_data.input_width),
static_cast<uint32_t>(conv_data.input_channel_count),
@ -809,7 +802,7 @@ bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::Convolutio
}
if (m_cnn_validator) {
return m_cnn_validator->ValidateCnn2D(conv_ie->get_friendly_name(),
return m_cnn_validator->ValidateCnn2D(conv_gna->get_friendly_name(),
static_cast<uint32_t>(conv_data.input_height),
static_cast<uint32_t>(conv_data.input_width),
static_cast<uint32_t>(conv_data.input_channel_count),
@ -824,10 +817,12 @@ bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::Convolutio
is_exception_allowed);
}
}
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
return check_dilation(conv_gna->get_dilations()[0],
conv_gna->get_dilations()[conv_gna->get_dilations().size() - 1]);
}
bool Limitations::is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
bool Limitations::is_pooling_supported(const std::shared_ptr<ov::intel_gna::op::GNAMaxPool> max_pool,
bool is_exception_allowed) {
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
auto kernels = max_pool->get_kernel();
@ -869,6 +864,100 @@ bool Limitations::is_split_supported(const std::shared_ptr<ov::Node>& node, bool
return is_aligned;
}
bool Limitations::is_concat_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Concat node is empty!");
auto concat_node = std::dynamic_pointer_cast<const Concat>(node);
const ov::Shape& output_shape = concat_node->get_output_shape(0);
auto axis = concat_node->get_axis();
return graph_utils::get_first_valuable_dim_id(output_shape) == axis;
}
bool Limitations::is_forward_transposed_concat_supported(const std::shared_ptr<const ov::Node>& node,
const AxisVector& order) {
auto concat_node = std::dynamic_pointer_cast<const Concat>(node);
if (!concat_node) {
log::debug() << "Concat node is empty!" << std::endl;
return false;
}
const ov::Shape& output_shape = concat_node->get_output_shape(0);
auto axis = concat_node->get_axis();
const ov::Shape& transposed_shape =
graph_utils::transpose_shape(output_shape, pass::helper::reverse_transpose_order(order));
const size_t transposed_concat_axis = order[axis];
return graph_utils::get_first_valuable_dim_id(transposed_shape) == static_cast<int64_t>(transposed_concat_axis);
}
bool Limitations::is_backward_transposed_concat_supported(const std::shared_ptr<const ov::Node>& node,
const AxisVector& order) {
auto concat_node = std::dynamic_pointer_cast<const Concat>(node);
if (!concat_node) {
log::debug() << "Concat node is empty!" << std::endl;
return false;
}
const ov::Shape& output_shape = concat_node->get_output_shape(0);
auto axis = concat_node->get_axis();
const ov::Shape& transposed_shape = graph_utils::transpose_shape(output_shape, order);
const size_t transposed_concat_axis = order[axis];
return graph_utils::get_first_valuable_dim_id(transposed_shape) == static_cast<int64_t>(transposed_concat_axis);
}
bool Limitations::is_forward_transposed_split_supported(const std::shared_ptr<const ov::Node>& node,
const AxisVector& order) {
std::shared_ptr<const ov::Node> split_node = nullptr;
if (std::dynamic_pointer_cast<const Split>(node)) {
split_node = std::dynamic_pointer_cast<const Split>(node);
} else if (std::dynamic_pointer_cast<const VariadicSplit>(node)) {
split_node = std::dynamic_pointer_cast<const VariadicSplit>(node);
} else {
log::debug() << "Split node is empty!" << std::endl;
return false;
}
const ov::Shape& output_shape = split_node->get_output_shape(0);
auto constant_node = as_type_ptr<Constant>(split_node->input_value(1).get_node_shared_ptr());
if (!constant_node)
return false;
auto axis = constant_node->get_axis_vector_val()[0];
const ov::Shape& transposed_shape =
graph_utils::transpose_shape(output_shape, pass::helper::reverse_transpose_order(order));
const size_t transposed_concat_axis = order[axis];
return graph_utils::get_first_valuable_dim_id(transposed_shape) == static_cast<int64_t>(transposed_concat_axis);
}
bool Limitations::is_backward_transposed_split_supported(const std::shared_ptr<const ov::Node>& node,
const AxisVector& order) {
std::shared_ptr<const ov::Node> split_node = nullptr;
if (std::dynamic_pointer_cast<const Split>(node)) {
split_node = std::dynamic_pointer_cast<const Split>(node);
} else if (std::dynamic_pointer_cast<const VariadicSplit>(node)) {
split_node = std::dynamic_pointer_cast<const VariadicSplit>(node);
} else {
log::debug() << "Split node is empty!" << std::endl;
return false;
}
const ov::Shape& output_shape = split_node->get_output_shape(0);
auto constant_node = as_type_ptr<Constant>(split_node->input_value(1).get_node_shared_ptr());
if (!constant_node)
return false;
auto axis = constant_node->get_axis_vector_val()[0];
const ov::Shape& transposed_shape =
graph_utils::transpose_shape(output_shape, pass::helper::reverse_transpose_order(order));
const int64_t transposed_concat_axis = order[axis];
return graph_utils::get_first_valuable_dim_id(transposed_shape) == transposed_concat_axis;
}
bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
@ -876,12 +965,13 @@ bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
return SupportedElementTypes::IsParameterTypeSupported(node->get_element_type(), is_exception_allowed);
} else if (ov::op::util::is_constant(node)) {
return SupportedElementTypes::IsConstantTypeSupported(node->get_element_type(), is_exception_allowed);
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
return is_conv_supported(conv_ie, gna_precision, is_exception_allowed);
} else if (auto conv = std::dynamic_pointer_cast<ov::intel_gna::op::GNAConvolution>(node)) {
return is_conv_supported(conv, gna_precision, is_exception_allowed);
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
return is_fc_supported(fully_connected, is_exception_allowed);
} else if (ov::intel_gna::graph_utils::is_pooling(node)) {
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node), is_exception_allowed);
return is_pooling_supported(std::dynamic_pointer_cast<ov::intel_gna::op::GNAMaxPool>(node),
is_exception_allowed);
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
ov::intel_gna::graph_utils::is_crop_affined(node) ||
@ -891,11 +981,11 @@ bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
(std::dynamic_pointer_cast<MatMul>(node) != nullptr)) {
return true;
} else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
if ((std::dynamic_pointer_cast<Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<VariadicSplit>(node) != nullptr)) {
return is_split_supported(node, is_exception_allowed);
}
// TODO check concat are aligned when transformation will be moved to ngraph

View File

@ -20,6 +20,8 @@
#include "legacy/ngraph_ops/fully_connected.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "ngraph/opsets/opset9.hpp"
#include "ops/gna_convolution.hpp"
#include "ops/gna_max_pool.hpp"
namespace ov {
namespace intel_gna {
@ -173,8 +175,6 @@ public:
*/
static inline std::shared_ptr<Limitations> get_instance();
static bool is_transpose_2d(const std::vector<size_t>& shape);
static bool is_transpose_supported(const std::vector<size_t>& shape);
static size_t get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input);
/**
@ -202,6 +202,13 @@ public:
* @return true if supported
*/
static bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
/**
* @brief Validates if transpose is supported by GNA
* @param shape input shape of the transpose
* @return true if supported
*/
static bool is_transpose_supported(const ov::Shape& shape);
/**
* @brief Validates if transpose is supported by GNA
* @param node transpose
@ -209,13 +216,13 @@ public:
*/
static bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
/**
* @brief Validates if legacy convolution is supported by GNA
* @param conv_ie convolution
* @brief Validates if convolution is supported by GNA
* @param conv_gna GNA convolution
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
bool is_conv_supported(const std::shared_ptr<ov::intel_gna::op::GNAConvolution>& conv_gna,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
@ -224,9 +231,19 @@ public:
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if precision is found in supported
*/
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
bool is_pooling_supported(const std::shared_ptr<ov::intel_gna::op::GNAMaxPool> max_pool,
bool is_exception_allowed = false);
static bool is_concat_supported(const std::shared_ptr<const ov::Node>& node);
static bool is_forward_transposed_concat_supported(const std::shared_ptr<const ov::Node>& node,
const AxisVector& order);
static bool is_backward_transposed_concat_supported(const std::shared_ptr<const ov::Node>& node,
const AxisVector& order);
static bool is_forward_transposed_split_supported(const std::shared_ptr<const ov::Node>& node,
const AxisVector& order);
static bool is_backward_transposed_split_supported(const std::shared_ptr<const ov::Node>& node,
const AxisVector& order);
/**
* @brief Validates if operation is supported by GNA
* @param node operation

View File

@ -198,7 +198,8 @@ inline bool is_eltwise_add(const std::shared_ptr<ngraph::Node>& node) {
}
inline bool is_pooling(const std::shared_ptr<ngraph::Node>& node) {
return (std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node) != nullptr);
return ((std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node) != nullptr) ||
std::dynamic_pointer_cast<ov::intel_gna::op::GNAMaxPool>(node) != nullptr);
}
template <typename T>
@ -268,7 +269,7 @@ inline bool has_32bit_output(const std::shared_ptr<ngraph::Node>& node) {
return ((std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::Convolution>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ov::intel_gna::op::GNAConvolution>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::Add>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::Multiply>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::Eltwise>(node) != nullptr) ||
@ -625,6 +626,20 @@ bool has_child_node(std::shared_ptr<ov::Node> node) {
return false;
}
/**
* @brief Checks if shape without dimensions == 1 is 2D
*/
inline bool is_shape_2d(const ov::Shape& shape) {
return graph_utils::squeeze_shape(shape).size() == 2;
}
/**
* @brief Checks if node has N consumers
*/
inline bool has_n_consumers(const std::shared_ptr<ov::Node>& node, size_t n_consumers) {
return node->output(0).get_target_inputs().size() == n_consumers;
}
} // namespace graph_utils
} // namespace intel_gna
} // namespace ov

View File

@ -74,9 +74,11 @@ struct GnaDesc {
}
InferenceEngine::DataPtr to_ie_data() {
OPENVINO_SUPPRESS_DEPRECATED_START
return std::make_shared<InferenceEngine::Data>(
name,
InferenceEngine::TensorDesc(model_precision, dims, model_layout));
OPENVINO_SUPPRESS_DEPRECATED_END
}
};
@ -98,9 +100,11 @@ struct InputDesc : GnaDesc {
}
InferenceEngine::InputInfo::Ptr ToIEInputInfo() {
OPENVINO_SUPPRESS_DEPRECATED_START
InferenceEngine::InputInfo::Ptr input_info = std::make_shared<InferenceEngine::InputInfo>();
input_info->setInputData(this->to_ie_data());
return input_info;
OPENVINO_SUPPRESS_DEPRECATED_END
}
};

View File

@ -90,7 +90,7 @@ size_t LayerQuantizer::GetBiasSizeForLayer(InferenceEngine::WeightableLayer& wl)
return wl._biases->size();
} else if (LayerInfo(wl).isConvolution()) {
// Calculating biases len using outdata dims: biases number should be equal to output channels number
return InferenceEngine::GetDataDimByName(wl.outData.front(), InferenceEngine::DataDimName::C);
return InferenceEngine::GetDataDimSizeNHWC(wl.outData.front(), InferenceEngine::DataDimName::C);
} else {
// Calculating biases size using outData dimensions
return wl.outData.front()->getDims().back();

View File

@ -1265,7 +1265,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer*>(wl);
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inDepth = GetDataDimSizeNHWC(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = gna_convolution_layer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer);

View File

@ -307,12 +307,26 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) {
void GNAGraphCompiler::assertConvolutionLayoutProper(const InferenceEngine::DataPtr& data) {
if (data->getLayout() != InferenceEngine::Layout::NHWC && data->getLayout() != InferenceEngine::Layout::NCHW &&
data->getLayout() != InferenceEngine::Layout::NC) {
data->getLayout() != InferenceEngine::Layout::NC && data->getLayout() != InferenceEngine::Layout::CHW) {
THROW_GNA_EXCEPTION << "layer: \"Convolution\" with layout " << data->getLayout()
<< " isn't currently supported on GNA";
}
}
namespace {
template <typename T>
PropertyVector<T> property_vector_append(PropertyVector<T> properties, T value) {
std::vector<T> new_values;
for (size_t i = 0; i < properties.size(); ++i)
new_values.push_back(properties[i]);
new_values.push_back(value);
return PropertyVector<T>(new_values);
}
} // namespace
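In other words (an informal reading, not extra patch code): property_vector_append copies the existing entries and adds one more, so a one-element padding vector {p} becomes {p, 0}; the CHW branches of the convolution and pooling primitives below use it to give 3-D layers a dummy second spatial dimension.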
/**
* Create AMIntelDNN Convolutional1DComponent from ConvolutionLayer
*
@ -338,15 +352,24 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
const auto outputs = layer->outData.front();
assertConvolutionLayoutProper(inputs);
const auto in_batch = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::N);
const auto in_channels = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::C);
auto in_height = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::H);
auto in_width = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::W);
const auto in_batch = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::N);
const auto in_channels = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::C);
auto in_height = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::H);
auto in_width = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::W);
const auto out_batch = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::N);
const auto out_channels = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::C);
auto out_height = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::H);
auto out_width = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::W);
const auto out_batch = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::N);
const auto out_channels = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);
auto out_height = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::H);
auto out_width = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::W);
if (inputs->getLayout() == InferenceEngine::Layout::CHW) {
// The convolution is 3-D (ngraph) here; adjust its attributes so it can be handled as a 4-D one
convolution._kernel_y = 1;
convolution._dilation_y = 1;
convolution._stride_y = 1;
convolution._padding = property_vector_append<unsigned int>(convolution._padding, 0);
convolution._pads_end = property_vector_append<unsigned int>(convolution._pads_end, 0);
}
if (in_height > 1 && in_width == 1 && !ShouldUseOnlyConv2DGnaIface()) {
std::swap(in_height, in_width);
@ -589,42 +612,12 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
});
}
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know
// how kaldi will handle that
if (!dnn->do_rotate_input) {
if ((inputs->getLayout() != InferenceEngine::Layout::NHWC || transpose_h_w) &&
LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = effectiveStride;
dnn->num_rotate_columns = num_inputs / effectiveStride;
} else {
dnn->do_rotate_input = false;
}
}
connectOutput(layer, ptr_outputs, num_data_bytes_out);
// Transpose H with W or C with HW
auto A = transpose_h_w ? in_kernel_h : in_channels;
auto B = transpose_h_w ? in_kernel_w : convolution._kernel[X_AXIS];
std::vector<uint8_t> transposedWeights;
for (uint32_t k = 0; k < num_filters; k++) {
uint8_t* ptr_filt_current =
convolution._weights->cbuffer().as<uint8_t*>() + k * A * B * convolution.precision.size();
auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B);
transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
}
if (transposedWeights.size() != convolution._weights->byteSize()) {
THROW_GNA_LAYER_EXCEPTION(&convolution) << "weights was transposed incorrectly. " << transposedWeights.size()
<< ' ' << convolution._weights->byteSize();
}
if (num_conv_kernel_padding == 0) {
gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
ptr_weights,
transposedWeights.data(),
convolution._weights->cbuffer(),
convolution._weights->byteSize());
} else {
auto paddedWeights = num_filter_coefficients * num_filters;
@ -636,7 +629,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
layerName,
num_conv_kernel_padding,
cpSize,
transposedWeights,
convolution,
num_filters,
single_conv_kernel_size](void* data, std::size_t size) {
if (paddedWeightsSize > size) {
@ -648,7 +641,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
for (uint32_t i = 0; i < num_filters; i++) {
ie_memcpy(dstPtr + offset,
size - offset,
transposedWeights.data() + single_conv_kernel_size * i * cpSize,
convolution._weights->cbuffer().as<uint8_t*>() + single_conv_kernel_size * i * cpSize,
single_conv_kernel_size * cpSize);
offset += single_conv_kernel_size * cpSize;
ie_memcpy(dstPtr + offset, size - offset, &padding_zeros[0], padding_zeros.size());
@ -783,22 +776,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know
// how kaldi will handle that
if (!dnn->do_rotate_input && inputs->getLayout() != InferenceEngine::Layout::NHWC &&
LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = in_channels;
if (in_height != 1) {
dnn->num_rotate_rows *= convolution._stride_y;
}
if (in_width != 1) {
dnn->num_rotate_rows *= convolution._stride_x;
}
dnn->num_rotate_columns = num_inputs / dnn->num_rotate_rows;
}
connectOutput(layer, ptr_outputs, num_data_bytes_out);
const auto convolution_precision = convolution.precision.size();
@ -815,7 +792,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
ALIGN(effective_single_kernel_size, Limitations::kConvEachKernelByteAlignment) - effective_single_kernel_size;
for (uint32_t k = 0; k < convolution._out_depth; k++) {
uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * single_kernel_size;
auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW);
auto transposed_part = copy_matrix(ptr_filt_current, convolution.precision.size(), in_channels, kernelHW);
transposed_weights.insert(transposed_weights.end(), transposed_part.begin(), transposed_part.end());
transposed_weights.resize(transposed_weights.size() + effective_single_kernel_size - single_kernel_size +
kernel_pad);
@ -997,13 +974,19 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin();
uint32_t w_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::C);
uint32_t w_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::C);
uint32_t w_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);
uint32_t w_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::C);
if (inputs->getLayout() == InferenceEngine::Layout::CHW) {
// The pooling is 3-D (ngraph) here; adjust its attributes so it can be handled as a 4-D one
pooling._kernel = property_vector_append<unsigned int>(pooling._kernel, 1);
pooling._stride = property_vector_append<unsigned int>(pooling._stride, 1);
}
void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr;
@ -2590,7 +2573,8 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
if (layer->params.find("output_offset") != layer->params.end()) {
output_offset = layer->GetParamAsInt("output_offset");
}
gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset);
gnamem->getQueue(REGION_AUTO)
->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset, num_data_bytes_out);
}
return;
}
@ -2859,5 +2843,15 @@ std::vector<uint8_t> GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix,
return temp_buffer;
}
std::vector<uint8_t> GNAGraphCompiler::copy_matrix(uint8_t* ptr_matrix,
size_t element_size,
uint32_t num_rows,
uint32_t num_cols) {
const size_t dest_size = num_rows * num_cols * element_size;
std::vector<uint8_t> temp_buffer(dest_size);
::memcpy(temp_buffer.data(), ptr_matrix, dest_size);
return temp_buffer;
}
} // namespace intel_gna
} // namespace ov

View File

@ -53,6 +53,10 @@ private:
size_t element_size,
uint32_t num_rows,
uint32_t num_cols);
std::vector<uint8_t> static copy_matrix(uint8_t* ptr_matrix,
size_t element_size,
uint32_t num_rows,
uint32_t num_cols);
bool ShouldUseOnlyConv2DGnaIface() const;

View File

@ -237,32 +237,6 @@ inline InferenceEngine::CNNLayerPtr FindPermutationAfterConvolutionInKaldiModel(
return next;
}
/**
* @brief identifies if a model must be converted to NHWC, it must not be neither NHWC, nor Kaldi
* @param layers model sorted layers
*/
inline bool MustBeConvertedFromNCHWToNHWC(const std::vector<InferenceEngine::CNNLayerPtr>& layers) {
for (auto& l : layers) {
if (!LayerInfo(l).isConvolution())
continue;
InferenceEngine::CNNLayerPtr next;
std::tie(std::ignore, next) = FindPermutationsAroundConvolutionInNHWCModel(l);
if (next != nullptr)
return false;
// If a convolution has only 1-dimension input and output we should skip it
auto in_dims = l->insData.begin()->lock()->getDims();
auto out_dims = l->outData.front()->getDims();
if (ov::intel_gna::graph_utils::is_one_dim_shapes(in_dims, out_dims)) {
continue;
}
return FindPermutationAfterConvolutionInKaldiModel(l) == nullptr;
}
return false;
}
/**
* @brief returns transposition information for a layer based on the previous convolution or pooling dimensions order
* @param layer layer from which transposition info search must be started

View File

@ -924,4 +924,38 @@ inline uint32_t GetDataDimByName(InferenceEngine::DataPtr data, DataDimName dimN
return GetDimFromBack(dims, backOffsets[dimIxInNCHW]);
}
/**
* @brief returns the size of the specified data dimension depending on the layout
* (NHWC specialization)
* @param data a pointer to the data
* @param dimName dimension name
*/
inline uint32_t GetDataDimSizeNHWC(InferenceEngine::DataPtr data, DataDimName dimName) {
uint32_t dimIxInNCHW = static_cast<uint32_t>(dimName);
IE_ASSERT(dimIxInNCHW <= 3);
std::vector<uint32_t> backOffsets;
switch (data->getLayout()) {
case Layout::C:
case Layout::NC:
// 1 will be returned for offsets > 2
backOffsets = std::vector<uint32_t>{2, 1, 3, 4};
break;
case Layout::HWC:
// 1 will be returned for offset 4
case Layout::NHWC:
backOffsets = std::vector<uint32_t>{4, 3, 2, 1};
break;
case Layout::CHW:
// 1 will be returned for offset 4
case Layout::NCHW:
backOffsets = std::vector<uint32_t>{4, 1, 3, 2};
break;
default:
THROW_GNA_EXCEPTION << data->getName() << " Unexpected layout " << data->getLayout();
}
auto dims = data->getDims();
return GetDimFromBack(dims, backOffsets[dimIxInNCHW]);
}
} // namespace InferenceEngine

View File

@ -62,12 +62,10 @@ void GNAInferRequest::StartAsyncImpl() {
std::exception_ptr exceptionPtr;
if (res != InferenceEngine::StatusCode::OK) {
try {
IE_EXCEPTION_SWITCH(res,
ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
std::stringstream{}
<< IE_LOCATION
<< InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
IE_EXCEPTION_SWITCH(
res,
ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<= std::stringstream{});
} catch (...) {
exceptionPtr = std::current_exception();
}

View File

@ -344,13 +344,13 @@ void GNAPlugin::PrePostProcess(InferenceEngine::Blob::Ptr input_blob,
std::shared_ptr<ov::Model> model) {
const ov::element::Type input_type = details::convertPrecision(input_blob->getTensorDesc().getPrecision());
const ov::element::Type output_type = details::convertPrecision(output_blob->getTensorDesc().getPrecision());
const ov::Shape& input_shape = model->get_parameters().front()->get_shape();
const ov::Shape& output_shape = model->get_results().front()->get_shape();
const ov::Shape& output_shape = output_blob->getTensorDesc().getDims();
for (const auto& param : model->get_parameters()) {
param->set_element_type(input_type);
}
model->validate_nodes_and_infer_types();
const ov::Shape& input_shape = model->get_parameters()[0]->get_output_shape(0);
ov::TensorVector inputs = {ov::Tensor(input_type, input_shape, input_blob->cbuffer().as<void*>())};
ov::TensorVector results = {ov::Tensor(output_type, output_shape, output_blob->buffer().as<void*>())};
@ -611,52 +611,6 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
return false;
}
void GNAPlugin::FillInputsAndOutputsTranspositionInfo(const InferenceEngine::CNNNetwork& net) {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FillInputsAndOutputsTranspositionInfo");
auto printTranspositionInfo = [](const std::vector<TranspositionInfo>& transpositionInfo) {
for (const auto& transpositionInfoPart : transpositionInfo) {
log::debug() << "transpose=" << transpositionInfoPart.transpose
<< " rows_num=" << transpositionInfoPart.num_transpose_rows
<< " columns_num=" << transpositionInfoPart.num_transpose_columns << "\n";
}
};
auto inputLayers = CNNNetGetAllInputLayers(net);
for (const auto& inputLayer : inputLayers) {
// Collect information for inputs transposition
if (!LayerInfo(inputLayer).isInput())
continue;
auto transpositionInfo = FindTranspositionInfoFromNextLayers(inputLayer);
if (transpositionInfo.empty())
continue;
transpose_inputs_info.insert({inputLayer->name, transpositionInfo});
log::debug() << "Input " << inputLayer->name << " transposition info: \n";
printTranspositionInfo(transpositionInfo);
}
auto outputsMap = net.getOutputsInfo();
for (const auto& outPort : outputsMap) {
auto outLayer = getCreatorLayer(outPort.second).lock();
// Collect information for outputs transposition
if (!LayerInfo(outLayer).isOutput())
continue;
auto transpositionInfo = FindTranspositionInfoFromPrevLayers(outLayer);
if (transpositionInfo.empty())
continue;
// Swap transposition info rows and columns since we need to transpose output back from NHWC to NCHW
for (auto&& transpositionInfoPart : transpositionInfo) {
if (transpositionInfoPart.transpose) {
std::swap(transpositionInfoPart.num_transpose_rows, transpositionInfoPart.num_transpose_columns);
}
}
transpose_outputs_info.insert({outLayer->name, transpositionInfo});
log::debug() << "Output " << outLayer->name << " transposition info: \n";
printTranspositionInfo(transpositionInfo);
}
}
#ifdef PLOT
void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer,
InferenceEngine::ordered_properties& printed_properties,
@ -751,10 +705,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
UpdateInputScaleFromNetwork(network);
}
if (MustBeConvertedFromNCHWToNHWC(CNNNetSortTopologically(network))) {
FillInputsAndOutputsTranspositionInfo(network);
}
InferenceEngine::CNNNetwork newNet;
if (gnaFlags->sw_fp32) {
@ -995,22 +945,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
}
}
if (dnn->do_rotate_input && transpose_inputs_info.empty()) {
for (auto& inputLayer : inputLayers) {
transpose_inputs_info.insert(
{inputLayer->name,
{TranspositionInfo{dnn->do_rotate_input, dnn->num_rotate_rows, dnn->num_rotate_columns}}});
}
}
// TODO: Need to remove this conversion when ngraph NCHW->NHWC transformation is enabled
if (!transpose_inputs_info.empty()) {
ConvertTransposeMapToModel(transpose_inputs_info, inputs_ptr_->Get());
}
if (!transpose_outputs_info.empty()) {
ConvertTransposeMapToModel(transpose_outputs_info, outputs_.Get());
}
DumpXNNToFile();
#ifdef PLOT

View File

@ -37,6 +37,8 @@ class WorkerPool;
class Worker;
} // namespace request
using namespace ov::intel_gna::pre_post_processing;
class GNAPlugin : public InferenceEngine::IInferencePlugin {
protected:
std::string _pluginName = "GNA";
@ -204,6 +206,13 @@ protected:
InferenceEngine::Blob::Ptr output_blob,
std::shared_ptr<ov::Model> model);
/**
* Run ngraph model on CPU to modify inputs/outputs
*/
void pre_post_process(InferenceEngine::Blob::Ptr input_blob,
InferenceEngine::Blob::Ptr output_blob,
std::shared_ptr<ov::Model> model);
void ImportFrames(void* ptr_dst,
const void* ptr_src,
InferenceEngine::Precision input_precision,
@ -246,14 +255,6 @@ protected:
* @return true if the output is initiated, false otherwise
*/
bool TryToInitOutput(const std::string& portName, InferenceEngine::CNNLayerPtr layer);
/**
     * @brief Fills inputs and outputs transposition info for model conversion from NCHW to NHWC.
* Information for transposition is found from convolution/pooling input or output dimensions.
* @param layers model sorted layers
*/
void FillInputsAndOutputsTranspositionInfo(const InferenceEngine::CNNNetwork& net);
bool isFP32ModeActive() const;
std::shared_ptr<request::ModelWrapper> createModelWrapperForLoadNetwork(bool trivial);
std::shared_ptr<request::ModelWrapper> createModelWrapperForImportNetwork(uint32_t numberOfOperations);

View File

@ -7,6 +7,8 @@
#include "gna_itt.hpp"
#include "legacy/net_pass.h"
#include "legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp"
#include "legacy/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.hpp"
#include "ngraph/opsets/opset2.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "openvino/pass/manager.hpp"
#include "optimizer/gna_pass_manager.hpp"
@ -18,7 +20,9 @@
#include "transformations/common_optimizations/fq_reshape_fusion.hpp"
#include "transformations/common_optimizations/pull_transpose_through_fq.hpp"
#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
#include "transformations/common_optimizations/transpose_sinking.hpp"
#include "transformations/common_optimizations/transpose_to_reshape.hpp"
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
#include "transformations/convert_dwsc_to_scaleshifts.hpp"
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
@ -28,6 +32,8 @@
#include "transformations/decompose_mvn.hpp"
#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp"
#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp"
#include "transformations/fuse_conv_bias_activation.hpp"
#include "transformations/gather_sinking.hpp"
#include "transformations/handle_transposes_around_matmul.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/insert_copy_layer.hpp"
@ -37,6 +43,7 @@
#include "transformations/markup_fusable_transpose.hpp"
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/op_conversions/softsign_decomposition.hpp"
@ -48,13 +55,28 @@
#include "transformations/remove_in_out_processing.hpp"
#include "transformations/remove_single_input_concat.hpp"
#include "transformations/reorder_activation_and_pooling.hpp"
#include "transformations/replace_gna_nhwc_layers.hpp"
#include "transformations/reshape_transpose_substitute.hpp"
#include "transformations/rotate_inputs.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/split_eltwise.hpp"
#include "transformations/substitute_softsign.hpp"
#include "transformations/swap_input_matmul_gna.hpp"
#include "transformations/transpose_sinking/ts_concat.hpp"
#include "transformations/transpose_sinking/ts_fuse.hpp"
#include "transformations/transpose_sinking/ts_general.hpp"
#include "transformations/transpose_sinking/ts_split.hpp"
#include "transformations/ts_concat_forward.hpp"
#include "transformations/ts_split_backward.hpp"
#include "transformations/unfuse_reshape_and_transpose.hpp"
#include "transformations/utils/transformation_helper.hpp"
#include "transformations/utils/utils.hpp"
using namespace ov;
using namespace ov::opset8;
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::pass::helper;
namespace ov {
namespace intel_gna {
@ -64,12 +86,13 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
fake_quantized = ov::op::util::has_op_with_type<ngraph::op::FakeQuantize>(model);
const bool has_convolution = ov::op::util::has_op_with_type<ngraph::opset7::Convolution>(model);
const bool has_maxpool = ov::op::util::has_op_with_type<ov::opset8::MaxPool>(model);
const bool has_slice = ov::op::util::has_op_with_type<ov::opset8::Slice>(model);
const bool has_matmul = ov::op::util::has_op_with_type<ngraph::opset7::MatMul>(model);
const bool has_mvn = ov::op::util::has_op_with_type<ngraph::opset7::MVN>(model) ||
const bool has_mvn = ov::op::util::has_op_with_type<ov::opset8::MVN>(model) ||
ov::op::util::has_op_with_type<ov::op::v0::MVN>(model);
ov::pass::Manager manager;
manager.register_pass<ov::pass::InitNodeInfo>();
    // In OV API 2.0 (IRv10), the default conversion to fp32 (inputs, outputs and weights) is disabled
// and we need to run the ConvertPrecision transformation to support old networks.
manager.register_pass<ov::pass::ConvertPrecision>(precisions_map{{ngraph::element::f16, ngraph::element::f32}});
@ -104,7 +127,6 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithBias>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMul>();
manager.register_pass<ov::intel_gna::pass::HandleTransposesAroundMatMul>();
manager.register_pass<ov::intel_gna::pass::InsertTransposeAfterConvOrPool>();
manager.register_pass<ov::intel_gna::pass::Unfuse2dto4dReshapeAndTranspose>();
manager.register_pass<ov::intel_gna::pass::Unfuse4dto2dReshapeAndTranspose>();
manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
@ -112,11 +134,21 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
manager.register_pass<ov::intel_gna::pass::RemoveSingleInputConcat>();
manager.register_pass<ov::intel_gna::pass::SubstituteSoftsign>();
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeLayerToBeEliminated>();
if (!has_convolution && !has_matmul && !has_mvn) {
// TODO: Remove this condition when the legacy layout transformation (NCHW->NHWC) is disabled
manager.register_pass<ov::intel_gna::pass::RemoveInputsProcessing>(input_output_subgraphs);
manager.register_pass<ov::intel_gna::pass::RemoveOutputsProcessing>(input_output_subgraphs);
// TODO enable this transformation for networks without convolutions
if (has_convolution || has_maxpool || has_mvn || has_matmul) {
manager.register_pass<ov::intel_gna::pass::ReplaceGnaNHWCLayers>();
manager.register_pass<ov::intel_gna::pass::InsertConvolutionTransposeHW>();
manager.register_pass<ov::pass::TransposeSinkingGeneral>();
manager.register_pass<ov::intel_gna::pass::GatherSinkingGeneral>();
manager.register_pass<ov::pass::ReshapeSequenceFusion>();
manager.register_pass<ov::pass::TransposeToReshape>();
manager.register_pass<ov::intel_gna::pass::GnaConvolutionFusion>();
manager.register_pass<ov::intel_gna::pass::TSConcatForward>();
manager.register_pass<ov::intel_gna::pass::TSSplitBackward>();
manager.register_pass<ov::pass::transpose_sinking::TSFuse>();
}
manager.register_pass<ov::intel_gna::pass::RemoveInputsProcessing>(input_output_subgraphs);
manager.register_pass<ov::intel_gna::pass::RemoveOutputsProcessing>(input_output_subgraphs);
manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
@ -160,6 +192,62 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
{ov::element::u32, ov::element::i32}});
const auto& pass_config = manager.get_pass_config();
pass_config->set_callback<ov::pass::transpose_sinking::TSConcatForward>(
[](const std::shared_ptr<const ov::Node>& node) -> bool {
const TransposeInfo transpose_info = get_first_input_transpose(node);
if (transpose_info.isEmpty())
return false;
const bool is_supported = Limitations::is_forward_transposed_concat_supported(
node,
transpose_info.transpose_const->get_axis_vector_val());
if (!is_supported)
mark_input_transposes_as_nosinking(node);
return !is_supported;
});
pass_config->set_callback<ov::pass::transpose_sinking::TSConcatBackward>(
[](const std::shared_ptr<const ov::Node>& node) -> bool {
const TransposeInfo transpose_info = get_first_output_transpose(node);
if (transpose_info.isEmpty())
return false;
return !Limitations::is_backward_transposed_concat_supported(
node,
transpose_info.transpose_const->get_axis_vector_val());
});
pass_config->set_callback<ov::pass::transpose_sinking::TSSplitForward>(
[](const std::shared_ptr<const ov::Node>& node) -> bool {
const TransposeInfo transpose_info = get_first_input_transpose(node);
if (transpose_info.isEmpty())
return false;
const bool is_supported = Limitations::is_forward_transposed_split_supported(
node,
transpose_info.transpose_const->get_axis_vector_val());
if (!is_supported)
mark_input_transposes_as_nosinking(node);
return !is_supported;
});
pass_config->set_callback<ov::pass::transpose_sinking::TSSplitBackward>(
[](const std::shared_ptr<const ov::Node>& node) -> bool {
const TransposeInfo transpose_info = get_first_output_transpose(node);
if (transpose_info.isEmpty())
return false;
return !Limitations::is_backward_transposed_split_supported(
node,
transpose_info.transpose_const->get_axis_vector_val());
});
/**
* TransposeSinking doesn't currently support StridedSlice. We disable SliceToStridedSlice
     * transformation to prevent converting Slice to StridedSlice. This allows us to work with
     * networks that initially have Slice.
     * This can be removed once StridedSlice is supported in TransposeSinking.
*/
if (has_slice && (has_convolution || has_maxpool || has_mvn)) {
pass_config->disable<ov::pass::SliceToStridedSlice>();
}
// Allowing FP16 Converts to be folded and FP16 constants to upgrade to FP32 data type
pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
@ -177,8 +265,23 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
// Operations Max and Min aren't supported
pass_config->disable<ov::pass::ConcatReduceFusion>();
pass_config->disable<ov::pass::ConcatReduceFusion>();
manager.run_passes(model);
/**
     * Because SliceToStridedSlice was disabled, Slice operations can remain after all transformations,
     * and Slice is not supported natively in this plugin. Here we convert
     * Slice -> StridedSlice -> CropIE.
     * This can be removed once StridedSlice is supported in TransposeSinking.
*/
if (has_slice && (has_convolution || has_maxpool || has_mvn)) {
ov::pass::Manager manager;
manager.register_pass<ov::pass::InitNodeInfo>();
manager.register_pass<ov::pass::SliceToStridedSlice>(true);
manager.register_pass<ngraph::pass::ConvertStridedSliceToCropMatcher>();
manager.run_passes(model);
}
is_ngraph_passes_used = true;
}
@ -204,8 +307,6 @@ void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& ne
passes->registerPass<FuseFQIntoWeightsPass>();
passes->registerPass<MoveFakeQuantizeLayerIntoQuantParamsPass>();
passes->registerPass<TransposeWeightsFromNCHWToNHWCPass>();
passes->registerPass<SubstitutePReluPass>();
if (!is_ngraph_passes_used) {
@ -221,7 +322,7 @@ void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& ne
passes->registerPass<FlattenTrivialConcatPass>();
passes->registerPass<InsertConcatAligningFilterPass>();
passes->registerPass<ReorderConcatInputsPass>();
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
// Keep legacy inserting of Identity layer here
// because concat and split aliging passes are not moved to ngraph yet
passes->registerPass<InsertIdentityLayerPass>();

View File

@ -59,11 +59,11 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
const std::vector<KRT> reducers{{49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2}};
auto reducer = 1.0;
const auto inDepth =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
InferenceEngine::GetDataDimSizeNHWC(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
InferenceEngine::GetDataDimSizeNHWC(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
InferenceEngine::GetDataDimSizeNHWC(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (is3DInputOr2DKernel(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight,
inWidth,

View File

@ -297,7 +297,7 @@ public:
return isOfType("FakeQuantize");
}
bool isNonFunctional() const {
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze") || isTrivialPermute();
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze") || isTrivialPermute() || is_gather();
}
bool isReshape() const noexcept {
return isOfType("reshape");
@ -305,6 +305,9 @@ public:
bool isPermute() const noexcept {
return isOfType("permute");
}
bool is_gather() const noexcept {
return isOfType("gather");
}
bool isPermuteFusable() const noexcept {
return isPermute() &&
(layer->params.count(ov::intel_gna::rt_info::GNATransposeFusable::get_type_info_static()) > 0);
@ -349,11 +352,8 @@ public:
bool isNonValuesChangable() const {
return isNonFunctional() || isSplit() || isSlice() || isConcat();
}
bool is_gather() const noexcept {
return isOfType("gather");
}
bool is_fq_non_sensitive() const {
return isPermute() || is_gather() || isNonFunctional();
return isPermute() || isNonFunctional();
}
bool isPooling() const noexcept {
return isOfType("pooling");

View File

@ -133,6 +133,9 @@ public:
void set_auto_pad(const ov::op::PadType& auto_pad) {
m_auto_pad = auto_pad;
}
bool has_add_node() const {
return m_has_add_node;
}
bool has_bias() const {
return m_has_add_node;
}

View File

@ -110,17 +110,6 @@ static void SumBlobs(Blob::Ptr& src_blob, Blob::Ptr& dst_blob) {
}
}
static Blob::Ptr convertToRWBlob(const Blob::Ptr& readOnlyBlob, const std::string& name = {}) {
auto blob = Blob::CreateFromData(std::make_shared<Data>(name, readOnlyBlob->getTensorDesc()));
blob->allocate();
const auto ret = ie_memcpy(blob->buffer().as<uint8_t*>(),
blob->size() * blob->getTensorDesc().getPrecision().size(),
readOnlyBlob->buffer().as<uint8_t*>(),
readOnlyBlob->size() * readOnlyBlob->getTensorDesc().getPrecision().size());
IE_ASSERT(ret == 0);
return blob;
}
// indexes stored in pass manager
static const char identityLayersCounterName[] = "identityLayerCounter";
static const char diagonalLayersCounterName[] = "diagonalLayerCounter";
@ -2419,225 +2408,6 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
}
}
void TransposeWeightsFromNCHWToNHWCPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "TransposeWeightsFromNCHWToNHWCPass");
if (!MustBeConvertedFromNCHWToNHWC(*pLayers))
return;
auto printTranspositionInfo = [](const std::vector<TranspositionInfo>& transpositionInfo) {
for (const auto& transpositionInfoPart : transpositionInfo) {
log::debug() << "transpose=" << transpositionInfoPart.transpose
<< " rows_num=" << transpositionInfoPart.num_transpose_rows
<< " columns_num=" << transpositionInfoPart.num_transpose_columns << "\n";
}
};
auto transpInfoMatchWeightsSize =
[](const std::vector<TranspositionInfo>& transpositionInfo, size_t weightsSize, const std::string& layerName) {
size_t totalElements = 0;
for (auto&& transpositionInfoPart : transpositionInfo) {
totalElements += transpositionInfoPart.num_transpose_rows * transpositionInfoPart.num_transpose_columns;
}
if (totalElements != weightsSize) {
THROW_GNA_EXCEPTION << layerName << " weights elements from transposition info (" << totalElements
<< ") don't match input dimensions (" << weightsSize << ")";
}
};
for (auto&& l : *pLayers) {
if (LayerInfo(l).isScaleShift()) {
std::vector<TranspositionInfo> transpositionInfo;
// Try to find a convolution in previous layers
if (InferenceEngine::CNNNetHasPrevLayer(l.get())) {
transpositionInfo = FindTranspositionInfoFromPrevLayers(InferenceEngine::CNNNetPrevLayer(l));
// If no convolutions are found try to find them in next layers
if (!FoundPartToTranspose(transpositionInfo) && !l->outData.empty() &&
!getInputTo(l->outData[0]).empty()) {
transpositionInfo = FindTranspositionInfoFromNextLayers(getInputTo(l->outData[0]).begin()->second);
}
}
if (FoundPartToTranspose(transpositionInfo)) {
if (l->input()->getDims().front() > 1) {
THROW_GNA_EXCEPTION << l->name
<< " Weights transposition is not supported for a layer with batch size > 1";
}
auto weightable = dynamic_cast<WeightableLayer*>(l.get());
IE_ASSERT(weightable != nullptr);
size_t totalWeights = weightable->_weights->size();
transpInfoMatchWeightsSize(transpositionInfo, totalWeights, l->name);
ConvertTensorFromNCHWToNHWC(weightable->precision.size(),
1,
weightable->_weights->size(),
weightable->_weights->cbuffer().as<uint8_t*>(),
true,
transpositionInfo);
if (weightable->_biases) {
ConvertTensorFromNCHWToNHWC(weightable->precision.size(),
1,
weightable->_biases->size(),
weightable->_biases->cbuffer().as<uint8_t*>(),
true,
transpositionInfo);
}
log::debug() << l->name << " weights and biases rows transposition info:\n";
printTranspositionInfo(transpositionInfo);
}
}
if (LayerInfo(l).isFullyConnected()) {
auto weightable = dynamic_cast<WeightableLayer*>(l.get());
IE_ASSERT(weightable != nullptr);
IE_ASSERT(weightable->_weights != nullptr);
auto precision = weightable->precision.size();
auto out_dims = l->outData[0]->getDims();
auto in_dims = l->input()->getDims();
auto weightsRows = InferenceEngine::details::product(std::begin(out_dims) + 1, std::end(out_dims));
auto weightsColumns = InferenceEngine::details::product(std::begin(in_dims) + 1, std::end(in_dims));
// Find a convolution in previous layers to rotate weights rows
if (InferenceEngine::CNNNetHasPrevLayer(l.get())) {
std::vector<TranspositionInfo> transpositionInfo;
auto prevLayer = InferenceEngine::CNNNetPrevLayer(l);
transpositionInfo = FindTranspositionInfoFromPrevLayers(prevLayer);
if (FoundPartToTranspose(transpositionInfo)) {
if (l->input()->getDims().front() > 1) {
THROW_GNA_EXCEPTION
<< l->name << " Weights transposition is not supported for a layer with batch size > 1";
}
if (LayerInfo(prevLayer).isSplit()) {
// If we found a split it's not possible to rotate data
THROW_GNA_EXCEPTION << l->name << " won't be transposed due to a split before it";
}
transpInfoMatchWeightsSize(transpositionInfo, weightsColumns, l->name);
weightable->_weights = convertToRWBlob(weightable->_weights);
ConvertTensorFromNCHWToNHWC(precision,
weightsRows,
weightsColumns,
weightable->_weights->buffer().as<uint8_t*>(),
true,
transpositionInfo);
log::debug() << l->name << " weights rows transposition info:\n";
printTranspositionInfo(transpositionInfo);
}
}
// Find a convolution in next layers to rotate weights columns
if (!l->outData.empty() && !getInputTo(l->outData[0]).empty()) {
std::vector<TranspositionInfo> transpositionInfo;
auto nextLayer = getInputTo(l->outData[0]).begin()->second;
transpositionInfo = FindTranspositionInfoFromNextLayers(nextLayer);
if (FoundPartToTranspose(transpositionInfo)) {
if (l->outData[0]->getDims().front() > 1) {
THROW_GNA_EXCEPTION
<< l->name << " Weights transposition is not supported for a layer with batch size > 1";
}
if (LayerInfo(nextLayer).isConcat()) {
// If we found a concat it's not possible to rotate data
THROW_GNA_EXCEPTION << l->name << " won't be transposed due to a concat after it";
}
transpInfoMatchWeightsSize(transpositionInfo, weightsRows, l->name);
weightable->_weights = convertToRWBlob(weightable->_weights);
ConvertTensorFromNCHWToNHWC(precision,
weightsRows,
weightsColumns,
weightable->_weights->cbuffer().as<uint8_t*>(),
false,
transpositionInfo);
log::debug() << l->name << " weights columns transposition info:\n";
printTranspositionInfo(transpositionInfo);
}
}
}
if (LayerInfo(l).isEltwise()) {
// We need to transpose a constant which is an eltwise input
auto firstInput = InferenceEngine::CNNNetPrevLayer(l, 0);
auto secondInput = InferenceEngine::CNNNetPrevLayer(l, 1);
if (!LayerInfo(firstInput).isConst() && !LayerInfo(secondInput).isConst()) {
continue;
}
            // Let a constant be the second input
if (LayerInfo(firstInput).isConst()) {
std::swap(firstInput, secondInput);
}
// Find a convolution in previous or next layers
auto transpositionInfo = FindTranspositionInfoFromPrevLayers(firstInput);
if (!FoundPartToTranspose(transpositionInfo) && !l->outData.empty() && !getInputTo(l->outData[0]).empty()) {
transpositionInfo = FindTranspositionInfoFromNextLayers(getInputTo(l->outData[0]).begin()->second);
}
if (FoundPartToTranspose(transpositionInfo)) {
auto blob = secondInput->blobs["custom"];
ConvertTensorFromNCHWToNHWC(blob->getTensorDesc().getPrecision().size(),
1,
blob->size(),
blob->buffer().as<uint8_t*>(),
true,
transpositionInfo);
log::debug() << secondInput->name << " data transposition info:\n";
printTranspositionInfo(transpositionInfo);
}
}
if (LayerInfo(l).isConcat()) {
auto concatLayer = LayerInfo(l).as<InferenceEngine::ConcatLayer*>();
IE_ASSERT(concatLayer != nullptr);
// If concatenation is along channel axis constant input transposition isn't required
if (concatLayer->_axis <= 1)
continue;
std::vector<InferenceEngine::CNNLayerPtr> constInputs;
bool transpose = false;
int nonConstInputIx = 0;
// Check if non-const inputs are transposed
for (int i = 0; InferenceEngine::CNNNetHasPrevLayer(l.get(), i); ++i) {
auto input = InferenceEngine::CNNNetPrevLayer(l, i);
if (LayerInfo(input).isConst()) {
constInputs.push_back(input);
continue;
}
auto transpositionInfo = FindTranspositionInfoFromPrevLayers(input);
bool transposeInput = FoundPartToTranspose(transpositionInfo);
if (nonConstInputIx == 0) {
transpose = transposeInput;
} else if (transposeInput != transpose) {
THROW_GNA_EXCEPTION << "Concat layer " << l->name << " inputs have different layouts";
}
++nonConstInputIx;
}
if (!transpose)
continue;
// Transpose all constant inputs
for (auto&& input : constInputs) {
auto rows = GetDataDimByName(input->outData[0], DataDimName::C);
auto columns = GetDataDimByName(input->outData[0], DataDimName::H) *
GetDataDimByName(input->outData[0], DataDimName::W);
auto blob = convertToRWBlob(input->blobs["custom"]);
input->blobs["custom"] = blob;
                // A constant should have the same number of channels since concatenation will be in the height/width
                // dimension
TranspositionInfo concatTranspositionInfo{true, rows, columns};
ConvertTensorFromNCHWToNHWC(blob->getTensorDesc().getPrecision().size(),
1,
blob->size(),
blob->buffer().as<uint8_t*>(),
true,
{concatTranspositionInfo});
log::debug() << input->name << " data transposition info:\n";
printTranspositionInfo({concatTranspositionInfo});
}
}
}
}
void FuseFullyConnectedWithEltwisePass::run() {
// This legacy pass removes the Eltwise (only if it performs SUM op) from between FC and Any.
// The blob data of Const layer attached to Eltwise is added to biases blob data of FC layer.

View File

@ -214,14 +214,6 @@ DECL_PASS(FuseFQIntoWeights);
*/
DECL_PASS(MoveFakeQuantizeLayerIntoQuantParams);
/**
 * @brief convert FullyConnected, ScaleShift and Eltwise layer weights order from NCHW to NHWC.
* Information for transposition is found from convolution/pooling input or output dimensions.
* Convolution weights are transposed in finalizeConvolution1DPrimitive() method (gna_graph_compiler.cpp).
 * They are transposed for both NCHW and NHWC models since MO always stores them in NCHW layout.
*/
DECL_PASS(TransposeWeightsFromNCHWToNHWC);
/**
 * @brief fuse FullyConnected and Eltwise layers, also when there is a Reshape between them whose input has only
 * one dimension > 1

View File

@ -26,7 +26,7 @@ struct MVNData {
size_t W;
size_t num_parts;
float eps;
op::MVNEpsMode eps_mode;
ov::op::MVNEpsMode eps_mode;
bool normalize_variance;
element::Type element_type;
std::string name;
@ -138,7 +138,7 @@ static std::shared_ptr<Node> NormalizeVariance(const std::shared_ptr<opset8::MVN
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
op::PadType::VALID);
ov::op::PadType::VALID);
transposed_avg_conv_3->set_friendly_name(mvn_data.name + "_Avg3");
auto avg_conv_3 =
std::make_shared<opset8::Transpose>(transposed_avg_conv_3,
@ -156,7 +156,7 @@ static std::shared_ptr<Node> NormalizeVariance(const std::shared_ptr<opset8::MVN
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
op::PadType::VALID);
ov::op::PadType::VALID);
transposed_avg_conv_4->set_friendly_name(mvn_data.name + "_Avg4");
auto avg_conv_4 =
std::make_shared<opset8::Transpose>(transposed_avg_conv_4,
@ -243,7 +243,7 @@ static void Decompose(const std::shared_ptr<opset8::MVN> mvn, const MVNData& mvn
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
op::PadType::VALID);
ov::op::PadType::VALID);
transposed_avg_conv_1->set_friendly_name(mvn_data.name + "_Avg1");
auto avg_conv_1 =
std::make_shared<opset8::Transpose>(transposed_avg_conv_1,
@ -261,7 +261,7 @@ static void Decompose(const std::shared_ptr<opset8::MVN> mvn, const MVNData& mvn
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1},
op::PadType::VALID);
ov::op::PadType::VALID);
transposed_avg_conv_2->set_friendly_name(mvn_data.name + "_Avg2");
auto avg_conv_2 =
std::make_shared<opset8::Transpose>(transposed_avg_conv_2,

Some files were not shown because too many files have changed in this diff.