From 6e490c24e28f0fe9f735c58de1a9d139b94759a4 Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Wed, 10 Mar 2021 14:15:38 +0300 Subject: [PATCH] =?UTF-8?q?[=C2=A7]=20introduces=20snippets=20generator=20?= =?UTF-8?q?(#4349)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inference-engine/src/CMakeLists.txt | 4 +- .../src/inference_engine/CMakeLists.txt | 4 +- .../src/legacy_api/CMakeLists.txt | 5 +- .../src/convert_function_to_cnn_network.cpp | 10 + inference-engine/src/snippets/CMakeLists.txt | 56 ++ .../snippets/include/snippets/generator.hpp | 123 +++++ .../include/snippets/op/blockedload.hpp | 36 ++ .../include/snippets/op/blockedparameter.hpp | 38 ++ .../include/snippets/op/broadcastload.hpp | 48 ++ .../include/snippets/op/broadcastmove.hpp | 41 ++ .../src/snippets/include/snippets/op/load.hpp | 42 ++ .../src/snippets/include/snippets/op/nop.hpp | 30 + .../snippets/include/snippets/op/scalar.hpp | 48 ++ .../include/snippets/op/scalarload.hpp | 36 ++ .../include/snippets/op/scalarstore.hpp | 36 ++ .../include/snippets/op/staticpower.hpp | 44 ++ .../snippets/include/snippets/op/store.hpp | 38 ++ .../snippets/include/snippets/op/subgraph.hpp | 101 ++++ .../include/snippets/op/vectorload.hpp | 36 ++ .../include/snippets/op/vectorstore.hpp | 36 ++ .../snippets/pass/assign_registers.hpp | 30 + .../snippets/pass/collapse_subgraph.hpp | 74 +++ .../snippets/pass/insert_load_store.hpp | 41 ++ .../snippets/pass/insert_movebroadcast.hpp | 29 + .../load_movebroadcast_to_broadcastload.hpp | 29 + .../snippets/pass/vector_to_scalar.hpp | 42 ++ .../include/snippets/register_info.hpp | 24 + .../include/snippets/snippets_isa.hpp | 32 ++ .../include/snippets/snippets_isa_tbl.hpp | 84 +++ .../src/snippets/src/generator.cpp | 30 + inference-engine/src/snippets/src/itt.hpp | 71 +++ .../src/snippets/src/op/blockedload.cpp | 12 + .../src/snippets/src/op/blockedparameter.cpp | 9 + .../src/snippets/src/op/broadcastload.cpp | 35 ++ 
.../src/snippets/src/op/broadcastmove.cpp | 68 +++ inference-engine/src/snippets/src/op/load.cpp | 48 ++ inference-engine/src/snippets/src/op/nop.cpp | 18 + .../src/snippets/src/op/scalar.cpp | 9 + .../src/snippets/src/op/scalarload.cpp | 12 + .../src/snippets/src/op/scalarstore.cpp | 12 + .../src/snippets/src/op/staticpower.cpp | 9 + .../src/snippets/src/op/store.cpp | 48 ++ .../src/snippets/src/op/subgraph.cpp | 344 ++++++++++++ .../src/snippets/src/op/vectorload.cpp | 12 + .../src/snippets/src/op/vectorstore.cpp | 12 + .../snippets/src/pass/assign_registers.cpp | 183 +++++++ .../snippets/src/pass/collapse_subgraph.cpp | 516 ++++++++++++++++++ .../snippets/src/pass/insert_load_store.cpp | 67 +++ .../src/pass/insert_movebroadcast.cpp | 177 ++++++ .../load_movebroadcast_to_broadcastload.cpp | 59 ++ .../snippets/src/pass/vector_to_scalar.cpp | 40 ++ .../src/snippets/src/register_info.cpp | 9 + inference-engine/src/snippets/src/remarks.hpp | 20 + .../inference_engine/CMakeLists.txt | 1 + .../snippets/broadcast_fusion.cpp | 135 +++++ .../inference_engine/snippets/memory_ops.cpp | 94 ++++ .../snippets/movebroadcast.cpp | 44 ++ .../inference_engine/snippets/registers.cpp | 137 +++++ .../snippets/tokenization.cpp | 154 ++++++ .../snippets/vector_scalar.cpp | 72 +++ .../tests_deprecated/unit/CMakeLists.txt | 4 +- ngraph/core/src/op/non_zero.cpp | 2 +- ngraph/core/src/op/one_hot.cpp | 2 +- ngraph/core/src/op/util/op_types.cpp | 2 + 64 files changed, 3657 insertions(+), 7 deletions(-) create mode 100644 inference-engine/src/snippets/CMakeLists.txt create mode 100644 inference-engine/src/snippets/include/snippets/generator.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/blockedload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/blockedparameter.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/broadcastload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/broadcastmove.hpp 
create mode 100644 inference-engine/src/snippets/include/snippets/op/load.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/nop.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/scalar.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/scalarload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/scalarstore.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/staticpower.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/store.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/subgraph.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/vectorload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/vectorstore.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/assign_registers.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/collapse_subgraph.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/insert_load_store.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/insert_movebroadcast.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/load_movebroadcast_to_broadcastload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/vector_to_scalar.hpp create mode 100644 inference-engine/src/snippets/include/snippets/register_info.hpp create mode 100644 inference-engine/src/snippets/include/snippets/snippets_isa.hpp create mode 100644 inference-engine/src/snippets/include/snippets/snippets_isa_tbl.hpp create mode 100644 inference-engine/src/snippets/src/generator.cpp create mode 100644 inference-engine/src/snippets/src/itt.hpp create mode 100644 inference-engine/src/snippets/src/op/blockedload.cpp create mode 100644 inference-engine/src/snippets/src/op/blockedparameter.cpp create mode 100644 
inference-engine/src/snippets/src/op/broadcastload.cpp create mode 100644 inference-engine/src/snippets/src/op/broadcastmove.cpp create mode 100644 inference-engine/src/snippets/src/op/load.cpp create mode 100644 inference-engine/src/snippets/src/op/nop.cpp create mode 100644 inference-engine/src/snippets/src/op/scalar.cpp create mode 100644 inference-engine/src/snippets/src/op/scalarload.cpp create mode 100644 inference-engine/src/snippets/src/op/scalarstore.cpp create mode 100644 inference-engine/src/snippets/src/op/staticpower.cpp create mode 100644 inference-engine/src/snippets/src/op/store.cpp create mode 100644 inference-engine/src/snippets/src/op/subgraph.cpp create mode 100644 inference-engine/src/snippets/src/op/vectorload.cpp create mode 100644 inference-engine/src/snippets/src/op/vectorstore.cpp create mode 100644 inference-engine/src/snippets/src/pass/assign_registers.cpp create mode 100644 inference-engine/src/snippets/src/pass/collapse_subgraph.cpp create mode 100644 inference-engine/src/snippets/src/pass/insert_load_store.cpp create mode 100644 inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp create mode 100644 inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp create mode 100644 inference-engine/src/snippets/src/pass/vector_to_scalar.cpp create mode 100644 inference-engine/src/snippets/src/register_info.cpp create mode 100644 inference-engine/src/snippets/src/remarks.hpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/broadcast_fusion.cpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/memory_ops.cpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/movebroadcast.cpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/registers.cpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/tokenization.cpp create mode 100644 
inference-engine/tests/functional/inference_engine/snippets/vector_scalar.cpp diff --git a/inference-engine/src/CMakeLists.txt b/inference-engine/src/CMakeLists.txt index 63d85a820c2..10bfb7e5875 100644 --- a/inference-engine/src/CMakeLists.txt +++ b/inference-engine/src/CMakeLists.txt @@ -40,13 +40,15 @@ add_subdirectory(low_precision_transformations) add_subdirectory(offline_transformations) +add_subdirectory(snippets) + # add a custom target to build all Inference Engine Core libraries add_custom_target(ie_libraries ALL DEPENDS inference_engine_transformations inference_engine_legacy inference_engine inference_engine_preproc inference_engine_ir_v7_reader inference_engine_ir_reader - inference_engine_lp_transformations) + inference_engine_lp_transformations inference_engine_snippets) if(NGRAPH_ONNX_IMPORT_ENABLE) add_dependencies(ie_libraries inference_engine_onnx_reader) diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 1ea32276311..cfad762a626 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -171,7 +171,9 @@ if(WIN32) set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s) endif() -target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES} +target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt openvino::conditional_compilation + ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES} + inference_engine_snippets inference_engine_transformations pugixml) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt index 66498fdbd49..09bcf94c273 100644 --- a/inference-engine/src/legacy_api/CMakeLists.txt +++ b/inference-engine/src/legacy_api/CMakeLists.txt @@ -42,6 +42,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE 
${PUBLIC_HEADERS_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src ${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl + $ $ $ $ @@ -53,7 +54,7 @@ add_cpplint_target(${TARGET_NAME}_obj_cpplint FOR_TARGETS ${TARGET_NAME}_obj) # Create shared library -add_library(${TARGET_NAME} SHARED +add_library(${TARGET_NAME} SHARED ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp $) @@ -62,7 +63,7 @@ ie_add_vs_version_file(NAME ${TARGET_NAME} set_ie_threading_interface_for(${TARGET_NAME}) -target_link_libraries(${TARGET_NAME} PUBLIC inference_engine +target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets PRIVATE pugixml openvino::itt ${NGRAPH_LIBRARIES} inference_engine_transformations) diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index 7b5ff7ae2fa..2ea30ad0631 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -39,6 +39,7 @@ #include "legacy/ngraph_ops/rnn_sequence_ie.hpp" #include "legacy/ngraph_ops/lstm_sequence_ie.hpp" #include "legacy/ngraph_ops/gru_sequence_ie.hpp" +#include "snippets/op/subgraph.hpp" #include "exec_graph_info.hpp" #include "caseless.hpp" @@ -1978,6 +1979,15 @@ void convertFunctionToICNNNetwork(const std::shared_ptrparams[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames; } + if (auto subgraph = ::ngraph::as_type_ptr(layer)) { + std::string names = ""; + for (const auto& op : subgraph->get_body()->get_ordered_ops()) { + names += ", " + op->get_friendly_name(); + } + + cnnLayer->params["originalLayersNames"] += names; + } + std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer); if (!primitivesPriority.empty()) { cnnLayer->params["PrimitivesPriority"] = primitivesPriority; diff --git a/inference-engine/src/snippets/CMakeLists.txt b/inference-engine/src/snippets/CMakeLists.txt new 
file mode 100644 index 00000000000..09bbe10a38b --- /dev/null +++ b/inference-engine/src/snippets/CMakeLists.txt @@ -0,0 +1,56 @@ +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set (TARGET_NAME "inference_engine_snippets") + +set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") + +file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/snippets/*.hpp) + +# Create named folders for the sources within the .vcproj +# Empty name lists them directly under the .vcproj + +source_group("src" FILES ${LIBRARY_SRC}) +source_group("include" FILES ${PUBLIC_HEADERS}) + +# Create shared library + +add_library(${TARGET_NAME} SHARED + ${LIBRARY_SRC} + ${PUBLIC_HEADERS}) + +ie_faster_build(${TARGET_NAME} + UNITY +) + +ie_add_vs_version_file(NAME ${TARGET_NAME} + FILEDESCRIPTION "Inference Engine Snippets transformations library") + +target_compile_definitions(${TARGET_NAME} PRIVATE inference_engine_transformations_EXPORTS) + +target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_transformations ${NGRAPH_LIBRARIES} + PRIVATE ${NGRAPH_REF_LIBRARIES} openvino::conditional_compilation) + +target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) + +add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) + +ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) + +# LTO + +set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) + +# developer package + +ie_developer_export_targets(${TARGET_NAME}) + +# install + +install(TARGETS ${TARGET_NAME} + RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core + ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core + LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core) diff --git a/inference-engine/src/snippets/include/snippets/generator.hpp 
b/inference-engine/src/snippets/include/snippets/generator.hpp new file mode 100644 index 00000000000..1c45f1e5167 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/generator.hpp @@ -0,0 +1,123 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief A file contains public interface for target indepenent code generator. + * @file generator.hpp + */ +#pragma once + +#include +#include "snippets_isa.hpp" + +namespace ngraph { +namespace snippets { + +using code = const uint8_t *; +using RegInfo = std::pair, std::vector>; + +TRANSFORMATIONS_API auto getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo; + +/** + * @interface Emitter + * @brief Base class for all target specific code emitters used by generator. + * @ingroup snippets + */ +class TRANSFORMATIONS_API Emitter { +public: + /** + * @brief Default constructor + */ + Emitter(const std::shared_ptr& n) { + } + + /** + * @brief called by generator to generate code to produce target code for a specific operation + * @param in vector of vector argument registers + * @param out vector of vector resulting registers + * @param pool optional vector of free vector registers which might be used inside method + * @param gpr vector of free generam puproce registers which might be used inside method + * @return void + */ + virtual void emit_code(const std::vector& in, + const std::vector& out, + const std::vector& pool = {}, + const std::vector& gpr = {}) const = 0; + + /** + * @brief called by generator to generate data section, if needed for a specific operation + * @return void + */ + virtual void emit_data() const { + } +}; + +/** + * @interface TargetMachine + * @brief Base class Target machine representation. 
Target derives from this class to provide generator information about supported emittors + * @ingroup snippets + */ +class TRANSFORMATIONS_API TargetMachine { +public: + /** + * @brief called by generator to all the emittors available for a target machine + * @return a map by node's type info with callbacks to create an instance of emmitter for corresponding operation type + */ + virtual auto getJitters() -> std::map(std::shared_ptr)>>{ + return {}; + } +}; + +/** + * @interface Schedule + * @brief Return scheduling information and pointer to generated kernel code + * @ingroup snippets + */ +class TRANSFORMATIONS_API Schedule { +public: + /** + * @brief Default constructor + */ + Schedule() : work_size({}), is_flat(false), ptr(nullptr) {} + /** + * @brief Default to create schedule out of specific parameters + * @param ws work size for kernel execution + * @param f can this kernel be linearided to 1D range + * @param p pointer to generated code + */ + Schedule(const Shape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {} + + Shape work_size {}; + bool is_flat {false}; + code ptr {nullptr}; +}; + +/** + * @interface Generator + * @brief Target independent code generator interface + * @ingroup snippets + */ +class TRANSFORMATIONS_API Generator { +public: + /** + * @brief Default constructor + */ + Generator() = default; + /** + * @brief Default destructor + */ + virtual ~Generator() = default; + /** + * @brief virtual method any specific implementation should implement + * @param f runction in canonical for for table-based code generation + * @return pointer to generated code + */ + virtual code generate(std::shared_ptr& f) const = 0; + +protected: + mutable std::map(std::shared_ptr)>> jitters; +}; + +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/blockedload.hpp b/inference-engine/src/snippets/include/snippets/op/blockedload.hpp new file mode 100644 index 
00000000000..fe3a1d86cb8 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/blockedload.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "load.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface BlockedLoad + * @brief Generated by Canonicalization step for blocked data (NCHWc) to be loaded + * @ingroup snippets + */ +class TRANSFORMATIONS_API BlockedLoad : public Load { +public: + NGRAPH_RTTI_DECLARATION; + + BlockedLoad(const Output& x); + BlockedLoad() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/blockedparameter.hpp b/inference-engine/src/snippets/include/snippets/op/blockedparameter.hpp new file mode 100644 index 00000000000..fade0611e40 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/blockedparameter.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface BlockedParameter + * @brief Represents blocked input (NCHWc) for a subgraph + * @ingroup snippets + */ +class TRANSFORMATIONS_API BlockedParameter : public ngraph::op::Parameter { +public: + NGRAPH_RTTI_DECLARATION; + + BlockedParameter() = default; + BlockedParameter(const ngraph::element::Type& element_type, const PartialShape& pshape) + : Parameter(element_type, pshape) { + } + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(m_element_type, 
m_partial_shape); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/broadcastload.hpp b/inference-engine/src/snippets/include/snippets/op/broadcastload.hpp new file mode 100644 index 00000000000..d174eb902fe --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/broadcastload.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface BroadcastLoad + * @brief Is generated for broadcasting by least varying dimension for non-blocked cases and the second varying dimension for blocked + * @ingroup snippets + */ +class TRANSFORMATIONS_API BroadcastLoad : public BroadcastMove { +public: + NGRAPH_RTTI_DECLARATION; + + BroadcastLoad(const Output& x, Shape output_shape); + BroadcastLoad() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + void validate_and_infer_types() override; + + void set_broadcast_info(const Shape& bct) { + broadcast_info = bct; + } + + bool is_broadcast(size_t idx) { + return broadcast_info[idx] == 1; + } + +private: + Shape broadcast_info; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/broadcastmove.hpp b/inference-engine/src/snippets/include/snippets/op/broadcastmove.hpp new file mode 100644 index 00000000000..4ddb652faaa --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/broadcastmove.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace 
snippets { +namespace op { + +/** + * @interface BroadcastMove + * @brief Added to a subgraph if explicit broadcast instruction should be generated + * @ingroup snippets + */ +class TRANSFORMATIONS_API BroadcastMove : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + BroadcastMove(const Output& x, Shape output_shape); + BroadcastMove() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + void validate_and_infer_types() override; + + bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; + +protected: + Shape output_shape; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/load.hpp b/inference-engine/src/snippets/include/snippets/op/load.hpp new file mode 100644 index 00000000000..557d9dd078d --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/load.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Load + * @brief Generated by Canonicalization step where explicit load instruction should be emmiteed + * ScalarLoad == scalar instruction + post increment + * Load (VectorLoad) == vector instruction + post increment + * BroadcastLoad == scalar instruction - post increment + * BlockedLoad == vector instruction - post increment + * @ingroup snippets + */ +class TRANSFORMATIONS_API Load : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + Load(const Output& x); + Load() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + void validate_and_infer_types() override; + + bool 
evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/nop.hpp b/inference-engine/src/snippets/include/snippets/op/nop.hpp new file mode 100644 index 00000000000..8fc731d04dd --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/nop.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Nop + * @brief Generated by Canonicalization and represents not-an-operation + * @ingroup snippets + */ +class TRANSFORMATIONS_API Nop : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + Nop(const OutputVector& arguments, const OutputVector& results); + Nop() = default; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/scalar.hpp b/inference-engine/src/snippets/include/snippets/op/scalar.hpp new file mode 100644 index 00000000000..bb2aad43d2c --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/scalar.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ngraph/op/op.hpp" +#include "ngraph/op/constant.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Scalar + * @brief Generated by Canonicalization for a scalar constant Shape() == {1} + * @ingroup snippets + */ +class TRANSFORMATIONS_API Scalar : public ngraph::op::Constant { +public: + NGRAPH_RTTI_DECLARATION; + + Scalar() = default; + Scalar(const std::shared_ptr& tensor) : Constant(tensor) {} + template + Scalar(const element::Type& type, Shape 
shape, const std::vector& values) : Constant(type, shape, values) {} + Scalar(const element::Type& type, const Shape& shape) : Constant(type, shape) {} + template ::value>::type> + Scalar(const element::Type& type, Shape shape, T value) : Constant(type, shape, value) {} + Scalar(const element::Type& type, Shape shape, const std::vector& values) : Constant(type, shape, values) {} + Scalar(const element::Type& type, const Shape& shape, const void* data) : Constant(type, shape, data) {} + + Scalar(const Constant& other) : Constant(other) {} + Scalar(const Scalar& other) : Constant(other) {} + Scalar& operator=(const Scalar&) = delete; + ~Scalar() override {} + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(*this); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/scalarload.hpp b/inference-engine/src/snippets/include/snippets/op/scalarload.hpp new file mode 100644 index 00000000000..6553bc78e0a --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/scalarload.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "load.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface ScalarLoad + * @brief Generated by Canonicalization for a scalar value load to vector register + * @ingroup snippets + */ +class TRANSFORMATIONS_API ScalarLoad : public Load { +public: + NGRAPH_RTTI_DECLARATION; + + ScalarLoad(const Output& x); + ScalarLoad() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end 
of file diff --git a/inference-engine/src/snippets/include/snippets/op/scalarstore.hpp b/inference-engine/src/snippets/include/snippets/op/scalarstore.hpp new file mode 100644 index 00000000000..190f53d2d5f --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/scalarstore.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "store.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface ScalarStore + * @brief Generated by Canonicalization for a scalar value store from vector register + * @ingroup snippets + */ +class TRANSFORMATIONS_API ScalarStore : public Store { +public: + NGRAPH_RTTI_DECLARATION; + + ScalarStore(const Output& x); + ScalarStore() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/staticpower.hpp b/inference-engine/src/snippets/include/snippets/op/staticpower.hpp new file mode 100644 index 00000000000..42128f761b8 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/staticpower.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface PowerStatic + * @brief Generated by Canonicalization for a spasical case of power innstruction which has constant power value + * @ingroup snippets + */ +class TRANSFORMATIONS_API PowerStatic : public ngraph::op::v1::Power { +public: + NGRAPH_RTTI_DECLARATION; + + PowerStatic() : Power() { + } + + PowerStatic(const Output& arg0, + const Output& arg1, + const 
ngraph::op::AutoBroadcastSpec& auto_broadcast = + ngraph::op::AutoBroadcastSpec(ngraph::op::AutoBroadcastType::NUMPY)) : Power(arg0, arg1, auto_broadcast) { + NGRAPH_CHECK(!!std::dynamic_pointer_cast(arg1.get_node_shared_ptr()), "second argument must be scalar constant."); + } + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/store.hpp b/inference-engine/src/snippets/include/snippets/op/store.hpp new file mode 100644 index 00000000000..4bf883f39e1 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/store.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Load + * @brief Generated by Canonicalization step where explicit store instruction should be emmiteed + * @ingroup snippets + */ +class TRANSFORMATIONS_API Store : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + Store(const Output& x); + Store() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + void validate_and_infer_types() override; + + bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/subgraph.hpp b/inference-engine/src/snippets/include/snippets/op/subgraph.hpp new file mode 100644 index 00000000000..9d5a3a8e263 --- /dev/null +++ 
b/inference-engine/src/snippets/include/snippets/op/subgraph.hpp @@ -0,0 +1,101 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include +#include + +#include "snippets/generator.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Subgraph + * @brief An operation that is implemented by a function + * @ingroup snippets + */ +class TRANSFORMATIONS_API Subgraph : public ngraph::op::Op { +public: + using BlockedShape = std::tuple; + using BlockedShapeVector = std::vector; + + NGRAPH_RTTI_DECLARATION; + + Subgraph(const OutputVector& args, std::shared_ptr body); + + Subgraph(const NodeVector& args, std::shared_ptr body); + + bool visit_attributes(AttributeVisitor& visitor) override; + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; + + std::shared_ptr get_body() const { + return m_body; + } + + std::shared_ptr get_generator() const { + return m_generator; + } + + std::shared_ptr make_canonical_from_this(); + + snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); + bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; + + /// Set a new body for the op; body needs to satisfy requirements on inputs/outputs + void set_body(std::shared_ptr body); + + // plugin sets generator for a snippet to some specific generator. 
+ // it's going to be replaced with Jitters table later + void set_generator(std::shared_ptr generator); + + void print() const; + void print_statistics(bool verbose); + + static auto wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr; + +private: + void canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); + void convert_to_snippet_dialect(); + + std::shared_ptr m_body; + std::shared_ptr m_generator; +}; + +static inline std::ostream& operator<<(std::ostream& os, const op::Subgraph::BlockedShape& blocked_shape) { + os << std::get<0>(blocked_shape) << " " << std::get<1>(blocked_shape) << " " << std::get<2>(blocked_shape); + return os; +} + +static inline auto is_scalar_constant(const std::shared_ptr& source_output_node) -> bool { + return !!ngraph::as_type_ptr(source_output_node) && + (source_output_node->get_shape() == ngraph::Shape() || ngraph::shape_size(source_output_node->get_shape()) == 1); +}; + +static inline auto create_body(std::string name, const ngraph::ResultVector& results, const ngraph::ParameterVector& parameters) -> + std::shared_ptr { + auto body = std::make_shared(results, parameters, name); + return body; +}; + +static inline auto build_subgraph(const std::shared_ptr& node, const ngraph::OutputVector& inputs, const std::shared_ptr& body) + -> std::shared_ptr{ + auto subgraph = std::make_shared(inputs, body); + copy_runtime_info(node, subgraph); + subgraph->set_friendly_name(node->get_friendly_name()); + return subgraph; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/op/vectorload.hpp b/inference-engine/src/snippets/include/snippets/op/vectorload.hpp new file mode 100644 index 00000000000..11c77150f9b --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/vectorload.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + 
+#include + +#include +#include "load.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface VectorLoad + * @brief Generated by Canonicalization for a vector value load to vector register + * @ingroup snippets + */ +class TRANSFORMATIONS_API VectorLoad : public Load { +public: + NGRAPH_RTTI_DECLARATION; + + VectorLoad(const Output& x); + VectorLoad() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/vectorstore.hpp b/inference-engine/src/snippets/include/snippets/op/vectorstore.hpp new file mode 100644 index 00000000000..307c2e68236 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/vectorstore.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "store.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface VectorStore + * @brief Generated by Canonicalization for a vector value store from vector register + * @ingroup snippets + */ +class TRANSFORMATIONS_API VectorStore : public Store { +public: + NGRAPH_RTTI_DECLARATION; + + VectorStore(const Output& x); + VectorStore() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/pass/assign_registers.hpp b/inference-engine/src/snippets/include/snippets/pass/assign_registers.hpp new file mode 100644 index 00000000000..d94cfde48bd --- /dev/null +++ 
b/inference-engine/src/snippets/include/snippets/pass/assign_registers.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface AssignRegisters + * @brief Assigns internal `vector` register indexes to operations. + * Changing order of variables or dataflow leads to invalidation of register assignment. + * @ingroup snippets + */ +class TRANSFORMATIONS_API AssignRegisters : public ngraph::pass::FunctionPass { +public: + AssignRegisters() : FunctionPass() { + set_property(ngraph::pass::PassProperty::REQUIRE_STATIC_SHAPE, true); + } + bool run_on_function(std::shared_ptr function) override; +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/pass/collapse_subgraph.hpp b/inference-engine/src/snippets/include/snippets/pass/collapse_subgraph.hpp new file mode 100644 index 00000000000..1a77699d020 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/collapse_subgraph.hpp @@ -0,0 +1,74 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include + + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface StartSubgraph + * @brief Matches multiple output layout-oblivious operations to start a new subgraph + * @ingroup snippets + */ +class TRANSFORMATIONS_API StartSubgraph: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + explicit StartSubgraph(bool tokenize_by_node = false); +}; + +/** + * @interface AttachToSubgraph + * @brief Matches layout-oblivious operations with subgraph operation as an input to attach this node into it + * @ingroup snippets + */ +class TRANSFORMATIONS_API AttachToSubgraph: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + 
explicit AttachToSubgraph(bool tokenize_by_node = false); +}; + +/** + * @interface TokenizeSnippets + * @brief Splits function to subgraphs if possible using rules above + * This pass tokenizes topology graph into subgraphs. + * Those subgraphs consist of unary or binary layout-oblivious (LO) operations found in subset 1. + * Non-layout-oblivious (NLO) operations (also called support in this context) are ignored and become a full stop in tokenization routine + * 1. if a considered LO operation doesn't have any input subgraphs + * -> a new single-op subgraph is introduced + * 1. if a considered LO operation is a binary or a unary operation with at least one subgraph as an input + * -> 1. all inputs from the input subgraphs are collected together + * 1. non-subgraph inputs are wrapped into parameters + * 1. all input bodies are merged and + * 1. this new operation is added to a body of input subgraph + * 1. outputs are collected subgraph (outputs consumed by some other node & subgraph outputs consumed by the node to be merged) + * 1. finally current node is replaced with the new subgraph. 
We cannot use replace_node because multiple nodes are replaced so + * make the replacement manually by redirecting ports + * Input subgraph is prevented from visiting twice if more than one output of it is consumed by currently considered node + * New subgraph is introduced, if there is a loop introduced + * New subgraph is introduced, if number of inputs and outputs exceeds 7 due to scheduling limitation + * New subgraph is introduced, if multiple outputs of merged nodes are not broadcastable to each other (equality of all outputs is too much on the other hand) + * Scalar constants are placed as is into subgraph due to optimization purpose + * @ingroup snippets + */ +class TRANSFORMATIONS_API TokenizeSnippets: public ngraph::pass::GraphRewrite { +public: + NGRAPH_RTTI_DECLARATION; + TokenizeSnippets(bool tokenize_by_node = false) { + add_matcher(tokenize_by_node); + add_matcher(tokenize_by_node); + } +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/pass/insert_load_store.hpp b/inference-engine/src/snippets/include/snippets/pass/insert_load_store.hpp new file mode 100644 index 00000000000..797710dae02 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/insert_load_store.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface InsertLoad + * @brief Inserts explicit load instruction after each parameter. + * The pass is used to convert function to a canonical form for code generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API InsertLoad: public ngraph::pass::MatcherPass { +public: + InsertLoad(); +}; + +/** + * @interface InsertStore + * @brief Inserts explicit store instruction before each result. 
+ * The pass is used to convert function to a canonical form for code generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API InsertStore: public ngraph::pass::MatcherPass { +public: + InsertStore(); +}; + + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/pass/insert_movebroadcast.hpp b/inference-engine/src/snippets/include/snippets/pass/insert_movebroadcast.hpp new file mode 100644 index 00000000000..05e47374993 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/insert_movebroadcast.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface InsertMoveBroadcast + * @brief Inserts explicit MoveBroadcast instruction if broadcasting by most varying dimension is needed. + * The pass is used to convert function to a canonical form for code generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API InsertMoveBroadcast: public ngraph::pass::MatcherPass { +public: + InsertMoveBroadcast(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/pass/load_movebroadcast_to_broadcastload.hpp b/inference-engine/src/snippets/include/snippets/pass/load_movebroadcast_to_broadcastload.hpp new file mode 100644 index 00000000000..3af81c424b2 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/load_movebroadcast_to_broadcastload.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface LoadMoveBroadcastToBroadcastLoad + * @brief Fuses consecutive Load and MoveBroadcast into a single load 
instruction. + * The pass is used to convert function to a canonical form for code generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API LoadMoveBroadcastToBroadcastLoad: public ngraph::pass::MatcherPass { +public: + LoadMoveBroadcastToBroadcastLoad(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/pass/vector_to_scalar.hpp b/inference-engine/src/snippets/include/snippets/pass/vector_to_scalar.hpp new file mode 100644 index 00000000000..e01b240ae87 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/vector_to_scalar.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface ReplaceLoadsWithScalarLoads + * @brief Replaces vector loads with scalar versions. + * The pass is used to change element type of function in a canonical form from vector to scalar. + * Used for tail generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API ReplaceLoadsWithScalarLoads: public ngraph::pass::MatcherPass { +public: + ReplaceLoadsWithScalarLoads(); +}; + +/** + * @interface ReplaceStoresWithScalarStores + * @brief Replaces vector stores with scalar versions. + * The pass is used to change element type of function in a canonical form from vector to scalar. 
+ * Used for tail generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API ReplaceStoresWithScalarStores: public ngraph::pass::MatcherPass { +public: + ReplaceStoresWithScalarStores(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/register_info.hpp b/inference-engine/src/snippets/include/snippets/register_info.hpp new file mode 100644 index 00000000000..dbe914552ed --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/register_info.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace ngraph { + +template <> +class TRANSFORMATIONS_API VariantWrapper> : public VariantImpl> { +public: + static constexpr VariantTypeInfo type_info{"Variant::RegInfo|Variant::RuntimeAttribute::AxisVector", 0}; + + const VariantTypeInfo& get_type_info() const override { return type_info; } + VariantWrapper(const value_type& value) + : VariantImpl(value) { + } +}; + +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/snippets_isa.hpp b/inference-engine/src/snippets/include/snippets/snippets_isa.hpp new file mode 100644 index 00000000000..f078d0570f3 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/snippets_isa.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/ops.hpp" +#include + +#include "op/blockedload.hpp" +#include "op/blockedparameter.hpp" +#include "op/broadcastload.hpp" +#include "op/broadcastmove.hpp" +#include "op/load.hpp" +#include "op/nop.hpp" +#include "op/scalar.hpp" +#include "op/scalarload.hpp" +#include "op/scalarstore.hpp" +#include "op/staticpower.hpp" +#include "op/store.hpp" +#include "op/vectorload.hpp" +#include "op/vectorstore.hpp" + +namespace ngraph { +namespace snippets { 
+namespace isa { +#define NGRAPH_OP(a, b) using b::a; +#include "snippets_isa_tbl.hpp" +#undef NGRAPH_OP +} // namespace isa +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/snippets_isa_tbl.hpp b/inference-engine/src/snippets/include/snippets/snippets_isa_tbl.hpp new file mode 100644 index 00000000000..ecaf6e8dde1 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/snippets_isa_tbl.hpp @@ -0,0 +1,84 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#ifndef NGRAPH_OP +#warning "NGRAPH_OP not defined" +#define NGRAPH_OP(x, y) +#endif + +// SnippetS dialect +NGRAPH_OP(Load, ngraph::snippets::op) +NGRAPH_OP(ScalarLoad, ngraph::snippets::op) +NGRAPH_OP(VectorLoad, ngraph::snippets::op) +NGRAPH_OP(BlockedLoad, ngraph::snippets::op) +NGRAPH_OP(BroadcastLoad, ngraph::snippets::op) + +NGRAPH_OP(Store, ngraph::snippets::op) +NGRAPH_OP(ScalarStore, ngraph::snippets::op) +NGRAPH_OP(VectorStore, ngraph::snippets::op) + +NGRAPH_OP(BroadcastMove, ngraph::snippets::op) +NGRAPH_OP(Scalar, ngraph::snippets::op) +NGRAPH_OP(Nop, ngraph::snippets::op) + +// Layout-oblivious from opset1 + +// opset completeness +NGRAPH_OP(Constant, ngraph::op) +NGRAPH_OP(Parameter, ngraph::op::v0) +NGRAPH_OP(BlockedParameter, ngraph::snippets::op) +NGRAPH_OP(Result, ngraph::op::v0) +NGRAPH_OP(Broadcast, ngraph::op::v1) + +// unary +NGRAPH_OP(Abs, ngraph::op::v0) +NGRAPH_OP(Acos, ngraph::op::v0) +NGRAPH_OP(Asin, ngraph::op::v0) +NGRAPH_OP(Atan, ngraph::op::v0) +NGRAPH_OP(Ceiling, ngraph::op::v0) +NGRAPH_OP(Clamp, ngraph::op::v0) +NGRAPH_OP(Cos, ngraph::op::v0) +NGRAPH_OP(Cosh, ngraph::op::v0) +NGRAPH_OP(Elu, ngraph::op::v0) +NGRAPH_OP(Erf, ngraph::op::v0) +NGRAPH_OP(Exp, ngraph::op::v0) +NGRAPH_OP(Floor, ngraph::op::v0) +NGRAPH_OP(HardSigmoid, ngraph::op::v0) +NGRAPH_OP(Log, ngraph::op::v0) +NGRAPH_OP(LogicalNot, ngraph::op::v1) +NGRAPH_OP(Negative, ngraph::op::v0) 
+NGRAPH_OP(Relu, ngraph::op::v0) +NGRAPH_OP(Selu, ngraph::op::v0) +NGRAPH_OP(Sign, ngraph::op::v0) +NGRAPH_OP(Sigmoid, ngraph::op::v0) +NGRAPH_OP(Sin, ngraph::op::v0) +NGRAPH_OP(Sinh, ngraph::op::v0) +NGRAPH_OP(Sqrt, ngraph::op::v0) +NGRAPH_OP(Tan, ngraph::op::v0) +NGRAPH_OP(Tanh, ngraph::op::v0) + +// binary +NGRAPH_OP(Add, ngraph::op::v1) +NGRAPH_OP(Divide, ngraph::op::v1) +NGRAPH_OP(Equal, ngraph::op::v1) +NGRAPH_OP(FloorMod, ngraph::op::v1) +NGRAPH_OP(Greater, ngraph::op::v1) +NGRAPH_OP(GreaterEqual, ngraph::op::v1) +NGRAPH_OP(Less, ngraph::op::v1) +NGRAPH_OP(LessEqual, ngraph::op::v1) +NGRAPH_OP(LogicalAnd, ngraph::op::v1) +NGRAPH_OP(LogicalOr, ngraph::op::v1) +NGRAPH_OP(LogicalXor, ngraph::op::v1) +NGRAPH_OP(Maximum, ngraph::op::v1) +NGRAPH_OP(Minimum, ngraph::op::v1) +NGRAPH_OP(Mod, ngraph::op::v1) +NGRAPH_OP(Multiply, ngraph::op::v1) +NGRAPH_OP(NotEqual, ngraph::op::v1) +NGRAPH_OP(Power, ngraph::op::v1) +NGRAPH_OP(PRelu, ngraph::op::v0) +NGRAPH_OP(SquaredDifference, ngraph::op::v0) +NGRAPH_OP(Subtract, ngraph::op::v1) +NGRAPH_OP(Xor, ngraph::op::v0) diff --git a/inference-engine/src/snippets/src/generator.cpp b/inference-engine/src/snippets/src/generator.cpp new file mode 100644 index 00000000000..b037bea242e --- /dev/null +++ b/inference-engine/src/snippets/src/generator.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/generator.hpp" +#include "snippets/register_info.hpp" + +auto ngraph::snippets::getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo { + auto rt = n->get_rt_info(); + + std::vector rout; + if (auto rinfo = rt["reginfo"]) { + auto reginfo = ngraph::as_type_ptr>>(rinfo)->get(); + for (auto reg : reginfo) { + rout.push_back(reg); + } + } + + std::vector rin; + for (auto input : n->inputs()) { + auto rt = input.get_source_output().get_node_shared_ptr()->get_rt_info(); + if (auto rinfo = rt["reginfo"]) { + auto reginfo = ngraph::as_type_ptr>>(rinfo)->get(); + 
for (auto reg : reginfo) { + rin.push_back(reg); + } + } + } + return std::make_pair(rin, rout); +} \ No newline at end of file diff --git a/inference-engine/src/snippets/src/itt.hpp b/inference-engine/src/snippets/src/itt.hpp new file mode 100644 index 00000000000..d96715fcb2b --- /dev/null +++ b/inference-engine/src/snippets/src/itt.hpp @@ -0,0 +1,71 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** + +/** + * @brief Defines openvino domains for tracing + * @file itt.hpp + */ + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { +namespace itt { +namespace domains { + OV_ITT_DOMAIN(IETransform); +} // namespace domains +} // namespace itt +} // namespace pass +} // namespace ngraph + +OV_CC_DOMAINS(ngraph_pass); +OV_CC_DOMAINS(internal_op); + +/* + * RUN_ON_FUNCTION_SCOPE macro allows to disable the run_on_function pass + * MATCHER_SCOPE macro allows to disable the MatcherPass if matcher isn't applied + * INTERNAL_OP_SCOPE macro allows to disable parts of internal nGraph operations if they are not used + */ +#if defined(SELECTIVE_BUILD_ANALYZER) +#define RUN_ON_FUNCTION_SCOPE(region) OV_SCOPE(ngraph_pass, OV_PP_CAT(region, _run_on_function)) +#define MATCHER_SCOPE(region) \ + const std::string matcher_name(OV_PP_TOSTRING(region)) + +#define INTERNAL_OP_SCOPE(region) OV_SCOPE(internal_op, region) + +#elif defined(SELECTIVE_BUILD) + +#define MATCHER_SCOPE_(scope, region) \ + if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(scope, _, region)) == 0) \ + throw ngraph::ngraph_error(std::string(OV_PP_TOSTRING(OV_PP_CAT3(scope, _, region))) + \ + " is disabled!") + +#define MATCHER_SCOPE(region) \ + const std::string matcher_name(OV_PP_TOSTRING(region)); \ + if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(ngraph_pass, _, region)) == 0) \ + return +#define INTERNAL_OP_SCOPE(region) MATCHER_SCOPE_(internal_op, region) +#define RUN_ON_FUNCTION_SCOPE(region) MATCHER_SCOPE_(ngraph_pass, OV_PP_CAT(region, _run_on_function)) + +#else +#define MATCHER_SCOPE(region) \ + const std::string matcher_name(OV_PP_TOSTRING(region)) +#define INTERNAL_OP_SCOPE(region) +#define RUN_ON_FUNCTION_SCOPE(region) +#endif diff --git a/inference-engine/src/snippets/src/op/blockedload.cpp b/inference-engine/src/snippets/src/op/blockedload.cpp new file mode 100644 index 00000000000..f5dcbff8e4c --- /dev/null +++ 
b/inference-engine/src/snippets/src/op/blockedload.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/blockedload.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::BlockedLoad, "BlockedLoad", 0); + +snippets::op::BlockedLoad::BlockedLoad(const Output& x) : Load(x) { +} diff --git a/inference-engine/src/snippets/src/op/blockedparameter.cpp b/inference-engine/src/snippets/src/op/blockedparameter.cpp new file mode 100644 index 00000000000..85d66b3edba --- /dev/null +++ b/inference-engine/src/snippets/src/op/blockedparameter.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/blockedparameter.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::BlockedParameter, "BlockedParameter", 0); diff --git a/inference-engine/src/snippets/src/op/broadcastload.cpp b/inference-engine/src/snippets/src/op/broadcastload.cpp new file mode 100644 index 00000000000..d4f9372a2cf --- /dev/null +++ b/inference-engine/src/snippets/src/op/broadcastload.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/op/broadcastload.hpp" + +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::BroadcastLoad, "BroadcastLoad", 0); + +snippets::op::BroadcastLoad::BroadcastLoad(const Output& x, Shape shape) +: BroadcastMove(x, shape), broadcast_info(x.get_shape().size(), 0) { + constructor_validate_and_infer_types(); +} + +bool snippets::op::BroadcastLoad::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +std::shared_ptr snippets::op::BroadcastLoad::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(BroadcastLoad); + check_new_args_count(this, new_args); + auto other = std::make_shared(new_args.at(0), output_shape); + 
other->set_broadcast_info(this->broadcast_info); + return other; +} + +void snippets::op::BroadcastLoad::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), output_shape); +} diff --git a/inference-engine/src/snippets/src/op/broadcastmove.cpp b/inference-engine/src/snippets/src/op/broadcastmove.cpp new file mode 100644 index 00000000000..c0e080de87d --- /dev/null +++ b/inference-engine/src/snippets/src/op/broadcastmove.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/op/broadcastmove.hpp" + +#include +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::BroadcastMove, "BroadcastMove", 0); + +snippets::op::BroadcastMove::BroadcastMove(const Output& x, Shape shape) : Op({x}), output_shape(shape) { + constructor_validate_and_infer_types(); +} + +bool snippets::op::BroadcastMove::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +std::shared_ptr snippets::op::BroadcastMove::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(BroadcastMove); + check_new_args_count(this, new_args); + auto other = std::make_shared(new_args.at(0), this->output_shape); + return other; +} + +void snippets::op::BroadcastMove::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), this->output_shape); +} + +bool snippets::op::BroadcastMove::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const { + INTERNAL_OP_SCOPE(BroadcastMove); + NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config"); + NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config"); + NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation"); + NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same 
shape as output port"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + + auto ishape = input_values[0]->get_shape(); + auto oshape = output_values[0]->get_shape(); + + NGRAPH_CHECK(ishape.size() == oshape.size(), "input and output should have the same rank"); + + AxisSet broadcast_axes; + for (size_t k = 0; k < ishape.size(); k++) { + if (!((ishape[k] == oshape[k]) + || (ishape[k] != oshape[k] && ((ishape[k] == 1) != (oshape[k] == 1) ) ))) { + throw ngraph_error("FakeBroadcast::evaluate incompatible shapes"); + } + + if (ishape[k] != oshape[k]) { + broadcast_axes.insert(k); + } + } + + runtime::reference::broadcast(input_values[0]->get_data_ptr(), + output_values[0]->get_data_ptr(), + input_values[0]->get_shape(), + output_values[0]->get_shape(), + broadcast_axes, + sizeof(float)); + return true; +} \ No newline at end of file diff --git a/inference-engine/src/snippets/src/op/load.cpp b/inference-engine/src/snippets/src/op/load.cpp new file mode 100644 index 00000000000..abad20e1dfc --- /dev/null +++ b/inference-engine/src/snippets/src/op/load.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/op/load.hpp" + +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Load, "Load", 0); + +snippets::op::Load::Load(const Output& x) : Op({x}) { + constructor_validate_and_infer_types(); +} + +bool snippets::op::Load::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +std::shared_ptr snippets::op::Load::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(Load); + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); +} + +void snippets::op::Load::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); +} + +bool snippets::op::Load::evaluate(const 
HostTensorVector& output_values, const HostTensorVector& input_values) const { + INTERNAL_OP_SCOPE(Load); + NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config"); + NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config"); + NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation"); + NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same shape as output port"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + + std::copy(input_values[0]->get_data_ptr(), + input_values[0]->get_data_ptr() + shape_size(get_output_shape(0))*output_values[0]->get_element_type().size(), + output_values[0]->get_data_ptr()); + + return true; +} diff --git a/inference-engine/src/snippets/src/op/nop.cpp b/inference-engine/src/snippets/src/op/nop.cpp new file mode 100644 index 00000000000..73a5cebc124 --- /dev/null +++ b/inference-engine/src/snippets/src/op/nop.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/nop.hpp" + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Nop, "Nop", 0); + +snippets::op::Nop::Nop(const OutputVector& arguments, const OutputVector& results) : Op([arguments, results]() -> OutputVector { + OutputVector x; + x.insert(x.end(), arguments.begin(), arguments.end()); + x.insert(x.end(), results.begin(), results.end()); + return x; + }()) { +} diff --git a/inference-engine/src/snippets/src/op/scalar.cpp b/inference-engine/src/snippets/src/op/scalar.cpp new file mode 100644 index 00000000000..704be035eb4 --- /dev/null +++ b/inference-engine/src/snippets/src/op/scalar.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2020 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/scalar.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Scalar, "Scalar", 0); \ No newline at end of file diff --git a/inference-engine/src/snippets/src/op/scalarload.cpp b/inference-engine/src/snippets/src/op/scalarload.cpp new file mode 100644 index 00000000000..e3bff9123f8 --- /dev/null +++ b/inference-engine/src/snippets/src/op/scalarload.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/scalarload.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::ScalarLoad, "ScalarLoad", 0); + +snippets::op::ScalarLoad::ScalarLoad(const Output& x) : Load(x) { +} diff --git a/inference-engine/src/snippets/src/op/scalarstore.cpp b/inference-engine/src/snippets/src/op/scalarstore.cpp new file mode 100644 index 00000000000..991050fc016 --- /dev/null +++ b/inference-engine/src/snippets/src/op/scalarstore.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/scalarstore.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::ScalarStore, "ScalarStore", 0); + +snippets::op::ScalarStore::ScalarStore(const Output& x) : Store(x) { +} diff --git a/inference-engine/src/snippets/src/op/staticpower.cpp b/inference-engine/src/snippets/src/op/staticpower.cpp new file mode 100644 index 00000000000..38deb26ae5f --- /dev/null +++ b/inference-engine/src/snippets/src/op/staticpower.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/staticpower.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::PowerStatic, "PowerStatic", 0); diff --git a/inference-engine/src/snippets/src/op/store.cpp b/inference-engine/src/snippets/src/op/store.cpp new file mode 100644 index 00000000000..2055df1f395 --- /dev/null 
+++ b/inference-engine/src/snippets/src/op/store.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/op/scalarstore.hpp" + +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Store, "Store", 0); + +snippets::op::Store::Store(const Output& x) : Op({x}) { + constructor_validate_and_infer_types(); +} + +bool snippets::op::Store::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +std::shared_ptr snippets::op::Store::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(Store); + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); +} + +void snippets::op::Store::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); +} + +bool snippets::op::Store::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const { + INTERNAL_OP_SCOPE(Store); + NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config"); + NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config"); + NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation"); + NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same shape as output port"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + + std::copy(input_values[0]->get_data_ptr(), + input_values[0]->get_data_ptr() + shape_size(get_output_shape(0))*output_values[0]->get_element_type().size(), + output_values[0]->get_data_ptr()); + + return true; +} diff --git a/inference-engine/src/snippets/src/op/subgraph.cpp 
b/inference-engine/src/snippets/src/op/subgraph.cpp new file mode 100644 index 00000000000..ff1d61916ef --- /dev/null +++ b/inference-engine/src/snippets/src/op/subgraph.cpp @@ -0,0 +1,344 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include "remarks.hpp" + +#include "snippets/op/subgraph.hpp" +#include "snippets/pass/insert_load_store.hpp" +#include "snippets/pass/insert_movebroadcast.hpp" +#include "snippets/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/pass/assign_registers.hpp" + +#include + +#include +#include +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Subgraph, "Subgraph", 0); + +void snippets::op::Subgraph::set_generator(std::shared_ptr generator) { + m_generator = generator; +} + +snippets::op::Subgraph::Subgraph(const OutputVector& args, std::shared_ptr body) + : Op(args), m_body(body), m_generator(nullptr) { + constructor_validate_and_infer_types(); +} + +snippets::op::Subgraph::Subgraph(const NodeVector& args, std::shared_ptr body) + : Subgraph(as_output_vector(args), body) {} + +std::shared_ptr snippets::op::Subgraph::clone_with_new_inputs(const OutputVector& inputs) const { + INTERNAL_OP_SCOPE(Subgraph); + return make_shared(inputs, ngraph::clone_function(*m_body.get())); +} + +void snippets::op::Subgraph::validate_and_infer_types() { + INTERNAL_OP_SCOPE(Subgraph); + ngraph::ParameterVector old_parameters; + for (auto op : m_body->get_parameters()) { + old_parameters.push_back(op); + } + + for (size_t i = 0; i < get_input_size(); ++i) { + m_body->replace_parameter(i, std::make_shared(get_input_element_type(i), get_input_partial_shape(i))); + } + + m_body->validate_nodes_and_infer_types(); + + for (size_t i = 0; i < m_body->get_parameters().size(); i++) { + m_body->get_parameters()[i]->set_friendly_name(old_parameters[i]->get_friendly_name()); + } + + set_output_size(m_body->get_output_size()); + for (size_t 
i = 0; i < get_output_size(); ++i) { + set_output_type(i, m_body->get_output_element_type(i), m_body->get_output_partial_shape(i)); + } +} + +bool snippets::op::Subgraph::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +auto snippets::op::Subgraph::wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr { + INTERNAL_OP_SCOPE(Subgraph); + ngraph::ParameterVector body_parameters; + ngraph::OutputVector body_inputs; + + ngraph::OutputVector subgraph_inputs; + + for (auto input : node->inputs()) { + auto source_output = input.get_source_output(); + if (is_scalar_constant(source_output.get_node_shared_ptr())) { + body_inputs.push_back(source_output); + } else { + auto parameter = std::make_shared(input.get_element_type(), input.get_partial_shape()); + body_parameters.push_back(parameter); + body_parameters.back()->set_friendly_name(source_output.get_node()->get_friendly_name()); + body_inputs.push_back(parameter->output(0)); + + subgraph_inputs.push_back(source_output); + } + } + + auto body_node = node->copy_with_new_inputs(body_inputs); + body_node->set_friendly_name(node->get_friendly_name()); + + if (node->get_output_size() != body_node->get_output_size()) { + throw ngraph::ngraph_error("original node outputs size and extracted subgraph node outputs size doesn't much"); + } + + ngraph::ResultVector body_results; + for (auto output : node->outputs()) { + body_results.push_back(std::make_shared(body_node->output(output.get_index()))); + } + + auto body = create_body(node->get_friendly_name(), body_results, body_parameters); + auto subgraph = build_subgraph(node, subgraph_inputs, body); + + for (size_t i = 0; i < body->get_parameters().size(); i++) { + body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + if (subgraph->get_output_size() != body->get_results().size()) { + throw ngraph::ngraph_error("newly create subgraph doesn't much number of original node results"); + } + + return subgraph; +} + 
+std::shared_ptr snippets::op::Subgraph::make_canonical_from_this() { + INTERNAL_OP_SCOPE(Subgraph); + ngraph::OutputVector subgraph_node_inputs; + for (auto input : this->input_values()) { + subgraph_node_inputs.push_back(input); + } + auto new_body = ngraph::clone_function(*this->get_body().get()); + auto snippet = std::make_shared(subgraph_node_inputs, new_body); + ngraph::copy_runtime_info(this->shared_from_this(), snippet); + snippet->set_friendly_name(this->get_friendly_name()); + snippet->set_generator(this->m_generator); + + return snippet; +} + +// We also can think of canonization as of pass to copy original subgraph and transforming it to canonical form suitable for code generation +// pass actual parameters and results shapes to generate for as well as channel mapping, +// we need to distinguish between 5d tensors that represents and somehow like locked dimensions +// ngraph::AxisVector to code +void snippets::op::Subgraph::canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes) { + INTERNAL_OP_SCOPE(Subgraph); + NODE_VALIDATION_CHECK(this, input_shapes.size() == m_body->get_parameters().size(), + "Number of parameters for snippet doesn't much passed to generate method: ", input_shapes.size(), " vs ", m_body->get_parameters().size(), "."); + + NODE_VALIDATION_CHECK(this, output_shapes.size() == m_body->get_results().size(), + "number of results for snippet doesn't much passed to generate method: ", output_shapes.size(), " vs ", m_body->get_results().size(), "."); + + // replace only constants which are actually should be represented as scalars during code generation and probably move this step a bit later + for (auto op : m_body->get_ordered_ops()) { + if (auto constant = ngraph::as_type_ptr(op)) { + auto scalar = std::make_shared(*constant); + scalar->set_friendly_name(constant->get_friendly_name()); + ngraph::copy_runtime_info(constant, scalar); + ngraph::replace_node(constant, scalar); + } + } + + // repalace 
power with power static + for (auto op : m_body->get_ordered_ops()) { + if (auto power = ngraph::as_type_ptr(op)) { + if (ngraph::as_type_ptr(power->input(1).get_node()->shared_from_this())) { + auto power_static = std::make_shared( + power->input(0).get_source_output(), power->input(1).get_source_output(), power->get_autob()); + power_static->set_friendly_name(power->get_friendly_name()); + ngraph::copy_runtime_info(power, power_static); + ngraph::replace_node(power, power_static); + } + } + } + + + // it should be in subgraph node to be aligned with internal and external parameter list, but adding this for testing + // TODO: store blocking into to Parameter's rt_info for future propagation + for (size_t i = 0; i < m_body->get_parameters().size(); i++) { + auto param = m_body->get_parameters()[i]; + if (param->get_shape().size() < 4) { + std::vector shape(4, 1); + std::copy(param->get_shape().begin(), param->get_shape().end(), &shape.at(4 - (param->get_shape().size() == 0 ? 1 : param->get_shape().size())) ); + m_body->replace_parameter(i, std::make_shared(param->get_element_type(), ngraph::Shape(shape))); + } else if (param->get_shape().size() >= 4) { + if (param->get_element_type() != std::get<2>(input_shapes[i])) { + throw ngraph::ngraph_error("changes in presision. 
Is it legal??"); + } + if (param->get_shape().size() != std::get<0>(input_shapes[i]).size()) { + m_body->replace_parameter(i, std::make_shared(std::get<2>(input_shapes[i]), std::get<0>(input_shapes[i]))); + } + } + } + + m_body->validate_nodes_and_infer_types(); + + for (size_t i = 0; i < m_body->get_results().size(); i++) { + auto result = m_body->get_results()[i]; + PartialShape partial(result->get_shape()); + bool isCompatible = ngraph::PartialShape::broadcast_merge_into(partial, std::get<0>(output_shapes[i]), ::ngraph::op::AutoBroadcastSpec::NUMPY); + // equality check won't pass since we reshape without changes on external snippet edges + NODE_VALIDATION_CHECK(this, isCompatible, "Inferend and passed results shapes are difference for snippet : ", + result->get_shape(), " vs ", std::get<0>(output_shapes[i]), "."); + } +} + +void snippets::op::Subgraph::convert_to_snippet_dialect() { + INTERNAL_OP_SCOPE(Subgraph); + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(m_body); +} + +snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes) { + INTERNAL_OP_SCOPE(Subgraph); + NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set"); + + canonicalize(output_shapes, input_shapes); + convert_to_snippet_dialect(); + + // generation flow + snippets::pass::AssignRegisters().run_on_function(m_body); + + // actual code emission + ngraph::snippets::code ptr = m_generator->generate(m_body); + + // chack that body doesnt have constants for scheduling + std::vector> constants; + for (auto op : m_body->get_ordered_ops()) { + if (auto constant = as_type_ptr(op)) { + if (ngraph::shape_size(constant->get_shape()) != 1 && constant->get_shape() != Shape()) { + constants.push_back(constant); + } + } + } + NGRAPH_CHECK(!constants.size(), "External constants detected. 
Snippet is illigal for sheduling"); + + // check resulting shapes are broadcastable to each other so can be scheduled + Shape work_size = m_body->output(0).get_shape(); + for (size_t k = 0; k < m_body->get_output_size(); k++) { + auto shape = m_body->output(k).get_shape(); + + if (work_size.size() != shape.size()) { + throw ngraph_error("rank for all outputs of a snippet should match"); + } + + for (size_t i = 0; i < work_size.size(); i++) { + if (work_size[i] != shape[i]) { + if (work_size[i] == 1) { + work_size[i] = shape[i]; + } else { + throw ngraph_error("incompatible shapes for output graphs"); + } + } + } + } + + return {work_size, false /*canBeLinearized*/, ptr}; +} + +bool snippets::op::Subgraph::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { + INTERNAL_OP_SCOPE(Subgraph); + return m_body->evaluate(outputs, inputs); +} + +void snippets::op::Subgraph::print() const { + INTERNAL_OP_SCOPE(Subgraph); + remark(13) << "subgraph " << this->get_friendly_name() << " " + << this->get_type_name() + << " which contains " << this->get_body()->get_ops().size() << " nodes" << std::endl; + + int qqq = 0; + for (auto op : this->get_body()->get_ordered_ops()) { + remark(13) << "op " << qqq++ << " " << op->get_friendly_name() << " (" << op->get_type_name() << ") " << op << std::endl; + } + + for (auto& in : this->inputs()) { + remark(13) << " -> " << in.get_source_output().get_node_shared_ptr()->get_friendly_name() << " " + << in.get_source_output().get_node_shared_ptr() << std::endl; + } + + for (auto& out : this->outputs()) { + for (auto& user : out.get_target_inputs()) { + remark(13) << " <- " << user.get_node()->get_friendly_name() << " " << user.get_node() << std::endl; + } + remark(13) << std::endl; + } +} + +void snippets::op::Subgraph::print_statistics(bool verbose) { + INTERNAL_OP_SCOPE(Subgraph); + auto getNodeInventory = [](std::shared_ptr n) -> size_t { + size_t total = 0; + + for (auto input : n->inputs()) { + total += 
input.get_tensor().size(); + } + + for (auto output : n->outputs()) { + total += output.get_tensor().size(); + } + + if (auto subgraph = ngraph::as_type_ptr(n)) { + for (auto op : subgraph->get_body()->get_ordered_ops()) { + if (ngraph::as_type_ptr(op)) { + total += op->output(0).get_tensor().size(); + } + } + } + + return total; + }; + + auto getFunctionInventory = [getNodeInventory](std::shared_ptr f) -> size_t { + size_t total = 0; + for (auto op : f->get_ordered_ops()) { + // Results and parameters are artificially introduced, + // while Constants are already considered if they are inputs of other operation + // this should lead to 1:1 inventory for single node operations + if (!ngraph::as_type_ptr(op) + && !ngraph::as_type_ptr(op) + && !ngraph::as_type_ptr(op)) { + total += getNodeInventory(op); + } + } + return total; + }; + + auto countConstants = [](std::shared_ptr f) -> size_t { + size_t count = 0; + for (auto op : f->get_ordered_ops()) { + count += !!ngraph::as_type_ptr(op) ? 1 : 0; + } + return count; + }; + + auto body = this->get_body(); + + std::cout << this->get_friendly_name() + << ";" << this + << ";" << body->get_ops().size() + << ";" << body->get_parameters().size() + << ";" << body->get_results().size() + << ";" << countConstants(body) + << ";" << getFunctionInventory(body) + << ";" << getNodeInventory(this->shared_from_this()) << std::endl; + + if (verbose) { + this->print(); + } +} diff --git a/inference-engine/src/snippets/src/op/vectorload.cpp b/inference-engine/src/snippets/src/op/vectorload.cpp new file mode 100644 index 00000000000..346767f9a9c --- /dev/null +++ b/inference-engine/src/snippets/src/op/vectorload.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/vectorload.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::VectorLoad, "VectorLoad", 0); + +snippets::op::VectorLoad::VectorLoad(const Output& x) : Load(x) { +} diff --git 
a/inference-engine/src/snippets/src/op/vectorstore.cpp b/inference-engine/src/snippets/src/op/vectorstore.cpp new file mode 100644 index 00000000000..116b071a0ce --- /dev/null +++ b/inference-engine/src/snippets/src/op/vectorstore.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/vectorstore.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::VectorStore, "VectorStore", 0); + +snippets::op::VectorStore::VectorStore(const Output& x) : Store(x) { +} diff --git a/inference-engine/src/snippets/src/pass/assign_registers.cpp b/inference-engine/src/snippets/src/pass/assign_registers.cpp new file mode 100644 index 00000000000..6abdf07d72b --- /dev/null +++ b/inference-engine/src/snippets/src/pass/assign_registers.cpp @@ -0,0 +1,183 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// #include +#include "itt.hpp" +#include "remarks.hpp" + +#include "snippets/pass/assign_registers.hpp" +#include "snippets/register_info.hpp" +#include "snippets/snippets_isa.hpp" + +#include + +#include + +bool ngraph::snippets::pass::AssignRegisters::run_on_function(std::shared_ptr f) { + RUN_ON_FUNCTION_SCOPE(AssignRegisters); + int reg64_tmp_start { 8 }; // R8, R9, R10, R11, R12, R13, R14, R15 inputs+outputs+1 + using Reg = size_t; + auto ops = f->get_ordered_ops(); + decltype(ops) stmts; + std::copy_if(ops.begin(), ops.end(), std::back_inserter(stmts), [](decltype(ops[0]) op) { + return !(std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op)); + }); + + size_t rdx = 0; + std::map, Reg> regs; + for (auto op : stmts) { + for (auto output : op->outputs()) { + regs[output.get_tensor_ptr()] = rdx++; + } + } + + std::vector> used; + std::vector> def; + + for (auto op : stmts) { + std::set u; + for (auto input : op->inputs()) { + if (regs.count(input.get_tensor_ptr())) { + u.insert(regs[input.get_tensor_ptr()]); + } + } + used.push_back(u); + + 
std::set d; + if (!std::dynamic_pointer_cast(op)) { + for (auto output : op->outputs()) { + d.insert(regs[output.get_tensor_ptr()]); + } + } + def.push_back(d); + } + + // define life intervals + std::vector> lifeIn(stmts.size(), std::set()); + std::vector> lifeOut(stmts.size(), std::set()); + + for (size_t i = 0; i < stmts.size(); i++) { + for (size_t n = 0; n < stmts.size(); n++) { + std::set_difference(lifeOut[n].begin(), lifeOut[n].end(), def[n].begin(), def[n].end(), std::inserter(lifeIn[n], lifeIn[n].begin())); + lifeIn[n].insert(used[n].begin(), used[n].end()); + } + for (size_t n = 0; n < stmts.size(); n++) { + auto node = stmts[n]; + if (!std::dynamic_pointer_cast(node)) { + for (auto out : node->outputs()) { + for (auto port : out.get_target_inputs()) { + auto pos = std::find(stmts.begin(), stmts.end(), port.get_node()->shared_from_this()); + if (pos != stmts.end()) { + auto k = pos-stmts.begin(); + lifeOut[n].insert(lifeIn[k].begin(), lifeIn[k].end()); + } + } + } + } + } + } + + struct by_starting { + auto operator()(const std::pair& lhs, const std::pair& rhs) const -> bool { + return lhs.first < rhs.first|| (lhs.first == rhs.first && lhs.second < rhs.second); + } + }; + + struct by_ending { + auto operator()(const std::pair& lhs, const std::pair& rhs) const -> bool { + return lhs.second < rhs.second || (lhs.second == rhs.second && lhs.first < rhs.first); + } + }; + + std::set, by_starting> live_intervals; + + std::reverse(lifeIn.begin(), lifeIn.end()); + auto find_last_use = [lifeIn](int i) -> int { + int ln = lifeIn.size()-1; + for (auto& x : lifeIn) { + if (x.find(i) != x.end()) { + return ln; + } + ln--; + } + return i; + }; + + for (size_t i = 0; i < stmts.size(); i++) { + live_intervals.insert(std::make_pair(i, find_last_use(i))); + } + + // http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf + std::multiset, by_ending> active; + std::map register_map; + std::stack bank; + for (int i = 0; i < 16; i++) bank.push(16-1-i); + + for (auto 
interval : live_intervals) { + // check expired + while (!active.empty()) { + auto x = *active.begin(); + if (x.second >= interval.first) { + break; + } + active.erase(x); + bank.push(register_map[x.first]); + } + // allocate + if (active.size() == 16) { + throw ngraph_error("caanot allocate registers for a snippet "); + } else { + register_map[interval.first] = bank.top(); + bank.pop(); + active.insert(interval); + } + } + + std::map, Reg> physical_regs; + + for (auto reg : regs) { + physical_regs[reg.first] = register_map[reg.second]; + } + + size_t constantID = 0; + + for (auto n : f->get_ordered_ops()) { + auto& rt = n->get_rt_info(); + // nothing to do for function signature + if (std::dynamic_pointer_cast(n) || std::dynamic_pointer_cast(n)) { + continue; + } + + // store only effective address + if (auto result = std::dynamic_pointer_cast(n)) { + auto ea = reg64_tmp_start+static_cast(f->get_result_index(result) + f->get_parameters().size()); + rt["effectiveAddress"] = std::make_shared>(VariantWrapper(ea)); + continue; + } + // store effective address and procced with vector registers + if (as_type_ptr(n) || as_type_ptr(n)) { + auto source = n->get_input_source_output(0).get_node_shared_ptr(); + + if (auto param = as_type_ptr(source)) { + auto ea = reg64_tmp_start+static_cast(f->get_parameter_index(param)); + rt["effectiveAddress"] = std::make_shared>(VariantWrapper(ea)); + } else if (auto constant = as_type_ptr(source)) { + auto ea = reg64_tmp_start+static_cast(f->get_parameters().size() + f->get_results().size() + 1 + constantID); + rt["effectiveAddress"] = std::make_shared>(VariantWrapper(ea)); + constantID++; + } else { + throw ngraph_error("load/broadcast should follow only Parameter or non-Scalar constant"); + } + } + + std::vector regs; regs.reserve(n->outputs().size()); + for (auto output : n->outputs()) { + auto allocated = physical_regs[output.get_tensor_ptr()]; + regs.push_back(allocated); + } + rt["reginfo"] = 
std::make_shared>>(VariantWrapper>(regs)); + } + + return false; +} diff --git a/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp b/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp new file mode 100644 index 00000000000..2b08eea16a5 --- /dev/null +++ b/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp @@ -0,0 +1,516 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "remarks.hpp" +#include "itt.hpp" + +#include "snippets/pass/collapse_subgraph.hpp" +#include "snippets/op/subgraph.hpp" + +#include +#include +#include + + +#include +#include +#include +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::StartSubgraph, "CollapseSubgraph", 0); +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::AttachToSubgraph, "CollapseSubgraph", 0); +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::TokenizeSnippets, "CollapseSubgraph", 0); + +using namespace ngraph; +using namespace snippets; + +namespace { + +auto outputs_are_not_broadcastable(const std::shared_ptr& node) -> bool { + auto outputs = node->outputs(); + auto find_smallest_output_shape = [](const std::vector>& outputs) -> ngraph::Shape { + return std::accumulate(std::begin(outputs), std::end(outputs), ngraph::Shape(outputs.begin()->get_shape()), + [](ngraph::Shape other_shape, ngraph::Output output){ + return ngraph::shape_size(output.get_shape()) < ngraph::shape_size(other_shape) ? 
output.get_shape() : other_shape; + }); + }; + auto ref_shape = find_smallest_output_shape(outputs); + + auto check_shapes_broadcastable = [ref_shape](const ngraph::Output& output) -> bool { + auto other_shape = output.get_shape(); + + if (other_shape.size() != ref_shape.size()) { + return false; + } + + return std::inner_product(std::begin(other_shape), std::end(other_shape), std::begin(ref_shape), true, + std::logical_and(), [](ngraph::Shape::value_type lsh, ngraph::Shape::value_type rsh){ + return rsh == 1 || lsh == rsh; + }); + }; + + return std::find_if_not(std::begin(outputs), std::end(outputs), check_shapes_broadcastable) != std::end(outputs); +}; + +auto has_cycles_of_dependencies(const std::vector>>& results, + const std::vector>& inputs) -> bool { + auto BFS_from_to = [](ngraph::Node* from, ngraph::Node* to) -> bool { + std::unordered_set visited; + std::queue stack; + stack.push(from); + + while (stack.size() > 0) { + ngraph::Node* curr = stack.front(); + visited.insert(curr); + + if (ngraph::op::is_output(curr)) { + return false; + } + + stack.pop(); + + if (curr != to) { + for (const auto& next : curr->get_users()) { + if (visited.count(next.get()) == 0) { + stack.push(next.get()); + } + } + } else { + return true; + } + } + return false; + }; + + for (auto& result : results) { + for (auto& user : result) { + for (auto& input : inputs) { + auto source = input.get_source_output().get_node(); + auto containsLoop = BFS_from_to(user.get_node(), source); + + remark(1) << "checking path from " + << user.get_node()->get_friendly_name() + << " to " << source->get_friendly_name() + << " resulted in " << containsLoop << std::endl; + + if (containsLoop) { + return true; + } + } + } + } + return false; +} + +auto has_subgraph_as_input(std::shared_ptr node) -> bool { + auto inputs = node->inputs(); + for (auto input : inputs) { + auto parent = input.get_source_output().get_node_shared_ptr(); + if (!!as_type_ptr(parent)) { + return true; + } + } + return false; +}; 
+ +auto is_lo(std::shared_ptr n) -> bool { + auto is_lob = [](std::shared_ptr n) -> bool { + using ngraph::as_type_ptr; + return !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n); + }; + + auto is_lou = [](std::shared_ptr n) -> bool { + using ngraph::as_type_ptr; + return !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) ? + || !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + // || !!as_type_ptr(n) ? + // || !!as_type_ptr(n) ? + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + // || !!as_type_ptr(n) ? 
+ || !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) + || !!as_type_ptr(n) + // || !!as_type_ptr(n) + || !!as_type_ptr(n); + }; + + auto is_lot = [](std::shared_ptr n) -> bool { + using ngraph::as_type_ptr; + return false; + // return !!as_type_ptr(n) // ternary with 2 constants + // || !!as_type_ptr(n); // ternary with 2 constants / or DW + }; + + auto is_fq = [](std::shared_ptr n) -> bool { + using ngraph::as_type_ptr; + return false;//!!as_type_ptr(n); // 4->1 + }; + + return is_lou(n) || is_lob(n) ||is_lot(n) || is_fq(n); +} + +auto has_supported_in_out(std::shared_ptr n) -> bool { + for (auto in : n->inputs()) { + if (in.get_tensor().get_element_type() != ngraph::element::f32) { + return false; + } + } + + for (auto out : n->outputs()) { + if (out.get_tensor().get_element_type() != ngraph::element::f32) { + return false; + } + + for (auto in_out : out.get_target_inputs()) { + if (!!as_type_ptr(in_out.get_node()->shared_from_this())) { + return false; + } + } + } + + return true; +}; + +} // namespace + +ngraph::snippets::pass::StartSubgraph::StartSubgraph(bool tokenize_by_node) : MatcherPass() { + MATCHER_SCOPE(StartSubgraph); + + auto has_multiple_output_edges = [](std::shared_ptr n) -> bool { + for (auto out : n->outputs()) { + if (out.get_target_inputs().size() != 1) return true; + } + + return false; + }; + + register_matcher(std::make_shared( + std::make_shared(pattern::any_input(), + [tokenize_by_node, has_multiple_output_edges](std::shared_ptr n) { + return is_lo(n) && + has_supported_in_out(n) && + (tokenize_by_node || !has_subgraph_as_input(n)) && + has_multiple_output_edges(n); + })), + [](ngraph::pattern::Matcher &m) -> bool { + auto node = m.get_match_root(); + + remark(1) << "Match root" + << node->get_friendly_name() + << " " << node + << " Creating new snippet - no input subgraphs found" << std::endl; + + auto subgraph = op::Subgraph::wrap_node_as_subgraph(node); + ngraph::replace_node(node, subgraph); + + remark(1) << 
"Replacement (new) done for: " + << subgraph->get_friendly_name() + << " with " << subgraph->inputs().size() + << " inputs and " << subgraph->outputs().size() + << " outputs and " << subgraph->get_body()->get_ops().size() << " ops total\n"; + return true; + }); +} + +ngraph::snippets::pass::AttachToSubgraph::AttachToSubgraph(bool tokenize_by_node) : MatcherPass() { + MATCHER_SCOPE(AttachToSubgraph); + enum continuation_strategy { + reset, + abort + }; + + continuation_strategy strategy = continuation_strategy::abort; + + ngraph::graph_rewrite_callback continuation_callback = [strategy](ngraph::pattern::Matcher &m) -> bool { + auto node = m.get_match_root(); + + remark(1) << "Match root " << node->get_friendly_name() << " " << node << std::endl; + + // inputs that are already subgraphs + std::unordered_set> input_subgraphs; + // clone bodies because we need a rollback if loop is found + std::map, std::shared_ptr> clones; + + ParameterVector body_parameters; + OutputVector external_inputs; + OutputVector internal_inputs; + + auto inputs = node->inputs(); + + auto is_recurrent = [inputs](const ngraph::Output& to_find) -> bool { + for (auto in : inputs) { + if (in.get_source_output().get_node_shared_ptr() == to_find.get_node_shared_ptr()) { + return true; + } + } + return false; + }; + + auto get_input_index = [](const Output& found) -> size_t { + for (auto& input : found.get_target_inputs()) { + remark(13) << input.get_node() << " " << input.get_source_output() << " vs " + << found << found.get_node() << " : " << input.get_index() << " " << found.get_index() << std::endl; + } + + for (auto& input : found.get_target_inputs()) { + remark(13) << input.get_node() << " " << input.get_source_output() << " vs " + << found << " : " << input.get_index() << " " << found.get_index() << std::endl; + if (as_type_ptr(input.get_node()->shared_from_this()) != nullptr && input.get_source_output() == found) { + return input.get_index(); + } + } + return 0; + }; + + for (auto input : 
inputs) { + auto input_node = input.get_source_output().get_node_shared_ptr(); + + if (auto subgraph = as_type_ptr(input_node)) { + if (!clones.count(input_node)) { + auto f = ngraph::clone_function(*subgraph->get_body().get()); + f->set_friendly_name(subgraph->get_body()->get_friendly_name()); + clones[input_node] = f; + } + } + } + + for (auto input : inputs) { + auto input_node = input.get_source_output().get_node_shared_ptr(); + + if (auto subgraph = as_type_ptr(input_node)) { + if (!input_subgraphs.count(input_node)) { + input_subgraphs.insert(input_node); + + auto f = clones[input_node]; + const auto& input_body_parameters = f->get_parameters(); + + for (size_t i = 0; i < input_body_parameters.size(); ++i) { + auto found = std::find(external_inputs.begin(), external_inputs.end(), subgraph->input_value(i)); + if (found != external_inputs.end()) { + auto current_input_index = get_input_index(*found); + // Handling the case if multiple inputs referencing the same parameter comes from one subgraph => it's not introduced by SS. 
+ // It might be better to keep track if body parameter relationship rather than that + if (current_input_index < body_parameters.size()) { + remark(13) << "replacing " << *found << " " << current_input_index << " with " + << body_parameters[current_input_index] << std::endl; + f->replace_parameter(i, body_parameters[current_input_index]); + } else { + external_inputs.push_back(subgraph->input_value(i)); + body_parameters.push_back(input_body_parameters[i]); + } + } else if (is_recurrent(subgraph->input_value(i))) { + remark(13) << "ternary merge is conducted " << subgraph->input_value(i).get_node_shared_ptr() << std::endl; + + auto internal = input_body_parameters[i]; + auto internal_consumers = internal->outputs(); + + for (auto output : internal->outputs()) { + for (auto consumer : output.get_target_inputs()) { + if (auto to_replace_with = as_type_ptr(subgraph->input_value(i).get_node_shared_ptr())) { + auto other_body = clones[subgraph->input_value(i).get_node_shared_ptr()]; + auto other_body_result = other_body->get_results()[consumer.get_source_output().get_index()]; + auto result_producer = other_body_result->input(0).get_source_output(); + + consumer.replace_source_output(result_producer.get_node_shared_ptr()); + } + } + } + } else { + external_inputs.push_back(subgraph->input_value(i)); + body_parameters.push_back(input_body_parameters[i]); + } + } + } + + // this is there stitching happens, get result of a copy of a body of currently processed input and put it to the new inputs + // internal output index == external output index + auto& input_body = clones[input_node]; + size_t source_output_index = input.get_source_output().get_index(); + auto source_result = input_body->get_results()[source_output_index]; + // Result op has a single input + internal_inputs.push_back(source_result->input_value(0)); + } else { + if (op::is_scalar_constant(input_node)) { + internal_inputs.push_back(input_node->output(0)); + } else { + 
external_inputs.push_back(input.get_source_output()); + auto new_parameter = std::make_shared(input.get_element_type(), input.get_partial_shape()); + new_parameter->set_friendly_name(input.get_source_output().get_node()->get_friendly_name()); + body_parameters.push_back(new_parameter); + body_parameters.back()->set_friendly_name(input.get_source_output().get_node()->get_friendly_name()); + internal_inputs.push_back(new_parameter->output(0)); + } + } + } + + auto body_node = node->copy_with_new_inputs(internal_inputs); + body_node->set_friendly_name(node->get_friendly_name()); + + remark(1) << "Original node outputs = " << node->get_output_size() + << " body node outputs = " << body_node->get_output_size() << std::endl; + + if (node->get_output_size() != body_node->get_output_size()) { + throw ngraph_error("original node outputs size and extracted node outputs size doesn't much"); + } + + ResultVector body_results; + std::vector>> subgraph_result_inputs; + + for (auto subgraph : input_subgraphs) { + for (auto output : subgraph->outputs()) { + bool first_side_consumer = true; + + for (auto target_input : output.get_target_inputs()) { + auto target_node = target_input.get_node()->shared_from_this(); + + if (input_subgraphs.count(target_node)) { + remark(13) << "ternary merge is conducted " << subgraph << " -> " << target_node << std::endl; + } + + if (!input_subgraphs.count(target_node) && target_node != node) { + if (first_side_consumer) { + auto& input_subgraph_body = clones[subgraph]; + body_results.push_back(std::make_shared(input_subgraph_body->get_results()[output.get_index()]->input_value(0))); + subgraph_result_inputs.push_back({}); + + first_side_consumer = false; + } + + if (!!subgraph_result_inputs.back().count(target_input)) { + throw ngraph_error("target input added twice!!!"); + } + // save target input port outside the body + subgraph_result_inputs.back().insert(target_input); + } + } + } + } + + for (auto output : node->outputs()) { + 
body_results.push_back(std::make_shared(body_node->output(output.get_index()))); + subgraph_result_inputs.push_back(output.get_target_inputs()); + } + + if (body_results.size() != subgraph_result_inputs.size()) { + throw ngraph_error("body results and node results size mismatch during subgraph collapse"); + } + + if (body_parameters.size() + body_results.size() > 7) { + if (strategy == continuation_strategy::reset) { + remark(13) << "new subgraph is created. Impossible to schedule subgraph with " + << body_parameters.size() << " inputs and " << body_results.size() << " outputs." << std::endl; + + auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node); + ngraph::replace_node(node, single_node_subgraph); + return true; + } else { + return false; + } + } + + auto body = op::create_body(node->get_friendly_name(), body_results, body_parameters); + for (size_t i = 0; i < body->get_parameters().size(); i++) { + body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + auto subgraph = op::build_subgraph(node, external_inputs, body); + auto act_body = subgraph->get_body(); + for (size_t i = 0; i < act_body->get_parameters().size(); i++) { + act_body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + if (subgraph->get_output_size() != subgraph_result_inputs.size()) { + throw ngraph_error("newly created subgraph doesn't match number of results"); + } + + if (outputs_are_not_broadcastable(subgraph)) { + if (strategy == continuation_strategy::reset) { + remark(13) << "New subgraph is created due to outputs of a subgraph not broadcastable." 
<< std::endl; + + auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node); + single_node_subgraph->validate_and_infer_types(); + ngraph::replace_node(node, single_node_subgraph); + return true; + } else { + return false; + } + } + + if (has_cycles_of_dependencies(subgraph_result_inputs, subgraph->inputs())) { + if (strategy == continuation_strategy::reset) { + remark(13) << "New subgraph is created due to loop dependency introduced by one of input subgraphs." << std::endl; + + auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node); + single_node_subgraph->validate_and_infer_types(); + ngraph::replace_node(node, single_node_subgraph); + return true; + } else { + return false; + } + } + + for (size_t i = 0; i < subgraph->get_output_size(); ++i) { + for (auto target_input : subgraph_result_inputs[i]) { + target_input.replace_source_output(subgraph->output(i)); + } + } + + subgraph->validate_and_infer_types(); + + auto act_body1 = subgraph->get_body(); + for (size_t i = 0; i < act_body1->get_parameters().size(); i++) { + act_body1->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + remark(1) << "Replacement (merge) done for: " + << subgraph->get_friendly_name() + << " with " << subgraph->inputs().size() + << " inputs and " << subgraph->outputs().size() + << " outputs and " << subgraph->get_body()->get_ops().size() << " ops total\n"; + + return true; + }; + + register_matcher(std::make_shared( + std::make_shared(pattern::any_input(), + [](std::shared_ptr n) { + return is_lo(n) && has_supported_in_out(n) && has_subgraph_as_input(n); + })), + continuation_callback); +} diff --git a/inference-engine/src/snippets/src/pass/insert_load_store.cpp b/inference-engine/src/snippets/src/pass/insert_load_store.cpp new file mode 100644 index 00000000000..d1bfec5a0bf --- /dev/null +++ b/inference-engine/src/snippets/src/pass/insert_load_store.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2020 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include "remarks.hpp" + +#include "snippets/pass/insert_load_store.hpp" +#include "snippets/snippets_isa.hpp" + +#include +#include +#include + +ngraph::snippets::pass::InsertLoad::InsertLoad() { + MATCHER_SCOPE(InsertLoad); + register_matcher(std::make_shared( + ngraph::pattern::wrap_type()), + [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + + // check if already has Load as an output + for (auto output : root->outputs()) { + for (auto consumer : output.get_target_inputs()) { + if (dynamic_cast(consumer.get_node())) { + return false; + } + } + } + + auto load = std::make_shared (root); + ngraph::copy_runtime_info(root, load); + + bool rewritten = false; + for (auto output : root->outputs()) { + for (auto consumer : output.get_target_inputs()) { + if (consumer.get_node()->shared_from_this() != load) { + consumer.replace_source_output(load); + rewritten |= true; + } + } + } + + return rewritten; + }); +} + +ngraph::snippets::pass::InsertStore::InsertStore() { + MATCHER_SCOPE(InsertStore); + register_matcher(std::make_shared( + ngraph::pattern::wrap_type()), + [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + + // check if already has Store as an input + for (auto input : root->inputs()) { + if (dynamic_cast(input.get_source_output().get_node())) { + return false; + } + } + + auto store = std::make_shared (root->input_value(0)); + ngraph::copy_runtime_info(root, store); + root->set_argument(0, store); + return true; + }); +} diff --git a/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp b/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp new file mode 100644 index 00000000000..91ddc2096de --- /dev/null +++ b/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp @@ -0,0 +1,177 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "remarks.hpp" +#include "itt.hpp" + 
+#include "snippets/pass/insert_movebroadcast.hpp" +#include "snippets/snippets_isa.hpp" + +#include +#include + +#include +#include + +using namespace ngraph; + +static std::shared_ptr numpy_broadcast_node(const ngraph::Output& value, + const ngraph::Shape& output_shape, const ngraph::Shape& source_shape) { + std::shared_ptr broadcasted_node = value.get_node_shared_ptr(); + + if (output_shape == value.get_shape()) { + return broadcasted_node; + } + + NGRAPH_CHECK(source_shape.size() == output_shape.size(), + "Ranks of source_shape and output_shape don't match: ", + source_shape.size(), + " vs ", + output_shape.size()); + + ngraph::AxisVector broadcast_axes; + ngraph::Shape squeezed_shape; + for (size_t index = 0; index < output_shape.size(); ++index) { + if (source_shape.at(index) == 1 && output_shape.at(index) != 1) { + broadcast_axes.push_back(index); + } else { + squeezed_shape.push_back(source_shape.at(index)); + } + } + + remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name() + << " " << broadcast_axes << " " << broadcasted_node->get_shape() << " -> " << output_shape << std::endl; + + // it shouldn't be a problem for now since we don't consider StridedSlice and Broadcast here + if (auto constant = ngraph::as_type_ptr(broadcasted_node)) { + if (constant->get_shape() == ngraph::Shape() || ngraph::shape_size(constant->get_shape()) == 1) { + remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name() + << " to scalar constant " << constant->get_shape() << " -- aborting!" << std::endl; + + return broadcasted_node; + } + } + + if (auto constant = ngraph::as_type_ptr(broadcasted_node)) { + if (constant->get_shape() == ngraph::Shape() || ngraph::shape_size(constant->get_shape()) == 1) { + remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name() + << " to scalar constant " << constant->get_shape() << " -- aborting!" 
<< std::endl; + + return broadcasted_node; + } + } + + if (!broadcast_axes.empty()) { + // ShapeOf + broadcasted_node = std::make_shared(broadcasted_node, output_shape); + } + + return broadcasted_node; +} + +static ngraph::Shape calculate_broadcast_shape(ngraph::Shape lhs_shape, ngraph::Shape rhs_shape) { + ngraph::Shape result; + auto lhs_rank = lhs_shape.size(); + auto rhs_rank = rhs_shape.size(); + auto max_rank = std::max(lhs_rank, rhs_rank); + + // left-pad the lhs_shape with ones + lhs_shape.insert(begin(lhs_shape), max_rank - lhs_rank, 1); + // left-pad the rhs_shape with ones + rhs_shape.insert(begin(rhs_shape), max_rank - rhs_rank, 1); + + for (size_t index = 0; index < max_rank; ++index) { + size_t lhs_dim = lhs_shape.at(index); + size_t rhs_dim = rhs_shape.at(index); + + if (lhs_dim != rhs_dim && lhs_dim != 1 && rhs_dim != 1) { + throw ngraph::ngraph_error("incompatible shapes"); + } + + result.push_back(std::max(lhs_dim, rhs_dim)); + } + return result; +} + +std::pair> get_numpy_broadcast_shapes(const std::vector& input_shapes) { + ngraph::Shape target_shape = std::accumulate(begin(input_shapes), end(input_shapes), ngraph::Shape{}, calculate_broadcast_shape); + + std::vector full_shapes; + for (const ngraph::Shape& input : input_shapes) { + ngraph::Shape padded_shape{input}; + padded_shape.insert(begin(padded_shape), target_shape.size() - padded_shape.size(), 1); + full_shapes.push_back(move(padded_shape)); + } + + return {target_shape, full_shapes}; +} + +auto reset_broacast_config(const std::shared_ptr& op) -> void { + using namespace ngraph; + + bool is_scalar = false; + for (auto input : op->inputs()) { + if (input.get_shape() == Shape() || ngraph::shape_size(input.get_shape()) == 1) { + is_scalar = true; + } + } + + if (!is_scalar) { + if (auto binary = std::dynamic_pointer_cast(op)) { + binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE); + } else if (auto binary = std::dynamic_pointer_cast(op)) { + 
binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE); + } else if (auto binary = std::dynamic_pointer_cast(op)) { + binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE); + } + } +} + +// adds explicit broadcasts if needed +// ToDO: this indeed make model not reshapable, need to come up with more clever way to insert fake broadcast, +// well on the other hand, if we replace scalar constant with Scalar op / or ShapeOf, we could have broadcasts that are reshapable +// TODO: generate FakeBroadcast if and only if broadcast is done by w dimension +ngraph::snippets::pass::InsertMoveBroadcast::InsertMoveBroadcast() { + MATCHER_SCOPE(InsertMoveBroadcast); + ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + const auto& values = root->input_values(); + if (values.empty()) { + return false; + } + + std::vector input_shapes; + for (const auto& input : values) { + input_shapes.push_back(input.get_shape()); + } + + // find the output tensor's shape, then broadcast all inputs so that they are compatible + auto bcast_shapes = get_numpy_broadcast_shapes(input_shapes); + + ngraph::OutputVector broadcasted_inputs; + for (size_t i = 0; i < values.size(); ++i) { + auto node = numpy_broadcast_node(values[i], bcast_shapes.first, bcast_shapes.second[i]); + ngraph::copy_runtime_info(root, node); + broadcasted_inputs.push_back(node); + } + + auto new_args = ngraph::as_node_vector(broadcasted_inputs); + for (size_t i = 0; i < new_args.size(); i++) { + root->input(i).replace_source_output(new_args[i]->output(0)); + } + + reset_broacast_config(root); + + return true; + }; + + // only numpy broadcast type is supported currently + auto any = std::make_shared(pattern::any_input(), + [](std::shared_ptr n) { + // should add supports_auto_broadcast to SquaredDifference + return (ngraph::op::supports_auto_broadcast(n) || !!as_type_ptr(n) || !!as_type_ptr(n)) + && n->get_autob().m_type == ngraph::op::AutoBroadcastType::NUMPY; }); + + 
register_matcher(std::make_shared(any), callback); +} \ No newline at end of file diff --git a/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp b/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp new file mode 100644 index 00000000000..33451846d4b --- /dev/null +++ b/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "remarks.hpp" +#include "itt.hpp" + +#include "snippets/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/snippets_isa.hpp" + +#include +#include +#include + +#include + +ngraph::snippets::pass::LoadMoveBroadcastToBroadcastLoad::LoadMoveBroadcastToBroadcastLoad() { + MATCHER_SCOPE(LoadMoveBroadcastToBroadcastLoad); + auto param_pattern = ngraph::pattern::wrap_type(); + auto load_pattern = std::make_shared(param_pattern); + auto fbn = std::make_shared(load_pattern, Shape{1}); + + register_matcher(std::make_shared(fbn), + [load_pattern, param_pattern](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + + const auto &pm = m.get_pattern_value_map(); + const auto input = pm.at(load_pattern).get_node_shared_ptr(); + const auto param = pm.at(param_pattern).get_node_shared_ptr(); + + // check if load has more than 1 user to avoid load+broadcast load on the same parameter + if (input->output(0).get_target_inputs().size() != 1) { + return false; + } + + if (root->inputs().size() != 1 || input->inputs().size() != 1) { + throw ngraph_error("cannot rewrite Broadcast load with more than one input"); + } + + auto inshape = root->input(0).get_shape(); + auto outshape = root->output(0).get_shape(); + auto broadcastload = std::make_shared(param, outshape); + Shape bct(inshape.size(), 0); + for (size_t k = 0; k < inshape.size(); k++) { + if (inshape[k] != outshape[k] && inshape[k] == 1) { + bct[k] = 1; + } + } + + 
broadcastload->set_broadcast_info(bct); + if (broadcastload->is_broadcast(outshape.size()-1)) { + ngraph::copy_runtime_info(root, broadcastload); + ngraph::replace_node(root, broadcastload); + return true; + } else { + return false; + } + }); +} \ No newline at end of file diff --git a/inference-engine/src/snippets/src/pass/vector_to_scalar.cpp b/inference-engine/src/snippets/src/pass/vector_to_scalar.cpp new file mode 100644 index 00000000000..64b96d118b1 --- /dev/null +++ b/inference-engine/src/snippets/src/pass/vector_to_scalar.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/pass/vector_to_scalar.hpp" +#include "snippets/snippets_isa.hpp" + +#include +#include +#include + +ngraph::snippets::pass::ReplaceLoadsWithScalarLoads::ReplaceLoadsWithScalarLoads() { + MATCHER_SCOPE(ReplaceLoadsWithScalarLoads); + register_matcher(std::make_shared( + ngraph::pattern::wrap_type()), + [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + auto load = std::make_shared (root->input_value(0)); + load->set_friendly_name(root->get_friendly_name()); + ngraph::copy_runtime_info(root, load); + ngraph::replace_node(root, load); + return true; + }); +} + +ngraph::snippets::pass::ReplaceStoresWithScalarStores::ReplaceStoresWithScalarStores() { + MATCHER_SCOPE(ReplaceStoresWithScalarStores); + register_matcher(std::make_shared( + ngraph::pattern::wrap_type()), + [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + auto store = std::make_shared (root->input_value(0)); + store->set_friendly_name(root->get_friendly_name()); + ngraph::copy_runtime_info(root, store); + ngraph::replace_node(root, store); + return true; + }); +} diff --git a/inference-engine/src/snippets/src/register_info.cpp b/inference-engine/src/snippets/src/register_info.cpp new file mode 100644 index 00000000000..1f9abbd7267 --- /dev/null +++ 
b/inference-engine/src/snippets/src/register_info.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/register_info.hpp" + +template class ngraph::VariantImpl>; + +constexpr ngraph::VariantTypeInfo ngraph::VariantWrapper>::type_info; diff --git a/inference-engine/src/snippets/src/remarks.hpp b/inference-engine/src/snippets/src/remarks.hpp new file mode 100644 index 00000000000..0b7d5cebce7 --- /dev/null +++ b/inference-engine/src/snippets/src/remarks.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +class logstreambuf: public std::streambuf { +public: + static const int threshold {5}; +}; + +template +static inline auto remark(T x) -> std::ostream& { + static logstreambuf nostreambuf; + static std::ostream nocout(&nostreambuf); + + return ((x >= logstreambuf::threshold)? std::cout << "Remark: " : nocout); +} diff --git a/inference-engine/tests/functional/inference_engine/CMakeLists.txt b/inference-engine/tests/functional/inference_engine/CMakeLists.txt index a9fe8c9b05a..492fad7ab5a 100644 --- a/inference-engine/tests/functional/inference_engine/CMakeLists.txt +++ b/inference-engine/tests/functional/inference_engine/CMakeLists.txt @@ -16,6 +16,7 @@ set(LINK_LIBRARIES openvino::itt openvino::conditional_compilation sharedTestClasses + inference_engine_snippets ) set(DEPENDENCIES diff --git a/inference-engine/tests/functional/inference_engine/snippets/broadcast_fusion.cpp b/inference-engine/tests/functional/inference_engine/snippets/broadcast_fusion.cpp new file mode 100644 index 00000000000..c89894afbf9 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/broadcast_fusion.cpp @@ -0,0 +1,135 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include + +#include + +#include 
"common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, FuseLoadWithBroadcastMoveByX) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 1}); + auto data1 = std::make_shared(element::f32, Shape{2, 2}); + auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto bct = std::make_shared(load0, load1->get_shape()); + auto add = std::make_shared(bct, load1); + auto store = std::make_shared(add); + f = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 1}); + auto data1 = std::make_shared(element::f32, Shape{2, 2}); + auto load0 = std::make_shared(data0, data1->get_shape()); + auto load1 = std::make_shared(data1); + auto add = std::make_shared(load0, load1); + auto store = std::make_shared(add); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, NotFuseLoadWithBroadcastMoveByY) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{1, 2}); + auto data1 = std::make_shared(element::f32, Shape{2, 2}); + auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto bct = std::make_shared(load0, load1->get_shape()); + auto add = std::make_shared(bct, load1); + auto store = std::make_shared(add); + f = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{1, 2}); + auto data1 = std::make_shared(element::f32, Shape{2, 2}); + 
auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto bct = std::make_shared(load0, load1->get_shape()); + auto add = std::make_shared(bct, load1); + auto store = std::make_shared(add); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, NoFuseLoadWithBroadcastMoveMultipleUsers) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 2}); + auto data1 = std::make_shared(element::f32, Shape{2, 1}); + auto data2 = std::make_shared(element::f32, Shape{2, 1}); + + auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto load2 = std::make_shared(data2); + + auto bct1 = std::make_shared(load1, load0->get_shape()); + + auto add = std::make_shared(load0, bct1); + auto mul = std::make_shared(load1, load2); + + auto store0 = std::make_shared(add); + auto store1 = std::make_shared(mul); + f = std::make_shared(NodeVector{store0, store1}, ParameterVector{data0, data1, data2}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 2}); + auto data1 = std::make_shared(element::f32, Shape{2, 1}); + auto data2 = std::make_shared(element::f32, Shape{2, 1}); + + auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto load2 = std::make_shared(data2); + + auto bct1 = std::make_shared(load1, load0->get_shape()); + + auto add = std::make_shared(load0, bct1); + auto mul = std::make_shared(load1, load2); + + auto store0 = std::make_shared(add); + auto store1 = std::make_shared(mul); + f_ref = std::make_shared(NodeVector{store0, store1}, ParameterVector{data0, data1, data2}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git 
a/inference-engine/tests/functional/inference_engine/snippets/memory_ops.cpp b/inference-engine/tests/functional/inference_engine/snippets/memory_ops.cpp new file mode 100644 index 00000000000..3dbed7705b6 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/memory_ops.cpp @@ -0,0 +1,94 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, InsertLoadStore) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto neg = std::make_shared(data); + f = std::make_shared(NodeVector{neg}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, InsertLoadTwise) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto neg = std::make_shared(data); + f = std::make_shared(NodeVector{neg}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + f_ref = std::make_shared(NodeVector{neg}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) 
<< res.second; +} + +TEST(TransformationTests, InsertStoreTwise) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto neg = std::make_shared(data); + f = std::make_shared(NodeVector{neg}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto neg = std::make_shared(data); + auto store = std::make_shared(neg); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/snippets/movebroadcast.cpp b/inference-engine/tests/functional/inference_engine/snippets/movebroadcast.cpp new file mode 100644 index 00000000000..0c7b6ad2dc7 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/movebroadcast.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, InsertBroadcastMove) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(data0, data1); + f = std::make_shared(NodeVector{add}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto move = std::make_shared(data1, 
data0->output(0).get_shape()); + auto add = std::make_shared(data0, move); + f_ref = std::make_shared(NodeVector{add}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/inference_engine/snippets/registers.cpp b/inference-engine/tests/functional/inference_engine/snippets/registers.cpp new file mode 100644 index 00000000000..b4e64e0f83f --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/registers.cpp @@ -0,0 +1,137 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, AssignRegisters) { + std::shared_ptr f(nullptr); + { + auto p0 = std::make_shared(element::f32, Shape(1)); + auto p1 = std::make_shared(element::f32, Shape(1)); + auto y00 = std::make_shared(p0); y00->set_friendly_name("y00"); + auto y01 = std::make_shared(p1); y01->set_friendly_name("y01"); + auto y02 = std::make_shared(y00, y01); y02->set_friendly_name("y02"); + auto y03 = std::make_shared(y02); y03->set_friendly_name("y03"); + + f = std::make_shared(NodeVector{y03}, ParameterVector{p0, p1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + // instead of comparing to a reference function check that registers are correctly assigned + // and stored to runtime info + { + std::map ref_registers { + {"y00", 0}, + {"y01", 1}, + {"y02", 2} + }; + + auto total_ops = 0; + for (auto& op : f->get_ordered_ops()) { + auto& rt = op->get_rt_info(); + + if (auto rinfo = rt["reginfo"]) { + auto reginfo = as_type_ptr>>(rinfo)->get(); + auto reg = reginfo[0]; + ASSERT_TRUE(ref_registers[op->get_friendly_name()] == reg); + total_ops++; + } + } + 
ASSERT_EQ(total_ops, ref_registers.size()); + } +} + +TEST(TransformationTests, AssignRegisters2) { + std::shared_ptr f(nullptr); + { + auto p0 = std::make_shared(ngraph::element::f32, Shape()); + auto p1 = std::make_shared(ngraph::element::f32, Shape()); + auto p2 = std::make_shared(ngraph::element::f32, Shape()); + auto p3 = std::make_shared(ngraph::element::f32, Shape()); + auto p4 = std::make_shared(ngraph::element::f32, Shape()); + auto p5 = std::make_shared(ngraph::element::f32, Shape()); + auto p6 = std::make_shared(ngraph::element::f32, Shape()); + auto p7 = std::make_shared(ngraph::element::f32, Shape()); + + auto c0 = std::make_shared(ngraph::element::f32, Shape(), 3.14f); c0->set_friendly_name("r00"); + auto c1 = std::make_shared(ngraph::element::f32, Shape(), 6.6260701e-34f); c1->set_friendly_name("r01"); + + auto y00 = std::make_shared(p0); y00->set_friendly_name("r02"); + auto y01 = std::make_shared(p1); y01->set_friendly_name("r03"); + auto y02 = std::make_shared(y00, c0); y02->set_friendly_name("r04"); + auto y03 = std::make_shared(y01, c1); y03->set_friendly_name("r05"); + auto y04 = std::make_shared(p2); y04->set_friendly_name("r06"); + auto y05 = std::make_shared(p3); y05->set_friendly_name("r07"); + auto y06 = std::make_shared(y02, y03); y06->set_friendly_name("r08"); + auto y07 = std::make_shared(y04, c0); y07->set_friendly_name("r09"); + auto y08 = std::make_shared(y05, c1); y08->set_friendly_name("r10"); + auto y09 = std::make_shared(p4); y09->set_friendly_name("r11"); + auto y10 = std::make_shared(p5); y10->set_friendly_name("r12"); + auto y11 = std::make_shared(y07, y08); y11->set_friendly_name("r13"); + auto y12 = std::make_shared(y09, c0); y12->set_friendly_name("r14"); + auto y13 = std::make_shared(y10, c1); y13->set_friendly_name("r15"); + auto y14 = std::make_shared(p6); y14->set_friendly_name("r16"); + auto y15 = std::make_shared(y12, y13); y15->set_friendly_name("r17"); + auto y16 = std::make_shared(p7); 
y16->set_friendly_name("r18"); + auto y17 = std::make_shared(y14, c0); y17->set_friendly_name("r19"); + auto y18 = std::make_shared(y16, c1); y18->set_friendly_name("r20"); + auto y19 = std::make_shared(y06, y11); y19->set_friendly_name("r21"); + auto y20 = std::make_shared(y17, y18); y20->set_friendly_name("r22"); + auto y21 = std::make_shared(y15, y19); y21->set_friendly_name("r23"); + auto y22 = std::make_shared(y20, y21); y22->set_friendly_name("r24"); + auto y23 = std::make_shared(y22); + + f = std::make_shared(NodeVector{y23}, ParameterVector{p0, p1, p2, p3, p4, p5, p6, p7}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + // instead of comparing to a reference function check that registers are correctly assigned + // and stored to runtime info + { + std::map ref_registers { + {"r00", 1}, {"r01", 3}, {"r02", 5}, {"r03", 5}, {"r04", 2}, {"r05", 6}, {"r06", 6}, {"r07", 6}, + {"r08", 5}, {"r09", 2}, {"r10", 1}, {"r11", 4}, {"r12", 4}, {"r13", 6}, {"r14", 2}, {"r15", 5}, + {"r16", 0}, {"r17", 4}, {"r18", 0}, {"r19", 2}, {"r20", 4}, {"r21", 1}, {"r22", 0}, {"r23", 6}, + {"r24", 1} + }; + + auto total_ops = 0; + for (auto& op : f->get_ordered_ops()) { + auto& rt = op->get_rt_info(); + + if (auto rinfo = rt["reginfo"]) { + auto reginfo = as_type_ptr>>(rinfo)->get(); + auto reg = reginfo[0]; + ASSERT_TRUE(ref_registers[op->get_friendly_name()] == reg); + total_ops++; + } + } + ASSERT_EQ(total_ops, ref_registers.size()); + } +} diff --git a/inference-engine/tests/functional/inference_engine/snippets/tokenization.cpp b/inference-engine/tests/functional/inference_engine/snippets/tokenization.cpp new file mode 100644 index 00000000000..379343aee59 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/tokenization.cpp @@ -0,0 +1,154 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include 
+#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, StartSubgraphMultipleOutputs) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(data0, data1); + auto sub = std::make_shared(add, data1); + auto mul = std::make_shared(add, sub); + f = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, ParameterVector{indata0, indata1})); + auto sub = std::make_shared(add, data1); + auto mul = std::make_shared(add, sub); + f_ref = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, DontStartSubgraphSingleOuptut) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(data0, data1); + auto sub = std::make_shared(add, data1); + auto mul = std::make_shared(data0, sub); + f = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = 
std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(data0, data1); + auto sub = std::make_shared(add, data1); + auto mul = std::make_shared(data0, sub); + f_ref = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, AttachToSubgraph) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, ParameterVector{indata0, indata1})); + auto neg = std::make_shared(add); + auto concat = std::make_shared(NodeVector{add, neg}, 0); + f = std::make_shared(NodeVector{concat}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto inner = std::make_shared(indata0, indata1); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(inner), inner}, ParameterVector{indata0, indata1})); + auto concat = std::make_shared(OutputVector{add->output(0), add->output(1)}, 0); + f_ref = std::make_shared(NodeVector{concat}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, DontAttachToSubgraphIfLoop) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = 
std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, ParameterVector{indata0, indata1})); + auto log = std::make_shared(add); + auto mul = std::make_shared(add, log); + f = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, ParameterVector{indata0, indata1})); + auto log = std::make_shared(add); + auto mul = std::make_shared(add, log); + f_ref = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/snippets/vector_scalar.cpp b/inference-engine/tests/functional/inference_engine/snippets/vector_scalar.cpp new file mode 100644 index 00000000000..eec873d6a0c --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/vector_scalar.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, ReplaceLoadsWithScalarLoads) 
{ + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f = std::make_shared(NodeVector{store}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, ReplaceStoresWithScalarStores) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f = std::make_shared(NodeVector{store}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/unit/CMakeLists.txt b/inference-engine/tests_deprecated/unit/CMakeLists.txt index 7949b0c7578..c749998f30b 100644 --- a/inference-engine/tests_deprecated/unit/CMakeLists.txt +++ b/inference-engine/tests_deprecated/unit/CMakeLists.txt @@ -137,7 +137,9 @@ target_link_libraries(${TARGET_NAME} PRIVATE # dynamic libraries inference_engine_transformations - 
inference_engine_lp_transformations) + inference_engine_lp_transformations + inference_engine_snippets + ) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fuse-ld=gold") diff --git a/ngraph/core/src/op/non_zero.cpp b/ngraph/core/src/op/non_zero.cpp index 3c7c6cf2bd3..45b2432b623 100644 --- a/ngraph/core/src/op/non_zero.cpp +++ b/ngraph/core/src/op/non_zero.cpp @@ -158,7 +158,7 @@ namespace nonzero return rc; } - +#undef TYPE_OUT_CASE bool evaluate_nonzero(const HostTensorPtr& input, const HostTensorPtr& output) { bool rc = true; diff --git a/ngraph/core/src/op/one_hot.cpp b/ngraph/core/src/op/one_hot.cpp index c7e1db9c754..56c4f27cf85 100644 --- a/ngraph/core/src/op/one_hot.cpp +++ b/ngraph/core/src/op/one_hot.cpp @@ -178,7 +178,7 @@ namespace detail return rc; } - +#undef TYPE_OUT_CASE bool evaluate_onehot(const HostTensorVector& output_values, const HostTensorVector& input_values, const int64_t axis) diff --git a/ngraph/core/src/op/util/op_types.cpp b/ngraph/core/src/op/util/op_types.cpp index 966da266f81..eabdef5221c 100644 --- a/ngraph/core/src/op/util/op_types.cpp +++ b/ngraph/core/src/op/util/op_types.cpp @@ -27,6 +27,7 @@ #include "ngraph/op/parameter.hpp" #include "ngraph/op/result.hpp" #include "ngraph/op/select.hpp" +#include "ngraph/op/squared_difference.hpp" #include "ngraph/op/util/binary_elementwise_arithmetic.hpp" #include "ngraph/op/util/binary_elementwise_comparison.hpp" #include "ngraph/op/util/binary_elementwise_logical.hpp" @@ -60,6 +61,7 @@ bool ngraph::op::is_binary_elementwise_logical(const ngraph::Node* node) bool ngraph::op::supports_auto_broadcast(const ngraph::Node* node) { return dynamic_cast(node) != nullptr || + dynamic_cast(node) != nullptr || dynamic_cast(node) != nullptr || dynamic_cast(node) != nullptr || dynamic_cast(node) != nullptr;