[§] introduces snippets generator (#4349)

Marina Kolpakova 2021-03-10 14:15:38 +03:00 committed by GitHub
parent efcf24e789
commit 6e490c24e2
64 changed files with 3657 additions and 7 deletions

View File

@ -40,13 +40,15 @@ add_subdirectory(low_precision_transformations)
add_subdirectory(offline_transformations)
add_subdirectory(snippets)
# add a custom target to build all Inference Engine Core libraries
add_custom_target(ie_libraries ALL
                  DEPENDS inference_engine_transformations inference_engine_legacy
                          inference_engine inference_engine_preproc
                          inference_engine_ir_v7_reader inference_engine_ir_reader
                          inference_engine_lp_transformations inference_engine_snippets)
if(NGRAPH_ONNX_IMPORT_ENABLE)
    add_dependencies(ie_libraries inference_engine_onnx_reader)

View File

@ -171,7 +171,9 @@ if(WIN32)
set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s)
endif()
target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt openvino::conditional_compilation
                                               ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES}
                                               inference_engine_snippets
                                               inference_engine_transformations pugixml)
target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)

View File

@ -42,6 +42,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
${PUBLIC_HEADERS_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/src
${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl
$<TARGET_PROPERTY:inference_engine_snippets,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_INCLUDE_DIRECTORIES>
@ -62,7 +63,7 @@ ie_add_vs_version_file(NAME ${TARGET_NAME}
set_ie_threading_interface_for(${TARGET_NAME})
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets
                                     PRIVATE pugixml openvino::itt
                                             ${NGRAPH_LIBRARIES} inference_engine_transformations)

View File

@ -39,6 +39,7 @@
#include "legacy/ngraph_ops/rnn_sequence_ie.hpp" #include "legacy/ngraph_ops/rnn_sequence_ie.hpp"
#include "legacy/ngraph_ops/lstm_sequence_ie.hpp" #include "legacy/ngraph_ops/lstm_sequence_ie.hpp"
#include "legacy/ngraph_ops/gru_sequence_ie.hpp" #include "legacy/ngraph_ops/gru_sequence_ie.hpp"
#include "snippets/op/subgraph.hpp"
#include "exec_graph_info.hpp" #include "exec_graph_info.hpp"
#include "caseless.hpp" #include "caseless.hpp"
@ -1978,6 +1979,15 @@ void convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function
cnnLayer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames;
}
if (auto subgraph = ::ngraph::as_type_ptr<ngraph::snippets::op::Subgraph>(layer)) {
std::string names = "";
for (const auto& op : subgraph->get_body()->get_ordered_ops()) {
names += ", " + op->get_friendly_name();
}
cnnLayer->params["originalLayersNames"] += names;
}
std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer);
if (!primitivesPriority.empty()) {
cnnLayer->params["PrimitivesPriority"] = primitivesPriority;

View File

@ -0,0 +1,56 @@
# Copyright (C) 2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set (TARGET_NAME "inference_engine_snippets")
set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
file(GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/snippets/*.hpp)
# Create named folders for the sources within the .vcproj
# Empty name lists them directly under the .vcproj
source_group("src" FILES ${LIBRARY_SRC})
source_group("include" FILES ${PUBLIC_HEADERS})
# Create shared library
add_library(${TARGET_NAME} SHARED
${LIBRARY_SRC}
${PUBLIC_HEADERS})
ie_faster_build(${TARGET_NAME}
UNITY
)
ie_add_vs_version_file(NAME ${TARGET_NAME}
FILEDESCRIPTION "Inference Engine Snippets transformations library")
target_compile_definitions(${TARGET_NAME} PRIVATE inference_engine_transformations_EXPORTS)
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_transformations ${NGRAPH_LIBRARIES}
PRIVATE ${NGRAPH_REF_LIBRARIES} openvino::conditional_compilation)
target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
# LTO
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
# developer package
ie_developer_export_targets(${TARGET_NAME})
# install
install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)

View File

@ -0,0 +1,123 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief This file contains the public interface for the target-independent code generator.
* @file generator.hpp
*/
#pragma once
#include <transformations_visibility.hpp>
#include "snippets_isa.hpp"
namespace ngraph {
namespace snippets {
using code = const uint8_t *;
using RegInfo = std::pair<std::vector<size_t>, std::vector<size_t>>;
TRANSFORMATIONS_API auto getRegisters(std::shared_ptr<ngraph::Node>& n) -> ngraph::snippets::RegInfo;
/**
* @interface Emitter
* @brief Base class for all target specific code emitters used by generator.
* @ingroup snippets
*/
class TRANSFORMATIONS_API Emitter {
public:
/**
* @brief Constructs an emitter from an ngraph node
*/
Emitter(const std::shared_ptr<ngraph::Node>& n) {
}
/**
* @brief called by the generator to produce target code for a specific operation
* @param in vector of vector argument registers
* @param out vector of vector resulting registers
* @param pool optional vector of free vector registers which might be used inside method
* @param gpr vector of free general purpose registers which might be used inside the method
* @return void
*/
virtual void emit_code(const std::vector<size_t>& in,
const std::vector<size_t>& out,
const std::vector<size_t>& pool = {},
const std::vector<size_t>& gpr = {}) const = 0;
/**
* @brief called by generator to generate data section, if needed for a specific operation
* @return void
*/
virtual void emit_data() const {
}
};
/**
* @interface TargetMachine
* @brief Base class for the target machine representation. A target derives from this class to provide the generator with information about supported emitters
* @ingroup snippets
*/
class TRANSFORMATIONS_API TargetMachine {
public:
/**
* @brief called by the generator to get all emitters available for a target machine
* @return a map keyed by node type info with callbacks that create an emitter instance for the corresponding operation type
*/
virtual auto getJitters() -> std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>>{
return {};
}
};
/**
* @interface Schedule
* @brief Holds scheduling information and a pointer to the generated kernel code
* @ingroup snippets
*/
class TRANSFORMATIONS_API Schedule {
public:
/**
* @brief Default constructor
*/
Schedule() : work_size({}), is_flat(false), ptr(nullptr) {}
/**
* @brief Constructor to create a schedule from specific parameters
* @param ws work size for kernel execution
* @param f whether this kernel can be linearized to a 1D range
* @param p pointer to generated code
*/
Schedule(const Shape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {}
Shape work_size {};
bool is_flat {false};
code ptr {nullptr};
};
/**
* @interface Generator
* @brief Target independent code generator interface
* @ingroup snippets
*/
class TRANSFORMATIONS_API Generator {
public:
/**
* @brief Default constructor
*/
Generator() = default;
/**
* @brief Default destructor
*/
virtual ~Generator() = default;
/**
* @brief virtual method that any specific implementation should implement
* @param f function in canonical form for table-based code generation
* @return pointer to generated code
*/
virtual code generate(std::shared_ptr<Function>& f) const = 0;
protected:
mutable std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>> jitters;
};
} // namespace snippets
} // namespace ngraph
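
To make the interplay of these interfaces concrete, here is a minimal sketch of a target-specific emitter built on the Emitter interface above. NopEmitter and the registration lambda are illustrative names under assumed usage, not part of this commit:

// Hypothetical example: a trivial emitter for operations that need no code.
class NopEmitter : public ngraph::snippets::Emitter {
public:
    explicit NopEmitter(const std::shared_ptr<ngraph::Node>& n) : Emitter(n) {}
    void emit_code(const std::vector<size_t>& in,
                   const std::vector<size_t>& out,
                   const std::vector<size_t>& pool,
                   const std::vector<size_t>& gpr) const override {
        // a no-op produces no target code
    }
};

// A TargetMachine would then expose it through getJitters(), e.g.:
// jitters[ngraph::snippets::op::Nop::type_info] =
//     [](std::shared_ptr<ngraph::Node> n) { return std::make_shared<NopEmitter>(n); };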

View File

@ -0,0 +1,36 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
#include "load.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface BlockedLoad
* @brief Generated by Canonicalization step for blocked data (NCHW<X>c) to be loaded
* @ingroup snippets
*/
class TRANSFORMATIONS_API BlockedLoad : public Load {
public:
NGRAPH_RTTI_DECLARATION;
BlockedLoad(const Output<Node>& x);
BlockedLoad() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override {
check_new_args_count(this, new_args);
return std::make_shared<BlockedLoad>(new_args.at(0));
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,38 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
#include <ngraph/op/parameter.hpp>
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface BlockedParameter
* @brief Represents blocked input (NCHW<X>c) for a subgraph
* @ingroup snippets
*/
class TRANSFORMATIONS_API BlockedParameter : public ngraph::op::Parameter {
public:
NGRAPH_RTTI_DECLARATION;
BlockedParameter() = default;
BlockedParameter(const ngraph::element::Type& element_type, const PartialShape& pshape)
: Parameter(element_type, pshape) {
}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override {
check_new_args_count(this, new_args);
return std::make_shared<BlockedParameter>(m_element_type, m_partial_shape);
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,48 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <snippets/op/broadcastmove.hpp>
#include "ngraph/op/op.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface BroadcastLoad
* @brief Is generated for broadcasting by least varying dimension for non-blocked cases and the second varying dimension for blocked
* @ingroup snippets
*/
class TRANSFORMATIONS_API BroadcastLoad : public BroadcastMove {
public:
NGRAPH_RTTI_DECLARATION;
BroadcastLoad(const Output<Node>& x, Shape output_shape);
BroadcastLoad() = default;
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
void set_broadcast_info(const Shape& bct) {
broadcast_info = bct;
}
bool is_broadcast(size_t idx) {
return broadcast_info[idx] == 1;
}
private:
Shape broadcast_info;
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,41 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include "ngraph/op/op.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface BroadcastMove
* @brief Added to a subgraph if explicit broadcast instruction should be generated
* @ingroup snippets
*/
class TRANSFORMATIONS_API BroadcastMove : public ngraph::op::Op {
public:
NGRAPH_RTTI_DECLARATION;
BroadcastMove(const Output<Node>& x, Shape output_shape);
BroadcastMove() = default;
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
protected:
Shape output_shape;
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,42 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface Load
* @brief Generated by Canonicalization step where an explicit load instruction should be emitted
* ScalarLoad == scalar instruction + post increment
* Load (VectorLoad) == vector instruction + post increment
* BroadcastLoad == scalar instruction - post increment
* BlockedLoad == vector instruction - post increment
* @ingroup snippets
*/
class TRANSFORMATIONS_API Load : public ngraph::op::Op {
public:
NGRAPH_RTTI_DECLARATION;
Load(const Output<Node>& x);
Load() = default;
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,30 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include "ngraph/op/op.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface Nop
* @brief Generated by Canonicalization and represents not-an-operation
* @ingroup snippets
*/
class TRANSFORMATIONS_API Nop : public ngraph::op::Op {
public:
NGRAPH_RTTI_DECLARATION;
Nop(const OutputVector& arguments, const OutputVector& results);
Nop() = default;
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,48 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include "ngraph/op/op.hpp"
#include "ngraph/op/constant.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface Scalar
* @brief Generated by Canonicalization for a scalar constant Shape() == {1}
* @ingroup snippets
*/
class TRANSFORMATIONS_API Scalar : public ngraph::op::Constant {
public:
NGRAPH_RTTI_DECLARATION;
Scalar() = default;
Scalar(const std::shared_ptr<runtime::Tensor>& tensor) : Constant(tensor) {}
template <typename T>
Scalar(const element::Type& type, Shape shape, const std::vector<T>& values) : Constant(type, shape, values) {}
Scalar(const element::Type& type, const Shape& shape) : Constant(type, shape) {}
template <class T, class = typename std::enable_if<std::is_fundamental<T>::value>::type>
Scalar(const element::Type& type, Shape shape, T value) : Constant(type, shape, value) {}
Scalar(const element::Type& type, Shape shape, const std::vector<std::string>& values) : Constant(type, shape, values) {}
Scalar(const element::Type& type, const Shape& shape, const void* data) : Constant(type, shape, data) {}
Scalar(const Constant& other) : Constant(other) {}
Scalar(const Scalar& other) : Constant(other) {}
Scalar& operator=(const Scalar&) = delete;
~Scalar() override {}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override {
check_new_args_count(this, new_args);
return std::make_shared<Scalar>(*this);
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph
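
As a usage sketch (mirroring what canonicalize() in subgraph.cpp does later in this commit), a Scalar can be created directly from an existing Constant:

// Wrap an existing scalar Constant into snippets::op::Scalar.
auto constant = std::make_shared<ngraph::opset1::Constant>(
    ngraph::element::f32, ngraph::Shape{}, std::vector<float>{2.0f});
auto scalar = std::make_shared<ngraph::snippets::op::Scalar>(*constant);
scalar->set_friendly_name(constant->get_friendly_name());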

View File

@ -0,0 +1,36 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
#include "load.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface ScalarLoad
* @brief Generated by Canonicalization for a scalar value load to vector register
* @ingroup snippets
*/
class TRANSFORMATIONS_API ScalarLoad : public Load {
public:
NGRAPH_RTTI_DECLARATION;
ScalarLoad(const Output<Node>& x);
ScalarLoad() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override {
check_new_args_count(this, new_args);
return std::make_shared<ScalarLoad>(new_args.at(0));
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,36 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
#include "store.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface ScalarStore
* @brief Generated by Canonicalization for a scalar value store from vector register
* @ingroup snippets
*/
class TRANSFORMATIONS_API ScalarStore : public Store {
public:
NGRAPH_RTTI_DECLARATION;
ScalarStore(const Output<Node>& x);
ScalarStore() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override {
check_new_args_count(this, new_args);
return std::make_shared<ScalarStore>(new_args.at(0));
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,44 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
#include <ngraph/op/power.hpp>
#include <snippets/snippets_isa.hpp>
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface PowerStatic
* @brief Generated by Canonicalization for a special case of the power instruction with a constant power value
* @ingroup snippets
*/
class TRANSFORMATIONS_API PowerStatic : public ngraph::op::v1::Power {
public:
NGRAPH_RTTI_DECLARATION;
PowerStatic() : Power() {
}
PowerStatic(const Output<Node>& arg0,
const Output<Node>& arg1,
const ngraph::op::AutoBroadcastSpec& auto_broadcast =
ngraph::op::AutoBroadcastSpec(ngraph::op::AutoBroadcastType::NUMPY)) : Power(arg0, arg1, auto_broadcast) {
NGRAPH_CHECK(!!std::dynamic_pointer_cast<ngraph::snippets::op::Scalar>(arg1.get_node_shared_ptr()), "second argument must be scalar constant.");
}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override {
check_new_args_count(this, new_args);
return std::make_shared<PowerStatic>(new_args.at(0), new_args.at(1), this->get_autob());
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,38 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface Store
* @brief Generated by Canonicalization step where an explicit store instruction should be emitted
* @ingroup snippets
*/
class TRANSFORMATIONS_API Store : public ngraph::op::Op {
public:
NGRAPH_RTTI_DECLARATION;
Store(const Output<Node>& x);
Store() = default;
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,101 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <transformations_visibility.hpp>
#include <ngraph/function.hpp>
#include <ngraph/op/op.hpp>
#include <ngraph/rt_info.hpp>
#include "snippets/generator.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface Subgraph
* @brief An operation that is implemented by a function
* @ingroup snippets
*/
class TRANSFORMATIONS_API Subgraph : public ngraph::op::Op {
public:
using BlockedShape = std::tuple<ngraph::Shape, ngraph::AxisVector, ngraph::element::Type>;
using BlockedShapeVector = std::vector<BlockedShape>;
NGRAPH_RTTI_DECLARATION;
Subgraph(const OutputVector& args, std::shared_ptr<Function> body);
Subgraph(const NodeVector& args, std::shared_ptr<Function> body);
bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
std::shared_ptr<Function> get_body() const {
return m_body;
}
std::shared_ptr<ngraph::snippets::Generator> get_generator() const {
return m_generator;
}
std::shared_ptr<Subgraph> make_canonical_from_this();
snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes);
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
/// Set a new body for the op; body needs to satisfy requirements on inputs/outputs
void set_body(std::shared_ptr<Function> body);
// plugin sets generator for a snippet to some specific generator.
// it's going to be replaced with Jitters table later
void set_generator(std::shared_ptr<ngraph::snippets::Generator> generator);
void print() const;
void print_statistics(bool verbose);
static auto wrap_node_as_subgraph(const std::shared_ptr<ngraph::Node>& node) -> std::shared_ptr<Subgraph>;
private:
void canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes);
void convert_to_snippet_dialect();
std::shared_ptr<Function> m_body;
std::shared_ptr<ngraph::snippets::Generator> m_generator;
};
static inline std::ostream& operator<<(std::ostream& os, const op::Subgraph::BlockedShape& blocked_shape) {
os << std::get<0>(blocked_shape) << " " << std::get<1>(blocked_shape) << " " << std::get<2>(blocked_shape);
return os;
}
static inline auto is_scalar_constant(const std::shared_ptr<ngraph::Node>& source_output_node) -> bool {
return !!ngraph::as_type_ptr<ngraph::opset1::Constant>(source_output_node) &&
(source_output_node->get_shape() == ngraph::Shape() || ngraph::shape_size(source_output_node->get_shape()) == 1);
};
static inline auto create_body(std::string name, const ngraph::ResultVector& results, const ngraph::ParameterVector& parameters) ->
std::shared_ptr<ngraph::Function> {
auto body = std::make_shared<ngraph::Function>(results, parameters, name);
return body;
};
static inline auto build_subgraph(const std::shared_ptr<ngraph::Node>& node, const ngraph::OutputVector& inputs, const std::shared_ptr<ngraph::Function>& body)
-> std::shared_ptr<Subgraph>{
auto subgraph = std::make_shared<Subgraph>(inputs, body);
copy_runtime_info(node, subgraph);
subgraph->set_friendly_name(node->get_friendly_name());
return subgraph;
};
} // namespace op
} // namespace snippets
} // namespace ngraph
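
A rough sketch of the intended end-to-end flow with this op. MyTargetGenerator, add_node and the shape vectors are hypothetical placeholders (plugins supply the generator), not APIs defined by this commit:

// Wrap a single eltwise node into a Subgraph, attach a generator,
// and generate a schedule. BlockedShapeVector entries pair a shape,
// an axis order, and an element type, as defined above.
auto subgraph = ngraph::snippets::op::Subgraph::wrap_node_as_subgraph(add_node);
subgraph->set_generator(std::make_shared<MyTargetGenerator>());  // hypothetical generator
ngraph::snippets::Schedule schedule = subgraph->generate(output_shapes, input_shapes);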

View File

@ -0,0 +1,36 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
#include "load.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface VectorLoad
* @brief Generated by Canonicalization for a vector value load to vector register
* @ingroup snippets
*/
class TRANSFORMATIONS_API VectorLoad : public Load {
public:
NGRAPH_RTTI_DECLARATION;
VectorLoad(const Output<Node>& x);
VectorLoad() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override {
check_new_args_count(this, new_args);
return std::make_shared<VectorLoad>(new_args.at(0));
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,36 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/op/op.hpp>
#include "store.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface VectorStore
* @brief Generated by Canonicalization for a vector value store from vector register
* @ingroup snippets
*/
class TRANSFORMATIONS_API VectorStore : public Store {
public:
NGRAPH_RTTI_DECLARATION;
VectorStore(const Output<Node>& x);
VectorStore() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override {
check_new_args_count(this, new_args);
return std::make_shared<VectorStore>(new_args.at(0));
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,30 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/pass.hpp>
namespace ngraph {
namespace snippets {
namespace pass {
/**
* @interface AssignRegisters
* @brief Assigns internal `vector` register indexes to operations.
* Changing the order of variables or dataflow leads to invalidation of the register assignment.
* @ingroup snippets
*/
class TRANSFORMATIONS_API AssignRegisters : public ngraph::pass::FunctionPass {
public:
AssignRegisters() : FunctionPass() {
set_property(ngraph::pass::PassProperty::REQUIRE_STATIC_SHAPE, true);
}
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
};
} // namespace pass
} // namespace snippets
} // namespace ngraph
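
This pass is invoked directly rather than through a pass manager; subgraph.cpp later in this commit runs it as:

// body is the snippet's ngraph::Function
ngraph::snippets::pass::AssignRegisters().run_on_function(body);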

View File

@ -0,0 +1,74 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>
namespace ngraph {
namespace snippets {
namespace pass {
/**
* @interface StartSubgraph
* @brief Matches multiple-output layout-oblivious operations to start a new subgraph
* @ingroup snippets
*/
class TRANSFORMATIONS_API StartSubgraph: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
explicit StartSubgraph(bool tokenize_by_node = false);
};
/**
* @interface AttachToSubgraph
* @brief Matches layout-oblivious operations with a subgraph operation as an input to attach this node to it
* @ingroup snippets
*/
class TRANSFORMATIONS_API AttachToSubgraph: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
explicit AttachToSubgraph(bool tokenize_by_node = false);
};
/**
* @interface TokenizeSnippets
* @brief Splits a function into subgraphs where possible, using the rules above
* This pass tokenizes the topology graph into subgraphs.
* Those subgraphs consist of unary or binary layout-oblivious (LO) operations found in subset 1.
* Non-layout-oblivious (NLO) operations (also called support in this context) are ignored and become a full stop in the tokenization routine
* 1. if a considered LO operation doesn't have any input subgraphs
* -> a new single-op subgraph is introduced
* 1. if a considered LO operation is a binary or a unary operation with at least one subgraph as an input
* -> 1. all inputs from the input subgraphs are collected together
* 1. non-subgraph inputs are wrapped into parameters
* 1. all input bodies are merged and
* 1. this new operation is added to the body of the input subgraph
* 1. subgraph outputs are collected (outputs consumed by some other node & subgraph outputs consumed by the node to be merged)
* 1. finally the current node is replaced with the new subgraph. We cannot use replace_node because multiple nodes are replaced, so
* the replacement is made manually by redirecting ports
* An input subgraph is prevented from being visited twice if more than one of its outputs is consumed by the currently considered node
* A new subgraph is introduced if a loop would be created
* A new subgraph is introduced if the number of inputs and outputs exceeds 7 due to scheduling limitations
* A new subgraph is introduced if multiple outputs of merged nodes are not broadcastable to each other (equality of all outputs is too strict, on the other hand)
* Scalar constants are placed as-is into the subgraph for optimization purposes
* @ingroup snippets
*/
class TRANSFORMATIONS_API TokenizeSnippets: public ngraph::pass::GraphRewrite {
public:
NGRAPH_RTTI_DECLARATION;
TokenizeSnippets(bool tokenize_by_node = false) {
add_matcher<ngraph::snippets::pass::StartSubgraph>(tokenize_by_node);
add_matcher<ngraph::snippets::pass::AttachToSubgraph>(tokenize_by_node);
}
};
} // namespace pass
} // namespace snippets
} // namespace ngraph
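
Tokenization is driven through an ordinary pass manager. A minimal sketch, assuming `function` is the ngraph::Function to tokenize:

ngraph::pass::Manager manager;
manager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
manager.run_passes(function);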

View File

@ -0,0 +1,41 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>
namespace ngraph {
namespace snippets {
namespace pass {
/**
* @interface InsertLoad
* @brief Inserts explicit load instruction after each parameter.
* The pass is used to convert function to a canonical form for code generation
* @ingroup snippets
*/
class TRANSFORMATIONS_API InsertLoad: public ngraph::pass::MatcherPass {
public:
InsertLoad();
};
/**
* @interface InsertStore
* @brief Inserts explicit store instruction before each result.
* The pass is used to convert function to a canonical form for code generation
* @ingroup snippets
*/
class TRANSFORMATIONS_API InsertStore: public ngraph::pass::MatcherPass {
public:
InsertStore();
};
} // namespace pass
} // namespace snippets
} // namespace ngraph
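
For illustration, the canonical form these two passes produce for a one-Add body: a Load after each Parameter and a Store before the Result. A hand-built sketch under assumed shapes:

// Before: Parameter -> Add -> Result
// After:  Parameter -> Load -> Add -> Store -> Result
auto p0 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 8});
auto p1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 8});
auto add = std::make_shared<ngraph::opset1::Add>(
    std::make_shared<ngraph::snippets::op::Load>(p0),
    std::make_shared<ngraph::snippets::op::Load>(p1));
auto store = std::make_shared<ngraph::snippets::op::Store>(add);
auto body = std::make_shared<ngraph::Function>(
    ngraph::ResultVector{std::make_shared<ngraph::opset1::Result>(store)},
    ngraph::ParameterVector{p0, p1});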

View File

@ -0,0 +1,29 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>
namespace ngraph {
namespace snippets {
namespace pass {
/**
* @interface InsertMoveBroadcast
* @brief Inserts explicit MoveBroadcast instruction if broadcasting by the most varying dimension is needed.
* The pass is used to convert function to a canonical form for code generation
* @ingroup snippets
*/
class TRANSFORMATIONS_API InsertMoveBroadcast: public ngraph::pass::MatcherPass {
public:
InsertMoveBroadcast();
};
} // namespace pass
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,29 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>
namespace ngraph {
namespace snippets {
namespace pass {
/**
* @interface LoadMoveBroadcastToBroadcastLoad
* @brief Fuses consecutive Load and MoveBroadcast into a single load instruction.
* The pass is used to convert function to a canonical form for code generation
* @ingroup snippets
*/
class TRANSFORMATIONS_API LoadMoveBroadcastToBroadcastLoad: public ngraph::pass::MatcherPass {
public:
LoadMoveBroadcastToBroadcastLoad();
};
} // namespace pass
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,42 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>
namespace ngraph {
namespace snippets {
namespace pass {
/**
* @interface ReplaceLoadsWithScalarLoads
* @brief Replaces vector loads with scalar versions.
* The pass is used to change the element type of a function in canonical form from vector to scalar.
* Used for tail generation
* @ingroup snippets
*/
class TRANSFORMATIONS_API ReplaceLoadsWithScalarLoads: public ngraph::pass::MatcherPass {
public:
ReplaceLoadsWithScalarLoads();
};
/**
* @interface ReplaceStoresWithScalarStores
* @brief Replaces vector stores with scalar versions.
* The pass is used to change the element type of a function in canonical form from vector to scalar.
* Used for tail generation
* @ingroup snippets
*/
class TRANSFORMATIONS_API ReplaceStoresWithScalarStores: public ngraph::pass::MatcherPass {
public:
ReplaceStoresWithScalarStores();
};
} // namespace pass
} // namespace snippets
} // namespace ngraph

View File

@ -0,0 +1,24 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/variant.hpp>
#include <ngraph/axis_vector.hpp>
namespace ngraph {
template <>
class TRANSFORMATIONS_API VariantWrapper<std::vector<size_t>> : public VariantImpl<std::vector<size_t>> {
public:
static constexpr VariantTypeInfo type_info{"Variant::RegInfo|Variant::RuntimeAttribute::AxisVector", 0};
const VariantTypeInfo& get_type_info() const override { return type_info; }
VariantWrapper(const value_type& value)
: VariantImpl<value_type>(value) {
}
};
} // namespace ngraph
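
A sketch of how this wrapper is meant to be used: a register-assignment pass attaches "reginfo" to a node's rt_info, and getRegisters() in generator.cpp reads it back. `node` and the index values are illustrative:

// Attach assigned register indexes to a node.
auto& rt = node->get_rt_info();
rt["reginfo"] = std::make_shared<ngraph::VariantWrapper<std::vector<size_t>>>(
    std::vector<size_t>{0, 1});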

View File

@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "ngraph/ops.hpp"
#include <ngraph/opsets/opset1.hpp>
#include "op/blockedload.hpp"
#include "op/blockedparameter.hpp"
#include "op/broadcastload.hpp"
#include "op/broadcastmove.hpp"
#include "op/load.hpp"
#include "op/nop.hpp"
#include "op/scalar.hpp"
#include "op/scalarload.hpp"
#include "op/scalarstore.hpp"
#include "op/staticpower.hpp"
#include "op/store.hpp"
#include "op/vectorload.hpp"
#include "op/vectorstore.hpp"
namespace ngraph {
namespace snippets {
namespace isa {
#define NGRAPH_OP(a, b) using b::a;
#include "snippets_isa_tbl.hpp"
#undef NGRAPH_OP
} // namespace isa
} // namespace snippets
} // namespace ngraph
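
The X-macro above pulls each table entry into the isa namespace; for one entry the expansion is simply a using-declaration:

// NGRAPH_OP(Load, ngraph::snippets::op) expands to:
//     using ngraph::snippets::op::Load;
// so ngraph::snippets::isa::Load names the dialect operation.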

View File

@ -0,0 +1,84 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifndef NGRAPH_OP
#warning "NGRAPH_OP not defined"
#define NGRAPH_OP(x, y)
#endif
// SnippetS dialect
NGRAPH_OP(Load, ngraph::snippets::op)
NGRAPH_OP(ScalarLoad, ngraph::snippets::op)
NGRAPH_OP(VectorLoad, ngraph::snippets::op)
NGRAPH_OP(BlockedLoad, ngraph::snippets::op)
NGRAPH_OP(BroadcastLoad, ngraph::snippets::op)
NGRAPH_OP(Store, ngraph::snippets::op)
NGRAPH_OP(ScalarStore, ngraph::snippets::op)
NGRAPH_OP(VectorStore, ngraph::snippets::op)
NGRAPH_OP(BroadcastMove, ngraph::snippets::op)
NGRAPH_OP(Scalar, ngraph::snippets::op)
NGRAPH_OP(Nop, ngraph::snippets::op)
// Layout-oblivious from opset1
// opset completeness
NGRAPH_OP(Constant, ngraph::op)
NGRAPH_OP(Parameter, ngraph::op::v0)
NGRAPH_OP(BlockedParameter, ngraph::snippets::op)
NGRAPH_OP(Result, ngraph::op::v0)
NGRAPH_OP(Broadcast, ngraph::op::v1)
// unary
NGRAPH_OP(Abs, ngraph::op::v0)
NGRAPH_OP(Acos, ngraph::op::v0)
NGRAPH_OP(Asin, ngraph::op::v0)
NGRAPH_OP(Atan, ngraph::op::v0)
NGRAPH_OP(Ceiling, ngraph::op::v0)
NGRAPH_OP(Clamp, ngraph::op::v0)
NGRAPH_OP(Cos, ngraph::op::v0)
NGRAPH_OP(Cosh, ngraph::op::v0)
NGRAPH_OP(Elu, ngraph::op::v0)
NGRAPH_OP(Erf, ngraph::op::v0)
NGRAPH_OP(Exp, ngraph::op::v0)
NGRAPH_OP(Floor, ngraph::op::v0)
NGRAPH_OP(HardSigmoid, ngraph::op::v0)
NGRAPH_OP(Log, ngraph::op::v0)
NGRAPH_OP(LogicalNot, ngraph::op::v1)
NGRAPH_OP(Negative, ngraph::op::v0)
NGRAPH_OP(Relu, ngraph::op::v0)
NGRAPH_OP(Selu, ngraph::op::v0)
NGRAPH_OP(Sign, ngraph::op::v0)
NGRAPH_OP(Sigmoid, ngraph::op::v0)
NGRAPH_OP(Sin, ngraph::op::v0)
NGRAPH_OP(Sinh, ngraph::op::v0)
NGRAPH_OP(Sqrt, ngraph::op::v0)
NGRAPH_OP(Tan, ngraph::op::v0)
NGRAPH_OP(Tanh, ngraph::op::v0)
// binary
NGRAPH_OP(Add, ngraph::op::v1)
NGRAPH_OP(Divide, ngraph::op::v1)
NGRAPH_OP(Equal, ngraph::op::v1)
NGRAPH_OP(FloorMod, ngraph::op::v1)
NGRAPH_OP(Greater, ngraph::op::v1)
NGRAPH_OP(GreaterEqual, ngraph::op::v1)
NGRAPH_OP(Less, ngraph::op::v1)
NGRAPH_OP(LessEqual, ngraph::op::v1)
NGRAPH_OP(LogicalAnd, ngraph::op::v1)
NGRAPH_OP(LogicalOr, ngraph::op::v1)
NGRAPH_OP(LogicalXor, ngraph::op::v1)
NGRAPH_OP(Maximum, ngraph::op::v1)
NGRAPH_OP(Minimum, ngraph::op::v1)
NGRAPH_OP(Mod, ngraph::op::v1)
NGRAPH_OP(Multiply, ngraph::op::v1)
NGRAPH_OP(NotEqual, ngraph::op::v1)
NGRAPH_OP(Power, ngraph::op::v1)
NGRAPH_OP(PRelu, ngraph::op::v0)
NGRAPH_OP(SquaredDifference, ngraph::op::v0)
NGRAPH_OP(Subtract, ngraph::op::v1)
NGRAPH_OP(Xor, ngraph::op::v0)

View File

@ -0,0 +1,30 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/generator.hpp"
#include "snippets/register_info.hpp"
auto ngraph::snippets::getRegisters(std::shared_ptr<ngraph::Node>& n) -> ngraph::snippets::RegInfo {
auto rt = n->get_rt_info();
std::vector<size_t> rout;
if (auto rinfo = rt["reginfo"]) {
auto reginfo = ngraph::as_type_ptr<ngraph::VariantWrapper<std::vector<size_t>>>(rinfo)->get();
for (auto reg : reginfo) {
rout.push_back(reg);
}
}
std::vector<size_t> rin;
for (auto input : n->inputs()) {
auto rt = input.get_source_output().get_node_shared_ptr()->get_rt_info();
if (auto rinfo = rt["reginfo"]) {
auto reginfo = ngraph::as_type_ptr<ngraph::VariantWrapper<std::vector<size_t>>>(rinfo)->get();
for (auto reg : reginfo) {
rin.push_back(reg);
}
}
}
return std::make_pair(rin, rout);
}
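
A sketch of how a target generator might consume getRegisters() during emission, inside a Generator-derived class where the protected `jitters` table is populated (error handling omitted; this flow is assumed, not spelled out by this commit):

for (auto n : f->get_ordered_ops()) {
    ngraph::snippets::RegInfo regs = ngraph::snippets::getRegisters(n);
    // look up the emitter factory for this op type and emit its code
    jitters.at(n->get_type_info())(n)->emit_code(regs.first, regs.second);
}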

View File

@ -0,0 +1,71 @@
//*****************************************************************************
// Copyright 2017-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
/**
* @brief Defines openvino domains for tracing
* @file itt.hpp
*/
#pragma once
#include <openvino/cc/selective_build.h>
#include <openvino/itt.hpp>
namespace ngraph {
namespace pass {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(IETransform);
} // namespace domains
} // namespace itt
} // namespace pass
} // namespace ngraph
OV_CC_DOMAINS(ngraph_pass);
OV_CC_DOMAINS(internal_op);
/*
* RUN_ON_FUNCTION_SCOPE macro allows disabling the run_on_function pass
* MATCHER_SCOPE macro allows disabling the MatcherPass if the matcher isn't applied
* INTERNAL_OP_SCOPE macro allows disabling parts of internal nGraph operations if they are not used
*/
#if defined(SELECTIVE_BUILD_ANALYZER)
#define RUN_ON_FUNCTION_SCOPE(region) OV_SCOPE(ngraph_pass, OV_PP_CAT(region, _run_on_function))
#define MATCHER_SCOPE(region) \
const std::string matcher_name(OV_PP_TOSTRING(region))
#define INTERNAL_OP_SCOPE(region) OV_SCOPE(internal_op, region)
#elif defined(SELECTIVE_BUILD)
#define MATCHER_SCOPE_(scope, region) \
if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(scope, _, region)) == 0) \
throw ngraph::ngraph_error(std::string(OV_PP_TOSTRING(OV_PP_CAT3(scope, _, region))) + \
" is disabled!")
#define MATCHER_SCOPE(region) \
const std::string matcher_name(OV_PP_TOSTRING(region)); \
if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(ngraph_pass, _, region)) == 0) \
return
#define INTERNAL_OP_SCOPE(region) MATCHER_SCOPE_(internal_op, region)
#define RUN_ON_FUNCTION_SCOPE(region) MATCHER_SCOPE_(ngraph_pass, OV_PP_CAT(region, _run_on_function))
#else
#define MATCHER_SCOPE(region) \
const std::string matcher_name(OV_PP_TOSTRING(region))
#define INTERNAL_OP_SCOPE(region)
#define RUN_ON_FUNCTION_SCOPE(region)
#endif
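
Usage follows the transformations library these macros were adapted from; a hedged sketch with a hypothetical pass name:

// Inside a FunctionPass:
bool MyPass::run_on_function(std::shared_ptr<ngraph::Function> f) {
    RUN_ON_FUNCTION_SCOPE(MyPass);  // no-op unless selective build is enabled
    // ... pass body ...
    return false;
}

// Inside a MatcherPass constructor:
// MATCHER_SCOPE(MyMatcher);  // defines `matcher_name` for this matcher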

View File

@ -0,0 +1,12 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/blockedload.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::BlockedLoad, "BlockedLoad", 0);
snippets::op::BlockedLoad::BlockedLoad(const Output<Node>& x) : Load(x) {
}

View File

@ -0,0 +1,9 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/blockedparameter.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::BlockedParameter, "BlockedParameter", 0);

View File

@ -0,0 +1,35 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "snippets/op/broadcastload.hpp"
#include <ngraph/runtime/reference/broadcast.hpp>
using namespace std;
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::BroadcastLoad, "BroadcastLoad", 0);
snippets::op::BroadcastLoad::BroadcastLoad(const Output<Node>& x, Shape shape)
: BroadcastMove(x, shape), broadcast_info(x.get_shape().size(), 0) {
constructor_validate_and_infer_types();
}
bool snippets::op::BroadcastLoad::visit_attributes(AttributeVisitor& visitor) {
return true;
}
std::shared_ptr<Node> snippets::op::BroadcastLoad::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(BroadcastLoad);
check_new_args_count(this, new_args);
auto other = std::make_shared<BroadcastLoad>(new_args.at(0), output_shape);
other->set_broadcast_info(this->broadcast_info);
return other;
}
void snippets::op::BroadcastLoad::validate_and_infer_types() {
set_output_type(0, get_input_element_type(0), output_shape);
}

View File

@ -0,0 +1,68 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "snippets/op/broadcastmove.hpp"
#include <ngraph/runtime/host_tensor.hpp>
#include <ngraph/runtime/reference/broadcast.hpp>
using namespace std;
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::BroadcastMove, "BroadcastMove", 0);
snippets::op::BroadcastMove::BroadcastMove(const Output<Node>& x, Shape shape) : Op({x}), output_shape(shape) {
constructor_validate_and_infer_types();
}
bool snippets::op::BroadcastMove::visit_attributes(AttributeVisitor& visitor) {
return true;
}
std::shared_ptr<Node> snippets::op::BroadcastMove::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(BroadcastMove);
check_new_args_count(this, new_args);
auto other = std::make_shared<BroadcastMove>(new_args.at(0), this->output_shape);
return other;
}
void snippets::op::BroadcastMove::validate_and_infer_types() {
set_output_type(0, get_input_element_type(0), this->output_shape);
}
bool snippets::op::BroadcastMove::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const {
INTERNAL_OP_SCOPE(BroadcastMove);
NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config");
NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config");
NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation");
NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same shape as output port");
NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape");
auto ishape = input_values[0]->get_shape();
auto oshape = output_values[0]->get_shape();
NGRAPH_CHECK(ishape.size() == oshape.size(), "input and output should have the same rank");
AxisSet broadcast_axes;
for (size_t k = 0; k < ishape.size(); k++) {
if (!((ishape[k] == oshape[k])
|| (ishape[k] != oshape[k] && ((ishape[k] == 1) != (oshape[k] == 1) ) ))) {
throw ngraph_error("FakeBroadcast::evaluate incompatible shapes");
}
if (ishape[k] != oshape[k]) {
broadcast_axes.insert(k);
}
}
runtime::reference::broadcast(input_values[0]->get_data_ptr<char>(),
output_values[0]->get_data_ptr<char>(),
input_values[0]->get_shape(),
output_values[0]->get_shape(),
broadcast_axes,
sizeof(float));
return true;
}

View File

@ -0,0 +1,48 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "snippets/op/load.hpp"
#include <ngraph/runtime/host_tensor.hpp>
using namespace std;
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::Load, "Load", 0);
snippets::op::Load::Load(const Output<Node>& x) : Op({x}) {
constructor_validate_and_infer_types();
}
bool snippets::op::Load::visit_attributes(AttributeVisitor& visitor) {
return true;
}
std::shared_ptr<Node> snippets::op::Load::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(Load);
check_new_args_count(this, new_args);
return std::make_shared<Load>(new_args.at(0));
}
void snippets::op::Load::validate_and_infer_types() {
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
}
bool snippets::op::Load::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const {
INTERNAL_OP_SCOPE(Load);
NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config");
NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config");
NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation");
NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same shape as output port");
NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape");
std::copy(input_values[0]->get_data_ptr<uint8_t>(),
input_values[0]->get_data_ptr<uint8_t>() + shape_size(get_output_shape(0))*output_values[0]->get_element_type().size(),
output_values[0]->get_data_ptr<uint8_t>());
return true;
}

View File

@ -0,0 +1,18 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/nop.hpp"
using namespace std;
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::Nop, "Nop", 0);
snippets::op::Nop::Nop(const OutputVector& arguments, const OutputVector& results) : Op([arguments, results]() -> OutputVector {
OutputVector x;
x.insert(x.end(), arguments.begin(), arguments.end());
x.insert(x.end(), results.begin(), results.end());
return x;
}()) {
}

View File

@ -0,0 +1,9 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/scalar.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::Scalar, "Scalar", 0);

View File

@ -0,0 +1,12 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/scalarload.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::ScalarLoad, "ScalarLoad", 0);
snippets::op::ScalarLoad::ScalarLoad(const Output<Node>& x) : Load(x) {
}

View File

@ -0,0 +1,12 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/scalarstore.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::ScalarStore, "ScalarStore", 0);
snippets::op::ScalarStore::ScalarStore(const Output<Node>& x) : Store(x) {
}

View File

@ -0,0 +1,9 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/staticpower.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::PowerStatic, "PowerStatic", 0);

View File

@ -0,0 +1,48 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "snippets/op/scalarstore.hpp"
#include <ngraph/runtime/host_tensor.hpp>
using namespace std;
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::Store, "Store", 0);
snippets::op::Store::Store(const Output<Node>& x) : Op({x}) {
constructor_validate_and_infer_types();
}
bool snippets::op::Store::visit_attributes(AttributeVisitor& visitor) {
return true;
}
std::shared_ptr<Node> snippets::op::Store::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(Store);
check_new_args_count(this, new_args);
return std::make_shared<Store>(new_args.at(0));
}
void snippets::op::Store::validate_and_infer_types() {
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
}
bool snippets::op::Store::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const {
INTERNAL_OP_SCOPE(Store);
NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config");
NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config");
NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation");
NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same shape as output port");
NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape");
std::copy(input_values[0]->get_data_ptr<uint8_t>(),
input_values[0]->get_data_ptr<uint8_t>() + shape_size(get_output_shape(0))*output_values[0]->get_element_type().size(),
output_values[0]->get_data_ptr<uint8_t>());
return true;
}

View File

@ -0,0 +1,344 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "remarks.hpp"
#include "snippets/op/subgraph.hpp"
#include "snippets/pass/insert_load_store.hpp"
#include "snippets/pass/insert_movebroadcast.hpp"
#include "snippets/pass/load_movebroadcast_to_broadcastload.hpp"
#include "snippets/pass/assign_registers.hpp"
#include <ngraph/pass/manager.hpp>
#include <algorithm>
#include <memory>
#include <array>
using namespace std;
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::Subgraph, "Subgraph", 0);
void snippets::op::Subgraph::set_generator(std::shared_ptr<ngraph::snippets::Generator> generator) {
m_generator = generator;
}
snippets::op::Subgraph::Subgraph(const OutputVector& args, std::shared_ptr<Function> body)
: Op(args), m_body(body), m_generator(nullptr) {
constructor_validate_and_infer_types();
}
snippets::op::Subgraph::Subgraph(const NodeVector& args, std::shared_ptr<Function> body)
: Subgraph(as_output_vector(args), body) {}
std::shared_ptr<Node> snippets::op::Subgraph::clone_with_new_inputs(const OutputVector& inputs) const {
INTERNAL_OP_SCOPE(Subgraph);
return make_shared<Subgraph>(inputs, ngraph::clone_function(*m_body.get()));
}
void snippets::op::Subgraph::validate_and_infer_types() {
INTERNAL_OP_SCOPE(Subgraph);
ngraph::ParameterVector old_parameters;
for (auto op : m_body->get_parameters()) {
old_parameters.push_back(op);
}
for (size_t i = 0; i < get_input_size(); ++i) {
m_body->replace_parameter(i, std::make_shared<opset1::Parameter>(get_input_element_type(i), get_input_partial_shape(i)));
}
m_body->validate_nodes_and_infer_types();
for (size_t i = 0; i < m_body->get_parameters().size(); i++) {
m_body->get_parameters()[i]->set_friendly_name(old_parameters[i]->get_friendly_name());
}
set_output_size(m_body->get_output_size());
for (size_t i = 0; i < get_output_size(); ++i) {
set_output_type(i, m_body->get_output_element_type(i), m_body->get_output_partial_shape(i));
}
}
bool snippets::op::Subgraph::visit_attributes(AttributeVisitor& visitor) {
return true;
}
auto snippets::op::Subgraph::wrap_node_as_subgraph(const std::shared_ptr<ngraph::Node>& node) -> std::shared_ptr<op::Subgraph> {
INTERNAL_OP_SCOPE(Subgraph);
ngraph::ParameterVector body_parameters;
ngraph::OutputVector body_inputs;
ngraph::OutputVector subgraph_inputs;
for (auto input : node->inputs()) {
auto source_output = input.get_source_output();
if (is_scalar_constant(source_output.get_node_shared_ptr())) {
body_inputs.push_back(source_output);
} else {
auto parameter = std::make_shared<ngraph::opset1::Parameter>(input.get_element_type(), input.get_partial_shape());
body_parameters.push_back(parameter);
body_parameters.back()->set_friendly_name(source_output.get_node()->get_friendly_name());
body_inputs.push_back(parameter->output(0));
subgraph_inputs.push_back(source_output);
}
}
auto body_node = node->copy_with_new_inputs(body_inputs);
body_node->set_friendly_name(node->get_friendly_name());
if (node->get_output_size() != body_node->get_output_size()) {
throw ngraph::ngraph_error("original node outputs size and extracted subgraph node outputs size doesn't much");
}
ngraph::ResultVector body_results;
for (auto output : node->outputs()) {
body_results.push_back(std::make_shared<ngraph::opset1::Result>(body_node->output(output.get_index())));
}
auto body = create_body(node->get_friendly_name(), body_results, body_parameters);
auto subgraph = build_subgraph(node, subgraph_inputs, body);
for (size_t i = 0; i < body->get_parameters().size(); i++) {
body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name());
}
if (subgraph->get_output_size() != body->get_results().size()) {
throw ngraph::ngraph_error("newly create subgraph doesn't much number of original node results");
}
return subgraph;
}
std::shared_ptr<snippets::op::Subgraph> snippets::op::Subgraph::make_canonical_from_this() {
INTERNAL_OP_SCOPE(Subgraph);
ngraph::OutputVector subgraph_node_inputs;
for (auto input : this->input_values()) {
subgraph_node_inputs.push_back(input);
}
auto new_body = ngraph::clone_function(*this->get_body().get());
auto snippet = std::make_shared<op::Subgraph>(subgraph_node_inputs, new_body);
ngraph::copy_runtime_info(this->shared_from_this(), snippet);
snippet->set_friendly_name(this->get_friendly_name());
snippet->set_generator(this->m_generator);
return snippet;
}
// We can also think of canonicalization as a pass that copies the original subgraph and transforms it to a canonical form suitable for code generation
// pass actual parameter and result shapes to generate for, as well as channel mapping;
// we need to distinguish between 5d tensors that represent <N, C, H, W, c> and <N, C, D, H, W> somehow, like locked dimensions
// ngraph::AxisVector to code
void snippets::op::Subgraph::canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes) {
INTERNAL_OP_SCOPE(Subgraph);
NODE_VALIDATION_CHECK(this, input_shapes.size() == m_body->get_parameters().size(),
"Number of parameters for snippet doesn't match the number passed to the generate method: ", input_shapes.size(), " vs ", m_body->get_parameters().size(), ".");
NODE_VALIDATION_CHECK(this, output_shapes.size() == m_body->get_results().size(),
"Number of results for snippet doesn't match the number passed to the generate method: ", output_shapes.size(), " vs ", m_body->get_results().size(), ".");
// replace only constants which actually should be represented as scalars during code generation, and probably move this step a bit later
for (auto op : m_body->get_ordered_ops()) {
if (auto constant = ngraph::as_type_ptr<opset1::Constant>(op)) {
auto scalar = std::make_shared<snippets::op::Scalar>(*constant);
scalar->set_friendly_name(constant->get_friendly_name());
ngraph::copy_runtime_info(constant, scalar);
ngraph::replace_node(constant, scalar);
}
}
// replace Power with PowerStatic
for (auto op : m_body->get_ordered_ops()) {
if (auto power = ngraph::as_type_ptr<opset1::Power>(op)) {
if (ngraph::as_type_ptr<snippets::op::Scalar>(power->input(1).get_node()->shared_from_this())) {
auto power_static = std::make_shared<snippets::op::PowerStatic>(
power->input(0).get_source_output(), power->input(1).get_source_output(), power->get_autob());
power_static->set_friendly_name(power->get_friendly_name());
ngraph::copy_runtime_info(power, power_static);
ngraph::replace_node(power, power_static);
}
}
}
// it should be in the subgraph node to be aligned with internal and external parameter lists, but adding this here for testing
// TODO: store blocking info into Parameter's rt_info for future propagation
for (size_t i = 0; i < m_body->get_parameters().size(); i++) {
auto param = m_body->get_parameters()[i];
if (param->get_shape().size() < 4) {
std::vector<size_t> shape(4, 1);
std::copy(param->get_shape().begin(), param->get_shape().end(), &shape.at(4 - (param->get_shape().size() == 0 ? 1 : param->get_shape().size())) );
m_body->replace_parameter(i, std::make_shared<opset1::Parameter>(param->get_element_type(), ngraph::Shape(shape)));
} else if (param->get_shape().size() >= 4) {
if (param->get_element_type() != std::get<2>(input_shapes[i])) {
throw ngraph::ngraph_error("changes in presision. Is it legal??");
}
if (param->get_shape().size() != std::get<0>(input_shapes[i]).size()) {
m_body->replace_parameter(i, std::make_shared<opset1::Parameter>(std::get<2>(input_shapes[i]), std::get<0>(input_shapes[i])));
}
}
}
m_body->validate_nodes_and_infer_types();
for (size_t i = 0; i < m_body->get_results().size(); i++) {
auto result = m_body->get_results()[i];
PartialShape partial(result->get_shape());
bool isCompatible = ngraph::PartialShape::broadcast_merge_into(partial, std::get<0>(output_shapes[i]), ::ngraph::op::AutoBroadcastSpec::NUMPY);
// equality check won't pass since we reshape without changes on external snippet edges
NODE_VALIDATION_CHECK(this, isCompatible, "Inferred and passed result shapes differ for snippet: ",
result->get_shape(), " vs ", std::get<0>(output_shapes[i]), ".");
}
}
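// Illustrative call (not part of this change): assuming each BlockedShapeVector entry is a
// (Shape, AxisVector, element::Type) tuple, as the std::get<0>/std::get<2> accesses above suggest,
// canonicalizing a plain 4D f32 subgraph might look like:
//   Subgraph::BlockedShapeVector in  {{Shape{1, 3, 224, 224}, AxisVector{0, 1, 2, 3}, element::f32}};
//   Subgraph::BlockedShapeVector out {{Shape{1, 3, 224, 224}, AxisVector{0, 1, 2, 3}, element::f32}};
//   subgraph->canonicalize(out, in);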
void snippets::op::Subgraph::convert_to_snippet_dialect() {
INTERNAL_OP_SCOPE(Subgraph);
ngraph::pass::Manager manager;
manager.register_pass<snippets::pass::InsertLoad>();
manager.register_pass<snippets::pass::InsertStore>();
manager.register_pass<snippets::pass::InsertMoveBroadcast>();
manager.register_pass<snippets::pass::LoadMoveBroadcastToBroadcastLoad>();
manager.run_passes(m_body);
}
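// Sketch of the effect of the dialect passes above (illustration, not part of this change): a body like
//   Parameter -> Add -> Result
// is rewritten into
//   Parameter -> Load -> Add -> Store -> Result,
// with BroadcastMove inserted where numpy broadcasting is required and a Load + BroadcastMove pair
// fused into a single BroadcastLoad when the broadcast happens along the innermost dimension.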
snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes) {
INTERNAL_OP_SCOPE(Subgraph);
NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set");
canonicalize(output_shapes, input_shapes);
convert_to_snippet_dialect();
// generation flow
snippets::pass::AssignRegisters().run_on_function(m_body);
// actual code emission
ngraph::snippets::code ptr = m_generator->generate(m_body);
// check that the body doesn't have constants for scheduling
std::vector<std::shared_ptr<opset1::Constant>> constants;
for (auto op : m_body->get_ordered_ops()) {
if (auto constant = as_type_ptr<opset1::Constant>(op)) {
if (ngraph::shape_size(constant->get_shape()) != 1 && constant->get_shape() != Shape()) {
constants.push_back(constant);
}
}
}
NGRAPH_CHECK(!constants.size(), "External constants detected. Snippet is illegal for scheduling");
// check resulting shapes are broadcastable to each other so can be scheduled
Shape work_size = m_body->output(0).get_shape();
for (size_t k = 0; k < m_body->get_output_size(); k++) {
auto shape = m_body->output(k).get_shape();
if (work_size.size() != shape.size()) {
throw ngraph_error("rank for all outputs of a snippet should match");
}
for (size_t i = 0; i < work_size.size(); i++) {
if (work_size[i] != shape[i]) {
if (work_size[i] == 1) {
work_size[i] = shape[i];
} else {
throw ngraph_error("incompatible shapes for output graphs");
}
}
}
}
return {work_size, false /*canBeLinearized*/, ptr};
}
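// Worked example for the scheduling domain above (illustration only): with body output shapes
// {2, 1} and {2, 3}, work_size starts as {2, 1} and the unit dimension is widened to give
// Schedule work_size = {2, 3}; shapes {2, 2} and {2, 3} would throw "incompatible shapes" instead.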
bool snippets::op::Subgraph::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
INTERNAL_OP_SCOPE(Subgraph);
return m_body->evaluate(outputs, inputs);
}
void snippets::op::Subgraph::print() const {
INTERNAL_OP_SCOPE(Subgraph);
remark(13) << "subgraph " << this->get_friendly_name() << " "
<< this->get_type_name()
<< " which contains " << this->get_body()->get_ops().size() << " nodes" << std::endl;
int op_index = 0;
for (auto op : this->get_body()->get_ordered_ops()) {
remark(13) << "op " << op_index++ << " " << op->get_friendly_name() << " (" << op->get_type_name() << ") " << op << std::endl;
}
for (auto& in : this->inputs()) {
remark(13) << " -> " << in.get_source_output().get_node_shared_ptr()->get_friendly_name() << " "
<< in.get_source_output().get_node_shared_ptr() << std::endl;
}
for (auto& out : this->outputs()) {
for (auto& user : out.get_target_inputs()) {
remark(13) << " <- " << user.get_node()->get_friendly_name() << " " << user.get_node() << std::endl;
}
remark(13) << std::endl;
}
}
void snippets::op::Subgraph::print_statistics(bool verbose) {
INTERNAL_OP_SCOPE(Subgraph);
auto getNodeInventory = [](std::shared_ptr<ngraph::Node> n) -> size_t {
size_t total = 0;
for (auto input : n->inputs()) {
total += input.get_tensor().size();
}
for (auto output : n->outputs()) {
total += output.get_tensor().size();
}
if (auto subgraph = ngraph::as_type_ptr<op::Subgraph>(n)) {
for (auto op : subgraph->get_body()->get_ordered_ops()) {
if (ngraph::as_type_ptr<ngraph::opset1::Constant>(op)) {
total += op->output(0).get_tensor().size();
}
}
}
return total;
};
auto getFunctionInventory = [getNodeInventory](std::shared_ptr<ngraph::Function> f) -> size_t {
size_t total = 0;
for (auto op : f->get_ordered_ops()) {
// Results and parameters are artificially introduced,
// while Constants are already considered if they are inputs of other operation
// this should lead to 1:1 inventory for single node operations
if (!ngraph::as_type_ptr<ngraph::opset1::Parameter>(op)
&& !ngraph::as_type_ptr<ngraph::opset1::Result>(op)
&& !ngraph::as_type_ptr<ngraph::opset1::Constant>(op)) {
total += getNodeInventory(op);
}
}
return total;
};
auto countConstants = [](std::shared_ptr<ngraph::Function> f) -> size_t {
size_t count = 0;
for (auto op : f->get_ordered_ops()) {
count += !!ngraph::as_type_ptr<ngraph::opset1::Constant>(op) ? 1 : 0;
}
return count;
};
auto body = this->get_body();
std::cout << this->get_friendly_name()
<< ";" << this
<< ";" << body->get_ops().size()
<< ";" << body->get_parameters().size()
<< ";" << body->get_results().size()
<< ";" << countConstants(body)
<< ";" << getFunctionInventory(body)
<< ";" << getNodeInventory(this->shared_from_this()) << std::endl;
if (verbose) {
this->print();
}
}

View File

@ -0,0 +1,12 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/vectorload.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::VectorLoad, "VectorLoad", 0);
snippets::op::VectorLoad::VectorLoad(const Output<Node>& x) : Load(x) {
}

View File

@ -0,0 +1,12 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/vectorstore.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::VectorStore, "VectorStore", 0);
snippets::op::VectorStore::VectorStore(const Output<Node>& x) : Store(x) {
}

View File

@ -0,0 +1,183 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// #include <openvino/cc/selective_build.h>
#include "itt.hpp"
#include "remarks.hpp"
#include "snippets/pass/assign_registers.hpp"
#include "snippets/register_info.hpp"
#include "snippets/snippets_isa.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <iterator>
bool ngraph::snippets::pass::AssignRegisters::run_on_function(std::shared_ptr<Function> f) {
RUN_ON_FUNCTION_SCOPE(AssignRegisters);
int reg64_tmp_start { 8 }; // R8, R9, R10, R11, R12, R13, R14, R15 inputs+outputs+1
using Reg = size_t;
auto ops = f->get_ordered_ops();
decltype(ops) stmts;
std::copy_if(ops.begin(), ops.end(), std::back_inserter(stmts), [](decltype(ops[0]) op) {
return !(std::dynamic_pointer_cast<opset1::Parameter>(op) || std::dynamic_pointer_cast<opset1::Result>(op));
});
size_t rdx = 0;
std::map<std::shared_ptr<descriptor::Tensor>, Reg> regs;
for (auto op : stmts) {
for (auto output : op->outputs()) {
regs[output.get_tensor_ptr()] = rdx++;
}
}
std::vector<std::set<Reg>> used;
std::vector<std::set<Reg>> def;
for (auto op : stmts) {
std::set<Reg> u;
for (auto input : op->inputs()) {
if (regs.count(input.get_tensor_ptr())) {
u.insert(regs[input.get_tensor_ptr()]);
}
}
used.push_back(u);
std::set<Reg> d;
if (!std::dynamic_pointer_cast<snippets::op::Store>(op)) {
for (auto output : op->outputs()) {
d.insert(regs[output.get_tensor_ptr()]);
}
}
def.push_back(d);
}
// define life intervals
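// The nested loops below iterate the standard backward liveness equations to a fixed point:
//   lifeIn[n]  = used[n] ∪ (lifeOut[n] \ def[n])
//   lifeOut[n] = ∪ lifeIn[s] over every statement s that consumes an output of n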
std::vector<std::set<Reg>> lifeIn(stmts.size(), std::set<Reg>());
std::vector<std::set<Reg>> lifeOut(stmts.size(), std::set<Reg>());
for (size_t i = 0; i < stmts.size(); i++) {
for (size_t n = 0; n < stmts.size(); n++) {
std::set_difference(lifeOut[n].begin(), lifeOut[n].end(), def[n].begin(), def[n].end(), std::inserter(lifeIn[n], lifeIn[n].begin()));
lifeIn[n].insert(used[n].begin(), used[n].end());
}
for (size_t n = 0; n < stmts.size(); n++) {
auto node = stmts[n];
if (!std::dynamic_pointer_cast<snippets::op::Store>(node)) {
for (auto out : node->outputs()) {
for (auto port : out.get_target_inputs()) {
auto pos = std::find(stmts.begin(), stmts.end(), port.get_node()->shared_from_this());
if (pos != stmts.end()) {
auto k = pos-stmts.begin();
lifeOut[n].insert(lifeIn[k].begin(), lifeIn[k].end());
}
}
}
}
}
}
struct by_starting {
auto operator()(const std::pair<int, int>& lhs, const std::pair<int, int>& rhs) const -> bool {
return lhs.first < rhs.first || (lhs.first == rhs.first && lhs.second < rhs.second);
}
};
struct by_ending {
auto operator()(const std::pair<int, int>& lhs, const std::pair<int, int>& rhs) const -> bool {
return lhs.second < rhs.second || (lhs.second == rhs.second && lhs.first < rhs.first);
}
};
std::set<std::pair<int, int>, by_starting> live_intervals;
std::reverse(lifeIn.begin(), lifeIn.end());
auto find_last_use = [lifeIn](int i) -> int {
int ln = lifeIn.size()-1;
for (auto& x : lifeIn) {
if (x.find(i) != x.end()) {
return ln;
}
ln--;
}
return i;
};
for (size_t i = 0; i < stmts.size(); i++) {
live_intervals.insert(std::make_pair(i, find_last_use(i)));
}
// http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf
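// Worked example (illustration only): for intervals A=(0,3), B=(1,2), C=(3,4) the scan assigns
// A and B distinct registers; when C starts at 3, B (ending at 2, and 2 < 3) has expired, so C
// reuses B's register, while A (ending at 3, and 3 >= 3) stays active.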
std::multiset<std::pair<int, int>, by_ending> active;
std::map<Reg, Reg> register_map;
std::stack<Reg> bank;
for (int i = 0; i < 16; i++) bank.push(16-1-i);
for (auto interval : live_intervals) {
// check expired
while (!active.empty()) {
auto x = *active.begin();
if (x.second >= interval.first) {
break;
}
active.erase(x);
bank.push(register_map[x.first]);
}
// allocate
if (active.size() == 16) {
throw ngraph_error("caanot allocate registers for a snippet ");
} else {
register_map[interval.first] = bank.top();
bank.pop();
active.insert(interval);
}
}
std::map<std::shared_ptr<descriptor::Tensor>, Reg> physical_regs;
for (auto reg : regs) {
physical_regs[reg.first] = register_map[reg.second];
}
size_t constantID = 0;
for (auto n : f->get_ordered_ops()) {
auto& rt = n->get_rt_info();
// nothing to do for function signature
if (std::dynamic_pointer_cast<opset1::Parameter>(n) || std::dynamic_pointer_cast<opset1::Result>(n)) {
continue;
}
// store only effective address
if (auto result = std::dynamic_pointer_cast<snippets::op::Store>(n)) {
auto ea = reg64_tmp_start+static_cast<int64_t>(f->get_result_index(result) + f->get_parameters().size());
rt["effectiveAddress"] = std::make_shared<VariantWrapper<int64_t>>(VariantWrapper<int64_t>(ea));
continue;
}
// store effective address and proceed with vector registers
if (as_type_ptr<ngraph::snippets::op::Load>(n) || as_type_ptr<ngraph::snippets::op::BroadcastLoad>(n)) {
auto source = n->get_input_source_output(0).get_node_shared_ptr();
if (auto param = as_type_ptr<opset1::Parameter>(source)) {
auto ea = reg64_tmp_start+static_cast<int64_t>(f->get_parameter_index(param));
rt["effectiveAddress"] = std::make_shared<VariantWrapper<int64_t>>(VariantWrapper<int64_t>(ea));
} else if (auto constant = as_type_ptr<opset1::Constant>(source)) {
auto ea = reg64_tmp_start+static_cast<int64_t>(f->get_parameters().size() + f->get_results().size() + 1 + constantID);
rt["effectiveAddress"] = std::make_shared<VariantWrapper<int64_t>>(VariantWrapper<int64_t>(ea));
constantID++;
} else {
throw ngraph_error("load/broadcast should follow only Parameter or non-Scalar constant");
}
}
std::vector<size_t> regs; regs.reserve(n->outputs().size());
for (auto output : n->outputs()) {
auto allocated = physical_regs[output.get_tensor_ptr()];
regs.push_back(allocated);
}
rt["reginfo"] = std::make_shared<VariantWrapper<std::vector<size_t>>>(VariantWrapper<std::vector<size_t>>(regs));
}
return false;
}

View File

@ -0,0 +1,516 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "remarks.hpp"
#include "itt.hpp"
#include "snippets/pass/collapse_subgraph.hpp"
#include "snippets/op/subgraph.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/op/loop.hpp>
#include <memory>
#include <vector>
#include <cassert>
#include <queue>
#include <string>
#include <numeric>
NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::StartSubgraph, "CollapseSubgraph", 0);
NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::AttachToSubgraph, "CollapseSubgraph", 0);
NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::TokenizeSnippets, "CollapseSubgraph", 0);
using namespace ngraph;
using namespace snippets;
namespace {
auto outputs_are_not_broadcastable(const std::shared_ptr<ngraph::Node>& node) -> bool {
auto outputs = node->outputs();
auto find_smallest_output_shape = [](const std::vector<ngraph::Output<ngraph::Node>>& outputs) -> ngraph::Shape {
return std::accumulate(std::begin(outputs), std::end(outputs), ngraph::Shape(outputs.begin()->get_shape()),
[](ngraph::Shape other_shape, ngraph::Output<ngraph::Node> output){
return ngraph::shape_size(output.get_shape()) < ngraph::shape_size(other_shape) ? output.get_shape() : other_shape;
});
};
auto ref_shape = find_smallest_output_shape(outputs);
auto check_shapes_broadcastable = [ref_shape](const ngraph::Output<ngraph::Node>& output) -> bool {
auto other_shape = output.get_shape();
if (other_shape.size() != ref_shape.size()) {
return false;
}
return std::inner_product(std::begin(other_shape), std::end(other_shape), std::begin(ref_shape), true,
std::logical_and<bool>(), [](ngraph::Shape::value_type lsh, ngraph::Shape::value_type rsh){
return rsh == 1 || lsh == rsh;
});
};
return std::find_if_not(std::begin(outputs), std::end(outputs), check_shapes_broadcastable) != std::end(outputs);
};
auto has_cycles_of_dependencies(const std::vector<std::set<ngraph::Input<ngraph::Node>>>& results,
const std::vector<ngraph::Input<ngraph::Node>>& inputs) -> bool {
auto BFS_from_to = [](ngraph::Node* from, ngraph::Node* to) -> bool {
std::unordered_set<ngraph::Node*> visited;
std::queue<ngraph::Node*> stack;
stack.push(from);
while (stack.size() > 0) {
ngraph::Node* curr = stack.front();
visited.insert(curr);
if (ngraph::op::is_output(curr)) {
return false;
}
stack.pop();
if (curr != to) {
for (const auto& next : curr->get_users()) {
if (visited.count(next.get()) == 0) {
stack.push(next.get());
}
}
} else {
return true;
}
}
return false;
};
for (auto& result : results) {
for (auto& user : result) {
for (auto& input : inputs) {
auto source = input.get_source_output().get_node();
auto containsLoop = BFS_from_to(user.get_node(), source);
remark(1) << "checking path from "
<< user.get_node()->get_friendly_name()
<< " to " << source->get_friendly_name()
<< " resulted in " << containsLoop << std::endl;
if (containsLoop) {
return true;
}
}
}
}
return false;
}
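// Illustration (not part of this change): merging a node into a subgraph is rejected when some
// external consumer of the subgraph's results reaches, through the rest of the graph, a producer
// of one of the prospective inputs; collapsing would then make the new subgraph depend on itself.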
auto has_subgraph_as_input(std::shared_ptr<Node> node) -> bool {
auto inputs = node->inputs();
for (auto input : inputs) {
auto parent = input.get_source_output().get_node_shared_ptr();
if (!!as_type_ptr<snippets::op::Subgraph>(parent)) {
return true;
}
}
return false;
};
auto is_lo(std::shared_ptr<Node> n) -> bool {
auto is_lob = [](std::shared_ptr<Node> n) -> bool {
using ngraph::as_type_ptr;
return !!as_type_ptr<opset1::Add>(n)
|| !!as_type_ptr<opset1::Divide>(n)
|| !!as_type_ptr<opset1::Equal>(n)
|| !!as_type_ptr<opset1::FloorMod>(n)
|| !!as_type_ptr<opset1::Greater>(n)
|| !!as_type_ptr<opset1::GreaterEqual>(n)
|| !!as_type_ptr<opset1::Less>(n)
|| !!as_type_ptr<opset1::LessEqual>(n)
|| !!as_type_ptr<opset1::LogicalAnd>(n)
|| !!as_type_ptr<opset1::LogicalOr>(n)
|| !!as_type_ptr<opset1::LogicalXor>(n)
|| !!as_type_ptr<opset1::Maximum>(n)
|| !!as_type_ptr<opset1::Minimum>(n)
|| !!as_type_ptr<opset1::Mod>(n)
|| !!as_type_ptr<opset1::Multiply>(n)
|| !!as_type_ptr<opset1::NotEqual>(n)
|| !!as_type_ptr<opset1::PRelu>(n)
|| !!as_type_ptr<opset1::Power>(n)
|| !!as_type_ptr<opset1::SquaredDifference>(n)
|| !!as_type_ptr<opset1::Subtract>(n)
|| !!as_type_ptr<opset1::Xor>(n);
};
auto is_lou = [](std::shared_ptr<Node> n) -> bool {
using ngraph::as_type_ptr;
return !!as_type_ptr<opset1::Abs>(n)
// || !!as_type_ptr<opset1::Acos>(n)
// || !!as_type_ptr<opset1::Asin>(n)
// || !!as_type_ptr<opset1::Atan>(n)
// || !!as_type_ptr<opset1::Ceiling>(n) ?
|| !!as_type_ptr<opset1::Clamp>(n)
// || !!as_type_ptr<opset1::Cos>(n)
// || !!as_type_ptr<opset1::Cosh>(n)
|| !!as_type_ptr<opset1::Elu>(n)
|| !!as_type_ptr<opset1::Erf>(n)
|| !!as_type_ptr<opset1::Exp>(n)
// || !!as_type_ptr<opset1::Floor>(n) ?
// || !!as_type_ptr<opset1::Log>(n) ?
|| !!as_type_ptr<opset1::LogicalNot>(n)
|| !!as_type_ptr<opset1::Negative>(n)
|| !!as_type_ptr<opset1::Relu>(n)
// || !!as_type_ptr<opset1::Sign>(n) ?
|| !!as_type_ptr<opset1::Sigmoid>(n)
// || !!as_type_ptr<opset1::Sin>(n)
// || !!as_type_ptr<opset1::Sinh>(n)
|| !!as_type_ptr<opset1::Sqrt>(n)
// || !!as_type_ptr<opset1::Tan>(n)
|| !!as_type_ptr<opset1::Tanh>(n);
};
auto is_lot = [](std::shared_ptr<Node> n) -> bool {
using ngraph::as_type_ptr;
return false;
// return !!as_type_ptr<opset1::HardSigmoid>(n) // ternary with 2 constants
// || !!as_type_ptr<opset1::Selu>(n); // ternary with 2 constants / or DW
};
auto is_fq = [](std::shared_ptr<Node> n) -> bool {
using ngraph::as_type_ptr;
return false;//!!as_type_ptr<opset1::FakeQuantize>(n); // 4->1
};
return is_lou(n) || is_lob(n) || is_lot(n) || is_fq(n);
}
auto has_supported_in_out(std::shared_ptr<Node> n) -> bool {
for (auto in : n->inputs()) {
if (in.get_tensor().get_element_type() != ngraph::element::f32) {
return false;
}
}
for (auto out : n->outputs()) {
if (out.get_tensor().get_element_type() != ngraph::element::f32) {
return false;
}
for (auto in_out : out.get_target_inputs()) {
if (!!as_type_ptr<ngraph::op::v5::Loop>(in_out.get_node()->shared_from_this())) {
return false;
}
}
}
return true;
};
} // namespace
ngraph::snippets::pass::StartSubgraph::StartSubgraph(bool tokenize_by_node) : MatcherPass() {
MATCHER_SCOPE(StartSubgraph);
auto has_multiple_output_edges = [](std::shared_ptr<Node> n) -> bool {
for (auto out : n->outputs()) {
if (out.get_target_inputs().size() != 1) return true;
}
return false;
};
register_matcher(std::make_shared<pattern::Matcher>(
std::make_shared<pattern::op::Label>(pattern::any_input(),
[tokenize_by_node, has_multiple_output_edges](std::shared_ptr<Node> n) {
return is_lo(n) &&
has_supported_in_out(n) &&
(tokenize_by_node || !has_subgraph_as_input(n)) &&
has_multiple_output_edges(n);
})),
[](ngraph::pattern::Matcher &m) -> bool {
auto node = m.get_match_root();
remark(1) << "Match root"
<< node->get_friendly_name()
<< " " << node
<< " Creating new snippet - no input subgraphs found" << std::endl;
auto subgraph = op::Subgraph::wrap_node_as_subgraph(node);
ngraph::replace_node(node, subgraph);
remark(1) << "Replacement (new) done for: "
<< subgraph->get_friendly_name()
<< " with " << subgraph->inputs().size()
<< " inputs and " << subgraph->outputs().size()
<< " outputs and " << subgraph->get_body()->get_ops().size() << " ops total\n";
return true;
});
}
ngraph::snippets::pass::AttachToSubgraph::AttachToSubgraph(bool tokenize_by_node) : MatcherPass() {
MATCHER_SCOPE(AttachToSubgraph);
enum continuation_strategy {
reset,
abort
};
continuation_strategy strategy = continuation_strategy::abort;
ngraph::graph_rewrite_callback continuation_callback = [strategy](ngraph::pattern::Matcher &m) -> bool {
auto node = m.get_match_root();
remark(1) << "Match root " << node->get_friendly_name() << " " << node << std::endl;
// inputs that are already subgraphs
std::unordered_set<std::shared_ptr<Node>> input_subgraphs;
// clone bodies because we need a rollback if loop is found
std::map<std::shared_ptr<Node>, std::shared_ptr<ngraph::Function>> clones;
ParameterVector body_parameters;
OutputVector external_inputs;
OutputVector internal_inputs;
auto inputs = node->inputs();
auto is_recurrent = [inputs](const ngraph::Output<ngraph::Node>& to_find) -> bool {
for (auto in : inputs) {
if (in.get_source_output().get_node_shared_ptr() == to_find.get_node_shared_ptr()) {
return true;
}
}
return false;
};
auto get_input_index = [](const Output<Node>& found) -> size_t {
for (auto& input : found.get_target_inputs()) {
remark(13) << input.get_node() << " " << input.get_source_output() << " vs "
<< found << found.get_node() << " : " << input.get_index() << " " << found.get_index() << std::endl;
}
for (auto& input : found.get_target_inputs()) {
remark(13) << input.get_node() << " " << input.get_source_output() << " vs "
<< found << " : " << input.get_index() << " " << found.get_index() << std::endl;
if (as_type_ptr<op::Subgraph>(input.get_node()->shared_from_this()) != nullptr && input.get_source_output() == found) {
return input.get_index();
}
}
return 0;
};
for (auto input : inputs) {
auto input_node = input.get_source_output().get_node_shared_ptr();
if (auto subgraph = as_type_ptr<op::Subgraph>(input_node)) {
if (!clones.count(input_node)) {
auto f = ngraph::clone_function(*subgraph->get_body().get());
f->set_friendly_name(subgraph->get_body()->get_friendly_name());
clones[input_node] = f;
}
}
}
for (auto input : inputs) {
auto input_node = input.get_source_output().get_node_shared_ptr();
if (auto subgraph = as_type_ptr<op::Subgraph>(input_node)) {
if (!input_subgraphs.count(input_node)) {
input_subgraphs.insert(input_node);
auto f = clones[input_node];
const auto& input_body_parameters = f->get_parameters();
for (size_t i = 0; i < input_body_parameters.size(); ++i) {
auto found = std::find(external_inputs.begin(), external_inputs.end(), subgraph->input_value(i));
if (found != external_inputs.end()) {
auto current_input_index = get_input_index(*found);
// Handle the case where multiple inputs referencing the same parameter come from one subgraph => it's not introduced by SS.
// It might be better to keep track of the body parameter relationship rather than doing it this way
if (current_input_index < body_parameters.size()) {
remark(13) << "replacing " << *found << " " << current_input_index << " with "
<< body_parameters[current_input_index] << std::endl;
f->replace_parameter(i, body_parameters[current_input_index]);
} else {
external_inputs.push_back(subgraph->input_value(i));
body_parameters.push_back(input_body_parameters[i]);
}
} else if (is_recurrent(subgraph->input_value(i))) {
remark(13) << "ternary merge is conducted " << subgraph->input_value(i).get_node_shared_ptr() << std::endl;
auto internal = input_body_parameters[i];
auto internal_consumers = internal->outputs();
for (auto output : internal->outputs()) {
for (auto consumer : output.get_target_inputs()) {
if (auto to_replace_with = as_type_ptr<op::Subgraph>(subgraph->input_value(i).get_node_shared_ptr())) {
auto other_body = clones[subgraph->input_value(i).get_node_shared_ptr()];
auto other_body_result = other_body->get_results()[consumer.get_source_output().get_index()];
auto result_producer = other_body_result->input(0).get_source_output();
consumer.replace_source_output(result_producer.get_node_shared_ptr());
}
}
}
} else {
external_inputs.push_back(subgraph->input_value(i));
body_parameters.push_back(input_body_parameters[i]);
}
}
}
// this is where stitching happens: take the result of a copy of the body of the currently processed input and put it into the new inputs
// internal output index == external output index
auto& input_body = clones[input_node];
size_t source_output_index = input.get_source_output().get_index();
auto source_result = input_body->get_results()[source_output_index];
// Result op has a single input
internal_inputs.push_back(source_result->input_value(0));
} else {
if (op::is_scalar_constant(input_node)) {
internal_inputs.push_back(input_node->output(0));
} else {
external_inputs.push_back(input.get_source_output());
auto new_parameter = std::make_shared<opset1::Parameter>(input.get_element_type(), input.get_partial_shape());
new_parameter->set_friendly_name(input.get_source_output().get_node()->get_friendly_name());
body_parameters.push_back(new_parameter);
body_parameters.back()->set_friendly_name(input.get_source_output().get_node()->get_friendly_name());
internal_inputs.push_back(new_parameter->output(0));
}
}
}
auto body_node = node->copy_with_new_inputs(internal_inputs);
body_node->set_friendly_name(node->get_friendly_name());
remark(1) << "Original node outputs = " << node->get_output_size()
<< " body node outputs = " << body_node->get_output_size() << std::endl;
if (node->get_output_size() != body_node->get_output_size()) {
throw ngraph_error("original node outputs size and extracted node outputs size doesn't much");
}
ResultVector body_results;
std::vector<std::set<Input<Node>>> subgraph_result_inputs;
for (auto subgraph : input_subgraphs) {
for (auto output : subgraph->outputs()) {
bool first_side_consumer = true;
for (auto target_input : output.get_target_inputs()) {
auto target_node = target_input.get_node()->shared_from_this();
if (input_subgraphs.count(target_node)) {
remark(13) << "ternary merge is conducted " << subgraph << " -> " << target_node << std::endl;
}
if (!input_subgraphs.count(target_node) && target_node != node) {
if (first_side_consumer) {
auto& input_subgraph_body = clones[subgraph];
body_results.push_back(std::make_shared<opset1::Result>(input_subgraph_body->get_results()[output.get_index()]->input_value(0)));
subgraph_result_inputs.push_back({});
first_side_consumer = false;
}
if (!!subgraph_result_inputs.back().count(target_input)) {
throw ngraph_error("target input added twice!!!");
}
// save target input port outside the body
subgraph_result_inputs.back().insert(target_input);
}
}
}
}
for (auto output : node->outputs()) {
body_results.push_back(std::make_shared<opset1::Result>(body_node->output(output.get_index())));
subgraph_result_inputs.push_back(output.get_target_inputs());
}
if (body_results.size() != subgraph_result_inputs.size()) {
throw ngraph_error("body results and node results size mismatch during subgraph collaps");
}
if (body_parameters.size() + body_results.size() > 7) {
if (strategy == continuation_strategy::reset) {
remark(13) << "new subgraph is created. Impossible to schedule subgraph with "
<< body_parameters.size() << " inputs and " << body_results.size() << " outputs." << std::endl;
auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node);
ngraph::replace_node(node, single_node_subgraph);
return true;
} else {
return false;
}
}
auto body = op::create_body(node->get_friendly_name(), body_results, body_parameters);
for (size_t i = 0; i < body->get_parameters().size(); i++) {
body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name());
}
auto subgraph = op::build_subgraph(node, external_inputs, body);
auto act_body = subgraph->get_body();
for (size_t i = 0; i < act_body->get_parameters().size(); i++) {
act_body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name());
}
if (subgraph->get_output_size() != subgraph_result_inputs.size()) {
throw ngraph_error("newly create subgraph doesn't much number of results");
}
if (outputs_are_not_broadcastable(subgraph)) {
if (strategy == continuation_strategy::reset) {
remark(13) << "New subgraph is created due to outputs of a subgraph not broadcastable." << std::endl;
auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node);
single_node_subgraph->validate_and_infer_types();
ngraph::replace_node(node, single_node_subgraph);
return true;
} else {
return false;
}
}
if (has_cycles_of_dependencies(subgraph_result_inputs, subgraph->inputs())) {
if (strategy == continuation_strategy::reset) {
remark(13) << "New subgraph is created due to loop dependency introduced by one of input subgraphs." << std::endl;
auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node);
single_node_subgraph->validate_and_infer_types();
ngraph::replace_node(node, single_node_subgraph);
return true;
} else {
return false;
}
}
for (size_t i = 0; i < subgraph->get_output_size(); ++i) {
for (auto target_input : subgraph_result_inputs[i]) {
target_input.replace_source_output(subgraph->output(i));
}
}
subgraph->validate_and_infer_types();
auto act_body1 = subgraph->get_body();
for (size_t i = 0; i < act_body1->get_parameters().size(); i++) {
act_body1->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name());
}
remark(1) << "Replacement (merge) done for: "
<< subgraph->get_friendly_name()
<< " with " << subgraph->inputs().size()
<< " inputs and " << subgraph->outputs().size()
<< " outputs and " << subgraph->get_body()->get_ops().size() << " ops total\n";
return true;
};
register_matcher(std::make_shared<pattern::Matcher>(
std::make_shared<pattern::op::Label>(pattern::any_input(),
[](std::shared_ptr<Node> n) {
return is_lo(n) && has_supported_in_out(n) && has_subgraph_as_input(n);
})),
continuation_callback);
}

View File

@ -0,0 +1,67 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "remarks.hpp"
#include "snippets/pass/insert_load_store.hpp"
#include "snippets/snippets_isa.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
ngraph::snippets::pass::InsertLoad::InsertLoad() {
MATCHER_SCOPE(InsertLoad);
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::opset1::Parameter>()),
[this](ngraph::pattern::Matcher &m) {
auto root = m.get_match_root();
// check if already has Load as an output
for (auto output : root->outputs()) {
for (auto consumer : output.get_target_inputs()) {
if (dynamic_cast<ngraph::snippets::op::Load*>(consumer.get_node())) {
return false;
}
}
}
auto load = std::make_shared<ngraph::snippets::op::Load> (root);
ngraph::copy_runtime_info(root, load);
bool rewritten = false;
for (auto output : root->outputs()) {
for (auto consumer : output.get_target_inputs()) {
if (consumer.get_node()->shared_from_this() != load) {
consumer.replace_source_output(load);
rewritten |= true;
}
}
}
return rewritten;
});
}
ngraph::snippets::pass::InsertStore::InsertStore() {
MATCHER_SCOPE(InsertStore);
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::opset1::Result>()),
[this](ngraph::pattern::Matcher &m) {
auto root = m.get_match_root();
// check if already has Store as an input
for (auto input : root->inputs()) {
if (dynamic_cast<ngraph::snippets::op::Store*>(input.get_source_output().get_node())) {
return false;
}
}
auto store = std::make_shared<ngraph::snippets::op::Store> (root->input_value(0));
ngraph::copy_runtime_info(root, store);
root->set_argument(0, store);
return true;
});
}

View File

@ -0,0 +1,177 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "remarks.hpp"
#include "itt.hpp"
#include "snippets/pass/insert_movebroadcast.hpp"
#include "snippets/snippets_isa.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <iostream>
#include <numeric>
using namespace ngraph;
static std::shared_ptr<ngraph::Node> numpy_broadcast_node(const ngraph::Output<ngraph::Node>& value,
const ngraph::Shape& output_shape, const ngraph::Shape& source_shape) {
std::shared_ptr<ngraph::Node> broadcasted_node = value.get_node_shared_ptr();
if (output_shape == value.get_shape()) {
return broadcasted_node;
}
NGRAPH_CHECK(source_shape.size() == output_shape.size(),
"Ranks of source_shape and output_shape dont match: ",
source_shape.size(),
" vs ",
output_shape.size());
ngraph::AxisVector broadcast_axes;
ngraph::Shape squeezed_shape;
for (size_t index = 0; index < output_shape.size(); ++index) {
if (source_shape.at(index) == 1 && output_shape.at(index) != 1) {
broadcast_axes.push_back(index);
} else {
squeezed_shape.push_back(source_shape.at(index));
}
}
remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name()
<< " " << broadcast_axes << " " << broadcasted_node->get_shape() << " -> " << output_shape << std::endl;
// it shouldn't be a problem for now since we don't consider StridedSlice and Broadcast here
if (auto constant = ngraph::as_type_ptr<ngraph::opset1::Constant>(broadcasted_node)) {
if (constant->get_shape() == ngraph::Shape() || ngraph::shape_size(constant->get_shape()) == 1) {
remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name()
<< " to scalar constant " << constant->get_shape() << " -- aborting!" << std::endl;
return broadcasted_node;
}
}
if (auto constant = ngraph::as_type_ptr<ngraph::snippets::op::Scalar>(broadcasted_node)) {
if (constant->get_shape() == ngraph::Shape() || ngraph::shape_size(constant->get_shape()) == 1) {
remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name()
<< " to scalar constant " << constant->get_shape() << " -- aborting!" << std::endl;
return broadcasted_node;
}
}
if (!broadcast_axes.empty()) {
// ShapeOf
broadcasted_node = std::make_shared<ngraph::snippets::op::BroadcastMove>(broadcasted_node, output_shape);
}
return broadcasted_node;
}
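// Worked example (illustration only): for value shape {1, 3}, output_shape {2, 3} and
// source_shape {1, 3}, the loop above collects broadcast_axes = {0} and squeezed_shape = {3},
// so a BroadcastMove to {2, 3} is inserted; scalar Constant/Scalar inputs return early untouched.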
static ngraph::Shape calculate_broadcast_shape(ngraph::Shape lhs_shape, ngraph::Shape rhs_shape) {
ngraph::Shape result;
auto lhs_rank = lhs_shape.size();
auto rhs_rank = rhs_shape.size();
auto max_rank = std::max(lhs_rank, rhs_rank);
// left-pad the lhs_shape with ones
lhs_shape.insert(begin(lhs_shape), max_rank - lhs_rank, 1);
// left-pad the rhs_shape with ones
rhs_shape.insert(begin(rhs_shape), max_rank - rhs_rank, 1);
for (size_t index = 0; index < max_rank; ++index) {
size_t lhs_dim = lhs_shape.at(index);
size_t rhs_dim = rhs_shape.at(index);
if (lhs_dim != rhs_dim && lhs_dim != 1 && rhs_dim != 1) {
throw ngraph::ngraph_error("incompatible shapes");
}
result.push_back(std::max(lhs_dim, rhs_dim));
}
return result;
}
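// Worked example (illustration only): calculate_broadcast_shape({2, 1, 3}, {4, 3}) left-pads the
// second shape to {1, 4, 3} and takes per-dimension maxima, yielding {2, 4, 3};
// {2, 3} vs {4, 3} throws "incompatible shapes" since 2 != 4 and neither equals 1.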
std::pair<ngraph::Shape, std::vector<ngraph::Shape>> get_numpy_broadcast_shapes(const std::vector<ngraph::Shape>& input_shapes) {
ngraph::Shape target_shape = std::accumulate(begin(input_shapes), end(input_shapes), ngraph::Shape{}, calculate_broadcast_shape);
std::vector<ngraph::Shape> full_shapes;
for (const ngraph::Shape& input : input_shapes) {
ngraph::Shape padded_shape{input};
padded_shape.insert(begin(padded_shape), target_shape.size() - padded_shape.size(), 1);
full_shapes.push_back(move(padded_shape));
}
return {target_shape, full_shapes};
}
auto reset_broadcast_config(const std::shared_ptr<ngraph::Node>& op) -> void {
using namespace ngraph;
bool is_scalar = false;
for (auto input : op->inputs()) {
if (input.get_shape() == Shape() || ngraph::shape_size(input.get_shape()) == 1) {
is_scalar = true;
}
}
if (!is_scalar) {
if (auto binary = std::dynamic_pointer_cast<ngraph::op::util::BinaryElementwiseArithmetic>(op)) {
binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE);
} else if (auto binary = std::dynamic_pointer_cast<ngraph::op::util::BinaryElementwiseComparison>(op)) {
binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE);
} else if (auto binary = std::dynamic_pointer_cast<ngraph::op::util::BinaryElementwiseLogical>(op)) {
binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE);
}
}
}
// adds explicit broadcasts if needed
// TODO: this indeed makes the model not reshapable; we need to come up with a cleverer way to insert fake broadcasts.
// On the other hand, if we replace a scalar constant with a Scalar op / ShapeOf, we could have broadcasts that are reshapable
// TODO: generate FakeBroadcast if and only if the broadcast is done by the w dimension
ngraph::snippets::pass::InsertMoveBroadcast::InsertMoveBroadcast() {
MATCHER_SCOPE(InsertMoveBroadcast);
ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) {
auto root = m.get_match_root();
const auto& values = root->input_values();
if (values.empty()) {
return false;
}
std::vector<ngraph::Shape> input_shapes;
for (const auto& input : values) {
input_shapes.push_back(input.get_shape());
}
// find the output tensor's shape, then broadcast all inputs so that they are compatible
auto bcast_shapes = get_numpy_broadcast_shapes(input_shapes);
ngraph::OutputVector broadcasted_inputs;
for (size_t i = 0; i < values.size(); ++i) {
auto node = numpy_broadcast_node(values[i], bcast_shapes.first, bcast_shapes.second[i]);
ngraph::copy_runtime_info(root, node);
broadcasted_inputs.push_back(node);
}
auto new_args = ngraph::as_node_vector(broadcasted_inputs);
for (size_t i = 0; i < new_args.size(); i++) {
root->input(i).replace_source_output(new_args[i]->output(0));
}
reset_broadcast_config(root);
return true;
};
// only numpy broadcast type is supported currently
auto any = std::make_shared<pattern::op::Label>(pattern::any_input(),
[](std::shared_ptr<Node> n) {
// should add supports_auto_broadcast to SquaredDifference
return (ngraph::op::supports_auto_broadcast(n) || !!as_type_ptr<opset1::SquaredDifference>(n) || !!as_type_ptr<opset1::Mod>(n))
&& n->get_autob().m_type == ngraph::op::AutoBroadcastType::NUMPY; });
register_matcher(std::make_shared<ngraph::pattern::Matcher>(any), callback);
}

View File

@ -0,0 +1,59 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "remarks.hpp"
#include "itt.hpp"
#include "snippets/pass/load_movebroadcast_to_broadcastload.hpp"
#include "snippets/snippets_isa.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <iostream>
ngraph::snippets::pass::LoadMoveBroadcastToBroadcastLoad::LoadMoveBroadcastToBroadcastLoad() {
MATCHER_SCOPE(LoadMoveBroadcastToBroadcastLoad);
auto param_pattern = ngraph::pattern::wrap_type<ngraph::opset1::Parameter>();
auto load_pattern = std::make_shared<ngraph::snippets::op::Load>(param_pattern);
auto fbn = std::make_shared<ngraph::snippets::op::BroadcastMove>(load_pattern, Shape{1});
register_matcher(std::make_shared<ngraph::pattern::Matcher>(fbn),
[load_pattern, param_pattern](ngraph::pattern::Matcher &m) {
auto root = m.get_match_root();
const auto &pm = m.get_pattern_value_map();
const auto input = pm.at(load_pattern).get_node_shared_ptr();
const auto param = pm.at(param_pattern).get_node_shared_ptr();
// check if load has more than 1 user to avoid load+broadcast load on the same parameter
if (input->output(0).get_target_inputs().size() != 1) {
return false;
}
if (root->inputs().size() != 1 || input->inputs().size() != 1) {
throw ngraph_error("cannot rewrite Broadcast load with more than one input");
}
auto inshape = root->input(0).get_shape();
auto outshape = root->output(0).get_shape();
auto broadcastload = std::make_shared<snippets::op::BroadcastLoad>(param, outshape);
Shape bct(inshape.size(), 0);
for (size_t k = 0; k < inshape.size(); k++) {
if (inshape[k] != outshape[k] && inshape[k] == 1) {
bct[k] = 1;
}
}
broadcastload->set_broadcast_info(bct);
if (broadcastload->is_broadcast(outshape.size()-1)) {
ngraph::copy_runtime_info(root, broadcastload);
ngraph::replace_node(root, broadcastload);
return true;
} else {
return false;
}
});
}

View File

@ -0,0 +1,40 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "snippets/pass/vector_to_scalar.hpp"
#include "snippets/snippets_isa.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
ngraph::snippets::pass::ReplaceLoadsWithScalarLoads::ReplaceLoadsWithScalarLoads() {
MATCHER_SCOPE(ReplaceLoadsWithScalarLoads);
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::snippets::op::Load>()),
[this](ngraph::pattern::Matcher &m) {
auto root = m.get_match_root();
auto load = std::make_shared<ngraph::snippets::op::ScalarLoad> (root->input_value(0));
load->set_friendly_name(root->get_friendly_name());
ngraph::copy_runtime_info(root, load);
ngraph::replace_node(root, load);
return true;
});
}
ngraph::snippets::pass::ReplaceStoresWithScalarStores::ReplaceStoresWithScalarStores() {
MATCHER_SCOPE(ReplaceStoresWithScalarStores);
register_matcher(std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<ngraph::snippets::op::Store>()),
[this](ngraph::pattern::Matcher &m) {
auto root = m.get_match_root();
auto store = std::make_shared<ngraph::snippets::op::ScalarStore> (root->input_value(0));
store->set_friendly_name(root->get_friendly_name());
ngraph::copy_runtime_info(root, store);
ngraph::replace_node(root, store);
return true;
});
}

View File

@ -0,0 +1,9 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/register_info.hpp"
template class ngraph::VariantImpl<std::vector<size_t>>;
constexpr ngraph::VariantTypeInfo ngraph::VariantWrapper<std::vector<size_t>>::type_info;

View File

@ -0,0 +1,20 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <iostream>
class logstreambuf: public std::streambuf {
public:
static const int threshold {5};
};
template <typename T>
static inline auto remark(T x) -> std::ostream& {
static logstreambuf nostreambuf;
static std::ostream nocout(&nostreambuf);
return ((x >= logstreambuf::threshold)? std::cout << "Remark: " : nocout);
}
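// Usage sketch (illustration): messages below the threshold are swallowed by the null buffer.
//   remark(13) << "printed" << std::endl;  // 13 >= threshold (5), goes to std::cout with a "Remark: " prefix
//   remark(1)  << "dropped" << std::endl;  // 1 < 5, routed to the no-op stream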

View File

@ -16,6 +16,7 @@ set(LINK_LIBRARIES
openvino::itt
openvino::conditional_compilation
sharedTestClasses
inference_engine_snippets
)
set(DEPENDENCIES

View File

@ -0,0 +1,135 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <snippets/snippets_isa.hpp>
#include <snippets/pass/load_movebroadcast_to_broadcastload.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
TEST(TransformationTests, FuseLoadWithBroadcastMoveByX) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 1});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load0 = std::make_shared<snippets::isa::Load>(data0);
auto load1 = std::make_shared<snippets::isa::Load>(data1);
auto bct = std::make_shared<snippets::isa::BroadcastMove>(load0, load1->get_shape());
auto add = std::make_shared<opset1::Add>(bct, load1);
auto store = std::make_shared<snippets::isa::Store>(add);
f = std::make_shared<Function>(NodeVector{store}, ParameterVector{data0, data1});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::LoadMoveBroadcastToBroadcastLoad>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 1});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load0 = std::make_shared<snippets::isa::BroadcastLoad>(data0, data1->get_shape());
auto load1 = std::make_shared<snippets::isa::Load>(data1);
auto add = std::make_shared<opset1::Add>(load0, load1);
auto store = std::make_shared<snippets::isa::Store>(add);
f_ref = std::make_shared<Function>(NodeVector{store}, ParameterVector{data0, data1});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, NotFuseLoadWithBroadcastMoveByY) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load0 = std::make_shared<snippets::isa::Load>(data0);
auto load1 = std::make_shared<snippets::isa::Load>(data1);
auto bct = std::make_shared<snippets::isa::BroadcastMove>(load0, load1->get_shape());
auto add = std::make_shared<opset1::Add>(bct, load1);
auto store = std::make_shared<snippets::isa::Store>(add);
f = std::make_shared<Function>(NodeVector{store}, ParameterVector{data0, data1});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::LoadMoveBroadcastToBroadcastLoad>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load0 = std::make_shared<snippets::isa::Load>(data0);
auto load1 = std::make_shared<snippets::isa::Load>(data1);
auto bct = std::make_shared<snippets::isa::BroadcastMove>(load0, load1->get_shape());
auto add = std::make_shared<opset1::Add>(bct, load1);
auto store = std::make_shared<snippets::isa::Store>(add);
f_ref = std::make_shared<Function>(NodeVector{store}, ParameterVector{data0, data1});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, NoFuseLoadWithBroadcastMoveMultipleUsers) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 1});
auto data2 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 1});
auto load0 = std::make_shared<snippets::isa::Load>(data0);
auto load1 = std::make_shared<snippets::isa::Load>(data1);
auto load2 = std::make_shared<snippets::isa::Load>(data2);
auto bct1 = std::make_shared<snippets::isa::BroadcastMove>(load1, load0->get_shape());
auto add = std::make_shared<opset1::Add>(load0, bct1);
auto mul = std::make_shared<opset1::Multiply>(load1, load2);
auto store0 = std::make_shared<snippets::isa::Store>(add);
auto store1 = std::make_shared<snippets::isa::Store>(mul);
f = std::make_shared<Function>(NodeVector{store0, store1}, ParameterVector{data0, data1, data2});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::LoadMoveBroadcastToBroadcastLoad>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 1});
auto data2 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 1});
auto load0 = std::make_shared<snippets::isa::Load>(data0);
auto load1 = std::make_shared<snippets::isa::Load>(data1);
auto load2 = std::make_shared<snippets::isa::Load>(data2);
auto bct1 = std::make_shared<snippets::isa::BroadcastMove>(load1, load0->get_shape());
auto add = std::make_shared<opset1::Add>(load0, bct1);
auto mul = std::make_shared<opset1::Multiply>(load1, load2);
auto store0 = std::make_shared<snippets::isa::Store>(add);
auto store1 = std::make_shared<snippets::isa::Store>(mul);
f_ref = std::make_shared<Function>(NodeVector{store0, store1}, ParameterVector{data0, data1, data2});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}

View File

@ -0,0 +1,94 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <snippets/snippets_isa.hpp>
#include <snippets/pass/insert_load_store.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
TEST(TransformationTests, InsertLoadStore) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto neg = std::make_shared<opset1::Negative>(data);
f = std::make_shared<Function>(NodeVector{neg}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::InsertLoad>();
m.register_pass<snippets::pass::InsertStore>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load = std::make_shared<snippets::isa::Load>(data);
auto neg = std::make_shared<opset1::Negative>(load);
auto store = std::make_shared<snippets::isa::Store>(neg);
f_ref = std::make_shared<Function>(NodeVector{store}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, InsertLoadTwice) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto neg = std::make_shared<opset1::Negative>(data);
f = std::make_shared<Function>(NodeVector{neg}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::InsertLoad>();
m.register_pass<snippets::pass::InsertLoad>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load = std::make_shared<snippets::isa::Load>(data);
auto neg = std::make_shared<opset1::Negative>(load);
f_ref = std::make_shared<Function>(NodeVector{neg}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, InsertStoreTwice) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto neg = std::make_shared<opset1::Negative>(data);
f = std::make_shared<Function>(NodeVector{neg}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::InsertStore>();
m.register_pass<snippets::pass::InsertStore>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto neg = std::make_shared<opset1::Negative>(data);
auto store = std::make_shared<snippets::isa::Store>(neg);
f_ref = std::make_shared<Function>(NodeVector{store}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}

View File

@ -0,0 +1,44 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <snippets/snippets_isa.hpp>
#include <snippets/pass/insert_movebroadcast.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
TEST(TransformationTests, InsertBroadcastMove) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto add = std::make_shared<opset1::Add>(data0, data1);
f = std::make_shared<Function>(NodeVector{add}, ParameterVector{data0, data1});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::InsertMoveBroadcast>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto move = std::make_shared<snippets::isa::BroadcastMove>(data1, data0->output(0).get_shape());
auto add = std::make_shared<opset1::Add>(data0, move);
f_ref = std::make_shared<Function>(NodeVector{add}, ParameterVector{data0, data1});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}

View File

@ -0,0 +1,137 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/variant.hpp>
#include <snippets/snippets_isa.hpp>
#include <snippets/register_info.hpp>
#include <snippets/pass/assign_registers.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
TEST(TransformationTests, AssignRegisters) {
std::shared_ptr<Function> f(nullptr);
{
auto p0 = std::make_shared<opset1::Parameter>(element::f32, Shape(1));
auto p1 = std::make_shared<opset1::Parameter>(element::f32, Shape(1));
auto y00 = std::make_shared<snippets::isa::Load>(p0); y00->set_friendly_name("y00");
auto y01 = std::make_shared<snippets::isa::Load>(p1); y01->set_friendly_name("y01");
auto y02 = std::make_shared<opset1::Multiply>(y00, y01); y02->set_friendly_name("y02");
auto y03 = std::make_shared<snippets::isa::Store>(y02); y03->set_friendly_name("y03");
f = std::make_shared<Function>(NodeVector{y03}, ParameterVector{p0, p1});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::AssignRegisters>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
// instead of comparing to a reference function check that registers are correctly assigned
// and stored to runtime info
{
std::map<std::string, size_t> ref_registers {
{"y00", 0},
{"y01", 1},
{"y02", 2}
};
auto total_ops = 0;
for (auto& op : f->get_ordered_ops()) {
auto& rt = op->get_rt_info();
if (auto rinfo = rt["reginfo"]) {
auto reginfo = as_type_ptr<VariantWrapper<std::vector<size_t>>>(rinfo)->get();
auto reg = reginfo[0];
ASSERT_TRUE(ref_registers[op->get_friendly_name()] == reg);
total_ops++;
}
}
ASSERT_EQ(total_ops, ref_registers.size());
}
}
TEST(TransformationTests, AssignRegisters2) {
std::shared_ptr<Function> f(nullptr);
{
auto p0 = std::make_shared<opset1::Parameter>(ngraph::element::f32, Shape());
auto p1 = std::make_shared<opset1::Parameter>(ngraph::element::f32, Shape());
auto p2 = std::make_shared<opset1::Parameter>(ngraph::element::f32, Shape());
auto p3 = std::make_shared<opset1::Parameter>(ngraph::element::f32, Shape());
auto p4 = std::make_shared<opset1::Parameter>(ngraph::element::f32, Shape());
auto p5 = std::make_shared<opset1::Parameter>(ngraph::element::f32, Shape());
auto p6 = std::make_shared<opset1::Parameter>(ngraph::element::f32, Shape());
auto p7 = std::make_shared<opset1::Parameter>(ngraph::element::f32, Shape());
auto c0 = std::make_shared<snippets::isa::Scalar>(ngraph::element::f32, Shape(), 3.14f); c0->set_friendly_name("r00");
auto c1 = std::make_shared<snippets::isa::Scalar>(ngraph::element::f32, Shape(), 6.6260701e-34f); c1->set_friendly_name("r01");
auto y00 = std::make_shared<snippets::isa::Load>(p0); y00->set_friendly_name("r02");
auto y01 = std::make_shared<snippets::isa::Load>(p1); y01->set_friendly_name("r03");
auto y02 = std::make_shared<opset1::Multiply>(y00, c0); y02->set_friendly_name("r04");
auto y03 = std::make_shared<opset1::Multiply>(y01, c1); y03->set_friendly_name("r05");
auto y04 = std::make_shared<snippets::isa::Load>(p2); y04->set_friendly_name("r06");
auto y05 = std::make_shared<snippets::isa::Load>(p3); y05->set_friendly_name("r07");
auto y06 = std::make_shared<opset1::Add>(y02, y03); y06->set_friendly_name("r08");
auto y07 = std::make_shared<opset1::Multiply>(y04, c0); y07->set_friendly_name("r09");
auto y08 = std::make_shared<opset1::Multiply>(y05, c1); y08->set_friendly_name("r10");
auto y09 = std::make_shared<snippets::isa::Load>(p4); y09->set_friendly_name("r11");
auto y10 = std::make_shared<snippets::isa::Load>(p5); y10->set_friendly_name("r12");
auto y11 = std::make_shared<opset1::Add>(y07, y08); y11->set_friendly_name("r13");
auto y12 = std::make_shared<opset1::Multiply>(y09, c0); y12->set_friendly_name("r14");
auto y13 = std::make_shared<opset1::Multiply>(y10, c1); y13->set_friendly_name("r15");
auto y14 = std::make_shared<snippets::isa::Load>(p6); y14->set_friendly_name("r16");
auto y15 = std::make_shared<opset1::Add>(y12, y13); y15->set_friendly_name("r17");
auto y16 = std::make_shared<snippets::isa::Load>(p7); y16->set_friendly_name("r18");
auto y17 = std::make_shared<opset1::Multiply>(y14, c0); y17->set_friendly_name("r19");
auto y18 = std::make_shared<opset1::Multiply>(y16, c1); y18->set_friendly_name("r20");
auto y19 = std::make_shared<opset1::Add>(y06, y11); y19->set_friendly_name("r21");
auto y20 = std::make_shared<opset1::Add>(y17, y18); y20->set_friendly_name("r22");
auto y21 = std::make_shared<opset1::Add>(y15, y19); y21->set_friendly_name("r23");
auto y22 = std::make_shared<opset1::Add>(y20, y21); y22->set_friendly_name("r24");
auto y23 = std::make_shared<snippets::isa::Store>(y22);
f = std::make_shared<Function>(NodeVector{y23}, ParameterVector{p0, p1, p2, p3, p4, p5, p6, p7});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::AssignRegisters>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
// instead of comparing to a reference function check that registers are correctly assigned
// and stored to runtime info
{
std::map<std::string, size_t> ref_registers {
{"r00", 1}, {"r01", 3}, {"r02", 5}, {"r03", 5}, {"r04", 2}, {"r05", 6}, {"r06", 6}, {"r07", 6},
{"r08", 5}, {"r09", 2}, {"r10", 1}, {"r11", 4}, {"r12", 4}, {"r13", 6}, {"r14", 2}, {"r15", 5},
{"r16", 0}, {"r17", 4}, {"r18", 0}, {"r19", 2}, {"r20", 4}, {"r21", 1}, {"r22", 0}, {"r23", 6},
{"r24", 1}
};
        size_t total_ops = 0;
        for (auto& op : f->get_ordered_ops()) {
            auto& rt = op->get_rt_info();
            // use find() rather than operator[]: the latter would insert an empty
            // entry into the node's rt_info for every op without register info
            auto it_rinfo = rt.find("reginfo");
            if (it_rinfo != rt.end()) {
                auto reginfo = as_type_ptr<VariantWrapper<std::vector<size_t>>>(it_rinfo->second)->get();
                auto reg = reginfo[0];
                ASSERT_EQ(ref_registers[op->get_friendly_name()], reg);
total_ops++;
}
}
ASSERT_EQ(total_ops, ref_registers.size());
}
}
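For readers wiring a backend against this pass, a minimal sketch of reading the assignment back from a node is shown below. The helper name `assigned_register` is hypothetical; the `"reginfo"` key and the `VariantWrapper<std::vector<size_t>>` payload are exactly what the test above relies on.

// Hypothetical helper (not part of this commit): fetch the first register
// assigned to a node by snippets::pass::AssignRegisters.
size_t assigned_register(const std::shared_ptr<ngraph::Node>& node) {
    const auto& rt = node->get_rt_info();
    auto it = rt.find("reginfo");
    if (it == rt.end())
        throw ngraph::ngraph_error("node carries no register info");
    return ngraph::as_type_ptr<ngraph::VariantWrapper<std::vector<size_t>>>(it->second)->get()[0];
}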

View File

@ -0,0 +1,154 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <snippets/snippets_isa.hpp>
#include <snippets/pass/collapse_subgraph.hpp>
#include <snippets/op/subgraph.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
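// The Add below feeds two consumers (Subtract and Multiply), so StartSubgraph
// is expected to wrap it into a snippets::op::Subgraph.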
TEST(TransformationTests, StartSubgraphMultipleOutputs) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto add = std::make_shared<opset1::Add>(data0, data1);
auto sub = std::make_shared<opset1::Subtract>(add, data1);
auto mul = std::make_shared<opset1::Multiply>(add, sub);
f = std::make_shared<Function>(NodeVector{mul}, ParameterVector{data0, data1});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::StartSubgraph>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto indata0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto indata1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto add = std::make_shared<snippets::op::Subgraph>(NodeVector{data0, data1},
std::make_shared<Function>(NodeVector{std::make_shared<opset1::Add>(indata0, indata1)}, ParameterVector{indata0, indata1}));
auto sub = std::make_shared<opset1::Subtract>(add, data1);
auto mul = std::make_shared<opset1::Multiply>(add, sub);
f_ref = std::make_shared<Function>(NodeVector{mul}, ParameterVector{data0, data1});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
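// Here every intermediate result has a single consumer, so StartSubgraph
// should leave the function unchanged.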
TEST(TransformationTests, DontStartSubgraphSingleOutput) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto add = std::make_shared<opset1::Add>(data0, data1);
auto sub = std::make_shared<opset1::Subtract>(add, data1);
auto mul = std::make_shared<opset1::Multiply>(data0, sub);
f = std::make_shared<Function>(NodeVector{mul}, ParameterVector{data0, data1});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::StartSubgraph>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto add = std::make_shared<opset1::Add>(data0, data1);
auto sub = std::make_shared<opset1::Subtract>(add, data1);
auto mul = std::make_shared<opset1::Multiply>(data0, sub);
f_ref = std::make_shared<Function>(NodeVector{mul}, ParameterVector{data0, data1});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
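// AttachToSubgraph should pull the Negative into the existing Subgraph; the
// reference function therefore exposes both the inner Add and its negation as
// Subgraph outputs feeding the Concat.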
TEST(TransformationTests, AttachToSubgraph) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto indata0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto indata1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto add = std::make_shared<snippets::op::Subgraph>(NodeVector{data0, data1},
std::make_shared<Function>(NodeVector{std::make_shared<opset1::Add>(indata0, indata1)}, ParameterVector{indata0, indata1}));
auto neg = std::make_shared<opset1::Negative>(add);
auto concat = std::make_shared<opset1::Concat>(NodeVector{add, neg}, 0);
f = std::make_shared<Function>(NodeVector{concat}, ParameterVector{data0, data1});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::AttachToSubgraph>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto indata0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto indata1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto inner = std::make_shared<opset1::Add>(indata0, indata1);
auto add = std::make_shared<snippets::op::Subgraph>(NodeVector{data0, data1},
std::make_shared<Function>(NodeVector{std::make_shared<opset1::Negative>(inner), inner}, ParameterVector{indata0, indata1}));
auto concat = std::make_shared<opset1::Concat>(OutputVector{add->output(0), add->output(1)}, 0);
f_ref = std::make_shared<Function>(NodeVector{concat}, ParameterVector{data0, data1});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
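// Multiply consumes both the Subgraph and Log(Subgraph); merging it into the
// Subgraph would create a cyclic dependency through Log, so the pass must
// leave the function as is.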
TEST(TransformationTests, DontAttachToSubgraphIfLoop) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto indata0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto indata1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto add = std::make_shared<snippets::op::Subgraph>(NodeVector{data0, data1},
std::make_shared<Function>(NodeVector{std::make_shared<opset1::Add>(indata0, indata1)}, ParameterVector{indata0, indata1}));
auto log = std::make_shared<opset1::Log>(add);
auto mul = std::make_shared<opset1::Multiply>(add, log);
f = std::make_shared<Function>(NodeVector{mul}, ParameterVector{data0, data1});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::AttachToSubgraph>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto indata0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
auto indata1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
auto add = std::make_shared<snippets::op::Subgraph>(NodeVector{data0, data1},
std::make_shared<Function>(NodeVector{std::make_shared<opset1::Add>(indata0, indata1)}, ParameterVector{indata0, indata1}));
auto log = std::make_shared<opset1::Log>(add);
auto mul = std::make_shared<opset1::Multiply>(add, log);
f_ref = std::make_shared<Function>(NodeVector{mul}, ParameterVector{data0, data1});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
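Taken together, the two passes above suggest the usual collapse pipeline: StartSubgraph seeds subgraphs and AttachToSubgraph grows them. A minimal sketch, assuming only the pass names exercised in these tests:

// Sketch (not from this commit): seed subgraphs, then grow them greedily.
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::snippets::pass::StartSubgraph>();
manager.register_pass<ngraph::snippets::pass::AttachToSubgraph>();
manager.run_passes(f);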

View File

@ -0,0 +1,72 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
#include <snippets/snippets_isa.hpp>
#include <snippets/pass/vector_to_scalar.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
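// ReplaceLoadsWithScalarLoads is expected to swap every vector Load for a
// ScalarLoad while leaving the surrounding ops and the Store untouched.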
TEST(TransformationTests, ReplaceLoadsWithScalarLoads) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load = std::make_shared<snippets::isa::Load>(data);
auto neg = std::make_shared<opset1::Negative>(load);
auto store = std::make_shared<snippets::isa::Store>(neg);
f = std::make_shared<Function>(NodeVector{store}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::ReplaceLoadsWithScalarLoads>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load = std::make_shared<snippets::isa::ScalarLoad>(data);
auto neg = std::make_shared<opset1::Negative>(load);
auto store = std::make_shared<snippets::isa::Store>(neg);
f_ref = std::make_shared<Function>(NodeVector{store}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
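// The mirror pass: only the Store becomes a ScalarStore; the Load stays vectorized.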
TEST(TransformationTests, ReplaceStoresWithScalarStores) {
std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load = std::make_shared<snippets::isa::Load>(data);
auto neg = std::make_shared<opset1::Negative>(load);
auto store = std::make_shared<snippets::isa::Store>(neg);
f = std::make_shared<Function>(NodeVector{store}, ParameterVector{data});
pass::Manager m;
m.register_pass<pass::InitNodeInfo>();
m.register_pass<snippets::pass::ReplaceStoresWithScalarStores>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}
{
auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
auto load = std::make_shared<snippets::isa::Load>(data);
auto neg = std::make_shared<opset1::Negative>(load);
auto store = std::make_shared<snippets::isa::ScalarStore>(neg);
f_ref = std::make_shared<Function>(NodeVector{store}, ParameterVector{data});
}
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
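A plausible use of these two passes, though not shown in this commit's tests, is lowering a copy of the vector body to scalar form for remainder iterations when the innermost dimension is not a multiple of the vector width. A sketch under that assumption, where `body` is a hypothetical std::shared_ptr<ngraph::Function> holding the vectorized snippet body:

// Sketch (assumption, not from this commit): clone the vector body and
// lower its memory accesses to scalar form for the tail loop.
auto tail = ngraph::clone_function(*body);
ngraph::pass::Manager tail_passes;
tail_passes.register_pass<ngraph::snippets::pass::ReplaceLoadsWithScalarLoads>();
tail_passes.register_pass<ngraph::snippets::pass::ReplaceStoresWithScalarStores>();
tail_passes.run_passes(tail);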

View File

@ -137,7 +137,9 @@ target_link_libraries(${TARGET_NAME} PRIVATE
        # dynamic libraries
        inference_engine_transformations
        inference_engine_lp_transformations
        inference_engine_snippets
        )

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fuse-ld=gold")

View File

@ -158,7 +158,7 @@ namespace nonzero
        return rc;
    }
#undef TYPE_OUT_CASE
    bool evaluate_nonzero(const HostTensorPtr& input, const HostTensorPtr& output)
    {
        bool rc = true;

View File

@ -178,7 +178,7 @@ namespace detail
        return rc;
    }
#undef TYPE_OUT_CASE
    bool evaluate_onehot(const HostTensorVector& output_values,
                         const HostTensorVector& input_values,
                         const int64_t axis)

View File

@ -27,6 +27,7 @@
#include "ngraph/op/parameter.hpp"
#include "ngraph/op/result.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/squared_difference.hpp"
#include "ngraph/op/util/binary_elementwise_arithmetic.hpp"
#include "ngraph/op/util/binary_elementwise_comparison.hpp"
#include "ngraph/op/util/binary_elementwise_logical.hpp"
@ -60,6 +61,7 @@ bool ngraph::op::is_binary_elementwise_logical(const ngraph::Node* node)
bool ngraph::op::supports_auto_broadcast(const ngraph::Node* node)
{
    return dynamic_cast<const ngraph::op::v1::Select*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::v0::SquaredDifference*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::util::BinaryElementwiseComparison*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::util::BinaryElementwiseLogical*>(node) != nullptr ||
           dynamic_cast<const ngraph::op::util::BinaryElementwiseArithmetic*>(node) != nullptr;