[§] cleans snippets interface and adds subgraph tests (#6123)

Marina Kolpakova 2021-06-16 01:00:19 +03:00 committed by GitHub
parent 613bb981ce
commit ad852f78b4
20 changed files with 740 additions and 54 deletions


@ -0,0 +1,56 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <vector>
#include <cstdint>
namespace ngraph {
namespace snippets {
using code = const uint8_t *;
using RegInfo = std::pair<std::vector<size_t>, std::vector<size_t>>;
/**
* @interface Emitter
* @brief Base class for all target specific code emitters used by generator.
* @ingroup snippets
*/
class TRANSFORMATIONS_API Emitter {
public:
/**
* @brief Constructs an emitter for a given operation
*/
Emitter(const std::shared_ptr<ngraph::Node>& n) {
}
Emitter(std::vector<std::pair<std::shared_ptr<Emitter>, RegInfo>>& region) {
}
/**
* @brief called by the generator to produce target code for a specific operation
* @param in vector of vector argument registers
* @param out vector of vector resulting registers
* @param pool optional vector of free vector registers which might be used inside method
* @param gpr vector of free general purpose registers which might be used inside method
* @return void
*/
virtual void emit_code(const std::vector<size_t>& in,
const std::vector<size_t>& out,
const std::vector<size_t>& pool = {},
const std::vector<size_t>& gpr = {}) const = 0;
/**
* @brief called by generator to generate data section, if needed for a specific operation
* @return void
*/
virtual void emit_data() const {
}
};
} // namespace snippets
} // namespace ngraph
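The Emitter interface above only fixes the calling convention; actual instruction selection lives in target-specific subclasses. A minimal sketch of such a subclass follows; the MyAddEmitter class, its namespace, and the printed pseudo-instruction are illustrative assumptions, not part of this commit.

#include <iostream>
#include <memory>
#include <vector>
#include "snippets/emitter.hpp"

namespace my_target {
// Hypothetical emitter for an elementwise Add: a real backend would emit ISA
// instructions here; this sketch just prints the assigned registers.
class MyAddEmitter : public ngraph::snippets::Emitter {
public:
    explicit MyAddEmitter(const std::shared_ptr<ngraph::Node>& n) : Emitter(n) {}
    void emit_code(const std::vector<size_t>& in,
                   const std::vector<size_t>& out,
                   const std::vector<size_t>& pool = {},
                   const std::vector<size_t>& gpr = {}) const override {
        // in[0]/in[1] are the source vector registers, out[0] is the destination.
        std::cout << "vadd v" << out[0] << ", v" << in[0] << ", v" << in[1] << std::endl;
    }
};
} // namespace my_target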


@ -10,49 +10,13 @@
#include <transformations_visibility.hpp>
#include "snippets_isa.hpp"
#include "emitter.hpp"
namespace ngraph {
namespace snippets {
using code = const uint8_t *;
using RegInfo = std::pair<std::vector<size_t>, std::vector<size_t>>;
TRANSFORMATIONS_API auto getRegisters(std::shared_ptr<ngraph::Node>& n) -> ngraph::snippets::RegInfo;
/**
* @interface Emitter
* @brief Base class for all target specific code emitters used by generator.
* @ingroup snippets
*/
class TRANSFORMATIONS_API Emitter {
public:
/**
* @brief Default constructor
*/
Emitter(const std::shared_ptr<ngraph::Node>& n) {
}
/**
* @brief called by the generator to produce target code for a specific operation
* @param in vector of vector argument registers
* @param out vector of vector resulting registers
* @param pool optional vector of free vector registers which might be used inside method
* @param gpr vector of free general purpose registers which might be used inside method
* @return void
*/
virtual void emit_code(const std::vector<size_t>& in,
const std::vector<size_t>& out,
const std::vector<size_t>& pool = {},
const std::vector<size_t>& gpr = {}) const = 0;
/**
* @brief called by generator to generate data section, if needed for a specific operation
* @return void
*/
virtual void emit_data() const {
}
};
/**
* @interface TargetMachine
* @brief Base class for target machine representation. A target derives from this class to provide the generator with information about supported emitters
@ -61,12 +25,45 @@ public:
class TRANSFORMATIONS_API TargetMachine {
public:
/**
* @brief called by the generator to get all the emitters available for a target machine
* @brief checks if target is natively supported
* @return true, if supported
*/
virtual bool is_supported() const = 0;
/**
* @brief finalizes code generation
* @return generated kernel binary
*/
virtual code get_snippet() const = 0;
/**
* @brief gets number of lanes supported by target's vector ISA
* @return number of lanes
*/
virtual size_t get_lanes() const = 0;
/**
* @brief called by the generator to get the emitter for a target machine
* @return a map by node's type info with callbacks to create an instance of emitter for the corresponding operation type
*/
virtual auto getJitters() -> std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>>{
return {};
std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)> get(const ngraph::DiscreteTypeInfo type) const {
auto jitter = jitters.find(type);
if (jitter == jitters.end()) {
throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation.");
}
return jitter->second;
}
/**
* @brief checks if emitter for a specific operation is supported
* @return true, if supported
*/
bool has(const ngraph::DiscreteTypeInfo type) const {
return jitters.find(type) != jitters.end();
}
protected:
std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>> jitters;
};
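A concrete target fills the protected jitters map with emitter factories keyed by operation type. The sketch below is hypothetical: MyTargetMachine and my_target::MyAddEmitter are assumed names, the lane count and null code pointer are placeholders, and the usual ngraph opset headers are assumed to be included.

class MyTargetMachine : public ngraph::snippets::TargetMachine {
public:
    MyTargetMachine() {
        // Register one emitter factory per supported operation type.
        jitters[ngraph::opset1::Add::type_info] = [](std::shared_ptr<ngraph::Node> n) {
            return std::make_shared<my_target::MyAddEmitter>(n);
        };
    }
    bool is_supported() const override { return true; }
    ngraph::snippets::code get_snippet() const override { return nullptr; }  // a real target returns the finalized binary
    size_t get_lanes() const override { return 8; }  // e.g. 8 fp32 lanes on a 256-bit vector ISA
};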
/**
@ -87,6 +84,12 @@ public:
* @param p pointer to generated code
*/
Schedule(const Shape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {}
/**
* @brief Returns a callable instance of the code pointer
*/
template<typename K> K get_callable() const {
return reinterpret_cast<K>(const_cast<unsigned char*>(ptr));
}
Shape work_size {};
bool is_flat {false};
@ -103,7 +106,7 @@ public:
/**
* @brief Default constructor
*/
Generator() = default;
Generator(const std::shared_ptr<TargetMachine>& t) : target(t) {}
/**
* @brief Default destructor
*/
@ -113,10 +116,10 @@ public:
* @param f function in canonical form for table-based code generation
* @return pointer to generated code
*/
virtual code generate(std::shared_ptr<Function>& f) const = 0;
code generate(std::shared_ptr<Function>& f) const;
protected:
mutable std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>> jitters;
std::shared_ptr<TargetMachine> target;
};
} // namespace snippets
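With a target in place, the Generator can be constructed directly and the produced code wrapped into a Schedule. A hedged usage sketch; the compile helper, the output-shape choice, and the kernel signature passed to get_callable are assumptions for illustration, not part of the commit.

// Hypothetical wiring of the pieces above.
ngraph::snippets::Schedule compile(std::shared_ptr<ngraph::Function>& body) {
    auto target = std::make_shared<MyTargetMachine>();
    ngraph::snippets::Generator generator(target);
    ngraph::snippets::code ptr = generator.generate(body);
    // Schedule keeps the work size, the layout flag and the code pointer together.
    return ngraph::snippets::Schedule(body->get_output_shape(0), false, ptr);
}

// At execution time the caller casts the pointer to its own kernel signature:
//   auto kernel = schedule.get_callable<void (*)(const void* const*, void* const*)>();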


@ -0,0 +1,37 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include "ngraph/op/op.hpp"
#include "snippets/emitter.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface Kernel
* @brief Generated by Canonicalization and represents a compute kernel legal for scheduling
* @ingroup snippets
*/
class TRANSFORMATIONS_API Kernel : public ngraph::op::Op {
public:
NGRAPH_RTTI_DECLARATION;
Kernel(const std::vector<std::pair<std::shared_ptr<ngraph::snippets::Emitter>, ngraph::snippets::RegInfo>>& region);
Kernel() = default;
std::vector<std::pair<std::shared_ptr<ngraph::snippets::Emitter>, ngraph::snippets::RegInfo>> region;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
return std::make_shared<Kernel>(region);
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph


@ -23,6 +23,10 @@ public:
Nop(const OutputVector& arguments, const OutputVector& results);
Nop() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
return std::make_shared<Nop>();
}
};
} // namespace op


@ -10,6 +10,7 @@
#include <ngraph/function.hpp>
#include <ngraph/op/op.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pass/manager.hpp>
#include "snippets/generator.hpp"
@ -24,6 +25,47 @@ namespace op {
*/
class TRANSFORMATIONS_API Subgraph : public ngraph::op::Op {
public:
// < 1, 42, 17, 15, 16> < 0, 1, 2, 3, 1>
// should be:
// A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1>
// B = < 1, 1, 17, 15> -> < 1, 1, 17, 15, 16> < 0, 1, 2, 3, 1>
// D = < 1, 42, 1, 1 > -> < 1, 3, 1, 1, 16> < 0, 1, 2, 3, 1> ???
// C = A + B
// C = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1>
//
// how it works now (multi-dimensional broadcast):
// [BroadcastLoad] doesn't perform post increment
// [Load] performs += vlen
// [ScalarLoad] performs += 1
// A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1>
// B = < 1, 1, 17, 15> -> < 1, 1, 17, 15, 1> < 0, 1, 2, 3, 1>
// [A] [B]
// [Load] [ScalarLoad] <- should consider AxisVector to choose right type of load
// [Broadcast]
// [Add]
// [Store]
// [C]
// C = A + B
// C = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1>
//
// Multiple-dimension broadcasts support?
// A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 4>
// B = < 1, 1, 17, 15> -> < 1, 1, 17, 15, 1> < 0, 1, 2, 3, 4>
//
// A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 4>
// B = < 1, 1, 17, 15> -> < 1, 3, 17, 15, 1> < 0, 1, 2, 3, 4>
//
// Collapse most varying dimensions with broadcast
// A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1>
// B = < 1, 1, 17, 15> -> < 1, 3, 17, 15, 1> < 0, 1, 2, 3, 1>
//
// Collapse for mixed broadcast
// A = < 1, 3, 17, 15, 32> < 0, 1, 2, 3, 4>
// B = < 1, 3, 17, 1, 32> < 0, 1, 2, 3, 4>
// C = < 1, 3, 1, 15, 32> < 0, 1, 2, 3, 4>
//
// D = < 1, 3, 17, 15, 32> < 0, 1, 2, 3, 4>
// E = < 1, 3, 17, 1, 32> < 0, 1, 2, 3, 4>
using BlockedShape = std::tuple<ngraph::Shape, ngraph::AxisVector, ngraph::element::Type>;
using BlockedShapeVector = std::vector<BlockedShape>;
@ -49,7 +91,8 @@ public:
std::shared_ptr<Subgraph> make_canonical_from_this();
snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes);
snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes,
ngraph::pass::Manager opt = ngraph::pass::Manager());
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
/// Set a new body for the op; body needs to satisfy requirements on inputs/outputs
@ -62,6 +105,8 @@ public:
void print() const;
void print_statistics(bool verbose);
void serialize() const;
static auto wrap_node_as_subgraph(const std::shared_ptr<ngraph::Node>& node) -> std::shared_ptr<Subgraph>;
private:


@ -0,0 +1,36 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include "ngraph/op/op.hpp"
#include "snippets/emitter.hpp"
namespace ngraph {
namespace snippets {
namespace op {
/**
* @interface Tile
* @brief Generated by Canonicalization and represents Loop in affine notation
* @ingroup snippets
*/
class TRANSFORMATIONS_API Tile : public ngraph::op::Op {
public:
NGRAPH_RTTI_DECLARATION;
Tile(const std::vector<std::pair<std::shared_ptr<ngraph::snippets::Emitter>, ngraph::snippets::RegInfo>>& region);
Tile() = default;
std::vector<std::pair<std::shared_ptr<ngraph::snippets::Emitter>, ngraph::snippets::RegInfo>> region;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
return std::make_shared<Tile>(region);
}
};
} // namespace op
} // namespace snippets
} // namespace ngraph


@ -4,10 +4,18 @@
#include "snippets/generator.hpp"
#include "snippets/register_info.hpp"
#include "snippets/pass/assign_registers.hpp"
#include "snippets/pass/vector_to_scalar.hpp"
#include "snippets/pass/insert_load_store.hpp"
#include "snippets/op/tile.hpp"
#include "snippets/op/kernel.hpp"
#include <ngraph/pass/manager.hpp>
auto ngraph::snippets::getRegisters(std::shared_ptr<ngraph::Node>& n) -> ngraph::snippets::RegInfo {
auto rt = n->get_rt_info();
// ToDo: change to reg_t
std::vector<size_t> rout;
if (auto rinfo = rt["reginfo"]) {
auto reginfo = ngraph::as_type_ptr<ngraph::VariantWrapper<std::vector<size_t>>>(rinfo)->get();
@ -28,3 +36,52 @@ auto ngraph::snippets::getRegisters(std::shared_ptr<ngraph::Node>& n) -> ngraph:
}
return std::make_pair(rin, rout);
}
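getRegisters relies on a "reginfo" entry being present in the node's runtime info, which the AssignRegisters pass is expected to populate. A minimal sketch of the writing side, assuming the VariantWrapper<std::vector<size_t>> specialization from register_info.hpp is constructible from a vector; the helper name is hypothetical.

// Hypothetical helper showing how register assignment could be attached so that
// getRegisters() above can read it back.
void set_reg_info(const std::shared_ptr<ngraph::Node>& n, const std::vector<size_t>& regs) {
    auto& rt = n->get_rt_info();
    rt["reginfo"] = std::make_shared<ngraph::VariantWrapper<std::vector<size_t>>>(regs);
}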
ngraph::snippets::code ngraph::snippets::Generator::generate(std::shared_ptr<ngraph::Function>& f) const {
if (!target->is_supported())
throw ngraph_error("unsupported architecture for code genration");
auto params = f->get_parameters();
auto results = f->get_results();
auto nptrs = results.size() + params.size();
if (nptrs > 7) {
throw ngraph_error("snippet signature should not exceed 7 arguments. got " + std::to_string(nptrs));
}
// vector tile
std::vector<std::pair<std::shared_ptr<ngraph::snippets::Emitter>, ngraph::snippets::RegInfo>> lowered;
for (auto n : f->get_ordered_ops()) {
lowered.push_back(std::make_pair(target->get(n->get_type_info())(n), ngraph::snippets::getRegisters(n)));
}
// scalar tile
auto f_scalar = ngraph::clone_function(*f.get());
ngraph::pass::Manager m;
m.register_pass<ngraph::snippets::pass::ReplaceLoadsWithScalarLoads>();
m.register_pass<ngraph::snippets::pass::ReplaceStoresWithScalarStores>();
m.run_passes(f_scalar);
std::vector<std::pair<std::shared_ptr<Emitter>, RegInfo>> scalar_lowered;
for (auto n : f_scalar->get_ordered_ops()) {
scalar_lowered.push_back(std::make_pair(target->get(n->get_type_info())(n), ngraph::snippets::getRegisters(n)));
}
// wrapping into tiles
std::vector<std::pair<std::shared_ptr<Emitter>, RegInfo>> tiles;
tiles.push_back(std::make_pair(target->get(ngraph::snippets::op::Tile::type_info)(std::make_shared<ngraph::snippets::op::Tile>(lowered)),
std::make_pair(std::vector<size_t>({target->get_lanes(), nptrs}), std::vector<size_t>{})));
tiles.push_back(std::make_pair(target->get(ngraph::snippets::op::Tile::type_info)(std::make_shared<ngraph::snippets::op::Tile>(scalar_lowered)),
std::make_pair(std::vector<size_t>{{1, nptrs}}, std::vector<size_t>{})));
// emission
std::shared_ptr<Emitter> kernel = target->get(ngraph::snippets::op::Kernel::type_info)(std::make_shared<ngraph::snippets::op::Kernel>(tiles));
kernel->emit_code({params.size(), results.size()}, {});
lowered.insert(lowered.end(), scalar_lowered.begin(), scalar_lowered.end());
for (auto& op : lowered) {
op.first->emit_data();
}
return target->get_snippet();
}
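The generated kernel therefore contains two tiles built from the same body: a vector tile whose Load/Store emitters advance by get_lanes() elements per iteration, and a scalar remainder tile that advances by one. The outline below illustrates the control flow only; it is not the code a target actually emits.

// Illustrative outline of the emitted kernel; work_amount and lanes stand for
// the innermost dimension size and target->get_lanes() respectively.
void kernel_outline(size_t work_amount, size_t lanes) {
    size_t i = 0;
    for (; i + lanes <= work_amount; i += lanes) {
        // vector tile: emitters from `lowered`, pointers advance by `lanes`
    }
    for (; i < work_amount; ++i) {
        // scalar tile: emitters from `scalar_lowered`, pointers advance by 1
    }
}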


@ -0,0 +1,14 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/kernel.hpp"
#include "snippets/generator.hpp"
using namespace std;
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::Kernel, "Kernel", 0);
snippets::op::Kernel::Kernel(const std::vector<std::pair<std::shared_ptr<snippets::Emitter>, snippets::RegInfo>>& nested) : Op(), region(nested) {
}


@ -12,6 +12,7 @@
#include "snippets/pass/assign_registers.hpp"
#include <ngraph/pass/manager.hpp>
#include <transformations/serialize.hpp>
#include <algorithm>
#include <memory>
@ -176,9 +177,7 @@ void snippets::op::Subgraph::canonicalize(const BlockedShapeVector& output_shape
if (param->get_element_type() != std::get<2>(input_shapes[i])) {
throw ngraph::ngraph_error("changes in presision. Is it legal??");
}
if (param->get_shape().size() != std::get<0>(input_shapes[i]).size()) {
m_body->replace_parameter(i, std::make_shared<opset1::Parameter>(std::get<2>(input_shapes[i]), std::get<0>(input_shapes[i])));
}
m_body->replace_parameter(i, std::make_shared<opset1::Parameter>(std::get<2>(input_shapes[i]), std::get<0>(input_shapes[i])));
}
}
@ -204,16 +203,20 @@ void snippets::op::Subgraph::convert_to_snippet_dialect() {
manager.run_passes(m_body);
}
snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes) {
snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes,
ngraph::pass::Manager opt) {
INTERNAL_OP_SCOPE(Subgraph);
NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set");
canonicalize(output_shapes, input_shapes);
convert_to_snippet_dialect();
opt.run_passes(m_body);
// generation flow
snippets::pass::AssignRegisters().run_on_function(m_body);
// schedule generation should go here and be target agnostic
// actual code emission
ngraph::snippets::code ptr = m_generator->generate(m_body);
@ -342,3 +345,12 @@ void snippets::op::Subgraph::print_statistics(bool verbose) {
this->print();
}
}
void snippets::op::Subgraph::serialize() const {
std::stringstream xmlFile, binFile;
ngraph::pass::Serialize serializer(xmlFile, binFile, ngraph::pass::Serialize::Version::IR_V10);
serializer.run_on_function(get_body());
auto m_constants = binFile.str();
auto m_model = xmlFile.str();
std::cout << m_model << std::endl;
}
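The new optional pass::Manager argument of Subgraph::generate lets a plugin run extra transformations after the snippet dialect conversion and before register assignment. A hedged usage sketch; MyPluginPass, subgraph, and the blocked shape vectors are placeholder names, not part of the commit.

// Hypothetical call site in a plugin.
ngraph::pass::Manager opt;
opt.register_pass<MyPluginPass>();  // any additional caller-supplied optimization
ngraph::snippets::Schedule schedule =
    subgraph->generate(output_blocked_shapes, input_blocked_shapes, opt);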


@ -0,0 +1,14 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets/op/tile.hpp"
#include "snippets/generator.hpp"
using namespace std;
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(snippets::op::Tile, "Tile", 0);
snippets::op::Tile::Tile(const std::vector<std::pair<std::shared_ptr<snippets::Emitter>, snippets::RegInfo>>& nested) : Op(), region(nested) {
}


@ -12,7 +12,6 @@
#include <ngraph/rt_info.hpp>
#include <ngraph/op/loop.hpp>
#include <memory>
#include <vector>
#include <cassert>
@ -66,10 +65,6 @@ auto has_cycles_of_dependencies(const std::vector<std::set<ngraph::Input<ngraph:
ngraph::Node* curr = stack.front();
visited.insert(curr);
if (ngraph::op::is_output(curr)) {
return false;
}
stack.pop();
if (curr != to) {
@ -189,6 +184,14 @@ auto has_supported_in_out(std::shared_ptr<Node> n) -> bool {
if (in.get_tensor().get_element_type() != ngraph::element::f32) {
return false;
}
if (in.get_partial_shape().is_dynamic()) {
return false;
}
if (in.get_partial_shape().is_static() && in.get_shape().size() > 6) {
return false;
}
}
for (auto out : n->outputs()) {
@ -196,10 +199,22 @@ auto has_supported_in_out(std::shared_ptr<Node> n) -> bool {
return false;
}
if (out.get_partial_shape().is_dynamic()) {
return false;
}
if (out.get_partial_shape().is_static() && out.get_shape().size() > 6) {
return false;
}
for (auto in_out : out.get_target_inputs()) {
if (!!as_type_ptr<ngraph::op::v5::Loop>(in_out.get_node()->shared_from_this())) {
return false;
}
if (!!as_type_ptr<ngraph::op::v0::Result>(in_out.get_node()->shared_from_this())) {
return false;
}
}
}


@ -0,0 +1,29 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "subgraph_tests/codegen_add.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
INSTANTIATE_TEST_CASE_P(NoReshape, CodegenAdd,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({1, 42, 16, 64})),
::testing::Values(InferenceEngine::SizeVector({1, 42, 16, 64}),
InferenceEngine::SizeVector({1, 42, 16, 1}),
InferenceEngine::SizeVector({1, 42, 1, 64}),
InferenceEngine::SizeVector({1, 1, 16, 64})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
CodegenAdd::getTestCaseName);
} // namespace


@ -0,0 +1,26 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "subgraph_tests/codegen_bert.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
INSTANTIATE_TEST_CASE_P(NoReshape, CodegenBert,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({1, 42, 16, 64})),
::testing::Values(InferenceEngine::SizeVector({1, 42, 64, 64})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
CodegenBert::getTestCaseName);
} // namespace


@ -0,0 +1,26 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "subgraph_tests/codegen_gelu.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
INSTANTIATE_TEST_CASE_P(NoReshape, CodegenGelu,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({1, 384, 4096})),
::testing::Values(true, false),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
CodegenGelu::getTestCaseName);
} // namespace


@ -0,0 +1,34 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <vector>
#include <string>
#include <memory>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
namespace LayerTestsDefinitions {
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
InferenceEngine::SizeVector, // Input 0 Shape
InferenceEngine::SizeVector, // Input 1 Shape
std::string // Target Device
> multiInputParams;
class CodegenAdd : public testing::WithParamInterface<LayerTestsDefinitions::multiInputParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::multiInputParams> obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions


@ -0,0 +1,34 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <vector>
#include <string>
#include <memory>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
namespace LayerTestsDefinitions {
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
InferenceEngine::SizeVector, // Input 0 Shape
InferenceEngine::SizeVector, // Input 1 Shape
std::string // Target Device
> multiInputParams;
class CodegenBert : public testing::WithParamInterface<LayerTestsDefinitions::multiInputParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::multiInputParams> obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions


@ -0,0 +1,34 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <vector>
#include <string>
#include <memory>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
namespace LayerTestsDefinitions {
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
InferenceEngine::SizeVector, // Input Shape,
bool,
std::string // Target Device
> multiInputParams;
class CodegenGelu : public testing::WithParamInterface<LayerTestsDefinitions::multiInputParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::multiInputParams> obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions


@ -0,0 +1,60 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
#include "subgraph_tests/codegen_add.hpp"
namespace LayerTestsDefinitions {
std::string CodegenAdd::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::multiInputParams> obj) {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShapes0, inputShapes1, newInputShapes;
std::string targetDevice;
std::tie(netPrecision, inputShapes0, inputShapes1, targetDevice) = obj.param;
std::ostringstream result;
result << "IS[0]=" << CommonTestUtils::vec2str(inputShapes0) << "_";
result << "IS[1]=" << CommonTestUtils::vec2str(inputShapes1) << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice;
return result.str();
}
void CodegenAdd::SetUp() {
std::vector<size_t> inputShape0, inputShape1;
InferenceEngine::Precision netPrecision;
std::tie(netPrecision, inputShape0, inputShape1, targetDevice) = this->GetParam();
auto input0 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{inputShape0});
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{inputShape1});
auto add = std::make_shared<ngraph::opset1::Add>(input0, input1);
auto neg = std::make_shared<ngraph::opset1::Negative>(add);
auto result = std::make_shared<ngraph::opset1::Result>(neg);
function = std::make_shared<ngraph::Function>(
ngraph::ResultVector{result},
ngraph::ParameterVector{input0, input1},
"CodegenAdd");
}
TEST_P(CodegenAdd, CompareWithRefImpl) {
Run();
};
} // namespace LayerTestsDefinitions


@ -0,0 +1,76 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
#include "subgraph_tests/codegen_bert.hpp"
namespace LayerTestsDefinitions {
std::string CodegenBert::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::multiInputParams> obj) {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShapes0, inputShapes1, newInputShapes;
std::string targetDevice;
std::tie(netPrecision, inputShapes0, inputShapes1, targetDevice) = obj.param;
std::ostringstream result;
result << "IS[0]=" << CommonTestUtils::vec2str(inputShapes0) << "_";
result << "IS[1]=" << CommonTestUtils::vec2str(inputShapes1) << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice;
return result.str();
}
// the simplest possible eltwise operation with streaming access to the data
void CodegenBert::SetUp() {
std::vector<size_t> inputShape0, inputShape1;
InferenceEngine::Precision netPrecision;
std::tie(netPrecision, inputShape0, inputShape1, targetDevice) = this->GetParam();
auto shape = ngraph::Shape{inputShape0};
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, shape);
auto input2 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, shape);
auto shapeMM = ngraph::Shape{inputShape1};
auto input3 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, shapeMM);
auto add = std::make_shared<ngraph::opset1::Add>(input1, input2);
auto mm = std::make_shared<ngraph::opset1::MatMul>(add, input3);
std::vector<float> vals(ngraph::shape_size(shape));
for (int i = 0; i < vals.size(); i++) {
vals[i] = static_cast<float>(i)*vals.size();
}
auto c0 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, shape);
auto add2 = std::make_shared<ngraph::opset1::Subtract>(mm, c0);
auto add3 = std::make_shared<ngraph::opset1::Multiply>(add, add2);
auto result = std::make_shared<ngraph::opset1::Result>(add3);
function = std::make_shared<ngraph::Function>(
ngraph::ResultVector{result},
// parameters should be passed in topological order for the reference code to be executed correctly
ngraph::ParameterVector{input1, input2, c0, input3},
"CodegenBert");
}
TEST_P(CodegenBert, CompareWithRefImpl) {
Run();
};
} // namespace LayerTestsDefinitions


@ -0,0 +1,74 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
#include "subgraph_tests/codegen_gelu.hpp"
#include <ngraph/pass/constant_folding.hpp>
#include <ngraph/pass/visualize_tree.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
namespace LayerTestsDefinitions {
std::string CodegenGelu::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::multiInputParams> obj) {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShapes0, newInputShapes;
bool useSubgraph;
std::string targetDevice;
std::tie(netPrecision, inputShapes0, useSubgraph, targetDevice) = obj.param;
std::ostringstream result;
result << "IS[0]=" << CommonTestUtils::vec2str(inputShapes0) << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "overSnippet=" << (useSubgraph ? "yes" : "no") << "_";
result << "targetDevice=" << targetDevice;
return result.str();
}
// Gelu from bert-large-uncased-whole-word-masking-squad-fp32-onnx-0001
void CodegenGelu::SetUp() {
std::vector<size_t> inputShape0;
InferenceEngine::Precision netPrecision;
bool useSubgraph;
std::tie(netPrecision, inputShape0, useSubgraph, targetDevice) = this->GetParam();
auto input0 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{inputShape0});
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{inputShape0});
auto add = std::make_shared<ngraph::opset1::Add>(input0, input1);
auto gelu = std::make_shared<ngraph::opset2::Gelu>(add);
auto result = std::make_shared<ngraph::opset1::Result>(gelu);
function = std::make_shared<ngraph::Function>(
ngraph::ResultVector{result},
ngraph::ParameterVector{input0, input1},
"CodegenGelu");
if (useSubgraph) {
ngraph::pass::InitNodeInfo().run_on_function(function);
ngraph::pass::ConstantFolding().run_on_function(function);
}
}
TEST_P(CodegenGelu, CompareWithRefImpl) {
Run();
};
} // namespace LayerTestsDefinitions