[Snippets] Removed limitation on Subgraph creation after Parameters (#13893)
Commit ba4edc08d9 (parent 0ce82204bb), committed via GitHub.
@@ -1603,7 +1603,7 @@ void Graph::EnforceBF16() {
                     // Concatenation node is an exception because it doesn't change an accuracy for BF16 activation
                     node->getType() != Type::Concatenation) &&
                     // exclude Eltwise after Input since it supports conversion to BF16
-                    !(parent->getType() == Type::Input && node->getType() == Type::Eltwise) &&
+                    !(parent->getType() == Type::Input && (node->getType() == Type::Eltwise || node->getType() == Type::Subgraph)) &&
                     node->getOriginalInputPrecisionAtPort(i) == Precision::FP32)
                     node->setOriginalInputPrecisionAtPort(i, Precision::BF16);
         }
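The functional core of the commit is this one-line widening of the exclusion predicate in `Graph::EnforceBF16`: a `Subgraph` node directly after an `Input` now keeps its FP32 input precision, exactly as `Eltwise` already did, since both handle the BF16 conversion internally. A minimal standalone sketch of the updated condition; `Type`, `Precision`, and `Node` here are simplified stand-ins, not the CPU plugin's real classes:

```cpp
#include <cassert>

enum class Type { Input, Eltwise, Subgraph, Concatenation, Convolution };
enum class Precision { FP32, BF16 };

struct Node {
    Type type;
    Precision inputPrecision = Precision::FP32;
};

// Mirrors the updated predicate: downgrade an FP32 input edge to BF16 unless
// the consumer is a Concatenation, or an Eltwise/Subgraph fed directly by an
// Input (those perform the conversion themselves).
bool shouldEnforceBF16(const Node& parent, const Node& node) {
    return node.type != Type::Concatenation &&
           !(parent.type == Type::Input &&
             (node.type == Type::Eltwise || node.type == Type::Subgraph)) &&
           node.inputPrecision == Precision::FP32;
}

int main() {
    Node input{Type::Input}, subgraph{Type::Subgraph}, conv{Type::Convolution};
    assert(!shouldEnforceBF16(input, subgraph));  // the newly excluded case
    assert(shouldEnforceBF16(input, conv));       // still enforced as before
    return 0;
}
```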
@@ -433,10 +433,7 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr<ov::Model> &m) {
     for (auto &node : m->get_ordered_ops()) {
         if (ngraph::op::is_constant(node))
             continue;
-
-        if (ngraph::op::is_parameter(node)) {
-            SetNodeFusingType(node, NodeFusingType::IgnoredAfterInputs);
-        } else if (isSuitableConvolutionParent(node)) {
+        if (isSuitableConvolutionParent(node)) {
             // Initiate fusing chain
             SetNodeFusingType(node, NodeFusingType::FusedWithConvolution);
             channelAxis = DEFAULT_AXIS;
@@ -490,12 +487,6 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr<ov::Model> &m) {
             NodeFusingType updatedChainType = fusingChainType;
             if (isSuitableChildForFusingMatMul(node, isExecutedInINT8, updatedChainType, channelAxis))
                 PropagateIfHasOnlyChild(node, updatedChainType);
-        } else if (fusingChainType == NodeFusingType::IgnoredAfterInputs && (snippets::pass::AppropriateForSubgraph(node) ||
-                   ov::is_type<ngraph::op::v0::Convert>(node) || ov::is_type<ngraph::op::v1::Transpose>(node))) {
-            // In OV API 2.0 a Convert node is inserted after Input nodes with I8/U8 precisions; moreover, on TF models a
-            // Transpose layer is inserted. This breaks the idea of leaving an Eltwise node with I8/U8 inputs and FP32 outputs instead of a Subgraph node
-            // TODO: Remove the additional check on Convert/Transpose here after enabling Subgraphs with I8/U8 inputs and FP32 outputs
-            SetNodeFusingType(node, NodeFusingType::IgnoredAfterInputs);
             }
         }
     }
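Taken together, the two SnippetsMarkSkipped hunks above remove the `IgnoredAfterInputs` seeding: Parameter nodes no longer start a "skip" chain, so eltwise chains right after model inputs become eligible for snippets tokenization. A simplified before/after sketch of the marking loop; the types and the `legacyBehavior` switch are illustrative stand-ins, not the plugin's real pass:

```cpp
#include <iostream>
#include <vector>

// Simplified stand-ins; not the plugin's real node or pass types.
enum class FusingType { NotSet, IgnoredAfterInputs, FusedWithConvolution };

struct Node {
    bool isParameter = false;
    bool isSuitableConvolutionParent = false;
    FusingType fusing = FusingType::NotSet;
};

// Before this commit (legacyBehavior == true): Parameters were tagged
// IgnoredAfterInputs, and the tag propagated down eltwise chains, blocking
// Subgraph creation. After: the Parameter branch is gone, so nodes right
// after inputs are judged on their own merits.
void markSkipped(std::vector<Node>& nodes, bool legacyBehavior) {
    for (auto& node : nodes) {
        if (legacyBehavior && node.isParameter) {
            node.fusing = FusingType::IgnoredAfterInputs;
        } else if (node.isSuitableConvolutionParent) {
            node.fusing = FusingType::FusedWithConvolution;
        }
    }
}

int main() {
    std::vector<Node> graph{{/*isParameter=*/true}, {false, /*convParent=*/true}};
    markSkipped(graph, /*legacyBehavior=*/false);
    std::cout << (graph[0].fusing == FusingType::NotSet) << "\n";  // 1: input no longer tagged
}
```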
@@ -37,7 +37,7 @@ enum class NodeFusingType : int64_t {
     NotSet,
     FusedTerminator,
     FusedWithConvolution, FusedWithBinaryConvolution, FusedWithConvolutionSumActivation,
-    FusedWithMatMul, FusedWithMatMulI8, FusedWithReduce, FusedWithMisc, IgnoredAfterInputs};
+    FusedWithMatMul, FusedWithMatMulI8, FusedWithReduce, FusedWithMisc};

 }   // namespace intel_cpu
 }   // namespace ov
@@ -19,7 +19,7 @@ std::string getInputMemoryFormats(const std::shared_ptr<ngraph::Node>& node) {
     auto it_info = node->get_rt_info().find(InputMemoryFormats::get_type_info_static());
     if (it_info != node->get_rt_info().end()) {
         if (it_info->second.is<InputMemoryFormats>()) {
-            return it_info->second.as<InputMemoryFormats>().getMemoryFormats();
+            return it_info->second.as<InputMemoryFormats>().to_string();
         }
     }
     return {};
@@ -31,7 +31,7 @@ std::string getOutputMemoryFormats(const std::shared_ptr<ngraph::Node>& node) {
     auto it_info = node->get_rt_info().find(OutputMemoryFormats::get_type_info_static());
     if (it_info != node->get_rt_info().end()) {
         if (it_info->second.is<OutputMemoryFormats>()) {
-            return it_info->second.as<OutputMemoryFormats>().getMemoryFormats();
+            return it_info->second.as<OutputMemoryFormats>().to_string();
         }
     }
     return {};
@@ -25,7 +25,7 @@ protected:
 public:
     MemoryFormats() = default;
     explicit MemoryFormats(const std::string &_memory_format) : memory_format(_memory_format) {}
-    std::string getMemoryFormats() const { return memory_format; }
+    std::string to_string() const override { return memory_format; };
     bool is_copyable(const std::shared_ptr<ov::Node>& to) const override {
         return (!ov::op::util::is_constant(to));
     }
@@ -36,7 +36,7 @@ public:
     for (auto &node : nodes) {
         auto it_info = node->get_rt_info().find(MemoryFormat::get_type_info_static());
         if (it_info != node->get_rt_info().end()) {
-            std::string mem_format = it_info->second.template as<MemoryFormat>().getMemoryFormats();
+            std::string mem_format = it_info->second.template as<MemoryFormat>().to_string();
             if (!mem_format.empty()) {
                 unique_mem_format.insert(mem_format);
             }
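The four memory-format hunks above retire the bespoke `getMemoryFormats()` accessor in favor of the `to_string()` virtual that the attribute's base class declares, so generic rt_info consumers can print the value polymorphically. A standalone sketch of that pattern; `RuntimeAttributeBase` is a stand-in, not OpenVINO's actual `ov::RuntimeAttribute` hierarchy:

```cpp
#include <iostream>
#include <memory>
#include <string>

// Stand-in for a runtime-attribute base class with a printable interface.
struct RuntimeAttributeBase {
    virtual ~RuntimeAttributeBase() = default;
    virtual std::string to_string() const { return {}; }
};

// The attribute used to expose a bespoke getMemoryFormats() accessor;
// routing the value through to_string() lets generic code print it
// without knowing the concrete attribute type.
struct MemoryFormats : RuntimeAttributeBase {
    explicit MemoryFormats(std::string fmt) : memory_format(std::move(fmt)) {}
    std::string to_string() const override { return memory_format; }
private:
    std::string memory_format;
};

int main() {
    std::unique_ptr<RuntimeAttributeBase> attr =
        std::make_unique<MemoryFormats>("nChw16c");
    std::cout << attr->to_string() << "\n";  // generic access, prints nChw16c
}
```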
@@ -128,7 +128,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["CONV_2"] = "BF16";
     }

@@ -98,7 +98,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["CONV_2"] = "BF16";
     }

@@ -117,7 +117,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["RELU"] = "ndef";
     }

@@ -105,7 +105,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["Add_4"] = "ndef";
+        expectedPrecisions["Add_4"] = netPrecision.name();
         expectedPrecisions["Convolution_6"] = "BF16";
     }
 };

@@ -123,7 +123,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["CONV_2"] = "BF16";
         expectedPrecisions["ELT_1"] = "ndef";

@@ -127,7 +127,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["CONV_2"] = "BF16";
         expectedPrecisions["RELU_1"] = "ndef";

@@ -121,7 +121,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["ELT_1"] = "ndef";
     }

@@ -109,7 +109,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["CONV_2"] = "BF16";
     }

@@ -97,7 +97,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["RELU_1"] = "ndef";
     }

@@ -114,7 +114,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["CONV_2"] = "BF16";
     }

@@ -106,7 +106,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["CONV_2"] = "BF16";
         expectedPrecisions["ELT_1"] = "ndef";

@@ -106,7 +106,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["ADD_1"] = "ndef";
+        expectedPrecisions["ADD_1"] = netPrecision.name();
         expectedPrecisions["CONV_1"] = "BF16";
         expectedPrecisions["CONV_2"] = "BF16";
         expectedPrecisions["ELT_1"] = "ndef";

@@ -110,7 +110,7 @@ protected:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
         expectedPrecisions["CONV_1"] = "BF16";
-        expectedPrecisions["ADD_2"] = "ndef";
+        expectedPrecisions["ADD_2"] = netPrecision.name();
         expectedPrecisions["CONV_2"] = "BF16";
         expectedPrecisions["ELT_1"] = "ndef";
     }

@@ -141,7 +141,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["Add_1"] = "ndef";
+        expectedPrecisions["Add_1"] = netPrecision.name();
         expectedPrecisions["Convolution_1"] = "BF16";
         expectedPrecisions["Convolution_2"] = "BF16";
         expectedPrecisions["ELT_1"] = "ndef";

@@ -128,10 +128,9 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["Add_1"] = "ndef";
-        expectedPrecisions["Add_2"] = "ndef";
+        expectedPrecisions["Add_2"] = netPrecision.name();
         expectedPrecisions["Convolution_1"] = "BF16";
-        expectedPrecisions["ELT_1"] = "ndef";
+        expectedPrecisions["ELT_1"] = netPrecision.name();
     }
 };

@@ -147,9 +147,9 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["Add_1"] = "ndef";
+        expectedPrecisions["Add_1"] = netPrecision.name();
         expectedPrecisions["Convolution_1"] = "BF16";
-        expectedPrecisions["Add_2"] = "ndef";
+        expectedPrecisions["Add_2"] = netPrecision.name();
         expectedPrecisions["ELT_1"] = "ndef";
         expectedPrecisions["RELU_1"] = "ndef";
     }

@@ -112,7 +112,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["Add_4"] = "ndef";
+        expectedPrecisions["Add_4"] = netPrecision.name();
         expectedPrecisions["Convolution_6"] = "BF16";
     }
 };

@@ -132,7 +132,7 @@ protected:
         // STAGE2:
         // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
         // performance counters
-        expectedPrecisions["Add_4"] = "ndef";
+        expectedPrecisions["Add_4"] = netPrecision.name();
         expectedPrecisions["Convolution_1"] = "BF16";
         expectedPrecisions["Convolution_2"] = "BF16";
         expectedPrecisions["TopK_1"] = netPrecision.name(); // tail kept in FP32 precision
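Every test hunk above replaces a hard-coded "ndef" with `netPrecision.name()`: once the eltwise after the inputs can be tokenized into a Subgraph, its performance counter reports a concrete execution precision instead of "not defined". A hypothetical, simplified version of the kind of check these tests perform against performance counters (`checkPrecisions` is illustrative, not the suite's real helper):

```cpp
#include <iostream>
#include <map>
#include <string>

// Hypothetical check: every layer listed in the expectation table must have
// a matching execution precision in the collected performance counters.
bool checkPrecisions(const std::map<std::string, std::string>& perfCounters,
                     const std::map<std::string, std::string>& expected) {
    for (const auto& [layer, precision] : expected) {
        auto it = perfCounters.find(layer);
        if (it == perfCounters.end() || it->second != precision)
            return false;
    }
    return true;
}

int main() {
    std::map<std::string, std::string> counters{{"ADD_1", "BF16"}, {"CONV_1", "BF16"}};
    std::map<std::string, std::string> expected;
    expected["ADD_1"] = "BF16";   // was "ndef" before tokenization into a Subgraph
    expected["CONV_1"] = "BF16";
    std::cout << checkPrecisions(counters, expected) << "\n";  // prints 1
}
```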
@@ -19,8 +19,8 @@ const std::vector<RuntimePrecisionSpecificParams> params = {
     {makeEltwiseFunction, {Precision::BF16, Precision::BF16}, {{"Eltwise", Precision::BF16}}},
     {makeEltwiseFunction, {Precision::U8, Precision::U8}, {{"Eltwise", Precision::U8}}},
     {makeEltwiseFunction, {Precision::I8, Precision::I8}, {{"Eltwise", Precision::I8}}},
-    {makeFakeQuantizeReluFunction, {Precision::FP32}, {{"FakeQuantize", Precision::FP32}, {"Relu_original", Precision::U8}}},
-    {makeFakeQuantizeReluFunction, {Precision::U8}, {{"FakeQuantize", Precision::U8}, {"Relu", Precision::U8}}},
+    {makeFakeQuantizeReluFunction, {Precision::FP32}, {{"Relu", Precision::FP32}}},
+    {makeFakeQuantizeReluFunction, {Precision::U8}, {{"Relu", Precision::U8}}},
     {makeFakeQuantizeBinaryConvolutionFunction, {Precision::FP32}, {{"FakeQuantize", Precision::FP32}, {"BinaryConvolution", Precision::BIN}}},
 };
@@ -51,6 +51,11 @@ const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
         { 256ul, {}, { -127.5f }, { 0.f }, { -127.5f }, { 0.f } },
         "Pooling", "U8"
     },
+    // corner case: FQ with equal constant values
+    {
+        { 256ul, {}, { 0.f }, { 0.f }, { 0.f }, { 0.f } },
+        "Pooling", "U8"
+    },
     {
         { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
         "Pooling", "U8"
@@ -15,6 +15,10 @@ const std::vector<ngraph::element::Type> netPrecisions = {
     //ngraph::element::f16
 };

+// If snippets fuse all operations into one subgraph node,
+// it's impossible to extract exec precision for the specific layer
+const auto precision_for_fused_cases = ov::element::undefined;
+
 const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
     {
         false,

@@ -22,7 +26,7 @@ const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
         false,
         { 256ul, ngraph::Shape {}, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
-        ngraph::element::f32,
+        precision_for_fused_cases,
         true
     },
     {

@@ -31,7 +35,7 @@ const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
         false,
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
-        ngraph::element::i8,
+        precision_for_fused_cases,
         false
     },
     {

@@ -40,7 +44,7 @@ const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
         false,
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
-        ngraph::element::u8,
+        precision_for_fused_cases,
         false
     },
     {

@@ -49,7 +53,7 @@ const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
         false,
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
-        ngraph::element::u8,
+        precision_for_fused_cases,
         false
     },
     {

@@ -58,7 +62,7 @@ const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
         false,
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
-        ngraph::element::i8,
+        precision_for_fused_cases,
         false
     },
     {

@@ -67,7 +71,7 @@ const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
         true,
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
-        ngraph::element::i8,
+        precision_for_fused_cases,
         false
     },
     {

@@ -76,7 +80,7 @@ const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
         false,
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
-        ngraph::element::u8,
+        precision_for_fused_cases,
         false
     },
     {

@@ -85,7 +89,7 @@ const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
         true,
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.27f }, { 1.28f }, { -1.27f }, { 1.28f } },
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
-        ngraph::element::u8,
+        precision_for_fused_cases,
         false
     },
     { false, {}, false, {}, {}, ngraph::element::f32, false },
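The new `precision_for_fused_cases = ov::element::undefined` constant threaded through the parameters above encodes "no per-layer expectation": when snippets fuse the whole pattern into one Subgraph, no execution precision can be attributed to the original Multiply. A sketch of how a checker might consume such a sentinel (hypothetical helper; `std::optional` stands in for `ov::element::undefined`):

```cpp
#include <iostream>
#include <optional>
#include <string>

// An empty expectation means "skip the per-layer precision check",
// mirroring ov::element::undefined used as precision_for_fused_cases.
bool precisionMatches(const std::optional<std::string>& expected,
                      const std::string& actual) {
    if (!expected)
        return true;  // layer was fused away; nothing to verify
    return *expected == actual;
}

int main() {
    std::cout << precisionMatches(std::nullopt, "BF16") << "\n";        // 1: fused case
    std::cout << precisionMatches(std::string("I8"), "BF16") << "\n";   // 0: mismatch
}
```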
@@ -17,30 +17,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, Add,
                         ::testing::Values(ov::Shape {1, 42, 16, 64}),
                         ::testing::Values(ov::Shape {1, 42, 16, 1}),
                         ::testing::Values(ov::element::f32),
-                        ::testing::Values(1), // one node - Add
-                        ::testing::Values(0), // SnippetsMarkSkipped disables tokenization for eltwise chains after inputs
+                        ::testing::Values(1),
+                        ::testing::Values(1),
                         ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                 Add::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, AddSinh,
-                ::testing::Combine(
-                        ::testing::Values(ov::Shape {1, 42, 16, 64}),
-                        ::testing::Values(ov::Shape {1, 42, 16, 1}),
-                        ::testing::Values(ov::element::f32),
-                        ::testing::Values(3), // Add + 2 sinh after inputs
-                        ::testing::Values(1), // Subgraph is created, since the inputs are followed by converts
-                        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
-                AddSinh::getTestCaseName);
-
-INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, AddSinhConst,
-                ::testing::Combine(
-                        ::testing::Values(ov::Shape {1, 42, 16, 64}),
-                        ::testing::Values(ov::element::f32),
-                        ::testing::Values(2), // Add + sinh after inputs
-                        ::testing::Values(1), // Subgraph is created, since the inputs are followed by converts
-                        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
-                AddSinhConst::getTestCaseName);
-
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, AddRollConst,
                 ::testing::Combine(
                         ::testing::Values(ov::Shape {1, 42, 16, 64}),

@@ -58,8 +39,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise_BF16, AddRollConst,
                         ::testing::Values(1), // Subgraph is created, since the inputs are followed by converts
                         ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                 AddRollConst::getTestCaseName);
-
-
 }  // namespace
 }  // namespace snippets
 }  // namespace test
@@ -40,7 +40,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Convert, Convert,
                          ::testing::Combine(
                                  ::testing::ValuesIn(inputShapes_Convert),
                                  ::testing::ValuesIn(types_Convert),
-                                 ::testing::Values(2),
+                                 ::testing::Values(1),
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);

@@ -67,7 +67,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertInput, ConvertInput,
                          ::testing::Combine(
                                  ::testing::ValuesIn(inputShapes_ConvertInput),
                                  ::testing::ValuesIn(types_ConvertInput),
-                                 ::testing::Values(3),
+                                 ::testing::Values(1),
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);

@@ -76,7 +76,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertOutput, ConvertOutput,
                          ::testing::Combine(
                                  ::testing::ValuesIn(inputShapes_ConvertInput),
                                  ::testing::ValuesIn(types_ConvertInput),
-                                 ::testing::Values(3),
+                                 ::testing::Values(1),
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);

@@ -85,7 +85,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertStub, ConvertStub,
                          ::testing::Combine(
                                  ::testing::ValuesIn(inputShapes_ConvertInput),
                                  ::testing::ValuesIn(types_ConvertInput),
-                                 ::testing::Values(4),
+                                 ::testing::Values(2),
                                  ::testing::Values(2),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);

@@ -104,7 +104,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertPartialInputsAndResults, ConvertP
                          ::testing::Combine(
                                  ::testing::ValuesIn(inputShapes_ConvertPartialInputsAndResults),
                                  ::testing::ValuesIn(types_ConvertPartialInputsAndResults),
-                                 ::testing::Values(6),
+                                 ::testing::Values(2), // subgraph & roll after subgraph
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);

@@ -119,7 +119,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnInputs, ConvertManyOnInputs
                          ::testing::Combine(
                                  ::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
                                  ::testing::ValuesIn(types_ConvertMany),
-                                 ::testing::Values(2),
+                                 ::testing::Values(1),
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);

@@ -128,7 +128,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnOutputs, ConvertManyOnOutpu
                          ::testing::Combine(
                                  ::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
                                  ::testing::ValuesIn(types_ConvertMany),
-                                 ::testing::Values(2), // sinh + subgraph
+                                 ::testing::Values(1),
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);

@@ -142,7 +142,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ConvertManyOnInputOutput, ConvertManyOnI
                          ::testing::Combine(
                                  ::testing::Values(std::vector<ov::Shape>{ov::Shape{5, 5, 5, 5}}),
                                  ::testing::ValuesIn(types_ConvertManyIO),
-                                 ::testing::Values(2), // sinh + subgraph
+                                 ::testing::Values(1),
                                  ::testing::Values(1),
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                          Convert::getTestCaseName);
@@ -14,7 +14,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, EltwiseTwoResults,
         ::testing::Combine(
                 ::testing::Values(ov::Shape {1, 64, 10, 10}),
                 ::testing::Values(ov::Shape {1, 64, 10, 1}),
-                ::testing::Values(4),
+                ::testing::Values(2),
                 ::testing::Values(2),
                 ::testing::Values(CommonTestUtils::DEVICE_CPU)),
         EltwiseTwoResults::getTestCaseName);
@@ -12,13 +12,13 @@ namespace {
 // Note that we need these shapes to cover all cases of code emission (none/one/multiple of scalar/vector tiles)
 std::vector<ov::Shape> input_shapes {{1, 64, 10, 10}, {1, 1, 17, 37}, {1, 1, 1, 1}, {1, 1, 1, 7},
                                      {1, 1, 1, 128}, {1, 1, 1, 14}, {1, 1, 1, 16}, {1, 1, 1, 30}};
-INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, MaxNumParamsEltwiseSinh,
+INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, MaxNumParamsEltwise,
                 ::testing::Combine(
                         ::testing::ValuesIn(input_shapes),
-                        ::testing::Values(12), // 10 Sinh after inputs + Subgraph + Concat
+                        ::testing::Values(2), // Subgraph + Concat
                         ::testing::Values(1),
                         ::testing::Values(CommonTestUtils::DEVICE_CPU)),
-                MaxNumParamsEltwiseSinh::getTestCaseName);
+                MaxNumParamsEltwise::getTestCaseName);

 }  // namespace
 }  // namespace snippets
@@ -15,21 +15,11 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, ThreeInputsEltwise,
                 ::testing::Values(ov::Shape {1, 64, 10, 10}),
                 ::testing::Values(ov::Shape {1, 64, 10, 1}),
                 ::testing::Values(ov::Shape {1, 1, 1, 10}),
-                ::testing::Values(2), // eltwises fuse only for non-broadcasted shapes
-                ::testing::Values(0), // SnippetsMarkSkipped disables tokenization for eltwise chains after inputs
+                ::testing::Values(1), // eltwises fuse only for non-broadcasted shapes
+                ::testing::Values(1),
                 ::testing::Values(CommonTestUtils::DEVICE_CPU)),
         ThreeInputsEltwise::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, ThreeInputsEltwiseSinh,
-        ::testing::Combine(
-                ::testing::Values(ov::Shape {1, 64, 10, 10}),
-                ::testing::Values(ov::Shape {1, 64, 10, 1}),
-                ::testing::Values(ov::Shape {1, 1, 1, 10}),
-                ::testing::Values(4), // Subgraph + 3 converts after inputs
-                ::testing::Values(1), // Subgraph is created, since the inputs are followed by converts
-                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
-        ThreeInputsEltwiseSinh::getTestCaseName);
-
 }  // namespace
 }  // namespace snippets
 }  // namespace test
@@ -34,7 +34,7 @@ const std::vector<std::vector<ov::Shape>> input_shapes = {
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, TwoInputsAndOutputs,
         ::testing::Combine(
                 ::testing::ValuesIn(input_shapes),
-                ::testing::Values(4),
+                ::testing::Values(2),
                 ::testing::Values(1),
                 ::testing::Values(CommonTestUtils::DEVICE_CPU)),
         TwoInputsAndOutputs::getTestCaseName);
@@ -156,7 +156,6 @@ std::vector<Precision> netPrc = {

 /* ============= Activation (1D) ============= */
 std::vector<CPUSpecificParams> cpuParams_3D = {
-    CPUSpecificParams({nCw16c}, {nCw16c}, {}, {}),
     CPUSpecificParams({nwc}, {nwc}, {}, {}),
     CPUSpecificParams({ncw}, {ncw}, {}, {})
 };

@@ -178,6 +177,27 @@ const auto basicCases3D = ::testing::Combine(

 INSTANTIATE_TEST_SUITE_P(smoke_Activation3D_Eltwise_CPU_BF16, ActivationLayerCPUTest, basicCases3D, ActivationLayerCPUTest::getTestCaseName);

+const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes_blocked = {
+    {Mish, {{}}},
+    {SoftSign, {{}}}
+};
+
+std::vector<CPUSpecificParams> cpuParams_3D_blocked = {
+    CPUSpecificParams({nCw16c}, {nCw16c}, {}, {}),
+};
+
+const auto blockedCases3D = ::testing::Combine(
+    ::testing::ValuesIn(static_shapes_to_test_representation(basic3D)),
+    ::testing::Values(activationShapes),
+    ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes_blocked)),
+    ::testing::ValuesIn(netPrc),
+    ::testing::Values(Precision::FP32),
+    ::testing::Values(Precision::FP32),
+    ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_3D_blocked))
+);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Activation3D_Eltwise_CPU_BF16_Blocked, ActivationLayerCPUTest, blockedCases3D, ActivationLayerCPUTest::getTestCaseName);
+
 /* ============= Activation (2D) ============= */
 std::vector<CPUSpecificParams> cpuParams_4D = {
     CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}),
@@ -52,8 +52,18 @@ protected:
         std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();

         std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+        auto primitive = selectedType;
+        if (primitive.empty())
+            primitive = getPrimitiveType();
+        // WA: I32 precision support disabled in snippets => primitive has to be changed
+        // TODO: remove the WA after I32 is supported in snippets (ticket: 99803)
+        if (inPrc == InferenceEngine::Precision::I32 || outPrc == InferenceEngine::Precision::I32)
+            primitive = "unknown";

-        selectedType = std::string("unknown_") + (inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name());
+        auto exec_type_precision = inPrc != InferenceEngine::Precision::U8
+                                       ? inPrc
+                                       : InferenceEngine::Precision(InferenceEngine::Precision::I8);
+        selectedType = makeSelectedTypeStr(primitive, InferenceEngine::details::convertPrecision(exec_type_precision));

         for (size_t i = 0; i < shapes.second.size(); i++) {
             targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
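The hunk above derives the expected primitive string from the actual primitive type plus the execution precision, with two special cases: U8 inputs execute as I8, and I32 forces the primitive to "unknown" while snippets lack I32 support. A simplified standalone sketch of that derivation; `makeSelectedType` is an illustrative stand-in for the test utilities' `makeSelectedTypeStr`:

```cpp
#include <iostream>
#include <string>

// Sketch of the selectedType derivation: combine the primitive name with the
// execution precision; U8 inputs execute as I8, and I32 falls back to
// "unknown" until snippets support I32 (simplified stand-in, not the real helper).
std::string makeSelectedType(std::string primitive, const std::string& inPrc) {
    if (inPrc == "I32")
        primitive = "unknown";               // WA until I32 lands in snippets
    const std::string execPrc = (inPrc == "U8") ? "I8" : inPrc;
    return primitive + "_" + execPrc;
}

int main() {
    std::cout << makeSelectedType("jit", "U8") << "\n";   // jit_I8
    std::cout << makeSelectedType("jit", "I32") << "\n";  // unknown_I32
}
```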
@@ -112,12 +122,10 @@ private:
 TEST_P(ConvertCPULayerTest, CompareWithRefs) {
     run();

-    CheckPluginRelatedResults(compiledModel, "Convert");
+    CheckPluginRelatedResults(compiledModel, std::set<std::string>{"Convert", "Subgraph"});
 }

-std::vector<InputShape> inShapes_4D = {
-    {{1, 2, 3, 4}, {{1, 2, 3, 4}}},
-    {{1, 1, 1080, 1920}, {{1, 1, 1080, 1920}}},
+std::vector<InputShape> inShapes_4D_dynamic = {
     {
         // dynamic
         {{-1, -1, -1, -1}},
@@ -154,27 +162,69 @@ const std::vector<Precision> precisions_floating_point = {
         Precision::BF16
 };

-std::vector<CPUSpecificParams> memForm4D = {
-    CPUSpecificParams({nchw}, {nchw}, {}, {}),
-    CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
-    CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
-    CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
+std::vector<CPUSpecificParams> memForm4D_dynamic = {
+    CPUSpecificParams({nchw}, {nchw}, {}, "unknown"),
+    CPUSpecificParams({nhwc}, {nhwc}, {}, "unknown"),
+    CPUSpecificParams({nChw8c}, {nChw8c}, {}, "unknown"),
+    CPUSpecificParams({nChw16c}, {nChw16c}, {}, "unknown")
 };

+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_Dynamic, ConvertCPULayerTest,
+                        ::testing::Combine(
+                                ::testing::ValuesIn(inShapes_4D_dynamic),
+                                ::testing::ValuesIn(precisions),
+                                ::testing::ValuesIn(precisions),
+                                ::testing::ValuesIn(memForm4D_dynamic)),
+                        ConvertCPULayerTest::getTestCaseName);
+
+std::vector<InputShape> inShapes_4D_static = {
+    {{1, 2, 3, 4}, {{1, 2, 3, 4}}},
+    {{1, 1, 1080, 1920}, {{1, 1, 1080, 1920}}},
+};
+
+std::vector<CPUSpecificParams> memForm4D_static_common = {
+    CPUSpecificParams({nchw}, {nchw}, {}, {}),
+    CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
+};
+
 INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
                         ::testing::Combine(
-                                ::testing::ValuesIn(inShapes_4D),
+                                ::testing::ValuesIn(inShapes_4D_static),
                                 ::testing::ValuesIn(precisions),
                                 ::testing::ValuesIn(precisions),
-                                ::testing::ValuesIn(memForm4D)),
+                                ::testing::ValuesIn(memForm4D_static_common)),
                         ConvertCPULayerTest::getTestCaseName);

-INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL, ConvertCPULayerTest,
+std::vector<InputShape> inShapes_4D_blocked = {
+    {{1, 16, 5, 5}, {{1, 16, 5, 5}}},
+};
+
+std::vector<CPUSpecificParams> memForm4D_static_blocked = {
+    CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_Blocked, ConvertCPULayerTest,
+                        ::testing::Combine(
+                                ::testing::ValuesIn(inShapes_4D_blocked),
+                                ::testing::ValuesIn(precisions),
+                                ::testing::ValuesIn(precisions),
+                                ::testing::ValuesIn(filterCPUSpecificParams(memForm4D_static_blocked))),
+                        ConvertCPULayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL_Static, ConvertCPULayerTest,
         ::testing::Combine(
-                ::testing::ValuesIn(inShapes_4D),
+                ::testing::ValuesIn(inShapes_4D_static),
                 ::testing::ValuesIn(precisions_floating_point),
                 ::testing::Values(Precision::BOOL),
                 ::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, {}))),
         ConvertCPULayerTest::getTestCaseName);

+INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest_BOOL_Dynamic, ConvertCPULayerTest,
+        ::testing::Combine(
+                ::testing::ValuesIn(inShapes_4D_dynamic),
+                ::testing::ValuesIn(precisions_floating_point),
+                ::testing::Values(Precision::BOOL),
+                ::testing::Values(CPUSpecificParams({nchw}, {nchw}, {}, "unknown"))),
+        ConvertCPULayerTest::getTestCaseName);
+
 } // namespace CPULayerTestsDefinitions
@@ -169,7 +169,7 @@ private:

 TEST_P(EltwiseLayerCPUTest, CompareWithRefs) {
     run();
-    CheckPluginRelatedResults(compiledModel, "Eltwise");
+    CheckPluginRelatedResults(compiledModel, std::set<std::string>{"Eltwise", "Subgraph"});
 }

 namespace {
@@ -223,7 +223,7 @@ const std::vector<fusingSpecificParams> fusingParamsSet{
         // fake quantize
         fusingFakeQuantizePerTensorRelu,
         fusingFakeQuantizePerChannelRelu,
-        fusingFQPerChannelSigmoidFQPerChannel
+        fusingFQPerChannelSigmoidFQPerTensor
 };

 std::vector<std::vector<ov::Shape>> inShapes_4D = {
@@ -240,8 +240,8 @@ const auto params_4D = ::testing::Combine(
         ::testing::ValuesIn(secondaryInputTypes),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),

@@ -262,8 +262,8 @@ const auto params_4D_fusing = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
         ::testing::ValuesIn(opTypes),
         ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(cpuParams_4D),

@@ -278,8 +278,8 @@ const auto params_4D_emptyCPUSpec = ::testing::Combine(
         ::testing::ValuesIn(secondaryInputTypes),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::Values(emptyCPUSpec),

@@ -301,8 +301,8 @@ const auto params_5D = ::testing::Combine(
         ::testing::ValuesIn(secondaryInputTypes),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),

@@ -317,8 +317,8 @@ const auto params_5D_emptyCPUSpec = ::testing::Combine(
         ::testing::ValuesIn(secondaryInputTypes),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::Values(emptyCPUSpec),

@@ -346,8 +346,8 @@ const auto params_5D_emptyCPUSpec_I32 = ::testing::Combine(
         ::testing::ValuesIn(secondaryInputTypes),
         ::testing::ValuesIn(opTypes),
         ::testing::Values(ElementType::i32),
-        ::testing::Values(ElementType::i32),
-        ::testing::Values(ElementType::i32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::Values(emptyCPUSpec),

@@ -372,8 +372,8 @@ const auto params_4D_Blocked_Planar = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Blocked_Planar)),

@@ -398,8 +398,8 @@ const auto params_4D_Planar_Blocked = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Planar_Blocked)),

@@ -424,8 +424,8 @@ const auto params_5D_Blocked_Planar = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Blocked_Planar)),

@@ -450,8 +450,8 @@ const auto params_5D_Planar_Blocked = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Planar_Blocked)),

@@ -478,8 +478,8 @@ const auto params_4D_1D_constant_mode = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D_Constant_mode)),

@@ -488,8 +488,6 @@ const auto params_4D_1D_constant_mode = ::testing::Combine(
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_1D_Constant, EltwiseLayerCPUTest, params_4D_1D_constant_mode, EltwiseLayerCPUTest::getTestCaseName);

 std::vector<CPUSpecificParams> cpuParams_4D_1D_Parameter_mode = {
-    CPUSpecificParams({nChw16c, x}, {nChw16c}, {}, {}),
     CPUSpecificParams({nhwc, x}, {nhwc}, {}, {}),
     CPUSpecificParams({nchw, x}, {nchw}, {}, {})
 };

@@ -500,8 +498,8 @@ const auto params_4D_1D_parameter_mode = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D_Parameter_mode)),

@@ -527,8 +525,8 @@ const auto params_5D_1D_constant = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D_constant)),

@@ -537,8 +535,6 @@ const auto params_5D_1D_constant = ::testing::Combine(
 INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_1D_Constant, EltwiseLayerCPUTest, params_5D_1D_constant, EltwiseLayerCPUTest::getTestCaseName);

 std::vector<CPUSpecificParams> cpuParams_5D_1D_parameter = {
-    CPUSpecificParams({nCdhw16c, x}, {nCdhw16c}, {}, {}),
     CPUSpecificParams({ndhwc, x}, {ndhwc}, {}, {}),
     CPUSpecificParams({ncdhw, x}, {ncdhw}, {}, {})
 };

@@ -549,8 +545,8 @@ const auto params_5D_1D_parameter = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D_parameter)),

@@ -602,8 +598,8 @@ const auto params_4D_dyn_const = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),

@@ -641,8 +637,8 @@ const auto params_4D_dyn_param = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),

@@ -682,8 +678,8 @@ const auto params_4D_dyn_param_fusing = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
         ::testing::ValuesIn(opTypes),
         ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(cpuParams_4D),

@@ -713,8 +709,8 @@ const auto params_5D_dyn_const = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),

@@ -752,8 +748,8 @@ const auto params_5D_dyn_param = ::testing::Combine(
         ::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netType),
-        ::testing::Values(ElementType::f32),
-        ::testing::Values(ElementType::f32),
+        ::testing::Values(ov::element::undefined),
+        ::testing::Values(ov::element::undefined),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::Values(additional_config)),
         ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
@@ -115,9 +115,8 @@ std::string CPUTestsBase::impls2str(const std::vector<std::string> &priority) {
     return str;
 }

-void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::string& nodeType) const {
-    if (!execNet) return;
-    if (nodeType.empty()) return;
+void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::set<std::string>& nodeType) const {
+    if (!execNet || nodeType.empty()) return;

     ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined.";
     InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo();

@@ -125,16 +124,23 @@ void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork
     CheckPluginRelatedResultsImpl(function, nodeType);
 }

-void CPUTestsBase::CheckPluginRelatedResults(const ov::CompiledModel &execNet, const std::string& nodeType) const {
-    if (!execNet) return;
-    if (nodeType.empty()) return;
+void CPUTestsBase::CheckPluginRelatedResults(const ov::CompiledModel &execNet, const std::set<std::string>& nodeType) const {
+    if (!execNet || nodeType.empty()) return;

     ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined.";
     auto function = execNet.get_runtime_model();
     CheckPluginRelatedResultsImpl(function, nodeType);
 }

-void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov::Model>& function, const std::string& nodeType) const {
+void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::string& nodeType) const {
+    CheckPluginRelatedResults(execNet, std::set<std::string>{nodeType});
+}
+
+void CPUTestsBase::CheckPluginRelatedResults(const ov::CompiledModel &execNet, const std::string& nodeType) const {
+    CheckPluginRelatedResults(execNet, std::set<std::string>{nodeType});
+}
+
+void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov::Model>& function, const std::set<std::string>& nodeType) const {
     ASSERT_NE(nullptr, function);
     for (const auto &node : function->get_ops()) {
         const auto & rtInfo = node->get_rt_info();

@@ -161,7 +167,7 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov:
         return skip_unsquized_1D || permule_of_1;
     };

-    if (getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == nodeType) {
+    if (nodeType.count(getExecValue(ExecGraphInfoSerialization::LAYER_TYPE))) {
         ASSERT_LE(inFmts.size(), node->get_input_size());
         ASSERT_LE(outFmts.size(), node->get_output_size());
         for (int i = 0; i < inFmts.size(); i++) {

@@ -212,7 +218,6 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov:

             if (should_be_skipped(shape, outFmts[i]))
                 continue;
-
             ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormats[i].c_str()));
         }
@@ -136,13 +136,15 @@ public:
                 const std::shared_ptr<ngraph::Node> &lastNode,
                 std::string name);

+    void CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::set<std::string>& nodeType) const;
+    void CheckPluginRelatedResults(const ov::CompiledModel &execNet, const std::set<std::string>& nodeType) const;
     void CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::string& nodeType) const;
     void CheckPluginRelatedResults(const ov::CompiledModel &execNet, const std::string& nodeType) const;

     static const char* any_type;

 protected:
-    virtual void CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov::Model>& function, const std::string& nodeType) const;
+    virtual void CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov::Model>& function, const std::set<std::string>& nodeType) const;
     /**
      * @brief This function modifies the initial single layer test graph to add any necessary modifications that are specific to the cpu test scope.
      * @param ngPrc Graph precision.
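The header change above keeps the old single-string `CheckPluginRelatedResults` entry points and adds `std::set<std::string>` overloads that they delegate to, as the .cpp hunks earlier show, so a test can now accept several layer types (for example both "Eltwise" and "Subgraph") in one check. The delegation pattern in isolation, with simplified free functions standing in for the member functions:

```cpp
#include <iostream>
#include <set>
#include <string>

// Primary implementation takes a set so callers can accept several
// acceptable layer types in one check.
bool matchesNodeType(const std::string& layerType, const std::set<std::string>& nodeTypes) {
    return nodeTypes.count(layerType) != 0;
}

// The old single-string entry point is kept as a thin wrapper, so existing
// call sites compile unchanged.
bool matchesNodeType(const std::string& layerType, const std::string& nodeType) {
    return matchesNodeType(layerType, std::set<std::string>{nodeType});
}

int main() {
    std::cout << matchesNodeType("Subgraph", std::set<std::string>{"Eltwise", "Subgraph"}) << "\n";  // 1
    std::cout << matchesNodeType("Subgraph", std::string("Eltwise")) << "\n";                        // 0
}
```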
@@ -36,7 +36,7 @@ CpuTestWithFusing::modifyGraph(const ngraph::element::Type &ngPrc, ngraph::Param
     return retNode;
 }

-void CpuTestWithFusing::CheckFusingResults(const std::shared_ptr<const ov::Model>& function, const std::string& nodeType) const {
+void CpuTestWithFusing::CheckFusingResults(const std::shared_ptr<const ov::Model>& function, const std::set<std::string>& nodeType) const {
     ASSERT_NE(nullptr, function);
     bool isNodeFound = false;
     for (const auto & op : function->get_ops()) {

@@ -49,22 +49,29 @@ void CpuTestWithFusing::CheckFusingResults(const std::shared_ptr<const ov::Model
         };

         auto layerType = getExecValue("layerType", rtInfo);
-        if (layerType == nodeType) {
+        if (nodeType.count(layerType)) {
             isNodeFound = true;
             auto originalLayersNames = getExecValue("originalLayersNames", rtInfo);
             std::string opFriendlyName = op->get_friendly_name();
-            auto pos = originalLayersNames.find(opFriendlyName);
-            ASSERT_TRUE(pos != std::string::npos) << "Operation name " << op->get_friendly_name() << " has not been found in originalLayersNames!";
+            ASSERT_TRUE(originalLayersNames.find(opFriendlyName) != std::string::npos)
+                << "Operation name " << opFriendlyName << " has not been found in originalLayersNames!";
+
+            size_t pos = 0;
             for (const auto& fusedOp : fusedOps) {
                 pos = originalLayersNames.find(fusedOp, checkFusingPosition ? pos : 0);
                 ASSERT_TRUE(pos != std::string::npos) << "Fused op " << fusedOp << " has not been found!";
             }
         }
     }
-    ASSERT_TRUE(isNodeFound) << "Node type name: \"" << nodeType << "\" has not been found.";
+    std::stringstream error_message;
+    error_message << "Node with types \"";
+    for (const auto& elem : nodeType)
+        error_message << elem << ", ";
+    error_message << "\" wasn't found";
+    ASSERT_TRUE(isNodeFound) << error_message.str();
 }

-void CpuTestWithFusing::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov::Model>& function, const std::string& nodeType) const {
+void CpuTestWithFusing::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov::Model>& function, const std::set<std::string>& nodeType) const {
     CPUTestsBase::CheckPluginRelatedResultsImpl(function, nodeType);
     CheckFusingResults(function, nodeType);
 }
@@ -72,10 +72,10 @@ protected:
                 ngraph::ParameterVector &params,
                 const std::shared_ptr<ngraph::Node> &lastNode) override;

-    void CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov::Model>& function, const std::string& nodeType) const override;
+    void CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov::Model>& function, const std::set<std::string>& nodeType) const override;

 private:
-    void CheckFusingResults(const std::shared_ptr<const ov::Model>& function, const std::string& nodeType) const;
+    void CheckFusingResults(const std::shared_ptr<const ov::Model>& function, const std::set<std::string>& nodeType) const;

 protected:
     std::shared_ptr<postOpMgr> postOpMgrPtr;
@@ -325,6 +325,28 @@ const auto fusingFQPerChannelSigmoidFQPerChannel = fusingSpecificParams{std::mak
         return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
     }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize", "Sigmoid", "FakeQuantize"}};

+const auto fusingFQPerChannelSigmoidFQPerTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
+        {[](postNodeConfig& cfg){
+            auto localPrc = cfg.input->get_element_type();
+            auto shape = cfg.input->get_output_partial_shape(0);
+            if (shape.size() == 1)
+                IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
+            ngraph::Shape newShape(shape.size(), 1);
+            newShape[1] = shape[1].get_length();
+            return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
+        }, "FakeQuantize(PerChannel)"},
+        {[](postNodeConfig& cfg){
+            return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Sigmoid);
+        }, "Sigmoid"},
+        {[](postNodeConfig& cfg){
+            auto localPrc = cfg.input->get_element_type();
+            auto shape = cfg.input->get_output_partial_shape(0);
+            if (shape.size() == 1)
+                IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
+            ngraph::Shape newShape(shape.size(), 1);
+            return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
+        }, "FakeQuantize(PerTensor)"}}), {"FakeQuantize", "Sigmoid", "FakeQuantize"}};
+
 const auto fusingFakeQuantizePerTensorRelu = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
         {[](postNodeConfig& cfg) {
             auto localPrc = cfg.input->get_element_type();
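The new `fusingFQPerChannelSigmoidFQPerTensor` post-op chain above differs from the per-channel variant only in the shape handed to the second FakeQuantize: per-tensor granularity uses a rank-preserving shape of all ones, while per-channel additionally keeps dimension 1. The shape derivation in isolation, with plain vectors standing in for `ngraph::Shape`:

```cpp
#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <vector>

// Per-tensor FQ: a broadcastable shape of all ones, one per input dimension.
std::vector<size_t> perTensorFqShape(const std::vector<size_t>& inputShape) {
    return std::vector<size_t>(inputShape.size(), 1);
}

// Per-channel FQ: same, but the channel dimension (axis 1) is kept; a rank-1
// input has no channel axis, so only per-tensor granularity is possible.
std::vector<size_t> perChannelFqShape(const std::vector<size_t>& inputShape) {
    if (inputShape.size() == 1)
        throw std::runtime_error("rank-1 input supports per-tensor granularity only");
    auto shape = perTensorFqShape(inputShape);
    shape[1] = inputShape[1];
    return shape;
}

int main() {
    const std::vector<size_t> in{1, 16, 5, 5};
    assert(perTensorFqShape(in) == (std::vector<size_t>{1, 1, 1, 1}));
    assert(perChannelFqShape(in) == (std::vector<size_t>{1, 16, 1, 1}));
    return 0;
}
```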
@@ -22,14 +22,6 @@ public:
     }
 };

-TEST_F(SnippetsMarkSkippedTests, smoke_Snippets_SkipAfterInputsEltwise) {
-    const auto &f = EltwiseFunction({{2, 3}, {1, 3}});
-    function = f.getOriginal();
-    // No subgraphs are expected, since the whole graph is an eltwise chain after the input
-    function_ref = f.getOriginal();
-    run();
-}
-
 TEST_F(SnippetsMarkSkippedTests, smoke_Snippets_SkipAfterInputsMatMulEltwise) {
     const auto &f = MatMulEltwiseBranchesFunction(std::vector<Shape> {{1, 3, 4, 4}, {1, 3, 4, 4}});
     function = f.getOriginal();