[CPU] [DEBUG CAPS] Extension for snippets and other ngraph transformations (#14223)
parent 40e19dec00
commit e306cbc67a
@@ -14,10 +14,11 @@
 #include "ie_parallel.hpp"
 #include "ie_system_conf.h"
-#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
+#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "openvino/core/type/element_type_traits.hpp"
 #include "openvino/runtime/properties.hpp"
-#include <cpu/x64/cpu_isa_traits.hpp>
+#include "utils/debug_capabilities.h"
+
+#include "cpu/x64/cpu_isa_traits.hpp"

 namespace ov {
 namespace intel_cpu {
@@ -48,10 +49,24 @@ Config::Config() {
     if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16))
         enforceBF16 = false;

-    CPU_DEBUG_CAP_ENABLE(readDebugCapsProperties());
+    CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());

     updateProperties();
 }

+#ifdef CPU_DEBUG_CAPS
+/**
+ * Debug capabilities configuration has more priority than common one
+ * Some of the debug capabilities also require to enable some of common
+ * configuration properties
+ */
+void Config::applyDebugCapsProperties() {
+    // always enable perf counters for verbose mode and performance summary
+    if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty())
+        collectPerfCounters = true;
+}
+#endif
+
 void Config::readProperties(const std::map<std::string, std::string> &prop) {
     const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
     const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
@@ -184,7 +199,7 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
     if (exclusiveAsyncRequests)  // Exclusive request feature disables the streams
         streamExecutorConfig._streams = 1;

-    CPU_DEBUG_CAP_ENABLE(readDebugCapsProperties());
+    CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
     updateProperties();
 }
@@ -239,58 +254,6 @@ void Config::updateProperties() {
         _config.insert({PluginConfigParams::KEY_CACHE_DIR, cache_dir});
 }

-#ifdef CPU_DEBUG_CAPS
-void Config::readDebugCapsProperties() {
-    auto readEnv = [](const char* envVar) {
-        return std::getenv(envVar);
-    };
-
-    auto parseDumpFormat = [](const std::string& format) {
-        if (format == "BIN")
-            return FORMAT::BIN;
-        else if (format == "TEXT")
-            return FORMAT::TEXT;
-        else
-            IE_THROW() << "readDebugCapsProperties: Unknown dump format";
-    };
-
-    const char* envVarValue = nullptr;
-
-    if (envVarValue = readEnv("OV_CPU_EXEC_GRAPH_PATH"))
-        execGraphPath = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_VERBOSE"))
-        verbose = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_DIR"))
-        blobDumpDir = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_FORMAT"))
-        blobDumpFormat = parseDumpFormat(envVarValue);
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID"))
-        blobDumpFilters[BY_EXEC_ID] = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_PORTS"))
-        blobDumpFilters[BY_PORTS] = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_TYPE"))
-        blobDumpFilters[BY_TYPE] = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_NAME"))
-        blobDumpFilters[BY_NAME] = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_SUMMARY_PERF")) {
-        collectPerfCounters = true;
-        summaryPerf = envVarValue;
-    }
-
-    // always enable perf counters for verbose mode
-    if (!verbose.empty())
-        collectPerfCounters = true;
-}
-#endif // CPU_DEBUG_CAPS
-
 } // namespace intel_cpu
 } // namespace ov
@@ -6,8 +6,11 @@

 #include <threading/ie_istreams_executor.hpp>
 #include <ie_performance_hints.hpp>
-#include "utils/debug_capabilities.h"
+#include <ie/ie_common.h>
+#include <openvino/util/common_util.hpp>
+#include "utils/debug_caps_config.h"

+#include <bitset>
 #include <string>
 #include <map>
 #include <mutex>
@@ -57,31 +60,12 @@ struct Config {

     std::map<std::string, std::string> _config;

-#ifdef CPU_DEBUG_CAPS
-    enum FILTER {
-        BY_PORTS,
-        BY_EXEC_ID,
-        BY_TYPE,
-        BY_NAME,
-    };
-
-    enum class FORMAT {
-        BIN,
-        TEXT,
-    };
-
-    std::string execGraphPath;
-    std::string verbose;
-    std::string blobDumpDir = "cpu_dump";
-    FORMAT blobDumpFormat = FORMAT::TEXT;
-    // std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and defect in C++11 standart)
-    std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
-    std::string summaryPerf = "";
-
-    void readDebugCapsProperties();
-#endif
-
     bool isNewApi = true;
+
+#ifdef CPU_DEBUG_CAPS
+    DebugCapsConfig debugCaps;
+    void applyDebugCapsProperties();
+#endif
 };

 } // namespace intel_cpu
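The new `DebugCapsConfig` type is declared in `utils/debug_caps_config.h`, which is not part of this excerpt. Based on the members the rest of the commit reads through `config.debugCaps` (verbose, summaryPerf, execGraphPath, and the blob-dump settings that previously lived directly in `Config`), a rough sketch of what it likely exposes is shown below; any name not referenced elsewhere in this diff is an assumption.

```cpp
// Hypothetical sketch of utils/debug_caps_config.h (the real header is not shown in this diff).
// The fields mirror the members that config.cpp, graph.cpp and graph_dumper.cpp access via
// config.debugCaps; the readProperties() entry point is an assumption.
#pragma once

#include <string>
#include <unordered_map>

namespace ov {
namespace intel_cpu {

struct DebugCapsConfig {
    enum FILTER { BY_PORTS, BY_EXEC_ID, BY_TYPE, BY_NAME };
    enum class FORMAT { BIN, TEXT };

    std::string execGraphPath;                // OV_CPU_EXEC_GRAPH_PATH
    std::string verbose;                      // OV_CPU_VERBOSE
    std::string blobDumpDir = "cpu_dump";     // OV_CPU_BLOB_DUMP_DIR
    FORMAT blobDumpFormat = FORMAT::TEXT;     // OV_CPU_BLOB_DUMP_FORMAT
    std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
    std::string summaryPerf;                  // OV_CPU_SUMMARY_PERF

    void readProperties();                    // assumed: reads the OV_CPU_* environment variables
};

}  // namespace intel_cpu
}  // namespace ov
```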
@@ -6,6 +6,7 @@ Use the following cmake option to enable debug capabilities:
 * [Verbose mode](verbose.md)
 * [Blob dumping](blob_dumping.md)
 * [Graph serialization](graph_serialization.md)
+* [Graph transformation disabling](feature_disabling.md#graph-transformations)

 ## Debug log
@@ -29,8 +29,8 @@ Default is *cpu_dump*
 OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
 ```
 Options are:
-* BIN (default)
-* TEXT
+* BIN
+* TEXT (default)

 ## Filter input / output blobs
 To dump only input / output blobs:
new file: src/plugins/intel_cpu/src/docs/debug_caps_filters.md (52 lines)

# Filters

The filters described below share the following common format:
```sh
filter_name=<comma_separated_tokens>
```
Tokens are processed from left to right and each one includes or excludes the corresponding value.\
To exclude a value, the token is prefixed with a minus: *-token*\
All tokens are case insensitive, and an empty token list is treated as *all*,\
so the filters below are equivalent:
* filter_name
* filter_name=all
* filter_name=-all,ALL

## IR format filter

The IR format filter specifies the output IR formats, e.g. for [serialization](graph_serialization.md#graph-transformations).
```sh
formats=<comma_separated_tokens>
```

The following tokens are supported:
* all\
  equals to <xml,dot,svg>
* xml (default)\
  IR in a .xml file. Can be opened using, for example, the *netron* app. (For now the option is Linux only)
* xmlbin\
  IR in .xml and .bin files. Can be opened using, for example, the *netron* app.
* dot\
  IR in a .dot file (.svg.dot file if svg is also specified). Can be inspected using, for example, *graphviz* tools.
* svg\
  IR in an .svg file. Requires the *dot* tool to be installed on the host; not supported on Windows.\
  Generation is based on the dot representation, so the IR is additionally dumped to a .svg.dot file.

## Transformation filter

The transformation filter specifies the main graph transformation stages for different purposes,
e.g. for [disabling](feature_disabling.md#graph-transformations) or [serialization](graph_serialization.md#graph-transformations).
```sh
transformations=<comma_separated_tokens>
```

The following tokens are supported:
* all (default)\
  equals to <preLpt,lpt,postLpt,snippets,specific>
* common\
  equals to <preLpt,postLpt>
* preLpt
* lpt
* postLpt
* snippets
* specific
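The include/exclude token semantics above are simple to capture in a few lines. The snippet below is a minimal stand-alone sketch of that behaviour, not the plugin's actual parser (which lives in the debug-caps sources outside this diff): tokens are case insensitive, a leading minus excludes, and an empty filter means *all*.

```cpp
// Minimal sketch of the filter semantics described above (not the plugin's real parser).
#include <algorithm>
#include <cctype>
#include <iostream>
#include <set>
#include <sstream>
#include <string>

// Returns the resulting set of enabled tokens, given the universe of known tokens
// and a filter string such as "", "all", "-all,ALL" or "preLpt,-lpt".
std::set<std::string> applyFilter(const std::set<std::string>& known, const std::string& filter) {
    auto lower = [](std::string s) {
        std::transform(s.begin(), s.end(), s.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        return s;
    };
    if (filter.empty())                       // no tokens is treated as "all"
        return known;
    std::set<std::string> enabled;
    std::stringstream ss(filter);
    std::string token;
    while (std::getline(ss, token, ',')) {    // tokens are processed from left to right
        const bool exclude = !token.empty() && token[0] == '-';
        const std::string name = lower(exclude ? token.substr(1) : token);
        if (name == "all") {
            if (exclude) enabled.clear(); else enabled = known;
        } else if (exclude) {
            enabled.erase(name);
        } else if (known.count(name)) {
            enabled.insert(name);
        }
    }
    return enabled;
}

int main() {
    const std::set<std::string> stages = {"prelpt", "lpt", "postlpt", "snippets", "specific"};
    // "-all,ALL" first clears everything, then re-enables everything, so it equals "all".
    for (const auto& s : applyFilter(stages, "-all,ALL"))
        std::cout << s << '\n';
}
```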
new file: src/plugins/intel_cpu/src/docs/feature_disabling.md (26 lines)

# Feature disabling

The common way to disable a feature of the CPU plugin is the environment variable **OV_CPU_DISABLE**:
```sh
OV_CPU_DISABLE=<space_separated_options> binary ...
```
Option names are case insensitive and processed from left to right,\
so the last one overwrites previous ones if duplicated.

Examples:
```sh
OV_CPU_DISABLE="transformations" binary ...
OV_CPU_DISABLE="transformations=lpt" binary ...
OV_CPU_DISABLE="transformations=all,-common" binary ...
```

**OV_CPU_DISABLE** controls disabling of the following features by means of the corresponding options:

## Graph transformations

Graph transformation disabling is controlled by the following option inside **OV_CPU_DISABLE**:
```sh
transformations=<comma_separated_tokens>
```
Filter with the main transformation stages to disable the specified ones.\
See [transformation filter](debug_caps_filters.md#transformation-filter) for more details.
src/plugins/intel_cpu/src/docs/graph_serialization.md, @@ -1,17 +1,43 @@ (the file is rewritten; updated content below)

# Graph serialization

Graph serialization is disabled by default and controlled by environment variables.

## Execution graph

The execution graph can be serialized using the environment variable **OV_CPU_EXEC_GRAPH_PATH**:
```sh
OV_CPU_EXEC_GRAPH_PATH=<option> binary ...
```
Possible serialization options:
* cout\
  Serialize to console output.
* \<path\>.xml\
  Serialize the graph into .xml and .bin files. Can be opened using, for example, the *netron* app.
* **TBD**: \<path\>.dot\
  Serialize the graph into a .dot file. Can be inspected using, for example, *graphviz* tools.

## Graph transformations

Additionally, the IR can be serialized at specified stages using the environment variable **OV_CPU_DUMP_IR**:
```sh
OV_CPU_DUMP_IR=<space_separated_options> binary ...
```

Examples:
```sh
OV_CPU_DUMP_IR="transformations" binary ...
OV_CPU_DUMP_IR="transformations=snippets dir=path/dumpDir" binary ...
OV_CPU_DUMP_IR="transformations=all,-common DIR=path/dumpdir formats=svg,xml" binary ...
```

Option names are case insensitive; the following options are supported:
* dir=\<path\>\
  Path for the dumped IR files. If omitted, it defaults to *intel_cpu_dump*
* formats=<comma_separated_tokens>\
  Filter with IR formats to dump. If omitted, it defaults to *xml*\
  See [IR format filter](debug_caps_filters.md#ir-format-filter) for more details.
* transformations=<comma_separated_tokens>\
  Filter with the main transformation stages to serialize the graph before and after the specified ones.\
  See [transformation filter](debug_caps_filters.md#transformation-filter) for more details.

Options are processed from left to right, so the last one overwrites previous ones if duplicated.
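The **OV_CPU_DUMP_IR** value is a space-separated list of key=value options where later options win. A rough stand-alone illustration of that parsing, again not the actual implementation from the debug-caps sources, could look like this:

```cpp
// Rough sketch of parsing OV_CPU_DUMP_IR=<space_separated_options> (illustrative only).
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

struct DumpIrOptions {
    std::string dir = "intel_cpu_dump";   // default documented above
    std::string formats = "xml";          // default documented above
    std::string transformations;          // empty means nothing is dumped
};

DumpIrOptions parseDumpIr(const std::string& value) {
    DumpIrOptions opts;
    std::stringstream ss(value);
    std::string option;
    while (ss >> option) {                                   // options are space separated
        const auto eq = option.find('=');
        std::string key = option.substr(0, eq);
        const std::string val = (eq == std::string::npos) ? "" : option.substr(eq + 1);
        for (auto& c : key)                                  // option names are case insensitive
            c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        if (key == "dir")                     opts.dir = val;   // later options overwrite earlier ones
        else if (key == "formats")            opts.formats = val;
        else if (key == "transformations")    opts.transformations = val.empty() ? "all" : val;
    }
    return opts;
}

int main() {
    if (const char* env = std::getenv("OV_CPU_DUMP_IR")) {
        const DumpIrOptions opts = parseDumpIr(env);
        std::cout << "dir=" << opts.dir << " formats=" << opts.formats
                  << " transformations=" << opts.transformations << '\n';
    }
}
```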
@@ -1073,7 +1073,7 @@ void Graph::InferStatic(InferRequestBase* request) {
     dnnl::stream stream(eng);

     for (const auto& node : executableGraphNodes) {
-        VERBOSE(node, config.verbose);
+        VERBOSE(node, config.debugCaps.verbose);
         PERF(node, config.collectPerfCounters);

         if (request)
@@ -1160,7 +1160,7 @@ void Graph::InferDynamic(InferRequestBase* request) {
         updateNodes(stopIndx);
         for (; inferCounter < stopIndx; ++inferCounter) {
             auto& node = executableGraphNodes[inferCounter];
-            VERBOSE(node, config.verbose);
+            VERBOSE(node, config.debugCaps.verbose);
             PERF(node, config.collectPerfCounters);

             if (request)
@@ -1171,7 +1171,7 @@ void Graph::InferDynamic(InferRequestBase* request) {
 }

 inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream) const {
-    DUMP(node, config, infer_count);
+    DUMP(node, config.debugCaps, infer_count);
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute);

     if (node->isDynamicNode()) {
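The VERBOSE/PERF/DUMP macros above, like CPU_DEBUG_CAP_ENABLE in config.cpp, are meant to compile away when debug capabilities are off. Their definitions are outside this diff; a typical shape for such a guard, shown here only as an assumption-laden sketch, is:

```cpp
// Illustrative sketch of a debug-caps guard macro; the real definitions live in
// utils/debug_capabilities.h and the dump/verbose headers and may differ.
#ifdef CPU_DEBUG_CAPS
#    define CPU_DEBUG_CAP_ENABLE(...) __VA_ARGS__   // keep the statement in debug-caps builds
#else
#    define CPU_DEBUG_CAP_ENABLE(...)               // compile it out otherwise
#endif

// Usage, as in Config::Config() and Config::readProperties():
//     CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
```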
@@ -210,7 +210,7 @@ std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const Graph &graph

 #ifdef CPU_DEBUG_CAPS
 void serialize(const Graph &graph) {
-    const std::string& path = graph.getConfig().execGraphPath;
+    const std::string& path = graph.getConfig().debugCaps.execGraphPath;

     if (path.empty())
         return;
@@ -257,7 +257,7 @@ void serializeToCout(const Graph &graph) {
 }

 void summary_perf(const Graph &graph) {
-    const std::string& summaryPerf = graph.getConfig().summaryPerf;
+    const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf;

     if (summaryPerf.empty())
         return;
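graph_serialization.md above documents that **OV_CPU_EXEC_GRAPH_PATH** accepts either cout or a path ending in .xml, while serialize() only shows the early exit on an empty path here. A dispatch consistent with that documentation might look like the sketch below; serializeToCout() appears in the surrounding context of this file, whereas serializeToXML() is a hypothetical name used only for illustration.

```cpp
// Sketch of how serialize() can dispatch on the OV_CPU_EXEC_GRAPH_PATH value documented in
// graph_serialization.md. serializeToCout() exists in this file (see the hunk above);
// serializeToXML() is a hypothetical helper used here only for illustration.
void serialize(const Graph& graph) {
    const std::string& path = graph.getConfig().debugCaps.execGraphPath;

    if (path.empty())
        return;                       // serialization disabled (default)

    if (path == "cout")
        serializeToCout(graph);       // dump the execution graph to the console
    else if (path.size() > 4 && path.substr(path.size() - 4) == ".xml")
        serializeToXML(graph, path);  // hypothetical: write <path>.xml / <path>.bin
    else
        IE_THROW() << "Unknown OV_CPU_EXEC_GRAPH_PATH option: " << path;
}
```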
@@ -27,6 +27,7 @@ namespace intel_cpu {

 inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphFunc) {
     RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset);

     ngraph::pass::Manager manager;
     manager.register_pass<ConvertMatMulToFC>();
     manager.register_pass<AlignMatMulInputRanks>();
@@ -2,140 +2,28 @@
 // SPDX-License-Identifier: Apache-2.0
 //

Remaining includes after the change:

#include "ie_metric_helpers.hpp" // must be included first

#include "plugin.h"

#include "transformation_pipeline.h"
#include "itt.h"
#include "extension_mngr.h"
#include "extension.h"
#include "serialize.h"
#include "threading/ie_executor_manager.hpp"

#include "ie_icore.hpp"
#include "ie_plugin_config.hpp"
#include "ie_system_conf.h"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"

#include <ie_ngraph_utils.hpp>

#include "performance_heuristics.hpp"
#include "weights_cache.hpp"
#include "utils/denormals.hpp"

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#ifndef __GNUC_PREREQ

Removed includes:

#include <threading/ie_executor_manager.hpp>
#include <memory>
#include <ie_plugin_config.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <ie_icore.hpp>
#include <fstream>
#include <vector>
#include <tuple>
#include <unordered_set>
#include <ie_system_conf.h>
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/fq_mul_fusion.hpp>
#include <transformations/common_optimizations/mul_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include <transformations/common_optimizations/convert_quantize_dequantize.hpp>
#include <transformations/common_optimizations/nop_elimination.hpp>
#include <transformations/common_optimizations/wrap_interpolate_into_transposes.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
#include <transformations/op_conversions/convert_broadcast_to_tiles.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_slice_to_strided_slice.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_upgrade.hpp>
#include <transformations/op_conversions/detection_output_downgrade.hpp>
#include <transformations/op_conversions/detection_output_upgrade.hpp>
#include <transformations/op_conversions/gelu7_downgrade.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp>
#include <transformations/op_conversions/mvn6_decomposition.hpp>
#include <transformations/op_conversions/normalize_l2_decomposition.hpp>
#include <transformations/op_conversions/reduce_l1_decomposition.hpp>
#include <transformations/op_conversions/reduce_l2_decomposition.hpp>
#include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp>
#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
#include <transformations/op_conversions/convert_subtract.hpp>
#include <transformations/op_conversions/softmax_decomposition.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/op_conversions/convert_mod.hpp>
#include <transformations/op_conversions/convert_ti_to_sequences.hpp>
#include <transformations/op_conversions/lstm_cell_decomposition.hpp>
#include <transformations/op_conversions/rnn_cell_decomposition.hpp>
#include <transformations/op_conversions/gru_cell_decomposition.hpp>
#include <transformations/op_conversions/log_softmax_decomposition.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_9.hpp>
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp>
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/smart_reshape/matmul_sr.hpp>
#include <transformations/op_conversions/convert_minimum_to_power_and_max.hpp>
#include <transformations/op_conversions/convert_reduce_to_pooling.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/disable_decompression_convert_constant_folding.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <transformations/op_conversions/fq_decomposition.hpp>
#include <transformations/utils/utils.hpp>
#include <transformations/op_conversions/convert_roi_align_v9_to_v3.hpp>
#include <transformations/op_conversions/convert_roi_align_v3_to_v9.hpp>
#include <transformations/op_conversions/softsign_decomposition.hpp>
#include "transformations/op_conversions/eye_decomposition.hpp"
#include "transformations/op_conversions/unique_decomposition.hpp"
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#include "ngraph_transformations/snippets_mark_skipped.hpp"
#include "ngraph_transformations/mha_fusion.hpp"
#include "ngraph_transformations/convert_to_interaction.hpp"
#include "ngraph_transformations/convert_fq_rnn_to_quantized_rnn.hpp"
#include "ngraph_transformations/move_eltwise_up_data_movement.hpp"
#include "ngraph_transformations/swap_convert_transpose.hpp"
#include <snippets/pass/collapse_subgraph.hpp>
#include <snippets/pass/common_optimizations.hpp>
#include <snippets/pass/convert_constants.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <openvino/opsets/opset10.hpp>
#include <ngraph/op/util/op_types.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/graph_util.hpp>
#include <ov_ops/augru_cell.hpp>
#include <ov_ops/augru_sequence.hpp>
#include <transformations/low_precision/mark_dequantization_subgraph.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/common/precisions_restriction.hpp>
#include <low_precision/convert_subtract_constant.hpp>
#include <low_precision/convolution.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/layer_transformation.hpp>
#include <low_precision/low_precision.hpp>
#include <low_precision/multiply_to_group_convolution.hpp>
#include <low_precision/network_helper.hpp>
#include "openvino/runtime/core.hpp"
#include "openvino/util/common_util.hpp"
#include <ie_algorithm.hpp>
#include "nodes/mvn.h"
#include "nodes/fake_quantize.h"
#include "nodes/normalize.h"
#include "nodes/mha.h"
#include "transformations/common_optimizations/augru_cell_fusion.hpp"
@@ -262,452 +150,6 @@ Engine::~Engine() {
    executorManager()->clear("CPUCallbackExecutor");
}

Removed from this file (the transformation pipeline moves into the new transformation_pipeline.cpp):

static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx) {
    if (auto convert = ov::as_type_ptr<ov::opset10::Convert>(node)) {
        // For Convert node, converting precision from floating point to boolean will lead to mathematical
        // error, because here the output precision boolean is replaced by u8. E.g. floating point value 0.01
        // is converted to be 1 for boolean, but 0 for u8. Thus an Abs and Ceil node should be added before the
        // Convert node for this scenario.
        if (convert->input(0).get_element_type().is_real() &&
            convert->get_convert_element_type() == ngraph::element::boolean && to.is_integral_number()) {
            auto abs = std::make_shared<ov::opset10::Abs>(convert->input_value(0).get_node_shared_ptr());
            auto ceil = std::make_shared<ov::opset10::Ceiling>(abs);
            auto new_convert = std::make_shared<ov::opset10::Convert>(ceil, to);
            new_convert->set_friendly_name(convert->get_friendly_name());
            ov::copy_runtime_info(convert, {abs, ceil, new_convert});
            ov::replace_node(convert, new_convert);
            return true;
        } else {
            convert->set_convert_element_type(to);
            return true;
        }
    }
    return false;
}

static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function> nGraphFunc, const bool _enableLPT, const bool _enableBF16,
                                               const bool _enableSnippets, const bool isLegacyApi) {
    ov::pass::Manager manager;
    manager.set_per_pass_validation(false);
    manager.register_pass<ov::pass::InitNodeInfo>();

    const bool useLpt =
        _enableLPT &&
        ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc);
    auto defaultPrecisions = useLpt ? ngraph::pass::low_precision::precision_set::int8_support : std::vector<ov::element::Type>{};
    bool hasINT16orINT32Levels = false;
    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part1);
        hasINT16orINT32Levels = ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
            nGraphFunc,
            {ngraph::pass::low_precision::levels::int16, ngraph::pass::low_precision::levels::int16_narrow_range,
             ngraph::pass::low_precision::levels::int32, ngraph::pass::low_precision::levels::int32_narrow_range});
        if (hasINT16orINT32Levels) {
            defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_int16_int32_support;
        }
        manager.register_pass<ov::pass::MarkDequantizationSubgraph>(defaultPrecisions);
    }
    auto get_convert_precisions = []() {
        precisions_array array = {
            {ngraph::element::i64, ngraph::element::i32},
            {ngraph::element::u64, ngraph::element::i32},
            {ngraph::element::i16, ngraph::element::i32},
            {ngraph::element::u16, ngraph::element::i32},
            {ngraph::element::u32, ngraph::element::i32},
            {ngraph::element::f64, ngraph::element::f32},
            {ngraph::element::f16, ngraph::element::f32},
            {ngraph::element::boolean, ngraph::element::u8},
            {ngraph::element::i4, ngraph::element::i8},
            {ngraph::element::u4, ngraph::element::u8}
        };

        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
            array.push_back({ngraph::element::bf16, ngraph::element::f32});

        return array;
    };

    static const auto precisions = get_convert_precisions();
    type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};

    manager.register_pass<ov::pass::AUGRUCellFusion>();
    manager.register_pass<ov::pass::CommonOptimizations>();
    manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
    manager.register_pass<ov::pass::TransposeSinking>();
    manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
    manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
    manager.register_pass<ov::pass::LSTMCellDecomposition>();
    manager.register_pass<ov::pass::GRUCellDecomposition>();
    manager.register_pass<ov::pass::RNNCellDecomposition>();
    manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
    manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
    manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
    manager.register_pass<ov::pass::TransposeMatMul>();
    manager.register_pass<ov::pass::ConstantFolding>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part2);
        manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
    }
    manager.register_pass<ov::pass::Validate>();
    manager.register_pass<ov::pass::ConvertPrecision>(precisions, type_to_fuse);
    manager.register_pass<ov::pass::EliminateConvert>();
    manager.register_pass<SwapConvertTranspose>();
    manager.register_pass<ConvertToInteraction>();
    manager.register_pass<ConvertInteractionInt8>();

    auto pass_config = manager.get_pass_config();

    using const_node_ptr = const std::shared_ptr<const ngraph::Node>;

    // SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
    pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
                              ov::pass::ConvertDepthToSpace>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_shape().size() <= 5lu &&
                   node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
        });

    pass_config->set_callback<ov::pass::ConvertBatchToSpace,
                              ov::pass::ConvertSpaceToBatch>(
        [](const_node_ptr &node) -> bool {
            const auto & rank = node->input(0).get_partial_shape().rank().get_length();
            return rank == 4lu || rank == 5lu;
        });

    auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool {
        if (const auto &rnn_cell = std::dynamic_pointer_cast<const ngraph::opset4::RNNCell>(node)) {
            return rnn_cell->get_clip() == 0.0f;
        } else if (const auto &gru_cell = std::dynamic_pointer_cast<const ngraph::opset4::GRUCell>(node)) {
            return gru_cell->get_clip() == 0.0f
                && gru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &augru_cell = std::dynamic_pointer_cast<const ov::op::internal::AUGRUCell>(node)) {
            return augru_cell->get_clip() == 0.0f
                && augru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ngraph::opset4::LSTMCell>(node)) {
            return lstm_cell->get_clip() == 0.0f &&
                   lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ngraph::opset1::LSTMCell>(node)) {
            return lstm_cell_v1->get_clip() == 0.0f &&
                   lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        }
        return false;
    };

    // Sequences supported by the plugin shouldn't be converted to TensorIterator.
    // sequence_length input is not supported in all Sequences, so if is_seq_len_provided() == true, we
    // should always convert to TensorIterator.
    // RNN/GRU/LSTM Sequences are supported with clip == 0, and with default activations.
    auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool {
        const auto& data = node->input(0);
        const auto& data_pshape = data.get_partial_shape();
        // WA: dynamic shapes make impossible to check seq_len due to shapeOf subgraphs
        // but the sequence is still supported in CPU and doesn't need to be decomposed
        if (data_pshape.is_dynamic())
            return true;
        if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static())
            return false;
        auto max_seq_len = data.get_shape().at(1);
        if (const auto &rnn_seq = std::dynamic_pointer_cast<const ngraph::opset6::RNNSequence>(node)) {
            return rnn_seq->get_clip() == 0.0f &&
                   !ngraph::op::util::is_seq_len_provided(rnn_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &gru_seq = std::dynamic_pointer_cast<const ngraph::opset6::GRUSequence>(node)) {
            return gru_seq->get_clip() == 0.0f &&
                   gru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                   !ngraph::op::util::is_seq_len_provided(gru_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &augru_seq = std::dynamic_pointer_cast<const ov::op::internal::AUGRUSequence>(node)) {
            return augru_seq->get_clip() == 0.0f &&
                   augru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                   !ngraph::op::util::is_seq_len_provided(augru_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &lstm_seq = std::dynamic_pointer_cast<const ngraph::opset6::LSTMSequence>(node)) {
            return lstm_seq->get_clip() == 0.0f &&
                   lstm_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"} &&
                   !ngraph::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3), max_seq_len);
        }
        return false;
    };

    pass_config->set_callback<ov::pass::ConvertRNNSequenceToTensorIterator,
                              ov::pass::ConvertGRUSequenceToTensorIterator,
                              ov::pass::ConvertLSTMSequenceToTensorIterator>(
        [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
            return isSequencePrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::RNNCellDecomposition, ov::pass::GRUCellDecomposition,
                              ov::pass::LSTMCellDecomposition>(
        [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
            return isCellPrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::MVN6Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMessage;
            return node::MVN::isSupportedOperation(node, errorMessage);
        });

    pass_config->set_callback<ov::pass::NormalizeL2Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMsg;
            return node::NormalizeL2::isSupportedOperation(node, errorMsg);
        });

    pass_config->enable<ov::pass::SoftmaxDecomposition>();
    pass_config->set_callback<ov::pass::SoftmaxDecomposition>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
        });

    if (!isLegacyApi) {
        auto nmsCallback = [](const_node_ptr &node) -> bool {
            for (size_t i = 0; i < node->get_output_size(); i++) {
                const auto outputs = node->get_output_target_inputs(i);
                for (const auto &out : outputs) {
                    if (!ngraph::op::is_output(out.get_node())) {
                        return false;
                    }
                }
            }
            return true;
        };

        pass_config->set_callback<ov::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
    }

    // List of enabled/disabled transformations

    // Allow FP16 Converts to be folded and FP16 constants to be upgraded to FP32 data type
    pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
    pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
    pass_config->disable<ov::pass::EyeDecomposition>();

    pass_config->disable<ov::pass::ConvertGELU>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::Gelu7Downgrade>();
    pass_config->disable<ov::pass::HSwishDecomposition>();
    pass_config->disable<ov::pass::ReduceL1Decomposition>();
    pass_config->disable<ov::pass::ReduceL2Decomposition>();
    pass_config->disable<ov::pass::SoftPlusDecomposition>();
    pass_config->disable<ov::pass::HSigmoidDecomposition>();
    pass_config->disable<ov::pass::ConvertMod>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::WeightsDequantizeToFakeQuantize>();
    pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
    pass_config->disable<ov::pass::ConvertGather7ToGather1>();
    pass_config->disable<ov::pass::ConvertGather8ToGather7>();
    pass_config->disable<ov::pass::ConvertMinimum>();
    pass_config->disable<ov::pass::ConvertBroadcastToTiles>();
    pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
    pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
    pass_config->disable<ov::pass::ConvertReduceSumToPooling>();
    pass_config->disable<ov::pass::SliceToStridedSlice>();
    pass_config->disable<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
    pass_config->disable<ov::pass::ConvertROIAlign9To3>();
    pass_config->disable<ov::pass::SoftSignDecomposition>();
    pass_config->disable<ov::pass::UniqueDecomposition>();

    pass_config->enable<ov::pass::NormalizeL2Decomposition>();
    pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
    pass_config->enable<ov::pass::ConvertGather1ToGather7>();
    pass_config->enable<ov::pass::ConvertDetectionOutput1ToDetectionOutput8>();
    pass_config->enable<ov::pass::ConvertROIAlign3To9>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part3);
        pass_config->set_callback<ov::pass::AddFakeQuantizeFusion,
                                  ov::pass::MulFakeQuantizeFusion,
                                  ov::pass::FakeQuantizeMulFusion>([](const_node_ptr &node) -> bool {
            std::string errMsg;
            return !node::FakeQuantize::isSupportedOperation(node, errMsg);
        });

        pass_config->set_callback<ov::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
            return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions);
        });
    }

    manager.run_passes(nGraphFunc);

    using namespace ngraph::pass::low_precision;
    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part4);
        OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations");
        // Only enable conv/group conv signed input on AMX platform.
        std::vector<ngraph::element::Type> input0LowPrecisionList;
        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
            input0LowPrecisionList = {ngraph::element::u8, ngraph::element::i8};
        } else {
            input0LowPrecisionList = {ngraph::element::u8};
        }
        auto supportedPrecisions = std::vector<PrecisionsRestriction>({
            PrecisionsRestriction::create<ngraph::opset1::Convolution>({
                {{0}, input0LowPrecisionList},
                {{1}, {ngraph::element::i8}},
            }),
            PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
                {{0}, {ngraph::element::u8, ngraph::element::i8}},
                {{1}, {ngraph::element::i8}}
            }),
            PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
                {{0}, input0LowPrecisionList},
                {{1}, {ngraph::element::i8}}
            }),
            PrecisionsRestriction::create<ngraph::opset1::Multiply>({
                {{0}, {ngraph::element::u8}},
                {{1}, {ngraph::element::i8}},
            }),
            PrecisionsRestriction::create<ngraph::opset1::MatMul>({
                {{0}, {ngraph::element::u8, ngraph::element::i8}},
                {{1}, {ngraph::element::i8}}
            }),
            PrecisionsRestriction::create<ngraph::opset5::LSTMSequence>({
                {{0, 1}, {ngraph::element::u8, ngraph::element::i8}},
            }),
            PrecisionsRestriction::create<ngraph::opset6::GRUSequence>({
                {{0, 1}, {ngraph::element::u8, ngraph::element::i8}},
            }),
        });

        auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>({
            QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
            QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
        });

        // for GNA networks reference execution
        bool updatePrecision = true;
        if (hasINT16orINT32Levels) {
            updatePrecision = false;
            supportedPrecisions = std::vector<PrecisionsRestriction>({});
        }

        ov::pass::Manager lptManager;
        lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
            supportedPrecisions,
            quantizationRestrictions,
            LayerTransformation::Params(updatePrecision, ngraph::element::f32, defaultPrecisions));
        lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
            if (const auto mulitply = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
                return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply);
            }
            return false;
        });
        lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>(
            [&defaultPrecisions](const_node_ptr& node) -> bool {
                return LayerTransformation::isAsymmetricQuantization(node, defaultPrecisions) ||
                       WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
            });
        lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>([](const_node_ptr& node) -> bool {
            return true;  // MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node);
        });
        lptManager.run_passes(nGraphFunc);
    }

    ov::pass::Manager postLPTPassManager;
    postLPTPassManager.register_pass<ov::pass::UnrollTensorIterator>();
    postLPTPassManager.register_pass<ov::pass::ReshapePRelu>();
    postLPTPassManager.get_pass_config()->set_callback<ov::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
        // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
        return node->get_rt_info().count("UNROLL_TI") == 0;
    });
    postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
    postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
        if (node->get_input_size() >= 2) {
            return node->get_input_element_type(1) == ngraph::element::i8 || node->get_input_element_type(1) == ngraph::element::u8;
        }
        return false;
    });

    postLPTPassManager.register_pass<ov::pass::ConstantFolding>();

    // Snippets may brake MHA patterns so the fusion has to performed before
    postLPTPassManager.register_pass<MHAFusion>();
    postLPTPassManager.register_pass<FuseFQtoInteraction>();
    postLPTPassManager.get_pass_config()->set_callback<MHAFloatFusion, MHAFloatFusion2,
                                                       MHAQuantFusion, MHAQuantFusion2>([_enableBF16](const std::shared_ptr<const ov::Node>& n) -> bool {
        std::string errorMessage;

        if (!node::MHA::isSupportedOperation(n, errorMessage))
            return true;

        // Implementation calls AMX BF16 brgemm only for tensors with K and N aligned on 2, otherwise fallbacks on vector impl
        // Vector madd BF16 instruction on SPR has reduced performance on HW level, which results in overall perf degradation
        size_t bf16Factor = 2;
        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16_amx_bf16) &&
            (n->get_input_element_type(0) == element::bf16 || (n->get_input_element_type(0) == element::f32 && _enableBF16)) &&
            (n->get_input_shape(0)[3] % bf16Factor != 0 || n->get_input_shape(1)[1] % bf16Factor != 0 || n->get_input_shape(3)[3] % bf16Factor != 0)) {
            return true;
        }

        return false;
    });

    // Execute before snippets. Otherwise FQ will be converted to Subgraph
    postLPTPassManager.register_pass<ConvertFqRnnToQuantizedRnn>();
    postLPTPassManager.run_passes(nGraphFunc);

    if (_enableSnippets && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) {
        ov::pass::Manager snippetsManager;
        snippetsManager.register_pass<SnippetsMarkSkipped>();
        snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
        snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
        snippetsManager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>(
            [](const std::shared_ptr<const ov::Node>& n) -> bool {
                // CPU Plugin support Swish in Subgraph via conversion to SwichCPU which assumes second input to be constant
                if (ov::is_type<const ov::op::v4::Swish>(n)) {
                    if (n->inputs().size() > 1 && !ov::is_type<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1)))
                        return true;
                }

                const auto& inputs = n->inputs();
                // todo: clarify whether we can evaluate snippets on const paths
                const bool has_only_const_inputs = std::all_of(inputs.begin(), inputs.end(),
                    [](const ov::Input<const ov::Node> &in) {
                        return ov::is_type<ov::op::v0::Constant>(in.get_source_output().get_node_shared_ptr());
                    });
                // todo: clarify whether we can evaluate snippets on inputs with larger ranks
                auto rank_is_too_large = [](const ov::descriptor::Tensor& t) {
                    // callback is called has_supported_in_out(), so it's safe to assume that the shapes are static
                    return t.get_partial_shape().rank().get_length() > 6;
                };
                const bool bad_input_rank = std::any_of(inputs.begin(), inputs.end(),
                    [&](const ov::Input<const ov::Node>& in) { return rank_is_too_large(in.get_tensor()); });
                const auto& outputs = n->outputs();
                const bool bad_output_rank = std::any_of(outputs.begin(), outputs.end(),
                    [&](const ov::Output<const ov::Node>& out) { return rank_is_too_large(out.get_tensor()); });
                return has_only_const_inputs || bad_input_rank || bad_output_rank;
            });
        snippetsManager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
        snippetsManager.run_passes(nGraphFunc);
    }

    ov::pass::Manager postSnippetsManager;
    postSnippetsManager.register_pass<ov::pass::FakeQuantizeDecomposition>();
    postSnippetsManager.get_pass_config()->set_callback<ov::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
        std::string errMsg;
        return node::FakeQuantize::isSupportedOperation(node, errMsg);
    });
    postSnippetsManager.register_pass<ov::pass::ConstantFolding>();
    postSnippetsManager.run_passes(nGraphFunc);
}

Unchanged context following the removed block:

static bool streamsSet(const std::map<std::string, std::string>& config) {
    return config.count(PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) ||
           config.count(ov::num_streams.name());
@@ -883,7 +325,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
             || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled for the plugin */;
     const auto& BF16Prop = config.find(InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16);
-    bool enableBF16;
+    bool enableBF16 = false;
     if (BF16Prop != config.end()) {
         if (BF16Prop->second == PluginConfigParams::YES) {
             enableBF16 = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core);
@@ -901,7 +343,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std

     DEBUG_LOG(PrintableModel(*nGraphFunc, "org_"));

-    TransformationUpToCPUSpecificOpSet(nGraphFunc, enableLPT, enableBF16, enableSnippets, isLegacyAPI());
+    Transformations transformations(nGraphFunc, enableLPT, enableSnippets, enableBF16, isLegacyAPI(), engConfig);
+    transformations.UpToCpuSpecificOpSet();

     // need to check that all outputs have static shapes
     // checking that all inputs have static shapes is performed in the common part
@@ -914,8 +357,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     }

     ApplyPerformanceHints(config, nGraphFunc);
-
-    ConvertToCPUSpecificOpset(nGraphFunc);
+    transformations.CpuSpecificOpSet();

     DEBUG_LOG(PrintableModel(*nGraphFunc, "cpu_"));
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto supported = GetSupportedNodes(model,
|
auto supported = GetSupportedNodes(model,
|
||||||
[&](std::shared_ptr<ov::Model>& model) {
|
[&](std::shared_ptr<ov::Model>& model) {
|
||||||
TransformationUpToCPUSpecificOpSet(model, enableLPT, conf.enforceBF16, enableSnippets, isLegacyAPI());
|
Transformations transformation(model, enableLPT, enableSnippets, conf.enforceBF16, isLegacyAPI(), engConfig);
|
||||||
ConvertToCPUSpecificOpset(model);
|
transformation.UpToCpuSpecificOpSet();
|
||||||
},
|
transformation.CpuSpecificOpSet();
|
||||||
[&](const std::shared_ptr<ngraph::Node>& op) {
|
},
|
||||||
std::unique_ptr<Node> ptr;
|
[&](const std::shared_ptr<ngraph::Node>& op) {
|
||||||
try {
|
std::unique_ptr<Node> ptr;
|
||||||
ptr.reset(Node::factory().create(op, {dnnl::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
|
try {
|
||||||
} catch (const InferenceEngine::Exception&) {
|
ptr.reset(Node::factory().create(op, {dnnl::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
|
||||||
return false;
|
} catch (const InferenceEngine::Exception&) {
|
||||||
}
|
return false;
|
||||||
return true;
|
}
|
||||||
});
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
for (auto&& layerName : supported) {
|
for (auto&& layerName : supported) {
|
||||||
res.supportedLayersMap.emplace(layerName, GetName());
|
res.supportedLayersMap.emplace(layerName, GetName());
|
||||||
|
@ -4,16 +4,12 @@

#pragma once

#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include "exec_network.h"

#include <string>
#include <map>
#include <unordered_map>
#include <memory>
#include <functional>
#include <vector>
#include <cfloat>

namespace ov {
namespace intel_cpu {
src/plugins/intel_cpu/src/transformation_pipeline.cpp (new file, 609 lines)
@ -0,0 +1,609 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformation_pipeline.h"

// Operations
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset2.hpp"
#include "openvino/opsets/opset3.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset5.hpp"
#include "openvino/opsets/opset6.hpp"
#include "openvino/opsets/opset10.hpp"
#include <ov_ops/augru_cell.hpp>
#include <ov_ops/augru_sequence.hpp>

// Common transformations
#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include "transformations/common_optimizations/fq_mul_fusion.hpp"
#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/nop_elimination.hpp"
#include "transformations/common_optimizations/transpose_sinking.hpp"
#include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
#include "transformations/common_optimizations/augru_cell_fusion.hpp"
#include "transformations/common_optimizations/common_optimizations.hpp"
#include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp"
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
#include "transformations/disable_decompression_convert_constant_folding.hpp"
#include "transformations/op_conversions/convert_batch_to_space.hpp"
#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp"
#include "transformations/op_conversions/convert_depth_to_space.hpp"
#include "transformations/op_conversions/convert_gather_downgrade.hpp"
#include "transformations/op_conversions/convert_gather_upgrade.hpp"
#include "transformations/op_conversions/convert_gelu.hpp"
#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp"
#include "transformations/op_conversions/convert_minimum_to_power_and_max.hpp"
#include "transformations/op_conversions/convert_mod.hpp"
#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp"
#include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp"
#include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp"
#include "transformations/op_conversions/convert_reduce_to_pooling.hpp"
#include "transformations/op_conversions/convert_roi_align_v3_to_v9.hpp"
#include "transformations/op_conversions/convert_roi_align_v9_to_v3.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/convert_shuffle_channels3.hpp"
#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp"
#include "transformations/op_conversions/convert_space_to_batch.hpp"
#include "transformations/op_conversions/convert_space_to_depth.hpp"
#include "transformations/op_conversions/convert_subtract.hpp"
#include "transformations/op_conversions/convert_ti_to_sequences.hpp"
#include "transformations/op_conversions/detection_output_downgrade.hpp"
#include "transformations/op_conversions/detection_output_upgrade.hpp"
#include "transformations/op_conversions/eye_decomposition.hpp"
#include "transformations/op_conversions/fq_decomposition.hpp"
#include "transformations/op_conversions/gelu7_downgrade.hpp"
#include "transformations/op_conversions/hsigmoid_decomposition.hpp"
#include "transformations/op_conversions/hswish_decomposition.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/op_conversions/mvn6_decomposition.hpp"
#include "transformations/op_conversions/normalize_l2_decomposition.hpp"
#include "transformations/op_conversions/reduce_l1_decomposition.hpp"
#include "transformations/op_conversions/reduce_l2_decomposition.hpp"
#include "transformations/op_conversions/rnn_cell_decomposition.hpp"
#include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp"
#include "transformations/op_conversions/softplus_decomposition.hpp"
#include "transformations/op_conversions/softsign_decomposition.hpp"
#include "transformations/op_conversions/softmax_decomposition.hpp"
#include "transformations/op_conversions/unique_decomposition.hpp"
#include "transformations/opset_conversions/convert_opset2_to_opset1.hpp"
#include "transformations/opset_conversions/convert_opset3_to_opset2.hpp"
#include "transformations/smart_reshape/matmul_sr.hpp"
#include "transformations/init_node_info.hpp"
#include "utils/ngraph_transformation.hpp"

// LPT transformations
#include "transformations/low_precision/mark_dequantization_subgraph.hpp"
#include "low_precision/convolution_backprop_data.hpp"
#include "low_precision/convert_subtract_constant.hpp"
#include "low_precision/network_helper.hpp"
#include "low_precision/multiply_to_group_convolution.hpp"
#include "low_precision/group_convolution.hpp"

// CPU specific transformations
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#include "ngraph_transformations/snippets_mark_skipped.hpp"
#include "ngraph_transformations/mha_fusion.hpp"
#include "ngraph_transformations/convert_to_interaction.hpp"
#include "ngraph_transformations/convert_fq_rnn_to_quantized_rnn.hpp"
#include "ngraph_transformations/move_eltwise_up_data_movement.hpp"
#include "ngraph_transformations/swap_convert_transpose.hpp"

// Snippets
#include "snippets/pass/collapse_subgraph.hpp"
#include "snippets/pass/common_optimizations.hpp"

// Misc
#include "nodes/mvn.h"
#include "nodes/normalize.h"
#include "nodes/fake_quantize.h"
#include "nodes/mha.h"

#include "dnnl.hpp"
#include <cpu/x64/cpu_isa_traits.hpp>

namespace ov {
namespace intel_cpu {

using const_node_ptr = const std::shared_ptr<const ov::Node>;

bool Transformations::fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx) {
    if (auto convert = ov::as_type_ptr<ov::opset10::Convert>(node)) {
        // For the Convert node, converting precision from floating point to boolean will lead to a mathematical
        // error, because here the output precision boolean is replaced by u8. E.g. the floating point value 0.01
        // is converted to 1 for boolean, but to 0 for u8. Thus an Abs and a Ceiling node should be added before the
        // Convert node for this scenario.
        if (convert->input(0).get_element_type().is_real() &&
            convert->get_convert_element_type() == ngraph::element::boolean && to.is_integral_number()) {
            auto abs = std::make_shared<ov::opset10::Abs>(convert->input_value(0).get_node_shared_ptr());
            auto ceil = std::make_shared<ov::opset10::Ceiling>(abs);
            auto new_convert = std::make_shared<ov::opset10::Convert>(ceil, to);
            new_convert->set_friendly_name(convert->get_friendly_name());
            ov::copy_runtime_info(convert, {abs, ceil, new_convert});
            ov::replace_node(convert, new_convert);
            return true;
        } else {
            convert->set_convert_element_type(to);
            return true;
        }
    }
    return false;
}

void Transformations::UpToCpuSpecificOpSet() {
    const bool useLpt = enableLpt &&
        ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(model) &&
        CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Lpt);

    const bool useSnippets = enableSnippets &&
        CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Snippets);

    auto defaultPrecisions = useLpt ? ngraph::pass::low_precision::precision_set::int8_support : std::vector<ov::element::Type>{};
    bool hasINT16orINT32Levels = false;

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part1);
        hasINT16orINT32Levels = ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
            model,
            {ngraph::pass::low_precision::levels::int16, ngraph::pass::low_precision::levels::int16_narrow_range,
             ngraph::pass::low_precision::levels::int32, ngraph::pass::low_precision::levels::int32_narrow_range});
        if (hasINT16orINT32Levels) {
            defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_int16_int32_support;
        }
    }

    PreLpt(defaultPrecisions, isLegacyApi);

    if (useLpt)
        Lpt(hasINT16orINT32Levels, defaultPrecisions);

    PostLpt();

    if (useSnippets)
        Snippets();
}

void Transformations::CpuSpecificOpSet(void) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Specific);

    ConvertToCPUSpecificOpset(model);
}

void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, PreLpt);

    ov::pass::Manager manager;
    manager.set_per_pass_validation(false);
    manager.register_pass<ov::pass::InitNodeInfo>();

    const bool useLpt = !defaultPrecisions.empty();
    if (useLpt) {
        manager.register_pass<ov::pass::MarkDequantizationSubgraph>(defaultPrecisions);
    }

    auto get_convert_precisions = []() {
        precisions_array array = {
            {ov::element::i64,     ov::element::i32},
            {ov::element::u64,     ov::element::i32},
            {ov::element::i16,     ov::element::i32},
            {ov::element::u16,     ov::element::i32},
            {ov::element::u32,     ov::element::i32},
            {ov::element::f64,     ov::element::f32},
            {ov::element::f16,     ov::element::f32},
            {ov::element::boolean, ov::element::u8},
            {ov::element::i4,      ov::element::i8},
            {ov::element::u4,      ov::element::u8}
        };

        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
            array.push_back({ov::element::bf16, ov::element::f32});

        return array;
    };
    static const auto precisions = get_convert_precisions();
    type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};

    manager.register_pass<ov::pass::AUGRUCellFusion>();
    manager.register_pass<ov::pass::CommonOptimizations>();
    manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
    manager.register_pass<ov::pass::TransposeSinking>();
    manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
    manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
    manager.register_pass<ov::pass::LSTMCellDecomposition>();
    manager.register_pass<ov::pass::GRUCellDecomposition>();
    manager.register_pass<ov::pass::RNNCellDecomposition>();
    manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
    manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
    manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
    manager.register_pass<ov::pass::TransposeMatMul>();
    manager.register_pass<ov::pass::ConstantFolding>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part2);
        manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
    }
    manager.register_pass<ov::pass::Validate>();
    manager.register_pass<ov::pass::ConvertPrecision>(precisions, type_to_fuse);
    manager.register_pass<ov::pass::EliminateConvert>();
    manager.register_pass<SwapConvertTranspose>();
    manager.register_pass<ConvertToInteraction>();
    manager.register_pass<ConvertInteractionInt8>();

    auto pass_config = manager.get_pass_config();

    // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
    pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
                              ov::pass::ConvertDepthToSpace>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_shape().size() <= 5lu &&
                   node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
        });

    pass_config->set_callback<ov::pass::ConvertBatchToSpace,
                              ov::pass::ConvertSpaceToBatch>(
        [](const_node_ptr &node) -> bool {
            const auto & rank = node->input(0).get_partial_shape().rank().get_length();
            return rank == 4lu || rank == 5lu;
        });

    auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool {
        if (const auto &rnn_cell = std::dynamic_pointer_cast<const ov::opset4::RNNCell>(node)) {
            return rnn_cell->get_clip() == 0.0f;
        } else if (const auto &gru_cell = std::dynamic_pointer_cast<const ov::opset4::GRUCell>(node)) {
            return gru_cell->get_clip() == 0.0f
                && gru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &augru_cell = std::dynamic_pointer_cast<const ov::op::internal::AUGRUCell>(node)) {
            return augru_cell->get_clip() == 0.0f
                && augru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ov::opset4::LSTMCell>(node)) {
            return lstm_cell->get_clip() == 0.0f &&
                   lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ov::opset1::LSTMCell>(node)) {
            return lstm_cell_v1->get_clip() == 0.0f &&
                   lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        }
        return false;
    };

    // Sequences supported by the plugin shouldn't be converted to TensorIterator.
    // sequence_length input is not supported in all Sequences, so if is_seq_len_provided() == true, we
    // should always convert to TensorIterator.
    // RNN/GRU/LSTM Sequences are supported with clip == 0 and with default activations.
    auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool {
        const auto& data = node->input(0);
        const auto& data_pshape = data.get_partial_shape();
        // WA: dynamic shapes make it impossible to check seq_len due to shapeOf subgraphs,
        // but the sequence is still supported in CPU and doesn't need to be decomposed
        if (data_pshape.is_dynamic())
            return true;
        if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static())
            return false;
        auto max_seq_len = data.get_shape().at(1);
        if (const auto &rnn_seq = std::dynamic_pointer_cast<const ov::opset6::RNNSequence>(node)) {
            return rnn_seq->get_clip() == 0.0f &&
                   !ov::op::util::is_seq_len_provided(rnn_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &gru_seq = std::dynamic_pointer_cast<const ov::opset6::GRUSequence>(node)) {
            return gru_seq->get_clip() == 0.0f &&
                   gru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                   !ov::op::util::is_seq_len_provided(gru_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &augru_seq = std::dynamic_pointer_cast<const ov::op::internal::AUGRUSequence>(node)) {
            return augru_seq->get_clip() == 0.0f &&
                   augru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                   !ov::op::util::is_seq_len_provided(augru_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &lstm_seq = std::dynamic_pointer_cast<const ov::opset6::LSTMSequence>(node)) {
            return lstm_seq->get_clip() == 0.0f &&
                   lstm_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"} &&
                   !ov::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3), max_seq_len);
        }
        return false;
    };

    pass_config->set_callback<ov::pass::ConvertRNNSequenceToTensorIterator,
                              ov::pass::ConvertGRUSequenceToTensorIterator,
                              ov::pass::ConvertLSTMSequenceToTensorIterator>(
        [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
            return isSequencePrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::RNNCellDecomposition, ov::pass::GRUCellDecomposition,
                              ov::pass::LSTMCellDecomposition>(
        [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
            return isCellPrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::MVN6Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMessage;
            return node::MVN::isSupportedOperation(node, errorMessage);
        });

    pass_config->set_callback<ov::pass::NormalizeL2Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMsg;
            return node::NormalizeL2::isSupportedOperation(node, errorMsg);
        });

    pass_config->enable<ngraph::pass::SoftmaxDecomposition>();
    pass_config->set_callback<ngraph::pass::SoftmaxDecomposition>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
        });

    if (!isLegacyApi) {
        auto nmsCallback = [](const_node_ptr &node) -> bool {
            for (size_t i = 0; i < node->get_output_size(); i++) {
                const auto outputs = node->get_output_target_inputs(i);
                for (const auto &out : outputs) {
                    if (!ov::op::util::is_output(out.get_node())) {
                        return false;
                    }
                }
            }
            return true;
        };

        pass_config->set_callback<ov::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
    }

    // List of enabled/disabled transformations

    // Allow FP16 Converts to be folded and FP16 constants to be upgraded to FP32 data type
    pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
    pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
    pass_config->disable<ov::pass::EyeDecomposition>();

    pass_config->disable<ov::pass::ConvertGELU>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::Gelu7Downgrade>();
    pass_config->disable<ov::pass::HSwishDecomposition>();
    pass_config->disable<ov::pass::ReduceL1Decomposition>();
    pass_config->disable<ov::pass::ReduceL2Decomposition>();
    pass_config->disable<ov::pass::SoftPlusDecomposition>();
    pass_config->disable<ov::pass::HSigmoidDecomposition>();
    pass_config->disable<ov::pass::ConvertMod>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::WeightsDequantizeToFakeQuantize>();
    pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
    pass_config->disable<ov::pass::ConvertGather7ToGather1>();
    pass_config->disable<ov::pass::ConvertGather8ToGather7>();
    pass_config->disable<ov::pass::ConvertMinimum>();
    pass_config->disable<ov::pass::ConvertBroadcastToTiles>();
    pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
    pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
    pass_config->disable<ov::pass::ConvertReduceSumToPooling>();
    pass_config->disable<ov::pass::SliceToStridedSlice>();
    pass_config->disable<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
    pass_config->disable<ov::pass::ConvertROIAlign9To3>();
    pass_config->disable<ov::pass::SoftSignDecomposition>();
    pass_config->disable<ov::pass::UniqueDecomposition>();

    pass_config->enable<ov::pass::NormalizeL2Decomposition>();
    pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
    pass_config->enable<ov::pass::ConvertGather1ToGather7>();
    pass_config->enable<ov::pass::ConvertDetectionOutput1ToDetectionOutput8>();
    pass_config->enable<ov::pass::ConvertROIAlign3To9>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part3);
        pass_config->set_callback<ov::pass::AddFakeQuantizeFusion,
                                  ov::pass::MulFakeQuantizeFusion,
                                  ov::pass::FakeQuantizeMulFusion>(
            [](const_node_ptr &node) -> bool {
                std::string errMsg;
                return !node::FakeQuantize::isSupportedOperation(node, errMsg);
            });

        pass_config->set_callback<ov::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
            return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions);
        });
    }

    manager.run_passes(model);
}

void Transformations::Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Lpt);

    using namespace ngraph::pass::low_precision;
    CPU_LPT_SCOPE(LowPrecisionTransformations_Part4);
    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations");
    // Only enable conv/group conv signed input on AMX platforms.
    std::vector<ov::element::Type> input0LowPrecisionList;
    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
        input0LowPrecisionList = {ov::element::u8, ov::element::i8};
    } else {
        input0LowPrecisionList = {ov::element::u8};
    }
    auto supportedPrecisions = std::vector<PrecisionsRestriction>({
        PrecisionsRestriction::create<ov::opset1::Convolution>({
            {{0}, input0LowPrecisionList},
            {{1}, {ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset1::ConvolutionBackpropData>({
            {{0}, {ov::element::u8, ov::element::i8}},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset1::GroupConvolution>({
            {{0}, input0LowPrecisionList},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset1::Multiply>({
            {{0}, {ov::element::u8}},
            {{1}, {ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset1::MatMul>({
            {{0}, {ov::element::u8, ov::element::i8}},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset5::LSTMSequence>({
            {{0, 1}, {ov::element::u8, ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset6::GRUSequence>({
            {{0, 1}, {ov::element::u8, ov::element::i8}},
        }),
    });

    auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>({
        QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0}),
        QuantizationGranularityRestriction::create<ov::opset1::ConvolutionBackpropData>({0})
    });

    // for GNA networks reference execution
    bool updatePrecision = true;
    if (hasINT16orINT32Levels) {
        updatePrecision = false;
        supportedPrecisions = std::vector<PrecisionsRestriction>({});
    }

    ov::pass::Manager lptManager;
    lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
        supportedPrecisions,
        quantizationRestrictions,
        LayerTransformation::Params(updatePrecision, ov::element::f32, defaultPrecisions));
    lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
        if (const auto multiply = std::dynamic_pointer_cast<const ov::opset1::Multiply>(node)) {
            return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply);
        }
        return false;
    });
    lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>(
        [&defaultPrecisions](const_node_ptr& node) -> bool {
            return LayerTransformation::isAsymmetricQuantization(node, defaultPrecisions) ||
                   WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
        });

    lptManager.get_pass_config()->disable<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>();

    lptManager.run_passes(model);
}

void Transformations::PostLpt() {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, PostLpt);

    ov::pass::Manager postLPTPassManager;
    postLPTPassManager.register_pass<ov::pass::UnrollTensorIterator>();
    postLPTPassManager.register_pass<ov::pass::ReshapePRelu>();
    postLPTPassManager.get_pass_config()->set_callback<ov::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
        // UnrollTI transformation is disabled by default and is turned on by the LowLatency transformation
        return node->get_rt_info().count("UNROLL_TI") == 0;
    });
    postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
    postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ov::Node>& node) -> bool {
        if (node->get_input_size() >= 2) {
            return node->get_input_element_type(1) == ov::element::i8 || node->get_input_element_type(1) == ov::element::u8;
        }
        return false;
    });

    postLPTPassManager.register_pass<ov::pass::ConstantFolding>();

    // Snippets may break MHA patterns, so the fusion has to be performed before them
    postLPTPassManager.register_pass<MHAFusion>();
    postLPTPassManager.register_pass<FuseFQtoInteraction>();
    postLPTPassManager.get_pass_config()->set_callback<MHAFloatFusion, MHAFloatFusion2,
                                                       MHAQuantFusion, MHAQuantFusion2>
        ([this](const std::shared_ptr<const ov::Node>& n) -> bool {
            std::string errorMessage;

            if (!node::MHA::isSupportedOperation(n, errorMessage))
                return true;

            // Implementation calls AMX BF16 brgemm only for tensors with K and N aligned on 2; otherwise it falls back to the vector impl.
            // The vector madd BF16 instruction on SPR has reduced performance at the HW level, which results in overall perf degradation.
            size_t bf16Factor = 2;
            if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16_amx_bf16) &&
                (n->get_input_element_type(0) == element::bf16 || (n->get_input_element_type(0) == element::f32 && enableBF16)) &&
                (n->get_input_shape(0)[3] % bf16Factor != 0 || n->get_input_shape(1)[1] % bf16Factor != 0 || n->get_input_shape(3)[3] % bf16Factor != 0)) {
                return true;
            }

            return false;
        });

    // Execute before snippets. Otherwise FQ will be converted to Subgraph
    postLPTPassManager.register_pass<ConvertFqRnnToQuantizedRnn>();
    postLPTPassManager.run_passes(model);
}

void Transformations::MainSnippets(void) {
    if (!enableSnippets ||
        !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) // snippets are implemented only for relevant platforms (avx2+ extensions)
        return;

    ov::pass::Manager snippetsManager;
    snippetsManager.register_pass<SnippetsMarkSkipped>();
    snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
    snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
    snippetsManager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>(
        [](const std::shared_ptr<const ov::Node>& n) -> bool {
            // CPU Plugin supports Swish in Subgraph via conversion to SwishCPU, which assumes the second input to be constant
            if (ov::is_type<const ov::op::v4::Swish>(n)) {
                if (n->inputs().size() > 1 && !ov::is_type<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1)))
                    return true;
            }

            const auto& inputs = n->inputs();
            // todo: clarify whether we can evaluate snippets on const paths
            const bool has_only_const_inputs = std::all_of(inputs.begin(), inputs.end(),
                [](const ov::Input<const ov::Node> &in) {
                    return ov::is_type<ov::op::v0::Constant>(in.get_source_output().get_node_shared_ptr());
                });
            // todo: clarify whether we can evaluate snippets on inputs with larger ranks
            auto rank_is_too_large = [](const ov::descriptor::Tensor& t) {
                // the callback is called after has_supported_in_out(), so it's safe to assume that the shapes are static
                return t.get_partial_shape().rank().get_length() > 6;
            };
            const bool bad_input_rank = std::any_of(inputs.begin(), inputs.end(),
                [&](const ov::Input<const ov::Node>& in) { return rank_is_too_large(in.get_tensor()); });
            const auto& outputs = n->outputs();
            const bool bad_output_rank = std::any_of(outputs.begin(), outputs.end(),
                [&](const ov::Output<const ov::Node>& out) { return rank_is_too_large(out.get_tensor()); });
            return has_only_const_inputs || bad_input_rank || bad_output_rank;
        });
    snippetsManager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
    snippetsManager.run_passes(model);
}

void Transformations::PostSnippets(void) {
    ov::pass::Manager postSnippetsManager;
    postSnippetsManager.register_pass<ov::pass::FakeQuantizeDecomposition>();
    postSnippetsManager.get_pass_config()->set_callback<ov::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
        std::string errMsg;
        return node::FakeQuantize::isSupportedOperation(node, errMsg);
    });
    postSnippetsManager.register_pass<ov::pass::ConstantFolding>();
    postSnippetsManager.run_passes(model);
}

void Transformations::Snippets(void) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Snippets);

    MainSnippets();
    PostSnippets();
}

} // namespace intel_cpu
} // namespace ov
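
For orientation, this is how the plugin drives the new pipeline class; a condensed sketch assembled from the plugin.cpp hunks earlier in this commit, not an additional change:

    // Condensed usage sketch (arguments are the plugin's own, as shown in the LoadExeNetworkImpl hunk)
    Transformations transformations(nGraphFunc, enableLPT, enableSnippets, enableBF16, isLegacyAPI(), engConfig);
    transformations.UpToCpuSpecificOpSet();  // PreLpt -> (Lpt) -> PostLpt -> (Snippets)
    // ... output shape checks and ApplyPerformanceHints() happen in between ...
    transformations.CpuSpecificOpSet();      // ConvertToCPUSpecificOpset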
src/plugins/intel_cpu/src/transformation_pipeline.h (new file, 65 lines)
@ -0,0 +1,65 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/model.hpp"
#include "low_precision/low_precision.hpp"
#include "config.h"

#include "itt.h"

#include <memory>
#include <vector>

using namespace InferenceEngine;

#define IE_CPU_PLUGIN_THROW(...) IE_THROW(__VA_ARGS__) << "CPU plugin: "

namespace ov {
namespace intel_cpu {

class Transformations {
public:
    Transformations(const std::shared_ptr<ov::Model>& initialModel,
                    const bool enableLpt,
                    const bool enableSnippets,
                    const bool enableBF16,
                    const bool isLegacyApi,
                    const Config& config)
        : model(initialModel),
          enableLpt(enableLpt),
          enableSnippets(enableSnippets),
          enableBF16(enableBF16),
          isLegacyApi(isLegacyApi),
          config(config) {}

    void UpToCpuSpecificOpSet();
    void CpuSpecificOpSet(void);

private:
    std::shared_ptr<ov::Model> model;
    const bool enableLpt;
    const bool enableSnippets;
    const bool enableBF16;
    const bool isLegacyApi;
    const Config& config;

    void PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi);

    void Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions);

    void PostLpt();

    void MainSnippets(void);

    void PostSnippets(void);

    void Snippets(void);

    static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx);
};

} // namespace intel_cpu
} // namespace ov

@ -5,7 +5,7 @@

#ifdef CPU_DEBUG_CAPS

#define CPU_DEBUG_CAP_ENABLE(_x) _x;
#define CPU_DEBUG_CAP_ENABLE(...) __VA_ARGS__
#define CPU_DEBUG_CAPS_ALWAYS_TRUE(x) true

#include <string>

@ -147,7 +147,7 @@ static inline std::ostream& write_all_to_stream(std::ostream& os, const T& arg,

#else // !CPU_DEBUG_CAPS

#define CPU_DEBUG_CAP_ENABLE(_x)
#define CPU_DEBUG_CAP_ENABLE(...)
#define CPU_DEBUG_CAPS_ALWAYS_TRUE(x) x

#define DEBUG_LOG(...)
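
One detail worth calling out: switching CPU_DEBUG_CAP_ENABLE from a single named parameter to __VA_ARGS__ lets the wrapped statement contain commas that are not protected by parentheses, and the call site now supplies its own trailing semicolon since the expansion no longer appends one. A hypothetical illustration, not taken from the patch:

    // With the old single-parameter form this would fail to preprocess,
    // because the unprotected comma splits the argument in two:
    CPU_DEBUG_CAP_ENABLE(std::map<int, std::string> debugNames;)
    // The variadic form forwards everything between the outer parentheses as-is.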
src/plugins/intel_cpu/src/utils/debug_caps_config.cpp (new file, 66 lines)
@ -0,0 +1,66 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS

#include "debug_caps_config.h"

#include <string>

namespace ov {
namespace intel_cpu {

void DebugCapsConfig::readProperties() {
    auto readEnv = [](const char* envVar) {
        return std::getenv(envVar);
    };

    auto parseDumpFormat = [](const std::string& format) {
        if (format == "BIN")
            return FORMAT::BIN;
        else if (format == "TEXT")
            return FORMAT::TEXT;
        else
            IE_THROW() << "readDebugCapsProperties: Unknown dump format";
    };

    const char* envVarValue = nullptr;

    if ((envVarValue = readEnv("OV_CPU_EXEC_GRAPH_PATH")))
        execGraphPath = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_VERBOSE")))
        verbose = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_DIR")))
        blobDumpDir = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_FORMAT")))
        blobDumpFormat = parseDumpFormat(envVarValue);

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID")))
        blobDumpFilters[BY_EXEC_ID] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_PORTS")))
        blobDumpFilters[BY_PORTS] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_TYPE")))
        blobDumpFilters[BY_TYPE] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_NAME")))
        blobDumpFilters[BY_NAME] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_SUMMARY_PERF"))) {
        summaryPerf = envVarValue;
    }

    if ((envVarValue = readEnv("OV_CPU_DISABLE")))
        disable.parseAndSet(envVarValue);

    if ((envVarValue = readEnv("OV_CPU_DUMP_IR")))
        dumpIR.parseAndSet(envVarValue);
}

} // namespace intel_cpu
} // namespace ov

#endif // CPU_DEBUG_CAPS
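
To make the knobs above concrete, here is a minimal, hypothetical way to exercise them from a test program. The environment variable names come from readProperties(); the option strings, the use of setenv() (POSIX), and the test program itself are assumptions for illustration only:

    #include <cstdlib>
    #include "utils/debug_caps_config.h"

    int main() {
        // Dump IRs around the LPT and Snippets stages in XML form into ./cpu_ir_dump (illustrative values)
        setenv("OV_CPU_DUMP_IR", "dir=cpu_ir_dump formats=xml transformations=lpt,snippets", 1);
        // Skip the CPU-specific opset conversion stage entirely
        setenv("OV_CPU_DISABLE", "transformations=specific", 1);

        ov::intel_cpu::DebugCapsConfig debugCaps;  // the constructor calls readProperties()
        return 0;
    }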
src/plugins/intel_cpu/src/utils/debug_caps_config.h (new file, 213 lines)
@ -0,0 +1,213 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS

#include "ie_common.h"
#include "openvino/util/common_util.hpp"

#include <bitset>
#include <unordered_map>

namespace ov {
namespace intel_cpu {

class DebugCapsConfig {
private:
    struct PropertySetter;
    using PropertySetterPtr = std::shared_ptr<PropertySetter>;

public:
    DebugCapsConfig() {
        readProperties();
    }

    enum FILTER {
        BY_PORTS,
        BY_EXEC_ID,
        BY_TYPE,
        BY_NAME,
    };

    enum class FORMAT {
        BIN,
        TEXT,
    };

    std::string execGraphPath;
    std::string verbose;
    std::string blobDumpDir = "cpu_dump";
    FORMAT blobDumpFormat = FORMAT::TEXT;
    // std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and a defect in the C++11 standard)
    std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
    std::string summaryPerf = "";

    struct TransformationFilter {
        enum Type : uint8_t {
            PreLpt = 0, Lpt, PostLpt, Snippets, Specific, NumOfTypes
        };
        std::bitset<NumOfTypes> filter;

        PropertySetterPtr getPropertySetter() {
            return PropertySetterPtr(new BitsetFilterPropertySetter<NumOfTypes>("transformations", filter,
                {{"all", {PreLpt, Lpt, PostLpt, Snippets, Specific}},
                 {"common", {PreLpt, PostLpt}},
                 {"prelpt", {PreLpt}},
                 {"lpt", {Lpt}},
                 {"postlpt", {PostLpt}},
                 {"snippets", {Snippets}},
                 {"specific", {Specific}}
                }));
        }
    };
    struct IrFormatFilter {
        enum Type : uint8_t {
            Xml = 0, XmlBin, Dot, Svg, NumOfTypes
        };
        std::bitset<NumOfTypes> filter;

        PropertySetterPtr getPropertySetter() {
            return PropertySetterPtr(new BitsetFilterPropertySetter<NumOfTypes>("formats", filter,
                {{"all", {XmlBin, Dot, Svg}},
                 {"xml", {Xml}},
                 {"xmlbin", {XmlBin}},
                 {"dot", {Dot}},
                 {"svg", {Svg}},
                }));
        }
    };

    struct PropertyGroup {
        virtual std::vector<PropertySetterPtr> getPropertySetters(void) = 0;

        void parseAndSet(const std::string& str) {
            const auto& options = ov::util::split(str, ' ');
            const auto& propertySetters = getPropertySetters();
            bool failed = false;
            auto getHelp = [propertySetters] (void) {
                std::string help;
                for (const auto& property : propertySetters)
                    help.append('\t' + property->getPropertyName() + "=<" + property->getPropertyValueDescription() + ">\n");
                return help;
            };

            for (const auto& option : options) {
                const auto& parts = ov::util::split(option, '=');
                if (parts.size() > 2) {
                    failed = true;
                    break;
                }
                const auto& propertyName = ov::util::to_lower(parts.front());
                const auto& foundSetter = std::find_if(propertySetters.begin(), propertySetters.end(),
                    [propertyName] (const PropertySetterPtr& setter) { return setter->getPropertyName() == propertyName; });
                if (foundSetter == propertySetters.end() ||
                    !(*foundSetter)->parseAndSet(parts.size() == 1 ? "" : parts.back())) {
                    failed = true;
                    break;
                }
            }

            if (failed)
                IE_THROW() << "Wrong syntax: " << str << std::endl
                           << "The following space separated options are supported (option names are case insensitive):" << std::endl
                           << getHelp();
        }
    };

    struct : PropertyGroup {
        TransformationFilter transformations;

        std::vector<PropertySetterPtr> getPropertySetters(void) override {
            return { transformations.getPropertySetter() };
        }
    } disable;

    struct : PropertyGroup {
        std::string dir = "intel_cpu_dump";
        IrFormatFilter format = { 1 << IrFormatFilter::Xml };
        TransformationFilter transformations;

        std::vector<PropertySetterPtr> getPropertySetters(void) override {
            return { PropertySetterPtr(new StringPropertySetter("dir", dir, "path to dumped IRs")),
                     format.getPropertySetter(),
                     transformations.getPropertySetter() };
        }
    } dumpIR;

private:
    struct PropertySetter {
        virtual bool parseAndSet(const std::string& str) = 0;
        virtual std::string getPropertyValueDescription(void) const = 0;

        PropertySetter(const std::string&& name) : propertyName(name) {}
        const std::string& getPropertyName(void) const { return propertyName; }

    private:
        const std::string propertyName;
    };

    struct StringPropertySetter : PropertySetter {
        StringPropertySetter(const std::string&& name, std::string& ref, const std::string&& valueDescription)
            : property(ref), propertyValueDescription(valueDescription), PropertySetter(std::move(name)) {}
        bool parseAndSet(const std::string& str) override {
            property = str;
            return true;
        }
        std::string getPropertyValueDescription(void) const override { return propertyValueDescription; }

    private:
        std::string& property;
        const std::string propertyValueDescription;
    };

    template<std::size_t NumOfBits>
    struct BitsetFilterPropertySetter : PropertySetter {
        struct Token {
            std::string name;
            std::vector<size_t> bits;
        };

        BitsetFilterPropertySetter(const std::string&& name, std::bitset<NumOfBits>& ref, const std::vector<Token>&& tokens)
            : property(ref), propertyTokens(tokens), PropertySetter(std::move(name)) {}
        bool parseAndSet(const std::string& str) override {
            const auto& tokens = str.empty() ?
                std::vector<std::string>{"all"} : ov::util::split(ov::util::to_lower(str), ',');
            property.reset();
            for (const auto& token : tokens) {
                const bool tokenVal = (token.front() != '-');
                const auto& tokenName = tokenVal ? token : token.substr(1);
                const auto& foundToken = std::find_if(propertyTokens.begin(), propertyTokens.end(),
                    [tokenName] (const Token& token) { return token.name == tokenName; });
                if (foundToken == propertyTokens.end())
                    return false;

                for (const auto& bit : foundToken->bits) {
                    property.set(bit, tokenVal);
                }
            }
            return true;
        }
        std::string getPropertyValueDescription(void) const override {
            std::string supportedTokens = "comma separated filter tokens: ";
            for (auto i = 0; i < propertyTokens.size(); i++) {
                if (i)
                    supportedTokens.push_back(',');
                supportedTokens.append(propertyTokens[i].name);
            }
            supportedTokens.append("; -'token' is used for exclusion, case does not matter, no tokens is treated as 'all'");
            return supportedTokens;
        }

    private:
        std::bitset<NumOfBits>& property;
        const std::vector<Token> propertyTokens;
    };

    void readProperties();
};

} // namespace intel_cpu
} // namespace ov

#endif // CPU_DEBUG_CAPS
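
A quick illustration of the option grammar this class accepts; a sketch under the assumption that it is called from test code, using only the token names registered above:

    #include <cassert>
    #include "utils/debug_caps_config.h"

    void example(ov::intel_cpu::DebugCapsConfig& cfg) {
        // space separated name=value options; comma separated tokens; a leading '-' excludes a token
        cfg.dumpIR.parseAndSet("dir=./ir_dump formats=xml,svg transformations=all,-prelpt");

        using TF = ov::intel_cpu::DebugCapsConfig::TransformationFilter;
        using IF = ov::intel_cpu::DebugCapsConfig::IrFormatFilter;
        assert(cfg.dumpIR.format.filter[IF::Svg]);              // "svg" token set the Svg bit
        assert(!cfg.dumpIR.transformations.filter[TF::PreLpt]); // "-prelpt" cleared the PreLpt bit
    }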
src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp (new file, 113 lines)
@ -0,0 +1,113 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS

#include "debug_caps_config.h"
#include "openvino/util/file_util.hpp"
#include <openvino/pass/manager.hpp>
#include <openvino/pass/serialize.hpp>
#include <openvino/pass/visualize_tree.hpp>

namespace ov {
namespace intel_cpu {

class TransformationDumper {
public:
    explicit TransformationDumper(const DebugCapsConfig& config, const DebugCapsConfig::TransformationFilter::Type type,
                                  const std::shared_ptr<ov::Model>& model)
        : config(config), type(type), model(model) {
        for (auto prev = infoMap.at(type).prev; prev != TransformationType::NumOfTypes;
             prev = infoMap.at(prev).prev) {
            // no need to serialize the input graph if there were no transformations since the previous dump
            if (config.disable.transformations.filter[prev])
                continue;
            if (!config.dumpIR.transformations.filter[prev])
                break;
            if (wasDumped()[prev])
                return;
        }
        dump("_in");
    }
    ~TransformationDumper() {
        dump("_out");
        wasDumped().set(type);
    }

private:
    const DebugCapsConfig& config;
    const std::shared_ptr<ov::Model>& model;
    using TransformationType = DebugCapsConfig::TransformationFilter::Type;
    const TransformationType type;

    struct TransformationInfo {
        std::string name;
        TransformationType prev;
    };
    // std::hash<std::underlying_type<FILTER>::type> is necessary for Ubuntu-16.04 (gcc-5.4 and a defect in the C++11 standard)
    const std::unordered_map<TransformationType, TransformationInfo,
                             std::hash<std::underlying_type<TransformationType>::type>> infoMap =
        {{TransformationType::PreLpt,   {"preLpt",      TransformationType::NumOfTypes}},
         {TransformationType::Lpt,      {"lpt",         TransformationType::PreLpt}},
         {TransformationType::PostLpt,  {"postLpt",     TransformationType::Lpt}},
         {TransformationType::Snippets, {"snippets",    TransformationType::PostLpt}},
         {TransformationType::Specific, {"cpuSpecific", TransformationType::Snippets}}};
    std::bitset<TransformationType::NumOfTypes>& wasDumped(void) {
        static std::bitset<TransformationType::NumOfTypes> wasDumped;
        return wasDumped;
    }
    void dump(const std::string&& postfix) {
        static int num = 0; // just to keep dumped IRs ordered in the filesystem
        const auto pathAndName = config.dumpIR.dir + "/ir_" + std::to_string(num) + '_' +
                                 infoMap.at(type).name + postfix;

        ov::util::create_directory_recursive(config.dumpIR.dir);

        ov::pass::Manager serializer;

        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::XmlBin])
            serializer.register_pass<ov::pass::Serialize>(pathAndName + ".xml", "");

        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Xml]) {
            std::string xmlFile(pathAndName + ".xml");
            std::string binFile("/dev/null"); // @todo make it crossplatform using a dummy implementation of std::ostream

            serializer.register_pass<ov::pass::Serialize>(xmlFile, binFile);
        }

        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Svg]) {
            serializer.register_pass<ov::pass::VisualizeTree>(pathAndName + ".svg");
        }

        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Dot]) {
            serializer.register_pass<ov::pass::VisualizeTree>(pathAndName + ".dot");
        }

        serializer.run_passes(model);
        num++;
    }
};

} // namespace intel_cpu
} // namespace ov

# define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) \
    _config.disable.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type]
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) !CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(__VA_ARGS__)
# define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) \
    IE_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(_this->config.debugCaps, _type)); \
    auto dumperPtr = _this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] ? \
        std::unique_ptr<TransformationDumper>(new TransformationDumper(_this->config.debugCaps, \
            DebugCapsConfig::TransformationFilter::Type::_type, _this->model)) : \
        nullptr
# define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type) \
    if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_this->config.debugCaps, _type)) \
        return; \
    CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type)
#else
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) false
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) true
# define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type)
# define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type)
#endif // CPU_DEBUG_CAPS
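
For readability, this is roughly what CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Lpt) unrolls to inside a pipeline stage; an illustrative hand-written expansion of the macros above, with line breaks added:

    if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(this->config.debugCaps, Lpt))
        return;
    IE_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(this->config.debugCaps, Lpt));
    auto dumperPtr = this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::Lpt] ?
        std::unique_ptr<TransformationDumper>(new TransformationDumper(this->config.debugCaps,
            DebugCapsConfig::TransformationFilter::Type::Lpt, this->model)) :
        nullptr;  // if created, the dumper serializes the model now ("_in") and again when it leaves scope ("_out")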
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
#include "node_dumper.h"
|
#include "node_dumper.h"
|
||||||
|
|
||||||
|
#include "utils/debug_caps_config.h"
|
||||||
#include <node.h>
|
#include <node.h>
|
||||||
#include "ie_common.h"
|
#include "ie_common.h"
|
||||||
#include "utils/blob_dump.h"
|
#include "utils/blob_dump.h"
|
||||||
@ -26,20 +27,20 @@ static void formatNodeName(std::string& name) {
|
|||||||
std::replace(name.begin(), name.end(), ':', '-');
|
std::replace(name.begin(), name.end(), ':', '-');
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool shouldBeDumped(const NodePtr& node, const Config& config, const std::string& portsKind) {
|
static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, const std::string& portsKind) {
|
||||||
const auto& dumpFilters = config.blobDumpFilters;
|
const auto& dumpFilters = config.blobDumpFilters;
|
||||||
|
|
||||||
if (dumpFilters.empty())
|
if (dumpFilters.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (dumpFilters.count(Config::FILTER::BY_PORTS)) { // filter by ports configured
|
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_PORTS)) { // filter by ports configured
|
||||||
if (dumpFilters.at(Config::FILTER::BY_PORTS) != "ALL" &&
|
if (dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS) != "ALL" &&
|
||||||
portsKind != dumpFilters.at(Config::FILTER::BY_PORTS))
|
portsKind != dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dumpFilters.count(Config::FILTER::BY_EXEC_ID)) { // filter by exec id configured
|
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_EXEC_ID)) { // filter by exec id configured
|
||||||
std::stringstream ss(dumpFilters.at(Config::FILTER::BY_EXEC_ID));
|
std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_EXEC_ID));
|
||||||
int id;
|
int id;
|
||||||
bool matched = false;
|
bool matched = false;
|
||||||
|
|
||||||
@ -54,8 +55,8 @@ static bool shouldBeDumped(const NodePtr& node, const Config& config, const std:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dumpFilters.count(Config::FILTER::BY_TYPE)) { // filter by type configured
|
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_TYPE)) { // filter by type configured
|
||||||
std::stringstream ss(dumpFilters.at(Config::FILTER::BY_TYPE));
|
std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_TYPE));
|
||||||
std::string type;
|
std::string type;
|
||||||
bool matched = false;
|
bool matched = false;
|
||||||
|
|
||||||
@ -70,22 +71,22 @@ static bool shouldBeDumped(const NodePtr& node, const Config& config, const std:
             return false;
     }
 
-    if (dumpFilters.count(Config::FILTER::BY_NAME)) { // filter by name configured
-        if (dumpFilters.at(Config::FILTER::BY_NAME) != "*" && // to have 'single char' option for matching all the names
-            !std::regex_match(node->getName(), std::regex(dumpFilters.at(Config::FILTER::BY_NAME)))) // name does not match
+    if (dumpFilters.count(DebugCapsConfig::FILTER::BY_NAME)) { // filter by name configured
+        if (dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME) != "*" && // to have 'single char' option for matching all the names
+            !std::regex_match(node->getName(), std::regex(dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME)))) // name does not match
             return false;
     }
 
     return true;
 }
 
-static void dump(const BlobDumper& bd, const std::string& file, const Config& config) {
+static void dump(const BlobDumper& bd, const std::string& file, const DebugCapsConfig& config) {
     switch (config.blobDumpFormat) {
-    case Config::FORMAT::BIN: {
+    case DebugCapsConfig::FORMAT::BIN: {
         bd.dump(file);
         break;
     }
-    case Config::FORMAT::TEXT: {
+    case DebugCapsConfig::FORMAT::TEXT: {
         bd.dumpAsTxt(file);
         break;
     }
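
A hedged illustration of a filter set that shouldBeDumped() would accept; the container type behind `blobDumpFilters` is assumed here, while the `FILTER` keys, the "ALL"/"*" wildcards, the comma-separated lists and the regex matching follow the checks above:

// Hypothetical configuration; only the key names and value semantics come from the code above.
DebugCapsConfig debugCaps;
debugCaps.blobDumpFilters[DebugCapsConfig::FILTER::BY_PORTS]   = "OUT";          // dump output ports only ("ALL" keeps both directions)
debugCaps.blobDumpFilters[DebugCapsConfig::FILTER::BY_EXEC_ID] = "12,27";        // comma-separated execution ids
debugCaps.blobDumpFilters[DebugCapsConfig::FILTER::BY_TYPE]    = "Convolution";  // comma-separated node types
debugCaps.blobDumpFilters[DebugCapsConfig::FILTER::BY_NAME]    = ".*fc.*";       // std::regex over the node name; "*" matches everything
debugCaps.blobDumpFormat = DebugCapsConfig::FORMAT::TEXT;                        // dump() then calls dumpAsTxt() instead of the binary writer
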
@ -94,7 +95,7 @@ static void dump(const BlobDumper& bd, const std::string& file, const Config& co
     }
 }
 
-static void dumpInternalBlobs(const NodePtr& node, const Config& config) {
+static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config) {
     std::string nodeName = node->getName();
     formatNodeName(nodeName);
 
@ -116,7 +117,7 @@ static void dumpInternalBlobs(const NodePtr& node, const Config& config) {
     }
 }
 
-void dumpInputBlobs(const NodePtr& node, const Config& config, int count) {
+void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) {
     if (!shouldBeDumped(node, config, "IN"))
         return;
 
@ -150,7 +151,7 @@ void dumpInputBlobs(const NodePtr& node, const Config& config, int count) {
     dumpInternalBlobs(node, config);
 }
 
-void dumpOutputBlobs(const NodePtr& node, const Config& config, int count) {
+void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) {
     if (!shouldBeDumped(node, config, "OUT"))
         return;
 
@ -1,25 +1,26 @@
 // Copyright (C) 2018-2022 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-#ifdef CPU_DEBUG_CAPS
 #pragma once
 
+#ifdef CPU_DEBUG_CAPS
+#include "utils/debug_caps_config.h"
 #include <node.h>
-#include "config.h"
 
 namespace ov {
 namespace intel_cpu {
 
-void dumpInputBlobs(const NodePtr &node, const Config& config, int count = -1);
-void dumpOutputBlobs(const NodePtr &node, const Config& config, int count = -1);
+void dumpInputBlobs(const NodePtr &node, const DebugCapsConfig& config, int count = -1);
+void dumpOutputBlobs(const NodePtr &node, const DebugCapsConfig& config, int count = -1);
 
 class DumpHelper {
     const NodePtr& node;
     const int count;
-    const Config& config;
+    const DebugCapsConfig& config;
 
 public:
-    explicit DumpHelper(const NodePtr& _node, const Config& _config, int _count = -1): node(_node), config(_config), count(_count) {
+    explicit DumpHelper(const NodePtr& _node, const DebugCapsConfig& _config, int _count = -1):
+        node(_node), config(_config), count(_count) {
         dumpInputBlobs(node, config, count);
     }
 
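
A minimal sketch of how the helper might wrap a node's execution; only the constructor's behavior is visible in this hunk, so the execution call and any output-side dumping are assumptions:

// Hypothetical wrapper around node execution.
void executeWithDumps(const NodePtr& node, const DebugCapsConfig& debugCaps, int inferCount) {
    DumpHelper dumper(node, debugCaps, inferCount);  // constructor dumps the node's input blobs, subject to the filters
    // node->execute(...);                           // the node would run here (signature not shown in this excerpt)
}   // output dumping on scope exit would be the helper's remaining responsibility (outside this excerpt)
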