[CPU] [DEBUG CAPS] Extension for snippets and other ngraph transformations (#14223)

This commit is contained in:
Egor Duplenskii 2022-12-20 06:49:37 +01:00 committed by GitHub
parent 40e19dec00
commit e306cbc67a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 1278 additions and 718 deletions

View File

@ -14,10 +14,11 @@
#include "ie_parallel.hpp"
#include "ie_system_conf.h"
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "openvino/core/type/element_type_traits.hpp"
#include "openvino/runtime/properties.hpp"
#include <cpu/x64/cpu_isa_traits.hpp>
#include "utils/debug_capabilities.h"
#include "cpu/x64/cpu_isa_traits.hpp"
namespace ov {
namespace intel_cpu {
@ -48,10 +49,24 @@ Config::Config() {
if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16))
enforceBF16 = false;
CPU_DEBUG_CAP_ENABLE(readDebugCapsProperties());
CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
updateProperties();
}
#ifdef CPU_DEBUG_CAPS
/**
 * Debug capabilities configuration takes priority over the common one.
 * Some debug capabilities additionally require certain common
 * configuration properties to be enabled.
 */
void Config::applyDebugCapsProperties() {
    // Both verbose mode and the performance summary rely on per-node
    // performance counters, so force them on whenever either is requested.
    const bool perfCountersRequired = !debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty();
    if (perfCountersRequired)
        collectPerfCounters = true;
}
#endif
void Config::readProperties(const std::map<std::string, std::string> &prop) {
const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
@ -184,7 +199,7 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
if (exclusiveAsyncRequests) // Exclusive request feature disables the streams
streamExecutorConfig._streams = 1;
CPU_DEBUG_CAP_ENABLE(readDebugCapsProperties());
CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
updateProperties();
}
@ -239,58 +254,6 @@ void Config::updateProperties() {
_config.insert({PluginConfigParams::KEY_CACHE_DIR, cache_dir});
}
#ifdef CPU_DEBUG_CAPS
/**
 * Reads the CPU-plugin debug-capability settings from environment variables
 * (OV_CPU_*) and stores them into the corresponding Config fields.
 * Missing variables leave the current values untouched.
 * @throws if OV_CPU_BLOB_DUMP_FORMAT holds an unknown format name.
 */
void Config::readDebugCapsProperties() {
    auto readEnv = [](const char* envVar) {
        return std::getenv(envVar);
    };

    auto parseDumpFormat = [](const std::string& format) {
        if (format == "BIN")
            return FORMAT::BIN;
        if (format == "TEXT")
            return FORMAT::TEXT;
        IE_THROW() << "readDebugCapsProperties: Unknown dump format";
    };

    const char* envVarValue = nullptr;

    // Note: double parentheses make the assignment-in-condition explicit
    // and silence -Wparentheses; each branch applies only when the variable is set.
    if ((envVarValue = readEnv("OV_CPU_EXEC_GRAPH_PATH")))
        execGraphPath = envVarValue;
    if ((envVarValue = readEnv("OV_CPU_VERBOSE")))
        verbose = envVarValue;
    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_DIR")))
        blobDumpDir = envVarValue;
    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_FORMAT")))
        blobDumpFormat = parseDumpFormat(envVarValue);
    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID")))
        blobDumpFilters[BY_EXEC_ID] = envVarValue;
    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_PORTS")))
        blobDumpFilters[BY_PORTS] = envVarValue;
    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_TYPE")))
        blobDumpFilters[BY_TYPE] = envVarValue;
    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_NAME")))
        blobDumpFilters[BY_NAME] = envVarValue;
    if ((envVarValue = readEnv("OV_CPU_SUMMARY_PERF"))) {
        // The performance summary needs counters to be collected.
        collectPerfCounters = true;
        summaryPerf = envVarValue;
    }

    // Always enable perf counters for verbose mode as well.
    if (!verbose.empty())
        collectPerfCounters = true;
}
#endif // CPU_DEBUG_CAPS
} // namespace intel_cpu
} // namespace ov

View File

@ -6,8 +6,11 @@
#include <threading/ie_istreams_executor.hpp>
#include <ie_performance_hints.hpp>
#include "utils/debug_capabilities.h"
#include <ie/ie_common.h>
#include <openvino/util/common_util.hpp>
#include "utils/debug_caps_config.h"
#include <bitset>
#include <string>
#include <map>
#include <mutex>
@ -57,31 +60,12 @@ struct Config {
std::map<std::string, std::string> _config;
#ifdef CPU_DEBUG_CAPS
enum FILTER {
BY_PORTS,
BY_EXEC_ID,
BY_TYPE,
BY_NAME,
};
enum class FORMAT {
BIN,
TEXT,
};
std::string execGraphPath;
std::string verbose;
std::string blobDumpDir = "cpu_dump";
FORMAT blobDumpFormat = FORMAT::TEXT;
// std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and defect in C++11 standard)
std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
std::string summaryPerf = "";
void readDebugCapsProperties();
#endif
bool isNewApi = true;
#ifdef CPU_DEBUG_CAPS
DebugCapsConfig debugCaps;
void applyDebugCapsProperties();
#endif
};
} // namespace intel_cpu

View File

@ -6,6 +6,7 @@ Use the following cmake option to enable debug capabilities:
* [Verbose mode](verbose.md)
* [Blob dumping](blob_dumping.md)
* [Graph serialization](graph_serialization.md)
* [Graph transformation disabling](feature_disabling.md#graph-transformations)
## Debug log

View File

@ -29,8 +29,8 @@ Default is *cpu_dump*
OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
```
Options are:
* BIN (default)
* TEXT
* BIN
* TEXT (default)
## Filter input / output blobs
To dump only input / output blobs:

View File

@ -0,0 +1,52 @@
# Filters
Filters described below have the following common format:
```sh
filter_name=<comma_separated_tokens>
```
Tokens are processed from left to right, and each one includes or excludes the corresponding value.\
To exclude a value, prepend the token with a minus sign: *-token*\
All tokens are case insensitive, and an empty token list is treated as *all*\
So the filters below are equivalent:
* filter_name
* filter_name=all
* filter_name=-all,ALL
## IR format filter
IR format filter is used to specify output IR formats, e.g. for [serialization](graph_serialization.md#graph-transformations).
```sh
formats=<comma_separated_tokens>
```
The following tokens are supported:
* all\
equals to <xml,dot,svg>
* xml (default)\
IR in .xml file. Can be opened using, for example, *netron* app. (For now the option is Linux only)
* xmlbin\
IR in .xml and .bin files. Can be opened using, for example, *netron* app.
* dot\
IR in .dot file (.svg.dot file if svg is also specified). Can be inspected using, for example, *graphviz* tools.
* svg\
IR in .svg file. Requires *dot* tool to be installed on the host, not supported on Windows.\
Generation is based on dot representation, so IR is additionally dumped to .svg.dot file.
## Transformation filter
Transformation filter is used to specify main graph transformation stages for different purposes,
e.g. for [disabling](feature_disabling.md#graph-transformation) or [serialization](graph_serialization.md#graph-transformations).
```sh
transformations=<comma_separated_tokens>
```
The following tokens are supported:
* all (default)\
equals to <preLpt,lpt,postLpt,snippets,specific>
* common \
equals to <preLpt,postLpt>
* preLpt
* lpt
* postLpt
* snippets
* specific

View File

@ -0,0 +1,26 @@
# Feature disabling
A common way to disable a feature in the CPU plugin is via the environment variable **OV_CPU_DISABLE**:
```sh
OV_CPU_DISABLE=<space_separated_options> binary ...
```
Option names are case insensitive and processed from left to right,\
so last one overwrites previous ones if duplicated.
Examples:
```sh
OV_CPU_DISABLE="transformations" binary ...
OV_CPU_DISABLE="transformations=lpt" binary ...
OV_CPU_DISABLE="transformations=all,-common" binary ...
```
By means of corresponding options **OV_CPU_DISABLE** controls disabling of the following features:
## Graph transformations
Graph transformation disabling is controlled by the following option inside **OV_CPU_DISABLE**:
```sh
transformations=<comma_separated_tokens>
```
Filter with main transformation stages to disable specified ones.\
See [transformation filter](debug_caps_filters.md#transformation-filter) for more details.

View File

@ -1,17 +1,43 @@
# Graph serialization
This functionality allows serializing the execution graph using an environment variable:
Graph serialization is disabled by default and controlled by environment variables.
## Execution graph
Execution graph could be serialized using environment variable **OV_CPU_EXEC_GRAPH_PATH**:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
OV_CPU_EXEC_GRAPH_PATH=<option> binary ...
```
Possible serialization options:
* cout\
Serialize to console output.
* \<path\>.xml\
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app.
* **TBD**: \<path\>.dot\
Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.
## Graph transformations
Additionally, IR could be serialized at specified stages using environment variable **OV_CPU_DUMP_IR**:
```sh
OV_CPU_DUMP_IR=<space_separated_options> binary ...
```
Possible serialization options:
* cout
Examples:
```sh
OV_CPU_DUMP_IR="transformations" binary ...
OV_CPU_DUMP_IR="transformations=snippets dir=path/dumpDir" binary ...
OV_CPU_DUMP_IR="transformations=all,-common DIR=path/dumpdir formats=svg,xml" binary ...
```
Serialize to console output
* \<path\>.xml
Option names are case insensitive, the following options are supported:
* dir=\<path\>\
Path to dumped IR files. If omitted, it defaults to *intel_cpu_dump*
* formats=<comma_separated_tokens>\
Filter with IR formats to dump. If omitted, it defaults to *xml*\
See [IR format filter](debug_caps_filters.md#ir-format-filter) for more details.
* transformations=<comma_separated_tokens>\
Filter with main transformation stages to serialize graph before and after specified ones.\
See [transformation filter](debug_caps_filters.md#transformation-filter) for more details.
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.
Options are processed from left to right, so last one overwrites previous ones if duplicated.

View File

@ -1073,7 +1073,7 @@ void Graph::InferStatic(InferRequestBase* request) {
dnnl::stream stream(eng);
for (const auto& node : executableGraphNodes) {
VERBOSE(node, config.verbose);
VERBOSE(node, config.debugCaps.verbose);
PERF(node, config.collectPerfCounters);
if (request)
@ -1160,7 +1160,7 @@ void Graph::InferDynamic(InferRequestBase* request) {
updateNodes(stopIndx);
for (; inferCounter < stopIndx; ++inferCounter) {
auto& node = executableGraphNodes[inferCounter];
VERBOSE(node, config.verbose);
VERBOSE(node, config.debugCaps.verbose);
PERF(node, config.collectPerfCounters);
if (request)
@ -1171,7 +1171,7 @@ void Graph::InferDynamic(InferRequestBase* request) {
}
inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream) const {
DUMP(node, config, infer_count);
DUMP(node, config.debugCaps, infer_count);
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute);
if (node->isDynamicNode()) {

View File

@ -210,7 +210,7 @@ std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const Graph &graph
#ifdef CPU_DEBUG_CAPS
void serialize(const Graph &graph) {
const std::string& path = graph.getConfig().execGraphPath;
const std::string& path = graph.getConfig().debugCaps.execGraphPath;
if (path.empty())
return;
@ -257,7 +257,7 @@ void serializeToCout(const Graph &graph) {
}
void summary_perf(const Graph &graph) {
const std::string& summaryPerf = graph.getConfig().summaryPerf;
const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf;
if (summaryPerf.empty())
return;

View File

@ -27,6 +27,7 @@ namespace intel_cpu {
inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphFunc) {
RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset);
ngraph::pass::Manager manager;
manager.register_pass<ConvertMatMulToFC>();
manager.register_pass<AlignMatMulInputRanks>();

View File

@ -2,140 +2,28 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ie_metric_helpers.hpp"
#include "plugin.h"
#include "extension_mngr.h"
#include "weights_cache.hpp"
#include "extension.h"
#include "itt.h"
#include "serialize.h"
#include "ie_metric_helpers.hpp" // must be included first
#include "plugin.h"
#include "transformation_pipeline.h"
#include "itt.h"
#include "extension_mngr.h"
#include "extension.h"
#include "serialize.h"
#include "threading/ie_executor_manager.hpp"
#include "ie_icore.hpp"
#include "ie_plugin_config.hpp"
#include "ie_system_conf.h"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include <threading/ie_executor_manager.hpp>
#include <memory>
#include <ie_plugin_config.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <ie_icore.hpp>
#include <fstream>
#include <vector>
#include <tuple>
#include <unordered_set>
#include <ie_system_conf.h>
#include <ie_ngraph_utils.hpp>
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/fq_mul_fusion.hpp>
#include <transformations/common_optimizations/mul_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include <transformations/common_optimizations/convert_quantize_dequantize.hpp>
#include <transformations/common_optimizations/nop_elimination.hpp>
#include <transformations/common_optimizations/wrap_interpolate_into_transposes.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
#include <transformations/op_conversions/convert_broadcast_to_tiles.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_slice_to_strided_slice.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_upgrade.hpp>
#include <transformations/op_conversions/detection_output_downgrade.hpp>
#include <transformations/op_conversions/detection_output_upgrade.hpp>
#include <transformations/op_conversions/gelu7_downgrade.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp>
#include <transformations/op_conversions/mvn6_decomposition.hpp>
#include <transformations/op_conversions/normalize_l2_decomposition.hpp>
#include <transformations/op_conversions/reduce_l1_decomposition.hpp>
#include <transformations/op_conversions/reduce_l2_decomposition.hpp>
#include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp>
#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
#include <transformations/op_conversions/convert_subtract.hpp>
#include <transformations/op_conversions/softmax_decomposition.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/op_conversions/convert_mod.hpp>
#include <transformations/op_conversions/convert_ti_to_sequences.hpp>
#include <transformations/op_conversions/lstm_cell_decomposition.hpp>
#include <transformations/op_conversions/rnn_cell_decomposition.hpp>
#include <transformations/op_conversions/gru_cell_decomposition.hpp>
#include <transformations/op_conversions/log_softmax_decomposition.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_9.hpp>
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp>
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/smart_reshape/matmul_sr.hpp>
#include <transformations/op_conversions/convert_minimum_to_power_and_max.hpp>
#include <transformations/op_conversions/convert_reduce_to_pooling.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/disable_decompression_convert_constant_folding.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <transformations/op_conversions/fq_decomposition.hpp>
#include <transformations/utils/utils.hpp>
#include <transformations/op_conversions/convert_roi_align_v9_to_v3.hpp>
#include <transformations/op_conversions/convert_roi_align_v3_to_v9.hpp>
#include <transformations/op_conversions/softsign_decomposition.hpp>
#include "transformations/op_conversions/eye_decomposition.hpp"
#include "transformations/op_conversions/unique_decomposition.hpp"
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#include "ngraph_transformations/snippets_mark_skipped.hpp"
#include "ngraph_transformations/mha_fusion.hpp"
#include "ngraph_transformations/convert_to_interaction.hpp"
#include "ngraph_transformations/convert_fq_rnn_to_quantized_rnn.hpp"
#include "ngraph_transformations/move_eltwise_up_data_movement.hpp"
#include "ngraph_transformations/swap_convert_transpose.hpp"
#include <snippets/pass/collapse_subgraph.hpp>
#include <snippets/pass/common_optimizations.hpp>
#include <snippets/pass/convert_constants.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <openvino/opsets/opset10.hpp>
#include <ngraph/op/util/op_types.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/graph_util.hpp>
#include <ov_ops/augru_cell.hpp>
#include <ov_ops/augru_sequence.hpp>
#include <transformations/low_precision/mark_dequantization_subgraph.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/common/precisions_restriction.hpp>
#include <low_precision/convert_subtract_constant.hpp>
#include <low_precision/convolution.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/layer_transformation.hpp>
#include <low_precision/low_precision.hpp>
#include <low_precision/multiply_to_group_convolution.hpp>
#include <low_precision/network_helper.hpp>
#include "openvino/runtime/core.hpp"
#include "openvino/util/common_util.hpp"
#include <ie_algorithm.hpp>
#include "performance_heuristics.hpp"
#include "nodes/mvn.h"
#include "nodes/fake_quantize.h"
#include "nodes/normalize.h"
#include "nodes/mha.h"
#include "weights_cache.hpp"
#include "utils/denormals.hpp"
#include "transformations/common_optimizations/augru_cell_fusion.hpp"
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#ifndef __GNUC_PREREQ
@ -262,452 +150,6 @@ Engine::~Engine() {
executorManager()->clear("CPUCallbackExecutor");
}
/**
 * Retargets a Convert node to the element type @p to.
 * For a real -> boolean conversion that is being lowered to an integral type
 * (boolean is materialized as u8), inserts Abs + Ceiling before the Convert so
 * small non-zero values map to 1 instead of truncating to 0
 * (e.g. 0.01 must become 1 for boolean, not 0 for u8).
 * @return true if the node was a Convert and was handled, false otherwise.
 */
static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx) {
    auto convert = ov::as_type_ptr<ov::opset10::Convert>(node);
    if (!convert)
        return false;

    const bool realToBooleanAsIntegral = convert->input(0).get_element_type().is_real() &&
                                         convert->get_convert_element_type() == ngraph::element::boolean &&
                                         to.is_integral_number();
    if (!realToBooleanAsIntegral) {
        // Safe to simply retarget the Convert's destination type.
        convert->set_convert_element_type(to);
        return true;
    }

    // Build Abs -> Ceiling -> Convert(to) to preserve boolean semantics.
    auto abs_op = std::make_shared<ov::opset10::Abs>(convert->input_value(0).get_node_shared_ptr());
    auto ceil_op = std::make_shared<ov::opset10::Ceiling>(abs_op);
    auto replacement = std::make_shared<ov::opset10::Convert>(ceil_op, to);
    replacement->set_friendly_name(convert->get_friendly_name());
    ov::copy_runtime_info(convert, {abs_op, ceil_op, replacement});
    ov::replace_node(convert, replacement);
    return true;
}
static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function> nGraphFunc, const bool _enableLPT, const bool _enableBF16,
const bool _enableSnippets, const bool isLegacyApi) {
ov::pass::Manager manager;
manager.set_per_pass_validation(false);
manager.register_pass<ov::pass::InitNodeInfo>();
const bool useLpt =
_enableLPT &&
ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc);
auto defaultPrecisions = useLpt ? ngraph::pass::low_precision::precision_set::int8_support : std::vector<ov::element::Type>{};
bool hasINT16orINT32Levels = false;
if (useLpt) {
CPU_LPT_SCOPE(LowPrecisionTransformations_Part1);
hasINT16orINT32Levels = ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
nGraphFunc,
{ngraph::pass::low_precision::levels::int16, ngraph::pass::low_precision::levels::int16_narrow_range,
ngraph::pass::low_precision::levels::int32, ngraph::pass::low_precision::levels::int32_narrow_range});
if (hasINT16orINT32Levels) {
defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_int16_int32_support;
}
manager.register_pass<ov::pass::MarkDequantizationSubgraph>(defaultPrecisions);
}
auto get_convert_precisions = []() {
precisions_array array = {
{ngraph::element::i64, ngraph::element::i32},
{ngraph::element::u64, ngraph::element::i32},
{ngraph::element::i16, ngraph::element::i32},
{ngraph::element::u16, ngraph::element::i32},
{ngraph::element::u32, ngraph::element::i32},
{ngraph::element::f64, ngraph::element::f32},
{ngraph::element::f16, ngraph::element::f32},
{ngraph::element::boolean, ngraph::element::u8},
{ngraph::element::i4, ngraph::element::i8},
{ngraph::element::u4, ngraph::element::u8}
};
if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
array.push_back({ngraph::element::bf16, ngraph::element::f32});
return array;
};
static const auto precisions = get_convert_precisions();
type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
manager.register_pass<ov::pass::AUGRUCellFusion>();
manager.register_pass<ov::pass::CommonOptimizations>();
manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
manager.register_pass<ov::pass::TransposeSinking>();
manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ov::pass::LSTMCellDecomposition>();
manager.register_pass<ov::pass::GRUCellDecomposition>();
manager.register_pass<ov::pass::RNNCellDecomposition>();
manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
manager.register_pass<ov::pass::TransposeMatMul>();
manager.register_pass<ov::pass::ConstantFolding>();
if (useLpt) {
CPU_LPT_SCOPE(LowPrecisionTransformations_Part2);
manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
}
manager.register_pass<ov::pass::Validate>();
manager.register_pass<ov::pass::ConvertPrecision>(precisions, type_to_fuse);
manager.register_pass<ov::pass::EliminateConvert>();
manager.register_pass<SwapConvertTranspose>();
manager.register_pass<ConvertToInteraction>();
manager.register_pass<ConvertInteractionInt8>();
auto pass_config = manager.get_pass_config();
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
// SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
ov::pass::ConvertDepthToSpace>(
[](const_node_ptr &node) -> bool {
return node->input_value(0).get_shape().size() <= 5lu &&
node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
});
pass_config->set_callback<ov::pass::ConvertBatchToSpace,
ov::pass::ConvertSpaceToBatch>(
[](const_node_ptr &node) -> bool {
const auto & rank = node->input(0).get_partial_shape().rank().get_length();
return rank == 4lu || rank == 5lu;
});
auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool {
if (const auto &rnn_cell = std::dynamic_pointer_cast<const ngraph::opset4::RNNCell>(node)) {
return rnn_cell->get_clip() == 0.0f;
} else if (const auto &gru_cell = std::dynamic_pointer_cast<const ngraph::opset4::GRUCell>(
node)) {
return gru_cell->get_clip() == 0.0f
&& gru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
} else if (const auto &augru_cell = std::dynamic_pointer_cast<const ov::op::internal::AUGRUCell>(
node)) {
return augru_cell->get_clip() == 0.0f
&& augru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
} else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ngraph::opset4::LSTMCell>(
node)) {
return lstm_cell->get_clip() == 0.0f &&
lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
} else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ngraph::opset1::LSTMCell>(
node)) {
return lstm_cell_v1->get_clip() == 0.0f &&
lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
}
return false;
};
// Sequences supported by the plugin shouldn't be converted to TensorIterator.
// sequence_length input is not supported in all Sequences, so if is_seq_len_provided() == true, we
// should always convert to TensorIterator.
// RNN/GRU/LSTM Sequences are supported with clip == 0, and with default activations.
auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool {
const auto& data = node->input(0);
const auto& data_pshape = data.get_partial_shape();
// WA: dynamic shapes make impossible to check seq_len due to shapeOf subgraphs
// but the sequence is still supported in CPU and doesn't need to be decomposed
if (data_pshape.is_dynamic())
return true;
if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static())
return false;
auto max_seq_len = data.get_shape().at(1);
if (const auto &rnn_seq = std::dynamic_pointer_cast<const ngraph::opset6::RNNSequence>(node)) {
return rnn_seq->get_clip() == 0.0f &&
!ngraph::op::util::is_seq_len_provided(rnn_seq->get_input_node_shared_ptr(2),
max_seq_len);
} else if (const auto &gru_seq = std::dynamic_pointer_cast<const ngraph::opset6::GRUSequence>(
node)) {
return gru_seq->get_clip() == 0.0f &&
gru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
!ngraph::op::util::is_seq_len_provided(gru_seq->get_input_node_shared_ptr(2),
max_seq_len);
} else if (const auto &augru_seq = std::dynamic_pointer_cast<const ov::op::internal::AUGRUSequence>(
node)) {
return augru_seq->get_clip() == 0.0f &&
augru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
!ngraph::op::util::is_seq_len_provided(augru_seq->get_input_node_shared_ptr(2),
max_seq_len);
} else if (const auto &lstm_seq = std::dynamic_pointer_cast<const ngraph::opset6::LSTMSequence>(
node)) {
return lstm_seq->get_clip() == 0.0f &&
lstm_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"} &&
!ngraph::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3),
max_seq_len);
}
return false;
};
pass_config->set_callback<ov::pass::ConvertRNNSequenceToTensorIterator,
ov::pass::ConvertGRUSequenceToTensorIterator,
ov::pass::ConvertLSTMSequenceToTensorIterator>(
[isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
return isSequencePrimitiveSupported(node);
});
pass_config->set_callback<ov::pass::RNNCellDecomposition, ov::pass::GRUCellDecomposition,
ov::pass::LSTMCellDecomposition>(
[isCellPrimitiveSupported](const_node_ptr &node) -> bool {
return isCellPrimitiveSupported(node);
});
pass_config->set_callback<ov::pass::MVN6Decomposition>(
[](const_node_ptr &node) -> bool {
std::string errorMessage;
return node::MVN::isSupportedOperation(node, errorMessage);
});
pass_config->set_callback<ov::pass::NormalizeL2Decomposition>(
[](const_node_ptr &node) -> bool {
std::string errorMsg;
return node::NormalizeL2::isSupportedOperation(node, errorMsg);
});
pass_config->enable<ov::pass::SoftmaxDecomposition>();
pass_config->set_callback<ov::pass::SoftmaxDecomposition>(
[](const_node_ptr &node) -> bool {
return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
});
if (!isLegacyApi) {
auto nmsCallback = [](const_node_ptr &node) -> bool {
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}
}
return true;
};
pass_config->set_callback<ov::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
pass_config->set_callback<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
pass_config->set_callback<ov::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
}
// List of enabled/disabled transformations
// Allow FP16 Converts to be folded and FP16 constants to be upgraded to FP32 data type
pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
pass_config->disable<ov::pass::EyeDecomposition>();
pass_config->disable<ov::pass::ConvertGELU>();
pass_config->disable<ov::pass::ConvertShuffleChannels3>();
pass_config->disable<ov::pass::Gelu7Downgrade>();
pass_config->disable<ov::pass::HSwishDecomposition>();
pass_config->disable<ov::pass::ReduceL1Decomposition>();
pass_config->disable<ov::pass::ReduceL2Decomposition>();
pass_config->disable<ov::pass::SoftPlusDecomposition>();
pass_config->disable<ov::pass::HSigmoidDecomposition>();
pass_config->disable<ov::pass::ConvertMod>();
pass_config->disable<ov::pass::ConvertShuffleChannels3>();
pass_config->disable<ov::pass::WeightsDequantizeToFakeQuantize>();
pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
pass_config->disable<ov::pass::ConvertGather7ToGather1>();
pass_config->disable<ov::pass::ConvertGather8ToGather7>();
pass_config->disable<ov::pass::ConvertMinimum>();
pass_config->disable<ov::pass::ConvertBroadcastToTiles>();
pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
pass_config->disable<ov::pass::ConvertReduceSumToPooling>();
pass_config->disable<ov::pass::SliceToStridedSlice>();
pass_config->disable<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
pass_config->disable<ov::pass::ConvertROIAlign9To3>();
pass_config->disable<ov::pass::SoftSignDecomposition>();
pass_config->disable<ov::pass::UniqueDecomposition>();
pass_config->enable<ov::pass::NormalizeL2Decomposition>();
pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
pass_config->enable<ov::pass::ConvertGather1ToGather7>();
pass_config->enable<ov::pass::ConvertDetectionOutput1ToDetectionOutput8>();
pass_config->enable<ov::pass::ConvertROIAlign3To9>();
if (useLpt) {
CPU_LPT_SCOPE(LowPrecisionTransformations_Part3);
pass_config->set_callback<ov::pass::AddFakeQuantizeFusion,
ov::pass::MulFakeQuantizeFusion,
ov::pass::FakeQuantizeMulFusion>([](const_node_ptr &node) -> bool {
std::string errMsg;
return !node::FakeQuantize::isSupportedOperation(node, errMsg);
});
pass_config->set_callback<ov::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions);
});
}
manager.run_passes(nGraphFunc);
using namespace ngraph::pass::low_precision;
if (useLpt) {
CPU_LPT_SCOPE(LowPrecisionTransformations_Part4);
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations");
//Only enable conv/group conv signed input on AMX platform.
std::vector<ngraph::element::Type> input0LowPrecisionList;
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
input0LowPrecisionList = {ngraph::element::u8, ngraph::element::i8};
} else {
input0LowPrecisionList = {ngraph::element::u8};
}
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{{0}, input0LowPrecisionList},
{{1}, {ngraph::element::i8}},
}),
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
{{0}, {ngraph::element::u8, ngraph::element::i8}},
{{1}, {ngraph::element::i8}}
}),
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
{{0}, input0LowPrecisionList},
{{1}, {ngraph::element::i8}}
}),
PrecisionsRestriction::create<ngraph::opset1::Multiply>({
{{0}, {ngraph::element::u8}},
{{1}, {ngraph::element::i8}},
}),
PrecisionsRestriction::create<ngraph::opset1::MatMul>({
{{0}, {ngraph::element::u8, ngraph::element::i8}},
{{1}, {ngraph::element::i8}}
}),
PrecisionsRestriction::create<ngraph::opset5::LSTMSequence>({
{{0, 1}, {ngraph::element::u8, ngraph::element::i8}},
}),
PrecisionsRestriction::create<ngraph::opset6::GRUSequence>({
{{0, 1}, {ngraph::element::u8, ngraph::element::i8}},
}),
});
auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
});
// for GNA networks reference execution
bool updatePrecision = true;
if (hasINT16orINT32Levels) {
updatePrecision = false;
supportedPrecisions = std::vector<PrecisionsRestriction>({});
}
ov::pass::Manager lptManager;
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
supportedPrecisions,
quantizationRestrictions,
LayerTransformation::Params(updatePrecision, ngraph::element::f32, defaultPrecisions));
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
if (const auto mulitply = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply);
}
return false;
});
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>(
[&defaultPrecisions](const_node_ptr& node) -> bool {
return LayerTransformation::isAsymmetricQuantization(node, defaultPrecisions) ||
WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
});
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>([](const_node_ptr& node) -> bool {
return true;//MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node);
});
lptManager.run_passes(nGraphFunc);
}
ov::pass::Manager postLPTPassManager;
postLPTPassManager.register_pass<ov::pass::UnrollTensorIterator>();
postLPTPassManager.register_pass<ov::pass::ReshapePRelu>();
postLPTPassManager.get_pass_config()->set_callback<ov::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
// UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
return node->get_rt_info().count("UNROLL_TI") == 0;
});
postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
if (node->get_input_size() >= 2) {
return node->get_input_element_type(1) == ngraph::element::i8 || node->get_input_element_type(1) == ngraph::element::u8;
}
return false;
});
postLPTPassManager.register_pass<ov::pass::ConstantFolding>();
// Snippets may brake MHA patterns so the fusion has to performed before
postLPTPassManager.register_pass<MHAFusion>();
postLPTPassManager.register_pass<FuseFQtoInteraction>();
postLPTPassManager.get_pass_config()->set_callback<MHAFloatFusion, MHAFloatFusion2,
MHAQuantFusion, MHAQuantFusion2>([_enableBF16](const std::shared_ptr<const ov::Node>& n) -> bool {
std::string errorMessage;
if (!node::MHA::isSupportedOperation(n, errorMessage))
return true;
// Implementation calls AMX BF16 brgemm only for tensors with K and N aligned on 2, otherwise fallbacks on vector impl
// Vector madd BF16 instruction on SPR has reduced performance on HW level, which results in overall perf degradation
size_t bf16Factor = 2;
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16_amx_bf16) &&
(n->get_input_element_type(0) == element::bf16 || (n->get_input_element_type(0) == element::f32 && _enableBF16)) &&
(n->get_input_shape(0)[3] % bf16Factor != 0 || n->get_input_shape(1)[1] % bf16Factor != 0 || n->get_input_shape(3)[3] % bf16Factor != 0)) {
return true;
}
return false;
});
// Execute before snippets. Otherwise FQ will be converted to Subgraph
postLPTPassManager.register_pass<ConvertFqRnnToQuantizedRnn>();
postLPTPassManager.run_passes(nGraphFunc);
if (_enableSnippets && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) {
ov::pass::Manager snippetsManager;
snippetsManager.register_pass<SnippetsMarkSkipped>();
snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
snippetsManager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>(
[](const std::shared_ptr<const ov::Node>& n) -> bool {
// CPU Plugin support Swish in Subgraph via conversion to SwichCPU which assumes second input to be constant
if (ov::is_type<const ov::op::v4::Swish>(n)) {
if (n->inputs().size() > 1 && !ov::is_type<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1)))
return true;
}
const auto& inputs = n->inputs();
// todo: clarify whether we can evaluate snippets on const paths
const bool has_only_const_inputs = std::all_of(inputs.begin(), inputs.end(),
[](const ov::Input<const ov::Node> &in) {
return ov::is_type<ov::op::v0::Constant>(in.get_source_output().get_node_shared_ptr());
});
// todo: clarify whether we can evaluate snippets on inputs with larger ranks
auto rank_is_too_large = [](const ov::descriptor::Tensor& t ) {
// callback is called has_supported_in_out(), so it's safe to assume that the shapes are static
return t.get_partial_shape().rank().get_length() > 6;
};
const bool bad_input_rank = std::any_of(inputs.begin(), inputs.end(),
[&](const ov::Input<const ov::Node>& in) {return rank_is_too_large(in.get_tensor());});
const auto& outputs = n->outputs();
const bool bad_output_rank = std::any_of(outputs.begin(), outputs.end(),
[&](const ov::Output<const ov::Node>& out) {return rank_is_too_large(out.get_tensor());});
return has_only_const_inputs || bad_input_rank || bad_output_rank;
});
snippetsManager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
snippetsManager.run_passes(nGraphFunc);
}
ov::pass::Manager postSnippetsManager;
postSnippetsManager.register_pass<ov::pass::FakeQuantizeDecomposition>();
postSnippetsManager.get_pass_config()->set_callback<ov::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
std::string errMsg;
return node::FakeQuantize::isSupportedOperation(node, errMsg);
});
postSnippetsManager.register_pass<ov::pass::ConstantFolding>();
postSnippetsManager.run_passes(nGraphFunc);
}
static bool streamsSet(const std::map<std::string, std::string>& config) {
return config.count(PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) ||
config.count(ov::num_streams.name());
@ -883,7 +325,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
|| Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled for the plugin */;
const auto& BF16Prop = config.find(InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16);
bool enableBF16;
bool enableBF16 = false;
if (BF16Prop != config.end()) {
if (BF16Prop->second == PluginConfigParams::YES) {
enableBF16 = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core);
@ -901,7 +343,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
DEBUG_LOG(PrintableModel(*nGraphFunc, "org_"));
TransformationUpToCPUSpecificOpSet(nGraphFunc, enableLPT, enableBF16, enableSnippets, isLegacyAPI());
Transformations transformations(nGraphFunc, enableLPT, enableSnippets, enableBF16, isLegacyAPI(), engConfig);
transformations.UpToCpuSpecificOpSet();
// need to check that all outputs have static shapes
// checking that all inputs have static shapes is performed in the common part
@ -914,8 +357,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
}
ApplyPerformanceHints(config, nGraphFunc);
ConvertToCPUSpecificOpset(nGraphFunc);
transformations.CpuSpecificOpSet();
DEBUG_LOG(PrintableModel(*nGraphFunc, "cpu_"));
@ -1153,19 +595,20 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma
}
auto supported = GetSupportedNodes(model,
[&](std::shared_ptr<ov::Model>& model) {
TransformationUpToCPUSpecificOpSet(model, enableLPT, conf.enforceBF16, enableSnippets, isLegacyAPI());
ConvertToCPUSpecificOpset(model);
},
[&](const std::shared_ptr<ngraph::Node>& op) {
std::unique_ptr<Node> ptr;
try {
ptr.reset(Node::factory().create(op, {dnnl::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
} catch (const InferenceEngine::Exception&) {
return false;
}
return true;
});
[&](std::shared_ptr<ov::Model>& model) {
Transformations transformation(model, enableLPT, enableSnippets, conf.enforceBF16, isLegacyAPI(), engConfig);
transformation.UpToCpuSpecificOpSet();
transformation.CpuSpecificOpSet();
},
[&](const std::shared_ptr<ngraph::Node>& op) {
std::unique_ptr<Node> ptr;
try {
ptr.reset(Node::factory().create(op, {dnnl::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
} catch (const InferenceEngine::Exception&) {
return false;
}
return true;
});
for (auto&& layerName : supported) {
res.supportedLayersMap.emplace(layerName, GetName());

View File

@ -4,16 +4,12 @@
#pragma once
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include "exec_network.h"
#include <string>
#include <map>
#include <unordered_map>
#include <memory>
#include <functional>
#include <vector>
#include <cfloat>
namespace ov {
namespace intel_cpu {

View File

@ -0,0 +1,609 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformation_pipeline.h"
// Operations
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset2.hpp"
#include "openvino/opsets/opset3.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset5.hpp"
#include "openvino/opsets/opset6.hpp"
#include "openvino/opsets/opset10.hpp"
#include <ov_ops/augru_cell.hpp>
#include <ov_ops/augru_sequence.hpp>
// Common transformations
#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include "transformations/common_optimizations/fq_mul_fusion.hpp"
#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/nop_elimination.hpp"
#include "transformations/common_optimizations/transpose_sinking.hpp"
#include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
#include "transformations/common_optimizations/augru_cell_fusion.hpp"
#include "transformations/common_optimizations/common_optimizations.hpp"
#include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp"
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
#include "transformations/disable_decompression_convert_constant_folding.hpp"
#include "transformations/op_conversions/convert_batch_to_space.hpp"
#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp"
#include "transformations/op_conversions/convert_depth_to_space.hpp"
#include "transformations/op_conversions/convert_gather_downgrade.hpp"
#include "transformations/op_conversions/convert_gather_upgrade.hpp"
#include "transformations/op_conversions/convert_gelu.hpp"
#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp"
#include "transformations/op_conversions/convert_minimum_to_power_and_max.hpp"
#include "transformations/op_conversions/convert_mod.hpp"
#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp"
#include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp"
#include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp"
#include "transformations/op_conversions/convert_reduce_to_pooling.hpp"
#include "transformations/op_conversions/convert_roi_align_v3_to_v9.hpp"
#include "transformations/op_conversions/convert_roi_align_v9_to_v3.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/convert_shuffle_channels3.hpp"
#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp"
#include "transformations/op_conversions/convert_space_to_batch.hpp"
#include "transformations/op_conversions/convert_space_to_depth.hpp"
#include "transformations/op_conversions/convert_subtract.hpp"
#include "transformations/op_conversions/convert_ti_to_sequences.hpp"
#include "transformations/op_conversions/detection_output_downgrade.hpp"
#include "transformations/op_conversions/detection_output_upgrade.hpp"
#include "transformations/op_conversions/eye_decomposition.hpp"
#include "transformations/op_conversions/fq_decomposition.hpp"
#include "transformations/op_conversions/gelu7_downgrade.hpp"
#include "transformations/op_conversions/hsigmoid_decomposition.hpp"
#include "transformations/op_conversions/hswish_decomposition.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/op_conversions/mvn6_decomposition.hpp"
#include "transformations/op_conversions/normalize_l2_decomposition.hpp"
#include "transformations/op_conversions/reduce_l1_decomposition.hpp"
#include "transformations/op_conversions/reduce_l2_decomposition.hpp"
#include "transformations/op_conversions/rnn_cell_decomposition.hpp"
#include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp"
#include "transformations/op_conversions/softplus_decomposition.hpp"
#include "transformations/op_conversions/softsign_decomposition.hpp"
#include "transformations/op_conversions/softmax_decomposition.hpp"
#include "transformations/op_conversions/unique_decomposition.hpp"
#include "transformations/opset_conversions/convert_opset2_to_opset1.hpp"
#include "transformations/opset_conversions/convert_opset3_to_opset2.hpp"
#include "transformations/smart_reshape/matmul_sr.hpp"
#include "transformations/init_node_info.hpp"
#include "utils/ngraph_transformation.hpp"
// LPT transformations
#include "transformations/low_precision/mark_dequantization_subgraph.hpp"
#include "low_precision/convolution_backprop_data.hpp"
#include "low_precision/convert_subtract_constant.hpp"
#include "low_precision/network_helper.hpp"
#include "low_precision/multiply_to_group_convolution.hpp"
#include "low_precision/group_convolution.hpp"
// CPU specific transformations
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#include "ngraph_transformations/snippets_mark_skipped.hpp"
#include "ngraph_transformations/mha_fusion.hpp"
#include "ngraph_transformations/convert_to_interaction.hpp"
#include "ngraph_transformations/convert_fq_rnn_to_quantized_rnn.hpp"
#include "ngraph_transformations/move_eltwise_up_data_movement.hpp"
#include "ngraph_transformations/swap_convert_transpose.hpp"
// Snippets
#include "snippets/pass/collapse_subgraph.hpp"
#include "snippets/pass/common_optimizations.hpp"
// Misc
#include "nodes/mvn.h"
#include "nodes/normalize.h"
#include "nodes/fake_quantize.h"
#include "nodes/mha.h"
#include "dnnl.hpp"
#include <cpu/x64/cpu_isa_traits.hpp>
namespace ov {
namespace intel_cpu {
using const_node_ptr = const std::shared_ptr<const ov::Node>;
// Re-targets a Convert node to the destination type `to`.
// Registered in the type_to_fuse_map used by ConvertPrecision (the `idx`
// parameter is part of that callback signature and is unused here).
// Returns true when the node was handled (i.e. it is a Convert), false otherwise.
bool Transformations::fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx) {
    const auto convert = ov::as_type_ptr<ov::opset10::Convert>(node);
    if (!convert)
        return false;

    const bool realToBool = convert->input(0).get_element_type().is_real() &&
                            convert->get_convert_element_type() == ngraph::element::boolean;
    if (realToBool && to.is_integral_number()) {
        // Boolean output is emulated with u8, so a plain precision swap would be
        // mathematically wrong: e.g. 0.01 must become 1 as boolean but would become
        // 0 as u8. Insert Abs + Ceiling before the Convert to keep boolean semantics.
        const auto abs = std::make_shared<ov::opset10::Abs>(convert->input_value(0).get_node_shared_ptr());
        const auto ceil = std::make_shared<ov::opset10::Ceiling>(abs);
        const auto replacement = std::make_shared<ov::opset10::Convert>(ceil, to);
        replacement->set_friendly_name(convert->get_friendly_name());
        ov::copy_runtime_info(convert, {abs, ceil, replacement});
        ov::replace_node(convert, replacement);
    } else {
        // Safe to change the conversion target in place.
        convert->set_convert_element_type(to);
    }
    return true;
}
void Transformations::UpToCpuSpecificOpSet() {
const bool useLpt = enableLpt &&
ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(model) &&
CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Lpt);
const bool useSnippets = enableSnippets &&
CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Snippets);
auto defaultPrecisions = useLpt ? ngraph::pass::low_precision::precision_set::int8_support : std::vector<ov::element::Type>{};
bool hasINT16orINT32Levels = false;
if (useLpt) {
CPU_LPT_SCOPE(LowPrecisionTransformations_Part1);
hasINT16orINT32Levels = ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
model,
{ngraph::pass::low_precision::levels::int16, ngraph::pass::low_precision::levels::int16_narrow_range,
ngraph::pass::low_precision::levels::int32, ngraph::pass::low_precision::levels::int32_narrow_range});
if (hasINT16orINT32Levels) {
defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_int16_int32_support;
}
}
PreLpt(defaultPrecisions, isLegacyApi);
if (useLpt)
Lpt(hasINT16orINT32Levels, defaultPrecisions);
PostLpt();
if (useSnippets)
Snippets();
}
// Final stage of the pipeline: converts the model to the CPU plugin's internal
// opset. Can be filtered out via debug capabilities (the "Specific" scope).
void Transformations::CpuSpecificOpSet(void) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Specific);
    ConvertToCPUSpecificOpset(model);
}
// Common transformations executed before the (optional) low-precision pipeline:
// opset downgrades, decompositions, precision conversion and the callbacks that
// keep operations the CPU plugin can execute natively from being decomposed.
// `defaultPrecisions` is non-empty only when LPT will run afterwards.
// Fix: ov::pass::ConvertShuffleChannels3 was disabled twice in the
// enable/disable list; the duplicate registration has been removed.
void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, PreLpt);
    ov::pass::Manager manager;
    manager.set_per_pass_validation(false);
    manager.register_pass<ov::pass::InitNodeInfo>();

    // An empty precision list means LPT was not requested for this model.
    const bool useLpt = !defaultPrecisions.empty();
    if (useLpt) {
        manager.register_pass<ov::pass::MarkDequantizationSubgraph>(defaultPrecisions);
    }

    // Table for ConvertPrecision: element types the plugin does not execute
    // natively are mapped onto supported ones.
    auto get_convert_precisions = []() {
        precisions_array array = {
            {ov::element::i64,     ov::element::i32},
            {ov::element::u64,     ov::element::i32},
            {ov::element::i16,     ov::element::i32},
            {ov::element::u16,     ov::element::i32},
            {ov::element::u32,     ov::element::i32},
            {ov::element::f64,     ov::element::f32},
            {ov::element::f16,     ov::element::f32},
            {ov::element::boolean, ov::element::u8},
            {ov::element::i4,      ov::element::i8},
            {ov::element::u4,      ov::element::u8}
        };
        // bf16 execution requires avx512_core; downconvert to f32 elsewhere.
        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
            array.push_back({ov::element::bf16, ov::element::f32});
        return array;
    };
    static const auto precisions = get_convert_precisions();
    // Convert needs custom handling (see fuse_type_to_convert) to keep
    // float->boolean semantics when boolean is lowered to u8.
    type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};

    manager.register_pass<ov::pass::AUGRUCellFusion>();
    manager.register_pass<ov::pass::CommonOptimizations>();
    manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
    manager.register_pass<ov::pass::TransposeSinking>();
    manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
    manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
    manager.register_pass<ov::pass::LSTMCellDecomposition>();
    manager.register_pass<ov::pass::GRUCellDecomposition>();
    manager.register_pass<ov::pass::RNNCellDecomposition>();
    // All NMS versions are upgraded to NMS-9 first, then lowered to the internal representation.
    manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
    manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
    manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
    manager.register_pass<ov::pass::TransposeMatMul>();
    manager.register_pass<ov::pass::ConstantFolding>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part2);
        manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
    }
    manager.register_pass<ov::pass::Validate>();
    manager.register_pass<ov::pass::ConvertPrecision>(precisions, type_to_fuse);
    manager.register_pass<ov::pass::EliminateConvert>();
    manager.register_pass<SwapConvertTranspose>();
    manager.register_pass<ConvertToInteraction>();
    manager.register_pass<ConvertInteractionInt8>();

    auto pass_config = manager.get_pass_config();

    // SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
    pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
                              ov::pass::ConvertDepthToSpace>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_shape().size() <= 5lu &&
                   node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
        });

    // BatchToSpace/SpaceToBatch are kept as-is only for 4D/5D cases.
    pass_config->set_callback<ov::pass::ConvertBatchToSpace,
                              ov::pass::ConvertSpaceToBatch>(
        [](const_node_ptr &node) -> bool {
            const auto & rank = node->input(0).get_partial_shape().rank().get_length();
            return rank == 4lu || rank == 5lu;
        });

    // RNN/GRU/AUGRU/LSTM cells are supported natively with clip == 0 and default activations.
    auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool {
        if (const auto &rnn_cell = std::dynamic_pointer_cast<const ov::opset4::RNNCell>(node)) {
            return rnn_cell->get_clip() == 0.0f;
        } else if (const auto &gru_cell = std::dynamic_pointer_cast<const ov::opset4::GRUCell>(
                node)) {
            return gru_cell->get_clip() == 0.0f
                && gru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &augru_cell = std::dynamic_pointer_cast<const ov::op::internal::AUGRUCell>(
                node)) {
            return augru_cell->get_clip() == 0.0f
                && augru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ov::opset4::LSTMCell>(
                node)) {
            return lstm_cell->get_clip() == 0.0f &&
                lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ov::opset1::LSTMCell>(
                node)) {
            return lstm_cell_v1->get_clip() == 0.0f &&
                lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        }
        return false;
    };

    // Sequences supported by the plugin shouldn't be converted to TensorIterator.
    // sequence_length input is not supported in all Sequences, so if is_seq_len_provided() == true, we
    // should always convert to TensorIterator.
    // RNN/GRU/LSTM Sequences are supported with clip == 0, and with default activations.
    auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool {
        const auto& data = node->input(0);
        const auto& data_pshape = data.get_partial_shape();
        // WA: dynamic shapes make impossible to check seq_len due to shapeOf subgraphs
        // but the sequence is still supported in CPU and doesn't need to be decomposed
        if (data_pshape.is_dynamic())
            return true;
        if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static())
            return false;
        auto max_seq_len = data.get_shape().at(1);
        if (const auto &rnn_seq = std::dynamic_pointer_cast<const ov::opset6::RNNSequence>(node)) {
            return rnn_seq->get_clip() == 0.0f &&
                !ov::op::util::is_seq_len_provided(rnn_seq->get_input_node_shared_ptr(2),
                                                   max_seq_len);
        } else if (const auto &gru_seq = std::dynamic_pointer_cast<const ov::opset6::GRUSequence>(
                node)) {
            return gru_seq->get_clip() == 0.0f &&
                gru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                !ov::op::util::is_seq_len_provided(gru_seq->get_input_node_shared_ptr(2),
                                                   max_seq_len);
        } else if (const auto &augru_seq = std::dynamic_pointer_cast<const ov::op::internal::AUGRUSequence>(
                node)) {
            return augru_seq->get_clip() == 0.0f &&
                augru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                !ov::op::util::is_seq_len_provided(augru_seq->get_input_node_shared_ptr(2),
                                                   max_seq_len);
        } else if (const auto &lstm_seq = std::dynamic_pointer_cast<const ov::opset6::LSTMSequence>(
                node)) {
            return lstm_seq->get_clip() == 0.0f &&
                lstm_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"} &&
                !ov::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3),
                                                   max_seq_len);
        }
        return false;
    };

    pass_config->set_callback<ov::pass::ConvertRNNSequenceToTensorIterator,
                              ov::pass::ConvertGRUSequenceToTensorIterator,
                              ov::pass::ConvertLSTMSequenceToTensorIterator>(
        [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
            return isSequencePrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::RNNCellDecomposition, ov::pass::GRUCellDecomposition,
                              ov::pass::LSTMCellDecomposition>(
        [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
            return isCellPrimitiveSupported(node);
        });

    // Natively supported MVN/NormalizeL2 nodes are kept whole.
    pass_config->set_callback<ov::pass::MVN6Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMessage;
            return node::MVN::isSupportedOperation(node, errorMessage);
        });

    pass_config->set_callback<ov::pass::NormalizeL2Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMsg;
            return node::NormalizeL2::isSupportedOperation(node, errorMsg);
        });

    // Softmax is decomposed only for inputs with rank > 5.
    pass_config->enable<ngraph::pass::SoftmaxDecomposition>();
    pass_config->set_callback<ngraph::pass::SoftmaxDecomposition>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
        });

    if (!isLegacyApi) {
        // Callback fires (returns true) only when every consumer of every NMS
        // output is a model output.
        auto nmsCallback = [](const_node_ptr &node) -> bool {
            for (size_t i = 0; i < node->get_output_size(); i++) {
                const auto outputs = node->get_output_target_inputs(i);
                for (const auto &out : outputs) {
                    if (!ov::op::util::is_output(out.get_node())) {
                        return false;
                    }
                }
            }
            return true;
        };

        pass_config->set_callback<ov::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
    }

    // List of enabled/disabled transformations

    // Allow FP16 Converts to be folded and FP16 constants to be upgraded to FP32 data type
    pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
    pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();

    pass_config->disable<ov::pass::EyeDecomposition>();
    pass_config->disable<ov::pass::ConvertGELU>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::Gelu7Downgrade>();
    pass_config->disable<ov::pass::HSwishDecomposition>();
    pass_config->disable<ov::pass::ReduceL1Decomposition>();
    pass_config->disable<ov::pass::ReduceL2Decomposition>();
    pass_config->disable<ov::pass::SoftPlusDecomposition>();
    pass_config->disable<ov::pass::HSigmoidDecomposition>();
    pass_config->disable<ov::pass::ConvertMod>();
    pass_config->disable<ov::pass::WeightsDequantizeToFakeQuantize>();
    pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
    pass_config->disable<ov::pass::ConvertGather7ToGather1>();
    pass_config->disable<ov::pass::ConvertGather8ToGather7>();
    pass_config->disable<ov::pass::ConvertMinimum>();
    pass_config->disable<ov::pass::ConvertBroadcastToTiles>();
    pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
    pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
    pass_config->disable<ov::pass::ConvertReduceSumToPooling>();
    pass_config->disable<ov::pass::SliceToStridedSlice>();
    pass_config->disable<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
    pass_config->disable<ov::pass::ConvertROIAlign9To3>();
    pass_config->disable<ov::pass::SoftSignDecomposition>();
    pass_config->disable<ov::pass::UniqueDecomposition>();

    pass_config->enable<ov::pass::NormalizeL2Decomposition>();
    pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
    pass_config->enable<ov::pass::ConvertGather1ToGather7>();
    pass_config->enable<ov::pass::ConvertDetectionOutput1ToDetectionOutput8>();
    pass_config->enable<ov::pass::ConvertROIAlign3To9>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part3);
        // FakeQuantize nodes the plugin executes natively must keep their FQ form for LPT.
        pass_config->set_callback<ov::pass::AddFakeQuantizeFusion,
                                  ov::pass::MulFakeQuantizeFusion,
                                  ov::pass::FakeQuantizeMulFusion>(
            [](const_node_ptr &node) -> bool {
                std::string errMsg;
                return !node::FakeQuantize::isSupportedOperation(node, errMsg);
            });

        pass_config->set_callback<ov::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
            return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions);
        });
    }

    manager.run_passes(model);
}
/**
 * @brief Runs the low-precision (LPT, INT8) transformation pipeline on the model.
 *
 * Skipped entirely when the Lpt stage is disabled via debug capabilities.
 *
 * @param hasINT16orINT32Levels true when the model contains INT16/INT32 quantization levels
 *        (GNA reference-execution networks): precision restrictions are dropped and
 *        original precisions are preserved (updatePrecision = false).
 * @param defaultPrecisions precisions LPT treats as quantized input precisions.
 */
void Transformations::Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Lpt);

    using namespace ngraph::pass::low_precision;
    CPU_LPT_SCOPE(LowPrecisionTransformations_Part4);
    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations");

    // Only enable conv/group conv signed input on AMX platform.
    std::vector<ov::element::Type> input0LowPrecisionList;
    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
        input0LowPrecisionList = {ov::element::u8, ov::element::i8};
    } else {
        input0LowPrecisionList = {ov::element::u8};
    }

    // Per-operation restrictions on the precisions LPT may assign to specific input ports.
    auto supportedPrecisions = std::vector<PrecisionsRestriction>({
        PrecisionsRestriction::create<ov::opset1::Convolution>({
            {{0}, input0LowPrecisionList},
            {{1}, {ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset1::ConvolutionBackpropData>({
            {{0}, {ov::element::u8, ov::element::i8}},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset1::GroupConvolution>({
            {{0}, input0LowPrecisionList},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset1::Multiply>({
            {{0}, {ov::element::u8}},
            {{1}, {ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset1::MatMul>({
            {{0}, {ov::element::u8, ov::element::i8}},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset5::LSTMSequence>({
            {{0, 1}, {ov::element::u8, ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset6::GRUSequence>({
            {{0, 1}, {ov::element::u8, ov::element::i8}},
        }),
    });

    // Ops restricted to per-tensor quantization on the listed input ports.
    auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>({
        QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0}),
        QuantizationGranularityRestriction::create<ov::opset1::ConvolutionBackpropData>({0})
    });

    // for GNA networks reference execution
    bool updatePrecision = true;
    if (hasINT16orINT32Levels) {
        updatePrecision = false;
        supportedPrecisions = std::vector<PrecisionsRestriction>({});
    }

    ov::pass::Manager lptManager;
    lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
        supportedPrecisions,
        quantizationRestrictions,
        LayerTransformation::Params(updatePrecision, ov::element::f32, defaultPrecisions));
    // Returning true skips precision markup for Multiply ops that cannot be converted
    // to a GroupConvolution anyway.
    lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
        if (const auto mulitply = std::dynamic_pointer_cast<const ov::opset1::Multiply>(node)) {
            return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply);
        }
        return false;
    });
    // Skip ConvolutionBackpropData when quantization is asymmetric on data or weights.
    lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>(
        [&defaultPrecisions](const_node_ptr& node) -> bool {
            return LayerTransformation::isAsymmetricQuantization(node, defaultPrecisions) ||
                   WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
        });
    lptManager.get_pass_config()->disable<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>();
    lptManager.run_passes(model);
}
/**
 * @brief Transformations executed after the LPT stage and before snippets tokenization.
 *
 * Unrolls TensorIterators marked by LowLatency, reshapes PRelu, moves eltwise ops up
 * through data-movement ops, and fuses MHA / FQ patterns ahead of snippets.
 */
void Transformations::PostLpt() {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, PostLpt);

    ov::pass::Manager postLPTPassManager;
    postLPTPassManager.register_pass<ov::pass::UnrollTensorIterator>();
    postLPTPassManager.register_pass<ov::pass::ReshapePRelu>();
    postLPTPassManager.get_pass_config()->set_callback<ov::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
        // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
        return node->get_rt_info().count("UNROLL_TI") == 0;
    });
    postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
    // Skip moving eltwise ops whose second input is i8/u8 (quantized path).
    postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ov::Node>& node) -> bool {
        if (node->get_input_size() >= 2) {
            return node->get_input_element_type(1) == ov::element::i8 || node->get_input_element_type(1) == ov::element::u8;
        }
        return false;
    });

    postLPTPassManager.register_pass<ov::pass::ConstantFolding>();

    // Snippets may break MHA patterns so the fusion has to be performed before
    postLPTPassManager.register_pass<MHAFusion>();
    postLPTPassManager.register_pass<FuseFQtoInteraction>();
    // Returning true skips the MHA fusion for nodes the CPU MHA node cannot execute.
    postLPTPassManager.get_pass_config()->set_callback<MHAFloatFusion, MHAFloatFusion2,
                                                       MHAQuantFusion, MHAQuantFusion2>
        ([this](const std::shared_ptr<const ov::Node>& n) -> bool {
            std::string errorMessage;
            if (!node::MHA::isSupportedOperation(n, errorMessage))
                return true;

            // Implementation calls AMX BF16 brgemm only for tensors with K and N aligned on 2, otherwise fallbacks on vector impl
            // Vector madd BF16 instruction on SPR has reduced performance on HW level, which results in overall perf degradation
            size_t bf16Factor = 2;
            if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16_amx_bf16) &&
                (n->get_input_element_type(0) == element::bf16 || (n->get_input_element_type(0) == element::f32 && enableBF16)) &&
                (n->get_input_shape(0)[3] % bf16Factor != 0 || n->get_input_shape(1)[1] % bf16Factor != 0 || n->get_input_shape(3)[3] % bf16Factor != 0)) {
                return true;
            }
            return false;
        });

    // Execute before snippets. Otherwise FQ will be converted to Subgraph
    postLPTPassManager.register_pass<ConvertFqRnnToQuantizedRnn>();
    postLPTPassManager.run_passes(model);
}
/**
 * @brief Tokenizes suitable subgraphs into Snippets subgraph operations.
 *
 * No-op when snippets are disabled or the platform lacks AVX2 support.
 */
void Transformations::MainSnippets(void) {
    if (!enableSnippets ||
        !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) // snippets are implemented only for relevant platforms (avx2+ extensions)
        return;

    ov::pass::Manager snippetsManager;
    snippetsManager.register_pass<SnippetsMarkSkipped>();
    snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
    snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
    // The callback returns true for nodes that must NOT be tokenized into a snippet.
    snippetsManager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>(
        [](const std::shared_ptr<const ov::Node>& n) -> bool {
            // CPU Plugin supports Swish in Subgraph via conversion to SwishCPU which assumes the second input to be constant
            if (ov::is_type<const ov::op::v4::Swish>(n)) {
                if (n->inputs().size() > 1 && !ov::is_type<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1)))
                    return true;
            }
            const auto& inputs = n->inputs();
            // todo: clarify whether we can evaluate snippets on const paths
            const bool has_only_const_inputs = std::all_of(inputs.begin(), inputs.end(),
                [](const ov::Input<const ov::Node> &in) {
                    return ov::is_type<ov::op::v0::Constant>(in.get_source_output().get_node_shared_ptr());
                });
            // todo: clarify whether we can evaluate snippets on inputs with larger ranks
            auto rank_is_too_large = [](const ov::descriptor::Tensor& t ) {
                // callback is called after has_supported_in_out(), so it's safe to assume that the shapes are static
                return t.get_partial_shape().rank().get_length() > 6;
            };
            const bool bad_input_rank = std::any_of(inputs.begin(), inputs.end(),
                [&](const ov::Input<const ov::Node>& in) {return rank_is_too_large(in.get_tensor());});
            const auto& outputs = n->outputs();
            const bool bad_output_rank = std::any_of(outputs.begin(), outputs.end(),
                [&](const ov::Output<const ov::Node>& out) {return rank_is_too_large(out.get_tensor());});
            return has_only_const_inputs || bad_input_rank || bad_output_rank;
        });
    snippetsManager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
    snippetsManager.run_passes(model);
}
void Transformations::PostSnippets(void) {
ov::pass::Manager postSnippetsManager;
postSnippetsManager.register_pass<ov::pass::FakeQuantizeDecomposition>();
postSnippetsManager.get_pass_config()->set_callback<ov::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
std::string errMsg;
return node::FakeQuantize::isSupportedOperation(node, errMsg);
});
postSnippetsManager.register_pass<ov::pass::ConstantFolding>();
postSnippetsManager.run_passes(model);
}
/**
 * @brief Snippets stage of the pipeline: tokenization (MainSnippets) followed by
 * post-processing (PostSnippets). May be disabled / IR-dumped via debug capabilities.
 */
void Transformations::Snippets(void) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Snippets);

    MainSnippets();
    PostSnippets();
}
} // namespace intel_cpu
} // namespace ov

View File

@ -0,0 +1,65 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/core/model.hpp"
#include "low_precision/low_precision.hpp"
#include "config.h"
#include "itt.h"
#include <memory>
#include <vector>
// NOTE(review): a using-directive at header scope leaks InferenceEngine names into
// every includer of this header — consider scoping or removing it.
using namespace InferenceEngine;

// Throws an IE exception whose message starts with the "CPU plugin: " prefix.
#define IE_CPU_PLUGIN_THROW(...) IE_THROW(__VA_ARGS__) << "CPU plugin: "
namespace ov {
namespace intel_cpu {
/**
 * @brief Orchestrates the ngraph/OpenVINO transformation pipeline of the CPU plugin.
 *
 * The pipeline is split into stages (PreLpt, Lpt, PostLpt, Snippets, CPU-specific
 * opset) driven by UpToCpuSpecificOpSet() and CpuSpecificOpSet().
 */
class Transformations {
public:
    Transformations(const std::shared_ptr<ov::Model>& initialModel,
                    const bool enableLpt,
                    const bool enableSnippets,
                    const bool enableBF16,
                    const bool isLegacyApi,
                    const Config& config)
        : model(initialModel),
          enableLpt(enableLpt),
          enableSnippets(enableSnippets),
          enableBF16(enableBF16),
          isLegacyApi(isLegacyApi),
          config(config) {}

    // Runs all common transformation stages preceding the CPU-specific opset conversion.
    void UpToCpuSpecificOpSet();
    // Converts the model to the CPU plugin specific opset.
    void CpuSpecificOpSet(void);

private:
    std::shared_ptr<ov::Model> model;  // the model under transformation (mutated in place)
    const bool enableLpt;              // enable low-precision (INT8) transformations
    const bool enableSnippets;         // enable snippets tokenization
    const bool enableBF16;             // enable BF16 inference
    const bool isLegacyApi;
    const Config& config;              // NOTE(review): held by reference — caller's Config must outlive this object

    // Individual pipeline stages
    void PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi);
    void Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions);
    void PostLpt();
    void MainSnippets(void);
    void PostSnippets(void);
    void Snippets(void);

    static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx);
};
} // namespace intel_cpu
} // namespace ov

View File

@ -5,7 +5,7 @@
#ifdef CPU_DEBUG_CAPS
#define CPU_DEBUG_CAP_ENABLE(_x) _x;
#define CPU_DEBUG_CAP_ENABLE(...) __VA_ARGS__
#define CPU_DEBUG_CAPS_ALWAYS_TRUE(x) true
#include <string>
@ -147,7 +147,7 @@ static inline std::ostream& write_all_to_stream(std::ostream& os, const T& arg,
#else // !CPU_DEBUG_CAPS
#define CPU_DEBUG_CAP_ENABLE(_x)
#define CPU_DEBUG_CAP_ENABLE(...)
#define CPU_DEBUG_CAPS_ALWAYS_TRUE(x) x
#define DEBUG_LOG(...)

View File

@ -0,0 +1,66 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS
#include "debug_caps_config.h"
#include <string>
namespace ov {
namespace intel_cpu {
/**
 * @brief Populates the debug-capabilities configuration from OV_CPU_* environment variables.
 *
 * Unset variables leave the corresponding members at their defaults.
 * @throws IE exception when OV_CPU_BLOB_DUMP_FORMAT holds an unknown value or when
 *         OV_CPU_DISABLE / OV_CPU_DUMP_IR fail to parse.
 */
void DebugCapsConfig::readProperties() {
    // nullptr when the variable is not set
    auto readEnv = [](const char* envVar) {
        return std::getenv(envVar);
    };

    auto parseDumpFormat = [](const std::string& format) {
        if (format == "BIN")
            return FORMAT::BIN;
        else if (format == "TEXT")
            return FORMAT::TEXT;
        else
            // fix: the message used to reference the pre-rename name 'readDebugCapsProperties'
            IE_THROW() << "DebugCapsConfig::readProperties: Unknown dump format";
    };

    const char* envVarValue = nullptr;

    if ((envVarValue = readEnv("OV_CPU_EXEC_GRAPH_PATH")))
        execGraphPath = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_VERBOSE")))
        verbose = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_DIR")))
        blobDumpDir = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_FORMAT")))
        blobDumpFormat = parseDumpFormat(envVarValue);

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID")))
        blobDumpFilters[BY_EXEC_ID] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_PORTS")))
        blobDumpFilters[BY_PORTS] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_TYPE")))
        blobDumpFilters[BY_TYPE] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_NAME")))
        blobDumpFilters[BY_NAME] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_SUMMARY_PERF"))) {
        summaryPerf = envVarValue;
    }

    if ((envVarValue = readEnv("OV_CPU_DISABLE")))
        disable.parseAndSet(envVarValue);

    if ((envVarValue = readEnv("OV_CPU_DUMP_IR")))
        dumpIR.parseAndSet(envVarValue);
}
} // namespace intel_cpu
} // namespace ov
#endif // CPU_DEBUG_CAPS

View File

@ -0,0 +1,213 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS
#include "ie_common.h"
#include "openvino/util/common_util.hpp"
#include <bitset>
#include <unordered_map>
namespace ov {
namespace intel_cpu {
/**
 * @brief Debug-capabilities configuration of the CPU plugin.
 *
 * All properties are read from OV_CPU_* environment variables on construction
 * (see DebugCapsConfig::readProperties()). Compiled in only under CPU_DEBUG_CAPS.
 */
class DebugCapsConfig {
private:
    struct PropertySetter;
    using PropertySetterPtr = std::shared_ptr<PropertySetter>;

public:
    DebugCapsConfig() {
        readProperties();
    }

    // Keys of blobDumpFilters (set from the OV_CPU_BLOB_DUMP_NODE_* variables)
    enum FILTER {
        BY_PORTS,
        BY_EXEC_ID,
        BY_TYPE,
        BY_NAME,
    };

    // Blob dump output format (OV_CPU_BLOB_DUMP_FORMAT)
    enum class FORMAT {
        BIN,
        TEXT,
    };

    std::string execGraphPath;             // OV_CPU_EXEC_GRAPH_PATH
    std::string verbose;                   // OV_CPU_VERBOSE
    std::string blobDumpDir = "cpu_dump";  // OV_CPU_BLOB_DUMP_DIR
    FORMAT blobDumpFormat = FORMAT::TEXT;  // OV_CPU_BLOB_DUMP_FORMAT
    // std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and defect in C++11 standard)
    std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
    std::string summaryPerf = "";          // OV_CPU_SUMMARY_PERF

    // Bitset filter over the transformation stages of the plugin pipeline
    struct TransformationFilter {
        enum Type : uint8_t {
            PreLpt = 0, Lpt, PostLpt, Snippets, Specific, NumOfTypes
        };
        std::bitset<NumOfTypes> filter;

        PropertySetterPtr getPropertySetter() {
            return PropertySetterPtr(new BitsetFilterPropertySetter<NumOfTypes>("transformations", filter,
                                     {{"all", {PreLpt, Lpt, PostLpt, Snippets, Specific}},
                                      {"common", {PreLpt, PostLpt}},
                                      {"prelpt", {PreLpt}},
                                      {"lpt", {Lpt}},
                                      {"postlpt", {PostLpt}},
                                      {"snippets", {Snippets}},
                                      {"specific", {Specific}}
                                     }));
        }
    };

    // Bitset filter over the supported IR dump formats
    struct IrFormatFilter {
        enum Type : uint8_t {
            Xml = 0, XmlBin, Dot, Svg, NumOfTypes
        };
        std::bitset<NumOfTypes> filter;

        PropertySetterPtr getPropertySetter() {
            return PropertySetterPtr(new BitsetFilterPropertySetter<NumOfTypes>("formats", filter,
                                     {{"all", {XmlBin, Dot, Svg}},
                                      {"xml", {Xml}},
                                      {"xmlbin", {XmlBin}},
                                      {"dot", {Dot}},
                                      {"svg", {Svg}},
                                     }));
        }
    };

    // A group of named properties parsed from one space-separated env-var value,
    // e.g. OV_CPU_DUMP_IR="dir=path formats=xml transformations=lpt"
    struct PropertyGroup {
        virtual std::vector<PropertySetterPtr> getPropertySetters(void) = 0;

        void parseAndSet(const std::string& str) {
            const auto& options = ov::util::split(str, ' ');
            const auto& propertySetters = getPropertySetters();
            bool failed = false;
            auto getHelp = [propertySetters] (void) {
                std::string help;
                for (const auto& property : propertySetters)
                    help.append('\t' + property->getPropertyName() + "=<" + property->getPropertyValueDescription() + ">\n");
                return help;
            };

            for (const auto& option : options) {
                const auto& parts = ov::util::split(option, '=');
                if (parts.size() > 2) {
                    failed = true;
                    break;
                }
                const auto& propertyName = ov::util::to_lower(parts.front());
                const auto& foundSetter = std::find_if(propertySetters.begin(), propertySetters.end(),
                    [propertyName] (const PropertySetterPtr& setter) { return setter->getPropertyName() == propertyName; });
                // an option without '=' is passed to the setter as an empty value
                if (foundSetter == propertySetters.end() ||
                    !(*foundSetter)->parseAndSet(parts.size() == 1 ? "" : parts.back())) {
                    failed = true;
                    break;
                }
            }

            if (failed)
                IE_THROW() << "Wrong syntax: " << str << std::endl
                           << "The following space separated options are supported (option names are case insensitive):" << std::endl
                           << getHelp();
        }
    };

    // OV_CPU_DISABLE
    struct : PropertyGroup {
        TransformationFilter transformations;

        std::vector<PropertySetterPtr> getPropertySetters(void) override {
            return { transformations.getPropertySetter() };
        }
    } disable;

    // OV_CPU_DUMP_IR
    struct : PropertyGroup {
        std::string dir = "intel_cpu_dump";
        IrFormatFilter format = { 1 << IrFormatFilter::Xml };
        TransformationFilter transformations;

        std::vector<PropertySetterPtr> getPropertySetters(void) override {
            return { PropertySetterPtr(new StringPropertySetter("dir", dir, "path to dumped IRs")),
                     format.getPropertySetter(),
                     transformations.getPropertySetter() };
        }
    } dumpIR;

private:
    // One named property inside a PropertyGroup, e.g. "dir" in "dir=path"
    struct PropertySetter {
        virtual bool parseAndSet(const std::string& str) = 0;
        virtual std::string getPropertyValueDescription(void) const = 0;

        PropertySetter(const std::string&& name) : propertyName(name) {}
        const std::string& getPropertyName(void) const { return propertyName; }

    private:
        const std::string propertyName;
    };

    // Setter for a plain string property
    struct StringPropertySetter : PropertySetter {
        // fix: base class listed first in the init list to match the actual initialization order
        StringPropertySetter(const std::string&& name, std::string& ref, const std::string&& valueDescription)
            : PropertySetter(std::move(name)), property(ref), propertyValueDescription(valueDescription) {}

        bool parseAndSet(const std::string& str) override {
            property = str;
            return true;
        }
        std::string getPropertyValueDescription(void) const override { return propertyValueDescription; }

    private:
        std::string& property;
        const std::string propertyValueDescription;
    };

    // Setter for a bitset property parsed from comma-separated tokens, e.g. "all,-svg"
    template<std::size_t NumOfBits>
    struct BitsetFilterPropertySetter : PropertySetter {
        struct Token {
            std::string name;
            std::vector<size_t> bits;
        };

        // fix: base class listed first in the init list to match the actual initialization order
        BitsetFilterPropertySetter(const std::string&& name, std::bitset<NumOfBits>& ref, const std::vector<Token>&& tokens)
            : PropertySetter(std::move(name)), property(ref), propertyTokens(tokens) {}

        bool parseAndSet(const std::string& str) override {
            // empty value is treated as "all"
            const auto& tokens = str.empty() ?
                std::vector<std::string>{"all"} : ov::util::split(ov::util::to_lower(str), ',');
            property.reset();
            for (const auto& token : tokens) {
                const bool tokenVal = (token.front() != '-');  // a leading '-' excludes the token
                const auto& tokenName = tokenVal ? token : token.substr(1);
                const auto& foundToken = std::find_if(propertyTokens.begin(), propertyTokens.end(),
                    [tokenName] (const Token& token) { return token.name == tokenName; });
                if (foundToken == propertyTokens.end())
                    return false;

                for (const auto& bit : foundToken->bits) {
                    property.set(bit, tokenVal);
                }
            }
            return true;
        }
        std::string getPropertyValueDescription(void) const override {
            std::string supportedTokens = "comma separated filter tokens: ";
            for (size_t i = 0; i < propertyTokens.size(); i++) {  // fix: size_t avoids signed/unsigned comparison
                if (i)
                    supportedTokens.push_back(',');
                supportedTokens.append(propertyTokens[i].name);
            }
            supportedTokens.append("; -'token' is used for exclusion, case does not matter, no tokens is treated as 'all'");
            return supportedTokens;
        }

    private:
        std::bitset<NumOfBits>& property;
        const std::vector<Token> propertyTokens;
    };

    void readProperties();
};
} // namespace intel_cpu
} // namespace ov
#endif // CPU_DEBUG_CAPS

View File

@ -0,0 +1,113 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS
#include "debug_caps_config.h"
#include "openvino/util/file_util.hpp"
#include <openvino/pass/manager.hpp>
#include <openvino/pass/serialize.hpp>
#include <openvino/pass/visualize_tree.hpp>
namespace ov {
namespace intel_cpu {
/**
 * @brief RAII helper that dumps the model IR around one transformation stage.
 *
 * On construction dumps the input graph ("_in") unless the previous enabled stage
 * already dumped an identical graph at its end; on destruction dumps the resulting
 * graph ("_out") and marks the stage as dumped.
 *
 * NOTE(review): 'model' is held by const reference, and 'num' / wasDumped() are
 * function-local statics shared by all instances — presumably dumping is assumed to
 * be single-threaded and the model outlives the dumper; verify against callers.
 */
class TransformationDumper {
public:
    explicit TransformationDumper(const DebugCapsConfig& config, const DebugCapsConfig::TransformationFilter::Type type,
                                  const std::shared_ptr<ov::Model>& model)
        : config(config), type(type), model(model) {
        // Walk back through preceding stages to decide whether the input graph
        // still needs to be serialized.
        for (auto prev = infoMap.at(type).prev; prev != TransformationType::NumOfTypes;
             prev = infoMap.at(prev).prev) {
            // no need to serialize input graph if there was no transformations from previous dump
            if (config.disable.transformations.filter[prev])
                continue;
            if (!config.dumpIR.transformations.filter[prev])
                break;
            if (wasDumped()[prev])
                return;
        }
        dump("_in");
    }
    ~TransformationDumper() {
        dump("_out");
        wasDumped().set(type);
    }

private:
    const DebugCapsConfig& config;
    const std::shared_ptr<ov::Model>& model;
    using TransformationType = DebugCapsConfig::TransformationFilter::Type;
    const TransformationType type;

    struct TransformationInfo {
        std::string name;
        TransformationType prev;  // preceding pipeline stage (NumOfTypes means none)
    };
    // std::hash<std::underlying_type<FILTER>::type> is necessary for Ubuntu-16.04 (gcc-5.4 and defect in C++11 standard)
    const std::unordered_map<TransformationType, TransformationInfo,
                             std::hash<std::underlying_type<TransformationType>::type>> infoMap =
        {{TransformationType::PreLpt, {"preLpt", TransformationType::NumOfTypes}},
         {TransformationType::Lpt, {"lpt", TransformationType::PreLpt}},
         {TransformationType::PostLpt, {"postLpt", TransformationType::Lpt}},
         {TransformationType::Snippets, {"snippets", TransformationType::PostLpt}},
         {TransformationType::Specific, {"cpuSpecific", TransformationType::Snippets}}};

    // Tracks which stages already produced an "_out" dump (shared across all instances).
    std::bitset<TransformationType::NumOfTypes>& wasDumped(void) {
        static std::bitset<TransformationType::NumOfTypes> wasDumped;
        return wasDumped;
    }

    void dump(const std::string&& postfix) {
        static int num = 0; // just to keep dumped IRs ordered in filesystem
        const auto pathAndName = config.dumpIR.dir + "/ir_" + std::to_string(num) + '_' +
                                 infoMap.at(type).name + postfix;
        ov::util::create_directory_recursive(config.dumpIR.dir);

        ov::pass::Manager serializer;
        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::XmlBin])
            serializer.register_pass<ov::pass::Serialize>(pathAndName + ".xml", "");
        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Xml]) {
            std::string xmlFile(pathAndName + ".xml");
            std::string binFile("/dev/null"); // @todo make it crossplatform using dummy implementation of std::ostream
            serializer.register_pass<ov::pass::Serialize>(xmlFile, binFile);
        }
        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Svg]) {
            serializer.register_pass<ov::pass::VisualizeTree>(pathAndName + ".svg");
        }
        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Dot]) {
            serializer.register_pass<ov::pass::VisualizeTree>(pathAndName + ".dot");
        }
        serializer.run_passes(model);
        num++;
    }
};
} // namespace intel_cpu
} // namespace ov
// True when transformation stage '_type' is disabled via the OV_CPU_DISABLE debug property.
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) \
    _config.disable.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type]
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) !CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(__VA_ARGS__)

// Declares a scoped TransformationDumper for stage '_type' when IR dumping is
// requested for it via OV_CPU_DUMP_IR; nullptr otherwise.
# define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) \
    IE_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(_this->config.debugCaps, _type)); \
    auto dumperPtr = _this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] ? \
        std::unique_ptr<TransformationDumper>(new TransformationDumper(_this->config.debugCaps, \
                                              DebugCapsConfig::TransformationFilter::Type::_type, _this->model)) : \
        nullptr

// Returns early from the enclosing function when stage '_type' is disabled;
// otherwise sets up the optional IR dumper for the stage.
# define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type) \
    if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_this->config.debugCaps, _type)) \
        return; \
    CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type)
#else // !CPU_DEBUG_CAPS: all the helpers above become no-ops
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) false
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) true
# define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type)
# define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type)
#endif // CPU_DEBUG_CAPS

View File

@ -5,6 +5,7 @@
#include "node_dumper.h"
#include "utils/debug_caps_config.h"
#include <node.h>
#include "ie_common.h"
#include "utils/blob_dump.h"
@ -26,20 +27,20 @@ static void formatNodeName(std::string& name) {
std::replace(name.begin(), name.end(), ':', '-');
}
static bool shouldBeDumped(const NodePtr& node, const Config& config, const std::string& portsKind) {
static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, const std::string& portsKind) {
const auto& dumpFilters = config.blobDumpFilters;
if (dumpFilters.empty())
return false;
if (dumpFilters.count(Config::FILTER::BY_PORTS)) { // filter by ports configured
if (dumpFilters.at(Config::FILTER::BY_PORTS) != "ALL" &&
portsKind != dumpFilters.at(Config::FILTER::BY_PORTS))
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_PORTS)) { // filter by ports configured
if (dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS) != "ALL" &&
portsKind != dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS))
return false;
}
if (dumpFilters.count(Config::FILTER::BY_EXEC_ID)) { // filter by exec id configured
std::stringstream ss(dumpFilters.at(Config::FILTER::BY_EXEC_ID));
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_EXEC_ID)) { // filter by exec id configured
std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_EXEC_ID));
int id;
bool matched = false;
@ -54,8 +55,8 @@ static bool shouldBeDumped(const NodePtr& node, const Config& config, const std:
return false;
}
if (dumpFilters.count(Config::FILTER::BY_TYPE)) { // filter by type configured
std::stringstream ss(dumpFilters.at(Config::FILTER::BY_TYPE));
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_TYPE)) { // filter by type configured
std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_TYPE));
std::string type;
bool matched = false;
@ -70,22 +71,22 @@ static bool shouldBeDumped(const NodePtr& node, const Config& config, const std:
return false;
}
if (dumpFilters.count(Config::FILTER::BY_NAME)) { // filter by name configured
if (dumpFilters.at(Config::FILTER::BY_NAME) != "*" && // to have 'single char' option for matching all the names
!std::regex_match(node->getName(), std::regex(dumpFilters.at(Config::FILTER::BY_NAME)))) // name does not match
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_NAME)) { // filter by name configured
if (dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME) != "*" && // to have 'single char' option for matching all the names
!std::regex_match(node->getName(), std::regex(dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME)))) // name does not match
return false;
}
return true;
}
static void dump(const BlobDumper& bd, const std::string& file, const Config& config) {
static void dump(const BlobDumper& bd, const std::string& file, const DebugCapsConfig& config) {
switch (config.blobDumpFormat) {
case Config::FORMAT::BIN: {
case DebugCapsConfig::FORMAT::BIN: {
bd.dump(file);
break;
}
case Config::FORMAT::TEXT: {
case DebugCapsConfig::FORMAT::TEXT: {
bd.dumpAsTxt(file);
break;
}
@ -94,7 +95,7 @@ static void dump(const BlobDumper& bd, const std::string& file, const Config& co
}
}
static void dumpInternalBlobs(const NodePtr& node, const Config& config) {
static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config) {
std::string nodeName = node->getName();
formatNodeName(nodeName);
@ -116,7 +117,7 @@ static void dumpInternalBlobs(const NodePtr& node, const Config& config) {
}
}
void dumpInputBlobs(const NodePtr& node, const Config& config, int count) {
void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) {
if (!shouldBeDumped(node, config, "IN"))
return;
@ -150,7 +151,7 @@ void dumpInputBlobs(const NodePtr& node, const Config& config, int count) {
dumpInternalBlobs(node, config);
}
void dumpOutputBlobs(const NodePtr& node, const Config& config, int count) {
void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) {
if (!shouldBeDumped(node, config, "OUT"))
return;

View File

@ -1,25 +1,26 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS
#pragma once
#ifdef CPU_DEBUG_CAPS
#include "utils/debug_caps_config.h"
#include <node.h>
#include "config.h"
namespace ov {
namespace intel_cpu {
void dumpInputBlobs(const NodePtr &node, const Config& config, int count = -1);
void dumpOutputBlobs(const NodePtr &node, const Config& config, int count = -1);
void dumpInputBlobs(const NodePtr &node, const DebugCapsConfig& config, int count = -1);
void dumpOutputBlobs(const NodePtr &node, const DebugCapsConfig& config, int count = -1);
class DumpHelper {
const NodePtr& node;
const int count;
const Config& config;
const DebugCapsConfig& config;
public:
explicit DumpHelper(const NodePtr& _node, const Config& _config, int _count = -1): node(_node), config(_config), count(_count) {
explicit DumpHelper(const NodePtr& _node, const DebugCapsConfig& _config, int _count = -1):
node(_node), config(_config), count(_count) {
dumpInputBlobs(node, config, count);
}