[CPU] Add OV_CPU_VERBOSE env var to print node exec info to cout (#6390)

This commit is contained in:
Egor Duplensky 2021-09-26 22:17:57 +03:00 committed by GitHub
parent b968c7b813
commit c92988c8e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 823 additions and 416 deletions

View File

@ -0,0 +1,459 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cpu_types.h"
#include <vector>
#include <string>
namespace MKLDNNPlugin {
using Dim = std::size_t;
using VectorDims = std::vector<Dim>;
// Maps ngraph operation type names to internal CPU plugin node types.
// The map is case-insensitive (caseless_unordered_map). Many operation names
// collapse onto a single plugin type (e.g. all Reduce* ops -> Reduce).
// Names not present here resolve to Unknown via TypeFromName().
const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
        { "Constant", Input },
        { "Parameter", Input },
        { "Result", Output },
        { "Convolution", Convolution },
        { "GroupConvolution", Convolution },
        { "MatMul", MatMul },
        { "FullyConnected", FullyConnected },
        { "MaxPool", Pooling },
        { "AvgPool", Pooling },
        { "AdaptiveMaxPool", AdaptivePooling},
        { "AdaptiveAvgPool", AdaptivePooling},
        { "Add", Eltwise },
        { "Subtract", Eltwise },
        { "Multiply", Eltwise },
        { "Divide", Eltwise },
        { "SquaredDifference", Eltwise },
        { "Maximum", Eltwise },
        { "Minimum", Eltwise },
        { "Mod", Eltwise },
        { "FloorMod", Eltwise },
        { "Power", Eltwise },
        { "PowerStatic", Eltwise },
        { "Equal", Eltwise },
        { "NotEqual", Eltwise },
        { "Greater", Eltwise },
        { "GreaterEqual", Eltwise },
        { "Less", Eltwise },
        { "LessEqual", Eltwise },
        { "LogicalAnd", Eltwise },
        { "LogicalOr", Eltwise },
        { "LogicalXor", Eltwise },
        { "LogicalNot", Eltwise },
        { "Relu", Eltwise },
        { "LeakyRelu", Eltwise },
        { "Gelu", Eltwise },
        { "Elu", Eltwise },
        { "Tanh", Eltwise },
        { "Sigmoid", Eltwise },
        { "Abs", Eltwise },
        { "Sqrt", Eltwise },
        { "Clamp", Eltwise },
        { "Exp", Eltwise },
        { "SwishCPU", Eltwise },
        { "HSwish", Eltwise },
        { "Mish", Eltwise },
        { "HSigmoid", Eltwise },
        { "Round", Eltwise },
        { "PRelu", Eltwise },
        { "Erf", Eltwise },
        { "SoftPlus", Eltwise },
        { "Reshape", Reshape },
        { "Squeeze", Reshape },
        { "Unsqueeze", Reshape },
        { "Softmax", Softmax },
        { "Reorder", Reorder },
        { "BatchToSpace", BatchToSpace },
        { "SpaceToBatch", SpaceToBatch },
        { "DepthToSpace", DepthToSpace },
        { "SpaceToDepth", SpaceToDepth },
        { "Roll", Roll },
        { "LRN", Lrn },
        { "Split", Split },
        { "VariadicSplit", Split },
        { "Concat", Concatenation },
        { "ConvolutionBackpropData", Deconvolution },
        { "GroupConvolutionBackpropData", Deconvolution },
        { "StridedSlice", StridedSlice },
        { "Tile", Tile },
        { "ROIAlign", ROIAlign },
        { "ROIPooling", ROIPooling },
        { "PSROIPooling", PSROIPooling },
        { "DeformablePSROIPooling", PSROIPooling },
        { "Pad", Pad },
        { "Transpose", Transpose },
        { "LSTMCell", RNNCell },
        { "GRUCell", RNNCell },
        { "RNNCell", RNNCell },
        { "LSTMSequence", RNNSeq },
        { "GRUSequence", RNNSeq },
        { "RNNSequence", RNNSeq },
        { "FakeQuantize", FakeQuantize },
        { "BinaryConvolution", BinaryConvolution },
        { "DeformableConvolution", DeformableConvolution },
        { "TensorIterator", TensorIterator },
        { "Loop", TensorIterator },
        { "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
        { "Assign", MemoryOutput }, // for construction from layer ctor
        { "Convert", Convert },
        { "MVN", MVN},
        { "NormalizeL2", NormalizeL2},
        { "ScatterUpdate", ScatterUpdate},
        { "ScatterElementsUpdate", ScatterElementsUpdate},
        { "ScatterNDUpdate", ScatterNDUpdate},
        { "Interpolate", Interpolate},
        { "ReduceL1", Reduce},
        { "ReduceL2", Reduce},
        { "ReduceLogicalAnd", Reduce},
        { "ReduceLogicalOr", Reduce},
        { "ReduceMax", Reduce},
        { "ReduceMean", Reduce},
        { "ReduceMin", Reduce},
        { "ReduceProd", Reduce},
        { "ReduceSum", Reduce},
        { "ReduceLogSum", Reduce},
        { "ReduceLogSumExp", Reduce},
        { "ReduceSumSquare", Reduce},
        { "Broadcast", Broadcast},
        { "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
        { "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
        { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
        { "Gather", Gather},
        { "GatherElements", GatherElements},
        { "GatherND", GatherND},
        { "OneHot", OneHot},
        { "RegionYolo", RegionYolo},
        { "Select", Select},
        { "ShuffleChannels", ShuffleChannels},
        { "DFT", DFT},
        { "IDFT", DFT},
        // NOTE(review): duplicate key — "Abs" is already mapped to Eltwise above.
        // Which of two equivalent keys survives initializer-list construction is
        // unspecified by the standard; confirm the intended mapping and drop one.
        { "Abs", Math},
        { "Acos", Math},
        { "Acosh", Math},
        { "Asin", Math},
        { "Asinh", Math},
        { "Atan", Math},
        { "Atanh", Math},
        { "Ceil", Math},
        { "Ceiling", Math},
        { "Cos", Math},
        { "Cosh", Math},
        { "Floor", Math},
        { "HardSigmoid", Math},
        { "Log", Math},
        { "Neg", Math},
        { "Reciprocal", Math},
        { "Selu", Math},
        { "Sign", Math},
        { "Sin", Math},
        { "Sinh", Math},
        // NOTE(review): duplicate key — "SoftPlus" is already mapped to Eltwise above
        // (same unspecified-winner concern as "Abs").
        { "SoftPlus", Math},
        { "Softsign", Math},
        { "Tan", Math},
        { "CTCLoss", CTCLoss},
        { "Bucketize", Bucketize},
        { "CTCGreedyDecoder", CTCGreedyDecoder},
        { "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
        { "CumSum", CumSum},
        { "DetectionOutput", DetectionOutput},
        { "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
        { "LogSoftmax", LogSoftmax},
        { "TopK", TopK},
        { "GatherTree", GatherTree},
        { "GRN", GRN},
        { "Range", Range},
        { "Proposal", Proposal},
        { "ReorgYolo", ReorgYolo},
        { "ReverseSequence", ReverseSequence},
        { "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
        { "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
        { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
        { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
        { "ExtractImagePatches", ExtractImagePatches},
        { "NonMaxSuppression", NonMaxSuppression},
        { "NonMaxSuppressionIEInternal", NonMaxSuppression},
        { "MatrixNms", MatrixNms},
        { "MulticlassNms", MulticlassNms}
};
// Resolve an operation type name to the plugin-internal node Type.
// Lookup is case-insensitive (see type_to_name_tbl); unrecognized names
// map to Unknown.
Type TypeFromName(const std::string& type) {
    const auto entry = type_to_name_tbl.find(type);
    return entry == type_to_name_tbl.end() ? Unknown : entry->second;
}
// Produce a human-readable name for a node Type. Every recognized enumerator
// maps to its own identifier spelled as a string, so the name is generated via
// preprocessor stringification (same technique as algToString below);
// unrecognized values yield "Unknown".
std::string NameFromType(const Type type) {
#define TYPE_CASE(_type) if (type == _type) return #_type
    TYPE_CASE(Generic);
    TYPE_CASE(Reorder);
    TYPE_CASE(Input);
    TYPE_CASE(Output);
    TYPE_CASE(Convolution);
    TYPE_CASE(Deconvolution);
    TYPE_CASE(Lrn);
    TYPE_CASE(Pooling);
    TYPE_CASE(AdaptivePooling);
    TYPE_CASE(FullyConnected);
    TYPE_CASE(MatMul);
    TYPE_CASE(Softmax);
    TYPE_CASE(Split);
    TYPE_CASE(Concatenation);
    TYPE_CASE(StridedSlice);
    TYPE_CASE(Reshape);
    TYPE_CASE(Tile);
    TYPE_CASE(ROIAlign);
    TYPE_CASE(ROIPooling);
    TYPE_CASE(PSROIPooling);
    TYPE_CASE(DepthToSpace);
    TYPE_CASE(BatchToSpace);
    TYPE_CASE(Pad);
    TYPE_CASE(Transpose);
    TYPE_CASE(SpaceToDepth);
    TYPE_CASE(SpaceToBatch);
    TYPE_CASE(MemoryOutput);
    TYPE_CASE(MemoryInput);
    TYPE_CASE(RNNSeq);
    TYPE_CASE(RNNCell);
    TYPE_CASE(Eltwise);
    TYPE_CASE(FakeQuantize);
    TYPE_CASE(BinaryConvolution);
    TYPE_CASE(DeformableConvolution);
    TYPE_CASE(MVN);
    TYPE_CASE(TensorIterator);
    TYPE_CASE(Convert);
    TYPE_CASE(NormalizeL2);
    TYPE_CASE(ScatterUpdate);
    TYPE_CASE(ScatterElementsUpdate);
    TYPE_CASE(ScatterNDUpdate);
    TYPE_CASE(Interpolate);
    TYPE_CASE(Reduce);
    TYPE_CASE(Broadcast);
    TYPE_CASE(EmbeddingSegmentsSum);
    TYPE_CASE(EmbeddingBagPackedSum);
    TYPE_CASE(EmbeddingBagOffsetsSum);
    TYPE_CASE(Gather);
    TYPE_CASE(GatherElements);
    TYPE_CASE(GatherND);
    TYPE_CASE(OneHot);
    TYPE_CASE(RegionYolo);
    TYPE_CASE(Select);
    TYPE_CASE(Roll);
    TYPE_CASE(ShuffleChannels);
    TYPE_CASE(DFT);
    TYPE_CASE(Math);
    TYPE_CASE(CTCLoss);
    TYPE_CASE(Bucketize);
    TYPE_CASE(CTCGreedyDecoder);
    TYPE_CASE(CTCGreedyDecoderSeqLen);
    TYPE_CASE(CumSum);
    TYPE_CASE(DetectionOutput);
    TYPE_CASE(ExperimentalDetectronDetectionOutput);
    TYPE_CASE(LogSoftmax);
    TYPE_CASE(TopK);
    TYPE_CASE(GatherTree);
    TYPE_CASE(GRN);
    TYPE_CASE(Range);
    TYPE_CASE(Proposal);
    TYPE_CASE(ReorgYolo);
    TYPE_CASE(ReverseSequence);
    TYPE_CASE(ExperimentalDetectronTopKROIs);
    TYPE_CASE(ExperimentalDetectronROIFeatureExtractor);
    TYPE_CASE(ExperimentalDetectronPriorGridGenerator);
    TYPE_CASE(ExperimentalDetectronGenerateProposalsSingleImage);
    TYPE_CASE(ExtractImagePatches);
    TYPE_CASE(NonMaxSuppression);
    TYPE_CASE(MatrixNms);
    TYPE_CASE(MulticlassNms);
#undef TYPE_CASE
    return "Unknown";
}
// Produce a human-readable name for an Algorithm via stringification.
// Algorithm::Undefined is deliberately absent from the chain: it (and any
// future unlisted enumerator) falls through to the final "Undefined".
std::string algToString(const Algorithm alg) {
#define ALG_CASE(_alg) if (alg == _alg) return #_alg
    ALG_CASE(Default);
    ALG_CASE(PoolingMax);
    ALG_CASE(PoolingAvg);
    ALG_CASE(ConvolutionCommon);
    ALG_CASE(ConvolutionGrouped);
    ALG_CASE(DeconvolutionCommon);
    ALG_CASE(DeconvolutionGrouped);
    ALG_CASE(EltwiseAdd);
    ALG_CASE(EltwiseMultiply);
    ALG_CASE(EltwiseSubtract);
    ALG_CASE(EltwiseDivide);
    ALG_CASE(EltwiseFloorMod);
    ALG_CASE(EltwiseMod);
    ALG_CASE(EltwiseMaximum);
    ALG_CASE(EltwiseMinimum);
    ALG_CASE(EltwiseSquaredDifference);
    ALG_CASE(EltwisePowerDynamic);
    ALG_CASE(EltwisePowerStatic);
    ALG_CASE(EltwiseMulAdd);
    ALG_CASE(EltwiseEqual);
    ALG_CASE(EltwiseNotEqual);
    ALG_CASE(EltwiseGreater);
    ALG_CASE(EltwiseGreaterEqual);
    ALG_CASE(EltwiseLess);
    ALG_CASE(EltwiseLessEqual);
    ALG_CASE(EltwiseLogicalAnd);
    ALG_CASE(EltwiseLogicalOr);
    ALG_CASE(EltwiseLogicalXor);
    ALG_CASE(EltwiseLogicalNot);
    ALG_CASE(EltwiseRelu);
    ALG_CASE(EltwiseGelu);
    ALG_CASE(EltwiseElu);
    ALG_CASE(EltwiseTanh);
    ALG_CASE(EltwiseSigmoid);
    ALG_CASE(EltwiseAbs);
    ALG_CASE(EltwiseSqrt);
    ALG_CASE(EltwiseSoftRelu);
    ALG_CASE(EltwiseExp);
    ALG_CASE(EltwiseClamp);
    ALG_CASE(EltwiseSwish);
    ALG_CASE(EltwisePrelu);
    ALG_CASE(EltwiseMish);
    ALG_CASE(EltwiseHswish);
    ALG_CASE(EltwiseHsigmoid);
    ALG_CASE(EltwiseRoundHalfToEven);
    ALG_CASE(EltwiseRoundHalfAwayFromZero);
    ALG_CASE(EltwiseErf);
    ALG_CASE(FQCommon);
    ALG_CASE(FQQuantization);
    ALG_CASE(FQBinarization);
    ALG_CASE(ROIPoolingMax);
    ALG_CASE(ROIPoolingBilinear);
    ALG_CASE(ROIAlignMax);
    ALG_CASE(ROIAlignAvg);
    ALG_CASE(PSROIPoolingAverage);
    ALG_CASE(PSROIPoolingBilinear);
    ALG_CASE(PSROIPoolingBilinearDeformable);
    ALG_CASE(ReduceL1);
    ALG_CASE(ReduceL2);
    ALG_CASE(ReduceAnd);
    ALG_CASE(ReduceOr);
    ALG_CASE(ReduceMax);
    ALG_CASE(ReduceMean);
    ALG_CASE(ReduceMin);
    ALG_CASE(ReduceProd);
    ALG_CASE(ReduceSum);
    ALG_CASE(ReduceLogSum);
    ALG_CASE(ReduceLogSumExp);
    ALG_CASE(ReduceSumSquare);
    ALG_CASE(MathAbs);
    ALG_CASE(MathAcos);
    ALG_CASE(MathAcosh);
    ALG_CASE(MathAsin);
    ALG_CASE(MathAsinh);
    ALG_CASE(MathAtan);
    ALG_CASE(MathAtanh);
    ALG_CASE(MathCeiling);
    ALG_CASE(MathCos);
    ALG_CASE(MathCosh);
    ALG_CASE(MathErf);
    ALG_CASE(MathFloor);
    ALG_CASE(MathHardSigmoid);
    ALG_CASE(MathLog);
    ALG_CASE(MathNegative);
    ALG_CASE(MathReciprocal);
    ALG_CASE(MathSelu);
    ALG_CASE(MathSign);
    ALG_CASE(MathSin);
    ALG_CASE(MathSinh);
    ALG_CASE(MathSoftPlus);
    ALG_CASE(MathSoftsign);
    ALG_CASE(MathTan);
#undef ALG_CASE
    return "Undefined";
}
} // namespace MKLDNNPlugin

View File

@ -4,7 +4,10 @@
#pragma once
#include "caseless.hpp"
#include <vector>
#include <string>
namespace MKLDNNPlugin {
@ -97,7 +100,7 @@ enum Type {
};
enum Algorithm {
Undefined,
Default,
// Pooling algorithms
PoolingMax,
@ -215,4 +218,11 @@ enum Algorithm {
MathTan
};
extern const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl;
Type TypeFromName(const std::string& type);
std::string NameFromType(const Type type);
std::string algToString(const Algorithm alg);
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,8 @@
# Debug capabilities
Use the following cmake option to enable debug capabilities:
`-DENABLE_DEBUG_CAPS=ON`
* [Verbose mode](verbose.md)
* [Blob dumping](blob_dumping.md)
* [Graph serialization](graph_serialization.md)

View File

@ -1,9 +1,4 @@
# Debug capabilities
Use the following cmake option to enable debug capabilities:
`-DENABLE_CPU_DEBUG_CAPS=ON`
## Blob dumping
# Blob dumping
Blob dumping is controlled by environment variables (filters).
The variables define conditions of the node which input and output blobs
@ -24,12 +19,12 @@ or for shell session (bash example):
export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
binary ...
```
### Specify dump directory
## Specify dump directory
```sh
OV_CPU_BLOB_DUMP_DIR=<directory-name> binary ...
```
Default is *mkldnn_dump*
### Specify dump format
## Specify dump format
```sh
OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
```
@ -37,7 +32,7 @@ Options are:
* BIN (default)
* TEXT
### Filter input / output blobs
## Filter input / output blobs
To dump only input / output blobs:
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ...
@ -51,7 +46,7 @@ Options are:
* OUT
* ALL
### Filter by execution ID
## Filter by execution ID
To dump blobs only for nodes with specified execution IDs:
```sh
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ...
@ -61,7 +56,7 @@ Example:
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ...
```
### Filter by type
## Filter by type
To dump blobs only for nodes with specified types:
```sh
OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ...
@ -73,7 +68,7 @@ Example:
> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types
### Filter by name
## Filter by name
To dump blobs only for nodes with name matching specified regex:
```sh
OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ...
@ -83,7 +78,7 @@ Example:
OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ...
```
### Dump all the blobs
## Dump all the blobs
```sh
OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
```
@ -95,22 +90,3 @@ Example:
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
```
## Graph serialization
The functionality allows to serialize execution graph using environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```
Possible serialization options:
* cout
Serialize to console output
* \<path\>.xml
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.

View File

@ -0,0 +1,17 @@
# Graph serialization
The functionality allows to serialize execution graph using environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```
Possible serialization options:
* cout
Serialize to console output
* \<path\>.xml
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.

View File

@ -0,0 +1,38 @@
# Verbose mode
It is possible to enable tracing of the execution of plugin nodes to cout and to collect statistics, such as:
- node implementer:
* cpu (CPU plugin)
* dnnl (oneDNN library)
* ngraph_ref (ngraph reference fallback)
- node name
- node type
- node algorithm
- node primitive info
- input / output ports info
- fused nodes
- execution time
- etc
Format:
```sh
ov_cpu_verbose,exec,<node_implementer>,\
<node_name>:<node_type>:<node_alg>,<impl_type>,\
src:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
dst:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
post_ops:'<node_name>:<node_type>:<node_alg>;...;',\
<execution_time>
```
To turn on verbose mode the following environment variable should be used:
```sh
OV_CPU_VERBOSE=<level> binary ...
```
Currently verbose mode has only one level; any digit can be used to activate it.
To have colored verbose output just duplicate level's digit, for example:
```sh
OV_CPU_VERBOSE=11 binary ...
```
**NOTE:** Shell color codes are used

View File

@ -4,9 +4,11 @@
#pragma once
#include "mkldnn/ie_mkldnn.h"
#include "cpu_types.h"
#include <ie_layouts.h>
#include <ie_blob.h>
#include "mkldnn/ie_mkldnn.h"
namespace MKLDNNPlugin {

View File

@ -50,3 +50,38 @@ impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) {
return res;
}
// Produce a human-readable name for an implementation descriptor type.
// This is intentionally an ordered equality chain rather than a switch:
// impl_desc_type values are bit-mask combinations (see the enum definition),
// so the first matching entry wins and the original lookup order is preserved.
// Anything unmatched reports as "unknown".
const char* MKLDNNPlugin::impl_type_to_string(impl_desc_type type) {
#define IMPL_CASE(_type) if (type == _type) return #_type
    IMPL_CASE(unknown);
    IMPL_CASE(undef);
    IMPL_CASE(ref_any);
    IMPL_CASE(reorder);
    IMPL_CASE(gemm_any);
    IMPL_CASE(gemm_blas);
    IMPL_CASE(gemm_avx512);
    IMPL_CASE(gemm_avx2);
    IMPL_CASE(gemm_avx);
    IMPL_CASE(gemm_sse42);
    IMPL_CASE(jit_gemm);
    IMPL_CASE(jit_avx512_winograd);
    IMPL_CASE(jit_avx512);
    IMPL_CASE(jit_avx2);
    IMPL_CASE(jit_avx);
    IMPL_CASE(jit_sse42);
    IMPL_CASE(jit_uni);
    IMPL_CASE(jit_avx512_1x1);
    IMPL_CASE(jit_avx2_1x1);
    IMPL_CASE(jit_avx_1x1);
    IMPL_CASE(jit_sse42_1x1);
    IMPL_CASE(jit_uni_1x1);
    IMPL_CASE(jit_avx512_dw);
    IMPL_CASE(jit_avx2_dw);
    IMPL_CASE(jit_avx_dw);
    IMPL_CASE(jit_sse42_dw);
    IMPL_CASE(jit_uni_dw);
#undef IMPL_CASE
    return "unknown";
}

View File

@ -63,6 +63,7 @@ enum impl_desc_type {
jit_uni_dw = jit | uni | _dw,
};
const char * impl_type_to_string(impl_desc_type type);
impl_desc_type parse_impl_name(std::string impl_desc_name);
} // namespace MKLDNNPlugin

View File

@ -39,6 +39,7 @@
#include "utils/node_dumper.h"
#include "utils/ngraph_utils.hpp"
#include "utils/cpu_utils.hpp"
#include "utils/verbose.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include <ngraph/node.hpp>
@ -828,7 +829,9 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
mkldnn::stream stream(eng);
for (const auto& node : executableGraphNodes) {
PERF(config.collectPerfCounters, node);
VERBOSE(node, config.debugCaps.verbose);
PERF(node, config.collectPerfCounters);
if (request)
request->ThrowIfCanceled();

View File

@ -3,6 +3,7 @@
//
#include "mkldnn_node.h"
#include "dnnl_debug.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_itt.h"
@ -43,14 +44,14 @@
#include <nodes/mkldnn_shuffle_channels_node.h>
#include <nodes/mkldnn_reference_node.h>
#include <nodes/mkldnn_fake_quantize_node.h>
#include <mkldnn_types.h>
#include <dnnl_types.h>
#include "mkldnn_extension_utils.h"
#include "mkldnn/iml_type_mapper.h"
#include "nodes/common/cpu_memcpy.h"
#include "mkldnn_debug.h"
#include "utils/rt_info/memory_formats_attribute.hpp"
#include <dnnl_types.h>
#include <ie_ngraph_utils.hpp>
#include "utils/general_utils.h"
#include "utils/cpu_utils.hpp"
@ -63,372 +64,6 @@ using namespace MKLDNNPlugin;
using namespace openvino;
using namespace InferenceEngine::details;
namespace MKLDNNPlugin {
static const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
{ "Constant", Input },
{ "Parameter", Input },
{ "Result", Output },
{ "Convolution", Convolution },
{ "GroupConvolution", Convolution },
{ "MatMul", MatMul },
{ "FullyConnected", FullyConnected },
{ "MaxPool", Pooling },
{ "AvgPool", Pooling },
{ "AdaptiveMaxPool", AdaptivePooling},
{ "AdaptiveAvgPool", AdaptivePooling},
{ "Add", Eltwise },
{ "Subtract", Eltwise },
{ "Multiply", Eltwise },
{ "Divide", Eltwise },
{ "SquaredDifference", Eltwise },
{ "Maximum", Eltwise },
{ "Minimum", Eltwise },
{ "Mod", Eltwise },
{ "FloorMod", Eltwise },
{ "Power", Eltwise },
{ "PowerStatic", Eltwise },
{ "Equal", Eltwise },
{ "NotEqual", Eltwise },
{ "Greater", Eltwise },
{ "GreaterEqual", Eltwise },
{ "Less", Eltwise },
{ "LessEqual", Eltwise },
{ "LogicalAnd", Eltwise },
{ "LogicalOr", Eltwise },
{ "LogicalXor", Eltwise },
{ "LogicalNot", Eltwise },
{ "Relu", Eltwise },
{ "LeakyRelu", Eltwise },
{ "Gelu", Eltwise },
{ "Elu", Eltwise },
{ "Tanh", Eltwise },
{ "Sigmoid", Eltwise },
{ "Abs", Eltwise },
{ "Sqrt", Eltwise },
{ "Clamp", Eltwise },
{ "Exp", Eltwise },
{ "SwishCPU", Eltwise },
{ "HSwish", Eltwise },
{ "Mish", Eltwise },
{ "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "PRelu", Eltwise },
{ "Erf", Eltwise },
{ "SoftPlus", Eltwise },
{ "Reshape", Reshape },
{ "Squeeze", Reshape },
{ "Unsqueeze", Reshape },
{ "Softmax", Softmax },
{ "Reorder", Reorder },
{ "BatchToSpace", BatchToSpace },
{ "SpaceToBatch", SpaceToBatch },
{ "DepthToSpace", DepthToSpace },
{ "SpaceToDepth", SpaceToDepth },
{ "Roll", Roll },
{ "LRN", Lrn },
{ "Split", Split },
{ "VariadicSplit", Split },
{ "Concat", Concatenation },
{ "ConvolutionBackpropData", Deconvolution },
{ "GroupConvolutionBackpropData", Deconvolution },
{ "StridedSlice", StridedSlice },
{ "Tile", Tile },
{ "ROIAlign", ROIAlign },
{ "ROIPooling", ROIPooling },
{ "PSROIPooling", PSROIPooling },
{ "DeformablePSROIPooling", PSROIPooling },
{ "Pad", Pad },
{ "Transpose", Transpose },
{ "LSTMCell", RNNCell },
{ "GRUCell", RNNCell },
{ "RNNCell", RNNCell },
{ "LSTMSequence", RNNSeq },
{ "GRUSequence", RNNSeq },
{ "RNNSequence", RNNSeq },
{ "FakeQuantize", FakeQuantize },
{ "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Assign", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert },
{ "MVN", MVN},
{ "NormalizeL2", NormalizeL2},
{ "ScatterUpdate", ScatterUpdate},
{ "ScatterElementsUpdate", ScatterElementsUpdate},
{ "ScatterNDUpdate", ScatterNDUpdate},
{ "Interpolate", Interpolate},
{ "ReduceL1", Reduce},
{ "ReduceL2", Reduce},
{ "ReduceLogicalAnd", Reduce},
{ "ReduceLogicalOr", Reduce},
{ "ReduceMax", Reduce},
{ "ReduceMean", Reduce},
{ "ReduceMin", Reduce},
{ "ReduceProd", Reduce},
{ "ReduceSum", Reduce},
{ "ReduceLogSum", Reduce},
{ "ReduceLogSumExp", Reduce},
{ "ReduceSumSquare", Reduce},
{ "Broadcast", Broadcast},
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
{ "Gather", Gather},
{ "GatherElements", GatherElements},
{ "GatherND", GatherND},
{ "OneHot", OneHot},
{ "RegionYolo", RegionYolo},
{ "Select", Select},
{ "ShuffleChannels", ShuffleChannels},
{ "DFT", DFT},
{ "IDFT", DFT},
{ "Abs", Math},
{ "Acos", Math},
{ "Acosh", Math},
{ "Asin", Math},
{ "Asinh", Math},
{ "Atan", Math},
{ "Atanh", Math},
{ "Ceil", Math},
{ "Ceiling", Math},
{ "Cos", Math},
{ "Cosh", Math},
{ "Floor", Math},
{ "HardSigmoid", Math},
{ "Log", Math},
{ "Neg", Math},
{ "Reciprocal", Math},
{ "Selu", Math},
{ "Sign", Math},
{ "Sin", Math},
{ "Sinh", Math},
{ "SoftPlus", Math},
{ "Softsign", Math},
{ "Tan", Math},
{ "CTCLoss", CTCLoss},
{ "Bucketize", Bucketize},
{ "CTCGreedyDecoder", CTCGreedyDecoder},
{ "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
{ "CumSum", CumSum},
{ "DetectionOutput", DetectionOutput},
{ "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
{ "LogSoftmax", LogSoftmax},
{ "TopK", TopK},
{ "GatherTree", GatherTree},
{ "GRN", GRN},
{ "Range", Range},
{ "Proposal", Proposal},
{ "ReorgYolo", ReorgYolo},
{ "ReverseSequence", ReverseSequence},
{ "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
{ "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
{ "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
{ "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
{ "ExtractImagePatches", ExtractImagePatches},
{ "NonMaxSuppression", NonMaxSuppression},
{ "NonMaxSuppressionIEInternal", NonMaxSuppression},
{ "MatrixNms", MatrixNms},
{ "MulticlassNms", MulticlassNms}
};
Type TypeFromName(const std::string & type) {
auto itType = type_to_name_tbl.find(type);
if (type_to_name_tbl.end() != itType) {
return itType->second;
}
return Unknown;
}
template<>
DnnlMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtInputPort(portNum));
}
template<>
BlockedMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtInputPort(portNum));
}
template<>
DnnlMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtOutputPort(portNum));
}
template<>
BlockedMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtOutputPort(portNum));
}
std::string NameFromType(Type type) {
switch (type) {
case Generic:
return "Generic";
case Reorder:
return "Reorder";
case Input:
return "Input";
case Output:
return "Output";
case Convolution:
return "Convolution";
case Deconvolution:
return "Deconvolution";
case Lrn:
return "Lrn";
case Pooling:
return "Pooling";
case AdaptivePooling:
return "AdaptivePooling";
case FullyConnected:
return "FullyConnected";
case MatMul:
return "MatMul";
case Softmax:
return "Softmax";
case Split:
return "Split";
case Concatenation:
return "Concatenation";
case StridedSlice:
return "StridedSlice";
case Reshape:
return "Reshape";
case Tile:
return "Tile";
case ROIAlign:
return "ROIAlign";
case ROIPooling:
return "ROIPooling";
case PSROIPooling:
return "PSROIPooling";
case DepthToSpace:
return "DepthToSpace";
case BatchToSpace:
return "BatchToSpace";
case Pad:
return "Pad";
case Transpose:
return "Transpose";
case SpaceToDepth:
return "SpaceToDepth";
case SpaceToBatch:
return "SpaceToBatch";
case MemoryOutput:
return "MemoryOutput";
case MemoryInput:
return "MemoryInput";
case RNNSeq:
return "RNNSeq";
case RNNCell:
return "RNNCell";
case Eltwise:
return "Eltwise";
case FakeQuantize:
return "FakeQuantize";
case BinaryConvolution:
return "BinaryConvolution";
case DeformableConvolution:
return "DeformableConvolution";
case MVN:
return "MVN";
case TensorIterator:
return "TensorIterator";
case Convert:
return "Convert";
case NormalizeL2:
return "NormalizeL2";
case ScatterUpdate:
return "ScatterUpdate";
case ScatterElementsUpdate:
return "ScatterElementsUpdate";
case ScatterNDUpdate:
return "ScatterNDUpdate";
case Interpolate:
return "Interpolate";
case Reduce:
return "Reduce";
case Broadcast:
return "Broadcast";
case EmbeddingSegmentsSum:
return "EmbeddingSegmentsSum";
case EmbeddingBagPackedSum:
return "EmbeddingBagPackedSum";
case EmbeddingBagOffsetsSum:
return "EmbeddingBagOffsetsSum";
case Gather:
return "Gather";
case GatherElements:
return "GatherElements";
case GatherND:
return "GatherND";
case OneHot:
return "OneHot";
case RegionYolo:
return "RegionYolo";
case Select:
return "Select";
case Roll:
return "Roll";
case ShuffleChannels:
return "ShuffleChannels";
case DFT:
return "DFT";
case Math:
return "Math";
case CTCLoss:
return "CTCLoss";
case Bucketize:
return "Bucketize";
case CTCGreedyDecoder:
return "CTCGreedyDecoder";
case CTCGreedyDecoderSeqLen:
return "CTCGreedyDecoderSeqLen";
case CumSum:
return "CumSum";
case DetectionOutput:
return "DetectionOutput";
case ExperimentalDetectronDetectionOutput:
return "ExperimentalDetectronDetectionOutput";
case LogSoftmax:
return "LogSoftmax";
case TopK:
return "TopK";
case GatherTree:
return "GatherTree";
case GRN:
return "GRN";
case Range:
return "Range";
case Proposal:
return "Proposal";
case ReorgYolo:
return "ReorgYolo";
case ReverseSequence:
return "ReverseSequence";
case ExperimentalDetectronTopKROIs:
return "ExperimentalDetectronTopKROIs";
case ExperimentalDetectronROIFeatureExtractor:
return "ExperimentalDetectronROIFeatureExtractor";
case ExperimentalDetectronPriorGridGenerator:
return "ExperimentalDetectronPriorGridGenerator";
case ExperimentalDetectronGenerateProposalsSingleImage:
return "ExperimentalDetectronGenerateProposalsSingleImage";
case ExtractImagePatches:
return "ExtractImagePatches";
case NonMaxSuppression:
return "NonMaxSuppression";
case MatrixNms:
return "MatrixNms";
case MulticlassNms:
return "MulticlassNms";
default:
return "Unknown";
}
}
} // namespace MKLDNNPlugin
MKLDNNNode::NodesFactory & MKLDNNNode::factory() {
static NodesFactory factoryInstance;
@ -439,7 +74,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
algorithm = Algorithm::Undefined;
algorithm = Algorithm::Default;
fusingPort = -1;
const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();

View File

@ -36,9 +36,6 @@ using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>;
using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
Type TypeFromName(const std::string & type);
std::string NameFromType(Type type);
class PortConfigurator {
public:
PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape,
@ -629,7 +626,7 @@ protected:
MKLDNNWeightsSharing::Ptr weightCache;
Algorithm algorithm = Algorithm::Undefined;
Algorithm algorithm = Algorithm::Default;
bool isInQuantizedGraph = false;
@ -744,6 +741,10 @@ private:
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
#ifdef CPU_DEBUG_CAPS
friend class Verbose;
#endif
};
class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,

View File

@ -5,20 +5,25 @@
#pragma once
#include <chrono>
#include <ratio>
namespace MKLDNNPlugin {
class PerfCount {
uint64_t duration;
uint64_t total_duration;
uint32_t num;
std::chrono::high_resolution_clock::time_point __start = {};
std::chrono::high_resolution_clock::time_point __finish = {};
public:
PerfCount(): duration(0), num(0) {}
PerfCount(): total_duration(0), num(0) {}
uint64_t avg() { return (num == 0) ? 0 : duration / num; }
std::chrono::duration<double, std::milli> duration() const {
return __finish - __start;
}
uint64_t avg() const { return (num == 0) ? 0 : total_duration / num; }
private:
void start_itr() {
@ -27,8 +32,7 @@ private:
void finish_itr() {
__finish = std::chrono::high_resolution_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
total_duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
num++;
}
@ -46,5 +50,5 @@ public:
} // namespace MKLDNNPlugin
#define GET_PERF(_counter) std::unique_ptr<PerfHelper>(new PerfHelper(_counter->PerfCounter()))
#define PERF(_need, _counter) auto pc = _need ? GET_PERF(_counter) : nullptr;
#define GET_PERF(_node) std::unique_ptr<PerfHelper>(new PerfHelper(_node->PerfCounter()))
#define PERF(_node, _need) auto pc = _need ? GET_PERF(_node) : nullptr;

View File

@ -24,6 +24,7 @@ public:
readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
readParam(verbose, "OV_CPU_VERBOSE");
}
std::string blobDumpDir;
@ -33,9 +34,10 @@ public:
std::string blobDumpNodeType;
std::string blobDumpNodeName;
std::string execGraphPath;
std::string verbose;
private:
void readParam(std::string& param, const char* envVar) {
static void readParam(std::string& param, const char* envVar) {
if (const char* envValue = std::getenv(envVar))
param = envValue;
}

View File

@ -0,0 +1,169 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS
#include "verbose.h"
#include "mkldnn_node.h"
#include "cpu_types.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "dnnl_types.h"
#include "dnnl_debug.h"
#include <string>
#include <cstdlib>
#include <sstream>
#include <iostream>
namespace MKLDNNPlugin {
// Decide whether verbose info should be emitted for the current node:
// requires verbosity level >= 1, and skips nodes that do no real work
// (constants and graph Input/Output placeholders).
bool Verbose::shouldBePrinted() const {
    if (lvl < 1)
        return false;
    const auto nodeType = node->getType();
    return !(node->isConstant() || nodeType == Input || nodeType == Output);
}
/**
 * Print node verbose execution information to cout.
 * Similar to DNNL_VERBOSE output.
 * Formatting is written in C using oneDNN format functions.
 * Can be rewritten in pure C++ if necessary.
 */
void Verbose::printInfo() {
/* 1, 2, 3, etc -> no color
* 11, 22, 33, etc -> colorize */
bool colorUp = lvl / 10 > 0 ? true : false;
enum Color {
RED,
GREEN,
YELLOW,
BLUE,
PURPLE,
CYAN
};
auto colorize = [&](const Color color, const std::string& str) {
if (!colorUp)
return str;
const std::string red("\033[1;31m");
const std::string green("\033[1;32m");
const std::string yellow("\033[1;33m");
const std::string blue("\033[1;34m");
const std::string purple("\033[1;35m");
const std::string cyan("\033[1;36m");
const std::string reset("\033[0m");
std::string colorCode;
switch (color) {
case RED: colorCode = red;
break;
case GREEN: colorCode = green;
break;
case YELLOW: colorCode = yellow;
break;
case BLUE: colorCode = blue;
break;
case PURPLE: colorCode = purple;
break;
case CYAN: colorCode = cyan;
break;
default: colorCode = reset;
break;
}
return colorCode + str + reset;
};
// can be increased if necessary
const int CPU_VERBOSE_DAT_LEN = 512;
char portsInfo[CPU_VERBOSE_DAT_LEN] = {'\0'};
int written = 0;
int written_total = 0;
auto shift = [&](int size) {
if (written < 0 || written_total + size > CPU_VERBOSE_DAT_LEN) {
const char* errorMsg = "# NOT ENOUGHT BUFFER SIZE #";
snprintf(portsInfo, strlen(errorMsg) + 1, "%s", errorMsg);
written_total = strlen(errorMsg);
return;
}
written_total += size;
};
auto formatMemDesc = [&](const dnnl_memory_desc_t& desc, std::string& prefix) {
prefix = colorize(BLUE, prefix);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, " ");
shift(written);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", prefix.c_str());
shift(written);
written = dnnl_md2fmt_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
shift(written);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, ":");
shift(written);
written = dnnl_md2dim_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
shift(written);
};
for (int i = 0; i < node->getParentEdges().size(); i++) {
std::string prefix("src:" + std::to_string(i) + ':');
formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
node->getParentEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
prefix);
}
for (int i = 0; i < node->getChildEdges().size(); i++) {
std::string prefix("dst:" + std::to_string(i) + ':');
formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
node->getChildEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
prefix);
}
std::string post_ops;
if (!node->getFusedWith().empty()) {
post_ops += "post_ops:'";
for (const auto& fusedNode : node->getFusedWith()) {
post_ops.append(colorize(GREEN, fusedNode->getName())).append(":")
.append(colorize(CYAN, NameFromType(fusedNode->getType()))).append(":")
.append(algToString(fusedNode->getAlgorithm()))
.append(";");
}
post_ops += "'";
}
std::string nodeImplementer = "cpu";
if (node->prim)
nodeImplementer = "dnnl"; // oneDNN
else if (node->getType() == Reference)
nodeImplementer = "ngraph_ref"; // ngraph reference
const std::string& nodeName = colorize(GREEN, node->getName());
const std::string& nodeType = colorize(CYAN, NameFromType(node->getType()));
const std::string& nodeAlg = algToString(node->getAlgorithm());
const std::string& nodePrimImplType = impl_type_to_string(node->getSelectedPrimitiveDescriptor()->getImplementationType());
stream << "ov_cpu_verbose" << ','
<< "exec" << ','
<< nodeImplementer << ','
<< nodeName << ":" << nodeType << ":" << nodeAlg << ','
<< nodePrimImplType << ','
<< portsInfo << ','
<< post_ops << ',';
}
// Append the node's accumulated execution time to the verbose record.
// NOTE(review): the perf counter visible in this commit is accumulated via
// duration_cast<microseconds>, yet the suffix printed here is "ms" — the
// label may be misstating the unit; confirm against PerfCounter's
// duration() return type.
void Verbose::printDuration() {
    const auto& duration = node->PerfCounter().duration().count();
    stream << duration << "ms";
}
// Write the accumulated verbose record to stdout in one shot.
// Uses str() rather than streaming rdbuf(): inserting a streambuf that
// yields no characters sets failbit on std::cout (per the standard's
// ostream<<streambuf semantics), which would silently disable all
// subsequent stdout output.
void Verbose::flush() const {
    std::cout << stream.str() << "\n";
}
} // namespace MKLDNNPlugin
#endif // CPU_DEBUG_CAPS

View File

@ -0,0 +1,46 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS
#include "mkldnn_node.h"
#include <string>
#include <cstdlib>
#include <sstream>
namespace MKLDNNPlugin {
class Verbose {
public:
    /// RAII-style verbose printer: the constructor emits the node execution
    /// info, the destructor emits the duration and flushes the record to
    /// stdout. Intended to be instantiated on the stack via the VERBOSE macro.
    /// @param _node node being traced; the reference is stored, not copied,
    ///              so it must outlive this object
    /// @param _lvl  verbosity level string (from OV_CPU_VERBOSE); parsed with
    ///              atoi, so non-numeric input yields 0 (printing disabled,
    ///              values >= 10 additionally enable colored output)
    Verbose(const MKLDNNNodePtr& _node, const std::string& _lvl)
        : node(_node), lvl(atoi(_lvl.c_str())) {
        if (!shouldBePrinted())
            return;
        printInfo();
    }

    // Non-virtual: the class has no virtual members and is never used
    // polymorphically — it exists only as a scoped stack object.
    ~Verbose() {
        if (!shouldBePrinted())
            return;
        printDuration();
        flush();
    }

    // Stack-only helper: copying would duplicate the buffered record
    // (std::stringstream is non-copyable anyway); make the intent explicit.
    Verbose(const Verbose&) = delete;
    Verbose& operator=(const Verbose&) = delete;

private:
    const MKLDNNNodePtr& node;  // borrowed reference; see lifetime note above
    const int lvl;              // parsed verbosity level
    std::stringstream stream;   // record buffer, flushed once in the dtor

    bool shouldBePrinted() const;
    void printInfo();
    void printDuration();
    void flush() const;
};
#define VERBOSE(...) Verbose(__VA_ARGS__)
} // namespace MKLDNNPlugin
#else
#define VERBOSE(...)
#endif // CPU_DEBUG_CAPS

View File

@ -12,6 +12,7 @@
#include <algorithm>
#include <cctype>
#include <functional>
#include <iterator>
#include <map>
#include <set>
#include <unordered_map>