[CPU] Add OV_CPU_VERBOSE env var to print node exec info to cout (#6390)

This commit is contained in:
Egor Duplensky 2021-09-26 22:17:57 +03:00 committed by GitHub
parent b968c7b813
commit c92988c8e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 823 additions and 416 deletions

View File

@ -0,0 +1,459 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cpu_types.h"
#include <vector>
#include <string>
namespace MKLDNNPlugin {
using Dim = std::size_t;
using VectorDims = std::vector<Dim>;
// Case-insensitive mapping from an operation type name (as it appears in the
// ngraph/IE model) to the CPU plugin's internal node Type. Many operation
// names intentionally share one Type (e.g. all arithmetic, comparison and
// activation ops are handled by the Eltwise node).
//
// NOTE(review): keys must be unique here. "Abs" and "SoftPlus" used to be
// listed twice — once mapped to Eltwise and once to Math. Because this map is
// built from an initializer list, it is unspecified which of two equivalent
// keys is actually inserted, so the duplicate Math entries were removed and
// the Eltwise mapping (consistent with EltwiseAbs / the SoftPlus Eltwise
// handling in this file) is kept.
const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
        { "Constant", Input },
        { "Parameter", Input },
        { "Result", Output },
        { "Convolution", Convolution },
        { "GroupConvolution", Convolution },
        { "MatMul", MatMul },
        { "FullyConnected", FullyConnected },
        { "MaxPool", Pooling },
        { "AvgPool", Pooling },
        { "AdaptiveMaxPool", AdaptivePooling},
        { "AdaptiveAvgPool", AdaptivePooling},
        { "Add", Eltwise },
        { "Subtract", Eltwise },
        { "Multiply", Eltwise },
        { "Divide", Eltwise },
        { "SquaredDifference", Eltwise },
        { "Maximum", Eltwise },
        { "Minimum", Eltwise },
        { "Mod", Eltwise },
        { "FloorMod", Eltwise },
        { "Power", Eltwise },
        { "PowerStatic", Eltwise },
        { "Equal", Eltwise },
        { "NotEqual", Eltwise },
        { "Greater", Eltwise },
        { "GreaterEqual", Eltwise },
        { "Less", Eltwise },
        { "LessEqual", Eltwise },
        { "LogicalAnd", Eltwise },
        { "LogicalOr", Eltwise },
        { "LogicalXor", Eltwise },
        { "LogicalNot", Eltwise },
        { "Relu", Eltwise },
        { "LeakyRelu", Eltwise },
        { "Gelu", Eltwise },
        { "Elu", Eltwise },
        { "Tanh", Eltwise },
        { "Sigmoid", Eltwise },
        { "Abs", Eltwise },
        { "Sqrt", Eltwise },
        { "Clamp", Eltwise },
        { "Exp", Eltwise },
        { "SwishCPU", Eltwise },
        { "HSwish", Eltwise },
        { "Mish", Eltwise },
        { "HSigmoid", Eltwise },
        { "Round", Eltwise },
        { "PRelu", Eltwise },
        { "Erf", Eltwise },
        { "SoftPlus", Eltwise },
        { "Reshape", Reshape },
        { "Squeeze", Reshape },
        { "Unsqueeze", Reshape },
        { "Softmax", Softmax },
        { "Reorder", Reorder },
        { "BatchToSpace", BatchToSpace },
        { "SpaceToBatch", SpaceToBatch },
        { "DepthToSpace", DepthToSpace },
        { "SpaceToDepth", SpaceToDepth },
        { "Roll", Roll },
        { "LRN", Lrn },
        { "Split", Split },
        { "VariadicSplit", Split },
        { "Concat", Concatenation },
        { "ConvolutionBackpropData", Deconvolution },
        { "GroupConvolutionBackpropData", Deconvolution },
        { "StridedSlice", StridedSlice },
        { "Tile", Tile },
        { "ROIAlign", ROIAlign },
        { "ROIPooling", ROIPooling },
        { "PSROIPooling", PSROIPooling },
        { "DeformablePSROIPooling", PSROIPooling },
        { "Pad", Pad },
        { "Transpose", Transpose },
        { "LSTMCell", RNNCell },
        { "GRUCell", RNNCell },
        { "RNNCell", RNNCell },
        { "LSTMSequence", RNNSeq },
        { "GRUSequence", RNNSeq },
        { "RNNSequence", RNNSeq },
        { "FakeQuantize", FakeQuantize },
        { "BinaryConvolution", BinaryConvolution },
        { "DeformableConvolution", DeformableConvolution },
        { "TensorIterator", TensorIterator },
        { "Loop", TensorIterator },
        { "ReadValue", MemoryInput},  // for construction from name ctor, arbitrary name is used
        { "Assign", MemoryOutput },  // for construction from layer ctor
        { "Convert", Convert },
        { "MVN", MVN},
        { "NormalizeL2", NormalizeL2},
        { "ScatterUpdate", ScatterUpdate},
        { "ScatterElementsUpdate", ScatterElementsUpdate},
        { "ScatterNDUpdate", ScatterNDUpdate},
        { "Interpolate", Interpolate},
        { "ReduceL1", Reduce},
        { "ReduceL2", Reduce},
        { "ReduceLogicalAnd", Reduce},
        { "ReduceLogicalOr", Reduce},
        { "ReduceMax", Reduce},
        { "ReduceMean", Reduce},
        { "ReduceMin", Reduce},
        { "ReduceProd", Reduce},
        { "ReduceSum", Reduce},
        { "ReduceLogSum", Reduce},
        { "ReduceLogSumExp", Reduce},
        { "ReduceSumSquare", Reduce},
        { "Broadcast", Broadcast},
        { "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
        { "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
        { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
        { "Gather", Gather},
        { "GatherElements", GatherElements},
        { "GatherND", GatherND},
        { "OneHot", OneHot},
        { "RegionYolo", RegionYolo},
        { "Select", Select},
        { "ShuffleChannels", ShuffleChannels},
        { "DFT", DFT},
        { "IDFT", DFT},
        // NOTE(review): "Abs" and "SoftPlus" are mapped to Eltwise above; the
        // duplicate Math entries that used to sit here were dead and were removed.
        { "Acos", Math},
        { "Acosh", Math},
        { "Asin", Math},
        { "Asinh", Math},
        { "Atan", Math},
        { "Atanh", Math},
        { "Ceil", Math},
        { "Ceiling", Math},
        { "Cos", Math},
        { "Cosh", Math},
        { "Floor", Math},
        { "HardSigmoid", Math},
        { "Log", Math},
        { "Neg", Math},
        { "Reciprocal", Math},
        { "Selu", Math},
        { "Sign", Math},
        { "Sin", Math},
        { "Sinh", Math},
        { "Softsign", Math},
        { "Tan", Math},
        { "CTCLoss", CTCLoss},
        { "Bucketize", Bucketize},
        { "CTCGreedyDecoder", CTCGreedyDecoder},
        { "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
        { "CumSum", CumSum},
        { "DetectionOutput", DetectionOutput},
        { "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
        { "LogSoftmax", LogSoftmax},
        { "TopK", TopK},
        { "GatherTree", GatherTree},
        { "GRN", GRN},
        { "Range", Range},
        { "Proposal", Proposal},
        { "ReorgYolo", ReorgYolo},
        { "ReverseSequence", ReverseSequence},
        { "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
        { "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
        { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
        { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
        { "ExtractImagePatches", ExtractImagePatches},
        { "NonMaxSuppression", NonMaxSuppression},
        { "NonMaxSuppressionIEInternal", NonMaxSuppression},
        { "MatrixNms", MatrixNms},
        { "MulticlassNms", MulticlassNms}
};
// Resolves an operation type name (case-insensitive) to the plugin's internal
// node Type; returns Unknown for names not present in type_to_name_tbl.
Type TypeFromName(const std::string& type) {
    const auto entry = type_to_name_tbl.find(type);
    return (entry == type_to_name_tbl.end()) ? Unknown : entry->second;
}
// Returns the human-readable name of a node Type, or "Unknown" for values
// without a dedicated name.
std::string NameFromType(const Type type) {
// Every enumerator's printed name is exactly its identifier, so the
// stringizing operator keeps the table free of copy-paste mismatches.
#define CASE(_type) case _type: return #_type
    switch (type) {
    CASE(Generic);
    CASE(Reorder);
    CASE(Input);
    CASE(Output);
    CASE(Convolution);
    CASE(Deconvolution);
    CASE(Lrn);
    CASE(Pooling);
    CASE(AdaptivePooling);
    CASE(FullyConnected);
    CASE(MatMul);
    CASE(Softmax);
    CASE(Split);
    CASE(Concatenation);
    CASE(StridedSlice);
    CASE(Reshape);
    CASE(Tile);
    CASE(ROIAlign);
    CASE(ROIPooling);
    CASE(PSROIPooling);
    CASE(DepthToSpace);
    CASE(BatchToSpace);
    CASE(Pad);
    CASE(Transpose);
    CASE(SpaceToDepth);
    CASE(SpaceToBatch);
    CASE(MemoryOutput);
    CASE(MemoryInput);
    CASE(RNNSeq);
    CASE(RNNCell);
    CASE(Eltwise);
    CASE(FakeQuantize);
    CASE(BinaryConvolution);
    CASE(DeformableConvolution);
    CASE(MVN);
    CASE(TensorIterator);
    CASE(Convert);
    CASE(NormalizeL2);
    CASE(ScatterUpdate);
    CASE(ScatterElementsUpdate);
    CASE(ScatterNDUpdate);
    CASE(Interpolate);
    CASE(Reduce);
    CASE(Broadcast);
    CASE(EmbeddingSegmentsSum);
    CASE(EmbeddingBagPackedSum);
    CASE(EmbeddingBagOffsetsSum);
    CASE(Gather);
    CASE(GatherElements);
    CASE(GatherND);
    CASE(OneHot);
    CASE(RegionYolo);
    CASE(Select);
    CASE(Roll);
    CASE(ShuffleChannels);
    CASE(DFT);
    CASE(Math);
    CASE(CTCLoss);
    CASE(Bucketize);
    CASE(CTCGreedyDecoder);
    CASE(CTCGreedyDecoderSeqLen);
    CASE(CumSum);
    CASE(DetectionOutput);
    CASE(ExperimentalDetectronDetectionOutput);
    CASE(LogSoftmax);
    CASE(TopK);
    CASE(GatherTree);
    CASE(GRN);
    CASE(Range);
    CASE(Proposal);
    CASE(ReorgYolo);
    CASE(ReverseSequence);
    CASE(ExperimentalDetectronTopKROIs);
    CASE(ExperimentalDetectronROIFeatureExtractor);
    CASE(ExperimentalDetectronPriorGridGenerator);
    CASE(ExperimentalDetectronGenerateProposalsSingleImage);
    CASE(ExtractImagePatches);
    CASE(NonMaxSuppression);
    CASE(MatrixNms);
    CASE(MulticlassNms);
    default:
        return "Unknown";
    }
#undef CASE
}
// Returns the identifier of the given Algorithm as a string, or "Undefined"
// if the value is not a listed enumerator.
std::string algToString(const Algorithm alg) {
// Each enumerator prints as its own identifier via the stringizing operator.
#define CASE(_alg) case _alg: return #_alg
    switch (alg) {
    CASE(Default);
    CASE(PoolingMax);
    CASE(PoolingAvg);
    CASE(ConvolutionCommon);
    CASE(ConvolutionGrouped);
    CASE(DeconvolutionCommon);
    CASE(DeconvolutionGrouped);
    CASE(EltwiseAdd);
    CASE(EltwiseMultiply);
    CASE(EltwiseSubtract);
    CASE(EltwiseDivide);
    CASE(EltwiseFloorMod);
    CASE(EltwiseMod);
    CASE(EltwiseMaximum);
    CASE(EltwiseMinimum);
    CASE(EltwiseSquaredDifference);
    CASE(EltwisePowerDynamic);
    CASE(EltwisePowerStatic);
    CASE(EltwiseMulAdd);
    CASE(EltwiseEqual);
    CASE(EltwiseNotEqual);
    CASE(EltwiseGreater);
    CASE(EltwiseGreaterEqual);
    CASE(EltwiseLess);
    CASE(EltwiseLessEqual);
    CASE(EltwiseLogicalAnd);
    CASE(EltwiseLogicalOr);
    CASE(EltwiseLogicalXor);
    CASE(EltwiseLogicalNot);
    CASE(EltwiseRelu);
    CASE(EltwiseGelu);
    CASE(EltwiseElu);
    CASE(EltwiseTanh);
    CASE(EltwiseSigmoid);
    CASE(EltwiseAbs);
    CASE(EltwiseSqrt);
    CASE(EltwiseSoftRelu);
    CASE(EltwiseExp);
    CASE(EltwiseClamp);
    CASE(EltwiseSwish);
    CASE(EltwisePrelu);
    CASE(EltwiseMish);
    CASE(EltwiseHswish);
    CASE(EltwiseHsigmoid);
    CASE(EltwiseRoundHalfToEven);
    CASE(EltwiseRoundHalfAwayFromZero);
    CASE(EltwiseErf);
    CASE(FQCommon);
    CASE(FQQuantization);
    CASE(FQBinarization);
    CASE(ROIPoolingMax);
    CASE(ROIPoolingBilinear);
    CASE(ROIAlignMax);
    CASE(ROIAlignAvg);
    CASE(PSROIPoolingAverage);
    CASE(PSROIPoolingBilinear);
    CASE(PSROIPoolingBilinearDeformable);
    CASE(ReduceL1);
    CASE(ReduceL2);
    CASE(ReduceAnd);
    CASE(ReduceOr);
    CASE(ReduceMax);
    CASE(ReduceMean);
    CASE(ReduceMin);
    CASE(ReduceProd);
    CASE(ReduceSum);
    CASE(ReduceLogSum);
    CASE(ReduceLogSumExp);
    CASE(ReduceSumSquare);
    CASE(MathAbs);
    CASE(MathAcos);
    CASE(MathAcosh);
    CASE(MathAsin);
    CASE(MathAsinh);
    CASE(MathAtan);
    CASE(MathAtanh);
    CASE(MathCeiling);
    CASE(MathCos);
    CASE(MathCosh);
    CASE(MathErf);
    CASE(MathFloor);
    CASE(MathHardSigmoid);
    CASE(MathLog);
    CASE(MathNegative);
    CASE(MathReciprocal);
    CASE(MathSelu);
    CASE(MathSign);
    CASE(MathSin);
    CASE(MathSinh);
    CASE(MathSoftPlus);
    CASE(MathSoftsign);
    CASE(MathTan);
    default:
        return "Undefined";
    }
#undef CASE
}
} // namespace MKLDNNPlugin

View File

@ -4,7 +4,10 @@
#pragma once #pragma once
#include "caseless.hpp"
#include <vector> #include <vector>
#include <string>
namespace MKLDNNPlugin { namespace MKLDNNPlugin {
@ -97,7 +100,7 @@ enum Type {
}; };
enum Algorithm { enum Algorithm {
Undefined, Default,
// Pooling algorithms // Pooling algorithms
PoolingMax, PoolingMax,
@ -215,4 +218,11 @@ enum Algorithm {
MathTan MathTan
}; };
extern const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl;
Type TypeFromName(const std::string& type);
std::string NameFromType(const Type type);
std::string algToString(const Algorithm alg);
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@ -0,0 +1,8 @@
# Debug capabilities
Use the following cmake option to enable debug capabilities:
`-DENABLE_CPU_DEBUG_CAPS=ON`
* [Verbose mode](verbose.md)
* [Blob dumping](blob_dumping.md)
* [Graph serialization](graph_serialization.md)

View File

@ -1,9 +1,4 @@
# Debug capabilities # Blob dumping
Use the following cmake option to enable debug capabilities:
`-DENABLE_CPU_DEBUG_CAPS=ON`
## Blob dumping
Blob dumping is controlled by environment variables (filters). Blob dumping is controlled by environment variables (filters).
The variables define conditions of the node which input and output blobs The variables define conditions of the node which input and output blobs
@ -24,12 +19,12 @@ or for shell session (bash example):
export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
binary ... binary ...
``` ```
### Specify dump directory ## Specify dump directory
```sh ```sh
OV_CPU_BLOB_DUMP_DIR=<directory-name> binary ... OV_CPU_BLOB_DUMP_DIR=<directory-name> binary ...
``` ```
Default is *mkldnn_dump* Default is *mkldnn_dump*
### Specify dump format ## Specify dump format
```sh ```sh
OV_CPU_BLOB_DUMP_FORMAT=<format> binary ... OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
``` ```
@ -37,7 +32,7 @@ Options are:
* BIN (default) * BIN (default)
* TEXT * TEXT
### Filter input / output blobs ## Filter input / output blobs
To dump only input / output blobs: To dump only input / output blobs:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ... OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ...
@ -51,7 +46,7 @@ Options are:
* OUT * OUT
* ALL * ALL
### Filter by execution ID ## Filter by execution ID
To dump blobs only for nodes with specified execution IDs: To dump blobs only for nodes with specified execution IDs:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ... OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ...
@ -61,7 +56,7 @@ Example:
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ... OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ...
``` ```
### Filter by type ## Filter by type
To dump blobs only for nodes with specified types: To dump blobs only for nodes with specified types:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ... OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ...
@ -73,7 +68,7 @@ Example:
> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types > **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types
### Filter by name ## Filter by name
To dump blobs only for nodes with name matching specified regex: To dump blobs only for nodes with name matching specified regex:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ... OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ...
@ -83,7 +78,7 @@ Example:
OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ... OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ...
``` ```
### Dump all the blobs ## Dump all the blobs
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ... OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
``` ```
@ -95,22 +90,3 @@ Example:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ... OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
``` ```
## Graph serialization
The functionality allows to serialize execution graph using environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```
Possible serialization options:
* cout
Serialize to console output
* \<path\>.xml
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.

View File

@ -0,0 +1,17 @@
# Graph serialization
The functionality allows to serialize execution graph using environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```
Possible serialization options:
* cout
Serialize to console output
* \<path\>.xml
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.

View File

@ -0,0 +1,38 @@
# Verbose mode
It is possible to enable tracing execution of plugin nodes to cout and collect statistics, such as:
- node implementer:
* cpu (CPU plugin)
* dnnl (oneDNN library)
* ngraph_ref (ngraph reference fallback)
- node name
- node type
- node algorithm
- node primitive info
- input / output ports info
- fused nodes
- execution time
- etc
Format:
```sh
ov_cpu_verbose,exec,<node_implementer>,\
<node_name>:<node_type>:<node_alg>,<impl_type>,\
src:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
dst:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
post_ops:'<node_name>:<node_type>:<node_alg>;...;',\
<execution_time>
```
To turn on verbose mode the following environment variable should be used:
```sh
OV_CPU_VERBOSE=<level> binary ...
```
Currently verbose mode has only one level; any digit can be used to activate it.
To have colored verbose output just duplicate level's digit, for example:
```sh
OV_CPU_VERBOSE=11 binary ...
```
**NOTE:** Shell color codes are used

View File

@ -4,9 +4,11 @@
#pragma once #pragma once
#include "mkldnn/ie_mkldnn.h"
#include "cpu_types.h"
#include <ie_layouts.h> #include <ie_layouts.h>
#include <ie_blob.h> #include <ie_blob.h>
#include "mkldnn/ie_mkldnn.h"
namespace MKLDNNPlugin { namespace MKLDNNPlugin {

View File

@ -50,3 +50,38 @@ impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) {
return res; return res;
} }
// Returns the textual name of an impl_desc_type value, or "unknown" if the
// value matches none of the listed constants.
//
// NOTE: an if-chain is used here on purpose instead of a switch:
// impl_desc_type values are bit combinations (e.g. jit_uni_dw = jit | uni | _dw),
// so several enumerators can alias the same value and the first match in this
// order wins; a switch would reject duplicate case values.
const char* MKLDNNPlugin::impl_type_to_string(impl_desc_type type) {
#define CASE(_type) do { \
    if (type == _type) return #_type; \
} while (0)
    CASE(unknown);
    CASE(undef);
    CASE(ref_any);
    CASE(reorder);
    CASE(gemm_any);
    CASE(gemm_blas);
    CASE(gemm_avx512);
    CASE(gemm_avx2);
    CASE(gemm_avx);
    CASE(gemm_sse42);
    CASE(jit_gemm);
    CASE(jit_avx512_winograd);
    CASE(jit_avx512);
    CASE(jit_avx2);
    CASE(jit_avx);
    CASE(jit_sse42);
    CASE(jit_uni);
    CASE(jit_avx512_1x1);
    CASE(jit_avx2_1x1);
    CASE(jit_avx_1x1);
    CASE(jit_sse42_1x1);
    CASE(jit_uni_1x1);
    CASE(jit_avx512_dw);
    CASE(jit_avx2_dw);
    CASE(jit_avx_dw);
    CASE(jit_sse42_dw);
    CASE(jit_uni_dw);
#undef CASE
    // Fallback: no listed constant matched the given value.
    return "unknown";
}

View File

@ -63,6 +63,7 @@ enum impl_desc_type {
jit_uni_dw = jit | uni | _dw, jit_uni_dw = jit | uni | _dw,
}; };
const char * impl_type_to_string(impl_desc_type type);
impl_desc_type parse_impl_name(std::string impl_desc_name); impl_desc_type parse_impl_name(std::string impl_desc_name);
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@ -39,6 +39,7 @@
#include "utils/node_dumper.h" #include "utils/node_dumper.h"
#include "utils/ngraph_utils.hpp" #include "utils/ngraph_utils.hpp"
#include "utils/cpu_utils.hpp" #include "utils/cpu_utils.hpp"
#include "utils/verbose.h"
#include "memory_desc/cpu_memory_desc_utils.h" #include "memory_desc/cpu_memory_desc_utils.h"
#include <ngraph/node.hpp> #include <ngraph/node.hpp>
@ -828,7 +829,9 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
mkldnn::stream stream(eng); mkldnn::stream stream(eng);
for (const auto& node : executableGraphNodes) { for (const auto& node : executableGraphNodes) {
PERF(config.collectPerfCounters, node); VERBOSE(node, config.debugCaps.verbose);
PERF(node, config.collectPerfCounters);
if (request) if (request)
request->ThrowIfCanceled(); request->ThrowIfCanceled();

View File

@ -3,6 +3,7 @@
// //
#include "mkldnn_node.h" #include "mkldnn_node.h"
#include "dnnl_debug.h"
#include "mkldnn_extension_mngr.h" #include "mkldnn_extension_mngr.h"
#include "mkldnn_itt.h" #include "mkldnn_itt.h"
@ -43,14 +44,14 @@
#include <nodes/mkldnn_shuffle_channels_node.h> #include <nodes/mkldnn_shuffle_channels_node.h>
#include <nodes/mkldnn_reference_node.h> #include <nodes/mkldnn_reference_node.h>
#include <nodes/mkldnn_fake_quantize_node.h> #include <nodes/mkldnn_fake_quantize_node.h>
#include <mkldnn_types.h>
#include <dnnl_types.h>
#include "mkldnn_extension_utils.h" #include "mkldnn_extension_utils.h"
#include "mkldnn/iml_type_mapper.h"
#include "nodes/common/cpu_memcpy.h" #include "nodes/common/cpu_memcpy.h"
#include "mkldnn_debug.h" #include "mkldnn_debug.h"
#include "utils/rt_info/memory_formats_attribute.hpp" #include "utils/rt_info/memory_formats_attribute.hpp"
#include <dnnl_types.h>
#include <ie_ngraph_utils.hpp> #include <ie_ngraph_utils.hpp>
#include "utils/general_utils.h" #include "utils/general_utils.h"
#include "utils/cpu_utils.hpp" #include "utils/cpu_utils.hpp"
@ -63,372 +64,6 @@ using namespace MKLDNNPlugin;
using namespace openvino; using namespace openvino;
using namespace InferenceEngine::details; using namespace InferenceEngine::details;
namespace MKLDNNPlugin {
static const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
{ "Constant", Input },
{ "Parameter", Input },
{ "Result", Output },
{ "Convolution", Convolution },
{ "GroupConvolution", Convolution },
{ "MatMul", MatMul },
{ "FullyConnected", FullyConnected },
{ "MaxPool", Pooling },
{ "AvgPool", Pooling },
{ "AdaptiveMaxPool", AdaptivePooling},
{ "AdaptiveAvgPool", AdaptivePooling},
{ "Add", Eltwise },
{ "Subtract", Eltwise },
{ "Multiply", Eltwise },
{ "Divide", Eltwise },
{ "SquaredDifference", Eltwise },
{ "Maximum", Eltwise },
{ "Minimum", Eltwise },
{ "Mod", Eltwise },
{ "FloorMod", Eltwise },
{ "Power", Eltwise },
{ "PowerStatic", Eltwise },
{ "Equal", Eltwise },
{ "NotEqual", Eltwise },
{ "Greater", Eltwise },
{ "GreaterEqual", Eltwise },
{ "Less", Eltwise },
{ "LessEqual", Eltwise },
{ "LogicalAnd", Eltwise },
{ "LogicalOr", Eltwise },
{ "LogicalXor", Eltwise },
{ "LogicalNot", Eltwise },
{ "Relu", Eltwise },
{ "LeakyRelu", Eltwise },
{ "Gelu", Eltwise },
{ "Elu", Eltwise },
{ "Tanh", Eltwise },
{ "Sigmoid", Eltwise },
{ "Abs", Eltwise },
{ "Sqrt", Eltwise },
{ "Clamp", Eltwise },
{ "Exp", Eltwise },
{ "SwishCPU", Eltwise },
{ "HSwish", Eltwise },
{ "Mish", Eltwise },
{ "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "PRelu", Eltwise },
{ "Erf", Eltwise },
{ "SoftPlus", Eltwise },
{ "Reshape", Reshape },
{ "Squeeze", Reshape },
{ "Unsqueeze", Reshape },
{ "Softmax", Softmax },
{ "Reorder", Reorder },
{ "BatchToSpace", BatchToSpace },
{ "SpaceToBatch", SpaceToBatch },
{ "DepthToSpace", DepthToSpace },
{ "SpaceToDepth", SpaceToDepth },
{ "Roll", Roll },
{ "LRN", Lrn },
{ "Split", Split },
{ "VariadicSplit", Split },
{ "Concat", Concatenation },
{ "ConvolutionBackpropData", Deconvolution },
{ "GroupConvolutionBackpropData", Deconvolution },
{ "StridedSlice", StridedSlice },
{ "Tile", Tile },
{ "ROIAlign", ROIAlign },
{ "ROIPooling", ROIPooling },
{ "PSROIPooling", PSROIPooling },
{ "DeformablePSROIPooling", PSROIPooling },
{ "Pad", Pad },
{ "Transpose", Transpose },
{ "LSTMCell", RNNCell },
{ "GRUCell", RNNCell },
{ "RNNCell", RNNCell },
{ "LSTMSequence", RNNSeq },
{ "GRUSequence", RNNSeq },
{ "RNNSequence", RNNSeq },
{ "FakeQuantize", FakeQuantize },
{ "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Assign", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert },
{ "MVN", MVN},
{ "NormalizeL2", NormalizeL2},
{ "ScatterUpdate", ScatterUpdate},
{ "ScatterElementsUpdate", ScatterElementsUpdate},
{ "ScatterNDUpdate", ScatterNDUpdate},
{ "Interpolate", Interpolate},
{ "ReduceL1", Reduce},
{ "ReduceL2", Reduce},
{ "ReduceLogicalAnd", Reduce},
{ "ReduceLogicalOr", Reduce},
{ "ReduceMax", Reduce},
{ "ReduceMean", Reduce},
{ "ReduceMin", Reduce},
{ "ReduceProd", Reduce},
{ "ReduceSum", Reduce},
{ "ReduceLogSum", Reduce},
{ "ReduceLogSumExp", Reduce},
{ "ReduceSumSquare", Reduce},
{ "Broadcast", Broadcast},
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
{ "Gather", Gather},
{ "GatherElements", GatherElements},
{ "GatherND", GatherND},
{ "OneHot", OneHot},
{ "RegionYolo", RegionYolo},
{ "Select", Select},
{ "ShuffleChannels", ShuffleChannels},
{ "DFT", DFT},
{ "IDFT", DFT},
{ "Abs", Math},
{ "Acos", Math},
{ "Acosh", Math},
{ "Asin", Math},
{ "Asinh", Math},
{ "Atan", Math},
{ "Atanh", Math},
{ "Ceil", Math},
{ "Ceiling", Math},
{ "Cos", Math},
{ "Cosh", Math},
{ "Floor", Math},
{ "HardSigmoid", Math},
{ "Log", Math},
{ "Neg", Math},
{ "Reciprocal", Math},
{ "Selu", Math},
{ "Sign", Math},
{ "Sin", Math},
{ "Sinh", Math},
{ "SoftPlus", Math},
{ "Softsign", Math},
{ "Tan", Math},
{ "CTCLoss", CTCLoss},
{ "Bucketize", Bucketize},
{ "CTCGreedyDecoder", CTCGreedyDecoder},
{ "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
{ "CumSum", CumSum},
{ "DetectionOutput", DetectionOutput},
{ "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
{ "LogSoftmax", LogSoftmax},
{ "TopK", TopK},
{ "GatherTree", GatherTree},
{ "GRN", GRN},
{ "Range", Range},
{ "Proposal", Proposal},
{ "ReorgYolo", ReorgYolo},
{ "ReverseSequence", ReverseSequence},
{ "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
{ "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
{ "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
{ "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
{ "ExtractImagePatches", ExtractImagePatches},
{ "NonMaxSuppression", NonMaxSuppression},
{ "NonMaxSuppressionIEInternal", NonMaxSuppression},
{ "MatrixNms", MatrixNms},
{ "MulticlassNms", MulticlassNms}
};
Type TypeFromName(const std::string & type) {
auto itType = type_to_name_tbl.find(type);
if (type_to_name_tbl.end() != itType) {
return itType->second;
}
return Unknown;
}
template<>
DnnlMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtInputPort(portNum));
}
template<>
BlockedMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtInputPort(portNum));
}
template<>
DnnlMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtOutputPort(portNum));
}
template<>
BlockedMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtOutputPort(portNum));
}
std::string NameFromType(Type type) {
switch (type) {
case Generic:
return "Generic";
case Reorder:
return "Reorder";
case Input:
return "Input";
case Output:
return "Output";
case Convolution:
return "Convolution";
case Deconvolution:
return "Deconvolution";
case Lrn:
return "Lrn";
case Pooling:
return "Pooling";
case AdaptivePooling:
return "AdaptivePooling";
case FullyConnected:
return "FullyConnected";
case MatMul:
return "MatMul";
case Softmax:
return "Softmax";
case Split:
return "Split";
case Concatenation:
return "Concatenation";
case StridedSlice:
return "StridedSlice";
case Reshape:
return "Reshape";
case Tile:
return "Tile";
case ROIAlign:
return "ROIAlign";
case ROIPooling:
return "ROIPooling";
case PSROIPooling:
return "PSROIPooling";
case DepthToSpace:
return "DepthToSpace";
case BatchToSpace:
return "BatchToSpace";
case Pad:
return "Pad";
case Transpose:
return "Transpose";
case SpaceToDepth:
return "SpaceToDepth";
case SpaceToBatch:
return "SpaceToBatch";
case MemoryOutput:
return "MemoryOutput";
case MemoryInput:
return "MemoryInput";
case RNNSeq:
return "RNNSeq";
case RNNCell:
return "RNNCell";
case Eltwise:
return "Eltwise";
case FakeQuantize:
return "FakeQuantize";
case BinaryConvolution:
return "BinaryConvolution";
case DeformableConvolution:
return "DeformableConvolution";
case MVN:
return "MVN";
case TensorIterator:
return "TensorIterator";
case Convert:
return "Convert";
case NormalizeL2:
return "NormalizeL2";
case ScatterUpdate:
return "ScatterUpdate";
case ScatterElementsUpdate:
return "ScatterElementsUpdate";
case ScatterNDUpdate:
return "ScatterNDUpdate";
case Interpolate:
return "Interpolate";
case Reduce:
return "Reduce";
case Broadcast:
return "Broadcast";
case EmbeddingSegmentsSum:
return "EmbeddingSegmentsSum";
case EmbeddingBagPackedSum:
return "EmbeddingBagPackedSum";
case EmbeddingBagOffsetsSum:
return "EmbeddingBagOffsetsSum";
case Gather:
return "Gather";
case GatherElements:
return "GatherElements";
case GatherND:
return "GatherND";
case OneHot:
return "OneHot";
case RegionYolo:
return "RegionYolo";
case Select:
return "Select";
case Roll:
return "Roll";
case ShuffleChannels:
return "ShuffleChannels";
case DFT:
return "DFT";
case Math:
return "Math";
case CTCLoss:
return "CTCLoss";
case Bucketize:
return "Bucketize";
case CTCGreedyDecoder:
return "CTCGreedyDecoder";
case CTCGreedyDecoderSeqLen:
return "CTCGreedyDecoderSeqLen";
case CumSum:
return "CumSum";
case DetectionOutput:
return "DetectionOutput";
case ExperimentalDetectronDetectionOutput:
return "ExperimentalDetectronDetectionOutput";
case LogSoftmax:
return "LogSoftmax";
case TopK:
return "TopK";
case GatherTree:
return "GatherTree";
case GRN:
return "GRN";
case Range:
return "Range";
case Proposal:
return "Proposal";
case ReorgYolo:
return "ReorgYolo";
case ReverseSequence:
return "ReverseSequence";
case ExperimentalDetectronTopKROIs:
return "ExperimentalDetectronTopKROIs";
case ExperimentalDetectronROIFeatureExtractor:
return "ExperimentalDetectronROIFeatureExtractor";
case ExperimentalDetectronPriorGridGenerator:
return "ExperimentalDetectronPriorGridGenerator";
case ExperimentalDetectronGenerateProposalsSingleImage:
return "ExperimentalDetectronGenerateProposalsSingleImage";
case ExtractImagePatches:
return "ExtractImagePatches";
case NonMaxSuppression:
return "NonMaxSuppression";
case MatrixNms:
return "MatrixNms";
case MulticlassNms:
return "MulticlassNms";
default:
return "Unknown";
}
}
} // namespace MKLDNNPlugin
MKLDNNNode::NodesFactory & MKLDNNNode::factory() { MKLDNNNode::NodesFactory & MKLDNNNode::factory() {
static NodesFactory factoryInstance; static NodesFactory factoryInstance;
@ -439,7 +74,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown), : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()), weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
algorithm = Algorithm::Undefined; algorithm = Algorithm::Default;
fusingPort = -1; fusingPort = -1;
const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name(); const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();

View File

@ -36,9 +36,6 @@ using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>; using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>;
using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>; using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
Type TypeFromName(const std::string & type);
std::string NameFromType(Type type);
class PortConfigurator { class PortConfigurator {
public: public:
PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape, PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape,
@ -629,7 +626,7 @@ protected:
MKLDNNWeightsSharing::Ptr weightCache; MKLDNNWeightsSharing::Ptr weightCache;
Algorithm algorithm = Algorithm::Undefined; Algorithm algorithm = Algorithm::Default;
bool isInQuantizedGraph = false; bool isInQuantizedGraph = false;
@ -744,6 +741,10 @@ private:
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd); void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 }; enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes); ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
#ifdef CPU_DEBUG_CAPS
friend class Verbose;
#endif
}; };
class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type, class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,

View File

@ -5,20 +5,25 @@
#pragma once #pragma once
#include <chrono> #include <chrono>
#include <ratio>
namespace MKLDNNPlugin { namespace MKLDNNPlugin {
class PerfCount { class PerfCount {
uint64_t duration; uint64_t total_duration;
uint32_t num; uint32_t num;
std::chrono::high_resolution_clock::time_point __start = {}; std::chrono::high_resolution_clock::time_point __start = {};
std::chrono::high_resolution_clock::time_point __finish = {}; std::chrono::high_resolution_clock::time_point __finish = {};
public: public:
PerfCount(): duration(0), num(0) {} PerfCount(): total_duration(0), num(0) {}
uint64_t avg() { return (num == 0) ? 0 : duration / num; } std::chrono::duration<double, std::milli> duration() const {
return __finish - __start;
}
uint64_t avg() const { return (num == 0) ? 0 : total_duration / num; }
private: private:
void start_itr() { void start_itr() {
@ -27,8 +32,7 @@ private:
void finish_itr() { void finish_itr() {
__finish = std::chrono::high_resolution_clock::now(); __finish = std::chrono::high_resolution_clock::now();
total_duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
num++; num++;
} }
@ -46,5 +50,5 @@ public:
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin
#define GET_PERF(_counter) std::unique_ptr<PerfHelper>(new PerfHelper(_counter->PerfCounter())) #define GET_PERF(_node) std::unique_ptr<PerfHelper>(new PerfHelper(_node->PerfCounter()))
#define PERF(_need, _counter) auto pc = _need ? GET_PERF(_counter) : nullptr; #define PERF(_node, _need) auto pc = _need ? GET_PERF(_node) : nullptr;

View File

@ -24,6 +24,7 @@ public:
readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE"); readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME"); readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH"); readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
readParam(verbose, "OV_CPU_VERBOSE");
} }
std::string blobDumpDir; std::string blobDumpDir;
@ -33,9 +34,10 @@ public:
std::string blobDumpNodeType; std::string blobDumpNodeType;
std::string blobDumpNodeName; std::string blobDumpNodeName;
std::string execGraphPath; std::string execGraphPath;
std::string verbose;
private: private:
void readParam(std::string& param, const char* envVar) { static void readParam(std::string& param, const char* envVar) {
if (const char* envValue = std::getenv(envVar)) if (const char* envValue = std::getenv(envVar))
param = envValue; param = envValue;
} }

View File

@ -0,0 +1,169 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS
#include "verbose.h"
#include "mkldnn_node.h"
#include "cpu_types.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "dnnl_types.h"
#include "dnnl_debug.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
namespace MKLDNNPlugin {
// Decide whether this node's execution info should be emitted:
// verbosity must be enabled (lvl >= 1) and the node must be a real
// executable node (constants and graph Input/Output nodes are skipped).
bool Verbose::shouldBePrinted() const {
    const bool verbosityEnabled = lvl >= 1;
    const bool isGraphBoundary = node->getType() == Input || node->getType() == Output;
    return verbosityEnabled && !node->isConstant() && !isGraphBoundary;
}
/**
 * Print node verbose execution information to cout.
 * Similar to DNNL_VERBOSE output.
 * Formatting is written in C using oneDNN format functions.
 * Can be rewritten in pure C++ if necessary.
 */
/**
 * Format the node execution header into the internal stream:
 * implementer, name, type, algorithm, primitive implementation type,
 * src/dst port memory descriptors and fused post-ops.
 *
 * Fixes vs. previous revision:
 *  - "ENOUGHT" typo in the emitted overflow marker corrected to "ENOUGH";
 *  - port loops use size_t to avoid signed/unsigned comparison;
 *  - redundant `? true : false` removed.
 */
void Verbose::printInfo() {
    /* Verbosity levels 1, 2, 3, etc. -> no color
     * 11, 22, 33, etc. -> colorize the output */
    bool colorUp = lvl / 10 > 0;

    enum Color {
        RED,
        GREEN,
        YELLOW,
        BLUE,
        PURPLE,
        CYAN
    };

    // Wrap 'str' into ANSI escape sequences when colorization is enabled.
    auto colorize = [&](const Color color, const std::string& str) {
        if (!colorUp)
            return str;

        const std::string red("\033[1;31m");
        const std::string green("\033[1;32m");
        const std::string yellow("\033[1;33m");
        const std::string blue("\033[1;34m");
        const std::string purple("\033[1;35m");
        const std::string cyan("\033[1;36m");
        const std::string reset("\033[0m");
        std::string colorCode;

        switch (color) {
        case RED: colorCode = red;
            break;
        case GREEN: colorCode = green;
            break;
        case YELLOW: colorCode = yellow;
            break;
        case BLUE: colorCode = blue;
            break;
        case PURPLE: colorCode = purple;
            break;
        case CYAN: colorCode = cyan;
            break;
        default: colorCode = reset;
            break;
        }

        return colorCode + str + reset;
    };

    // Fixed-size C buffer: oneDNN's md2fmt/md2dim helpers are C functions,
    // so port info is assembled with snprintf. Can be increased if necessary.
    const int CPU_VERBOSE_DAT_LEN = 512;
    char portsInfo[CPU_VERBOSE_DAT_LEN] = {'\0'};
    int written = 0;
    int written_total = 0;

    // Advance the running write offset; on a formatting error or buffer
    // overflow, replace the whole buffer contents with an error marker.
    auto shift = [&](int size) {
        if (written < 0 || written_total + size > CPU_VERBOSE_DAT_LEN) {
            const char* errorMsg = "# NOT ENOUGH BUFFER SIZE #";
            snprintf(portsInfo, strlen(errorMsg) + 1, "%s", errorMsg);
            written_total = strlen(errorMsg);
            return;
        }
        written_total += size;
    };

    // Append " <prefix><format>:<dims>" for one memory descriptor.
    auto formatMemDesc = [&](const dnnl_memory_desc_t& desc, std::string& prefix) {
        prefix = colorize(BLUE, prefix);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, " ");
        shift(written);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", prefix.c_str());
        shift(written);
        written = dnnl_md2fmt_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
        shift(written);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, ":");
        shift(written);
        written = dnnl_md2dim_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
        shift(written);
    };

    for (size_t i = 0; i < node->getParentEdges().size(); i++) {
        std::string prefix("src:" + std::to_string(i) + ':');
        formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
                          node->getParentEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
                      prefix);
    }

    for (size_t i = 0; i < node->getChildEdges().size(); i++) {
        std::string prefix("dst:" + std::to_string(i) + ':');
        formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
                          node->getChildEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
                      prefix);
    }

    // Fused nodes are reported DNNL-style as a post_ops:'...' list.
    std::string post_ops;
    if (!node->getFusedWith().empty()) {
        post_ops += "post_ops:'";
        for (const auto& fusedNode : node->getFusedWith()) {
            post_ops.append(colorize(GREEN, fusedNode->getName())).append(":")
                .append(colorize(CYAN, NameFromType(fusedNode->getType()))).append(":")
                .append(algToString(fusedNode->getAlgorithm()))
                .append(";");
        }
        post_ops += "'";
    }

    // Which backend actually executes the node.
    std::string nodeImplementer = "cpu";
    if (node->prim)
        nodeImplementer = "dnnl"; // oneDNN
    else if (node->getType() == Reference)
        nodeImplementer = "ngraph_ref"; // ngraph reference

    const std::string& nodeName = colorize(GREEN, node->getName());
    const std::string& nodeType = colorize(CYAN, NameFromType(node->getType()));
    const std::string& nodeAlg = algToString(node->getAlgorithm());
    const std::string& nodePrimImplType = impl_type_to_string(node->getSelectedPrimitiveDescriptor()->getImplementationType());

    stream << "ov_cpu_verbose" << ','
           << "exec" << ','
           << nodeImplementer << ','
           << nodeName << ":" << nodeType << ":" << nodeAlg << ','
           << nodePrimImplType << ','
           << portsInfo << ','
           << post_ops << ',';
}
// Append the node's last measured execution time (milliseconds) to the stream.
void Verbose::printDuration() {
    stream << node->PerfCounter().duration().count() << "ms";
}
// Emit the accumulated verbose line to stdout.
// Uses stream.str() instead of stream.rdbuf(): inserting a streambuf* from an
// empty stream would set failbit on std::cout, silently breaking later output.
void Verbose::flush() const {
    std::cout << stream.str() << "\n";
}
} // namespace MKLDNNPlugin
#endif // CPU_DEBUG_CAPS

View File

@ -0,0 +1,46 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS
#include "mkldnn_node.h"
#include <string>
#include <cstdlib>
#include <sstream>
namespace MKLDNNPlugin {
// RAII helper that prints one DNNL_VERBOSE-style line per node execution:
// the constructor prints the node header, the destructor appends the measured
// duration and flushes the whole line to cout. Intended to be instantiated on
// the stack around a node's execute call (via the VERBOSE macro).
class Verbose {
public:
    // _node: node being executed; only a reference to the shared_ptr is held,
    //        so the pointer must outlive this object (true for stack usage).
    // _lvl:  verbosity level as raw env-var text; parsed with atoi, so
    //        non-numeric input degrades to 0 (printing disabled).
    Verbose(const MKLDNNNodePtr& _node, const std::string& _lvl)
        : node(_node), lvl(atoi(_lvl.c_str())) {
        if (!shouldBePrinted())
            return;
        printInfo();
    }

    // On scope exit, complete the line with the duration and write it out.
    virtual ~Verbose() {
        if (!shouldBePrinted())
            return;

        printDuration();
        flush();
    }

private:
    const MKLDNNNodePtr& node;  // non-owning; see constructor note
    const int lvl;              // parsed verbosity level (0 = disabled)
    std::stringstream stream;   // accumulates the output line before flush()

    bool shouldBePrinted() const;
    void printInfo();
    void printDuration();
    void flush() const;
};
#define VERBOSE(...) Verbose(__VA_ARGS__)
} // namespace MKLDNNPlugin
#else
#define VERBOSE(...)
#endif // CPU_DEBUG_CAPS

View File

@ -12,6 +12,7 @@
#include <algorithm> #include <algorithm>
#include <cctype> #include <cctype>
#include <functional> #include <functional>
#include <iterator>
#include <map> #include <map>
#include <set> #include <set>
#include <unordered_map> #include <unordered_map>