[CPU] Add OV_CPU_VERBOSE env var to print node exec info to cout (#6390)

This commit is contained in:
Egor Duplensky 2021-09-26 22:17:57 +03:00 committed by GitHub
parent b968c7b813
commit c92988c8e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 823 additions and 416 deletions

View File

@ -0,0 +1,459 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cpu_types.h"
#include <vector>
#include <string>
namespace MKLDNNPlugin {
using Dim = std::size_t;
using VectorDims = std::vector<Dim>;
// Case-insensitive mapping from an operation type name (as it appears in the
// ngraph/IE model) to the CPU plugin's internal node Type. Many operation
// names intentionally share one Type (e.g. all arithmetic, comparison and
// activation ops are handled by the Eltwise node).
//
// NOTE(review): keys must be unique here. "Abs" and "SoftPlus" used to be
// listed twice — once mapped to Eltwise and once to Math. Because this map is
// built from an initializer list, it is unspecified which of two equivalent
// keys is actually inserted, so the duplicate Math entries were removed and
// the Eltwise mapping (consistent with EltwiseAbs / the SoftPlus Eltwise
// handling in this file) is kept.
const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
        { "Constant", Input },
        { "Parameter", Input },
        { "Result", Output },
        { "Convolution", Convolution },
        { "GroupConvolution", Convolution },
        { "MatMul", MatMul },
        { "FullyConnected", FullyConnected },
        { "MaxPool", Pooling },
        { "AvgPool", Pooling },
        { "AdaptiveMaxPool", AdaptivePooling},
        { "AdaptiveAvgPool", AdaptivePooling},
        { "Add", Eltwise },
        { "Subtract", Eltwise },
        { "Multiply", Eltwise },
        { "Divide", Eltwise },
        { "SquaredDifference", Eltwise },
        { "Maximum", Eltwise },
        { "Minimum", Eltwise },
        { "Mod", Eltwise },
        { "FloorMod", Eltwise },
        { "Power", Eltwise },
        { "PowerStatic", Eltwise },
        { "Equal", Eltwise },
        { "NotEqual", Eltwise },
        { "Greater", Eltwise },
        { "GreaterEqual", Eltwise },
        { "Less", Eltwise },
        { "LessEqual", Eltwise },
        { "LogicalAnd", Eltwise },
        { "LogicalOr", Eltwise },
        { "LogicalXor", Eltwise },
        { "LogicalNot", Eltwise },
        { "Relu", Eltwise },
        { "LeakyRelu", Eltwise },
        { "Gelu", Eltwise },
        { "Elu", Eltwise },
        { "Tanh", Eltwise },
        { "Sigmoid", Eltwise },
        { "Abs", Eltwise },
        { "Sqrt", Eltwise },
        { "Clamp", Eltwise },
        { "Exp", Eltwise },
        { "SwishCPU", Eltwise },
        { "HSwish", Eltwise },
        { "Mish", Eltwise },
        { "HSigmoid", Eltwise },
        { "Round", Eltwise },
        { "PRelu", Eltwise },
        { "Erf", Eltwise },
        { "SoftPlus", Eltwise },
        { "Reshape", Reshape },
        { "Squeeze", Reshape },
        { "Unsqueeze", Reshape },
        { "Softmax", Softmax },
        { "Reorder", Reorder },
        { "BatchToSpace", BatchToSpace },
        { "SpaceToBatch", SpaceToBatch },
        { "DepthToSpace", DepthToSpace },
        { "SpaceToDepth", SpaceToDepth },
        { "Roll", Roll },
        { "LRN", Lrn },
        { "Split", Split },
        { "VariadicSplit", Split },
        { "Concat", Concatenation },
        { "ConvolutionBackpropData", Deconvolution },
        { "GroupConvolutionBackpropData", Deconvolution },
        { "StridedSlice", StridedSlice },
        { "Tile", Tile },
        { "ROIAlign", ROIAlign },
        { "ROIPooling", ROIPooling },
        { "PSROIPooling", PSROIPooling },
        { "DeformablePSROIPooling", PSROIPooling },
        { "Pad", Pad },
        { "Transpose", Transpose },
        { "LSTMCell", RNNCell },
        { "GRUCell", RNNCell },
        { "RNNCell", RNNCell },
        { "LSTMSequence", RNNSeq },
        { "GRUSequence", RNNSeq },
        { "RNNSequence", RNNSeq },
        { "FakeQuantize", FakeQuantize },
        { "BinaryConvolution", BinaryConvolution },
        { "DeformableConvolution", DeformableConvolution },
        { "TensorIterator", TensorIterator },
        { "Loop", TensorIterator },
        { "ReadValue", MemoryInput},  // for construction from name ctor, arbitrary name is used
        { "Assign", MemoryOutput },  // for construction from layer ctor
        { "Convert", Convert },
        { "MVN", MVN},
        { "NormalizeL2", NormalizeL2},
        { "ScatterUpdate", ScatterUpdate},
        { "ScatterElementsUpdate", ScatterElementsUpdate},
        { "ScatterNDUpdate", ScatterNDUpdate},
        { "Interpolate", Interpolate},
        { "ReduceL1", Reduce},
        { "ReduceL2", Reduce},
        { "ReduceLogicalAnd", Reduce},
        { "ReduceLogicalOr", Reduce},
        { "ReduceMax", Reduce},
        { "ReduceMean", Reduce},
        { "ReduceMin", Reduce},
        { "ReduceProd", Reduce},
        { "ReduceSum", Reduce},
        { "ReduceLogSum", Reduce},
        { "ReduceLogSumExp", Reduce},
        { "ReduceSumSquare", Reduce},
        { "Broadcast", Broadcast},
        { "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
        { "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
        { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
        { "Gather", Gather},
        { "GatherElements", GatherElements},
        { "GatherND", GatherND},
        { "OneHot", OneHot},
        { "RegionYolo", RegionYolo},
        { "Select", Select},
        { "ShuffleChannels", ShuffleChannels},
        { "DFT", DFT},
        { "IDFT", DFT},
        // NOTE(review): "Abs" and "SoftPlus" are mapped to Eltwise above; the
        // duplicate Math entries that used to sit here were dead and were removed.
        { "Acos", Math},
        { "Acosh", Math},
        { "Asin", Math},
        { "Asinh", Math},
        { "Atan", Math},
        { "Atanh", Math},
        { "Ceil", Math},
        { "Ceiling", Math},
        { "Cos", Math},
        { "Cosh", Math},
        { "Floor", Math},
        { "HardSigmoid", Math},
        { "Log", Math},
        { "Neg", Math},
        { "Reciprocal", Math},
        { "Selu", Math},
        { "Sign", Math},
        { "Sin", Math},
        { "Sinh", Math},
        { "Softsign", Math},
        { "Tan", Math},
        { "CTCLoss", CTCLoss},
        { "Bucketize", Bucketize},
        { "CTCGreedyDecoder", CTCGreedyDecoder},
        { "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
        { "CumSum", CumSum},
        { "DetectionOutput", DetectionOutput},
        { "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
        { "LogSoftmax", LogSoftmax},
        { "TopK", TopK},
        { "GatherTree", GatherTree},
        { "GRN", GRN},
        { "Range", Range},
        { "Proposal", Proposal},
        { "ReorgYolo", ReorgYolo},
        { "ReverseSequence", ReverseSequence},
        { "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
        { "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
        { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
        { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
        { "ExtractImagePatches", ExtractImagePatches},
        { "NonMaxSuppression", NonMaxSuppression},
        { "NonMaxSuppressionIEInternal", NonMaxSuppression},
        { "MatrixNms", MatrixNms},
        { "MulticlassNms", MulticlassNms}
};
// Resolves an operation type name (case-insensitive) to the plugin's internal
// node Type; returns Unknown for names not present in type_to_name_tbl.
Type TypeFromName(const std::string& type) {
    const auto entry = type_to_name_tbl.find(type);
    return (entry == type_to_name_tbl.end()) ? Unknown : entry->second;
}
// Returns the human-readable name of a node Type, or "Unknown" for values
// without a dedicated name.
std::string NameFromType(const Type type) {
// Every enumerator's printed name is exactly its identifier, so the
// stringizing operator keeps the table free of copy-paste mismatches.
#define CASE(_type) case _type: return #_type
    switch (type) {
    CASE(Generic);
    CASE(Reorder);
    CASE(Input);
    CASE(Output);
    CASE(Convolution);
    CASE(Deconvolution);
    CASE(Lrn);
    CASE(Pooling);
    CASE(AdaptivePooling);
    CASE(FullyConnected);
    CASE(MatMul);
    CASE(Softmax);
    CASE(Split);
    CASE(Concatenation);
    CASE(StridedSlice);
    CASE(Reshape);
    CASE(Tile);
    CASE(ROIAlign);
    CASE(ROIPooling);
    CASE(PSROIPooling);
    CASE(DepthToSpace);
    CASE(BatchToSpace);
    CASE(Pad);
    CASE(Transpose);
    CASE(SpaceToDepth);
    CASE(SpaceToBatch);
    CASE(MemoryOutput);
    CASE(MemoryInput);
    CASE(RNNSeq);
    CASE(RNNCell);
    CASE(Eltwise);
    CASE(FakeQuantize);
    CASE(BinaryConvolution);
    CASE(DeformableConvolution);
    CASE(MVN);
    CASE(TensorIterator);
    CASE(Convert);
    CASE(NormalizeL2);
    CASE(ScatterUpdate);
    CASE(ScatterElementsUpdate);
    CASE(ScatterNDUpdate);
    CASE(Interpolate);
    CASE(Reduce);
    CASE(Broadcast);
    CASE(EmbeddingSegmentsSum);
    CASE(EmbeddingBagPackedSum);
    CASE(EmbeddingBagOffsetsSum);
    CASE(Gather);
    CASE(GatherElements);
    CASE(GatherND);
    CASE(OneHot);
    CASE(RegionYolo);
    CASE(Select);
    CASE(Roll);
    CASE(ShuffleChannels);
    CASE(DFT);
    CASE(Math);
    CASE(CTCLoss);
    CASE(Bucketize);
    CASE(CTCGreedyDecoder);
    CASE(CTCGreedyDecoderSeqLen);
    CASE(CumSum);
    CASE(DetectionOutput);
    CASE(ExperimentalDetectronDetectionOutput);
    CASE(LogSoftmax);
    CASE(TopK);
    CASE(GatherTree);
    CASE(GRN);
    CASE(Range);
    CASE(Proposal);
    CASE(ReorgYolo);
    CASE(ReverseSequence);
    CASE(ExperimentalDetectronTopKROIs);
    CASE(ExperimentalDetectronROIFeatureExtractor);
    CASE(ExperimentalDetectronPriorGridGenerator);
    CASE(ExperimentalDetectronGenerateProposalsSingleImage);
    CASE(ExtractImagePatches);
    CASE(NonMaxSuppression);
    CASE(MatrixNms);
    CASE(MulticlassNms);
    default:
        return "Unknown";
    }
#undef CASE
}
// Returns the identifier of the given Algorithm as a string, or "Undefined"
// if the value is not a listed enumerator.
std::string algToString(const Algorithm alg) {
// Each enumerator prints as its own identifier via the stringizing operator.
#define CASE(_alg) case _alg: return #_alg
    switch (alg) {
    CASE(Default);
    CASE(PoolingMax);
    CASE(PoolingAvg);
    CASE(ConvolutionCommon);
    CASE(ConvolutionGrouped);
    CASE(DeconvolutionCommon);
    CASE(DeconvolutionGrouped);
    CASE(EltwiseAdd);
    CASE(EltwiseMultiply);
    CASE(EltwiseSubtract);
    CASE(EltwiseDivide);
    CASE(EltwiseFloorMod);
    CASE(EltwiseMod);
    CASE(EltwiseMaximum);
    CASE(EltwiseMinimum);
    CASE(EltwiseSquaredDifference);
    CASE(EltwisePowerDynamic);
    CASE(EltwisePowerStatic);
    CASE(EltwiseMulAdd);
    CASE(EltwiseEqual);
    CASE(EltwiseNotEqual);
    CASE(EltwiseGreater);
    CASE(EltwiseGreaterEqual);
    CASE(EltwiseLess);
    CASE(EltwiseLessEqual);
    CASE(EltwiseLogicalAnd);
    CASE(EltwiseLogicalOr);
    CASE(EltwiseLogicalXor);
    CASE(EltwiseLogicalNot);
    CASE(EltwiseRelu);
    CASE(EltwiseGelu);
    CASE(EltwiseElu);
    CASE(EltwiseTanh);
    CASE(EltwiseSigmoid);
    CASE(EltwiseAbs);
    CASE(EltwiseSqrt);
    CASE(EltwiseSoftRelu);
    CASE(EltwiseExp);
    CASE(EltwiseClamp);
    CASE(EltwiseSwish);
    CASE(EltwisePrelu);
    CASE(EltwiseMish);
    CASE(EltwiseHswish);
    CASE(EltwiseHsigmoid);
    CASE(EltwiseRoundHalfToEven);
    CASE(EltwiseRoundHalfAwayFromZero);
    CASE(EltwiseErf);
    CASE(FQCommon);
    CASE(FQQuantization);
    CASE(FQBinarization);
    CASE(ROIPoolingMax);
    CASE(ROIPoolingBilinear);
    CASE(ROIAlignMax);
    CASE(ROIAlignAvg);
    CASE(PSROIPoolingAverage);
    CASE(PSROIPoolingBilinear);
    CASE(PSROIPoolingBilinearDeformable);
    CASE(ReduceL1);
    CASE(ReduceL2);
    CASE(ReduceAnd);
    CASE(ReduceOr);
    CASE(ReduceMax);
    CASE(ReduceMean);
    CASE(ReduceMin);
    CASE(ReduceProd);
    CASE(ReduceSum);
    CASE(ReduceLogSum);
    CASE(ReduceLogSumExp);
    CASE(ReduceSumSquare);
    CASE(MathAbs);
    CASE(MathAcos);
    CASE(MathAcosh);
    CASE(MathAsin);
    CASE(MathAsinh);
    CASE(MathAtan);
    CASE(MathAtanh);
    CASE(MathCeiling);
    CASE(MathCos);
    CASE(MathCosh);
    CASE(MathErf);
    CASE(MathFloor);
    CASE(MathHardSigmoid);
    CASE(MathLog);
    CASE(MathNegative);
    CASE(MathReciprocal);
    CASE(MathSelu);
    CASE(MathSign);
    CASE(MathSin);
    CASE(MathSinh);
    CASE(MathSoftPlus);
    CASE(MathSoftsign);
    CASE(MathTan);
    default:
        return "Undefined";
    }
#undef CASE
}
} // namespace MKLDNNPlugin

View File

@ -4,7 +4,10 @@
#pragma once #pragma once
#include "caseless.hpp"
#include <vector> #include <vector>
#include <string>
namespace MKLDNNPlugin { namespace MKLDNNPlugin {
@ -97,7 +100,7 @@ enum Type {
}; };
enum Algorithm { enum Algorithm {
Undefined, Default,
// Pooling algorithms // Pooling algorithms
PoolingMax, PoolingMax,
@ -215,4 +218,11 @@ enum Algorithm {
MathTan MathTan
}; };
extern const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl;
Type TypeFromName(const std::string& type);
std::string NameFromType(const Type type);
std::string algToString(const Algorithm alg);
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@ -0,0 +1,8 @@
# Debug capabilities
Use the following cmake option to enable debug capabilities:
`-DENABLE_CPU_DEBUG_CAPS=ON`
* [Verbose mode](verbose.md)
* [Blob dumping](blob_dumping.md)
* [Graph serialization](graph_serialization.md)

View File

@ -1,9 +1,4 @@
# Debug capabilities # Blob dumping
Use the following cmake option to enable debug capabilities:
`-DENABLE_CPU_DEBUG_CAPS=ON`
## Blob dumping
Blob dumping is controlled by environment variables (filters). Blob dumping is controlled by environment variables (filters).
The variables define conditions of the node which input and output blobs The variables define conditions of the node which input and output blobs
@ -24,12 +19,12 @@ or for shell session (bash example):
export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
binary ... binary ...
``` ```
### Specify dump directory ## Specify dump directory
```sh ```sh
OV_CPU_BLOB_DUMP_DIR=<directory-name> binary ... OV_CPU_BLOB_DUMP_DIR=<directory-name> binary ...
``` ```
Default is *mkldnn_dump* Default is *mkldnn_dump*
### Specify dump format ## Specify dump format
```sh ```sh
OV_CPU_BLOB_DUMP_FORMAT=<format> binary ... OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
``` ```
@ -37,7 +32,7 @@ Options are:
* BIN (default) * BIN (default)
* TEXT * TEXT
### Filter input / output blobs ## Filter input / output blobs
To dump only input / output blobs: To dump only input / output blobs:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ... OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ...
@ -51,7 +46,7 @@ Options are:
* OUT * OUT
* ALL * ALL
### Filter by execution ID ## Filter by execution ID
To dump blobs only for nodes with specified execution IDs: To dump blobs only for nodes with specified execution IDs:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ... OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ...
@ -61,7 +56,7 @@ Example:
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ... OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ...
``` ```
### Filter by type ## Filter by type
To dump blobs only for nodes with specified types: To dump blobs only for nodes with specified types:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ... OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ...
@ -73,7 +68,7 @@ Example:
> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types > **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types
### Filter by name ## Filter by name
To dump blobs only for nodes with name matching specified regex: To dump blobs only for nodes with name matching specified regex:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ... OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ...
@ -83,7 +78,7 @@ Example:
OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ... OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ...
``` ```
### Dump all the blobs ## Dump all the blobs
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ... OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
``` ```
@ -95,22 +90,3 @@ Example:
```sh ```sh
OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ... OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
``` ```
## Graph serialization
The functionality allows to serialize execution graph using environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```
Possible serialization options:
* cout
Serialize to console output
* \<path\>.xml
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.

View File

@ -0,0 +1,17 @@
# Graph serialization
The functionality allows to serialize execution graph using environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```
Possible serialization options:
* cout
Serialize to console output
* \<path\>.xml
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.

View File

@ -0,0 +1,38 @@
# Verbose mode
It is possible to enable tracing execution of plugin nodes to cout and collect statistics, such as:
- node implementer:
* cpu (CPU plugin)
* dnnl (oneDNN library)
* ngraph_ref (ngraph reference fallback)
- node name
- node type
- node algorithm
- node primitive info
- input / output ports info
- fused nodes
- execution time
- etc
Format:
```sh
ov_cpu_verbose,exec,<node_implementer>,\
<node_name>:<node_type>:<node_alg>,<impl_type>,\
src:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
dst:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
post_ops:'<node_name>:<node_type>:<node_alg>;...;',\
<execution_time>
```
To turn on verbose mode the following environment variable should be used:
```sh
OV_CPU_VERBOSE=<level> binary ...
```
Currently verbose mode has only one level; any digit can be used to activate it.
To have colored verbose output just duplicate level's digit, for example:
```sh
OV_CPU_VERBOSE=11 binary ...
```
**NOTE:** Shell color codes are used

View File

@ -4,9 +4,11 @@
#pragma once #pragma once
#include "mkldnn/ie_mkldnn.h"
#include "cpu_types.h"
#include <ie_layouts.h> #include <ie_layouts.h>
#include <ie_blob.h> #include <ie_blob.h>
#include "mkldnn/ie_mkldnn.h"
namespace MKLDNNPlugin { namespace MKLDNNPlugin {

View File

@ -50,3 +50,38 @@ impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) {
return res; return res;
} }
// Returns the textual name of an impl_desc_type value, or "unknown" if the
// value matches none of the listed constants.
//
// NOTE: an if-chain is used here on purpose instead of a switch:
// impl_desc_type values are bit combinations (e.g. jit_uni_dw = jit | uni | _dw),
// so several enumerators can alias the same value and the first match in this
// order wins; a switch would reject duplicate case values.
const char* MKLDNNPlugin::impl_type_to_string(impl_desc_type type) {
#define CASE(_type) do { \
    if (type == _type) return #_type; \
} while (0)
    CASE(unknown);
    CASE(undef);
    CASE(ref_any);
    CASE(reorder);
    CASE(gemm_any);
    CASE(gemm_blas);
    CASE(gemm_avx512);
    CASE(gemm_avx2);
    CASE(gemm_avx);
    CASE(gemm_sse42);
    CASE(jit_gemm);
    CASE(jit_avx512_winograd);
    CASE(jit_avx512);
    CASE(jit_avx2);
    CASE(jit_avx);
    CASE(jit_sse42);
    CASE(jit_uni);
    CASE(jit_avx512_1x1);
    CASE(jit_avx2_1x1);
    CASE(jit_avx_1x1);
    CASE(jit_sse42_1x1);
    CASE(jit_uni_1x1);
    CASE(jit_avx512_dw);
    CASE(jit_avx2_dw);
    CASE(jit_avx_dw);
    CASE(jit_sse42_dw);
    CASE(jit_uni_dw);
#undef CASE
    // Fallback: no listed constant matched the given value.
    return "unknown";
}

View File

@ -63,6 +63,7 @@ enum impl_desc_type {
jit_uni_dw = jit | uni | _dw, jit_uni_dw = jit | uni | _dw,
}; };
const char * impl_type_to_string(impl_desc_type type);
impl_desc_type parse_impl_name(std::string impl_desc_name); impl_desc_type parse_impl_name(std::string impl_desc_name);
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@ -39,6 +39,7 @@
#include "utils/node_dumper.h" #include "utils/node_dumper.h"
#include "utils/ngraph_utils.hpp" #include "utils/ngraph_utils.hpp"
#include "utils/cpu_utils.hpp" #include "utils/cpu_utils.hpp"
#include "utils/verbose.h"
#include "memory_desc/cpu_memory_desc_utils.h" #include "memory_desc/cpu_memory_desc_utils.h"
#include <ngraph/node.hpp> #include <ngraph/node.hpp>
@ -828,7 +829,9 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
mkldnn::stream stream(eng); mkldnn::stream stream(eng);
for (const auto& node : executableGraphNodes) { for (const auto& node : executableGraphNodes) {
PERF(config.collectPerfCounters, node); VERBOSE(node, config.debugCaps.verbose);
PERF(node, config.collectPerfCounters);
if (request) if (request)
request->ThrowIfCanceled(); request->ThrowIfCanceled();

View File

@ -3,6 +3,7 @@
// //
#include "mkldnn_node.h" #include "mkldnn_node.h"
#include "dnnl_debug.h"
#include "mkldnn_extension_mngr.h" #include "mkldnn_extension_mngr.h"
#include "mkldnn_itt.h" #include "mkldnn_itt.h"
@ -43,14 +44,14 @@
#include <nodes/mkldnn_shuffle_channels_node.h> #include <nodes/mkldnn_shuffle_channels_node.h>
#include <nodes/mkldnn_reference_node.h> #include <nodes/mkldnn_reference_node.h>
#include <nodes/mkldnn_fake_quantize_node.h> #include <nodes/mkldnn_fake_quantize_node.h>
#include <mkldnn_types.h>
#include <dnnl_types.h>
#include "mkldnn_extension_utils.h" #include "mkldnn_extension_utils.h"
#include "mkldnn/iml_type_mapper.h"
#include "nodes/common/cpu_memcpy.h" #include "nodes/common/cpu_memcpy.h"
#include "mkldnn_debug.h" #include "mkldnn_debug.h"
#include "utils/rt_info/memory_formats_attribute.hpp" #include "utils/rt_info/memory_formats_attribute.hpp"
#include <dnnl_types.h>
#include <ie_ngraph_utils.hpp> #include <ie_ngraph_utils.hpp>
#include "utils/general_utils.h" #include "utils/general_utils.h"
#include "utils/cpu_utils.hpp" #include "utils/cpu_utils.hpp"
@ -63,372 +64,6 @@ using namespace MKLDNNPlugin;
using namespace openvino; using namespace openvino;
using namespace InferenceEngine::details; using namespace InferenceEngine::details;
namespace MKLDNNPlugin {
static const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
{ "Constant", Input },
{ "Parameter", Input },
{ "Result", Output },
{ "Convolution", Convolution },
{ "GroupConvolution", Convolution },
{ "MatMul", MatMul },
{ "FullyConnected", FullyConnected },
{ "MaxPool", Pooling },
{ "AvgPool", Pooling },
{ "AdaptiveMaxPool", AdaptivePooling},
{ "AdaptiveAvgPool", AdaptivePooling},
{ "Add", Eltwise },
{ "Subtract", Eltwise },
{ "Multiply", Eltwise },
{ "Divide", Eltwise },
{ "SquaredDifference", Eltwise },
{ "Maximum", Eltwise },
{ "Minimum", Eltwise },
{ "Mod", Eltwise },
{ "FloorMod", Eltwise },
{ "Power", Eltwise },
{ "PowerStatic", Eltwise },
{ "Equal", Eltwise },
{ "NotEqual", Eltwise },
{ "Greater", Eltwise },
{ "GreaterEqual", Eltwise },
{ "Less", Eltwise },
{ "LessEqual", Eltwise },
{ "LogicalAnd", Eltwise },
{ "LogicalOr", Eltwise },
{ "LogicalXor", Eltwise },
{ "LogicalNot", Eltwise },
{ "Relu", Eltwise },
{ "LeakyRelu", Eltwise },
{ "Gelu", Eltwise },
{ "Elu", Eltwise },
{ "Tanh", Eltwise },
{ "Sigmoid", Eltwise },
{ "Abs", Eltwise },
{ "Sqrt", Eltwise },
{ "Clamp", Eltwise },
{ "Exp", Eltwise },
{ "SwishCPU", Eltwise },
{ "HSwish", Eltwise },
{ "Mish", Eltwise },
{ "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "PRelu", Eltwise },
{ "Erf", Eltwise },
{ "SoftPlus", Eltwise },
{ "Reshape", Reshape },
{ "Squeeze", Reshape },
{ "Unsqueeze", Reshape },
{ "Softmax", Softmax },
{ "Reorder", Reorder },
{ "BatchToSpace", BatchToSpace },
{ "SpaceToBatch", SpaceToBatch },
{ "DepthToSpace", DepthToSpace },
{ "SpaceToDepth", SpaceToDepth },
{ "Roll", Roll },
{ "LRN", Lrn },
{ "Split", Split },
{ "VariadicSplit", Split },
{ "Concat", Concatenation },
{ "ConvolutionBackpropData", Deconvolution },
{ "GroupConvolutionBackpropData", Deconvolution },
{ "StridedSlice", StridedSlice },
{ "Tile", Tile },
{ "ROIAlign", ROIAlign },
{ "ROIPooling", ROIPooling },
{ "PSROIPooling", PSROIPooling },
{ "DeformablePSROIPooling", PSROIPooling },
{ "Pad", Pad },
{ "Transpose", Transpose },
{ "LSTMCell", RNNCell },
{ "GRUCell", RNNCell },
{ "RNNCell", RNNCell },
{ "LSTMSequence", RNNSeq },
{ "GRUSequence", RNNSeq },
{ "RNNSequence", RNNSeq },
{ "FakeQuantize", FakeQuantize },
{ "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Assign", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert },
{ "MVN", MVN},
{ "NormalizeL2", NormalizeL2},
{ "ScatterUpdate", ScatterUpdate},
{ "ScatterElementsUpdate", ScatterElementsUpdate},
{ "ScatterNDUpdate", ScatterNDUpdate},
{ "Interpolate", Interpolate},
{ "ReduceL1", Reduce},
{ "ReduceL2", Reduce},
{ "ReduceLogicalAnd", Reduce},
{ "ReduceLogicalOr", Reduce},
{ "ReduceMax", Reduce},
{ "ReduceMean", Reduce},
{ "ReduceMin", Reduce},
{ "ReduceProd", Reduce},
{ "ReduceSum", Reduce},
{ "ReduceLogSum", Reduce},
{ "ReduceLogSumExp", Reduce},
{ "ReduceSumSquare", Reduce},
{ "Broadcast", Broadcast},
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
{ "Gather", Gather},
{ "GatherElements", GatherElements},
{ "GatherND", GatherND},
{ "OneHot", OneHot},
{ "RegionYolo", RegionYolo},
{ "Select", Select},
{ "ShuffleChannels", ShuffleChannels},
{ "DFT", DFT},
{ "IDFT", DFT},
{ "Abs", Math},
{ "Acos", Math},
{ "Acosh", Math},
{ "Asin", Math},
{ "Asinh", Math},
{ "Atan", Math},
{ "Atanh", Math},
{ "Ceil", Math},
{ "Ceiling", Math},
{ "Cos", Math},
{ "Cosh", Math},
{ "Floor", Math},
{ "HardSigmoid", Math},
{ "Log", Math},
{ "Neg", Math},
{ "Reciprocal", Math},
{ "Selu", Math},
{ "Sign", Math},
{ "Sin", Math},
{ "Sinh", Math},
{ "SoftPlus", Math},
{ "Softsign", Math},
{ "Tan", Math},
{ "CTCLoss", CTCLoss},
{ "Bucketize", Bucketize},
{ "CTCGreedyDecoder", CTCGreedyDecoder},
{ "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
{ "CumSum", CumSum},
{ "DetectionOutput", DetectionOutput},
{ "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
{ "LogSoftmax", LogSoftmax},
{ "TopK", TopK},
{ "GatherTree", GatherTree},
{ "GRN", GRN},
{ "Range", Range},
{ "Proposal", Proposal},
{ "ReorgYolo", ReorgYolo},
{ "ReverseSequence", ReverseSequence},
{ "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
{ "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
{ "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
{ "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
{ "ExtractImagePatches", ExtractImagePatches},
{ "NonMaxSuppression", NonMaxSuppression},
{ "NonMaxSuppressionIEInternal", NonMaxSuppression},
{ "MatrixNms", MatrixNms},
{ "MulticlassNms", MulticlassNms}
};
Type TypeFromName(const std::string & type) {
auto itType = type_to_name_tbl.find(type);
if (type_to_name_tbl.end() != itType) {
return itType->second;
}
return Unknown;
}
template<>
DnnlMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtInputPort(portNum));
}
template<>
BlockedMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtInputPort(portNum));
}
template<>
DnnlMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtOutputPort(portNum));
}
template<>
BlockedMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtOutputPort(portNum));
}
std::string NameFromType(Type type) {
switch (type) {
case Generic:
return "Generic";
case Reorder:
return "Reorder";
case Input:
return "Input";
case Output:
return "Output";
case Convolution:
return "Convolution";
case Deconvolution:
return "Deconvolution";
case Lrn:
return "Lrn";
case Pooling:
return "Pooling";
case AdaptivePooling:
return "AdaptivePooling";
case FullyConnected:
return "FullyConnected";
case MatMul:
return "MatMul";
case Softmax:
return "Softmax";
case Split:
return "Split";
case Concatenation:
return "Concatenation";
case StridedSlice:
return "StridedSlice";
case Reshape:
return "Reshape";
case Tile:
return "Tile";
case ROIAlign:
return "ROIAlign";
case ROIPooling:
return "ROIPooling";
case PSROIPooling:
return "PSROIPooling";
case DepthToSpace:
return "DepthToSpace";
case BatchToSpace:
return "BatchToSpace";
case Pad:
return "Pad";
case Transpose:
return "Transpose";
case SpaceToDepth:
return "SpaceToDepth";
case SpaceToBatch:
return "SpaceToBatch";
case MemoryOutput:
return "MemoryOutput";
case MemoryInput:
return "MemoryInput";
case RNNSeq:
return "RNNSeq";
case RNNCell:
return "RNNCell";
case Eltwise:
return "Eltwise";
case FakeQuantize:
return "FakeQuantize";
case BinaryConvolution:
return "BinaryConvolution";
case DeformableConvolution:
return "DeformableConvolution";
case MVN:
return "MVN";
case TensorIterator:
return "TensorIterator";
case Convert:
return "Convert";
case NormalizeL2:
return "NormalizeL2";
case ScatterUpdate:
return "ScatterUpdate";
case ScatterElementsUpdate:
return "ScatterElementsUpdate";
case ScatterNDUpdate:
return "ScatterNDUpdate";
case Interpolate:
return "Interpolate";
case Reduce:
return "Reduce";
case Broadcast:
return "Broadcast";
case EmbeddingSegmentsSum:
return "EmbeddingSegmentsSum";
case EmbeddingBagPackedSum:
return "EmbeddingBagPackedSum";
case EmbeddingBagOffsetsSum:
return "EmbeddingBagOffsetsSum";
case Gather:
return "Gather";
case GatherElements:
return "GatherElements";
case GatherND:
return "GatherND";
case OneHot:
return "OneHot";
case RegionYolo:
return "RegionYolo";
case Select:
return "Select";
case Roll:
return "Roll";
case ShuffleChannels:
return "ShuffleChannels";
case DFT:
return "DFT";
case Math:
return "Math";
case CTCLoss:
return "CTCLoss";
case Bucketize:
return "Bucketize";
case CTCGreedyDecoder:
return "CTCGreedyDecoder";
case CTCGreedyDecoderSeqLen:
return "CTCGreedyDecoderSeqLen";
case CumSum:
return "CumSum";
case DetectionOutput:
return "DetectionOutput";
case ExperimentalDetectronDetectionOutput:
return "ExperimentalDetectronDetectionOutput";
case LogSoftmax:
return "LogSoftmax";
case TopK:
return "TopK";
case GatherTree:
return "GatherTree";
case GRN:
return "GRN";
case Range:
return "Range";
case Proposal:
return "Proposal";
case ReorgYolo:
return "ReorgYolo";
case ReverseSequence:
return "ReverseSequence";
case ExperimentalDetectronTopKROIs:
return "ExperimentalDetectronTopKROIs";
case ExperimentalDetectronROIFeatureExtractor:
return "ExperimentalDetectronROIFeatureExtractor";
case ExperimentalDetectronPriorGridGenerator:
return "ExperimentalDetectronPriorGridGenerator";
case ExperimentalDetectronGenerateProposalsSingleImage:
return "ExperimentalDetectronGenerateProposalsSingleImage";
case ExtractImagePatches:
return "ExtractImagePatches";
case NonMaxSuppression:
return "NonMaxSuppression";
case MatrixNms:
return "MatrixNms";
case MulticlassNms:
return "MulticlassNms";
default:
return "Unknown";
}
}
} // namespace MKLDNNPlugin
MKLDNNNode::NodesFactory & MKLDNNNode::factory() { MKLDNNNode::NodesFactory & MKLDNNNode::factory() {
static NodesFactory factoryInstance; static NodesFactory factoryInstance;
@ -439,7 +74,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown), : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()), weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
algorithm = Algorithm::Undefined; algorithm = Algorithm::Default;
fusingPort = -1; fusingPort = -1;
const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name(); const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();

View File

@ -36,9 +36,6 @@ using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>; using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>;
using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>; using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
Type TypeFromName(const std::string & type);
std::string NameFromType(Type type);
class PortConfigurator { class PortConfigurator {
public: public:
PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape, PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape,
@ -629,7 +626,7 @@ protected:
MKLDNNWeightsSharing::Ptr weightCache; MKLDNNWeightsSharing::Ptr weightCache;
Algorithm algorithm = Algorithm::Undefined; Algorithm algorithm = Algorithm::Default;
bool isInQuantizedGraph = false; bool isInQuantizedGraph = false;
@ -744,6 +741,10 @@ private:
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd); void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 }; enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes); ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
#ifdef CPU_DEBUG_CAPS
friend class Verbose;
#endif
}; };
class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type, class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,

View File

@ -5,20 +5,25 @@
#pragma once #pragma once
#include <chrono> #include <chrono>
#include <ratio>
namespace MKLDNNPlugin { namespace MKLDNNPlugin {
class PerfCount { class PerfCount {
uint64_t duration; uint64_t total_duration;
uint32_t num; uint32_t num;
std::chrono::high_resolution_clock::time_point __start = {}; std::chrono::high_resolution_clock::time_point __start = {};
std::chrono::high_resolution_clock::time_point __finish = {}; std::chrono::high_resolution_clock::time_point __finish = {};
public: public:
PerfCount(): duration(0), num(0) {} PerfCount(): total_duration(0), num(0) {}
uint64_t avg() { return (num == 0) ? 0 : duration / num; } std::chrono::duration<double, std::milli> duration() const {
return __finish - __start;
}
uint64_t avg() const { return (num == 0) ? 0 : total_duration / num; }
private: private:
void start_itr() { void start_itr() {
@ -27,8 +32,7 @@ private:
void finish_itr() { void finish_itr() {
__finish = std::chrono::high_resolution_clock::now(); __finish = std::chrono::high_resolution_clock::now();
total_duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
num++; num++;
} }
@ -46,5 +50,5 @@ public:
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin
#define GET_PERF(_counter) std::unique_ptr<PerfHelper>(new PerfHelper(_counter->PerfCounter())) #define GET_PERF(_node) std::unique_ptr<PerfHelper>(new PerfHelper(_node->PerfCounter()))
#define PERF(_need, _counter) auto pc = _need ? GET_PERF(_counter) : nullptr; #define PERF(_node, _need) auto pc = _need ? GET_PERF(_node) : nullptr;

View File

@ -24,6 +24,7 @@ public:
readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE"); readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME"); readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH"); readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
readParam(verbose, "OV_CPU_VERBOSE");
} }
std::string blobDumpDir; std::string blobDumpDir;
@ -33,9 +34,10 @@ public:
std::string blobDumpNodeType; std::string blobDumpNodeType;
std::string blobDumpNodeName; std::string blobDumpNodeName;
std::string execGraphPath; std::string execGraphPath;
std::string verbose;
private: private:
void readParam(std::string& param, const char* envVar) { static void readParam(std::string& param, const char* envVar) {
if (const char* envValue = std::getenv(envVar)) if (const char* envValue = std::getenv(envVar))
param = envValue; param = envValue;
} }

View File

@ -0,0 +1,169 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS
#include "verbose.h"
#include "mkldnn_node.h"
#include "cpu_types.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "dnnl_types.h"
#include "dnnl_debug.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
namespace MKLDNNPlugin {
// Decide whether this node's execution info should be emitted:
// verbosity must be enabled (lvl >= 1) and the node must be a real
// executable node (constants and graph Input/Output nodes are skipped).
bool Verbose::shouldBePrinted() const {
    const bool verbosityEnabled = lvl >= 1;
    const bool isGraphBoundary = node->getType() == Input || node->getType() == Output;
    return verbosityEnabled && !node->isConstant() && !isGraphBoundary;
}
/**
 * Print node verbose execution information to cout.
 * Similar to DNNL_VERBOSE output.
 * Formatting is written in C using oneDNN format functions.
 * Can be rewritten in pure C++ if necessary.
 */
/**
 * Format the node execution header into the internal stream:
 * implementer, name, type, algorithm, primitive implementation type,
 * src/dst port memory descriptors and fused post-ops.
 *
 * Fixes vs. previous revision:
 *  - "ENOUGHT" typo in the emitted overflow marker corrected to "ENOUGH";
 *  - port loops use size_t to avoid signed/unsigned comparison;
 *  - redundant `? true : false` removed.
 */
void Verbose::printInfo() {
    /* Verbosity levels 1, 2, 3, etc. -> no color
     * 11, 22, 33, etc. -> colorize the output */
    bool colorUp = lvl / 10 > 0;

    enum Color {
        RED,
        GREEN,
        YELLOW,
        BLUE,
        PURPLE,
        CYAN
    };

    // Wrap 'str' into ANSI escape sequences when colorization is enabled.
    auto colorize = [&](const Color color, const std::string& str) {
        if (!colorUp)
            return str;

        const std::string red("\033[1;31m");
        const std::string green("\033[1;32m");
        const std::string yellow("\033[1;33m");
        const std::string blue("\033[1;34m");
        const std::string purple("\033[1;35m");
        const std::string cyan("\033[1;36m");
        const std::string reset("\033[0m");
        std::string colorCode;

        switch (color) {
        case RED: colorCode = red;
            break;
        case GREEN: colorCode = green;
            break;
        case YELLOW: colorCode = yellow;
            break;
        case BLUE: colorCode = blue;
            break;
        case PURPLE: colorCode = purple;
            break;
        case CYAN: colorCode = cyan;
            break;
        default: colorCode = reset;
            break;
        }

        return colorCode + str + reset;
    };

    // Fixed-size C buffer: oneDNN's md2fmt/md2dim helpers are C functions,
    // so port info is assembled with snprintf. Can be increased if necessary.
    const int CPU_VERBOSE_DAT_LEN = 512;
    char portsInfo[CPU_VERBOSE_DAT_LEN] = {'\0'};
    int written = 0;
    int written_total = 0;

    // Advance the running write offset; on a formatting error or buffer
    // overflow, replace the whole buffer contents with an error marker.
    auto shift = [&](int size) {
        if (written < 0 || written_total + size > CPU_VERBOSE_DAT_LEN) {
            const char* errorMsg = "# NOT ENOUGH BUFFER SIZE #";
            snprintf(portsInfo, strlen(errorMsg) + 1, "%s", errorMsg);
            written_total = strlen(errorMsg);
            return;
        }
        written_total += size;
    };

    // Append " <prefix><format>:<dims>" for one memory descriptor.
    auto formatMemDesc = [&](const dnnl_memory_desc_t& desc, std::string& prefix) {
        prefix = colorize(BLUE, prefix);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, " ");
        shift(written);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", prefix.c_str());
        shift(written);
        written = dnnl_md2fmt_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
        shift(written);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, ":");
        shift(written);
        written = dnnl_md2dim_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
        shift(written);
    };

    for (size_t i = 0; i < node->getParentEdges().size(); i++) {
        std::string prefix("src:" + std::to_string(i) + ':');
        formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
                          node->getParentEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
                      prefix);
    }

    for (size_t i = 0; i < node->getChildEdges().size(); i++) {
        std::string prefix("dst:" + std::to_string(i) + ':');
        formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
                          node->getChildEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
                      prefix);
    }

    // Fused nodes are reported DNNL-style as a post_ops:'...' list.
    std::string post_ops;
    if (!node->getFusedWith().empty()) {
        post_ops += "post_ops:'";
        for (const auto& fusedNode : node->getFusedWith()) {
            post_ops.append(colorize(GREEN, fusedNode->getName())).append(":")
                .append(colorize(CYAN, NameFromType(fusedNode->getType()))).append(":")
                .append(algToString(fusedNode->getAlgorithm()))
                .append(";");
        }
        post_ops += "'";
    }

    // Which backend actually executes the node.
    std::string nodeImplementer = "cpu";
    if (node->prim)
        nodeImplementer = "dnnl"; // oneDNN
    else if (node->getType() == Reference)
        nodeImplementer = "ngraph_ref"; // ngraph reference

    const std::string& nodeName = colorize(GREEN, node->getName());
    const std::string& nodeType = colorize(CYAN, NameFromType(node->getType()));
    const std::string& nodeAlg = algToString(node->getAlgorithm());
    const std::string& nodePrimImplType = impl_type_to_string(node->getSelectedPrimitiveDescriptor()->getImplementationType());

    stream << "ov_cpu_verbose" << ','
           << "exec" << ','
           << nodeImplementer << ','
           << nodeName << ":" << nodeType << ":" << nodeAlg << ','
           << nodePrimImplType << ','
           << portsInfo << ','
           << post_ops << ',';
}
// Append the node's last measured execution time (milliseconds) to the stream.
void Verbose::printDuration() {
    stream << node->PerfCounter().duration().count() << "ms";
}
// Emit the accumulated verbose line to stdout.
// Uses stream.str() instead of stream.rdbuf(): inserting a streambuf* from an
// empty stream would set failbit on std::cout, silently breaking later output.
void Verbose::flush() const {
    std::cout << stream.str() << "\n";
}
} // namespace MKLDNNPlugin
#endif // CPU_DEBUG_CAPS

View File

@ -0,0 +1,46 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS
#include "mkldnn_node.h"
#include <string>
#include <cstdlib>
#include <sstream>
namespace MKLDNNPlugin {
// RAII helper that prints one DNNL_VERBOSE-style line per node execution:
// the constructor prints the node header, the destructor appends the measured
// duration and flushes the whole line to cout. Intended to be instantiated on
// the stack around a node's execute call (via the VERBOSE macro).
class Verbose {
public:
    // _node: node being executed; only a reference to the shared_ptr is held,
    //        so the pointer must outlive this object (true for stack usage).
    // _lvl:  verbosity level as raw env-var text; parsed with atoi, so
    //        non-numeric input degrades to 0 (printing disabled).
    Verbose(const MKLDNNNodePtr& _node, const std::string& _lvl)
        : node(_node), lvl(atoi(_lvl.c_str())) {
        if (!shouldBePrinted())
            return;
        printInfo();
    }

    // On scope exit, complete the line with the duration and write it out.
    virtual ~Verbose() {
        if (!shouldBePrinted())
            return;

        printDuration();
        flush();
    }

private:
    const MKLDNNNodePtr& node;  // non-owning; see constructor note
    const int lvl;              // parsed verbosity level (0 = disabled)
    std::stringstream stream;   // accumulates the output line before flush()

    bool shouldBePrinted() const;
    void printInfo();
    void printDuration();
    void flush() const;
};
#define VERBOSE(...) Verbose(__VA_ARGS__)
} // namespace MKLDNNPlugin
#else
#define VERBOSE(...)
#endif // CPU_DEBUG_CAPS

View File

@ -12,6 +12,7 @@
#include <algorithm> #include <algorithm>
#include <cctype> #include <cctype>
#include <functional> #include <functional>
#include <iterator>
#include <map> #include <map>
#include <set> #include <set>
#include <unordered_map> #include <unordered_map>