[CPU] Add OV_CPU_VERBOSE env var to print node exec info to cout (#6390)

This commit is contained in:
Egor Duplensky 2021-09-26 22:17:57 +03:00 committed by GitHub
parent b968c7b813
commit c92988c8e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 823 additions and 416 deletions

View File

@ -0,0 +1,459 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cpu_types.h"
#include <vector>
#include <string>
namespace MKLDNNPlugin {
using Dim = std::size_t;
using VectorDims = std::vector<Dim>;
// Maps ngraph operation type names to internal CPU plugin node types.
// The map is case-insensitive (caseless_unordered_map). Many operation names
// collapse onto a single plugin type (e.g. all Reduce* ops -> Reduce).
// Names not present here resolve to Unknown via TypeFromName().
const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
        { "Constant", Input },
        { "Parameter", Input },
        { "Result", Output },
        { "Convolution", Convolution },
        { "GroupConvolution", Convolution },
        { "MatMul", MatMul },
        { "FullyConnected", FullyConnected },
        { "MaxPool", Pooling },
        { "AvgPool", Pooling },
        { "AdaptiveMaxPool", AdaptivePooling},
        { "AdaptiveAvgPool", AdaptivePooling},
        { "Add", Eltwise },
        { "Subtract", Eltwise },
        { "Multiply", Eltwise },
        { "Divide", Eltwise },
        { "SquaredDifference", Eltwise },
        { "Maximum", Eltwise },
        { "Minimum", Eltwise },
        { "Mod", Eltwise },
        { "FloorMod", Eltwise },
        { "Power", Eltwise },
        { "PowerStatic", Eltwise },
        { "Equal", Eltwise },
        { "NotEqual", Eltwise },
        { "Greater", Eltwise },
        { "GreaterEqual", Eltwise },
        { "Less", Eltwise },
        { "LessEqual", Eltwise },
        { "LogicalAnd", Eltwise },
        { "LogicalOr", Eltwise },
        { "LogicalXor", Eltwise },
        { "LogicalNot", Eltwise },
        { "Relu", Eltwise },
        { "LeakyRelu", Eltwise },
        { "Gelu", Eltwise },
        { "Elu", Eltwise },
        { "Tanh", Eltwise },
        { "Sigmoid", Eltwise },
        { "Abs", Eltwise },
        { "Sqrt", Eltwise },
        { "Clamp", Eltwise },
        { "Exp", Eltwise },
        { "SwishCPU", Eltwise },
        { "HSwish", Eltwise },
        { "Mish", Eltwise },
        { "HSigmoid", Eltwise },
        { "Round", Eltwise },
        { "PRelu", Eltwise },
        { "Erf", Eltwise },
        { "SoftPlus", Eltwise },
        { "Reshape", Reshape },
        { "Squeeze", Reshape },
        { "Unsqueeze", Reshape },
        { "Softmax", Softmax },
        { "Reorder", Reorder },
        { "BatchToSpace", BatchToSpace },
        { "SpaceToBatch", SpaceToBatch },
        { "DepthToSpace", DepthToSpace },
        { "SpaceToDepth", SpaceToDepth },
        { "Roll", Roll },
        { "LRN", Lrn },
        { "Split", Split },
        { "VariadicSplit", Split },
        { "Concat", Concatenation },
        { "ConvolutionBackpropData", Deconvolution },
        { "GroupConvolutionBackpropData", Deconvolution },
        { "StridedSlice", StridedSlice },
        { "Tile", Tile },
        { "ROIAlign", ROIAlign },
        { "ROIPooling", ROIPooling },
        { "PSROIPooling", PSROIPooling },
        { "DeformablePSROIPooling", PSROIPooling },
        { "Pad", Pad },
        { "Transpose", Transpose },
        { "LSTMCell", RNNCell },
        { "GRUCell", RNNCell },
        { "RNNCell", RNNCell },
        { "LSTMSequence", RNNSeq },
        { "GRUSequence", RNNSeq },
        { "RNNSequence", RNNSeq },
        { "FakeQuantize", FakeQuantize },
        { "BinaryConvolution", BinaryConvolution },
        { "DeformableConvolution", DeformableConvolution },
        { "TensorIterator", TensorIterator },
        { "Loop", TensorIterator },
        { "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
        { "Assign", MemoryOutput }, // for construction from layer ctor
        { "Convert", Convert },
        { "MVN", MVN},
        { "NormalizeL2", NormalizeL2},
        { "ScatterUpdate", ScatterUpdate},
        { "ScatterElementsUpdate", ScatterElementsUpdate},
        { "ScatterNDUpdate", ScatterNDUpdate},
        { "Interpolate", Interpolate},
        { "ReduceL1", Reduce},
        { "ReduceL2", Reduce},
        { "ReduceLogicalAnd", Reduce},
        { "ReduceLogicalOr", Reduce},
        { "ReduceMax", Reduce},
        { "ReduceMean", Reduce},
        { "ReduceMin", Reduce},
        { "ReduceProd", Reduce},
        { "ReduceSum", Reduce},
        { "ReduceLogSum", Reduce},
        { "ReduceLogSumExp", Reduce},
        { "ReduceSumSquare", Reduce},
        { "Broadcast", Broadcast},
        { "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
        { "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
        { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
        { "Gather", Gather},
        { "GatherElements", GatherElements},
        { "GatherND", GatherND},
        { "OneHot", OneHot},
        { "RegionYolo", RegionYolo},
        { "Select", Select},
        { "ShuffleChannels", ShuffleChannels},
        { "DFT", DFT},
        { "IDFT", DFT},
        // NOTE(review): duplicate key — "Abs" is already mapped to Eltwise above.
        // Which of two equivalent keys survives initializer-list construction is
        // unspecified by the standard; confirm the intended mapping and drop one.
        { "Abs", Math},
        { "Acos", Math},
        { "Acosh", Math},
        { "Asin", Math},
        { "Asinh", Math},
        { "Atan", Math},
        { "Atanh", Math},
        { "Ceil", Math},
        { "Ceiling", Math},
        { "Cos", Math},
        { "Cosh", Math},
        { "Floor", Math},
        { "HardSigmoid", Math},
        { "Log", Math},
        { "Neg", Math},
        { "Reciprocal", Math},
        { "Selu", Math},
        { "Sign", Math},
        { "Sin", Math},
        { "Sinh", Math},
        // NOTE(review): duplicate key — "SoftPlus" is already mapped to Eltwise above
        // (same unspecified-winner concern as "Abs").
        { "SoftPlus", Math},
        { "Softsign", Math},
        { "Tan", Math},
        { "CTCLoss", CTCLoss},
        { "Bucketize", Bucketize},
        { "CTCGreedyDecoder", CTCGreedyDecoder},
        { "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
        { "CumSum", CumSum},
        { "DetectionOutput", DetectionOutput},
        { "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
        { "LogSoftmax", LogSoftmax},
        { "TopK", TopK},
        { "GatherTree", GatherTree},
        { "GRN", GRN},
        { "Range", Range},
        { "Proposal", Proposal},
        { "ReorgYolo", ReorgYolo},
        { "ReverseSequence", ReverseSequence},
        { "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
        { "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
        { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
        { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
        { "ExtractImagePatches", ExtractImagePatches},
        { "NonMaxSuppression", NonMaxSuppression},
        { "NonMaxSuppressionIEInternal", NonMaxSuppression},
        { "MatrixNms", MatrixNms},
        { "MulticlassNms", MulticlassNms}
};
// Resolve an operation type name to the plugin-internal node Type.
// Lookup is case-insensitive (see type_to_name_tbl); unrecognized names
// map to Unknown.
Type TypeFromName(const std::string& type) {
    const auto entry = type_to_name_tbl.find(type);
    return entry == type_to_name_tbl.end() ? Unknown : entry->second;
}
// Produce a human-readable name for a node Type. Every recognized enumerator
// maps to its own identifier spelled as a string, so the name is generated via
// preprocessor stringification (same technique as algToString below);
// unrecognized values yield "Unknown".
std::string NameFromType(const Type type) {
#define TYPE_CASE(_type) if (type == _type) return #_type
    TYPE_CASE(Generic);
    TYPE_CASE(Reorder);
    TYPE_CASE(Input);
    TYPE_CASE(Output);
    TYPE_CASE(Convolution);
    TYPE_CASE(Deconvolution);
    TYPE_CASE(Lrn);
    TYPE_CASE(Pooling);
    TYPE_CASE(AdaptivePooling);
    TYPE_CASE(FullyConnected);
    TYPE_CASE(MatMul);
    TYPE_CASE(Softmax);
    TYPE_CASE(Split);
    TYPE_CASE(Concatenation);
    TYPE_CASE(StridedSlice);
    TYPE_CASE(Reshape);
    TYPE_CASE(Tile);
    TYPE_CASE(ROIAlign);
    TYPE_CASE(ROIPooling);
    TYPE_CASE(PSROIPooling);
    TYPE_CASE(DepthToSpace);
    TYPE_CASE(BatchToSpace);
    TYPE_CASE(Pad);
    TYPE_CASE(Transpose);
    TYPE_CASE(SpaceToDepth);
    TYPE_CASE(SpaceToBatch);
    TYPE_CASE(MemoryOutput);
    TYPE_CASE(MemoryInput);
    TYPE_CASE(RNNSeq);
    TYPE_CASE(RNNCell);
    TYPE_CASE(Eltwise);
    TYPE_CASE(FakeQuantize);
    TYPE_CASE(BinaryConvolution);
    TYPE_CASE(DeformableConvolution);
    TYPE_CASE(MVN);
    TYPE_CASE(TensorIterator);
    TYPE_CASE(Convert);
    TYPE_CASE(NormalizeL2);
    TYPE_CASE(ScatterUpdate);
    TYPE_CASE(ScatterElementsUpdate);
    TYPE_CASE(ScatterNDUpdate);
    TYPE_CASE(Interpolate);
    TYPE_CASE(Reduce);
    TYPE_CASE(Broadcast);
    TYPE_CASE(EmbeddingSegmentsSum);
    TYPE_CASE(EmbeddingBagPackedSum);
    TYPE_CASE(EmbeddingBagOffsetsSum);
    TYPE_CASE(Gather);
    TYPE_CASE(GatherElements);
    TYPE_CASE(GatherND);
    TYPE_CASE(OneHot);
    TYPE_CASE(RegionYolo);
    TYPE_CASE(Select);
    TYPE_CASE(Roll);
    TYPE_CASE(ShuffleChannels);
    TYPE_CASE(DFT);
    TYPE_CASE(Math);
    TYPE_CASE(CTCLoss);
    TYPE_CASE(Bucketize);
    TYPE_CASE(CTCGreedyDecoder);
    TYPE_CASE(CTCGreedyDecoderSeqLen);
    TYPE_CASE(CumSum);
    TYPE_CASE(DetectionOutput);
    TYPE_CASE(ExperimentalDetectronDetectionOutput);
    TYPE_CASE(LogSoftmax);
    TYPE_CASE(TopK);
    TYPE_CASE(GatherTree);
    TYPE_CASE(GRN);
    TYPE_CASE(Range);
    TYPE_CASE(Proposal);
    TYPE_CASE(ReorgYolo);
    TYPE_CASE(ReverseSequence);
    TYPE_CASE(ExperimentalDetectronTopKROIs);
    TYPE_CASE(ExperimentalDetectronROIFeatureExtractor);
    TYPE_CASE(ExperimentalDetectronPriorGridGenerator);
    TYPE_CASE(ExperimentalDetectronGenerateProposalsSingleImage);
    TYPE_CASE(ExtractImagePatches);
    TYPE_CASE(NonMaxSuppression);
    TYPE_CASE(MatrixNms);
    TYPE_CASE(MulticlassNms);
#undef TYPE_CASE
    return "Unknown";
}
// Produce a human-readable name for an Algorithm via stringification.
// Algorithm::Undefined is deliberately absent from the chain: it (and any
// future unlisted enumerator) falls through to the final "Undefined".
std::string algToString(const Algorithm alg) {
#define ALG_CASE(_alg) if (alg == _alg) return #_alg
    ALG_CASE(Default);
    ALG_CASE(PoolingMax);
    ALG_CASE(PoolingAvg);
    ALG_CASE(ConvolutionCommon);
    ALG_CASE(ConvolutionGrouped);
    ALG_CASE(DeconvolutionCommon);
    ALG_CASE(DeconvolutionGrouped);
    ALG_CASE(EltwiseAdd);
    ALG_CASE(EltwiseMultiply);
    ALG_CASE(EltwiseSubtract);
    ALG_CASE(EltwiseDivide);
    ALG_CASE(EltwiseFloorMod);
    ALG_CASE(EltwiseMod);
    ALG_CASE(EltwiseMaximum);
    ALG_CASE(EltwiseMinimum);
    ALG_CASE(EltwiseSquaredDifference);
    ALG_CASE(EltwisePowerDynamic);
    ALG_CASE(EltwisePowerStatic);
    ALG_CASE(EltwiseMulAdd);
    ALG_CASE(EltwiseEqual);
    ALG_CASE(EltwiseNotEqual);
    ALG_CASE(EltwiseGreater);
    ALG_CASE(EltwiseGreaterEqual);
    ALG_CASE(EltwiseLess);
    ALG_CASE(EltwiseLessEqual);
    ALG_CASE(EltwiseLogicalAnd);
    ALG_CASE(EltwiseLogicalOr);
    ALG_CASE(EltwiseLogicalXor);
    ALG_CASE(EltwiseLogicalNot);
    ALG_CASE(EltwiseRelu);
    ALG_CASE(EltwiseGelu);
    ALG_CASE(EltwiseElu);
    ALG_CASE(EltwiseTanh);
    ALG_CASE(EltwiseSigmoid);
    ALG_CASE(EltwiseAbs);
    ALG_CASE(EltwiseSqrt);
    ALG_CASE(EltwiseSoftRelu);
    ALG_CASE(EltwiseExp);
    ALG_CASE(EltwiseClamp);
    ALG_CASE(EltwiseSwish);
    ALG_CASE(EltwisePrelu);
    ALG_CASE(EltwiseMish);
    ALG_CASE(EltwiseHswish);
    ALG_CASE(EltwiseHsigmoid);
    ALG_CASE(EltwiseRoundHalfToEven);
    ALG_CASE(EltwiseRoundHalfAwayFromZero);
    ALG_CASE(EltwiseErf);
    ALG_CASE(FQCommon);
    ALG_CASE(FQQuantization);
    ALG_CASE(FQBinarization);
    ALG_CASE(ROIPoolingMax);
    ALG_CASE(ROIPoolingBilinear);
    ALG_CASE(ROIAlignMax);
    ALG_CASE(ROIAlignAvg);
    ALG_CASE(PSROIPoolingAverage);
    ALG_CASE(PSROIPoolingBilinear);
    ALG_CASE(PSROIPoolingBilinearDeformable);
    ALG_CASE(ReduceL1);
    ALG_CASE(ReduceL2);
    ALG_CASE(ReduceAnd);
    ALG_CASE(ReduceOr);
    ALG_CASE(ReduceMax);
    ALG_CASE(ReduceMean);
    ALG_CASE(ReduceMin);
    ALG_CASE(ReduceProd);
    ALG_CASE(ReduceSum);
    ALG_CASE(ReduceLogSum);
    ALG_CASE(ReduceLogSumExp);
    ALG_CASE(ReduceSumSquare);
    ALG_CASE(MathAbs);
    ALG_CASE(MathAcos);
    ALG_CASE(MathAcosh);
    ALG_CASE(MathAsin);
    ALG_CASE(MathAsinh);
    ALG_CASE(MathAtan);
    ALG_CASE(MathAtanh);
    ALG_CASE(MathCeiling);
    ALG_CASE(MathCos);
    ALG_CASE(MathCosh);
    ALG_CASE(MathErf);
    ALG_CASE(MathFloor);
    ALG_CASE(MathHardSigmoid);
    ALG_CASE(MathLog);
    ALG_CASE(MathNegative);
    ALG_CASE(MathReciprocal);
    ALG_CASE(MathSelu);
    ALG_CASE(MathSign);
    ALG_CASE(MathSin);
    ALG_CASE(MathSinh);
    ALG_CASE(MathSoftPlus);
    ALG_CASE(MathSoftsign);
    ALG_CASE(MathTan);
#undef ALG_CASE
    return "Undefined";
}
} // namespace MKLDNNPlugin

View File

@ -4,7 +4,10 @@
#pragma once
#include "caseless.hpp"
#include <vector>
#include <string>
namespace MKLDNNPlugin {
@ -97,7 +100,7 @@ enum Type {
};
enum Algorithm {
Undefined,
Default,
// Pooling algorithms
PoolingMax,
@ -215,4 +218,11 @@ enum Algorithm {
MathTan
};
extern const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl;
Type TypeFromName(const std::string& type);
std::string NameFromType(const Type type);
std::string algToString(const Algorithm alg);
} // namespace MKLDNNPlugin

View File

@ -0,0 +1,8 @@
# Debug capabilities
Use the following cmake option to enable debug capabilities:
`-DENABLE_DEBUG_CAPS=ON`
* [Verbose mode](verbose.md)
* [Blob dumping](blob_dumping.md)
* [Graph serialization](graph_serialization.md)

View File

@ -1,9 +1,4 @@
# Debug capabilities
Use the following cmake option to enable debug capabilities:
`-DENABLE_CPU_DEBUG_CAPS=ON`
## Blob dumping
# Blob dumping
Blob dumping is controlled by environment variables (filters).
The variables define conditions of the node which input and output blobs
@ -24,12 +19,12 @@ or for shell session (bash example):
export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
binary ...
```
### Specify dump directory
## Specify dump directory
```sh
OV_CPU_BLOB_DUMP_DIR=<directory-name> binary ...
```
Default is *mkldnn_dump*
### Specify dump format
## Specify dump format
```sh
OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
```
@ -37,7 +32,7 @@ Options are:
* BIN (default)
* TEXT
### Filter input / output blobs
## Filter input / output blobs
To dump only input / output blobs:
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ...
@ -51,7 +46,7 @@ Options are:
* OUT
* ALL
### Filter by execution ID
## Filter by execution ID
To dump blobs only for nodes with specified execution IDs:
```sh
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ...
@ -61,7 +56,7 @@ Example:
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ...
```
### Filter by type
## Filter by type
To dump blobs only for nodes with specified types:
```sh
OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ...
@ -73,7 +68,7 @@ Example:
> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types
### Filter by name
## Filter by name
To dump blobs only for nodes with name matching specified regex:
```sh
OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ...
@ -83,7 +78,7 @@ Example:
OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ...
```
### Dump all the blobs
## Dump all the blobs
```sh
OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
```
@ -95,22 +90,3 @@ Example:
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
```
## Graph serialization
The functionality allows to serialize execution graph using environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```
Possible serialization options:
* cout
Serialize to console output
* \<path\>.xml
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.

View File

@ -0,0 +1,17 @@
# Graph serialization
The functionality allows to serialize execution graph using environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```
Possible serialization options:
* cout
Serialize to console output
* \<path\>.xml
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
* \<path\>.dot
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.

View File

@ -0,0 +1,38 @@
# Verbose mode
It is possible to enable tracing of the execution of plugin nodes to cout and to collect statistics, such as:
- node implementer:
* cpu (CPU plugin)
* dnnl (oneDNN library)
* ngraph_ref (ngraph reference fallback)
- node name
- node type
- node algorithm
- node primitive info
- input / output ports info
- fused nodes
- execution time
- etc
Format:
```sh
ov_cpu_verbose,exec,<node_implementer>,\
<node_name>:<node_type>:<node_alg>,<impl_type>,\
src:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
dst:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
post_ops:'<node_name>:<node_type>:<node_alg>;...;',\
<execution_time>
```
To turn on verbose mode the following environment variable should be used:
```sh
OV_CPU_VERBOSE=<level> binary ...
```
Currently verbose mode has only one level; any digit can be used to activate it.
To have colored verbose output just duplicate level's digit, for example:
```sh
OV_CPU_VERBOSE=11 binary ...
```
**NOTE:** Shell color codes are used

View File

@ -4,9 +4,11 @@
#pragma once
#include "mkldnn/ie_mkldnn.h"
#include "cpu_types.h"
#include <ie_layouts.h>
#include <ie_blob.h>
#include "mkldnn/ie_mkldnn.h"
namespace MKLDNNPlugin {

View File

@ -50,3 +50,38 @@ impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) {
return res;
}
// Produce a human-readable name for an implementation descriptor type.
// This is intentionally an ordered equality chain rather than a switch:
// impl_desc_type values are bit-mask combinations (see the enum definition),
// so the first matching entry wins and the original lookup order is preserved.
// Anything unmatched reports as "unknown".
const char* MKLDNNPlugin::impl_type_to_string(impl_desc_type type) {
#define IMPL_CASE(_type) if (type == _type) return #_type
    IMPL_CASE(unknown);
    IMPL_CASE(undef);
    IMPL_CASE(ref_any);
    IMPL_CASE(reorder);
    IMPL_CASE(gemm_any);
    IMPL_CASE(gemm_blas);
    IMPL_CASE(gemm_avx512);
    IMPL_CASE(gemm_avx2);
    IMPL_CASE(gemm_avx);
    IMPL_CASE(gemm_sse42);
    IMPL_CASE(jit_gemm);
    IMPL_CASE(jit_avx512_winograd);
    IMPL_CASE(jit_avx512);
    IMPL_CASE(jit_avx2);
    IMPL_CASE(jit_avx);
    IMPL_CASE(jit_sse42);
    IMPL_CASE(jit_uni);
    IMPL_CASE(jit_avx512_1x1);
    IMPL_CASE(jit_avx2_1x1);
    IMPL_CASE(jit_avx_1x1);
    IMPL_CASE(jit_sse42_1x1);
    IMPL_CASE(jit_uni_1x1);
    IMPL_CASE(jit_avx512_dw);
    IMPL_CASE(jit_avx2_dw);
    IMPL_CASE(jit_avx_dw);
    IMPL_CASE(jit_sse42_dw);
    IMPL_CASE(jit_uni_dw);
#undef IMPL_CASE
    return "unknown";
}

View File

@ -63,6 +63,7 @@ enum impl_desc_type {
jit_uni_dw = jit | uni | _dw,
};
const char * impl_type_to_string(impl_desc_type type);
impl_desc_type parse_impl_name(std::string impl_desc_name);
} // namespace MKLDNNPlugin

View File

@ -39,6 +39,7 @@
#include "utils/node_dumper.h"
#include "utils/ngraph_utils.hpp"
#include "utils/cpu_utils.hpp"
#include "utils/verbose.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include <ngraph/node.hpp>
@ -828,7 +829,9 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
mkldnn::stream stream(eng);
for (const auto& node : executableGraphNodes) {
PERF(config.collectPerfCounters, node);
VERBOSE(node, config.debugCaps.verbose);
PERF(node, config.collectPerfCounters);
if (request)
request->ThrowIfCanceled();

View File

@ -3,6 +3,7 @@
//
#include "mkldnn_node.h"
#include "dnnl_debug.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_itt.h"
@ -43,14 +44,14 @@
#include <nodes/mkldnn_shuffle_channels_node.h>
#include <nodes/mkldnn_reference_node.h>
#include <nodes/mkldnn_fake_quantize_node.h>
#include <mkldnn_types.h>
#include <dnnl_types.h>
#include "mkldnn_extension_utils.h"
#include "mkldnn/iml_type_mapper.h"
#include "nodes/common/cpu_memcpy.h"
#include "mkldnn_debug.h"
#include "utils/rt_info/memory_formats_attribute.hpp"
#include <dnnl_types.h>
#include <ie_ngraph_utils.hpp>
#include "utils/general_utils.h"
#include "utils/cpu_utils.hpp"
@ -63,372 +64,6 @@ using namespace MKLDNNPlugin;
using namespace openvino;
using namespace InferenceEngine::details;
namespace MKLDNNPlugin {
static const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
{ "Constant", Input },
{ "Parameter", Input },
{ "Result", Output },
{ "Convolution", Convolution },
{ "GroupConvolution", Convolution },
{ "MatMul", MatMul },
{ "FullyConnected", FullyConnected },
{ "MaxPool", Pooling },
{ "AvgPool", Pooling },
{ "AdaptiveMaxPool", AdaptivePooling},
{ "AdaptiveAvgPool", AdaptivePooling},
{ "Add", Eltwise },
{ "Subtract", Eltwise },
{ "Multiply", Eltwise },
{ "Divide", Eltwise },
{ "SquaredDifference", Eltwise },
{ "Maximum", Eltwise },
{ "Minimum", Eltwise },
{ "Mod", Eltwise },
{ "FloorMod", Eltwise },
{ "Power", Eltwise },
{ "PowerStatic", Eltwise },
{ "Equal", Eltwise },
{ "NotEqual", Eltwise },
{ "Greater", Eltwise },
{ "GreaterEqual", Eltwise },
{ "Less", Eltwise },
{ "LessEqual", Eltwise },
{ "LogicalAnd", Eltwise },
{ "LogicalOr", Eltwise },
{ "LogicalXor", Eltwise },
{ "LogicalNot", Eltwise },
{ "Relu", Eltwise },
{ "LeakyRelu", Eltwise },
{ "Gelu", Eltwise },
{ "Elu", Eltwise },
{ "Tanh", Eltwise },
{ "Sigmoid", Eltwise },
{ "Abs", Eltwise },
{ "Sqrt", Eltwise },
{ "Clamp", Eltwise },
{ "Exp", Eltwise },
{ "SwishCPU", Eltwise },
{ "HSwish", Eltwise },
{ "Mish", Eltwise },
{ "HSigmoid", Eltwise },
{ "Round", Eltwise },
{ "PRelu", Eltwise },
{ "Erf", Eltwise },
{ "SoftPlus", Eltwise },
{ "Reshape", Reshape },
{ "Squeeze", Reshape },
{ "Unsqueeze", Reshape },
{ "Softmax", Softmax },
{ "Reorder", Reorder },
{ "BatchToSpace", BatchToSpace },
{ "SpaceToBatch", SpaceToBatch },
{ "DepthToSpace", DepthToSpace },
{ "SpaceToDepth", SpaceToDepth },
{ "Roll", Roll },
{ "LRN", Lrn },
{ "Split", Split },
{ "VariadicSplit", Split },
{ "Concat", Concatenation },
{ "ConvolutionBackpropData", Deconvolution },
{ "GroupConvolutionBackpropData", Deconvolution },
{ "StridedSlice", StridedSlice },
{ "Tile", Tile },
{ "ROIAlign", ROIAlign },
{ "ROIPooling", ROIPooling },
{ "PSROIPooling", PSROIPooling },
{ "DeformablePSROIPooling", PSROIPooling },
{ "Pad", Pad },
{ "Transpose", Transpose },
{ "LSTMCell", RNNCell },
{ "GRUCell", RNNCell },
{ "RNNCell", RNNCell },
{ "LSTMSequence", RNNSeq },
{ "GRUSequence", RNNSeq },
{ "RNNSequence", RNNSeq },
{ "FakeQuantize", FakeQuantize },
{ "BinaryConvolution", BinaryConvolution },
{ "DeformableConvolution", DeformableConvolution },
{ "TensorIterator", TensorIterator },
{ "Loop", TensorIterator },
{ "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
{ "Assign", MemoryOutput }, // for construction from layer ctor
{ "Convert", Convert },
{ "MVN", MVN},
{ "NormalizeL2", NormalizeL2},
{ "ScatterUpdate", ScatterUpdate},
{ "ScatterElementsUpdate", ScatterElementsUpdate},
{ "ScatterNDUpdate", ScatterNDUpdate},
{ "Interpolate", Interpolate},
{ "ReduceL1", Reduce},
{ "ReduceL2", Reduce},
{ "ReduceLogicalAnd", Reduce},
{ "ReduceLogicalOr", Reduce},
{ "ReduceMax", Reduce},
{ "ReduceMean", Reduce},
{ "ReduceMin", Reduce},
{ "ReduceProd", Reduce},
{ "ReduceSum", Reduce},
{ "ReduceLogSum", Reduce},
{ "ReduceLogSumExp", Reduce},
{ "ReduceSumSquare", Reduce},
{ "Broadcast", Broadcast},
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
{ "Gather", Gather},
{ "GatherElements", GatherElements},
{ "GatherND", GatherND},
{ "OneHot", OneHot},
{ "RegionYolo", RegionYolo},
{ "Select", Select},
{ "ShuffleChannels", ShuffleChannels},
{ "DFT", DFT},
{ "IDFT", DFT},
{ "Abs", Math},
{ "Acos", Math},
{ "Acosh", Math},
{ "Asin", Math},
{ "Asinh", Math},
{ "Atan", Math},
{ "Atanh", Math},
{ "Ceil", Math},
{ "Ceiling", Math},
{ "Cos", Math},
{ "Cosh", Math},
{ "Floor", Math},
{ "HardSigmoid", Math},
{ "Log", Math},
{ "Neg", Math},
{ "Reciprocal", Math},
{ "Selu", Math},
{ "Sign", Math},
{ "Sin", Math},
{ "Sinh", Math},
{ "SoftPlus", Math},
{ "Softsign", Math},
{ "Tan", Math},
{ "CTCLoss", CTCLoss},
{ "Bucketize", Bucketize},
{ "CTCGreedyDecoder", CTCGreedyDecoder},
{ "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
{ "CumSum", CumSum},
{ "DetectionOutput", DetectionOutput},
{ "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
{ "LogSoftmax", LogSoftmax},
{ "TopK", TopK},
{ "GatherTree", GatherTree},
{ "GRN", GRN},
{ "Range", Range},
{ "Proposal", Proposal},
{ "ReorgYolo", ReorgYolo},
{ "ReverseSequence", ReverseSequence},
{ "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
{ "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
{ "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
{ "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
{ "ExtractImagePatches", ExtractImagePatches},
{ "NonMaxSuppression", NonMaxSuppression},
{ "NonMaxSuppressionIEInternal", NonMaxSuppression},
{ "MatrixNms", MatrixNms},
{ "MulticlassNms", MulticlassNms}
};
Type TypeFromName(const std::string & type) {
auto itType = type_to_name_tbl.find(type);
if (type_to_name_tbl.end() != itType) {
return itType->second;
}
return Unknown;
}
template<>
DnnlMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtInputPort(portNum));
}
template<>
BlockedMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtInputPort(portNum));
}
template<>
DnnlMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtOutputPort(portNum));
}
template<>
BlockedMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtOutputPort(portNum));
}
std::string NameFromType(Type type) {
switch (type) {
case Generic:
return "Generic";
case Reorder:
return "Reorder";
case Input:
return "Input";
case Output:
return "Output";
case Convolution:
return "Convolution";
case Deconvolution:
return "Deconvolution";
case Lrn:
return "Lrn";
case Pooling:
return "Pooling";
case AdaptivePooling:
return "AdaptivePooling";
case FullyConnected:
return "FullyConnected";
case MatMul:
return "MatMul";
case Softmax:
return "Softmax";
case Split:
return "Split";
case Concatenation:
return "Concatenation";
case StridedSlice:
return "StridedSlice";
case Reshape:
return "Reshape";
case Tile:
return "Tile";
case ROIAlign:
return "ROIAlign";
case ROIPooling:
return "ROIPooling";
case PSROIPooling:
return "PSROIPooling";
case DepthToSpace:
return "DepthToSpace";
case BatchToSpace:
return "BatchToSpace";
case Pad:
return "Pad";
case Transpose:
return "Transpose";
case SpaceToDepth:
return "SpaceToDepth";
case SpaceToBatch:
return "SpaceToBatch";
case MemoryOutput:
return "MemoryOutput";
case MemoryInput:
return "MemoryInput";
case RNNSeq:
return "RNNSeq";
case RNNCell:
return "RNNCell";
case Eltwise:
return "Eltwise";
case FakeQuantize:
return "FakeQuantize";
case BinaryConvolution:
return "BinaryConvolution";
case DeformableConvolution:
return "DeformableConvolution";
case MVN:
return "MVN";
case TensorIterator:
return "TensorIterator";
case Convert:
return "Convert";
case NormalizeL2:
return "NormalizeL2";
case ScatterUpdate:
return "ScatterUpdate";
case ScatterElementsUpdate:
return "ScatterElementsUpdate";
case ScatterNDUpdate:
return "ScatterNDUpdate";
case Interpolate:
return "Interpolate";
case Reduce:
return "Reduce";
case Broadcast:
return "Broadcast";
case EmbeddingSegmentsSum:
return "EmbeddingSegmentsSum";
case EmbeddingBagPackedSum:
return "EmbeddingBagPackedSum";
case EmbeddingBagOffsetsSum:
return "EmbeddingBagOffsetsSum";
case Gather:
return "Gather";
case GatherElements:
return "GatherElements";
case GatherND:
return "GatherND";
case OneHot:
return "OneHot";
case RegionYolo:
return "RegionYolo";
case Select:
return "Select";
case Roll:
return "Roll";
case ShuffleChannels:
return "ShuffleChannels";
case DFT:
return "DFT";
case Math:
return "Math";
case CTCLoss:
return "CTCLoss";
case Bucketize:
return "Bucketize";
case CTCGreedyDecoder:
return "CTCGreedyDecoder";
case CTCGreedyDecoderSeqLen:
return "CTCGreedyDecoderSeqLen";
case CumSum:
return "CumSum";
case DetectionOutput:
return "DetectionOutput";
case ExperimentalDetectronDetectionOutput:
return "ExperimentalDetectronDetectionOutput";
case LogSoftmax:
return "LogSoftmax";
case TopK:
return "TopK";
case GatherTree:
return "GatherTree";
case GRN:
return "GRN";
case Range:
return "Range";
case Proposal:
return "Proposal";
case ReorgYolo:
return "ReorgYolo";
case ReverseSequence:
return "ReverseSequence";
case ExperimentalDetectronTopKROIs:
return "ExperimentalDetectronTopKROIs";
case ExperimentalDetectronROIFeatureExtractor:
return "ExperimentalDetectronROIFeatureExtractor";
case ExperimentalDetectronPriorGridGenerator:
return "ExperimentalDetectronPriorGridGenerator";
case ExperimentalDetectronGenerateProposalsSingleImage:
return "ExperimentalDetectronGenerateProposalsSingleImage";
case ExtractImagePatches:
return "ExtractImagePatches";
case NonMaxSuppression:
return "NonMaxSuppression";
case MatrixNms:
return "MatrixNms";
case MulticlassNms:
return "MulticlassNms";
default:
return "Unknown";
}
}
} // namespace MKLDNNPlugin
MKLDNNNode::NodesFactory & MKLDNNNode::factory() {
static NodesFactory factoryInstance;
@ -439,7 +74,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
algorithm = Algorithm::Undefined;
algorithm = Algorithm::Default;
fusingPort = -1;
const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();

View File

@ -36,9 +36,6 @@ using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>;
using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
Type TypeFromName(const std::string & type);
std::string NameFromType(Type type);
class PortConfigurator {
public:
PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape,
@ -629,7 +626,7 @@ protected:
MKLDNNWeightsSharing::Ptr weightCache;
Algorithm algorithm = Algorithm::Undefined;
Algorithm algorithm = Algorithm::Default;
bool isInQuantizedGraph = false;
@ -744,6 +741,10 @@ private:
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
#ifdef CPU_DEBUG_CAPS
friend class Verbose;
#endif
};
class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,

View File

@ -5,20 +5,25 @@
#pragma once
#include <chrono>
#include <ratio>
namespace MKLDNNPlugin {
class PerfCount {
uint64_t duration;
uint64_t total_duration;
uint32_t num;
std::chrono::high_resolution_clock::time_point __start = {};
std::chrono::high_resolution_clock::time_point __finish = {};
public:
PerfCount(): duration(0), num(0) {}
PerfCount(): total_duration(0), num(0) {}
uint64_t avg() { return (num == 0) ? 0 : duration / num; }
std::chrono::duration<double, std::milli> duration() const {
return __finish - __start;
}
uint64_t avg() const { return (num == 0) ? 0 : total_duration / num; }
private:
void start_itr() {
@ -27,8 +32,7 @@ private:
void finish_itr() {
__finish = std::chrono::high_resolution_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
total_duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
num++;
}
@ -46,5 +50,5 @@ public:
} // namespace MKLDNNPlugin
#define GET_PERF(_counter) std::unique_ptr<PerfHelper>(new PerfHelper(_counter->PerfCounter()))
#define PERF(_need, _counter) auto pc = _need ? GET_PERF(_counter) : nullptr;
#define GET_PERF(_node) std::unique_ptr<PerfHelper>(new PerfHelper(_node->PerfCounter()))
#define PERF(_node, _need) auto pc = _need ? GET_PERF(_node) : nullptr;

View File

@ -24,6 +24,7 @@ public:
readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
readParam(verbose, "OV_CPU_VERBOSE");
}
std::string blobDumpDir;
@ -33,9 +34,10 @@ public:
std::string blobDumpNodeType;
std::string blobDumpNodeName;
std::string execGraphPath;
std::string verbose;
private:
void readParam(std::string& param, const char* envVar) {
static void readParam(std::string& param, const char* envVar) {
if (const char* envValue = std::getenv(envVar))
param = envValue;
}

View File

@ -0,0 +1,169 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS
#include "verbose.h"
#include "mkldnn_node.h"
#include "cpu_types.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "dnnl_types.h"
#include "dnnl_debug.h"
#include <string>
#include <cstdlib>
#include <sstream>
#include <iostream>
namespace MKLDNNPlugin {
// Decide whether verbose info should be emitted for the current node:
// requires verbosity level >= 1, and skips nodes that do no real work
// (constants and graph Input/Output placeholders).
bool Verbose::shouldBePrinted() const {
    if (lvl < 1)
        return false;
    const auto nodeType = node->getType();
    return !(node->isConstant() || nodeType == Input || nodeType == Output);
}
/**
 * Print node verbose execution information to cout.
 * Similar to DNNL_VERBOSE output.
 * Formatting is written in C using oneDNN format functions.
 * Can be rewritten in pure C++ if necessary.
 */
void Verbose::printInfo() {
/* 1, 2, 3, etc -> no color
* 11, 22, 33, etc -> colorize */
bool colorUp = lvl / 10 > 0 ? true : false;
enum Color {
RED,
GREEN,
YELLOW,
BLUE,
PURPLE,
CYAN
};
auto colorize = [&](const Color color, const std::string& str) {
if (!colorUp)
return str;
const std::string red("\033[1;31m");
const std::string green("\033[1;32m");
const std::string yellow("\033[1;33m");
const std::string blue("\033[1;34m");
const std::string purple("\033[1;35m");
const std::string cyan("\033[1;36m");
const std::string reset("\033[0m");
std::string colorCode;
switch (color) {
case RED: colorCode = red;
break;
case GREEN: colorCode = green;
break;
case YELLOW: colorCode = yellow;
break;
case BLUE: colorCode = blue;
break;
case PURPLE: colorCode = purple;
break;
case CYAN: colorCode = cyan;
break;
default: colorCode = reset;
break;
}
return colorCode + str + reset;
};
// can be increased if necessary
const int CPU_VERBOSE_DAT_LEN = 512;
char portsInfo[CPU_VERBOSE_DAT_LEN] = {'\0'};
int written = 0;
int written_total = 0;
auto shift = [&](int size) {
if (written < 0 || written_total + size > CPU_VERBOSE_DAT_LEN) {
const char* errorMsg = "# NOT ENOUGHT BUFFER SIZE #";
snprintf(portsInfo, strlen(errorMsg) + 1, "%s", errorMsg);
written_total = strlen(errorMsg);
return;
}
written_total += size;
};
auto formatMemDesc = [&](const dnnl_memory_desc_t& desc, std::string& prefix) {
prefix = colorize(BLUE, prefix);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, " ");
shift(written);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", prefix.c_str());
shift(written);
written = dnnl_md2fmt_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
shift(written);
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, ":");
shift(written);
written = dnnl_md2dim_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
shift(written);
};
for (int i = 0; i < node->getParentEdges().size(); i++) {
std::string prefix("src:" + std::to_string(i) + ':');
formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
node->getParentEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
prefix);
}
for (int i = 0; i < node->getChildEdges().size(); i++) {
std::string prefix("dst:" + std::to_string(i) + ':');
formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
node->getChildEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
prefix);
}
std::string post_ops;
if (!node->getFusedWith().empty()) {
post_ops += "post_ops:'";
for (const auto& fusedNode : node->getFusedWith()) {
post_ops.append(colorize(GREEN, fusedNode->getName())).append(":")
.append(colorize(CYAN, NameFromType(fusedNode->getType()))).append(":")
.append(algToString(fusedNode->getAlgorithm()))
.append(";");
}
post_ops += "'";
}
std::string nodeImplementer = "cpu";
if (node->prim)
nodeImplementer = "dnnl"; // oneDNN
else if (node->getType() == Reference)
nodeImplementer = "ngraph_ref"; // ngraph reference
const std::string& nodeName = colorize(GREEN, node->getName());
const std::string& nodeType = colorize(CYAN, NameFromType(node->getType()));
const std::string& nodeAlg = algToString(node->getAlgorithm());
const std::string& nodePrimImplType = impl_type_to_string(node->getSelectedPrimitiveDescriptor()->getImplementationType());
stream << "ov_cpu_verbose" << ','
<< "exec" << ','
<< nodeImplementer << ','
<< nodeName << ":" << nodeType << ":" << nodeAlg << ','
<< nodePrimImplType << ','
<< portsInfo << ','
<< post_ops << ',';
}
// Append the node's accumulated execution time to the verbose record.
// NOTE(review): the perf counter visible in this commit is accumulated via
// duration_cast<microseconds>, yet the suffix printed here is "ms" — the
// label may be misstating the unit; confirm against PerfCounter's
// duration() return type.
void Verbose::printDuration() {
    const auto& duration = node->PerfCounter().duration().count();
    stream << duration << "ms";
}
// Write the accumulated verbose record to stdout in one shot.
// Uses str() rather than streaming rdbuf(): inserting a streambuf that
// yields no characters sets failbit on std::cout (per the standard's
// ostream<<streambuf semantics), which would silently disable all
// subsequent stdout output.
void Verbose::flush() const {
    std::cout << stream.str() << "\n";
}
} // namespace MKLDNNPlugin
#endif // CPU_DEBUG_CAPS

View File

@ -0,0 +1,46 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS
#include "mkldnn_node.h"
#include <string>
#include <cstdlib>
#include <sstream>
namespace MKLDNNPlugin {
class Verbose {
public:
    /// RAII-style verbose printer: the constructor emits the node execution
    /// info, the destructor emits the duration and flushes the record to
    /// stdout. Intended to be instantiated on the stack via the VERBOSE macro.
    /// @param _node node being traced; the reference is stored, not copied,
    ///              so it must outlive this object
    /// @param _lvl  verbosity level string (from OV_CPU_VERBOSE); parsed with
    ///              atoi, so non-numeric input yields 0 (printing disabled,
    ///              values >= 10 additionally enable colored output)
    Verbose(const MKLDNNNodePtr& _node, const std::string& _lvl)
        : node(_node), lvl(atoi(_lvl.c_str())) {
        if (!shouldBePrinted())
            return;
        printInfo();
    }

    // Non-virtual: the class has no virtual members and is never used
    // polymorphically — it exists only as a scoped stack object.
    ~Verbose() {
        if (!shouldBePrinted())
            return;
        printDuration();
        flush();
    }

    // Stack-only helper: copying would duplicate the buffered record
    // (std::stringstream is non-copyable anyway); make the intent explicit.
    Verbose(const Verbose&) = delete;
    Verbose& operator=(const Verbose&) = delete;

private:
    const MKLDNNNodePtr& node;  // borrowed reference; see lifetime note above
    const int lvl;              // parsed verbosity level
    std::stringstream stream;   // record buffer, flushed once in the dtor

    bool shouldBePrinted() const;
    void printInfo();
    void printDuration();
    void flush() const;
};
#define VERBOSE(...) Verbose(__VA_ARGS__)
} // namespace MKLDNNPlugin
#else
#define VERBOSE(...)
#endif // CPU_DEBUG_CAPS

View File

@ -12,6 +12,7 @@
#include <algorithm>
#include <cctype>
#include <functional>
#include <iterator>
#include <map>
#include <set>
#include <unordered_map>