[CPU] Add OV_CPU_VERBOSE env var to print node exec info to cout (#6390)
This commit is contained in:
parent
b968c7b813
commit
c92988c8e9
459
inference-engine/src/mkldnn_plugin/cpu_types.cpp
Normal file
459
inference-engine/src/mkldnn_plugin/cpu_types.cpp
Normal file
@ -0,0 +1,459 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "cpu_types.h"
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
// A single dimension of a tensor shape.
using Dim = std::size_t;
// An ordered list of dimensions describing a full tensor shape.
using VectorDims = std::vector<Dim>;

// Maps an ngraph/IE operation type name to the plugin-internal node Type.
// Lookups are case-insensitive (per the caseless_unordered_map type name);
// used by TypeFromName() below.
//
// NOTE(review): several keys appear twice with different values
// ("Abs" -> Eltwise and later "Abs" -> Math; "SoftPlus" -> Eltwise and later
// "SoftPlus" -> Math). When a map is built from an initializer list, only one
// entry per key survives (which one is unspecified by the standard), so the
// later Math entries are most likely dead -- confirm intent.
const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
    { "Constant", Input },
    { "Parameter", Input },
    { "Result", Output },
    { "Convolution", Convolution },
    { "GroupConvolution", Convolution },
    { "MatMul", MatMul },
    { "FullyConnected", FullyConnected },
    { "MaxPool", Pooling },
    { "AvgPool", Pooling },
    { "AdaptiveMaxPool", AdaptivePooling},
    { "AdaptiveAvgPool", AdaptivePooling},
    { "Add", Eltwise },
    { "Subtract", Eltwise },
    { "Multiply", Eltwise },
    { "Divide", Eltwise },
    { "SquaredDifference", Eltwise },
    { "Maximum", Eltwise },
    { "Minimum", Eltwise },
    { "Mod", Eltwise },
    { "FloorMod", Eltwise },
    { "Power", Eltwise },
    { "PowerStatic", Eltwise },
    { "Equal", Eltwise },
    { "NotEqual", Eltwise },
    { "Greater", Eltwise },
    { "GreaterEqual", Eltwise },
    { "Less", Eltwise },
    { "LessEqual", Eltwise },
    { "LogicalAnd", Eltwise },
    { "LogicalOr", Eltwise },
    { "LogicalXor", Eltwise },
    { "LogicalNot", Eltwise },
    { "Relu", Eltwise },
    { "LeakyRelu", Eltwise },
    { "Gelu", Eltwise },
    { "Elu", Eltwise },
    { "Tanh", Eltwise },
    { "Sigmoid", Eltwise },
    { "Abs", Eltwise },
    { "Sqrt", Eltwise },
    { "Clamp", Eltwise },
    { "Exp", Eltwise },
    { "SwishCPU", Eltwise },
    { "HSwish", Eltwise },
    { "Mish", Eltwise },
    { "HSigmoid", Eltwise },
    { "Round", Eltwise },
    { "PRelu", Eltwise },
    { "Erf", Eltwise },
    { "SoftPlus", Eltwise },
    { "Reshape", Reshape },
    { "Squeeze", Reshape },
    { "Unsqueeze", Reshape },
    { "Softmax", Softmax },
    { "Reorder", Reorder },
    { "BatchToSpace", BatchToSpace },
    { "SpaceToBatch", SpaceToBatch },
    { "DepthToSpace", DepthToSpace },
    { "SpaceToDepth", SpaceToDepth },
    { "Roll", Roll },
    { "LRN", Lrn },
    { "Split", Split },
    { "VariadicSplit", Split },
    { "Concat", Concatenation },
    { "ConvolutionBackpropData", Deconvolution },
    { "GroupConvolutionBackpropData", Deconvolution },
    { "StridedSlice", StridedSlice },
    { "Tile", Tile },
    { "ROIAlign", ROIAlign },
    { "ROIPooling", ROIPooling },
    { "PSROIPooling", PSROIPooling },
    { "DeformablePSROIPooling", PSROIPooling },
    { "Pad", Pad },
    { "Transpose", Transpose },
    { "LSTMCell", RNNCell },
    { "GRUCell", RNNCell },
    { "RNNCell", RNNCell },
    { "LSTMSequence", RNNSeq },
    { "GRUSequence", RNNSeq },
    { "RNNSequence", RNNSeq },
    { "FakeQuantize", FakeQuantize },
    { "BinaryConvolution", BinaryConvolution },
    { "DeformableConvolution", DeformableConvolution },
    { "TensorIterator", TensorIterator },
    { "Loop", TensorIterator },
    { "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
    { "Assign", MemoryOutput }, // for construction from layer ctor
    { "Convert", Convert },
    { "MVN", MVN},
    { "NormalizeL2", NormalizeL2},
    { "ScatterUpdate", ScatterUpdate},
    { "ScatterElementsUpdate", ScatterElementsUpdate},
    { "ScatterNDUpdate", ScatterNDUpdate},
    { "Interpolate", Interpolate},
    { "ReduceL1", Reduce},
    { "ReduceL2", Reduce},
    { "ReduceLogicalAnd", Reduce},
    { "ReduceLogicalOr", Reduce},
    { "ReduceMax", Reduce},
    { "ReduceMean", Reduce},
    { "ReduceMin", Reduce},
    { "ReduceProd", Reduce},
    { "ReduceSum", Reduce},
    { "ReduceLogSum", Reduce},
    { "ReduceLogSumExp", Reduce},
    { "ReduceSumSquare", Reduce},
    { "Broadcast", Broadcast},
    { "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
    { "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
    { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
    { "Gather", Gather},
    { "GatherElements", GatherElements},
    { "GatherND", GatherND},
    { "OneHot", OneHot},
    { "RegionYolo", RegionYolo},
    { "Select", Select},
    { "ShuffleChannels", ShuffleChannels},
    { "DFT", DFT},
    { "IDFT", DFT},
    // NOTE(review): "Abs" and "SoftPlus" below duplicate the Eltwise entries
    // above -- see the note on the map declaration.
    { "Abs", Math},
    { "Acos", Math},
    { "Acosh", Math},
    { "Asin", Math},
    { "Asinh", Math},
    { "Atan", Math},
    { "Atanh", Math},
    { "Ceil", Math},
    { "Ceiling", Math},
    { "Cos", Math},
    { "Cosh", Math},
    { "Floor", Math},
    { "HardSigmoid", Math},
    { "Log", Math},
    { "Neg", Math},
    { "Reciprocal", Math},
    { "Selu", Math},
    { "Sign", Math},
    { "Sin", Math},
    { "Sinh", Math},
    { "SoftPlus", Math},
    { "Softsign", Math},
    { "Tan", Math},
    { "CTCLoss", CTCLoss},
    { "Bucketize", Bucketize},
    { "CTCGreedyDecoder", CTCGreedyDecoder},
    { "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
    { "CumSum", CumSum},
    { "DetectionOutput", DetectionOutput},
    { "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
    { "LogSoftmax", LogSoftmax},
    { "TopK", TopK},
    { "GatherTree", GatherTree},
    { "GRN", GRN},
    { "Range", Range},
    { "Proposal", Proposal},
    { "ReorgYolo", ReorgYolo},
    { "ReverseSequence", ReverseSequence},
    { "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
    { "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
    { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
    { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
    { "ExtractImagePatches", ExtractImagePatches},
    { "NonMaxSuppression", NonMaxSuppression},
    { "NonMaxSuppressionIEInternal", NonMaxSuppression},
    { "MatrixNms", MatrixNms},
    { "MulticlassNms", MulticlassNms}
};
|
||||
|
||||
// Resolves an operation type name (case-insensitively, via the caseless map)
// to the plugin-internal node Type. Names absent from type_to_name_tbl map
// to Unknown.
Type TypeFromName(const std::string& type) {
    const auto entry = type_to_name_tbl.find(type);
    return (entry == type_to_name_tbl.end()) ? Unknown : entry->second;
}
|
||||
|
||||
// Returns a human-readable name for the given node Type (used for
// logging/debug output such as the verbose mode and execution-graph
// serialization). Any value without an explicit case -- including
// Unknown -- maps to "Unknown".
std::string NameFromType(const Type type) {
    switch (type) {
        case Generic:
            return "Generic";
        case Reorder:
            return "Reorder";
        case Input:
            return "Input";
        case Output:
            return "Output";
        case Convolution:
            return "Convolution";
        case Deconvolution:
            return "Deconvolution";
        case Lrn:
            return "Lrn";
        case Pooling:
            return "Pooling";
        case AdaptivePooling:
            return "AdaptivePooling";
        case FullyConnected:
            return "FullyConnected";
        case MatMul:
            return "MatMul";
        case Softmax:
            return "Softmax";
        case Split:
            return "Split";
        case Concatenation:
            return "Concatenation";
        case StridedSlice:
            return "StridedSlice";
        case Reshape:
            return "Reshape";
        case Tile:
            return "Tile";
        case ROIAlign:
            return "ROIAlign";
        case ROIPooling:
            return "ROIPooling";
        case PSROIPooling:
            return "PSROIPooling";
        case DepthToSpace:
            return "DepthToSpace";
        case BatchToSpace:
            return "BatchToSpace";
        case Pad:
            return "Pad";
        case Transpose:
            return "Transpose";
        case SpaceToDepth:
            return "SpaceToDepth";
        case SpaceToBatch:
            return "SpaceToBatch";
        case MemoryOutput:
            return "MemoryOutput";
        case MemoryInput:
            return "MemoryInput";
        case RNNSeq:
            return "RNNSeq";
        case RNNCell:
            return "RNNCell";
        case Eltwise:
            return "Eltwise";
        case FakeQuantize:
            return "FakeQuantize";
        case BinaryConvolution:
            return "BinaryConvolution";
        case DeformableConvolution:
            return "DeformableConvolution";
        case MVN:
            return "MVN";
        case TensorIterator:
            return "TensorIterator";
        case Convert:
            return "Convert";
        case NormalizeL2:
            return "NormalizeL2";
        case ScatterUpdate:
            return "ScatterUpdate";
        case ScatterElementsUpdate:
            return "ScatterElementsUpdate";
        case ScatterNDUpdate:
            return "ScatterNDUpdate";
        case Interpolate:
            return "Interpolate";
        case Reduce:
            return "Reduce";
        case Broadcast:
            return "Broadcast";
        case EmbeddingSegmentsSum:
            return "EmbeddingSegmentsSum";
        case EmbeddingBagPackedSum:
            return "EmbeddingBagPackedSum";
        case EmbeddingBagOffsetsSum:
            return "EmbeddingBagOffsetsSum";
        case Gather:
            return "Gather";
        case GatherElements:
            return "GatherElements";
        case GatherND:
            return "GatherND";
        case OneHot:
            return "OneHot";
        case RegionYolo:
            return "RegionYolo";
        case Select:
            return "Select";
        case Roll:
            return "Roll";
        case ShuffleChannels:
            return "ShuffleChannels";
        case DFT:
            return "DFT";
        case Math:
            return "Math";
        case CTCLoss:
            return "CTCLoss";
        case Bucketize:
            return "Bucketize";
        case CTCGreedyDecoder:
            return "CTCGreedyDecoder";
        case CTCGreedyDecoderSeqLen:
            return "CTCGreedyDecoderSeqLen";
        case CumSum:
            return "CumSum";
        case DetectionOutput:
            return "DetectionOutput";
        case ExperimentalDetectronDetectionOutput:
            return "ExperimentalDetectronDetectionOutput";
        case LogSoftmax:
            return "LogSoftmax";
        case TopK:
            return "TopK";
        case GatherTree:
            return "GatherTree";
        case GRN:
            return "GRN";
        case Range:
            return "Range";
        case Proposal:
            return "Proposal";
        case ReorgYolo:
            return "ReorgYolo";
        case ReverseSequence:
            return "ReverseSequence";
        case ExperimentalDetectronTopKROIs:
            return "ExperimentalDetectronTopKROIs";
        case ExperimentalDetectronROIFeatureExtractor:
            return "ExperimentalDetectronROIFeatureExtractor";
        case ExperimentalDetectronPriorGridGenerator:
            return "ExperimentalDetectronPriorGridGenerator";
        case ExperimentalDetectronGenerateProposalsSingleImage:
            return "ExperimentalDetectronGenerateProposalsSingleImage";
        case ExtractImagePatches:
            return "ExtractImagePatches";
        case NonMaxSuppression:
            return "NonMaxSuppression";
        case MatrixNms:
            return "MatrixNms";
        case MulticlassNms:
            return "MulticlassNms";
        default:
            // Covers Unknown and any Type value not listed above.
            return "Unknown";
    }
}
|
||||
|
||||
// Returns the textual name of an Algorithm enumerator for verbose/debug
// output. The CASE macro expands to an early return of the stringized
// enumerator name when it matches; any value not listed below (including
// Undefined itself) falls through to the "Undefined" fallback.
std::string algToString(const Algorithm alg) {
#define CASE(_alg) do { \
    if (alg == _alg) return #_alg; \
} while (0)
    CASE(Default);
    // Pooling algorithms
    CASE(PoolingMax);
    CASE(PoolingAvg);
    // Convolution / deconvolution algorithms
    CASE(ConvolutionCommon);
    CASE(ConvolutionGrouped);
    CASE(DeconvolutionCommon);
    CASE(DeconvolutionGrouped);
    // Eltwise algorithms
    CASE(EltwiseAdd);
    CASE(EltwiseMultiply);
    CASE(EltwiseSubtract);
    CASE(EltwiseDivide);
    CASE(EltwiseFloorMod);
    CASE(EltwiseMod);
    CASE(EltwiseMaximum);
    CASE(EltwiseMinimum);
    CASE(EltwiseSquaredDifference);
    CASE(EltwisePowerDynamic);
    CASE(EltwisePowerStatic);
    CASE(EltwiseMulAdd);
    CASE(EltwiseEqual);
    CASE(EltwiseNotEqual);
    CASE(EltwiseGreater);
    CASE(EltwiseGreaterEqual);
    CASE(EltwiseLess);
    CASE(EltwiseLessEqual);
    CASE(EltwiseLogicalAnd);
    CASE(EltwiseLogicalOr);
    CASE(EltwiseLogicalXor);
    CASE(EltwiseLogicalNot);
    CASE(EltwiseRelu);
    CASE(EltwiseGelu);
    CASE(EltwiseElu);
    CASE(EltwiseTanh);
    CASE(EltwiseSigmoid);
    CASE(EltwiseAbs);
    CASE(EltwiseSqrt);
    CASE(EltwiseSoftRelu);
    CASE(EltwiseExp);
    CASE(EltwiseClamp);
    CASE(EltwiseSwish);
    CASE(EltwisePrelu);
    CASE(EltwiseMish);
    CASE(EltwiseHswish);
    CASE(EltwiseHsigmoid);
    CASE(EltwiseRoundHalfToEven);
    CASE(EltwiseRoundHalfAwayFromZero);
    CASE(EltwiseErf);
    // FakeQuantize algorithms
    CASE(FQCommon);
    CASE(FQQuantization);
    CASE(FQBinarization);
    // ROI / PSROI pooling algorithms
    CASE(ROIPoolingMax);
    CASE(ROIPoolingBilinear);
    CASE(ROIAlignMax);
    CASE(ROIAlignAvg);
    CASE(PSROIPoolingAverage);
    CASE(PSROIPoolingBilinear);
    CASE(PSROIPoolingBilinearDeformable);
    // Reduce algorithms
    CASE(ReduceL1);
    CASE(ReduceL2);
    CASE(ReduceAnd);
    CASE(ReduceOr);
    CASE(ReduceMax);
    CASE(ReduceMean);
    CASE(ReduceMin);
    CASE(ReduceProd);
    CASE(ReduceSum);
    CASE(ReduceLogSum);
    CASE(ReduceLogSumExp);
    CASE(ReduceSumSquare);
    // Math algorithms
    CASE(MathAbs);
    CASE(MathAcos);
    CASE(MathAcosh);
    CASE(MathAsin);
    CASE(MathAsinh);
    CASE(MathAtan);
    CASE(MathAtanh);
    CASE(MathCeiling);
    CASE(MathCos);
    CASE(MathCosh);
    CASE(MathErf);
    CASE(MathFloor);
    CASE(MathHardSigmoid);
    CASE(MathLog);
    CASE(MathNegative);
    CASE(MathReciprocal);
    CASE(MathSelu);
    CASE(MathSign);
    CASE(MathSin);
    CASE(MathSinh);
    CASE(MathSoftPlus);
    CASE(MathSoftsign);
    CASE(MathTan);
#undef CASE
    return "Undefined";
}
|
||||
|
||||
} // namespace MKLDNNPlugin
|
@ -4,7 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "caseless.hpp"
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
@ -97,7 +100,7 @@ enum Type {
|
||||
};
|
||||
|
||||
enum Algorithm {
|
||||
Undefined,
|
||||
Default,
|
||||
|
||||
// Pooling algorithms
|
||||
PoolingMax,
|
||||
@ -215,4 +218,11 @@ enum Algorithm {
|
||||
MathTan
|
||||
};
|
||||
|
||||
extern const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl;
|
||||
|
||||
Type TypeFromName(const std::string& type);
|
||||
|
||||
std::string NameFromType(const Type type);
|
||||
|
||||
std::string algToString(const Algorithm alg);
|
||||
} // namespace MKLDNNPlugin
|
||||
|
8
inference-engine/src/mkldnn_plugin/docs/README.md
Normal file
8
inference-engine/src/mkldnn_plugin/docs/README.md
Normal file
@ -0,0 +1,8 @@
|
||||
# Debug capabilities
|
||||
Use the following cmake option to enable debug capabilities:
|
||||
|
||||
`-DENABLE_DEBUG_CAPS=ON`
|
||||
|
||||
* [Verbose mode](verbose.md)
|
||||
* [Blob dumping](blob_dumping.md)
|
||||
* [Graph serialization](graph_serialization.md)
|
@ -1,9 +1,4 @@
|
||||
# Debug capabilities
|
||||
Use the following cmake option to enable debug capabilities:
|
||||
|
||||
`-DENABLE_CPU_DEBUG_CAPS=ON`
|
||||
|
||||
## Blob dumping
|
||||
# Blob dumping
|
||||
Blob dumping is controlled by environment variables (filters).
|
||||
|
||||
The variables define conditions of the node which input and output blobs
|
||||
@ -24,12 +19,12 @@ or for shell session (bash example):
|
||||
export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
|
||||
binary ...
|
||||
```
|
||||
### Specify dump directory
|
||||
## Specify dump directory
|
||||
```sh
|
||||
OV_CPU_BLOB_DUMP_DIR=<directory-name> binary ...
|
||||
```
|
||||
Default is *mkldnn_dump*
|
||||
### Specify dump format
|
||||
## Specify dump format
|
||||
```sh
|
||||
OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
|
||||
```
|
||||
@ -37,7 +32,7 @@ Options are:
|
||||
* BIN (default)
|
||||
* TEXT
|
||||
|
||||
### Filter input / output blobs
|
||||
## Filter input / output blobs
|
||||
To dump only input / output blobs:
|
||||
```sh
|
||||
OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ...
|
||||
@ -51,7 +46,7 @@ Options are:
|
||||
* OUT
|
||||
* ALL
|
||||
|
||||
### Filter by execution ID
|
||||
## Filter by execution ID
|
||||
To dump blobs only for nodes with specified execution IDs:
|
||||
```sh
|
||||
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ...
|
||||
@ -61,7 +56,7 @@ Example:
|
||||
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ...
|
||||
```
|
||||
|
||||
### Filter by type
|
||||
## Filter by type
|
||||
To dump blobs only for nodes with specified types:
|
||||
```sh
|
||||
OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ...
|
||||
@ -73,7 +68,7 @@ Example:
|
||||
|
||||
> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types
|
||||
|
||||
### Filter by name
|
||||
## Filter by name
|
||||
To dump blobs only for nodes with name matching specified regex:
|
||||
```sh
|
||||
OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ...
|
||||
@ -83,7 +78,7 @@ Example:
|
||||
OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ...
|
||||
```
|
||||
|
||||
### Dump all the blobs
|
||||
## Dump all the blobs
|
||||
```sh
|
||||
OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
|
||||
```
|
||||
@ -95,22 +90,3 @@ Example:
|
||||
```sh
|
||||
OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
|
||||
```
|
||||
|
||||
## Graph serialization
|
||||
The functionality allows to serialize execution graph using environment variable:
|
||||
```sh
|
||||
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
|
||||
```
|
||||
|
||||
Possible serialization options:
|
||||
* cout
|
||||
|
||||
Serialize to console output
|
||||
* \<path\>.xml
|
||||
|
||||
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
|
||||
* \<path\>.dot
|
||||
|
||||
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.
|
||||
|
||||
|
@ -0,0 +1,17 @@
|
||||
# Graph serialization
|
||||
|
||||
This functionality allows serializing the execution graph using an environment variable:
|
||||
```sh
|
||||
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
|
||||
```
|
||||
|
||||
Possible serialization options:
|
||||
* cout
|
||||
|
||||
Serialize to console output
|
||||
* \<path\>.xml
|
||||
|
||||
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
|
||||
* \<path\>.dot
|
||||
|
||||
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.
|
38
inference-engine/src/mkldnn_plugin/docs/verbose.md
Normal file
38
inference-engine/src/mkldnn_plugin/docs/verbose.md
Normal file
@ -0,0 +1,38 @@
|
||||
# Verbose mode
|
||||
|
||||
It is possible to enable tracing execution of plugin nodes to cout and collect statistics, such as:
|
||||
- node implementer:
|
||||
* cpu (CPU plugin)
|
||||
* dnnl (oneDNN library)
|
||||
* ngraph_ref (ngraph reference fallback)
|
||||
- node name
|
||||
- node type
|
||||
- node algorithm
|
||||
- node primitive info
|
||||
- input / output ports info
|
||||
- fused nodes
|
||||
- execution time
|
||||
- etc
|
||||
|
||||
Format:
|
||||
```sh
|
||||
ov_cpu_verbose,exec,<node_implementer>,\
|
||||
<node_name>:<node_type>:<node_alg>,<impl_type>,\
|
||||
src:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
|
||||
dst:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
|
||||
post_ops:'<node_name>:<node_type>:<node_alg>;...;',\
|
||||
<execution_time>
|
||||
```
|
||||
|
||||
To turn on verbose mode the following environment variable should be used:
|
||||
```sh
|
||||
OV_CPU_VERBOSE=<level> binary ...
|
||||
```
|
||||
|
||||
Currently verbose mode has only one level; any digit can be used to activate it.
|
||||
|
||||
To get colored verbose output, simply duplicate the level's digit, for example:
|
||||
```sh
|
||||
OV_CPU_VERBOSE=11 binary ...
|
||||
```
|
||||
**NOTE:** Shell color codes are used
|
@ -4,9 +4,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mkldnn/ie_mkldnn.h"
|
||||
#include "cpu_types.h"
|
||||
|
||||
#include <ie_layouts.h>
|
||||
#include <ie_blob.h>
|
||||
#include "mkldnn/ie_mkldnn.h"
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
|
@ -50,3 +50,38 @@ impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) {
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// Maps an impl_desc_type enumerator to its textual name for logging/verbose
// output. Each CASE is an exact equality comparison (not a bitmask test), so
// composite constants such as jit_avx512_1x1 are distinguished from
// jit_avx512 and the order of the CASE lines does not matter. Values without
// a CASE fall back to "unknown".
const char* MKLDNNPlugin::impl_type_to_string(impl_desc_type type) {
#define CASE(_type) do { \
    if (type == _type) return #_type; \
} while (0)
    CASE(unknown);
    CASE(undef);
    CASE(ref_any);
    CASE(reorder);
    CASE(gemm_any);
    CASE(gemm_blas);
    CASE(gemm_avx512);
    CASE(gemm_avx2);
    CASE(gemm_avx);
    CASE(gemm_sse42);
    CASE(jit_gemm);
    CASE(jit_avx512_winograd);
    CASE(jit_avx512);
    CASE(jit_avx2);
    CASE(jit_avx);
    CASE(jit_sse42);
    CASE(jit_uni);
    CASE(jit_avx512_1x1);
    CASE(jit_avx2_1x1);
    CASE(jit_avx_1x1);
    CASE(jit_sse42_1x1);
    CASE(jit_uni_1x1);
    CASE(jit_avx512_dw);
    CASE(jit_avx2_dw);
    CASE(jit_avx_dw);
    CASE(jit_sse42_dw);
    CASE(jit_uni_dw);
#undef CASE
    return "unknown";
}
|
||||
|
@ -63,6 +63,7 @@ enum impl_desc_type {
|
||||
jit_uni_dw = jit | uni | _dw,
|
||||
};
|
||||
|
||||
const char * impl_type_to_string(impl_desc_type type);
|
||||
impl_desc_type parse_impl_name(std::string impl_desc_name);
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "utils/node_dumper.h"
|
||||
#include "utils/ngraph_utils.hpp"
|
||||
#include "utils/cpu_utils.hpp"
|
||||
#include "utils/verbose.h"
|
||||
#include "memory_desc/cpu_memory_desc_utils.h"
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
@ -828,7 +829,9 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
|
||||
mkldnn::stream stream(eng);
|
||||
|
||||
for (const auto& node : executableGraphNodes) {
|
||||
PERF(config.collectPerfCounters, node);
|
||||
VERBOSE(node, config.debugCaps.verbose);
|
||||
PERF(node, config.collectPerfCounters);
|
||||
|
||||
if (request)
|
||||
request->ThrowIfCanceled();
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include "mkldnn_node.h"
|
||||
#include "dnnl_debug.h"
|
||||
#include "mkldnn_extension_mngr.h"
|
||||
#include "mkldnn_itt.h"
|
||||
|
||||
@ -43,14 +44,14 @@
|
||||
#include <nodes/mkldnn_shuffle_channels_node.h>
|
||||
#include <nodes/mkldnn_reference_node.h>
|
||||
#include <nodes/mkldnn_fake_quantize_node.h>
|
||||
#include <mkldnn_types.h>
|
||||
#include <dnnl_types.h>
|
||||
#include "mkldnn_extension_utils.h"
|
||||
#include "mkldnn/iml_type_mapper.h"
|
||||
|
||||
#include "nodes/common/cpu_memcpy.h"
|
||||
#include "mkldnn_debug.h"
|
||||
#include "utils/rt_info/memory_formats_attribute.hpp"
|
||||
|
||||
#include <dnnl_types.h>
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include "utils/general_utils.h"
|
||||
#include "utils/cpu_utils.hpp"
|
||||
@ -63,372 +64,6 @@ using namespace MKLDNNPlugin;
|
||||
using namespace openvino;
|
||||
|
||||
using namespace InferenceEngine::details;
|
||||
namespace MKLDNNPlugin {
|
||||
static const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
|
||||
{ "Constant", Input },
|
||||
{ "Parameter", Input },
|
||||
{ "Result", Output },
|
||||
{ "Convolution", Convolution },
|
||||
{ "GroupConvolution", Convolution },
|
||||
{ "MatMul", MatMul },
|
||||
{ "FullyConnected", FullyConnected },
|
||||
{ "MaxPool", Pooling },
|
||||
{ "AvgPool", Pooling },
|
||||
{ "AdaptiveMaxPool", AdaptivePooling},
|
||||
{ "AdaptiveAvgPool", AdaptivePooling},
|
||||
{ "Add", Eltwise },
|
||||
{ "Subtract", Eltwise },
|
||||
{ "Multiply", Eltwise },
|
||||
{ "Divide", Eltwise },
|
||||
{ "SquaredDifference", Eltwise },
|
||||
{ "Maximum", Eltwise },
|
||||
{ "Minimum", Eltwise },
|
||||
{ "Mod", Eltwise },
|
||||
{ "FloorMod", Eltwise },
|
||||
{ "Power", Eltwise },
|
||||
{ "PowerStatic", Eltwise },
|
||||
{ "Equal", Eltwise },
|
||||
{ "NotEqual", Eltwise },
|
||||
{ "Greater", Eltwise },
|
||||
{ "GreaterEqual", Eltwise },
|
||||
{ "Less", Eltwise },
|
||||
{ "LessEqual", Eltwise },
|
||||
{ "LogicalAnd", Eltwise },
|
||||
{ "LogicalOr", Eltwise },
|
||||
{ "LogicalXor", Eltwise },
|
||||
{ "LogicalNot", Eltwise },
|
||||
{ "Relu", Eltwise },
|
||||
{ "LeakyRelu", Eltwise },
|
||||
{ "Gelu", Eltwise },
|
||||
{ "Elu", Eltwise },
|
||||
{ "Tanh", Eltwise },
|
||||
{ "Sigmoid", Eltwise },
|
||||
{ "Abs", Eltwise },
|
||||
{ "Sqrt", Eltwise },
|
||||
{ "Clamp", Eltwise },
|
||||
{ "Exp", Eltwise },
|
||||
{ "SwishCPU", Eltwise },
|
||||
{ "HSwish", Eltwise },
|
||||
{ "Mish", Eltwise },
|
||||
{ "HSigmoid", Eltwise },
|
||||
{ "Round", Eltwise },
|
||||
{ "PRelu", Eltwise },
|
||||
{ "Erf", Eltwise },
|
||||
{ "SoftPlus", Eltwise },
|
||||
{ "Reshape", Reshape },
|
||||
{ "Squeeze", Reshape },
|
||||
{ "Unsqueeze", Reshape },
|
||||
{ "Softmax", Softmax },
|
||||
{ "Reorder", Reorder },
|
||||
{ "BatchToSpace", BatchToSpace },
|
||||
{ "SpaceToBatch", SpaceToBatch },
|
||||
{ "DepthToSpace", DepthToSpace },
|
||||
{ "SpaceToDepth", SpaceToDepth },
|
||||
{ "Roll", Roll },
|
||||
{ "LRN", Lrn },
|
||||
{ "Split", Split },
|
||||
{ "VariadicSplit", Split },
|
||||
{ "Concat", Concatenation },
|
||||
{ "ConvolutionBackpropData", Deconvolution },
|
||||
{ "GroupConvolutionBackpropData", Deconvolution },
|
||||
{ "StridedSlice", StridedSlice },
|
||||
{ "Tile", Tile },
|
||||
{ "ROIAlign", ROIAlign },
|
||||
{ "ROIPooling", ROIPooling },
|
||||
{ "PSROIPooling", PSROIPooling },
|
||||
{ "DeformablePSROIPooling", PSROIPooling },
|
||||
{ "Pad", Pad },
|
||||
{ "Transpose", Transpose },
|
||||
{ "LSTMCell", RNNCell },
|
||||
{ "GRUCell", RNNCell },
|
||||
{ "RNNCell", RNNCell },
|
||||
{ "LSTMSequence", RNNSeq },
|
||||
{ "GRUSequence", RNNSeq },
|
||||
{ "RNNSequence", RNNSeq },
|
||||
{ "FakeQuantize", FakeQuantize },
|
||||
{ "BinaryConvolution", BinaryConvolution },
|
||||
{ "DeformableConvolution", DeformableConvolution },
|
||||
{ "TensorIterator", TensorIterator },
|
||||
{ "Loop", TensorIterator },
|
||||
{ "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used
|
||||
{ "Assign", MemoryOutput }, // for construction from layer ctor
|
||||
{ "Convert", Convert },
|
||||
{ "MVN", MVN},
|
||||
{ "NormalizeL2", NormalizeL2},
|
||||
{ "ScatterUpdate", ScatterUpdate},
|
||||
{ "ScatterElementsUpdate", ScatterElementsUpdate},
|
||||
{ "ScatterNDUpdate", ScatterNDUpdate},
|
||||
{ "Interpolate", Interpolate},
|
||||
{ "ReduceL1", Reduce},
|
||||
{ "ReduceL2", Reduce},
|
||||
{ "ReduceLogicalAnd", Reduce},
|
||||
{ "ReduceLogicalOr", Reduce},
|
||||
{ "ReduceMax", Reduce},
|
||||
{ "ReduceMean", Reduce},
|
||||
{ "ReduceMin", Reduce},
|
||||
{ "ReduceProd", Reduce},
|
||||
{ "ReduceSum", Reduce},
|
||||
{ "ReduceLogSum", Reduce},
|
||||
{ "ReduceLogSumExp", Reduce},
|
||||
{ "ReduceSumSquare", Reduce},
|
||||
{ "Broadcast", Broadcast},
|
||||
{ "EmbeddingSegmentsSum", EmbeddingSegmentsSum},
|
||||
{ "EmbeddingBagPackedSum", EmbeddingBagPackedSum},
|
||||
{ "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum},
|
||||
{ "Gather", Gather},
|
||||
{ "GatherElements", GatherElements},
|
||||
{ "GatherND", GatherND},
|
||||
{ "OneHot", OneHot},
|
||||
{ "RegionYolo", RegionYolo},
|
||||
{ "Select", Select},
|
||||
{ "ShuffleChannels", ShuffleChannels},
|
||||
{ "DFT", DFT},
|
||||
{ "IDFT", DFT},
|
||||
{ "Abs", Math},
|
||||
{ "Acos", Math},
|
||||
{ "Acosh", Math},
|
||||
{ "Asin", Math},
|
||||
{ "Asinh", Math},
|
||||
{ "Atan", Math},
|
||||
{ "Atanh", Math},
|
||||
{ "Ceil", Math},
|
||||
{ "Ceiling", Math},
|
||||
{ "Cos", Math},
|
||||
{ "Cosh", Math},
|
||||
{ "Floor", Math},
|
||||
{ "HardSigmoid", Math},
|
||||
{ "Log", Math},
|
||||
{ "Neg", Math},
|
||||
{ "Reciprocal", Math},
|
||||
{ "Selu", Math},
|
||||
{ "Sign", Math},
|
||||
{ "Sin", Math},
|
||||
{ "Sinh", Math},
|
||||
{ "SoftPlus", Math},
|
||||
{ "Softsign", Math},
|
||||
{ "Tan", Math},
|
||||
{ "CTCLoss", CTCLoss},
|
||||
{ "Bucketize", Bucketize},
|
||||
{ "CTCGreedyDecoder", CTCGreedyDecoder},
|
||||
{ "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen},
|
||||
{ "CumSum", CumSum},
|
||||
{ "DetectionOutput", DetectionOutput},
|
||||
{ "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput},
|
||||
{ "LogSoftmax", LogSoftmax},
|
||||
{ "TopK", TopK},
|
||||
{ "GatherTree", GatherTree},
|
||||
{ "GRN", GRN},
|
||||
{ "Range", Range},
|
||||
{ "Proposal", Proposal},
|
||||
{ "ReorgYolo", ReorgYolo},
|
||||
{ "ReverseSequence", ReverseSequence},
|
||||
{ "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs},
|
||||
{ "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor},
|
||||
{ "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator},
|
||||
{ "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage},
|
||||
{ "ExtractImagePatches", ExtractImagePatches},
|
||||
{ "NonMaxSuppression", NonMaxSuppression},
|
||||
{ "NonMaxSuppressionIEInternal", NonMaxSuppression},
|
||||
{ "MatrixNms", MatrixNms},
|
||||
{ "MulticlassNms", MulticlassNms}
|
||||
};
|
||||
|
||||
Type TypeFromName(const std::string & type) {
|
||||
auto itType = type_to_name_tbl.find(type);
|
||||
if (type_to_name_tbl.end() != itType) {
|
||||
return itType->second;
|
||||
}
|
||||
return Unknown;
|
||||
}
|
||||
|
||||
template<>
|
||||
DnnlMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
|
||||
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtInputPort(portNum));
|
||||
}
|
||||
|
||||
template<>
|
||||
BlockedMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
|
||||
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtInputPort(portNum));
|
||||
}
|
||||
|
||||
template<>
|
||||
DnnlMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
|
||||
return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtOutputPort(portNum));
|
||||
}
|
||||
|
||||
template<>
|
||||
BlockedMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
|
||||
return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtOutputPort(portNum));
|
||||
}
|
||||
|
||||
std::string NameFromType(Type type) {
|
||||
switch (type) {
|
||||
case Generic:
|
||||
return "Generic";
|
||||
case Reorder:
|
||||
return "Reorder";
|
||||
case Input:
|
||||
return "Input";
|
||||
case Output:
|
||||
return "Output";
|
||||
case Convolution:
|
||||
return "Convolution";
|
||||
case Deconvolution:
|
||||
return "Deconvolution";
|
||||
case Lrn:
|
||||
return "Lrn";
|
||||
case Pooling:
|
||||
return "Pooling";
|
||||
case AdaptivePooling:
|
||||
return "AdaptivePooling";
|
||||
case FullyConnected:
|
||||
return "FullyConnected";
|
||||
case MatMul:
|
||||
return "MatMul";
|
||||
case Softmax:
|
||||
return "Softmax";
|
||||
case Split:
|
||||
return "Split";
|
||||
case Concatenation:
|
||||
return "Concatenation";
|
||||
case StridedSlice:
|
||||
return "StridedSlice";
|
||||
case Reshape:
|
||||
return "Reshape";
|
||||
case Tile:
|
||||
return "Tile";
|
||||
case ROIAlign:
|
||||
return "ROIAlign";
|
||||
case ROIPooling:
|
||||
return "ROIPooling";
|
||||
case PSROIPooling:
|
||||
return "PSROIPooling";
|
||||
case DepthToSpace:
|
||||
return "DepthToSpace";
|
||||
case BatchToSpace:
|
||||
return "BatchToSpace";
|
||||
case Pad:
|
||||
return "Pad";
|
||||
case Transpose:
|
||||
return "Transpose";
|
||||
case SpaceToDepth:
|
||||
return "SpaceToDepth";
|
||||
case SpaceToBatch:
|
||||
return "SpaceToBatch";
|
||||
case MemoryOutput:
|
||||
return "MemoryOutput";
|
||||
case MemoryInput:
|
||||
return "MemoryInput";
|
||||
case RNNSeq:
|
||||
return "RNNSeq";
|
||||
case RNNCell:
|
||||
return "RNNCell";
|
||||
case Eltwise:
|
||||
return "Eltwise";
|
||||
case FakeQuantize:
|
||||
return "FakeQuantize";
|
||||
case BinaryConvolution:
|
||||
return "BinaryConvolution";
|
||||
case DeformableConvolution:
|
||||
return "DeformableConvolution";
|
||||
case MVN:
|
||||
return "MVN";
|
||||
case TensorIterator:
|
||||
return "TensorIterator";
|
||||
case Convert:
|
||||
return "Convert";
|
||||
case NormalizeL2:
|
||||
return "NormalizeL2";
|
||||
case ScatterUpdate:
|
||||
return "ScatterUpdate";
|
||||
case ScatterElementsUpdate:
|
||||
return "ScatterElementsUpdate";
|
||||
case ScatterNDUpdate:
|
||||
return "ScatterNDUpdate";
|
||||
case Interpolate:
|
||||
return "Interpolate";
|
||||
case Reduce:
|
||||
return "Reduce";
|
||||
case Broadcast:
|
||||
return "Broadcast";
|
||||
case EmbeddingSegmentsSum:
|
||||
return "EmbeddingSegmentsSum";
|
||||
case EmbeddingBagPackedSum:
|
||||
return "EmbeddingBagPackedSum";
|
||||
case EmbeddingBagOffsetsSum:
|
||||
return "EmbeddingBagOffsetsSum";
|
||||
case Gather:
|
||||
return "Gather";
|
||||
case GatherElements:
|
||||
return "GatherElements";
|
||||
case GatherND:
|
||||
return "GatherND";
|
||||
case OneHot:
|
||||
return "OneHot";
|
||||
case RegionYolo:
|
||||
return "RegionYolo";
|
||||
case Select:
|
||||
return "Select";
|
||||
case Roll:
|
||||
return "Roll";
|
||||
case ShuffleChannels:
|
||||
return "ShuffleChannels";
|
||||
case DFT:
|
||||
return "DFT";
|
||||
case Math:
|
||||
return "Math";
|
||||
case CTCLoss:
|
||||
return "CTCLoss";
|
||||
case Bucketize:
|
||||
return "Bucketize";
|
||||
case CTCGreedyDecoder:
|
||||
return "CTCGreedyDecoder";
|
||||
case CTCGreedyDecoderSeqLen:
|
||||
return "CTCGreedyDecoderSeqLen";
|
||||
case CumSum:
|
||||
return "CumSum";
|
||||
case DetectionOutput:
|
||||
return "DetectionOutput";
|
||||
case ExperimentalDetectronDetectionOutput:
|
||||
return "ExperimentalDetectronDetectionOutput";
|
||||
case LogSoftmax:
|
||||
return "LogSoftmax";
|
||||
case TopK:
|
||||
return "TopK";
|
||||
case GatherTree:
|
||||
return "GatherTree";
|
||||
case GRN:
|
||||
return "GRN";
|
||||
case Range:
|
||||
return "Range";
|
||||
case Proposal:
|
||||
return "Proposal";
|
||||
case ReorgYolo:
|
||||
return "ReorgYolo";
|
||||
case ReverseSequence:
|
||||
return "ReverseSequence";
|
||||
case ExperimentalDetectronTopKROIs:
|
||||
return "ExperimentalDetectronTopKROIs";
|
||||
case ExperimentalDetectronROIFeatureExtractor:
|
||||
return "ExperimentalDetectronROIFeatureExtractor";
|
||||
case ExperimentalDetectronPriorGridGenerator:
|
||||
return "ExperimentalDetectronPriorGridGenerator";
|
||||
case ExperimentalDetectronGenerateProposalsSingleImage:
|
||||
return "ExperimentalDetectronGenerateProposalsSingleImage";
|
||||
case ExtractImagePatches:
|
||||
return "ExtractImagePatches";
|
||||
case NonMaxSuppression:
|
||||
return "NonMaxSuppression";
|
||||
case MatrixNms:
|
||||
return "MatrixNms";
|
||||
case MulticlassNms:
|
||||
return "MulticlassNms";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
MKLDNNNode::NodesFactory & MKLDNNNode::factory() {
|
||||
static NodesFactory factoryInstance;
|
||||
@ -439,7 +74,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
|
||||
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
|
||||
weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
|
||||
type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
|
||||
algorithm = Algorithm::Undefined;
|
||||
algorithm = Algorithm::Default;
|
||||
fusingPort = -1;
|
||||
const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();
|
||||
|
||||
|
@ -36,9 +36,6 @@ using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
|
||||
using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>;
|
||||
using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
|
||||
|
||||
Type TypeFromName(const std::string & type);
|
||||
std::string NameFromType(Type type);
|
||||
|
||||
class PortConfigurator {
|
||||
public:
|
||||
PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape,
|
||||
@ -629,7 +626,7 @@ protected:
|
||||
|
||||
MKLDNNWeightsSharing::Ptr weightCache;
|
||||
|
||||
Algorithm algorithm = Algorithm::Undefined;
|
||||
Algorithm algorithm = Algorithm::Default;
|
||||
|
||||
bool isInQuantizedGraph = false;
|
||||
|
||||
@ -744,6 +741,10 @@ private:
|
||||
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
|
||||
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
|
||||
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
|
||||
|
||||
#ifdef CPU_DEBUG_CAPS
|
||||
friend class Verbose;
|
||||
#endif
|
||||
};
|
||||
|
||||
class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,
|
||||
|
@ -5,20 +5,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <ratio>
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
class PerfCount {
|
||||
uint64_t duration;
|
||||
uint64_t total_duration;
|
||||
uint32_t num;
|
||||
|
||||
std::chrono::high_resolution_clock::time_point __start = {};
|
||||
std::chrono::high_resolution_clock::time_point __finish = {};
|
||||
|
||||
public:
|
||||
PerfCount(): duration(0), num(0) {}
|
||||
PerfCount(): total_duration(0), num(0) {}
|
||||
|
||||
uint64_t avg() { return (num == 0) ? 0 : duration / num; }
|
||||
std::chrono::duration<double, std::milli> duration() const {
|
||||
return __finish - __start;
|
||||
}
|
||||
|
||||
uint64_t avg() const { return (num == 0) ? 0 : total_duration / num; }
|
||||
|
||||
private:
|
||||
void start_itr() {
|
||||
@ -27,8 +32,7 @@ private:
|
||||
|
||||
void finish_itr() {
|
||||
__finish = std::chrono::high_resolution_clock::now();
|
||||
|
||||
duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
|
||||
total_duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
|
||||
num++;
|
||||
}
|
||||
|
||||
@ -46,5 +50,5 @@ public:
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
#define GET_PERF(_counter) std::unique_ptr<PerfHelper>(new PerfHelper(_counter->PerfCounter()))
|
||||
#define PERF(_need, _counter) auto pc = _need ? GET_PERF(_counter) : nullptr;
|
||||
#define GET_PERF(_node) std::unique_ptr<PerfHelper>(new PerfHelper(_node->PerfCounter()))
|
||||
#define PERF(_node, _need) auto pc = _need ? GET_PERF(_node) : nullptr;
|
||||
|
@ -24,6 +24,7 @@ public:
|
||||
readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
|
||||
readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
|
||||
readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
|
||||
readParam(verbose, "OV_CPU_VERBOSE");
|
||||
}
|
||||
|
||||
std::string blobDumpDir;
|
||||
@ -33,9 +34,10 @@ public:
|
||||
std::string blobDumpNodeType;
|
||||
std::string blobDumpNodeName;
|
||||
std::string execGraphPath;
|
||||
std::string verbose;
|
||||
|
||||
private:
|
||||
void readParam(std::string& param, const char* envVar) {
|
||||
static void readParam(std::string& param, const char* envVar) {
|
||||
if (const char* envValue = std::getenv(envVar))
|
||||
param = envValue;
|
||||
}
|
||||
|
169
inference-engine/src/mkldnn_plugin/utils/verbose.cpp
Normal file
169
inference-engine/src/mkldnn_plugin/utils/verbose.cpp
Normal file
@ -0,0 +1,169 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#ifdef CPU_DEBUG_CAPS
|
||||
|
||||
#include "verbose.h"
|
||||
#include "mkldnn_node.h"
|
||||
#include "cpu_types.h"
|
||||
#include "memory_desc/cpu_memory_desc_utils.h"
|
||||
|
||||
#include "dnnl_types.h"
|
||||
#include "dnnl_debug.h"
|
||||
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
bool Verbose::shouldBePrinted() const {
|
||||
if (lvl < 1)
|
||||
return false;
|
||||
|
||||
if (node->isConstant() ||
|
||||
node->getType() == Input || node->getType() == Output)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
/**
|
||||
* Print node verbose execution information to cout.
|
||||
* Similiar to DNNL_VERBOSE output
|
||||
* Formating written in C using oneDNN format functions.
|
||||
* Can be rewritten in pure C++ if necessary
|
||||
*/
|
||||
void Verbose::printInfo() {
|
||||
/* 1, 2, 3, etc -> no color
|
||||
* 11, 22, 33, etc -> colorize */
|
||||
bool colorUp = lvl / 10 > 0 ? true : false;
|
||||
|
||||
enum Color {
|
||||
RED,
|
||||
GREEN,
|
||||
YELLOW,
|
||||
BLUE,
|
||||
PURPLE,
|
||||
CYAN
|
||||
};
|
||||
|
||||
auto colorize = [&](const Color color, const std::string& str) {
|
||||
if (!colorUp)
|
||||
return str;
|
||||
|
||||
const std::string red("\033[1;31m");
|
||||
const std::string green("\033[1;32m");
|
||||
const std::string yellow("\033[1;33m");
|
||||
const std::string blue("\033[1;34m");
|
||||
const std::string purple("\033[1;35m");
|
||||
const std::string cyan("\033[1;36m");
|
||||
const std::string reset("\033[0m");
|
||||
std::string colorCode;
|
||||
|
||||
switch (color) {
|
||||
case RED: colorCode = red;
|
||||
break;
|
||||
case GREEN: colorCode = green;
|
||||
break;
|
||||
case YELLOW: colorCode = yellow;
|
||||
break;
|
||||
case BLUE: colorCode = blue;
|
||||
break;
|
||||
case PURPLE: colorCode = purple;
|
||||
break;
|
||||
case CYAN: colorCode = cyan;
|
||||
break;
|
||||
default: colorCode = reset;
|
||||
break;
|
||||
}
|
||||
|
||||
return colorCode + str + reset;
|
||||
};
|
||||
|
||||
// can be increased if necessary
|
||||
const int CPU_VERBOSE_DAT_LEN = 512;
|
||||
char portsInfo[CPU_VERBOSE_DAT_LEN] = {'\0'};
|
||||
int written = 0;
|
||||
int written_total = 0;
|
||||
|
||||
auto shift = [&](int size) {
|
||||
if (written < 0 || written_total + size > CPU_VERBOSE_DAT_LEN) {
|
||||
const char* errorMsg = "# NOT ENOUGHT BUFFER SIZE #";
|
||||
snprintf(portsInfo, strlen(errorMsg) + 1, "%s", errorMsg);
|
||||
written_total = strlen(errorMsg);
|
||||
return;
|
||||
}
|
||||
|
||||
written_total += size;
|
||||
};
|
||||
|
||||
auto formatMemDesc = [&](const dnnl_memory_desc_t& desc, std::string& prefix) {
|
||||
prefix = colorize(BLUE, prefix);
|
||||
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, " ");
|
||||
shift(written);
|
||||
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", prefix.c_str());
|
||||
shift(written);
|
||||
written = dnnl_md2fmt_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
|
||||
shift(written);
|
||||
written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, ":");
|
||||
shift(written);
|
||||
written = dnnl_md2dim_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
|
||||
shift(written);
|
||||
};
|
||||
|
||||
for (int i = 0; i < node->getParentEdges().size(); i++) {
|
||||
std::string prefix("src:" + std::to_string(i) + ':');
|
||||
formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
|
||||
node->getParentEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
|
||||
prefix);
|
||||
}
|
||||
|
||||
for (int i = 0; i < node->getChildEdges().size(); i++) {
|
||||
std::string prefix("dst:" + std::to_string(i) + ':');
|
||||
formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
|
||||
node->getChildEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
|
||||
prefix);
|
||||
}
|
||||
|
||||
std::string post_ops;
|
||||
if (!node->getFusedWith().empty()) {
|
||||
post_ops += "post_ops:'";
|
||||
for (const auto& fusedNode : node->getFusedWith()) {
|
||||
post_ops.append(colorize(GREEN, fusedNode->getName())).append(":")
|
||||
.append(colorize(CYAN, NameFromType(fusedNode->getType()))).append(":")
|
||||
.append(algToString(fusedNode->getAlgorithm()))
|
||||
.append(";");
|
||||
}
|
||||
post_ops += "'";
|
||||
}
|
||||
|
||||
std::string nodeImplementer = "cpu";
|
||||
if (node->prim)
|
||||
nodeImplementer = "dnnl"; // oneDNN
|
||||
else if (node->getType() == Reference)
|
||||
nodeImplementer = "ngraph_ref"; // ngraph reference
|
||||
|
||||
const std::string& nodeName = colorize(GREEN, node->getName());
|
||||
const std::string& nodeType = colorize(CYAN, NameFromType(node->getType()));
|
||||
const std::string& nodeAlg = algToString(node->getAlgorithm());
|
||||
const std::string& nodePrimImplType = impl_type_to_string(node->getSelectedPrimitiveDescriptor()->getImplementationType());
|
||||
|
||||
stream << "ov_cpu_verbose" << ','
|
||||
<< "exec" << ','
|
||||
<< nodeImplementer << ','
|
||||
<< nodeName << ":" << nodeType << ":" << nodeAlg << ','
|
||||
<< nodePrimImplType << ','
|
||||
<< portsInfo << ','
|
||||
<< post_ops << ',';
|
||||
}
|
||||
|
||||
void Verbose::printDuration() {
|
||||
const auto& duration = node->PerfCounter().duration().count();
|
||||
stream << duration << "ms";
|
||||
}
|
||||
|
||||
void Verbose::flush() const {
|
||||
std::cout << stream.rdbuf() << "\n";
|
||||
}
|
||||
} // namespace MKLDNNPlugin
|
||||
#endif // CPU_DEBUG_CAPS
|
46
inference-engine/src/mkldnn_plugin/utils/verbose.h
Normal file
46
inference-engine/src/mkldnn_plugin/utils/verbose.h
Normal file
@ -0,0 +1,46 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#ifdef CPU_DEBUG_CAPS
|
||||
|
||||
#include "mkldnn_node.h"
|
||||
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
#include <sstream>
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
class Verbose {
|
||||
public:
|
||||
Verbose(const MKLDNNNodePtr& _node, const std::string& _lvl)
|
||||
: node(_node), lvl(atoi(_lvl.c_str())) {
|
||||
if (!shouldBePrinted())
|
||||
return;
|
||||
printInfo();
|
||||
}
|
||||
virtual ~Verbose() {
|
||||
if (!shouldBePrinted())
|
||||
return;
|
||||
|
||||
printDuration();
|
||||
flush();
|
||||
}
|
||||
private:
|
||||
const MKLDNNNodePtr& node;
|
||||
const int lvl;
|
||||
std::stringstream stream;
|
||||
|
||||
bool shouldBePrinted() const;
|
||||
void printInfo();
|
||||
void printDuration();
|
||||
void flush() const;
|
||||
};
|
||||
|
||||
#define VERBOSE(...) Verbose(__VA_ARGS__)
|
||||
} // namespace MKLDNNPlugin
|
||||
#else
|
||||
#define VERBOSE(...)
|
||||
#endif // CPU_DEBUG_CAPS
|
@ -12,6 +12,7 @@
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
|
Loading…
Reference in New Issue
Block a user