[CPU] Add OV_CPU_VERBOSE env var to print node exec info to cout (#6390)
parent b968c7b813
commit c92988c8e9

inference-engine/src/mkldnn_plugin/cpu_types.cpp (new file, 459 lines)
@@ -0,0 +1,459 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cpu_types.h"

#include <vector>
#include <string>

namespace MKLDNNPlugin {

using Dim = std::size_t;
using VectorDims = std::vector<Dim>;

const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl = {
    { "Constant", Input },
    { "Parameter", Input },
    { "Result", Output },
    { "Convolution", Convolution },
    { "GroupConvolution", Convolution },
    { "MatMul", MatMul },
    { "FullyConnected", FullyConnected },
    { "MaxPool", Pooling },
    { "AvgPool", Pooling },
    { "AdaptiveMaxPool", AdaptivePooling },
    { "AdaptiveAvgPool", AdaptivePooling },
    { "Add", Eltwise },
    { "Subtract", Eltwise },
    { "Multiply", Eltwise },
    { "Divide", Eltwise },
    { "SquaredDifference", Eltwise },
    { "Maximum", Eltwise },
    { "Minimum", Eltwise },
    { "Mod", Eltwise },
    { "FloorMod", Eltwise },
    { "Power", Eltwise },
    { "PowerStatic", Eltwise },
    { "Equal", Eltwise },
    { "NotEqual", Eltwise },
    { "Greater", Eltwise },
    { "GreaterEqual", Eltwise },
    { "Less", Eltwise },
    { "LessEqual", Eltwise },
    { "LogicalAnd", Eltwise },
    { "LogicalOr", Eltwise },
    { "LogicalXor", Eltwise },
    { "LogicalNot", Eltwise },
    { "Relu", Eltwise },
    { "LeakyRelu", Eltwise },
    { "Gelu", Eltwise },
    { "Elu", Eltwise },
    { "Tanh", Eltwise },
    { "Sigmoid", Eltwise },
    { "Abs", Eltwise },
    { "Sqrt", Eltwise },
    { "Clamp", Eltwise },
    { "Exp", Eltwise },
    { "SwishCPU", Eltwise },
    { "HSwish", Eltwise },
    { "Mish", Eltwise },
    { "HSigmoid", Eltwise },
    { "Round", Eltwise },
    { "PRelu", Eltwise },
    { "Erf", Eltwise },
    { "SoftPlus", Eltwise },
    { "Reshape", Reshape },
    { "Squeeze", Reshape },
    { "Unsqueeze", Reshape },
    { "Softmax", Softmax },
    { "Reorder", Reorder },
    { "BatchToSpace", BatchToSpace },
    { "SpaceToBatch", SpaceToBatch },
    { "DepthToSpace", DepthToSpace },
    { "SpaceToDepth", SpaceToDepth },
    { "Roll", Roll },
    { "LRN", Lrn },
    { "Split", Split },
    { "VariadicSplit", Split },
    { "Concat", Concatenation },
    { "ConvolutionBackpropData", Deconvolution },
    { "GroupConvolutionBackpropData", Deconvolution },
    { "StridedSlice", StridedSlice },
    { "Tile", Tile },
    { "ROIAlign", ROIAlign },
    { "ROIPooling", ROIPooling },
    { "PSROIPooling", PSROIPooling },
    { "DeformablePSROIPooling", PSROIPooling },
    { "Pad", Pad },
    { "Transpose", Transpose },
    { "LSTMCell", RNNCell },
    { "GRUCell", RNNCell },
    { "RNNCell", RNNCell },
    { "LSTMSequence", RNNSeq },
    { "GRUSequence", RNNSeq },
    { "RNNSequence", RNNSeq },
    { "FakeQuantize", FakeQuantize },
    { "BinaryConvolution", BinaryConvolution },
    { "DeformableConvolution", DeformableConvolution },
    { "TensorIterator", TensorIterator },
    { "Loop", TensorIterator },
    { "ReadValue", MemoryInput },   // for construction from name ctor, arbitrary name is used
    { "Assign", MemoryOutput },     // for construction from layer ctor
    { "Convert", Convert },
    { "MVN", MVN },
    { "NormalizeL2", NormalizeL2 },
    { "ScatterUpdate", ScatterUpdate },
    { "ScatterElementsUpdate", ScatterElementsUpdate },
    { "ScatterNDUpdate", ScatterNDUpdate },
    { "Interpolate", Interpolate },
    { "ReduceL1", Reduce },
    { "ReduceL2", Reduce },
    { "ReduceLogicalAnd", Reduce },
    { "ReduceLogicalOr", Reduce },
    { "ReduceMax", Reduce },
    { "ReduceMean", Reduce },
    { "ReduceMin", Reduce },
    { "ReduceProd", Reduce },
    { "ReduceSum", Reduce },
    { "ReduceLogSum", Reduce },
    { "ReduceLogSumExp", Reduce },
    { "ReduceSumSquare", Reduce },
    { "Broadcast", Broadcast },
    { "EmbeddingSegmentsSum", EmbeddingSegmentsSum },
    { "EmbeddingBagPackedSum", EmbeddingBagPackedSum },
    { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum },
    { "Gather", Gather },
    { "GatherElements", GatherElements },
    { "GatherND", GatherND },
    { "OneHot", OneHot },
    { "RegionYolo", RegionYolo },
    { "Select", Select },
    { "ShuffleChannels", ShuffleChannels },
    { "DFT", DFT },
    { "IDFT", DFT },
    { "Abs", Math },
    { "Acos", Math },
    { "Acosh", Math },
    { "Asin", Math },
    { "Asinh", Math },
    { "Atan", Math },
    { "Atanh", Math },
    { "Ceil", Math },
    { "Ceiling", Math },
    { "Cos", Math },
    { "Cosh", Math },
    { "Floor", Math },
    { "HardSigmoid", Math },
    { "Log", Math },
    { "Neg", Math },
    { "Reciprocal", Math },
    { "Selu", Math },
    { "Sign", Math },
    { "Sin", Math },
    { "Sinh", Math },
    { "SoftPlus", Math },
    { "Softsign", Math },
    { "Tan", Math },
    { "CTCLoss", CTCLoss },
    { "Bucketize", Bucketize },
    { "CTCGreedyDecoder", CTCGreedyDecoder },
    { "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen },
    { "CumSum", CumSum },
    { "DetectionOutput", DetectionOutput },
    { "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput },
    { "LogSoftmax", LogSoftmax },
    { "TopK", TopK },
    { "GatherTree", GatherTree },
    { "GRN", GRN },
    { "Range", Range },
    { "Proposal", Proposal },
    { "ReorgYolo", ReorgYolo },
    { "ReverseSequence", ReverseSequence },
    { "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs },
    { "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor },
    { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator },
    { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage },
    { "ExtractImagePatches", ExtractImagePatches },
    { "NonMaxSuppression", NonMaxSuppression },
    { "NonMaxSuppressionIEInternal", NonMaxSuppression },
    { "MatrixNms", MatrixNms },
    { "MulticlassNms", MulticlassNms }
};

Type TypeFromName(const std::string& type) {
    auto itType = type_to_name_tbl.find(type);
    if (type_to_name_tbl.end() != itType) {
        return itType->second;
    } else {
        return Unknown;
    }
}

std::string NameFromType(const Type type) {
    switch (type) {
    case Generic: return "Generic";
    case Reorder: return "Reorder";
    case Input: return "Input";
    case Output: return "Output";
    case Convolution: return "Convolution";
    case Deconvolution: return "Deconvolution";
    case Lrn: return "Lrn";
    case Pooling: return "Pooling";
    case AdaptivePooling: return "AdaptivePooling";
    case FullyConnected: return "FullyConnected";
    case MatMul: return "MatMul";
    case Softmax: return "Softmax";
    case Split: return "Split";
    case Concatenation: return "Concatenation";
    case StridedSlice: return "StridedSlice";
    case Reshape: return "Reshape";
    case Tile: return "Tile";
    case ROIAlign: return "ROIAlign";
    case ROIPooling: return "ROIPooling";
    case PSROIPooling: return "PSROIPooling";
    case DepthToSpace: return "DepthToSpace";
    case BatchToSpace: return "BatchToSpace";
    case Pad: return "Pad";
    case Transpose: return "Transpose";
    case SpaceToDepth: return "SpaceToDepth";
    case SpaceToBatch: return "SpaceToBatch";
    case MemoryOutput: return "MemoryOutput";
    case MemoryInput: return "MemoryInput";
    case RNNSeq: return "RNNSeq";
    case RNNCell: return "RNNCell";
    case Eltwise: return "Eltwise";
    case FakeQuantize: return "FakeQuantize";
    case BinaryConvolution: return "BinaryConvolution";
    case DeformableConvolution: return "DeformableConvolution";
    case MVN: return "MVN";
    case TensorIterator: return "TensorIterator";
    case Convert: return "Convert";
    case NormalizeL2: return "NormalizeL2";
    case ScatterUpdate: return "ScatterUpdate";
    case ScatterElementsUpdate: return "ScatterElementsUpdate";
    case ScatterNDUpdate: return "ScatterNDUpdate";
    case Interpolate: return "Interpolate";
    case Reduce: return "Reduce";
    case Broadcast: return "Broadcast";
    case EmbeddingSegmentsSum: return "EmbeddingSegmentsSum";
    case EmbeddingBagPackedSum: return "EmbeddingBagPackedSum";
    case EmbeddingBagOffsetsSum: return "EmbeddingBagOffsetsSum";
    case Gather: return "Gather";
    case GatherElements: return "GatherElements";
    case GatherND: return "GatherND";
    case OneHot: return "OneHot";
    case RegionYolo: return "RegionYolo";
    case Select: return "Select";
    case Roll: return "Roll";
    case ShuffleChannels: return "ShuffleChannels";
    case DFT: return "DFT";
    case Math: return "Math";
    case CTCLoss: return "CTCLoss";
    case Bucketize: return "Bucketize";
    case CTCGreedyDecoder: return "CTCGreedyDecoder";
    case CTCGreedyDecoderSeqLen: return "CTCGreedyDecoderSeqLen";
    case CumSum: return "CumSum";
    case DetectionOutput: return "DetectionOutput";
    case ExperimentalDetectronDetectionOutput: return "ExperimentalDetectronDetectionOutput";
    case LogSoftmax: return "LogSoftmax";
    case TopK: return "TopK";
    case GatherTree: return "GatherTree";
    case GRN: return "GRN";
    case Range: return "Range";
    case Proposal: return "Proposal";
    case ReorgYolo: return "ReorgYolo";
    case ReverseSequence: return "ReverseSequence";
    case ExperimentalDetectronTopKROIs: return "ExperimentalDetectronTopKROIs";
    case ExperimentalDetectronROIFeatureExtractor: return "ExperimentalDetectronROIFeatureExtractor";
    case ExperimentalDetectronPriorGridGenerator: return "ExperimentalDetectronPriorGridGenerator";
    case ExperimentalDetectronGenerateProposalsSingleImage: return "ExperimentalDetectronGenerateProposalsSingleImage";
    case ExtractImagePatches: return "ExtractImagePatches";
    case NonMaxSuppression: return "NonMaxSuppression";
    case MatrixNms: return "MatrixNms";
    case MulticlassNms: return "MulticlassNms";
    default: return "Unknown";
    }
}

std::string algToString(const Algorithm alg) {
#define CASE(_alg) do {                 \
    if (alg == _alg) return #_alg;      \
} while (0)
    CASE(Default);
    CASE(PoolingMax);
    CASE(PoolingAvg);
    CASE(ConvolutionCommon);
    CASE(ConvolutionGrouped);
    CASE(DeconvolutionCommon);
    CASE(DeconvolutionGrouped);
    CASE(EltwiseAdd);
    CASE(EltwiseMultiply);
    CASE(EltwiseSubtract);
    CASE(EltwiseDivide);
    CASE(EltwiseFloorMod);
    CASE(EltwiseMod);
    CASE(EltwiseMaximum);
    CASE(EltwiseMinimum);
    CASE(EltwiseSquaredDifference);
    CASE(EltwisePowerDynamic);
    CASE(EltwisePowerStatic);
    CASE(EltwiseMulAdd);
    CASE(EltwiseEqual);
    CASE(EltwiseNotEqual);
    CASE(EltwiseGreater);
    CASE(EltwiseGreaterEqual);
    CASE(EltwiseLess);
    CASE(EltwiseLessEqual);
    CASE(EltwiseLogicalAnd);
    CASE(EltwiseLogicalOr);
    CASE(EltwiseLogicalXor);
    CASE(EltwiseLogicalNot);
    CASE(EltwiseRelu);
    CASE(EltwiseGelu);
    CASE(EltwiseElu);
    CASE(EltwiseTanh);
    CASE(EltwiseSigmoid);
    CASE(EltwiseAbs);
    CASE(EltwiseSqrt);
    CASE(EltwiseSoftRelu);
    CASE(EltwiseExp);
    CASE(EltwiseClamp);
    CASE(EltwiseSwish);
    CASE(EltwisePrelu);
    CASE(EltwiseMish);
    CASE(EltwiseHswish);
    CASE(EltwiseHsigmoid);
    CASE(EltwiseRoundHalfToEven);
    CASE(EltwiseRoundHalfAwayFromZero);
    CASE(EltwiseErf);
    CASE(FQCommon);
    CASE(FQQuantization);
    CASE(FQBinarization);
    CASE(ROIPoolingMax);
    CASE(ROIPoolingBilinear);
    CASE(ROIAlignMax);
    CASE(ROIAlignAvg);
    CASE(PSROIPoolingAverage);
    CASE(PSROIPoolingBilinear);
    CASE(PSROIPoolingBilinearDeformable);
    CASE(ReduceL1);
    CASE(ReduceL2);
    CASE(ReduceAnd);
    CASE(ReduceOr);
    CASE(ReduceMax);
    CASE(ReduceMean);
    CASE(ReduceMin);
    CASE(ReduceProd);
    CASE(ReduceSum);
    CASE(ReduceLogSum);
    CASE(ReduceLogSumExp);
    CASE(ReduceSumSquare);
    CASE(MathAbs);
    CASE(MathAcos);
    CASE(MathAcosh);
    CASE(MathAsin);
    CASE(MathAsinh);
    CASE(MathAtan);
    CASE(MathAtanh);
    CASE(MathCeiling);
    CASE(MathCos);
    CASE(MathCosh);
    CASE(MathErf);
    CASE(MathFloor);
    CASE(MathHardSigmoid);
    CASE(MathLog);
    CASE(MathNegative);
    CASE(MathReciprocal);
    CASE(MathSelu);
    CASE(MathSign);
    CASE(MathSin);
    CASE(MathSinh);
    CASE(MathSoftPlus);
    CASE(MathSoftsign);
    CASE(MathTan);
#undef CASE
    return "Undefined";
}

}  // namespace MKLDNNPlugin
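The two lookup helpers above are the entry points most callers need. A minimal standalone sketch of how they round-trip (hypothetical usage, assuming the MKLDNNPlugin headers are on the include path):

```cpp
#include "cpu_types.h"
#include <iostream>

int main() {
    using namespace MKLDNNPlugin;
    // Map an ngraph operation type string to the plugin-internal Type enum...
    Type t = TypeFromName("GroupConvolution");          // grouped conv maps to Convolution
    // ...and back to the coarse-grained plugin name used in logs and perf output.
    std::cout << NameFromType(t) << "\n";               // prints "Convolution"
    // Unrecognised names fall back to Unknown, which NameFromType prints as "Unknown".
    std::cout << NameFromType(TypeFromName("NoSuchOp")) << "\n";
    return 0;
}
```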
inference-engine/src/mkldnn_plugin/cpu_types.h
@@ -4,7 +4,10 @@
 #pragma once
 
+#include "caseless.hpp"
+
 #include <vector>
+#include <string>
 
 namespace MKLDNNPlugin {
 
@@ -97,7 +100,7 @@ enum Type {
 };
 
 enum Algorithm {
-    Undefined,
+    Default,
 
     // Pooling algorithms
     PoolingMax,
@@ -215,4 +218,11 @@ enum Algorithm {
     MathTan
 };
 
+extern const InferenceEngine::details::caseless_unordered_map<std::string, Type> type_to_name_tbl;
+
+Type TypeFromName(const std::string& type);
+
+std::string NameFromType(const Type type);
+
+std::string algToString(const Algorithm alg);
 }  // namespace MKLDNNPlugin
inference-engine/src/mkldnn_plugin/docs/README.md (new file, 8 lines)
@@ -0,0 +1,8 @@
# Debug capabilities
Use the following cmake option to enable debug capabilities:

`-DENABLE_DEBUG_CAPS=ON`

* [Verbose mode](verbose.md)
* [Blob dumping](blob_dumping.md)
* [Graph serialization](graph_serialization.md)
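For orientation, an out-of-tree configure step with this option enabled might look as follows (build directory, generator, and job count are illustrative, not prescribed by the README):

```sh
mkdir build && cd build
cmake -DENABLE_DEBUG_CAPS=ON ..
make -j"$(nproc)"
```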
inference-engine/src/mkldnn_plugin/docs/blob_dumping.md
@@ -1,9 +1,4 @@
-# Debug capabilities
-Use the following cmake option to enable debug capabilities:
-
-`-DENABLE_CPU_DEBUG_CAPS=ON`
-
-## Blob dumping
+# Blob dumping
 Blob dumping is controlled by environment variables (filters).
 
 The variables define conditions of the node which input and output blobs
@@ -24,12 +19,12 @@ or for shell session (bash example):
 export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
 binary ...
 ```
-### Specify dump directory
+## Specify dump directory
 ```sh
 OV_CPU_BLOB_DUMP_DIR=<directory-name> binary ...
 ```
 Default is *mkldnn_dump*
-### Specify dump format
+## Specify dump format
 ```sh
 OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
 ```
@@ -37,7 +32,7 @@ Options are:
 * BIN (default)
 * TEXT
 
-### Filter input / output blobs
+## Filter input / output blobs
 To dump only input / output blobs:
 ```sh
 OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ...
@@ -51,7 +46,7 @@ Options are:
 * OUT
 * ALL
 
-### Filter by execution ID
+## Filter by execution ID
 To dump blobs only for nodes with specified execution IDs:
 ```sh
 OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ...
@@ -61,7 +56,7 @@ Example:
 OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ...
 ```
 
-### Filter by type
+## Filter by type
 To dump blobs only for nodes with specified types:
 ```sh
 OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ...
@@ -73,7 +68,7 @@ Example:
 
 > **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types
 
-### Filter by name
+## Filter by name
 To dump blobs only for nodes with name matching specified regex:
 ```sh
 OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ...
@@ -83,7 +78,7 @@ Example:
 OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ...
 ```
 
-### Dump all the blobs
+## Dump all the blobs
 ```sh
 OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
 ```
@@ -95,22 +90,3 @@ Example:
 ```sh
 OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
 ```
-
-## Graph serialization
-The functionality allows to serialize execution graph using environment variable:
-```sh
-OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
-```
-
-Possible serialization options:
-* cout
-
-  Serialize to console output
-* \<path\>.xml
-
-  Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
-* \<path\>.dot
-
-  TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.
-
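Assuming the filters combine (each variable further narrows the set of dumped nodes, as the wording above suggests), an invocation that dumps only the output blobs of Convolution nodes, in text form, into a custom directory could look like this; the binary name is a placeholder, as in the examples above:

```sh
OV_CPU_BLOB_DUMP_DIR=conv_dumps \
OV_CPU_BLOB_DUMP_FORMAT=TEXT \
OV_CPU_BLOB_DUMP_NODE_PORTS=OUT \
OV_CPU_BLOB_DUMP_NODE_TYPE=Convolution \
binary ...
```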
inference-engine/src/mkldnn_plugin/docs/graph_serialization.md (new file, 17 lines)
@@ -0,0 +1,17 @@
# Graph serialization

The execution graph can be serialized using an environment variable:
```sh
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
```

Possible serialization options:
* cout

  Serialize to console output
* \<path\>.xml

  Serialize graph into .xml and .bin files. Can be opened using, for example, the *netron* app
* \<path\>.dot

  TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.
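For instance, dumping the execution graph of a run into XML/BIN files in the current directory might look like the following; the output file name and the binary are placeholders:

```sh
OV_CPU_EXEC_GRAPH_PATH=exec_graph.xml binary ...
```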
inference-engine/src/mkldnn_plugin/docs/verbose.md (new file, 38 lines)
@@ -0,0 +1,38 @@
# Verbose mode

Execution of plugin nodes can be traced to cout, collecting statistics such as:
- node implementer:
  * cpu (CPU plugin)
  * dnnl (oneDNN library)
  * ngraph_ref (ngraph reference fallback)
- node name
- node type
- node algorithm
- node primitive info
- input / output ports info
- fused nodes
- execution time
- etc.

Format:
```sh
ov_cpu_verbose,exec,<node_implementer>,\
<node_name>:<node_type>:<node_alg>,<impl_type>,\
src:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
dst:<port_id>:<precision>::<type>:<format>:f0:<shape> ...,\
post_ops:'<node_name>:<node_type>:<node_alg>;...;',\
<execution_time>
```

To turn verbose mode on, set the following environment variable:
```sh
OV_CPU_VERBOSE=<level> binary ...
```

Currently verbose mode has only one level; any digit activates it.

To get colored verbose output, duplicate the level's digit, for example:
```sh
OV_CPU_VERBOSE=11 binary ...
```
**NOTE:** Shell color codes are used
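Put together with the format string above, a single traced node might be reported roughly as follows; the node name, shapes, formats, and timing are invented for illustration only:

```sh
ov_cpu_verbose,exec,dnnl,Conv_5:Convolution:ConvolutionCommon,jit_avx2, src:0:f32::blocked:aBcd8b:f0:1x64x56x56 dst:0:f32::blocked:aBcd8b:f0:1x64x56x56,post_ops:'Relu_6:Eltwise:EltwiseRelu;',0.42ms
```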
@@ -4,9 +4,11 @@
 
 #pragma once
 
+#include "mkldnn/ie_mkldnn.h"
+#include "cpu_types.h"
 
 #include <ie_layouts.h>
 #include <ie_blob.h>
-#include "mkldnn/ie_mkldnn.h"
 
 namespace MKLDNNPlugin {
 
@@ -50,3 +50,38 @@ impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) {
 
     return res;
 }
+
+const char* MKLDNNPlugin::impl_type_to_string(impl_desc_type type) {
+#define CASE(_type) do {                    \
+    if (type == _type) return #_type;       \
+} while (0)
+    CASE(unknown);
+    CASE(undef);
+    CASE(ref_any);
+    CASE(reorder);
+    CASE(gemm_any);
+    CASE(gemm_blas);
+    CASE(gemm_avx512);
+    CASE(gemm_avx2);
+    CASE(gemm_avx);
+    CASE(gemm_sse42);
+    CASE(jit_gemm);
+    CASE(jit_avx512_winograd);
+    CASE(jit_avx512);
+    CASE(jit_avx2);
+    CASE(jit_avx);
+    CASE(jit_sse42);
+    CASE(jit_uni);
+    CASE(jit_avx512_1x1);
+    CASE(jit_avx2_1x1);
+    CASE(jit_avx_1x1);
+    CASE(jit_sse42_1x1);
+    CASE(jit_uni_1x1);
+    CASE(jit_avx512_dw);
+    CASE(jit_avx2_dw);
+    CASE(jit_avx_dw);
+    CASE(jit_sse42_dw);
+    CASE(jit_uni_dw);
+#undef CASE
+    return "unknown";
+}
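The CASE macro above relies on preprocessor stringification (#_type), so the returned text always matches the enumerator name exactly. The same pattern in isolation, using a small hypothetical enum rather than the plugin's impl_desc_type:

```cpp
#include <cstdio>

enum impl_kind { ref_any, jit_avx2, jit_avx512 };

static const char* kind_to_string(impl_kind kind) {
// Stringify the enumerator that matches; fall through to "unknown" otherwise.
#define CASE(_kind) do { if (kind == _kind) return #_kind; } while (0)
    CASE(ref_any);
    CASE(jit_avx2);
    CASE(jit_avx512);
#undef CASE
    return "unknown";
}

int main() {
    std::printf("%s\n", kind_to_string(jit_avx2));  // prints "jit_avx2"
    return 0;
}
```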
@@ -63,6 +63,7 @@ enum impl_desc_type {
     jit_uni_dw = jit | uni | _dw,
 };
 
+const char* impl_type_to_string(impl_desc_type type);
 impl_desc_type parse_impl_name(std::string impl_desc_name);
 
 }  // namespace MKLDNNPlugin
inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -39,6 +39,7 @@
 #include "utils/node_dumper.h"
 #include "utils/ngraph_utils.hpp"
 #include "utils/cpu_utils.hpp"
+#include "utils/verbose.h"
 #include "memory_desc/cpu_memory_desc_utils.h"
 
 #include <ngraph/node.hpp>
@@ -828,7 +829,9 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
     mkldnn::stream stream(eng);
 
     for (const auto& node : executableGraphNodes) {
-        PERF(config.collectPerfCounters, node);
+        VERBOSE(node, config.debugCaps.verbose);
+        PERF(node, config.collectPerfCounters);
 
         if (request)
             request->ThrowIfCanceled();
inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
@@ -3,6 +3,7 @@
 //
 
 #include "mkldnn_node.h"
+#include "dnnl_debug.h"
 #include "mkldnn_extension_mngr.h"
 #include "mkldnn_itt.h"
 
@@ -43,14 +44,14 @@
 #include <nodes/mkldnn_shuffle_channels_node.h>
 #include <nodes/mkldnn_reference_node.h>
 #include <nodes/mkldnn_fake_quantize_node.h>
-#include <mkldnn_types.h>
-#include <dnnl_types.h>
 #include "mkldnn_extension_utils.h"
+#include "mkldnn/iml_type_mapper.h"
 
 #include "nodes/common/cpu_memcpy.h"
 #include "mkldnn_debug.h"
 #include "utils/rt_info/memory_formats_attribute.hpp"
 
+#include <dnnl_types.h>
 #include <ie_ngraph_utils.hpp>
 #include "utils/general_utils.h"
 #include "utils/cpu_utils.hpp"
@@ -63,372 +64,6 @@ using namespace MKLDNNPlugin;
 using namespace openvino;
 
 using namespace InferenceEngine::details;
-namespace MKLDNNPlugin {
-    [removed lines: the static type_to_name_tbl map and TypeFromName(), identical to the definitions added in cpu_types.cpp above]
-
-template<>
-DnnlMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
-    return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtInputPort(portNum));
-}
-
-template<>
-BlockedMemoryDescPtr MKLDNNNode::getInputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
-    return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtInputPort(portNum));
-}
-
-template<>
-DnnlMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<DnnlMemoryDesc, 0, 0>(size_t portNum) const {
-    return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtOutputPort(portNum));
-}
-
-template<>
-BlockedMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort<BlockedMemoryDesc, 0, 0>(size_t portNum) const {
-    return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtOutputPort(portNum));
-}
-    [removed lines: NameFromType(), identical to the definition added in cpu_types.cpp above]
-
-}  // namespace MKLDNNPlugin
-
 MKLDNNNode::NodesFactory & MKLDNNNode::factory() {
     static NodesFactory factoryInstance;
@@ -439,7 +74,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
     : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
       weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()),
       type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) {
-    algorithm = Algorithm::Undefined;
+    algorithm = Algorithm::Default;
     fusingPort = -1;
     const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name();
 
inference-engine/src/mkldnn_plugin/mkldnn_node.h
@@ -36,9 +36,6 @@ using MKLDNNNodePtr = std::shared_ptr<MKLDNNNode>;
 using MKLDNNNodeConstPtr = std::shared_ptr<const MKLDNNNode>;
 using MKLDNNNodeWeakPtr = std::weak_ptr<MKLDNNNode>;
 
-Type TypeFromName(const std::string & type);
-std::string NameFromType(Type type);
-
 class PortConfigurator {
 public:
     PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape,
@@ -629,7 +626,7 @@ protected:
 
     MKLDNNWeightsSharing::Ptr weightCache;
 
-    Algorithm algorithm = Algorithm::Undefined;
+    Algorithm algorithm = Algorithm::Default;
 
     bool isInQuantizedGraph = false;
 
@@ -744,6 +741,10 @@ private:
     void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
     enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
     ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);
+
+#ifdef CPU_DEBUG_CAPS
+    friend class Verbose;
+#endif
 };
 
 class MKLDNNNode::NodesFactory : public openvino::cc::Factory<Type,
inference-engine/src/mkldnn_plugin/perf_count.h
@@ -5,20 +5,25 @@
 #pragma once
 
 #include <chrono>
+#include <ratio>
 
 namespace MKLDNNPlugin {
 
 class PerfCount {
-    uint64_t duration;
+    uint64_t total_duration;
     uint32_t num;
 
     std::chrono::high_resolution_clock::time_point __start = {};
     std::chrono::high_resolution_clock::time_point __finish = {};
 
 public:
-    PerfCount(): duration(0), num(0) {}
+    PerfCount(): total_duration(0), num(0) {}
 
-    uint64_t avg() { return (num == 0) ? 0 : duration / num; }
+    std::chrono::duration<double, std::milli> duration() const {
+        return __finish - __start;
+    }
+
+    uint64_t avg() const { return (num == 0) ? 0 : total_duration / num; }
 
 private:
     void start_itr() {
@@ -27,8 +32,7 @@ private:
 
     void finish_itr() {
         __finish = std::chrono::high_resolution_clock::now();
-        duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
+        total_duration += std::chrono::duration_cast<std::chrono::microseconds>(__finish - __start).count();
         num++;
     }
 
@@ -46,5 +50,5 @@ public:
 
 }  // namespace MKLDNNPlugin
 
-#define GET_PERF(_counter) std::unique_ptr<PerfHelper>(new PerfHelper(_counter->PerfCounter()))
-#define PERF(_need, _counter) auto pc = _need ? GET_PERF(_counter) : nullptr;
+#define GET_PERF(_node) std::unique_ptr<PerfHelper>(new PerfHelper(_node->PerfCounter()))
+#define PERF(_node, _need) auto pc = _need ? GET_PERF(_node) : nullptr;
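The PERF macro above keeps a unique_ptr to a PerfHelper alive for the rest of the loop body; PerfHelper itself is not shown in this diff, but it presumably brackets the node's execution with PerfCount::start_itr()/finish_itr(). The same RAII timing idiom in isolation, with illustrative names rather than the plugin's classes:

```cpp
#include <chrono>
#include <cstdio>

struct ScopedTimer {
    using clock = std::chrono::high_resolution_clock;
    clock::time_point start = clock::now();        // timing starts at construction
    ~ScopedTimer() {                                // ...and is reported at scope exit
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(clock::now() - start).count();
        std::printf("section took %lld us\n", static_cast<long long>(us));
    }
};

int main() {
    {
        ScopedTimer timer;
        volatile long x = 0;
        for (int i = 0; i < 1000000; ++i) x += i;   // stand-in for the timed work
    }                                               // prints the elapsed time here
    return 0;
}
```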
@@ -24,6 +24,7 @@ public:
         readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
         readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
         readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
+        readParam(verbose, "OV_CPU_VERBOSE");
     }
 
     std::string blobDumpDir;
@@ -33,9 +34,10 @@ public:
     std::string blobDumpNodeType;
     std::string blobDumpNodeName;
     std::string execGraphPath;
+    std::string verbose;
 
 private:
-    void readParam(std::string& param, const char* envVar) {
+    static void readParam(std::string& param, const char* envVar) {
         if (const char* envValue = std::getenv(envVar))
             param = envValue;
     }
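readParam() is a thin std::getenv wrapper: it copies the variable's value into the config string and leaves it untouched when the variable is unset. The pattern can be tried standalone (only the variable name is taken from the diff; the rest is illustrative):

```cpp
#include <cstdlib>
#include <iostream>
#include <string>

static void readParam(std::string& param, const char* envVar) {
    // Copy the environment value if present; keep the previous value otherwise.
    if (const char* envValue = std::getenv(envVar))
        param = envValue;
}

int main() {
    std::string verbose;
    readParam(verbose, "OV_CPU_VERBOSE");
    std::cout << "OV_CPU_VERBOSE='" << verbose << "'\n";
    return 0;
}
```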
inference-engine/src/mkldnn_plugin/utils/verbose.cpp (new file, 169 lines)
@@ -0,0 +1,169 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS

#include "verbose.h"
#include "mkldnn_node.h"
#include "cpu_types.h"
#include "memory_desc/cpu_memory_desc_utils.h"

#include "dnnl_types.h"
#include "dnnl_debug.h"

#include <string>
#include <cstdlib>
#include <sstream>
#include <iostream>

namespace MKLDNNPlugin {

bool Verbose::shouldBePrinted() const {
    if (lvl < 1)
        return false;

    if (node->isConstant() ||
        node->getType() == Input || node->getType() == Output)
        return false;
    return true;
}
/**
 * Print node verbose execution information to cout.
 * Similar to DNNL_VERBOSE output.
 * Formatting is written in C using oneDNN format functions.
 * Can be rewritten in pure C++ if necessary.
 */
void Verbose::printInfo() {
    /* 1, 2, 3, etc -> no color
     * 11, 22, 33, etc -> colorize */
    bool colorUp = lvl / 10 > 0 ? true : false;

    enum Color {
        RED,
        GREEN,
        YELLOW,
        BLUE,
        PURPLE,
        CYAN
    };

    auto colorize = [&](const Color color, const std::string& str) {
        if (!colorUp)
            return str;

        const std::string red("\033[1;31m");
        const std::string green("\033[1;32m");
        const std::string yellow("\033[1;33m");
        const std::string blue("\033[1;34m");
        const std::string purple("\033[1;35m");
        const std::string cyan("\033[1;36m");
        const std::string reset("\033[0m");
        std::string colorCode;

        switch (color) {
            case RED: colorCode = red;
                break;
            case GREEN: colorCode = green;
                break;
            case YELLOW: colorCode = yellow;
                break;
            case BLUE: colorCode = blue;
                break;
            case PURPLE: colorCode = purple;
                break;
            case CYAN: colorCode = cyan;
                break;
            default: colorCode = reset;
                break;
        }

        return colorCode + str + reset;
    };

    // can be increased if necessary
    const int CPU_VERBOSE_DAT_LEN = 512;
    char portsInfo[CPU_VERBOSE_DAT_LEN] = {'\0'};
    int written = 0;
    int written_total = 0;

    auto shift = [&](int size) {
        if (written < 0 || written_total + size > CPU_VERBOSE_DAT_LEN) {
            const char* errorMsg = "# NOT ENOUGH BUFFER SIZE #";
            snprintf(portsInfo, strlen(errorMsg) + 1, "%s", errorMsg);
            written_total = strlen(errorMsg);
            return;
        }

        written_total += size;
    };

    auto formatMemDesc = [&](const dnnl_memory_desc_t& desc, std::string& prefix) {
        prefix = colorize(BLUE, prefix);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, " ");
        shift(written);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, "%s", prefix.c_str());
        shift(written);
        written = dnnl_md2fmt_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
        shift(written);
        written = snprintf(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, ":");
        shift(written);
        written = dnnl_md2dim_str(portsInfo + written_total, CPU_VERBOSE_DAT_LEN - written_total, &desc);
        shift(written);
    };

    for (int i = 0; i < node->getParentEdges().size(); i++) {
        std::string prefix("src:" + std::to_string(i) + ':');
        formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
                          node->getParentEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
                      prefix);
    }

    for (int i = 0; i < node->getChildEdges().size(); i++) {
        std::string prefix("dst:" + std::to_string(i) + ':');
        formatMemDesc(MemoryDescUtils::convertToDnnlMemoryDesc(
                          node->getChildEdgeAt(i)->getMemory().getDesc().clone())->getDnnlDesc().data,
                      prefix);
    }

    std::string post_ops;
    if (!node->getFusedWith().empty()) {
        post_ops += "post_ops:'";
        for (const auto& fusedNode : node->getFusedWith()) {
            post_ops.append(colorize(GREEN, fusedNode->getName())).append(":")
                .append(colorize(CYAN, NameFromType(fusedNode->getType()))).append(":")
                .append(algToString(fusedNode->getAlgorithm()))
                .append(";");
        }
        post_ops += "'";
    }

    std::string nodeImplementer = "cpu";
    if (node->prim)
        nodeImplementer = "dnnl";          // oneDNN
    else if (node->getType() == Reference)
        nodeImplementer = "ngraph_ref";    // ngraph reference

    const std::string& nodeName = colorize(GREEN, node->getName());
    const std::string& nodeType = colorize(CYAN, NameFromType(node->getType()));
    const std::string& nodeAlg = algToString(node->getAlgorithm());
    const std::string& nodePrimImplType = impl_type_to_string(node->getSelectedPrimitiveDescriptor()->getImplementationType());

    stream << "ov_cpu_verbose" << ','
           << "exec" << ','
           << nodeImplementer << ','
           << nodeName << ":" << nodeType << ":" << nodeAlg << ','
           << nodePrimImplType << ','
           << portsInfo << ','
           << post_ops << ',';
}

void Verbose::printDuration() {
    const auto& duration = node->PerfCounter().duration().count();
    stream << duration << "ms";
}

void Verbose::flush() const {
    std::cout << stream.rdbuf() << "\n";
}
}  // namespace MKLDNNPlugin

#endif // CPU_DEBUG_CAPS
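The colorize lambda wraps strings in standard ANSI SGR escape sequences; the duplicated-digit trick (OV_CPU_VERBOSE=11) merely switches that wrapping on. A minimal, self-contained demonstration of the same escape codes (strings are illustrative):

```cpp
#include <iostream>
#include <string>

int main() {
    const std::string green = "\033[1;32m";
    const std::string cyan  = "\033[1;36m";
    const std::string reset = "\033[0m";
    // Most terminals render the text between the SGR code and the reset in colour.
    std::cout << green + "Conv_5" + reset << ":" << cyan + "Convolution" + reset << "\n";
    return 0;
}
```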
inference-engine/src/mkldnn_plugin/utils/verbose.h (new file, 46 lines)
@@ -0,0 +1,46 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once

#ifdef CPU_DEBUG_CAPS

#include "mkldnn_node.h"

#include <string>
#include <cstdlib>
#include <sstream>

namespace MKLDNNPlugin {

class Verbose {
public:
    Verbose(const MKLDNNNodePtr& _node, const std::string& _lvl)
        : node(_node), lvl(atoi(_lvl.c_str())) {
        if (!shouldBePrinted())
            return;
        printInfo();
    }
    virtual ~Verbose() {
        if (!shouldBePrinted())
            return;

        printDuration();
        flush();
    }
private:
    const MKLDNNNodePtr& node;
    const int lvl;
    std::stringstream stream;

    bool shouldBePrinted() const;
    void printInfo();
    void printDuration();
    void flush() const;
};

#define VERBOSE(...) Verbose(__VA_ARGS__)
} // namespace MKLDNNPlugin
#else
#define VERBOSE(...)
#endif // CPU_DEBUG_CAPS
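VERBOSE(...) expands to nothing when CPU_DEBUG_CAPS is off, and to a Verbose object otherwise, so the node record is assembled at construction (printInfo) and completed and flushed at destruction (printDuration, flush). A toy stand-in for that two-phase construct/destruct pattern, not the plugin class itself:

```cpp
#include <iostream>
#include <sstream>
#include <string>

class Trace {
public:
    explicit Trace(const std::string& what) { stream << "enter:" << what; }  // first half collected here
    ~Trace() { stream << ",leave"; std::cout << stream.str() << "\n"; }      // completed and flushed here
private:
    std::stringstream stream;
};

#define TRACE(what) Trace trace_guard(what)

void step() {
    TRACE("step");
    // ... the traced work would run here ...
}                       // guard destroyed at scope exit -> "enter:step,leave"

int main() {
    step();
    return 0;
}
```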
@@ -12,6 +12,7 @@
 #include <algorithm>
 #include <cctype>
 #include <functional>
+#include <iterator>
 #include <map>
 #include <set>
 #include <unordered_map>