ITT performance counters for first inference (#1741)
* ITT performance counters for first inference; ITT counters for nGraph passes
* RTTI for transformation passes
* The MKLDNN plugin first inference counters improvements
This commit is contained in: parent 121f75f49d, commit be3df1837c
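For orientation, the instrumentation added throughout this change follows one pattern: each component declares ITT domains (a regular one plus a "*_LT" load-time / first-inference one) and wraps code regions in scoped tasks or task chains. Below is a minimal sketch of that pattern; the plugin namespace, domain name, and function are illustrative only, and it assumes the openvino::itt header that the per-component wrappers in this diff (mkldnn_itt.h, ie_itt.hpp, ie_ir_itt.hpp, ...) build on.

#include <openvino/itt.hpp>

namespace MyPlugin {
namespace itt {
namespace domains {
    OV_ITT_DOMAIN(MyPlugin_LT);   // illustrative load-time domain, mirrors IE_LT / MKLDNN_LT below
}
}
}

void LoadNetworkStages() {
    // A single named region for a whole first-inference phase...
    OV_ITT_SCOPED_TASK(MyPlugin::itt::domains::MyPlugin_LT, "LoadNetworkStages");

    // ...or a chain of stages reported as "LoadNetwork_CloneNet", "LoadNetwork_Transform", ...
    OV_ITT_TASK_CHAIN(taskChain, MyPlugin::itt::domains::MyPlugin_LT, "LoadNetwork", "CloneNet");
    // ... stage 1 runs here ...
    OV_ITT_TASK_NEXT(taskChain, "Transform");
    // ... stage 2 runs here ...
    OV_ITT_TASK_SKIP(taskChain);  // close the chain early once nothing else needs timing
}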
@@ -332,6 +332,8 @@ public:
* @return Reference to a CPP plugin wrapper
*/
InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName");

std::lock_guard<std::mutex> lock(pluginsMutex);

auto it = pluginRegistry.find(deviceName);
@@ -27,6 +27,7 @@ namespace InferenceEngine {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(IE);
OV_ITT_DOMAIN(IE_LT);
}
}
}
@@ -27,6 +27,7 @@ class INFERENCE_ENGINE_API_CLASS(ConvertMatMulToGemm);

class ngraph::pass::ConvertMatMulToFCorGemm: public ngraph::pass::GraphRewrite {
public:
NGRAPH_RTTI_DECLARATION;
ConvertMatMulToFCorGemm() {
add_matcher<ngraph::pass::ConvertMatMulToFC>();
add_matcher<ngraph::pass::ConvertMatMulToGemm>();

@@ -32,12 +32,9 @@ class INFERENCE_ENGINE_API_CLASS(ConvertMulOrAddFinally);

class ngraph::pass::ConvertMulOrAddFinally: public ngraph::pass::GraphRewrite {
public:
NGRAPH_RTTI_DECLARATION;
// This pass finally converts single Multiply and Add operations to ScaleShift or Power operation
ConvertMulOrAddFinally() : GraphRewrite() {
convert_mul_or_add_finally<ngraph::opset1::Add>();
convert_mul_or_add_finally<ngraph::opset1::Subtract>();
convert_mul_or_add_finally<ngraph::opset1::Multiply>();
}
ConvertMulOrAddFinally();

private:
template<typename T>
@@ -31,6 +31,7 @@ class INFERENCE_ENGINE_API_CLASS(ConvertRNNSequenceMatcher);

class ngraph::pass::ConvertLSTMSequenceMatcher : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertLSTMSequenceMatcher();
};

@@ -43,6 +44,7 @@ public:

class ngraph::pass::ConvertGRUSequenceMatcher : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertGRUSequenceMatcher();
};

@@ -55,5 +57,6 @@ public:

class ngraph::pass::ConvertRNNSequenceMatcher : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertRNNSequenceMatcher();
};
@@ -145,4 +145,5 @@ ngraph::pass::ConvertRNNCellMatcher::ConvertRNNCellMatcher() {

auto m = std::make_shared<ngraph::pattern::Matcher>(rnn_cell_ngraph, "ConvertRNNCellToRNNCellIE");
this->register_matcher(m, callback);
}
}

@@ -37,4 +37,4 @@ ngraph::pass::ConvertGatherTreeToGatherTreeIEMatcher::ConvertGatherTreeToGatherT

auto m = std::make_shared<ngraph::pattern::Matcher>(gt, "ConvertGatherTreeToGatherTreeIE");
this->register_matcher(m, callback);
}
}

@@ -54,4 +54,4 @@ ngraph::pass::ConvertHardSigmoidToLegacyMatcher::ConvertHardSigmoidToLegacyMatch

auto m = std::make_shared<ngraph::pattern::Matcher>(node, "ConvertHardSigmoidToLegacy");
this->register_matcher(m, callback);
}
}
@@ -18,6 +18,7 @@
#include <legacy/ngraph_ops/fully_connected.hpp>
#include <transformations/utils/utils.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatMulToFCorGemm, "ConvertMatMulToFCorGemm", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatMulToFC, "ConvertMatMulToFC", 0);

ngraph::pass::ConvertMatMulToFC::ConvertMatMulToFC() {

@@ -0,0 +1,13 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "legacy/transformations/convert_opset1_to_legacy/convert_mul_or_add_finally.hpp"

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMulOrAddFinally, "ConvertMulOrAddFinally", 0);

ngraph::pass::ConvertMulOrAddFinally::ConvertMulOrAddFinally() : GraphRewrite() {
convert_mul_or_add_finally<ngraph::opset1::Add>();
convert_mul_or_add_finally<ngraph::opset1::Subtract>();
convert_mul_or_add_finally<ngraph::opset1::Multiply>();
}
@@ -101,4 +101,4 @@ ngraph::pass::ConvertNMSToNMSIEMatcher::ConvertNMSToNMSIEMatcher() {

auto m = std::make_shared<ngraph::pattern::Matcher>(nms, "ConvertNMSToNMSIE");
this->register_matcher(m, callback);
}
}

@@ -46,8 +46,6 @@
#include <transformations/common_optimizations/conv_bias_fusion.hpp>
#include <transformations/op_conversions/convert_convolutions.hpp>

#include "ie_legacy_itt.hpp"

#include <ngraph/pass/constant_folding.hpp>
#include <ngraph/pass/manager.hpp>
@@ -59,7 +57,6 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertOpSet1ToLegacy, "ConvertOpSet1ToLegacy", 0);

bool ngraph::pass::ConvertOpSet1ToLegacy::run_on_function(std::shared_ptr<ngraph::Function> f) {
OV_ITT_SCOPED_TASK(InferenceEngine::itt::domains::IELegacy, "ngraph::pass::ConvertOpSet1ToLegacy");

ngraph::pass::Manager manager(get_pass_config());

@@ -31,4 +31,4 @@ ngraph::pass::ConvertPadToLegacyMatcher::ConvertPadToLegacyMatcher() {

auto m = std::make_shared<ngraph::pattern::Matcher>(m_pad, "ConvertPadToLegacy");
this->register_matcher(m, callback);
}
}

@@ -51,4 +51,4 @@ ngraph::pass::ConvertSeluToSeluIEMatcher::ConvertSeluToSeluIEMatcher() {

auto m = std::make_shared<ngraph::pattern::Matcher>(selu, "ConvertSeluToSeluIE");
this->register_matcher(m, callback);
}
}
@@ -14,6 +14,10 @@
#include <legacy/ngraph_ops/gru_sequence_ie.hpp>
#include <legacy/ngraph_ops/rnn_sequence_ie.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertLSTMSequenceMatcher, "ConvertLSTMSequenceMatcher", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertGRUSequenceMatcher, "ConvertGRUSequenceMatcher", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertRNNSequenceMatcher, "ConvertRNNSequenceMatcher", 0);

ngraph::pass::ConvertLSTMSequenceMatcher::ConvertLSTMSequenceMatcher() {
auto lstm_sequence_ngraph = ngraph::pattern::wrap_type<ngraph::opset5::LSTMSequence>();

@@ -45,4 +45,4 @@ ngraph::pass::ConvertSwishToSwishIEMatcher::ConvertSwishToSwishIEMatcher() {

auto m = std::make_shared<ngraph::pattern::Matcher>(swish, "ConvertSwishToSwishIE");
this->register_matcher(m, callback);
}
}

@@ -173,4 +173,5 @@ ngraph::pass::Reshape1DMaxPool::Reshape1DMaxPool() {
auto pool = ngraph::pattern::wrap_type<opset1::MaxPool>(pattern::has_static_shape());
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "Reshape1DMaxPool");
this->register_matcher(m, get_callback());
}
}
@@ -44,7 +44,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
extensionManager(extMgr),
_cfg{cfg},
_name{network.getName()} {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNExecNetwork::MKLDNNExecNetwork");
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet");

// we are cloning network if we have statistics and we can transform network.
_clonedNetwork = cloneNet(network);

@@ -78,8 +78,10 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
}
}

OV_ITT_TASK_NEXT(taskChain, "UnrollPasses");
MKLDNNGraph::ApplyUnrollPasses(static_cast<ICNNNetwork&>(*_clonedNetwork));

OV_ITT_TASK_NEXT(taskChain, "createConstInputs");
auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, std::string name) {
LayerParams attrs = {layer.get()->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()};
auto constLayer = std::make_shared<InferenceEngine::CNNLayer>(attrs);

@@ -133,6 +135,8 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
}
}

OV_ITT_TASK_SKIP(taskChain);

if (_cfg.batchLimit > 1) {
// check topology for applicability
if (!CanProcessDynBatch(*_clonedNetwork)) {

@@ -159,6 +163,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
// TODO: Remove `cloneNet` to `localNetwork` when `MKLDNNGraph::CreateGraph`
// is fixed and does not change content of network passed (CVS-26420)
auto localNetwork = cloneNet(static_cast<ICNNNetwork&>(*_clonedNetwork));

auto graph = std::make_shared<MKLDNNGraph>();
{
std::unique_lock<std::mutex> lock{_cfgMutex};

@@ -169,6 +174,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
if (nullptr != streamExecutor) {
numaNode = streamExecutor->GetNumaNodeId();
}

graph->CreateGraph(static_cast<ICNNNetwork&>(*localNetwork), extensionManager, numaNodesWeights[numaNode]);
return graph;
}};
@@ -89,6 +89,8 @@ template void MKLDNNGraph::ApplyUnrollPasses(ICNNNetwork&);
template<typename NET>
void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNWeightsSharing::Ptr &w_cache) {
OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph");

if (IsReady())
ForgetGraphData();
// disable caching if graph was created only once

@@ -332,44 +334,25 @@ void MKLDNNGraph::InitGraph() {

SortTopologically();
InitNodes();

optimizer.ApplyCommonGraphOptimizations(*this);
SortTopologically();

InitDescriptors();

for (auto &node : graphNodes) {
node->initOptimalPrimitiveDescriptor();
}
InitOptimalPrimitiveDescriptors();

InitEdges();

optimizer.ApplyImplSpecificGraphOptimizations(*this);

SortTopologically();

Allocate();

CreatePrimitives();

// Do it before cleanup. Because it will lose original layers information
for (auto &graphNode : graphNodes) {
auto nodeType = graphNode->getType();
if (nodeType == Reorder || nodeType == Output) continue;
SetOriginalLayerNames();

if (graphNode->getOriginalLayers().empty()) {
graphNode->addOriginalLayer(graphNode->getCnnLayer());
}

if (graphNode->getFusedWith().size() || graphNode->getMergeWith().size()) {
// Original layer names
std::vector<MKLDNNNodePtr> internal = graphNode->getFusedWith();
auto &merged = graphNode->getMergeWith();
internal.insert(internal.end(), merged.begin(), merged.end());

for (auto &sub_node : internal) {
graphNode->addOriginalLayer(sub_node->getCnnLayer());
}
}
}
if (!config.dumpToDot.empty())
dumpToDotFile(config.dumpToDot + "_init.dot");
@@ -397,21 +380,44 @@ void MKLDNNGraph::InitGraph() {
}
#endif

mkldnn::stream stream = mkldnn::stream(stream::kind::eager);
ExecuteConstantNodesOnly();
}

void MKLDNNGraph::SetOriginalLayerNames() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames");

// Do it before cleanup. Because it will lose original layers information
for (auto &graphNode : graphNodes) {
if (!graphNode->isConstant())
continue;
graphNode->execute(stream);
auto nodeType = graphNode->getType();
if (nodeType == Reorder || nodeType == Output) continue;

if (graphNode->getOriginalLayers().empty()) {
graphNode->addOriginalLayer(graphNode->getCnnLayer());
}

if (graphNode->getFusedWith().size() || graphNode->getMergeWith().size()) {
// Original layer names
std::vector<MKLDNNNodePtr> internal = graphNode->getFusedWith();
auto &merged = graphNode->getMergeWith();
internal.insert(internal.end(), merged.begin(), merged.end());

for (auto &sub_node : internal) {
graphNode->addOriginalLayer(sub_node->getCnnLayer());
}
}
}
}

void MKLDNNGraph::InitNodes() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes");
for (auto &node : graphNodes) {
node->init();
}
}

void MKLDNNGraph::InitDescriptors() {
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare");

for (auto &node : graphNodes) {
#if defined (COMPILED_CPU_MKLDNN_INPUT_NODE)
if (node->getType() == Input && _meanImages.find(node->getName()) != _meanImages.end()) {
@@ -420,18 +426,43 @@ void MKLDNNGraph::InitDescriptors() {
inputNode->withMeanImage();
}
#endif
OV_ITT_TASK_NEXT(taskChain, node->profiling.getSupportedDescriptors);
node->getSupportedDescriptors();

OV_ITT_TASK_NEXT(taskChain, node->profiling.initSupportedPrimitiveDescriptors);
node->initSupportedPrimitiveDescriptors();

OV_ITT_TASK_NEXT(taskChain, node->profiling.filterSupportedPrimitiveDescriptors);
node->filterSupportedPrimitiveDescriptors();
}

for (auto &node : graphNodes) {
OV_ITT_TASK_NEXT(taskChain, node->profiling.selectOptimalPrimitiveDescriptor);
node->selectOptimalPrimitiveDescriptor();
}
}

void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::InitOptimalPrimitiveDescriptors");
for (auto &node : graphNodes) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor);
node->initOptimalPrimitiveDescriptor();
}
}

void MKLDNNGraph::ExecuteConstantNodesOnly() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
mkldnn::stream stream = mkldnn::stream(stream::kind::eager);
for (auto &graphNode : graphNodes) {
if (!graphNode->isConstant())
continue;
graphNode->execute(stream);
}
}

void MKLDNNGraph::InitEdges() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");

auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
std::string inArgs, outArgs;
if (parentDesc.getPrecision() != childDesc.getPrecision()) {
@@ -669,6 +700,8 @@ void MKLDNNGraph::AllocateWithReuse() {
}

void MKLDNNGraph::Allocate() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate");

// resolve edges. Define which will be a view on others
// NeedAllocation - real blob
// NotAllocated - view on other blob, peer or in-place

@@ -687,6 +720,7 @@ void MKLDNNGraph::Allocate() {
void MKLDNNGraph::CreatePrimitives() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::CreatePrimitives");
for (auto& node : graphNodes) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.createPrimitive);
node->createPrimitive();
}
}

@@ -789,7 +823,7 @@ void MKLDNNGraph::Infer(int batch) {
ENABLE_DUMP(do_before(DUMP_DIR, graphNodes[i]));

if (!graphNodes[i]->isConstant()) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, graphNodes[i]->profilingTask);
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, graphNodes[i]->profiling.execute);
graphNodes[i]->execute(stream);
}

@@ -821,6 +855,8 @@ void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sort
}

void MKLDNNGraph::SortTopologically() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically");

std::vector<MKLDNNNodePtr> unsorted;
std::vector<MKLDNNNodePtr> sorted;
@@ -140,10 +140,13 @@ protected:
void InitGraph();
void InitNodes();
void InitDescriptors();
void InitOptimalPrimitiveDescriptors();
void InitEdges();
void Allocate();
void AllocateWithReuse();
void CreatePrimitives();
void ExecuteConstantNodesOnly();
void SetOriginalLayerNames();

void do_before(const std::string &dir, const MKLDNNNodePtr &node);
void do_after(const std::string &dir, const MKLDNNNodePtr &node);
@@ -38,6 +38,8 @@
#include <set>
#include <algorithm>

#include "mkldnn_itt.h"

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;

@@ -45,6 +47,8 @@ using namespace InferenceEngine;
MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}

void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations");

MergeTwoEqualScaleShifts(graph);
graph.RemoveDroppedNodes();

@@ -134,6 +138,8 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
}

void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations");

RemoveIOScaleShifts(graph);
graph.RemoveDroppedNodes();
@@ -27,6 +27,7 @@ namespace MKLDNNPlugin {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(MKLDNNPlugin);
OV_ITT_DOMAIN(MKLDNN_LT);
}
}
}

@@ -4,11 +4,14 @@

#include "mkldnn_node.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_itt.h"

#include "caseless.hpp"
#include <vector>
#include <string>
#include <limits>
#include <cstdint>
#include <unordered_map>

#include <nodes/mkldnn_batchnorm_node.h>
#include <nodes/mkldnn_concat_node.h>

@@ -158,7 +161,7 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::
MKLDNNWeightsSharing::Ptr &w_cache)
: selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown),
weightCache(w_cache), cnnLayer(layer), engine(eng), name(layer->name), typeStr(layer->type),
type(TypeFromName(layer->type)), profilingTask(itt::handle(name)) {
type(TypeFromName(layer->type)), profiling(layer->name) {
if (!layer->outData.empty()) {
for (const auto& outData : layer->outData) {
outDims.emplace_back(outData->getDims());
|
||||
template<typename To>
|
||||
class Registrar;
|
||||
|
||||
template<typename T, int N>
|
||||
struct Tag {};
|
||||
|
||||
struct PerfCounters {
|
||||
PerfCounters(std::string const& name)
|
||||
: execute(openvino::itt::handle(name))
|
||||
, getSupportedDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 0>>("MKLDNNNode::getSupportedDescriptors"))
|
||||
, initSupportedPrimitiveDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 1>>("MKLDNNNode::initSupportedPrimitiveDescriptors"))
|
||||
, filterSupportedPrimitiveDescriptors(openvino::itt::handle<Tag<MKLDNNNode, 2>>("MKLDNNNode::filterSupportedPrimitiveDescriptors"))
|
||||
, selectOptimalPrimitiveDescriptor(openvino::itt::handle<Tag<MKLDNNNode, 3>>("MKLDNNNode::selectOptimalPrimitiveDescriptor"))
|
||||
, createPrimitive(openvino::itt::handle<Tag<MKLDNNNode, 4>>("MKLDNNNode::createPrimitive"))
|
||||
, initOptimalPrimitiveDescriptor(openvino::itt::handle<Tag<MKLDNNNode, 5>>("MKLDNNNode::initOptimalPrimitiveDescriptor"))
|
||||
{}
|
||||
|
||||
template<typename NodeType>
|
||||
void buildClassCounters(const std::string& type_name) {
|
||||
getSupportedDescriptors = openvino::itt::handle<Tag<NodeType, 0>>(type_name + "::getSupportedDescriptors");
|
||||
initSupportedPrimitiveDescriptors = openvino::itt::handle<Tag<NodeType, 1>>(type_name + "::initSupportedPrimitiveDescriptors");
|
||||
filterSupportedPrimitiveDescriptors = openvino::itt::handle<Tag<NodeType, 2>>(type_name + "::filterSupportedPrimitiveDescriptors");
|
||||
selectOptimalPrimitiveDescriptor = openvino::itt::handle<Tag<NodeType, 3>>(type_name + "::selectOptimalPrimitiveDescriptor");
|
||||
createPrimitive = openvino::itt::handle<Tag<NodeType, 4>>(type_name + "::createPrimitive");
|
||||
initOptimalPrimitiveDescriptor = openvino::itt::handle<Tag<NodeType, 5>>(type_name + "::initOptimalPrimitiveDescriptor");
|
||||
}
|
||||
|
||||
openvino::itt::handle_t execute;
|
||||
openvino::itt::handle_t getSupportedDescriptors;
|
||||
openvino::itt::handle_t initSupportedPrimitiveDescriptors;
|
||||
openvino::itt::handle_t filterSupportedPrimitiveDescriptors;
|
||||
openvino::itt::handle_t selectOptimalPrimitiveDescriptor;
|
||||
openvino::itt::handle_t createPrimitive;
|
||||
openvino::itt::handle_t initOptimalPrimitiveDescriptor;
|
||||
};
|
||||
|
||||
static Factory & factory();
|
||||
|
||||
~MKLDNNNode() override = default;
|
||||
@@ -474,6 +507,14 @@ public:
return desc.outputNumbers();
}

const PerfCounters & perfCounters() const {
return profiling;
}

PerfCounters & perfCounters() {
return profiling;
}

protected:
// TODO: It is necessary only in order to avoid modifications of cnnLayers and original topology
std::vector<MKLDNNDims> outDims;

@@ -563,7 +604,7 @@ private:
std::string typeToStr(Type type);

PerfCount perfCounter;
openvino::itt::handle_t profilingTask;
PerfCounters profiling;

bool isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const;

@@ -611,6 +652,7 @@ public:
[type](const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &w_cache) -> MKLDNNNode* {
MKLDNNNode *node = new To(layer, eng, w_cache);
node->perfCounters().buildClassCounters<To>(NameFromType(type));
return node;
});
}
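The PerfCounters member introduced above keeps one ITT handle per MKLDNNNode phase, and the factory registration shown just above retargets those handles per concrete node type via buildClassCounters<To>(NameFromType(type)). The following is a minimal sketch of the underlying idea only; the node type and counter names are illustrative, and it assumes the openvino/itt.hpp header that the plugin reaches through its mkldnn_itt.h wrapper.

#include <string>
#include <openvino/itt.hpp>

struct MyNode {};                            // illustrative stand-in for an MKLDNN node class
template <typename T, int N> struct Tag {};  // a distinct Tag<> type per (class, slot) pair

struct Counters {
    // Each slot starts with a generic, statically cached handle...
    openvino::itt::handle_t createPrimitive =
        openvino::itt::handle<Tag<MyNode, 0>>("MyNode::createPrimitive");

    template <typename NodeType>
    void buildClassCounters(const std::string& type_name) {
        // ...and is re-bound to a per-type handle, which is what PerfCounters does above,
        // so the counter is later reported as e.g. "Convolution::createPrimitive".
        createPrimitive = openvino::itt::handle<Tag<NodeType, 0>>(type_name + "::createPrimitive");
    }
};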
@@ -87,8 +87,6 @@ Engine::~Engine() {
}

static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf) {
OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNNPlugin, "Transformation");

auto nGraphFunc = clonedNetwork->getFunction();
// Disable shape inference (WA for generic operations)
ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);

@@ -198,8 +196,12 @@ static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf)
});
legacyManager.run_passes(nGraphFunc);

OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");

clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);

OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision");

// WA: after conversion to CNNNetwork user precision can redefine input/output precisions
// so we need to apply additional precision conversion but only for inputs and outputs
for (auto & precision : convert_precision_list) {

@@ -241,6 +243,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st
}

std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);

bool is_transformed = false;
if (clonedNetwork->getFunction()) {
Transformation(clonedNetwork, conf);

@@ -248,6 +251,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st
}
auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(clonedNetwork);
if (implNetwork) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding");
// valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
ConstTransformer transformator(implNetwork.get());
transformator.fullTrim();
@@ -19,6 +19,8 @@
#include "cpp_interfaces/base/ie_executable_network_base.hpp"
#include "cpp_interfaces/impl/ie_executable_network_internal.hpp"
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
#include "cpp_interfaces/plugin_itt.hpp"

using namespace InferenceEngine;
using namespace InferenceEngine::details;

@@ -27,6 +27,7 @@ namespace InferenceEngine {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(Plugin)
OV_ITT_DOMAIN(Plugin_LT)
}
}
}

@@ -27,6 +27,7 @@ namespace InferenceEngine {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(V10Reader);
OV_ITT_DOMAIN(V10Reader_RT);
}
}
}
@@ -3,6 +3,7 @@
//

#include "ie_ir_parser.hpp"
#include "ie_ir_itt.hpp"

#include <typeinfo>
#include <unordered_set>

@@ -83,6 +84,8 @@ V10Parser::V10Parser(const std::vector<IExtensionPtr>& exts) : _exts(exts) {
}

std::shared_ptr<ICNNNetwork> V10Parser::parse(const pugi::xml_node& root, std::istream& binStream) {
OV_ITT_TASK_CHAIN(taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse");

using node_params = struct {
pugi::xml_node xml;
GenericLayerParams params;

@@ -130,6 +133,8 @@ std::shared_ptr<ICNNNetwork> V10Parser::parse(const pugi::xml_node& root, std::i
};
std::for_each(outputs.begin(), outputs.end(), dfs);

OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphNodes");

ngraph::ParameterVector parameter_nodes;
ngraph::ResultVector result_nodes;
ngraph::NodeVector allNodes;

@@ -186,6 +191,8 @@ std::shared_ptr<ICNNNetwork> V10Parser::parse(const pugi::xml_node& root, std::i
allNodes.emplace_back(node);
}

OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphFunction");

::ngraph::op::GenericIE::DisableReshape noReshape(allNodes);
auto function = std::make_shared<ngraph::Function>(result_nodes, assign_nodes, parameter_nodes, GetStrAttr(root, "name", ""));
for (const auto& assign : assign_nodes) {

@@ -193,8 +200,12 @@ std::shared_ptr<ICNNNetwork> V10Parser::parse(const pugi::xml_node& root, std::i
variable_id_to_read_value.at(std::dynamic_pointer_cast<ngraph::op::Assign>(assign)->get_variable_id()));
}

OV_ITT_TASK_NEXT(taskChain, "ConstructCNNNetwork");

CNNNetwork net(function, _exts);

parsePreProcess(net, root, binStream);

return net;
}
@@ -31,5 +31,6 @@ class TRANSFORMATIONS_API AlgebraicSimplification;

class ngraph::pass::AlgebraicSimplification : public FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
};

@@ -36,6 +36,7 @@ class TRANSFORMATIONS_API DeconvAddFusion;

class ngraph::pass::ConvFusion: public ngraph::pass::GraphRewrite {
public:
NGRAPH_RTTI_DECLARATION;
ConvFusion() : GraphRewrite() {
add_matcher<ngraph::pass::ConvAddFusion>();
add_matcher<ngraph::pass::ConvMultiplyFusion>();

@@ -28,5 +28,6 @@ class TRANSFORMATIONS_API FakeQuantizeMulFusion;

class ngraph::pass::FakeQuantizeMulFusion : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
FakeQuantizeMulFusion();
};

@@ -28,5 +28,6 @@ class TRANSFORMATIONS_API FakeQuantizeReshapeFusion;

class ngraph::pass::FakeQuantizeReshapeFusion : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
FakeQuantizeReshapeFusion();
};

@@ -22,5 +22,6 @@ class TRANSFORMATIONS_API NopElimination;

class ngraph::pass::NopElimination: public ngraph::pass::FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
bool run_on_function(std::shared_ptr<Function>) override;
};

@@ -69,12 +69,7 @@ public:
class ngraph::pass::StridedSliceOptimization: public ngraph::pass::FunctionPass {
public:
NGRAPH_RTTI_DECLARATION;
bool run_on_function(std::shared_ptr<ngraph::Function> f) override {
bool rewritten = UselessStridedSliceEraser().run_on_function(f);
rewritten |= SharedStridedSliceEraser().run_on_function(f);
rewritten |= GroupedStridedSliceOptimizer().run_on_function(f);
return rewritten;
}
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
};

ngraph::SlicePlan get_slice_plan(std::shared_ptr<ngraph::opset1::StridedSlice> slice);
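All of the pass headers above gain NGRAPH_RTTI_DECLARATION, which pairs with an NGRAPH_RTTI_DEFINITION in the matching source file (those definitions appear later in this diff). A minimal sketch of the pairing for a hypothetical pass, assuming the usual ngraph pass headers:

// my_pass.hpp -- illustrative pass, not part of this change
#include <ngraph/pass/graph_rewrite.hpp>

namespace ngraph {
namespace pass {

class MyPass : public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;   // declares the static type_info used to identify the pass
    MyPass();
};

}  // namespace pass
}  // namespace ngraph

// my_pass.cpp
NGRAPH_RTTI_DEFINITION(ngraph::pass::MyPass, "MyPass", 0);   // type name and version

ngraph::pass::MyPass::MyPass() {
    // matcher registration would go here
}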
@@ -30,6 +30,7 @@ class TRANSFORMATIONS_API BidirectionalRNNSequenceDecomposition;

class ngraph::pass::BidirectionalLSTMSequenceDecomposition : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
BidirectionalLSTMSequenceDecomposition();
};

@@ -41,6 +42,7 @@ public:

class ngraph::pass::BidirectionalGRUSequenceDecomposition : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
BidirectionalGRUSequenceDecomposition();
};

@@ -52,5 +54,6 @@ public:

class ngraph::pass::BidirectionalRNNSequenceDecomposition : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
BidirectionalRNNSequenceDecomposition();
};
@@ -26,6 +26,7 @@ class TRANSFORMATIONS_API ConvertGroupDeconvolution;

class ngraph::pass::ConvertConvolutions: public ngraph::pass::GraphRewrite {
public:
NGRAPH_RTTI_DECLARATION;
ConvertConvolutions() {
add_matcher<ngraph::pass::ConvertConvolution>();
add_matcher<ngraph::pass::ConvertGroupConvolution>();

@@ -37,6 +37,7 @@ public:

class ngraph::pass::ConvertReduceToPooling: public ngraph::pass::GraphRewrite {
public:
NGRAPH_RTTI_DECLARATION;
ConvertReduceToPooling() {
add_matcher<ConvertReduceMeanToPooling>();
add_matcher<ConvertReduceMaxToPooling>();

@@ -46,6 +47,7 @@ public:

class ngraph::pass::ConvertReduceMeanToPooling: public ConvertReduceBase {
public:
NGRAPH_RTTI_DECLARATION;
ConvertReduceMeanToPooling() {
auto m = std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<opset1::ReduceMean>({pattern::any_input(pattern::has_static_shape()),

@@ -57,6 +59,7 @@ public:

class ngraph::pass::ConvertReduceMaxToPooling: public ConvertReduceBase {
public:
NGRAPH_RTTI_DECLARATION;
ConvertReduceMaxToPooling() {
auto m = std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<opset1::ReduceMax>({pattern::any_input(pattern::has_static_shape()),

@@ -68,6 +71,7 @@ public:

class ngraph::pass::ConvertReduceSumToPooling: public ConvertReduceBase {
public:
NGRAPH_RTTI_DECLARATION;
ConvertReduceSumToPooling() {
auto m = std::make_shared<ngraph::pattern::Matcher>(
ngraph::pattern::wrap_type<opset1::ReduceSum>({pattern::any_input(pattern::has_static_shape()),
@@ -32,6 +32,7 @@ class TRANSFORMATIONS_API ConvertTensorIteratorToGRUSequence;

class ngraph::pass::ConvertTensorIteratorToLSTMSequence: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertTensorIteratorToLSTMSequence();
};

@@ -43,6 +44,7 @@ public:

class ngraph::pass::ConvertTensorIteratorToRNNSequence: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertTensorIteratorToRNNSequence();
};

@@ -54,5 +56,6 @@ public:

class ngraph::pass::ConvertTensorIteratorToGRUSequence: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertTensorIteratorToGRUSequence();
};
@@ -28,6 +28,8 @@
using namespace std;
using namespace ngraph;

NGRAPH_RTTI_DEFINITION(ngraph::pass::AlgebraicSimplification, "AlgebraicSimplification", 0);

//`simplify_gather`, optimizes gather if Gather is gathering the
// whole input tensor
static bool simplify_gather(std::shared_ptr<Node> node) {

@@ -52,8 +52,6 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::CommonOptimizations, "CommonOptimizations", 0);

bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::Function> f) {
OV_ITT_SCOPED_TASK(itt::domains::IETransform, "ngraph::pass::CommonOptimizations");

ngraph::pass::Manager manager(get_pass_config());

// This pass must be called first in pipeline

@@ -19,6 +19,8 @@

using namespace ngraph;

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvFusion, "ConvFusion", 0);

template <class A, class B>
std::pair<std::shared_ptr<A>, std::shared_ptr<B>> parse_eltwise_inputs(std::shared_ptr<ngraph::Node> node) {
auto eltwise = std::dynamic_pointer_cast<A>(node->input(0).get_source_output().get_node_shared_ptr());

@@ -269,4 +269,4 @@ ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion::GroupConvolutionBackpr

auto m = std::make_shared<ngraph::pattern::Matcher>(mul, "GroupConvolutionMultiplyFusion");
register_matcher(m, callback);
}
}
@@ -12,6 +12,8 @@
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeMulFusion, "FakeQuantizeMulFusion", 0);

namespace {
std::pair<ngraph::Output<ngraph::Node>, ngraph::Output<ngraph::Node>>
get_adjusted_output_range(ngraph::Output<ngraph::Node> out_low,

@@ -11,6 +11,8 @@
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeReshapeFusion, "FakeQuantizeReshapeFusion", 0);

ngraph::pass::FakeQuantizeReshapeFusion::FakeQuantizeReshapeFusion() {
const auto fq_node_p = ngraph::pattern::wrap_type<opset4::FakeQuantize>(
{ngraph::pattern::wrap_type<opset4::Constant>(), // for weights only

@@ -324,6 +324,8 @@ static bool eliminate_squeeze(const std::shared_ptr<Node>& node) {
return false;
}

NGRAPH_RTTI_DEFINITION(ngraph::pass::NopElimination, "NopElimination", 0);

bool pass::NopElimination::run_on_function(std::shared_ptr<Function> function) {
static const std::unordered_map<NodeTypeInfo, std::function<bool(const std::shared_ptr<Node>&)>>
dispatcher{{TI(opset3::Pad), &eliminate_nop},

@@ -231,3 +231,9 @@ bool ngraph::pass::GroupedStridedSliceOptimizer::run_on_function(std::shared_ptr
return graph_rewritten;
}

bool ngraph::pass::StridedSliceOptimization::run_on_function(std::shared_ptr<ngraph::Function> f) {
bool rewritten = UselessStridedSliceEraser().run_on_function(f);
rewritten |= SharedStridedSliceEraser().run_on_function(f);
rewritten |= GroupedStridedSliceOptimizer().run_on_function(f);
return rewritten;
}
@@ -105,4 +105,4 @@ void ngraph::pass::RemoveFilteringBoxesBySize::remove_filtering_boxes_by_size()

auto m = std::make_shared<ngraph::pattern::Matcher>(cast, "RemoveFilteringBoxesBySize");
this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
}
}

@@ -28,7 +28,8 @@ namespace pass {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(IETransform);
}
}
}
}
OV_ITT_DOMAIN(nGraphPass_LT);
} // namespace domains
} // namespace itt
} // namespace pass
} // namespace ngraph
@@ -11,6 +11,10 @@
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::BidirectionalLSTMSequenceDecomposition, "BidirectionalLSTMSequenceDecomposition", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::BidirectionalGRUSequenceDecomposition, "BidirectionalGRUSequenceDecomposition", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::BidirectionalRNNSequenceDecomposition, "BidirectionalRNNSequenceDecomposition", 0);

ngraph::pass::BidirectionalLSTMSequenceDecomposition::BidirectionalLSTMSequenceDecomposition() {
auto lstm_sequence_ngraph = ngraph::pattern::wrap_type<ngraph::opset5::LSTMSequence>();

@@ -222,4 +222,4 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space_by_elements() {

auto m = std::make_shared<ngraph::pattern::Matcher>(batch_to_space, "ConvertBatchToSpace");
this->register_matcher(m, callback);
}
}
@@ -15,6 +15,7 @@

#include <ngraph/pattern/op/wrap_type.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertConvolutions, "ConvertConvolutions", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertConvolution, "ConvertConvolution", 0);

ngraph::pass::ConvertConvolution::ConvertConvolution() {

@@ -36,4 +36,4 @@ ngraph::pass::ConvertDivide::ConvertDivide() {

auto m = std::make_shared<ngraph::pattern::Matcher>(div, "ConvertDivide");
this->register_matcher(m, callback);
}
}

@@ -45,4 +45,4 @@ ngraph::pass::ConvertMinimum::ConvertMinimum() {

auto m = std::make_shared<ngraph::pattern::Matcher>(minimum, "ConvertMinimum");
this->register_matcher(m, callback);
}
}

@@ -47,4 +47,4 @@ ngraph::pass::ConvertMod::ConvertMod() {

auto m = std::make_shared<ngraph::pattern::Matcher>(mod, "ConvertMod");
this->register_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
}
}

@@ -32,4 +32,4 @@ ngraph::pass::ConvertNegative::ConvertNegative() {

auto m = std::make_shared<ngraph::pattern::Matcher>(neg, "ConvertNegative");
this->register_matcher(m, callback);
}
}
@@ -0,0 +1,10 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/op_conversions/convert_reduce_to_pooling.hpp"

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertReduceToPooling, "ConvertReduceToPooling", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertReduceMeanToPooling, "ConvertReduceMeanToPooling", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertReduceMaxToPooling, "ConvertReduceMaxToPooling", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertReduceSumToPooling, "ConvertReduceSumToPooling", 0);

@@ -211,3 +211,4 @@ void ngraph::pass::ConvertScatterElementsToScatter::convert_scatter_elements_to_
auto m = std::make_shared<ngraph::pattern::Matcher>(scatter, "ConvertScatterElementsToScatter");
this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
}
@@ -44,4 +44,4 @@ void ngraph::pass::ConvertShapeOf3::convert_shapeof3() {

auto m = std::make_shared<ngraph::pattern::Matcher>(shapeof, "ConvertShapeOf3");
this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
}
}

@@ -99,4 +99,4 @@ void ngraph::pass::ConvertShuffleChannels3::convert_shuffle_channels3() {

auto m = std::make_shared<ngraph::pattern::Matcher>(shuffle_channels, "ConvertShuffleChannels3");
this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
}
}

@@ -206,4 +206,5 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch_by_elements() {

auto m = std::make_shared<ngraph::pattern::Matcher>(space_to_batch, "ConvertSpaceToBatch");
this->register_matcher(m, callback);
}
}

@@ -91,4 +91,4 @@ ngraph::pass::ConvertSpaceToDepth::ConvertSpaceToDepth() {

auto m = std::make_shared<ngraph::pattern::Matcher>(dts, "ConvertSpaceToDepth");
this->register_matcher(m, callback);
}
}
@@ -16,6 +16,10 @@
#include <ngraph/specialize_function.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToLSTMSequence, "ConvertTensorIteratorToLSTMSequence", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToRNNSequence, "ConvertTensorIteratorToRNNSequence", 0);
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToGRUSequence, "ConvertTensorIteratorToGRUSequence", 0);

ngraph::pass::ConvertTensorIteratorToLSTMSequence::ConvertTensorIteratorToLSTMSequence() {
auto tensor_iterator = std::make_shared<ngraph::pattern::op::Label>(ngraph::element::f32,
ngraph::Shape{}, ngraph::pattern::has_class<ngraph::opset5::TensorIterator>());

@@ -16,8 +16,6 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertOpSet2ToOpSet1, "ConvertOpSet2ToOpSet1", 0);

bool ngraph::pass::ConvertOpSet2ToOpSet1::run_on_function(std::shared_ptr<ngraph::Function> f) {
OV_ITT_SCOPED_TASK(itt::domains::IETransform, "ngraph::pass::ConvertOpSet2ToOpSet1");

ngraph::pass::Manager manager(get_pass_config());

manager.register_pass<ngraph::pass::ConvertSpaceToBatch>();

@@ -19,8 +19,6 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertOpSet3ToOpSet2, "ConvertOpSet3ToOpSet2", 0);

bool ngraph::pass::ConvertOpSet3ToOpSet2::run_on_function(std::shared_ptr<ngraph::Function> f) {
OV_ITT_SCOPED_TASK(itt::domains::IETransform, "ngraph::pass::ConvertOpSet3ToOpSet2");

ngraph::pass::Manager manager(get_pass_config());

manager.register_pass<ngraph::pass::ConvertBroadcast3>();
@@ -7,7 +7,6 @@
#include <ngraph/pass/manager.hpp>

#include <transformations/init_node_info.hpp>
#include <transformations/itt.hpp>
#include <transformations/smart_reshape/proposal_scales_stridedslice.hpp>
#include <transformations/smart_reshape/reshape_to_1D.hpp>
#include <transformations/smart_reshape/matmul_sr.hpp>

@@ -18,8 +17,6 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::SmartReshape, "SmartReshape", 0);

bool ngraph::pass::SmartReshape::run_on_function(std::shared_ptr<ngraph::Function> f) {
OV_ITT_SCOPED_TASK(itt::domains::IETransform, "ngraph::pass::SmartReshape");

ngraph::pass::Manager static_manager;
// This pass must be called first in pipeline
static_manager.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
|
||||
class DynamicToStaticShape: public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
explicit DynamicToStaticShape(const Transformations& specificTransformations = {});
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
|
||||
|
||||
|
@ -10,6 +10,7 @@ namespace vpu {
|
||||
|
||||
class EliminateShapeOfAfterDSR : public ngraph::pass::GraphRewrite {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
EliminateShapeOfAfterDSR();
|
||||
};
|
||||
|
||||
|
@ -10,6 +10,7 @@ namespace vpu {
|
||||
|
||||
class MergeSubsequentDSROperations : public ngraph::pass::GraphRewrite {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
MergeSubsequentDSROperations();
|
||||
};
|
||||
|
||||
|
@ -147,6 +147,8 @@ std::set<NodeTypeInfo> getSupportedTypes(const Transformations& transformations)
|
||||
|
||||
} // namespace
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(DynamicToStaticShape, "DynamicToStaticShape", 0);
|
||||
|
||||
DynamicToStaticShape::DynamicToStaticShape(const Transformations& specificTransformations)
|
||||
: transformations(specificTransformations.empty() ? getDefaultTransformations() : specificTransformations) {
|
||||
transformations.emplace(ngraph::opset3::Result::type_info, [](const std::shared_ptr<ngraph::Node>&){});
|
||||
|
@ -9,6 +9,8 @@
|
||||
|
||||
#include <ngraph/opsets/opset3.hpp>
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(vpu::EliminateShapeOfAfterDSR, "EliminateShapeOfAfterDSR", 0);
|
||||
|
||||
namespace vpu {
|
||||
|
||||
EliminateShapeOfAfterDSR::EliminateShapeOfAfterDSR() : GraphRewrite() {
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include "vpu/ngraph/transformations/merge_subsequent_dsr_operations.hpp"
|
||||
#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(vpu::MergeSubsequentDSROperations, "MergeSubsequentDSROperations", 0);
|
||||
|
||||
namespace vpu {
|
||||
|
||||
MergeSubsequentDSROperations::MergeSubsequentDSROperations() : ngraph::pass::GraphRewrite() {
|
||||
|
@@ -11,6 +11,7 @@ set(LINK_LIBRARIES
funcTestUtils
ngraphFunctions
inference_engine_transformations
openvino::itt
)
set(DEPENDENCIES
mock_engine

@@ -40,7 +41,7 @@ addIeTargetTest(
ADD_CPPLINT
DEPENDENCIES ${DEPENDENCIES}
LABELS
IE
IE
)

ie_faster_build(${TARGET_NAME}

@@ -27,10 +27,13 @@ add_library(${TARGET_NAME} SHARED
${LIBRARY_SRC}
${LIBRARY_HEADERS})

target_include_directories (${TARGET_NAME} PRIVATE
$<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC
${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(${TARGET_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

if(TARGET IE::inference_engine_plugin_api)
target_link_libraries(${TARGET_NAME} PRIVATE IE::inference_engine_plugin_api)
else()
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine_plugin_api)
endif()

target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
@@ -32,6 +32,7 @@ namespace ngraph
class NGRAPH_API ngraph::pass::ConstantFolding : public ngraph::pass::GraphRewrite
{
public:
NGRAPH_RTTI_DECLARATION;
ConstantFolding(const ngraph::BuildNodeExecutorMap& cfmap = ngraph::BuildNodeExecutorMap());

private:

@@ -30,6 +30,7 @@ namespace ngraph
class NGRAPH_API ngraph::pass::ConvertFP32ToFP16 : public ngraph::pass::GraphRewrite
{
public:
NGRAPH_RTTI_DECLARATION;
ConvertFP32ToFP16()
: GraphRewrite()
{
@@ -101,7 +101,8 @@ Function::Function(const OutputVector& results,

void Function::validate_nodes_and_infer_types()
{
OV_ITT_SCOPED_TASK(itt::domains::nGraph, "Function::validate_nodes_and_infer_types");
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT,
"Function::validate_nodes_and_infer_types");

for (auto& node : get_ordered_ops())
{

@@ -30,6 +30,7 @@ namespace ngraph
namespace domains
{
OV_ITT_DOMAIN(nGraph);
OV_ITT_DOMAIN(nGraphPass_LT);
OV_ITT_DOMAIN(nGraphOp, "nGraph::Op");
}
}
@@ -20,6 +20,8 @@
using namespace std;
using namespace ngraph;

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConstantFolding, "ConstantFolding", 0);

ngraph::pass::ConstantFolding::ConstantFolding(const ngraph::BuildNodeExecutorMap& cfmap)
: GraphRewrite()
, m_cfmap{cfmap}

@@ -21,6 +21,8 @@
using namespace std;
using namespace ngraph;

NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertFP32ToFP16, "ConvertFP32ToFP16", 0);

void pass::ConvertFP32ToFP16::convert_constants_precision()
{
auto constant =
@@ -18,6 +18,8 @@
#include <iomanip>
#include <iostream>
#include <memory>
#include <mutex>
#include <unordered_map>

#include "itt.hpp"
#include "ngraph/env_util.hpp"

@@ -34,6 +36,47 @@
using namespace std;
using namespace ngraph;

namespace ngraph
{
namespace pass
{
namespace
{
class PerfCounters
{
PerfCounters(PerfCounters const&) = delete;
PerfCounters& operator=(PerfCounters const&) = delete;

public:
PerfCounters() = default;

openvino::itt::handle_t operator[](::ngraph::Node::type_info_t const& type_inf)
{
std::lock_guard<std::mutex> guard(m_mutex);
auto it = m_counters.find(&type_inf);
if (it != m_counters.end())
return it->second;
return m_counters[&type_inf] = openvino::itt::handle(type_inf.name);
}

private:
using key = ::ngraph::Node::type_info_t const*;
using value = openvino::itt::handle_t;
using counters_map = std::unordered_map<key, value>;

std::mutex m_mutex;
counters_map m_counters;
};

PerfCounters& perf_counters()
{
static PerfCounters counters;
return counters;
}
}
}
}

pass::Manager::Manager()
: m_visualize(getenv_bool("NGRAPH_ENABLE_VISUALIZE_TRACING"))
, m_pass_config(std::make_shared<PassConfig>())
@@ -68,6 +111,9 @@ void pass::Manager::run_passes(shared_ptr<Function> func)
continue;
}

OV_ITT_SCOPED_TASK(itt::domains::nGraphPass_LT,
pass::perf_counters()[pass->get_type_info()]);

pass_timer.start();

NGRAPH_SUPPRESS_DEPRECATED_START
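Pass objects are only known by their runtime type_info here, so the anonymous-namespace PerfCounters above lazily creates one ITT handle per pass type and caches it behind a mutex; run_passes then opens a scoped task keyed by that handle for every executed pass. The caching matters because a handle is meant to be created once per name and reused. A condensed, self-contained sketch of the same lookup, with types simplified (TypeInfo stands in for ngraph::Node::type_info_t; the real code above should be taken as authoritative):

#include <mutex>
#include <unordered_map>
#include <openvino/itt.hpp>

struct TypeInfo { const char* name; };   // simplified stand-in

class PassCounters {
public:
    openvino::itt::handle_t operator[](const TypeInfo& ti) {
        std::lock_guard<std::mutex> guard(m_mutex);
        auto it = m_counters.find(&ti);
        if (it != m_counters.end())
            return it->second;                       // handle already created for this pass type
        return m_counters[&ti] = openvino::itt::handle(ti.name);
    }
private:
    std::mutex m_mutex;
    std::unordered_map<const TypeInfo*, openvino::itt::handle_t> m_counters;
};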
@@ -23,6 +23,7 @@
#include <openvino/function_name.hpp>
#include <openvino/macro_overload.hpp>
#include <string>
#include <utility>

namespace openvino
{
@@ -125,11 +126,97 @@ namespace openvino
{
internal::taskBegin(domain(), taskHandle);
}

/**
* @brief The ScopedTask destructor closes or ends the task scope
*/
~ScopedTask() noexcept { internal::taskEnd(domain()); }

ScopedTask(const ScopedTask&) = delete;
ScopedTask& operator=(const ScopedTask&) = delete;
};

/**
* @class TaskChain
* @ingroup ie_dev_profiling
* @brief Used to annotate a sequence of sections of code which would be named at runtime
* @tparam The @p domain parameter is domain type which shoud be defined with OV_ITT_DOMAIN() macro.
*/
template <domain_t(*domain)()>
class TaskChain
{
uint32_t _id = 1;
std::string _prefix;
bool _skipped {};

TaskChain(const TaskChain&) = delete;
TaskChain& operator=(const TaskChain&) = delete;

public:
/**
* @brief Construct TaskChain with defined annotation handle
*/
TaskChain(handle_t taskHandle, std::string && prefix) noexcept
: _prefix(std::forward<std::string>(prefix))
{
internal::taskBegin(domain(), taskHandle);
}

/**
* @brief The TaskChain destructor closes or ends the task scope
*/
~TaskChain() noexcept { skip(); }

/**
* @brief Ends the previous task from the chain and starts a new one with the given annotation handle
*/
void next(handle_t taskHandle)
{
if(_skipped)
_skipped = false;
else
internal::taskEnd(domain());
internal::taskBegin(domain(), taskHandle);
++_id;
}

/*
* @brief Generating a task name using a sequence number.
*/
std::string taskName() const
{
return _prefix + "_" + std::to_string(_id);
}

/*
* @brief Generating a task name using a scope name.
*/
std::string taskNameOrHandle(const std::string & name) const
{
return _prefix + "_" + name;
}

/*
* @brief Returns a handle provided as argument.
*/
handle_t taskNameOrHandle(handle_t handle) const
{
return handle;
}

/*
* @brief Skips the remaining task scope.
*/
void skip()
{
if(!_skipped)
{
_skipped = true;
internal::taskEnd(domain());
}
}
};

/**
* @def OV_ITT_DOMAIN(domainName)
* @ingroup ie_dev_profiling
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_CONCAT2(X, Y) X ## Y
|
||||
#define OV_ITT_CONCAT(X, Y) OV_ITT_CONCAT2(X, Y)
|
||||
|
||||
#define OV_ITT_DOMAIN_1(domainName) \
|
||||
inline openvino::itt::domain_t domainName() noexcept \
|
||||
{ \
|
||||
@ -176,17 +266,87 @@ inline openvino::itt::domain_t domainName() noexcept
|
||||
*/
|
||||
|
||||
#define OV_ITT_SCOPED_TASK_1(domain) \
|
||||
struct Task ## __LINE__ {}; \
|
||||
openvino::itt::ScopedTask<domain> ittScopedTask ## __LINE__ \
|
||||
(openvino::itt::handle<Task ## __LINE__>(ITT_FUNCTION_NAME));
|
||||
openvino::itt::ScopedTask<domain> OV_ITT_CONCAT(ittScopedTask, __LINE__) \
|
||||
(openvino::itt::handle<struct OV_ITT_CONCAT(Task, __LINE__)>(ITT_FUNCTION_NAME));
|
||||
|
||||
#define OV_ITT_SCOPED_TASK_2(domain, taskOrTaskName) \
|
||||
struct Task ## __LINE__ {}; \
|
||||
openvino::itt::ScopedTask<domain> ittScopedTask ## __LINE__ \
|
||||
(openvino::itt::handle<Task ## __LINE__>(taskOrTaskName));
|
||||
openvino::itt::ScopedTask<domain> OV_ITT_CONCAT(ittScopedTask, __LINE__) \
|
||||
(openvino::itt::handle<struct OV_ITT_CONCAT(Task, __LINE__)>(taskOrTaskName));
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def OV_ITT_TASK_CHAIN(chainId, domain, prefix, taskName)
|
||||
* @ingroup ie_dev_profiling
|
||||
* @brief Begins the sequrence of an annotated sections of code using @p prefix and @p taskName as section id.
|
||||
* @details In case if prefix absent, the current function name is used,
|
||||
* if taskName absent, the first chain index is used, i.e 1.
|
||||
* @param chainId [in] The tasks chain identifier.
|
||||
* @param domainName [in] Known at compile time name of module or library (the domain name).
|
||||
* @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional.
|
||||
* @param taskName [in] The annotation name for section of code. Parameter is optional.
|
||||
*/
|
||||
#define OV_ITT_TASK_CHAIN(...) OV_ITT_MACRO_OVERLOAD(OV_ITT_TASK_CHAIN, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_TASK_CHAIN_2(chainId, domain) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_ITT_CONCAT(Task, __LINE__)> \
|
||||
(std::string(ITT_FUNCTION_NAME) + "_1"), \
|
||||
ITT_FUNCTION_NAME);
|
||||
|
||||
#define OV_ITT_TASK_CHAIN_3(chainId, domain, prefix) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_ITT_CONCAT(Task, __LINE__)> \
|
||||
(std::string(prefix) + "_1"), \
|
||||
prefix);
|
||||
|
||||
#define OV_ITT_TASK_CHAIN_4(chainId, domain, prefix, taskName) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_ITT_CONCAT(Task, __LINE__)> \
|
||||
(std::string(prefix) + "_" + taskName), \
|
||||
prefix);
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def OV_ITT_TASK_NEXT(chainId, taskName)
|
||||
* @ingroup ie_dev_profiling
|
||||
* @brief Inserts new annotated section of code to tasks chain using @p taskName as section id.
|
||||
* @details If taskName is missing, the current chain index is used.
|
||||
* @param chainId [in] The tasks chain identifier.
|
||||
* @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
|
||||
*/
|
||||
#define OV_ITT_TASK_NEXT(...) OV_ITT_MACRO_OVERLOAD(OV_ITT_TASK_NEXT, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_TASK_NEXT_1(chainId) \
|
||||
chainId.next(openvino::itt::handle<struct OV_ITT_CONCAT(Task, __LINE__)>(chainId.taskName()));
|
||||
|
||||
#define OV_ITT_TASK_NEXT_2(chainId, taskOrTaskName) \
|
||||
chainId.next(openvino::itt::handle<struct OV_ITT_CONCAT(Task, __LINE__)>(chainId.taskNameOrHandle(taskOrTaskName)));
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def OV_ITT_TASK_SKIP(chainId)
|
||||
* @ingroup ie_dev_profiling
|
||||
* @brief Skips the remaining task scope.
|
||||
* @param chainId [in] The tasks chain identifier.
|
||||
*/
|
||||
#define OV_ITT_TASK_SKIP(chainId) chainId.skip();
|
||||
|
||||
} // namespace itt
|
||||
} // namespace openvino
|
||||
|
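Putting the macros above together, a chain opened with a prefix reports its stages as "prefix_name" (or "prefix_index" when no name is given), which is how the MKLDNN plugin and the IR reader name their stages earlier in this diff. A short usage sketch with illustrative domain and function names:

OV_ITT_DOMAIN(MyDomain_LT);                 // usually declared in a component's *_itt.h header

void BuildSomething() {
    // Starts task "BuildSomething_ReadModel" in MyDomain_LT.
    OV_ITT_TASK_CHAIN(chain, MyDomain_LT, "BuildSomething", "ReadModel");

    // Ends the previous task and starts "BuildSomething_Optimize".
    OV_ITT_TASK_NEXT(chain, "Optimize");

    // Ends the current task; the chain's destructor then becomes a no-op.
    OV_ITT_TASK_SKIP(chain);
}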
@@ -25,8 +25,8 @@

#define OV_ITT_MACRO_NARG(...) OV_ITT_MACRO_EXPAND( OV_ITT_MACRO_NARG_(__VA_ARGS__, OV_ITT_MACRO_RSEQ_N()) )
#define OV_ITT_MACRO_NARG_(...) OV_ITT_MACRO_EXPAND( OV_ITT_MACRO_ARG_N(__VA_ARGS__) )
#define OV_ITT_MACRO_ARG_N(_1, _2, _3, N, ...) N
#define OV_ITT_MACRO_RSEQ_N() 3, 2, 1, 0
#define OV_ITT_MACRO_ARG_N(_1, _2, _3, _4, N, ...) N
#define OV_ITT_MACRO_RSEQ_N() 4, 3, 2, 1, 0

#define OV_ITT_MACRO_EVAL_(NAME, N) NAME ## _ ## N
#define OV_ITT_MACRO_EVAL(NAME, N) OV_ITT_MACRO_EVAL_(NAME, N)
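The widening above (three to four counted arguments) is what lets the new four-argument OV_ITT_TASK_CHAIN form dispatch correctly; the argument counter works by shifting a reversed sequence, roughly as follows.

// OV_ITT_MACRO_NARG(a, b, c, d)
//   -> OV_ITT_MACRO_ARG_N(a, b, c, d, 4, 3, 2, 1, 0)   // _1=a, _2=b, _3=c, _4=d, N=4
//   -> 4
// so OV_ITT_MACRO_OVERLOAD(OV_ITT_TASK_CHAIN, chain, domain, prefix, name)
// ultimately selects OV_ITT_TASK_CHAIN_4(chain, domain, prefix, name).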