Nested ITT counters lead to invalid performance measurement results (#5172)

* Compile-time enabling or disabling of first-inference-time counters

* First inference time counters

* Counters for validate_nodes_and_infer_types and check_all_parameters_registered removed from first inference time counters scope

* Code style fix

* Missing macro for CC and invalid domain names

* Code style fix

* Unused function warnings fixed
This commit is contained in:
Vladislav Volkov 2021-04-29 07:33:21 +03:00 committed by GitHub
parent b3de228206
commit c97bb90a91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 308 additions and 131 deletions

View File

@ -10,6 +10,14 @@ ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "
ie_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF)
ie_option_enum(ENABLE_PROFILING_FILTER "Enable or disable ITT counter groups.\
Supported values:\
ALL - enable all ITT counters (default value)\
FIRST_INFERENCE - enable only first inference time counters" ALL
ALLOWED_VALUES ALL FIRST_INFERENCE)
ie_option (ENABLE_PROFILING_FIRST_INFERENCE "Build with ITT tracing of first inference time." ON)
ie_option (ENABLE_DOCS "Build docs using Doxygen" OFF)
ie_option(ENABLE_TEMPLATE_PLUGIN "Register template plugin into plugins.xml" OFF)

View File

@ -125,7 +125,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DI
$<TARGET_PROPERTY:${TARGET_NAME}_transformations,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TARGET_NAME}_reader_api)
target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TARGET_NAME}_reader_api openvino::itt)
set_ie_threading_interface_for(${TARGET_NAME}_obj)
if (TBBBIND_2_4_FOUND)

View File

@ -87,7 +87,7 @@ std::string NetworkCompilationContext::calculateFileInfo(const std::string& file
std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
const std::map<std::string, std::string>& compileOptions) {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN");
OstreamHashWrapper xmlHash;
OstreamHashWrapper binHash;
std::ostream xml(&xmlHash);
@ -163,7 +163,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
std::string NetworkCompilationContext::computeHash(const std::string& modelName,
const std::map<std::string, std::string>& compileOptions) {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName");
size_t seed {};
try {
seed = hash_combine(seed, FileUtils::absoluteFilePath(modelName));

View File

@ -228,7 +228,7 @@ class Core::Impl : public ICore {
const std::string& blobID,
const std::string& modelPath = std::string(),
bool forceDisableCache = false) {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::LoadNetworkImpl");
OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::LoadNetworkImpl");
ExecutableNetwork execNetwork;
execNetwork = context ? plugin.LoadNetwork(network, context, parsedConfig) :
plugin.LoadNetwork(network, parsedConfig);
@ -236,7 +236,7 @@ class Core::Impl : public ICore {
if (!forceDisableCache && cacheManager && DeviceSupportsImportExport(plugin)) {
try {
// need to export network for further import from "cache"
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::Export");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Export");
cacheManager->writeCacheEntry(blobID, [&](std::ostream& networkStream) {
networkStream << CompiledBlobHeader(GetInferenceEngineVersion()->buildNumber,
NetworkCompilationContext::calculateFileInfo(modelPath));
@ -263,7 +263,7 @@ class Core::Impl : public ICore {
IE_ASSERT(cacheManager != nullptr);
try {
cacheManager->readCacheEntry(blobId, [&](std::istream &networkStream) {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport");
try {
CompiledBlobHeader header;
networkStream >> header;
@ -434,19 +434,19 @@ public:
}
CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override {
OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::ReadNetwork from file");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from file");
return details::ReadNetwork(modelPath, binPath, extensions);
}
CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const override {
OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::ReadNetwork from memory");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from memory");
return details::ReadNetwork(model, weights, extensions);
}
// TODO: In future this method can be added to ICore interface
ExecutableNetwork LoadNetwork(const CNNNetwork& network, const RemoteContext::Ptr& context,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext");
if (context == nullptr) {
IE_THROW() << "Remote context is null";
}
@ -470,7 +470,7 @@ public:
ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config) override {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::CNN");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::CNN");
bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0;
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
if (forceDisableCache) {
@ -497,7 +497,7 @@ public:
// TODO: In future this method can be added to ICore interface
ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::string& deviceName,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::Path");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path");
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
auto plugin = GetCPPPluginByName(parsed._deviceName);
ExecutableNetwork res;
@ -634,7 +634,7 @@ public:
* @return Reference to a CPP plugin wrapper
*/
InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName");
std::lock_guard<std::mutex> lock(pluginsMutex);

View File

@ -16,6 +16,7 @@ namespace itt {
namespace domains {
OV_ITT_DOMAIN(IE);
OV_ITT_DOMAIN(IE_LT);
OV_ITT_DOMAIN(IE_RT);
}
}
}

View File

@ -151,7 +151,6 @@ void assertIfIRv7LikeModel(std::istream & modelStream) {
} // namespace
CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector<IExtensionPtr>& exts) {
OV_ITT_SCOPED_TASK(itt::domains::IE, "details::ReadNetwork");
// Register readers if it is needed
registerReaders();
@ -210,11 +209,13 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string&
binStream.seekg(0, std::ios::beg);
Blob::Ptr weights = make_shared_blob<uint8_t>({Precision::U8, { fileSize }, C });
{
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "ReadNetworkWeights");
weights->allocate();
binStream.read(weights->buffer(), fileSize);
binStream.close();
}
// read model with weights
auto network = reader->read(modelStream, weights, exts);
@ -230,7 +231,6 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string&
}
CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector<IExtensionPtr>& exts) {
OV_ITT_SCOPED_TASK(itt::domains::IE, "details::ReadNetwork");
// Register readers if it is needed
registerReaders();
std::istringstream modelStream(model);

View File

@ -50,6 +50,8 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
target_compile_definitions(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_COMPILE_DEFINITIONS>)
target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt)
add_cpplint_target(${TARGET_NAME}_obj_cpplint FOR_TARGETS ${TARGET_NAME}_obj)
# Create shared library

View File

@ -15,6 +15,7 @@ namespace InferenceEngine {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(IELegacy);
OV_ITT_DOMAIN(IELegacy_LT);
}
}
}

View File

@ -148,7 +148,7 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
}
CNNNetwork cloneNetwork(const CNNNetwork& network) {
OV_ITT_SCOPED_TASK(itt::domains::IELegacy, "cloneNetwork");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IELegacy_LT, "cloneNetwork");
if (network.getFunction()) {
return CNNNetwork(std::make_shared<details::CNNNetworkNGraphImpl>(network));

View File

@ -0,0 +1,27 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
 * @brief Defines openvino domains for tracing
 * @file lpt_itt.h
 */
#pragma once
#include <openvino/itt.hpp>
namespace ngraph {
namespace pass {
namespace low_precision {
namespace itt {
namespace domains {
// ITT domains used to group low-precision-transformation counters.
// NOTE(review): LPT_LT appears to be the domain for first-inference/load-time
// counters (mirroring IE vs IE_LT elsewhere in this commit) — confirm naming intent.
OV_ITT_DOMAIN(LPT);
OV_ITT_DOMAIN(LPT_LT);
} // namespace domains
} // namespace itt
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -21,6 +21,8 @@
#include "ngraph/pass/constant_folding.hpp"
#include "ngraph/opsets/opset6.hpp"
#include "lpt_itt.h"
// branch specific transformations
#include "low_precision/concat.hpp"
#include "low_precision/concat_multi_channels.hpp"
@ -360,6 +362,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
return;
}
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::LPT_LT, "LowPrecisionTransformer", "transform");
ngraph::pass::ConstantFolding constantFolding;
constantFolding.run_on_function(network);
@ -368,12 +372,16 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
TransformationContext context(network);
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "TypeRelaxedReplacer");
// Extend necessary operations with polymorphic semantics
{
TypeRelaxedReplacer pass;
pass.run_on_function(network);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "BranchSpecificTransformations");
{
// Branch specific transformations
GraphRewrite pass;
@ -381,6 +389,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
pass.run_on_function(network);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FakeQuantizeDecomposition");
{
// Step #1: FakeQuantize decomposition transformation execution
GraphRewrite pass;
@ -388,6 +398,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
pass.run_on_function(network);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "LayerTransformations");
{
// Step #2: layer transformations execution
GraphRewrite pass;
@ -395,6 +407,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
pass.run_on_function(network);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "CleanupTransformations");
{
// Step #3: cleanup transformations execution
GraphRewrite pass;
@ -402,6 +416,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
pass.run_on_function(network);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "StandaloneCleanupTransformations");
{
// Step #4: standalone cleanup transformations execution

View File

@ -44,7 +44,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
_cfg{cfg},
_name{network.getName()},
_numaNodesWeights(numaNodesWeights) {
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet");
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet");
// we are cloning network if we have statistics and we can transform network.
_clonedNetwork = cloneNetwork(network);
@ -98,7 +98,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
}
}
OV_ITT_TASK_NEXT(taskChain, "createConstInputs");
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "createConstInputs");
auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, const std::vector<size_t>& shape, const std::string& name) {
LayerParams attrs = {layer->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()};
auto constLayer = std::make_shared<InferenceEngine::CNNLayer>(attrs);

View File

@ -97,7 +97,7 @@ template void MKLDNNGraph::ApplyUnrollPasses(CNNNetwork&);
template<typename NET>
void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNWeightsSharing::Ptr &w_cache) {
OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph");
OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph");
if (IsReady())
ForgetGraphData();
@ -210,6 +210,7 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx
}
void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "MKLDNNGraph::Replicate", "CNNNetwork");
InputsDataMap inputs = network.getInputsInfo();
this->_name = network.getName();
@ -234,6 +235,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
return -1;
};
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AllNodes");
// Replicate All Nodes in topological order
for (const auto layer : CNNNetSortTopologically(network)) {
CNNLayerPtr _layer = layer;
@ -271,6 +274,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
}
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Outputs");
OutputsDataMap outputs = network.getOutputsInfo();
for (const auto &output : outputs) {
const auto data = output.second;
@ -293,6 +298,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
unused_data.erase(data);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AddStubs");
// Add stub output node for unused data
for (auto to_stub_data : unused_data) {
auto parent_layer = getCreatorLayer(to_stub_data).lock();
@ -309,6 +316,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
graphNodes.push_back(node);
}
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Inputs");
// Replicate input nodes
for (const auto& input : inputs) {
auto inputLayer = getCreatorLayer(input.second->getInputData()).lock();
@ -384,7 +393,7 @@ void MKLDNNGraph::InitGraph() {
}
void MKLDNNGraph::SetOriginalLayerNames() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames");
// Do it before cleanup. Because it will lose original layers information
for (auto &graphNode : graphNodes) {
@ -409,14 +418,14 @@ void MKLDNNGraph::SetOriginalLayerNames() {
}
void MKLDNNGraph::InitNodes() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes");
for (auto &node : graphNodes) {
node->init();
}
}
void MKLDNNGraph::InitDescriptors() {
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare");
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare");
for (auto &node : graphNodes) {
if (node->getType() == Input && _meanImages.find(node->getName()) != _meanImages.end()) {
@ -424,18 +433,18 @@ void MKLDNNGraph::InitDescriptors() {
if (inputNode)
inputNode->withMeanImage();
}
OV_ITT_TASK_NEXT(taskChain, node->profiling.getSupportedDescriptors);
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.getSupportedDescriptors);
node->getSupportedDescriptors();
OV_ITT_TASK_NEXT(taskChain, node->profiling.initSupportedPrimitiveDescriptors);
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.initSupportedPrimitiveDescriptors);
node->initSupportedPrimitiveDescriptors();
OV_ITT_TASK_NEXT(taskChain, node->profiling.filterSupportedPrimitiveDescriptors);
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.filterSupportedPrimitiveDescriptors);
node->filterSupportedPrimitiveDescriptors();
}
for (auto &node : graphNodes) {
OV_ITT_TASK_NEXT(taskChain, node->profiling.selectOptimalPrimitiveDescriptor);
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.selectOptimalPrimitiveDescriptor);
node->selectOptimalPrimitiveDescriptor();
}
}
@ -443,13 +452,13 @@ void MKLDNNGraph::InitDescriptors() {
void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::InitOptimalPrimitiveDescriptors");
for (auto &node : graphNodes) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor);
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor);
node->initOptimalPrimitiveDescriptor();
}
}
void MKLDNNGraph::ExecuteConstantNodesOnly() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
mkldnn::stream stream(eng);
using shared_memory_ptr = MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr;
@ -511,7 +520,7 @@ static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& c
}
void MKLDNNGraph::InitEdges() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
size_t numberOfEdges = graphEdges.size();
@ -730,7 +739,7 @@ void MKLDNNGraph::AllocateWithReuse() {
}
void MKLDNNGraph::Allocate() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate");
// resolve edges. Define which will be a view on others
// NeedAllocation - real blob
@ -750,7 +759,7 @@ void MKLDNNGraph::Allocate() {
void MKLDNNGraph::CreatePrimitives() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::CreatePrimitives");
for (auto& node : graphNodes) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.createPrimitive);
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, node->profiling.createPrimitive);
node->createPrimitive();
}
}
@ -888,7 +897,7 @@ void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sort
}
void MKLDNNGraph::SortTopologically() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically");
std::vector<MKLDNNNodePtr> unsorted;
std::vector<MKLDNNNodePtr> sorted;

View File

@ -50,88 +50,111 @@ using namespace InferenceEngine;
MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}
void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations");
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "MergeTwoEqualScaleShifts");
MergeTwoEqualScaleShifts(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBroadcastAndEltwise");
FuseBroadcastAndEltwise(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseClampAndQuantize");
FuseClampAndQuantize(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseScaleShiftAndQuantize");
FuseScaleShiftAndQuantize(graph);
graph.RemoveDroppedNodes();
MergeGroupConvolution(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndZeroPoints");
FuseConvolutionAndZeroPoints(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise");
FuseConvolutionAndDepthwise(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndActivation");
FuseConvolutionAndActivation(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise");
FuseConvolutionAndDepthwise(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndQuantize");
FuseConvolutionAndQuantize(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges");
graph.SortTopologically();
graph.RemoveDroppedEdges();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise");
FuseConvolutionAndDepthwise(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePoolingAndQuantize");
FusePoolingAndQuantize(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges");
graph.SortTopologically();
graph.RemoveDroppedEdges();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDWConvolution");
FuseConvolutionAndDWConvolution(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBinaryConvolutionAndQuantize");
FuseBinaryConvolutionAndQuantize(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBatchNormWithScale");
FuseBatchNormWithScale(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveIdentityOperator");
RemoveIdentityOperator(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionSumAndConvolutionSumActivation");
FuseConvolutionSumAndConvolutionSumActivation(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperation");
FuseConvolutionAndSimpleOperation(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFullyConnectedAndSimpleOperation");
FuseFullyConnectedAndSimpleOperation(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMVNAndSimpleOperation");
FuseMVNAndSimpleOperation(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseInterpolateAndSimpleOperation");
FuseInterpolateAndSimpleOperation(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseNormalizeAndSimpleOperation");
FuseNormalizeAndSimpleOperation(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseEltwiseAndSimple");
FuseEltwiseAndSimple(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges");
graph.RemoveDroppedEdges();
}
void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations");
RemoveIOScaleShifts(graph);
graph.RemoveDroppedNodes();

View File

@ -299,7 +299,7 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
using namespace ngraph::pass::low_precision;
if (useLpt) {
OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations");
OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations");
ngraph::pass::Manager manager;
auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>();
@ -363,11 +363,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
legacyManager.run_passes(nGraphFunc);
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");
clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize));
OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision");
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConvertIOPrecision");
// WA: after conversion to CNNNetwork user precision can redefine input/output precisions
// so we need to apply additional precision conversion but only for inputs and outputs
@ -423,7 +423,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
IE_SUPPRESS_DEPRECATED_END
auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(icnnnet);
if (implNetwork) {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding");
// valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
ConstTransformer transformator(implNetwork.get());
transformator.fullTrim();

View File

@ -545,7 +545,7 @@ void XmlDeserializer::on_adapter(
std::shared_ptr<ngraph::Function> XmlDeserializer::parse_function(
const pugi::xml_node& root, const Blob::CPtr& weights) {
OV_ITT_TASK_CHAIN(taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse");
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse");
struct FunctionNodes {
ngraph::ParameterVector parameters;
@ -604,7 +604,7 @@ std::shared_ptr<ngraph::Function> XmlDeserializer::parse_function(
};
std::for_each(outputs.begin(), outputs.end(), dfs);
OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphNodes");
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphNodes");
FunctionNodes func_nodes;
@ -665,7 +665,7 @@ std::shared_ptr<ngraph::Function> XmlDeserializer::parse_function(
func_nodes.all.emplace_back(node);
}
OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphFunction");
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphFunction");
auto function = std::make_shared<ngraph::Function>(
func_nodes.results, func_nodes.sinks, func_nodes.parameters, GetStrAttr(root, "name", ""));
@ -876,7 +876,7 @@ std::shared_ptr<ICNNNetwork> V10Parser::parse(
XmlDeserializer visitor(root, weights, opsets, variables);
visitor.on_attribute("net", function);
OV_ITT_SCOPED_TASK(itt::domains::V10Reader_RT, "ConstructCNNNetwork");
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "ConstructCNNNetwork");
CNNNetwork net(function, _exts);
parsePreProcess(net, root, weights);

View File

@ -33,14 +33,19 @@ CNNNetwork IRReader::read(std::istream& model, const std::vector<IExtensionPtr>&
return read(model, nullptr, exts);
}
CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const std::vector<IExtensionPtr>& exts) const {
OV_ITT_SCOPED_TASK(itt::domains::V10Reader, "IRReader::read");
pugi::xml_document xmlDoc;
// Parses the IR model XML from the input stream into @p xmlDoc.
// Annotated with a FIRST_INFERENCE ITT counter so XML parsing shows up
// in first-inference-time profiling (V10Reader_RT domain).
// Throws (via IE_THROW) with the pugixml error description and byte offset
// when the document cannot be parsed.
static void loadXml(pugi::xml_document &xmlDoc, std::istream& model) {
    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "loadXml");
    pugi::xml_parse_result res = xmlDoc.load(model);
    if (res.status != pugi::status_ok) {
        // Fix: separate the parser's description from the offset text —
        // without the leading space the message read "...descriptionat offset N".
        IE_THROW() << res.description() << " at offset " << res.offset;
    }
}
CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const std::vector<IExtensionPtr>& exts) const {
OV_ITT_SCOPED_TASK(itt::domains::V10Reader, "IRReader::read");
pugi::xml_document xmlDoc;
loadXml(xmlDoc, model);
pugi::xml_node root = xmlDoc.document_element();
auto version = details::GetIRVersion(root);

View File

@ -30,6 +30,7 @@ namespace ngraph
gen.vpmovzxbd(i32vec, u8vec);
gen.vcvtdq2ps(fvec, i32vec);
gen.vcvtps2ph(f16vec, fvec, 0);
gen.vzeroupper();
gen.movdqu(gen.xword[dst], f16vec);
}

View File

@ -88,8 +88,8 @@ Function::Function(const OutputVector& results,
void Function::check_all_parameters_registered() const
{
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT,
"Function::check_all_parameters_registered");
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraph, "Function::check_all_parameters_registered");
std::stringstream unregistered_parameters;
for (auto& node : get_ordered_ops())
{
@ -104,8 +104,7 @@ void Function::check_all_parameters_registered() const
void Function::validate_nodes_and_infer_types() const
{
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT,
"Function::validate_nodes_and_infer_types");
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraph, "Function::validate_nodes_and_infer_types");
struct Counter
{

View File

@ -62,14 +62,14 @@ namespace ngraph
{
namespace pass
{
namespace
namespace internal
{
PerfCounters& perf_counters_graph_rewrite()
{
static PerfCounters counters;
return counters;
}
} // namespace
} // namespace internal
} // namespace pass
} // namespace ngraph
@ -428,7 +428,8 @@ void ngraph::pass::MatcherPass::register_matcher(const std::shared_ptr<ngraph::p
bool ngraph::pass::MatcherPass::apply(std::shared_ptr<ngraph::Node> node)
{
OV_ITT_SCOPED_TASK(itt::domains::nGraph, pass::perf_counters_graph_rewrite()[get_type_info()]);
OV_ITT_SCOPED_TASK(itt::domains::nGraph,
pass::internal::perf_counters_graph_rewrite()[get_type_info()]);
m_new_nodes.clear();
if (m_handler)
return m_handler(node);

View File

@ -29,14 +29,14 @@ namespace ngraph
{
namespace pass
{
namespace
namespace internal
{
PerfCounters& perf_counters_manager()
PerfCounters& perf_counters()
{
static PerfCounters counters;
return counters;
}
} // namespace
} // namespace internal
} // namespace pass
} // namespace ngraph
@ -72,8 +72,9 @@ void pass::Manager::run_passes(shared_ptr<Function> func)
continue;
}
OV_ITT_SCOPED_TASK(itt::domains::nGraphPass_LT,
pass::perf_counters_manager()[pass->get_type_info()]);
OV_ITT_SCOPE(FIRST_INFERENCE,
itt::domains::nGraphPass_LT,
pass::internal::perf_counters()[pass->get_type_info()]);
pass_timer.start();

View File

@ -187,25 +187,10 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs, Cases&&... case
#define OV_CC_DOMAINS(Module)
// Placeholder for first macro argument
#define OV_CC_SCOPE_ARG_PLACEHOLDER_1 0,
// This macro returns second argument, first argument is ignored
#define OV_CC_SCOPE_SECOND_ARG(...) OV_PP_EXPAND(OV_CC_SCOPE_SECOND_ARG_(__VA_ARGS__, 0))
#define OV_CC_SCOPE_SECOND_ARG_(...) OV_PP_EXPAND(OV_CC_SCOPE_SECOND_ARG_GET(__VA_ARGS__))
#define OV_CC_SCOPE_SECOND_ARG_GET(ignored, val, ...) val
// Return macro argument value
#define OV_CC_SCOPE_IS_ENABLED(x) OV_CC_SCOPE_IS_ENABLED1(x)
// Generate junk macro or {0, } sequence if val is 1
#define OV_CC_SCOPE_IS_ENABLED1(val) OV_CC_SCOPE_IS_ENABLED2(OV_PP_CAT(OV_CC_SCOPE_ARG_PLACEHOLDER_, val))
// Return second argument from possible sequences {1, 0}, {0, 1, 0}
#define OV_CC_SCOPE_IS_ENABLED2(arg1_or_junk) OV_CC_SCOPE_SECOND_ARG(arg1_or_junk 1, 0)
#define OV_CC_SCOPE_IS_ENABLED OV_PP_IS_ENABLED
#define OV_SCOPE(Module, region) \
for (bool ovCCScopeIsEnabled = OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, region)); ovCCScopeIsEnabled; ovCCScopeIsEnabled = false)
for (bool ovCCScopeIsEnabled = OV_PP_IS_ENABLED(OV_PP_CAT3(Module, _, region)); ovCCScopeIsEnabled; ovCCScopeIsEnabled = false)
// Switch is disabled
#define OV_CC_SWITCH_0(Module, fn, ctx, val)
@ -214,7 +199,7 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs, Cases&&... case
#define OV_CC_SWITCH_1(Module, fn, ctx, val) openvino::cc::internal::match<fn>(ctx, val, OV_PP_CAT4(Module, _, fn, _cases));
#define OV_SWITCH(Module, fn, ctx, val, ...) \
OV_PP_EXPAND(OV_PP_CAT(OV_CC_SWITCH_, OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, fn)))(Module, fn, ctx, val))
OV_PP_EXPAND(OV_PP_CAT(OV_CC_SWITCH_, OV_PP_IS_ENABLED(OV_PP_CAT3(Module, _, fn)))(Module, fn, ctx, val))
#define OV_CASE(Case, Type) openvino::cc::internal::make_case_wrapper<Type>(Case)

View File

@ -14,6 +14,16 @@ target_link_libraries(${TARGET_NAME} PUBLIC openvino::pp)
if(TARGET ittnotify)
target_link_libraries(${TARGET_NAME} PUBLIC ittnotify)
if(ENABLE_PROFILING_FILTER STREQUAL "ALL")
target_compile_definitions(${TARGET_NAME} PUBLIC
ENABLE_PROFILING_ALL
ENABLE_PROFILING_FIRST_INFERENCE)
elseif(ENABLE_PROFILING_FILTER STREQUAL "FIRST_INFERENCE")
target_compile_definitions(${TARGET_NAME} PUBLIC
ENABLE_PROFILING_FIRST_INFERENCE)
else()
message(FATAL_ERROR "The ${ENABLE_PROFILING_FILTER} profiling filter isn't supported")
endif()
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")

View File

@ -214,6 +214,8 @@ namespace openvino
*/
#define OV_ITT_DOMAIN(...) OV_PP_OVERLOAD(OV_ITT_DOMAIN, __VA_ARGS__)
#define OV_ITT_GROUP(group) OV_PP_CAT(ENABLE_PROFILING_, group)
/**
* @cond
*/
@ -232,6 +234,37 @@ inline openvino::itt::domain_t domainName() noexcept
return d; \
}
/**
* @endcond
*/
/**
* @def OV_ITT_SCOPE(group, domainName, handleOrTaskName)
* @ingroup ie_dev_profiling
* @brief Annotates a section of code until scope exit to be profiled using a known @p handle or @p taskName as section id.
* @details If the handle or taskName is absent, the current function name is used.
* @param group [in] ITT counter group name used for enabling/disabling at compile time.
* @param domainName [in] Known at compile time name of module or library (the domain name).
* @param handleOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
*/
#define OV_ITT_SCOPE(group, ...) \
OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__))
/**
* @cond
*/
#define OV_ITT_SCOPE_IMPL_0(...)
#define OV_ITT_SCOPE_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE, __VA_ARGS__)
#define OV_ITT_SCOPE_1(domain) \
openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(ITT_FUNCTION_NAME));
#define OV_ITT_SCOPE_2(domain, taskOrTaskName) \
openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(taskOrTaskName));
/**
* @endcond
*/
@ -244,19 +277,97 @@ inline openvino::itt::domain_t domainName() noexcept
* @param domainName [in] Known at compile time name of module or library (the domain name).
* @param handleOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
*/
#define OV_ITT_SCOPED_TASK(...) OV_PP_OVERLOAD(OV_ITT_SCOPED_TASK, __VA_ARGS__)
#define OV_ITT_SCOPED_TASK(...) OV_ITT_SCOPE(ALL, __VA_ARGS__)
/**
* @def OV_ITT_SCOPE_CHAIN(group, chainId, domain, prefix, taskName)
* @ingroup ie_dev_profiling
* @brief Begins a sequence of annotated sections of code using @p prefix and @p taskName as section id.
* @details If @p prefix is absent, the current function name is used;
*          if @p taskName is absent, the first chain index is used, i.e. 1.
* @param group [in] ITT counter group name used for enabling/disabling at compile time.
* @param chainId [in] The tasks chain identifier.
* @param domainName [in] Known at compile time name of module or library (the domain name).
* @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional.
* @param taskName [in] The annotation name for section of code. Parameter is optional.
*/
#define OV_ITT_SCOPE_CHAIN(group, ...) \
OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_CHAIN_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__))
/**
* @cond
*/
#define OV_ITT_SCOPED_TASK_1(domain) \
openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(ITT_FUNCTION_NAME));
#define OV_ITT_SCOPE_CHAIN_IMPL_0(...)
#define OV_ITT_SCOPE_CHAIN_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE_CHAIN, __VA_ARGS__)
#define OV_ITT_SCOPED_TASK_2(domain, taskOrTaskName) \
openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(taskOrTaskName));
#define OV_ITT_SCOPE_CHAIN_2(chainId, domain) \
openvino::itt::TaskChain<domain> chainId \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
(std::string(ITT_FUNCTION_NAME) + "_1"), \
ITT_FUNCTION_NAME);
#define OV_ITT_SCOPE_CHAIN_3(chainId, domain, prefix) \
openvino::itt::TaskChain<domain> chainId \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
(std::string(prefix) + "_1"), \
prefix);
#define OV_ITT_SCOPE_CHAIN_4(chainId, domain, prefix, taskName) \
openvino::itt::TaskChain<domain> chainId \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
(std::string(prefix) + "_" + taskName), \
prefix);
/**
* @endcond
*/
/**
* @def OV_ITT_SCOPE_NEXT(group, chainId, taskName)
* @ingroup ie_dev_profiling
* @brief Inserts new annotated section of code to tasks chain using @p taskName as section id.
* @details If taskName is missing, the current chain index is used.
* @param group [in] ITT counter group name used for enabling/disabling at compile time.
* @param chainId [in] The tasks chain identifier.
* @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
*/
#define OV_ITT_SCOPE_NEXT(group, ...) \
OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_NEXT_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__))
/**
* @cond
*/
#define OV_ITT_SCOPE_NEXT_IMPL_0(...)
#define OV_ITT_SCOPE_NEXT_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE_NEXT, __VA_ARGS__)
#define OV_ITT_SCOPE_NEXT_1(chainId) \
chainId.next(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(chainId.taskName()));
#define OV_ITT_SCOPE_NEXT_2(chainId, taskOrTaskName) \
chainId.next(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(chainId.taskNameOrHandle(taskOrTaskName)));
/**
* @endcond
*/
/**
* @def OV_ITT_SCOPE_SKIP(group, chainId)
* @ingroup ie_dev_profiling
* @brief Skips the remaining task scope.
* @param group [in] ITT counter group name used for enabling/disabling at compile time.
* @param chainId [in] The tasks chain identifier.
*/
#define OV_ITT_SCOPE_SKIP(group, chainId) \
OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_SKIP_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(chainId))
/**
* @cond
*/
#define OV_ITT_SCOPE_SKIP_0(chainId)
#define OV_ITT_SCOPE_SKIP_1(chainId) chainId.skip();
/**
* @endcond
@ -273,33 +384,7 @@ inline openvino::itt::domain_t domainName() noexcept
* @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional.
* @param taskName [in] The annotation name for section of code. Parameter is optional.
*/
#define OV_ITT_TASK_CHAIN(...) OV_PP_OVERLOAD(OV_ITT_TASK_CHAIN, __VA_ARGS__)
/**
* @cond
*/
#define OV_ITT_TASK_CHAIN_2(chainId, domain) \
openvino::itt::TaskChain<domain> chainId \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
(std::string(ITT_FUNCTION_NAME) + "_1"), \
ITT_FUNCTION_NAME);
#define OV_ITT_TASK_CHAIN_3(chainId, domain, prefix) \
openvino::itt::TaskChain<domain> chainId \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
(std::string(prefix) + "_1"), \
prefix);
#define OV_ITT_TASK_CHAIN_4(chainId, domain, prefix, taskName) \
openvino::itt::TaskChain<domain> chainId \
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
(std::string(prefix) + "_" + taskName), \
prefix);
/**
* @endcond
*/
#define OV_ITT_TASK_CHAIN(...) OV_ITT_SCOPE_CHAIN(ALL, __VA_ARGS__)
/**
* @def OV_ITT_TASK_NEXT(chainId, taskName)
@ -309,21 +394,7 @@ inline openvino::itt::domain_t domainName() noexcept
* @param chainId [in] The tasks chain identifier.
* @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
*/
#define OV_ITT_TASK_NEXT(...) OV_PP_OVERLOAD(OV_ITT_TASK_NEXT, __VA_ARGS__)
/**
* @cond
*/
#define OV_ITT_TASK_NEXT_1(chainId) \
chainId.next(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(chainId.taskName()));
#define OV_ITT_TASK_NEXT_2(chainId, taskOrTaskName) \
chainId.next(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(chainId.taskNameOrHandle(taskOrTaskName)));
/**
* @endcond
*/
#define OV_ITT_TASK_NEXT(...) OV_ITT_SCOPE_NEXT(ALL, __VA_ARGS__)
/**
* @def OV_ITT_TASK_SKIP(chainId)
@ -331,7 +402,7 @@ inline openvino::itt::domain_t domainName() noexcept
* @brief Skips the remaining task scope.
* @param chainId [in] The tasks chain identifier.
*/
#define OV_ITT_TASK_SKIP(chainId) chainId.skip();
#define OV_ITT_TASK_SKIP(chainId) OV_ITT_SCOPE_SKIP(ALL, chainId);
} // namespace itt
} // namespace openvino

View File

@ -40,7 +40,7 @@ void taskBegin(domain_t d, handle_t t) {
}
void taskEnd(domain_t d) {
if (!callStackDepth() || call_stack_depth-- > 0)
if (!callStackDepth() || --call_stack_depth < callStackDepth())
__itt_task_end(reinterpret_cast<__itt_domain*>(d));
}

View File

@ -30,3 +30,20 @@
#define OV_PP_CAT4(x, y, z, w) OV_PP_CAT4_(x, y, z, w)
#define OV_PP_OVERLOAD(NAME, ...) OV_PP_EXPAND( OV_PP_CAT3(NAME, _, OV_PP_EXPAND( OV_PP_NARG(OV_PP_NO_ARGS __VA_ARGS__ (NAME)) ))(__VA_ARGS__) )
// Expands to "0," when pasted after OV_PP_ARG_PLACEHOLDER_ with the value 1;
// this shifts the argument list below so the "enabled" marker gets selected.
#define OV_PP_ARG_PLACEHOLDER_1 0,
// Returns its second argument; the first (and any trailing) arguments are ignored.
// The extra trailing 0 guarantees at least two arguments are always present.
#define OV_PP_SECOND_ARG(...) OV_PP_EXPAND(OV_PP_SECOND_ARG_(__VA_ARGS__, 0))
#define OV_PP_SECOND_ARG_(...) OV_PP_EXPAND(OV_PP_SECOND_ARG_GET(__VA_ARGS__))
#define OV_PP_SECOND_ARG_GET(ignored, val, ...) val
// Evaluates to 1 when the macro x expands to 1, otherwise to 0.
// The indirection through OV_PP_IS_ENABLED1 forces x to be expanded first,
// so the check works on macro names (e.g. ENABLE_PROFILING_ALL), not only literals.
#define OV_PP_IS_ENABLED(x) OV_PP_IS_ENABLED1(x)
// If val is 1, OV_PP_CAT produces OV_PP_ARG_PLACEHOLDER_1, which expands to "0,";
// for any other token the concatenation stays a single (junk) identifier.
#define OV_PP_IS_ENABLED1(val) OV_PP_IS_ENABLED2(OV_PP_CAT(OV_PP_ARG_PLACEHOLDER_, val))
// The argument list here is either "0, 1, 0" (enabled: second argument is 1)
// or "<junk> 1, 0" (disabled: second argument is 0).
#define OV_PP_IS_ENABLED2(arg1_or_junk) OV_PP_SECOND_ARG(arg1_or_junk 1, 0)