diff --git a/cmake/features.cmake b/cmake/features.cmake index 275961f1ad8..ef8c2fbfc45 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -10,6 +10,14 @@ ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON " ie_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF) +ie_option_enum(ENABLE_PROFILING_FILTER "Enable or disable ITT counter groups.\ +Supported values:\ + ALL - enable all ITT counters (default value)\ + FIRST_INFERENCE - enable only first inference time counters" ALL + ALLOWED_VALUES ALL FIRST_INFERENCE) + +ie_option (ENABLE_PROFILING_FIRST_INFERENCE "Build with ITT tracing of first inference time." ON) + ie_option (ENABLE_DOCS "Build docs using Doxygen" OFF) ie_option(ENABLE_TEMPLATE_PLUGIN "Register template plugin into plugins.xml" OFF) diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 7c9200b3ded..03f41078046 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -125,7 +125,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DI $ $) -target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TARGET_NAME}_reader_api) +target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TARGET_NAME}_reader_api openvino::itt) set_ie_threading_interface_for(${TARGET_NAME}_obj) if (TBBBIND_2_4_FOUND) diff --git a/inference-engine/src/inference_engine/compilation_context.cpp b/inference-engine/src/inference_engine/compilation_context.cpp index 1463dfc48be..bcbf8627ba0 100644 --- a/inference-engine/src/inference_engine/compilation_context.cpp +++ b/inference-engine/src/inference_engine/compilation_context.cpp @@ -87,7 +87,7 @@ std::string NetworkCompilationContext::calculateFileInfo(const std::string& file std::string NetworkCompilationContext::computeHash(const CNNNetwork& 
network, const std::map& compileOptions) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN"); OstreamHashWrapper xmlHash; OstreamHashWrapper binHash; std::ostream xml(&xmlHash); @@ -163,7 +163,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network, std::string NetworkCompilationContext::computeHash(const std::string& modelName, const std::map& compileOptions) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName"); size_t seed {}; try { seed = hash_combine(seed, FileUtils::absoluteFilePath(modelName)); diff --git a/inference-engine/src/inference_engine/ie_core.cpp b/inference-engine/src/inference_engine/ie_core.cpp index 94165f0887b..94047c8562f 100644 --- a/inference-engine/src/inference_engine/ie_core.cpp +++ b/inference-engine/src/inference_engine/ie_core.cpp @@ -228,7 +228,7 @@ class Core::Impl : public ICore { const std::string& blobID, const std::string& modelPath = std::string(), bool forceDisableCache = false) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::LoadNetworkImpl"); + OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::LoadNetworkImpl"); ExecutableNetwork execNetwork; execNetwork = context ? 
plugin.LoadNetwork(network, context, parsedConfig) : plugin.LoadNetwork(network, parsedConfig); @@ -236,7 +236,7 @@ class Core::Impl : public ICore { if (!forceDisableCache && cacheManager && DeviceSupportsImportExport(plugin)) { try { // need to export network for further import from "cache" - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::Export"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Export"); cacheManager->writeCacheEntry(blobID, [&](std::ostream& networkStream) { networkStream << CompiledBlobHeader(GetInferenceEngineVersion()->buildNumber, NetworkCompilationContext::calculateFileInfo(modelPath)); @@ -263,7 +263,7 @@ class Core::Impl : public ICore { IE_ASSERT(cacheManager != nullptr); try { cacheManager->readCacheEntry(blobId, [&](std::istream &networkStream) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport"); try { CompiledBlobHeader header; networkStream >> header; @@ -434,19 +434,19 @@ public: } CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::ReadNetwork from file"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from file"); return details::ReadNetwork(modelPath, binPath, extensions); } CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const override { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::ReadNetwork from memory"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from memory"); return details::ReadNetwork(model, weights, extensions); } // TODO: In future this method can be added to ICore interface ExecutableNetwork LoadNetwork(const CNNNetwork& network, const RemoteContext::Ptr& context, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, 
"Core::LoadNetwork::RemoteContext"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext"); if (context == nullptr) { IE_THROW() << "Remote context is null"; } @@ -470,7 +470,7 @@ public: ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName, const std::map& config) override { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::CNN"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::CNN"); bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0; auto parsed = parseDeviceNameIntoConfig(deviceName, config); if (forceDisableCache) { @@ -497,7 +497,7 @@ public: // TODO: In future this method can be added to ICore interface ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::string& deviceName, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::Path"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path"); auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto plugin = GetCPPPluginByName(parsed._deviceName); ExecutableNetwork res; @@ -634,7 +634,7 @@ public: * @return Reference to a CPP plugin wrapper */ InferencePlugin GetCPPPluginByName(const std::string& deviceName) const { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName"); std::lock_guard lock(pluginsMutex); diff --git a/inference-engine/src/inference_engine/ie_itt.hpp b/inference-engine/src/inference_engine/ie_itt.hpp index 8bd72cfc215..343fdc110c1 100644 --- a/inference-engine/src/inference_engine/ie_itt.hpp +++ b/inference-engine/src/inference_engine/ie_itt.hpp @@ -16,6 +16,7 @@ namespace itt { namespace domains { OV_ITT_DOMAIN(IE); OV_ITT_DOMAIN(IE_LT); + OV_ITT_DOMAIN(IE_RT); } } } diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp 
b/inference-engine/src/inference_engine/ie_network_reader.cpp index 9dd7b7a0b41..b7bccd841e3 100644 --- a/inference-engine/src/inference_engine/ie_network_reader.cpp +++ b/inference-engine/src/inference_engine/ie_network_reader.cpp @@ -151,7 +151,6 @@ void assertIfIRv7LikeModel(std::istream & modelStream) { } // namespace CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector& exts) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "details::ReadNetwork"); // Register readers if it is needed registerReaders(); @@ -210,11 +209,13 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binStream.seekg(0, std::ios::beg); Blob::Ptr weights = make_shared_blob({Precision::U8, { fileSize }, C }); - weights->allocate(); - binStream.read(weights->buffer(), fileSize); - - binStream.close(); + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "ReadNetworkWeights"); + weights->allocate(); + binStream.read(weights->buffer(), fileSize); + binStream.close(); + } // read model with weights auto network = reader->read(modelStream, weights, exts); @@ -230,7 +231,6 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& } CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "details::ReadNetwork"); // Register readers if it is needed registerReaders(); std::istringstream modelStream(model); diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt index 5993b0ad312..f7de7ce20ec 100644 --- a/inference-engine/src/legacy_api/CMakeLists.txt +++ b/inference-engine/src/legacy_api/CMakeLists.txt @@ -50,6 +50,8 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE target_compile_definitions(${TARGET_NAME}_obj PRIVATE $) +target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt) + add_cpplint_target(${TARGET_NAME}_obj_cpplint 
FOR_TARGETS ${TARGET_NAME}_obj) # Create shared library diff --git a/inference-engine/src/legacy_api/src/ie_legacy_itt.hpp b/inference-engine/src/legacy_api/src/ie_legacy_itt.hpp index 203394e6eaf..91d7f09b3e6 100644 --- a/inference-engine/src/legacy_api/src/ie_legacy_itt.hpp +++ b/inference-engine/src/legacy_api/src/ie_legacy_itt.hpp @@ -15,6 +15,7 @@ namespace InferenceEngine { namespace itt { namespace domains { OV_ITT_DOMAIN(IELegacy); + OV_ITT_DOMAIN(IELegacy_LT); } } } diff --git a/inference-engine/src/legacy_api/src/ie_util_internal.cpp b/inference-engine/src/legacy_api/src/ie_util_internal.cpp index cbe1e3ec8ee..a5e39527b7b 100644 --- a/inference-engine/src/legacy_api/src/ie_util_internal.cpp +++ b/inference-engine/src/legacy_api/src/ie_util_internal.cpp @@ -148,7 +148,7 @@ CNNLayerPtr clonelayer(const CNNLayer& source) { } CNNNetwork cloneNetwork(const CNNNetwork& network) { - OV_ITT_SCOPED_TASK(itt::domains::IELegacy, "cloneNetwork"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IELegacy_LT, "cloneNetwork"); if (network.getFunction()) { return CNNNetwork(std::make_shared(network)); diff --git a/inference-engine/src/low_precision_transformations/src/lpt_itt.h b/inference-engine/src/low_precision_transformations/src/lpt_itt.h new file mode 100644 index 00000000000..5b3f1b524bc --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/lpt_itt.h @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief Defines openvino domains for tracing + * @file lpt_itt.h + */ + +#pragma once + +#include + +namespace ngraph { +namespace pass { +namespace low_precision { +namespace itt { +namespace domains { + +OV_ITT_DOMAIN(LPT); +OV_ITT_DOMAIN(LPT_LT); + +} // namespace domains +} // namespace itt +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp 
b/inference-engine/src/low_precision_transformations/src/transformer.cpp index 31ad8c8b498..eee96c562e7 100644 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp @@ -21,6 +21,8 @@ #include "ngraph/pass/constant_folding.hpp" #include "ngraph/opsets/opset6.hpp" +#include "lpt_itt.h" + // branch specific transformations #include "low_precision/concat.hpp" #include "low_precision/concat_multi_channels.hpp" @@ -360,6 +362,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { return; } + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::LPT_LT, "LowPrecisionTransformer", "transform"); + ngraph::pass::ConstantFolding constantFolding; constantFolding.run_on_function(network); @@ -368,12 +372,16 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { TransformationContext context(network); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "TypeRelaxedReplacer"); + // Extend necessary operations with polymorphic semantics { TypeRelaxedReplacer pass; pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "BranchSpecificTransformations"); + { // Branch specific transformations GraphRewrite pass; @@ -381,6 +389,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FakeQuantizeDecomposition"); + { // Step #1: FakeQuantize decomposition transformation execution GraphRewrite pass; @@ -388,6 +398,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "LayerTransformations"); + { // Step #2: layer transformations execution GraphRewrite pass; @@ -395,6 +407,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, 
"CleanupTransformations"); + { // Step #3: cleanup transformations execution GraphRewrite pass; @@ -402,6 +416,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "StandaloneCleanupTransformations"); + { // Step #4: standalone cleanup transformations execution diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp index 156317c1d73..f3acd6d5a71 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp @@ -44,7 +44,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, _cfg{cfg}, _name{network.getName()}, _numaNodesWeights(numaNodesWeights) { - OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet"); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet"); // we are cloning network if we have statistics and we can transform network. 
_clonedNetwork = cloneNetwork(network); @@ -98,7 +98,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, } } - OV_ITT_TASK_NEXT(taskChain, "createConstInputs"); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "createConstInputs"); auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, const std::vector& shape, const std::string& name) { LayerParams attrs = {layer->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()}; auto constLayer = std::make_shared(attrs); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index e9db4a3076e..d1507ad1dad 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -97,7 +97,7 @@ template void MKLDNNGraph::ApplyUnrollPasses(CNNNetwork&); template void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) { - OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph"); + OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph"); if (IsReady()) ForgetGraphData(); @@ -210,6 +210,7 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx } void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "MKLDNNGraph::Replicate", "CNNNetwork"); InputsDataMap inputs = network.getInputsInfo(); this->_name = network.getName(); @@ -234,6 +235,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana return -1; }; + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AllNodes"); + // Replicate All Nodes in topological order for (const auto layer : CNNNetSortTopologically(network)) { CNNLayerPtr _layer = layer; @@ -271,6 +274,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const 
MKLDNNExtensionMana } } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Outputs"); + OutputsDataMap outputs = network.getOutputsInfo(); for (const auto &output : outputs) { const auto data = output.second; @@ -293,6 +298,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana unused_data.erase(data); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AddStubs"); + // Add stub output node for unused data for (auto to_stub_data : unused_data) { auto parent_layer = getCreatorLayer(to_stub_data).lock(); @@ -309,6 +316,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana graphNodes.push_back(node); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Inputs"); + // Replicate input nodes for (const auto& input : inputs) { auto inputLayer = getCreatorLayer(input.second->getInputData()).lock(); @@ -384,7 +393,7 @@ void MKLDNNGraph::InitGraph() { } void MKLDNNGraph::SetOriginalLayerNames() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames"); // Do it before cleanup. 
Because it will lose original layers information for (auto &graphNode : graphNodes) { @@ -409,14 +418,14 @@ void MKLDNNGraph::SetOriginalLayerNames() { } void MKLDNNGraph::InitNodes() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes"); for (auto &node : graphNodes) { node->init(); } } void MKLDNNGraph::InitDescriptors() { - OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare"); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare"); for (auto &node : graphNodes) { if (node->getType() == Input && _meanImages.find(node->getName()) != _meanImages.end()) { @@ -424,18 +433,18 @@ void MKLDNNGraph::InitDescriptors() { if (inputNode) inputNode->withMeanImage(); } - OV_ITT_TASK_NEXT(taskChain, node->profiling.getSupportedDescriptors); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.getSupportedDescriptors); node->getSupportedDescriptors(); - OV_ITT_TASK_NEXT(taskChain, node->profiling.initSupportedPrimitiveDescriptors); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.initSupportedPrimitiveDescriptors); node->initSupportedPrimitiveDescriptors(); - OV_ITT_TASK_NEXT(taskChain, node->profiling.filterSupportedPrimitiveDescriptors); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.filterSupportedPrimitiveDescriptors); node->filterSupportedPrimitiveDescriptors(); } for (auto &node : graphNodes) { - OV_ITT_TASK_NEXT(taskChain, node->profiling.selectOptimalPrimitiveDescriptor); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.selectOptimalPrimitiveDescriptor); node->selectOptimalPrimitiveDescriptor(); } } @@ -443,13 +452,13 @@ void MKLDNNGraph::InitDescriptors() { void MKLDNNGraph::InitOptimalPrimitiveDescriptors() { OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::InitOptimalPrimitiveDescriptors"); 
for (auto &node : graphNodes) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor); node->initOptimalPrimitiveDescriptor(); } } void MKLDNNGraph::ExecuteConstantNodesOnly() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly"); mkldnn::stream stream(eng); using shared_memory_ptr = MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr; @@ -511,7 +520,7 @@ static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& c } void MKLDNNGraph::InitEdges() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges"); size_t numberOfEdges = graphEdges.size(); @@ -730,7 +739,7 @@ void MKLDNNGraph::AllocateWithReuse() { } void MKLDNNGraph::Allocate() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate"); // resolve edges. 
Define which will be a view on others // NeedAllocation - real blob @@ -750,7 +759,7 @@ void MKLDNNGraph::Allocate() { void MKLDNNGraph::CreatePrimitives() { OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::CreatePrimitives"); for (auto& node : graphNodes) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.createPrimitive); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, node->profiling.createPrimitive); node->createPrimitive(); } } @@ -888,7 +897,7 @@ void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector& sort } void MKLDNNGraph::SortTopologically() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically"); std::vector unsorted; std::vector sorted; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 5452c2343a6..10c808ba2b2 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -50,88 +50,111 @@ using namespace InferenceEngine; MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {} void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations"); - + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "MergeTwoEqualScaleShifts"); MergeTwoEqualScaleShifts(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBroadcastAndEltwise"); FuseBroadcastAndEltwise(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseClampAndQuantize"); FuseClampAndQuantize(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseScaleShiftAndQuantize"); FuseScaleShiftAndQuantize(graph); 
graph.RemoveDroppedNodes(); MergeGroupConvolution(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndZeroPoints"); FuseConvolutionAndZeroPoints(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); FuseConvolutionAndDepthwise(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndActivation"); FuseConvolutionAndActivation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); FuseConvolutionAndDepthwise(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndQuantize"); FuseConvolutionAndQuantize(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.SortTopologically(); graph.RemoveDroppedEdges(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); FuseConvolutionAndDepthwise(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePoolingAndQuantize"); FusePoolingAndQuantize(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.SortTopologically(); graph.RemoveDroppedEdges(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDWConvolution"); FuseConvolutionAndDWConvolution(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBinaryConvolutionAndQuantize"); FuseBinaryConvolutionAndQuantize(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBatchNormWithScale"); FuseBatchNormWithScale(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveIdentityOperator"); RemoveIdentityOperator(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, 
"FuseConvolutionSumAndConvolutionSumActivation"); FuseConvolutionSumAndConvolutionSumActivation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperation"); FuseConvolutionAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFullyConnectedAndSimpleOperation"); FuseFullyConnectedAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMVNAndSimpleOperation"); FuseMVNAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseInterpolateAndSimpleOperation"); FuseInterpolateAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseNormalizeAndSimpleOperation"); FuseNormalizeAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseEltwiseAndSimple"); FuseEltwiseAndSimple(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.RemoveDroppedEdges(); } void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); RemoveIOScaleShifts(graph); graph.RemoveDroppedNodes(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 7f043c40361..95302f9d442 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -299,7 +299,7 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { using namespace ngraph::pass::low_precision; if (useLpt) { - OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, 
"LowPrecisionTransformations"); + OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations"); ngraph::pass::Manager manager; auto lptPrerequisites = manager.register_pass(); @@ -363,11 +363,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { legacyManager.run_passes(nGraphFunc); - OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork"); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork"); clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize)); - OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision"); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConvertIOPrecision"); // WA: after conversion to CNNNetwork user precision can redefine input/output precisions // so we need to apply additional precision conversion but only for inputs and outputs @@ -423,7 +423,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std IE_SUPPRESS_DEPRECATED_END auto implNetwork = std::dynamic_pointer_cast(icnnnet); if (implNetwork) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding"); // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network ConstTransformer transformator(implNetwork.get()); transformator.fullTrim(); diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index 919ecbddfe1..0c29f342600 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -545,7 +545,7 @@ void XmlDeserializer::on_adapter( std::shared_ptr XmlDeserializer::parse_function( const pugi::xml_node& root, const 
Blob::CPtr& weights) { - OV_ITT_TASK_CHAIN(taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse"); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse"); struct FunctionNodes { ngraph::ParameterVector parameters; @@ -604,7 +604,7 @@ std::shared_ptr XmlDeserializer::parse_function( }; std::for_each(outputs.begin(), outputs.end(), dfs); - OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphNodes"); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphNodes"); FunctionNodes func_nodes; @@ -665,7 +665,7 @@ std::shared_ptr XmlDeserializer::parse_function( func_nodes.all.emplace_back(node); } - OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphFunction"); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphFunction"); auto function = std::make_shared( func_nodes.results, func_nodes.sinks, func_nodes.parameters, GetStrAttr(root, "name", "")); @@ -876,7 +876,7 @@ std::shared_ptr V10Parser::parse( XmlDeserializer visitor(root, weights, opsets, variables); visitor.on_attribute("net", function); - OV_ITT_SCOPED_TASK(itt::domains::V10Reader_RT, "ConstructCNNNetwork"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "ConstructCNNNetwork"); CNNNetwork net(function, _exts); parsePreProcess(net, root, weights); diff --git a/inference-engine/src/readers/ir_reader/ie_ir_reader.cpp b/inference-engine/src/readers/ir_reader/ie_ir_reader.cpp index 9d659e1c5db..4133f7073ef 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_reader.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_reader.cpp @@ -33,14 +33,19 @@ CNNNetwork IRReader::read(std::istream& model, const std::vector& return read(model, nullptr, exts); } -CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const std::vector& exts) const { - OV_ITT_SCOPED_TASK(itt::domains::V10Reader, "IRReader::read"); - - pugi::xml_document xmlDoc; +static void loadXml(pugi::xml_document &xmlDoc, std::istream& model) { + 
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "loadXml"); pugi::xml_parse_result res = xmlDoc.load(model); if (res.status != pugi::status_ok) { IE_THROW() << res.description() << "at offset " << res.offset; } +} + +CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const std::vector& exts) const { + OV_ITT_SCOPED_TASK(itt::domains::V10Reader, "IRReader::read"); + + pugi::xml_document xmlDoc; + loadXml(xmlDoc, model); pugi::xml_node root = xmlDoc.document_element(); auto version = details::GetIRVersion(root); diff --git a/ngraph/core/reference/src/runtime/reference/convert.cpp b/ngraph/core/reference/src/runtime/reference/convert.cpp index d87567b9b43..809c9ca3dc5 100644 --- a/ngraph/core/reference/src/runtime/reference/convert.cpp +++ b/ngraph/core/reference/src/runtime/reference/convert.cpp @@ -30,6 +30,7 @@ namespace ngraph gen.vpmovzxbd(i32vec, u8vec); gen.vcvtdq2ps(fvec, i32vec); gen.vcvtps2ph(f16vec, fvec, 0); + gen.vzeroupper(); gen.movdqu(gen.xword[dst], f16vec); } diff --git a/ngraph/core/src/function.cpp b/ngraph/core/src/function.cpp index b2d831470c6..10db441ec58 100644 --- a/ngraph/core/src/function.cpp +++ b/ngraph/core/src/function.cpp @@ -88,8 +88,8 @@ Function::Function(const OutputVector& results, void Function::check_all_parameters_registered() const { - OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT, - "Function::check_all_parameters_registered"); + OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraph, "Function::check_all_parameters_registered"); + std::stringstream unregistered_parameters; for (auto& node : get_ordered_ops()) { @@ -104,8 +104,7 @@ void Function::check_all_parameters_registered() const void Function::validate_nodes_and_infer_types() const { - OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT, - "Function::validate_nodes_and_infer_types"); + OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraph, "Function::validate_nodes_and_infer_types"); struct Counter { diff --git 
a/ngraph/core/src/pass/graph_rewrite.cpp b/ngraph/core/src/pass/graph_rewrite.cpp index 693dd8e226c..75e6b7ca9ae 100644 --- a/ngraph/core/src/pass/graph_rewrite.cpp +++ b/ngraph/core/src/pass/graph_rewrite.cpp @@ -62,14 +62,14 @@ namespace ngraph { namespace pass { - namespace + namespace internal { PerfCounters& perf_counters_graph_rewrite() { static PerfCounters counters; return counters; } - } // namespace + } // namespace internal } // namespace pass } // namespace ngraph @@ -428,7 +428,8 @@ void ngraph::pass::MatcherPass::register_matcher(const std::shared_ptr node) { - OV_ITT_SCOPED_TASK(itt::domains::nGraph, pass::perf_counters_graph_rewrite()[get_type_info()]); + OV_ITT_SCOPED_TASK(itt::domains::nGraph, + pass::internal::perf_counters_graph_rewrite()[get_type_info()]); m_new_nodes.clear(); if (m_handler) return m_handler(node); diff --git a/ngraph/core/src/pass/manager.cpp b/ngraph/core/src/pass/manager.cpp index c04cc7f1331..59f728f09cf 100644 --- a/ngraph/core/src/pass/manager.cpp +++ b/ngraph/core/src/pass/manager.cpp @@ -29,14 +29,14 @@ namespace ngraph { namespace pass { - namespace + namespace internal { - PerfCounters& perf_counters_manager() + PerfCounters& perf_counters() { static PerfCounters counters; return counters; } - } // namespace + } // namespace internal } // namespace pass } // namespace ngraph @@ -72,8 +72,9 @@ void pass::Manager::run_passes(shared_ptr func) continue; } - OV_ITT_SCOPED_TASK(itt::domains::nGraphPass_LT, - pass::perf_counters_manager()[pass->get_type_info()]); + OV_ITT_SCOPE(FIRST_INFERENCE, + itt::domains::nGraphPass_LT, + pass::internal::perf_counters()[pass->get_type_info()]); pass_timer.start(); diff --git a/openvino/conditional_compilation/include/openvino/cc/selective_build.h b/openvino/conditional_compilation/include/openvino/cc/selective_build.h index 15a7b70805b..756179695a3 100644 --- a/openvino/conditional_compilation/include/openvino/cc/selective_build.h +++ 
b/openvino/conditional_compilation/include/openvino/cc/selective_build.h @@ -187,25 +187,10 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs, Cases&&... case #define OV_CC_DOMAINS(Module) -// Placeholder for first macro argument -#define OV_CC_SCOPE_ARG_PLACEHOLDER_1 0, - -// This macro returns second argument, first argument is ignored -#define OV_CC_SCOPE_SECOND_ARG(...) OV_PP_EXPAND(OV_CC_SCOPE_SECOND_ARG_(__VA_ARGS__, 0)) -#define OV_CC_SCOPE_SECOND_ARG_(...) OV_PP_EXPAND(OV_CC_SCOPE_SECOND_ARG_GET(__VA_ARGS__)) -#define OV_CC_SCOPE_SECOND_ARG_GET(ignored, val, ...) val - -// Return macro argument value -#define OV_CC_SCOPE_IS_ENABLED(x) OV_CC_SCOPE_IS_ENABLED1(x) - -// Generate junk macro or {0, } sequence if val is 1 -#define OV_CC_SCOPE_IS_ENABLED1(val) OV_CC_SCOPE_IS_ENABLED2(OV_PP_CAT(OV_CC_SCOPE_ARG_PLACEHOLDER_, val)) - -// Return second argument from possible sequences {1, 0}, {0, 1, 0} -#define OV_CC_SCOPE_IS_ENABLED2(arg1_or_junk) OV_CC_SCOPE_SECOND_ARG(arg1_or_junk 1, 0) +#define OV_CC_SCOPE_IS_ENABLED OV_PP_IS_ENABLED #define OV_SCOPE(Module, region) \ - for (bool ovCCScopeIsEnabled = OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, region)); ovCCScopeIsEnabled; ovCCScopeIsEnabled = false) + for (bool ovCCScopeIsEnabled = OV_PP_IS_ENABLED(OV_PP_CAT3(Module, _, region)); ovCCScopeIsEnabled; ovCCScopeIsEnabled = false) // Switch is disabled #define OV_CC_SWITCH_0(Module, fn, ctx, val) @@ -214,7 +199,7 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs, Cases&&... case #define OV_CC_SWITCH_1(Module, fn, ctx, val) openvino::cc::internal::match(ctx, val, OV_PP_CAT4(Module, _, fn, _cases)); #define OV_SWITCH(Module, fn, ctx, val, ...) 
\ - OV_PP_EXPAND(OV_PP_CAT(OV_CC_SWITCH_, OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, fn)))(Module, fn, ctx, val)) + OV_PP_EXPAND(OV_PP_CAT(OV_CC_SWITCH_, OV_PP_IS_ENABLED(OV_PP_CAT3(Module, _, fn)))(Module, fn, ctx, val)) #define OV_CASE(Case, Type) openvino::cc::internal::make_case_wrapper(Case) diff --git a/openvino/itt/CMakeLists.txt b/openvino/itt/CMakeLists.txt index 0f9437c9f57..e9f880b8ce7 100644 --- a/openvino/itt/CMakeLists.txt +++ b/openvino/itt/CMakeLists.txt @@ -14,6 +14,16 @@ target_link_libraries(${TARGET_NAME} PUBLIC openvino::pp) if(TARGET ittnotify) target_link_libraries(${TARGET_NAME} PUBLIC ittnotify) + if(ENABLE_PROFILING_FILTER STREQUAL "ALL") + target_compile_definitions(${TARGET_NAME} PUBLIC + ENABLE_PROFILING_ALL + ENABLE_PROFILING_FIRST_INFERENCE) + elseif(ENABLE_PROFILING_FILTER STREQUAL "FIRST_INFERENCE") + target_compile_definitions(${TARGET_NAME} PUBLIC + ENABLE_PROFILING_FIRST_INFERENCE) + else() + message(FATAL_ERROR "The ${ENABLE_PROFILING_FILTER} profiling filter isn't supported") + endif() endif() if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") diff --git a/openvino/itt/include/openvino/itt.hpp b/openvino/itt/include/openvino/itt.hpp index d950664ea80..8a22efa54fe 100644 --- a/openvino/itt/include/openvino/itt.hpp +++ b/openvino/itt/include/openvino/itt.hpp @@ -214,6 +214,8 @@ namespace openvino */ #define OV_ITT_DOMAIN(...) OV_PP_OVERLOAD(OV_ITT_DOMAIN, __VA_ARGS__) +#define OV_ITT_GROUP(group) OV_PP_CAT(ENABLE_PROFILING_, group) + /** * @cond */ @@ -232,6 +234,37 @@ inline openvino::itt::domain_t domainName() noexcept return d; \ } +/** + * @endcond + */ + +/** + * @def OV_ITT_SCOPE(group, domain, handleOrTaskName) + * @ingroup ie_dev_profiling + * @brief Annotate section of code till scope exit to be profiled using known @p handle or @p taskName as section id. + * @details In case if handle or taskName absent, the current function name is used. + * @param group [in] ITT counter group name used for enabling/disabling at compile time. 
+ * @param domainName [in] Known at compile time name of module or library (the domain name). + * @param handleOrTaskName [in] The annotation name or handle for section of code. Parameter is optional. + */ +#define OV_ITT_SCOPE(group, ...) \ + OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__)) + +/** + * @cond + */ + +#define OV_ITT_SCOPE_IMPL_0(...) +#define OV_ITT_SCOPE_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE, __VA_ARGS__) + +#define OV_ITT_SCOPE_1(domain) \ + openvino::itt::ScopedTask OV_PP_CAT(ittScopedTask, __LINE__) \ + (openvino::itt::handle(ITT_FUNCTION_NAME)); + +#define OV_ITT_SCOPE_2(domain, taskOrTaskName) \ + openvino::itt::ScopedTask OV_PP_CAT(ittScopedTask, __LINE__) \ + (openvino::itt::handle(taskOrTaskName)); + /** * @endcond */ @@ -244,19 +277,97 @@ inline openvino::itt::domain_t domainName() noexcept * @param domainName [in] Known at compile time name of module or library (the domain name). * @param handleOrTaskName [in] The annotation name or handle for section of code. Parameter is optional. */ -#define OV_ITT_SCOPED_TASK(...) OV_PP_OVERLOAD(OV_ITT_SCOPED_TASK, __VA_ARGS__) +#define OV_ITT_SCOPED_TASK(...) OV_ITT_SCOPE(ALL, __VA_ARGS__) + +/** + * @def OV_ITT_SCOPE_CHAIN(group, chainId, domain, prefix, taskName) + * @ingroup ie_dev_profiling + * @brief Begins the sequence of annotated sections of code using @p prefix and @p taskName as section id. + * @details In case if prefix absent, the current function name is used, + * if taskName absent, the first chain index is used, i.e 1. + * @param group [in] ITT counter group name used for enabling/disabling at compile time. + * @param chainId [in] The tasks chain identifier. + * @param domainName [in] Known at compile time name of module or library (the domain name). + * @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional. + * @param taskName [in] The annotation name for section of code. 
Parameter is optional. + */ +#define OV_ITT_SCOPE_CHAIN(group, ...) \ + OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_CHAIN_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__)) /** * @cond */ -#define OV_ITT_SCOPED_TASK_1(domain) \ - openvino::itt::ScopedTask OV_PP_CAT(ittScopedTask, __LINE__) \ - (openvino::itt::handle(ITT_FUNCTION_NAME)); +#define OV_ITT_SCOPE_CHAIN_IMPL_0(...) +#define OV_ITT_SCOPE_CHAIN_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE_CHAIN, __VA_ARGS__) -#define OV_ITT_SCOPED_TASK_2(domain, taskOrTaskName) \ - openvino::itt::ScopedTask OV_PP_CAT(ittScopedTask, __LINE__) \ - (openvino::itt::handle(taskOrTaskName)); +#define OV_ITT_SCOPE_CHAIN_2(chainId, domain) \ + openvino::itt::TaskChain chainId \ + (openvino::itt::handle \ + (std::string(ITT_FUNCTION_NAME) + "_1"), \ + ITT_FUNCTION_NAME); + +#define OV_ITT_SCOPE_CHAIN_3(chainId, domain, prefix) \ + openvino::itt::TaskChain chainId \ + (openvino::itt::handle \ + (std::string(prefix) + "_1"), \ + prefix); + +#define OV_ITT_SCOPE_CHAIN_4(chainId, domain, prefix, taskName) \ + openvino::itt::TaskChain chainId \ + (openvino::itt::handle \ + (std::string(prefix) + "_" + taskName), \ + prefix); + +/** + * @endcond + */ + +/** + * @def OV_ITT_SCOPE_NEXT(group, chainId, taskName) + * @ingroup ie_dev_profiling + * @brief Inserts new annotated section of code to tasks chain using @p taskName as section id. + * @details If taskName is missing, the current chain index is used. + * @param group [in] ITT counter group name used for enabling/disabling at compile time. + * @param chainId [in] The tasks chain identifier. + * @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional. + */ +#define OV_ITT_SCOPE_NEXT(group, ...) \ + OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_NEXT_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__)) + +/** + * @cond + */ + +#define OV_ITT_SCOPE_NEXT_IMPL_0(...) +#define OV_ITT_SCOPE_NEXT_IMPL_1(...) 
OV_PP_OVERLOAD(OV_ITT_SCOPE_NEXT, __VA_ARGS__) + +#define OV_ITT_SCOPE_NEXT_1(chainId) \ + chainId.next(openvino::itt::handle(chainId.taskName())); + +#define OV_ITT_SCOPE_NEXT_2(chainId, taskOrTaskName) \ + chainId.next(openvino::itt::handle(chainId.taskNameOrHandle(taskOrTaskName))); + +/** + * @endcond + */ + +/** + * @def OV_ITT_SCOPE_SKIP(group, chainId) + * @ingroup ie_dev_profiling + * @brief Skips the remaining task scope. + * @param group [in] ITT counter group name used for enabling/disabling at compile time. + * @param chainId [in] The tasks chain identifier. + */ +#define OV_ITT_SCOPE_SKIP(group, chainId) \ + OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_SKIP_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(chainId)) + +/** + * @cond + */ + +#define OV_ITT_SCOPE_SKIP_0(chainId) +#define OV_ITT_SCOPE_SKIP_1(chainId) chainId.skip(); /** * @endcond @@ -273,33 +384,7 @@ inline openvino::itt::domain_t domainName() noexcept * @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional. * @param taskName [in] The annotation name for section of code. Parameter is optional. */ -#define OV_ITT_TASK_CHAIN(...) OV_PP_OVERLOAD(OV_ITT_TASK_CHAIN, __VA_ARGS__) - -/** - * @cond - */ - -#define OV_ITT_TASK_CHAIN_2(chainId, domain) \ - openvino::itt::TaskChain chainId \ - (openvino::itt::handle \ - (std::string(ITT_FUNCTION_NAME) + "_1"), \ - ITT_FUNCTION_NAME); - -#define OV_ITT_TASK_CHAIN_3(chainId, domain, prefix) \ - openvino::itt::TaskChain chainId \ - (openvino::itt::handle \ - (std::string(prefix) + "_1"), \ - prefix); - -#define OV_ITT_TASK_CHAIN_4(chainId, domain, prefix, taskName) \ - openvino::itt::TaskChain chainId \ - (openvino::itt::handle \ - (std::string(prefix) + "_" + taskName), \ - prefix); - -/** - * @endcond - */ +#define OV_ITT_TASK_CHAIN(...) 
OV_ITT_SCOPE_CHAIN(ALL, __VA_ARGS__) /** * @def OV_ITT_TASK_NEXT(chainId, taskName) @@ -309,21 +394,7 @@ inline openvino::itt::domain_t domainName() noexcept * @param chainId [in] The tasks chain identifier. * @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional. */ -#define OV_ITT_TASK_NEXT(...) OV_PP_OVERLOAD(OV_ITT_TASK_NEXT, __VA_ARGS__) - -/** - * @cond - */ - -#define OV_ITT_TASK_NEXT_1(chainId) \ - chainId.next(openvino::itt::handle(chainId.taskName())); - -#define OV_ITT_TASK_NEXT_2(chainId, taskOrTaskName) \ - chainId.next(openvino::itt::handle(chainId.taskNameOrHandle(taskOrTaskName))); - -/** - * @endcond - */ +#define OV_ITT_TASK_NEXT(...) OV_ITT_SCOPE_NEXT(ALL, __VA_ARGS__) /** * @def OV_ITT_TASK_SKIP(chainId) @@ -331,7 +402,7 @@ inline openvino::itt::domain_t domainName() noexcept * @brief Skips the remaining task scope. * @param chainId [in] The tasks chain identifier. */ -#define OV_ITT_TASK_SKIP(chainId) chainId.skip(); +#define OV_ITT_TASK_SKIP(chainId) OV_ITT_SCOPE_SKIP(ALL, chainId); } // namespace itt } // namespace openvino diff --git a/openvino/itt/src/itt.cpp b/openvino/itt/src/itt.cpp index 5fa17f81213..de3fa04da8e 100644 --- a/openvino/itt/src/itt.cpp +++ b/openvino/itt/src/itt.cpp @@ -40,7 +40,7 @@ void taskBegin(domain_t d, handle_t t) { } void taskEnd(domain_t d) { - if (!callStackDepth() || call_stack_depth-- > 0) + if (!callStackDepth() || --call_stack_depth < callStackDepth()) __itt_task_end(reinterpret_cast<__itt_domain*>(d)); } diff --git a/openvino/pp/include/openvino/pp.hpp b/openvino/pp/include/openvino/pp.hpp index f54e6e1b015..6931c8f1845 100644 --- a/openvino/pp/include/openvino/pp.hpp +++ b/openvino/pp/include/openvino/pp.hpp @@ -30,3 +30,20 @@ #define OV_PP_CAT4(x, y, z, w) OV_PP_CAT4_(x, y, z, w) #define OV_PP_OVERLOAD(NAME, ...) 
OV_PP_EXPAND( OV_PP_CAT3(NAME, _, OV_PP_EXPAND( OV_PP_NARG(OV_PP_NO_ARGS __VA_ARGS__ (NAME)) ))(__VA_ARGS__) ) + +// Placeholder for first macro argument +#define OV_PP_ARG_PLACEHOLDER_1 0, + +// This macro returns second argument, first argument is ignored +#define OV_PP_SECOND_ARG(...) OV_PP_EXPAND(OV_PP_SECOND_ARG_(__VA_ARGS__, 0)) +#define OV_PP_SECOND_ARG_(...) OV_PP_EXPAND(OV_PP_SECOND_ARG_GET(__VA_ARGS__)) +#define OV_PP_SECOND_ARG_GET(ignored, val, ...) val + +// Return macro argument value +#define OV_PP_IS_ENABLED(x) OV_PP_IS_ENABLED1(x) + +// Generate junk macro or {0, } sequence if val is 1 +#define OV_PP_IS_ENABLED1(val) OV_PP_IS_ENABLED2(OV_PP_CAT(OV_PP_ARG_PLACEHOLDER_, val)) + +// Return second argument from possible sequences {1, 0}, {0, 1, 0} +#define OV_PP_IS_ENABLED2(arg1_or_junk) OV_PP_SECOND_ARG(arg1_or_junk 1, 0)