Nested ITT counters lead to invalid performance measurement results (#5172)
* Compile-time enabling or disabling of first inference time counters
* First inference time counters
* Counters for validate_nodes_and_infer_types and check_all_parameters_registered removed from first inference time counters scope
* Code style fix
* Missing macro for CC and invalid domain names
* Code style fix
* Unused function warnings fixed
This commit is contained in:
parent
b3de228206
commit
c97bb90a91
@ -10,6 +10,14 @@ ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "
|
||||
|
||||
ie_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF)
|
||||
|
||||
ie_option_enum(ENABLE_PROFILING_FILTER "Enable or disable ITT counter groups.\
|
||||
Supported values:\
|
||||
ALL - enable all ITT counters (default value)\
|
||||
FIRST_INFERENCE - enable only first inference time counters" ALL
|
||||
ALLOWED_VALUES ALL FIRST_INFERENCE)
|
||||
|
||||
ie_option (ENABLE_PROFILING_FIRST_INFERENCE "Build with ITT tracing of first inference time." ON)
|
||||
|
||||
ie_option (ENABLE_DOCS "Build docs using Doxygen" OFF)
|
||||
|
||||
ie_option(ENABLE_TEMPLATE_PLUGIN "Register template plugin into plugins.xml" OFF)
|
||||
|
@ -125,7 +125,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DI
|
||||
$<TARGET_PROPERTY:${TARGET_NAME}_transformations,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
$<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TARGET_NAME}_reader_api)
|
||||
target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TARGET_NAME}_reader_api openvino::itt)
|
||||
|
||||
set_ie_threading_interface_for(${TARGET_NAME}_obj)
|
||||
if (TBBBIND_2_4_FOUND)
|
||||
|
@ -87,7 +87,7 @@ std::string NetworkCompilationContext::calculateFileInfo(const std::string& file
|
||||
|
||||
std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& compileOptions) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN");
|
||||
OstreamHashWrapper xmlHash;
|
||||
OstreamHashWrapper binHash;
|
||||
std::ostream xml(&xmlHash);
|
||||
@ -163,7 +163,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
|
||||
|
||||
std::string NetworkCompilationContext::computeHash(const std::string& modelName,
|
||||
const std::map<std::string, std::string>& compileOptions) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName");
|
||||
size_t seed {};
|
||||
try {
|
||||
seed = hash_combine(seed, FileUtils::absoluteFilePath(modelName));
|
||||
|
@ -228,7 +228,7 @@ class Core::Impl : public ICore {
|
||||
const std::string& blobID,
|
||||
const std::string& modelPath = std::string(),
|
||||
bool forceDisableCache = false) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::LoadNetworkImpl");
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::LoadNetworkImpl");
|
||||
ExecutableNetwork execNetwork;
|
||||
execNetwork = context ? plugin.LoadNetwork(network, context, parsedConfig) :
|
||||
plugin.LoadNetwork(network, parsedConfig);
|
||||
@ -236,7 +236,7 @@ class Core::Impl : public ICore {
|
||||
if (!forceDisableCache && cacheManager && DeviceSupportsImportExport(plugin)) {
|
||||
try {
|
||||
// need to export network for further import from "cache"
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::Export");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Export");
|
||||
cacheManager->writeCacheEntry(blobID, [&](std::ostream& networkStream) {
|
||||
networkStream << CompiledBlobHeader(GetInferenceEngineVersion()->buildNumber,
|
||||
NetworkCompilationContext::calculateFileInfo(modelPath));
|
||||
@ -263,7 +263,7 @@ class Core::Impl : public ICore {
|
||||
IE_ASSERT(cacheManager != nullptr);
|
||||
try {
|
||||
cacheManager->readCacheEntry(blobId, [&](std::istream &networkStream) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport");
|
||||
try {
|
||||
CompiledBlobHeader header;
|
||||
networkStream >> header;
|
||||
@ -434,19 +434,19 @@ public:
|
||||
}
|
||||
|
||||
CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::ReadNetwork from file");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from file");
|
||||
return details::ReadNetwork(modelPath, binPath, extensions);
|
||||
}
|
||||
|
||||
CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const override {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::ReadNetwork from memory");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from memory");
|
||||
return details::ReadNetwork(model, weights, extensions);
|
||||
}
|
||||
|
||||
// TODO: In future this method can be added to ICore interface
|
||||
ExecutableNetwork LoadNetwork(const CNNNetwork& network, const RemoteContext::Ptr& context,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext");
|
||||
if (context == nullptr) {
|
||||
IE_THROW() << "Remote context is null";
|
||||
}
|
||||
@ -470,7 +470,7 @@ public:
|
||||
|
||||
ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config) override {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::CNN");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::CNN");
|
||||
bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0;
|
||||
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
|
||||
if (forceDisableCache) {
|
||||
@ -497,7 +497,7 @@ public:
|
||||
// TODO: In future this method can be added to ICore interface
|
||||
ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::Path");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path");
|
||||
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
|
||||
auto plugin = GetCPPPluginByName(parsed._deviceName);
|
||||
ExecutableNetwork res;
|
||||
@ -634,7 +634,7 @@ public:
|
||||
* @return Reference to a CPP plugin wrapper
|
||||
*/
|
||||
InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName");
|
||||
|
||||
std::lock_guard<std::mutex> lock(pluginsMutex);
|
||||
|
||||
|
@ -16,6 +16,7 @@ namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(IE);
|
||||
OV_ITT_DOMAIN(IE_LT);
|
||||
OV_ITT_DOMAIN(IE_RT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -151,7 +151,6 @@ void assertIfIRv7LikeModel(std::istream & modelStream) {
|
||||
} // namespace
|
||||
|
||||
CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector<IExtensionPtr>& exts) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE, "details::ReadNetwork");
|
||||
// Register readers if it is needed
|
||||
registerReaders();
|
||||
|
||||
@ -210,11 +209,13 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string&
|
||||
binStream.seekg(0, std::ios::beg);
|
||||
|
||||
Blob::Ptr weights = make_shared_blob<uint8_t>({Precision::U8, { fileSize }, C });
|
||||
weights->allocate();
|
||||
|
||||
binStream.read(weights->buffer(), fileSize);
|
||||
|
||||
binStream.close();
|
||||
{
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "ReadNetworkWeights");
|
||||
weights->allocate();
|
||||
binStream.read(weights->buffer(), fileSize);
|
||||
binStream.close();
|
||||
}
|
||||
|
||||
// read model with weights
|
||||
auto network = reader->read(modelStream, weights, exts);
|
||||
@ -230,7 +231,6 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string&
|
||||
}
|
||||
|
||||
CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector<IExtensionPtr>& exts) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IE, "details::ReadNetwork");
|
||||
// Register readers if it is needed
|
||||
registerReaders();
|
||||
std::istringstream modelStream(model);
|
||||
|
@ -50,6 +50,8 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
|
||||
|
||||
target_compile_definitions(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_COMPILE_DEFINITIONS>)
|
||||
|
||||
target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt)
|
||||
|
||||
add_cpplint_target(${TARGET_NAME}_obj_cpplint FOR_TARGETS ${TARGET_NAME}_obj)
|
||||
|
||||
# Create shared library
|
||||
|
@ -15,6 +15,7 @@ namespace InferenceEngine {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(IELegacy);
|
||||
OV_ITT_DOMAIN(IELegacy_LT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -148,7 +148,7 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
|
||||
}
|
||||
|
||||
CNNNetwork cloneNetwork(const CNNNetwork& network) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::IELegacy, "cloneNetwork");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IELegacy_LT, "cloneNetwork");
|
||||
|
||||
if (network.getFunction()) {
|
||||
return CNNNetwork(std::make_shared<details::CNNNetworkNGraphImpl>(network));
|
||||
|
@ -0,0 +1,27 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Defines openvino domains for tracing
|
||||
* @file lpt_itt.h
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/itt.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
|
||||
OV_ITT_DOMAIN(LPT);
|
||||
OV_ITT_DOMAIN(LPT_LT);
|
||||
|
||||
} // namespace domains
|
||||
} // namespace itt
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -21,6 +21,8 @@
|
||||
#include "ngraph/pass/constant_folding.hpp"
|
||||
#include "ngraph/opsets/opset6.hpp"
|
||||
|
||||
#include "lpt_itt.h"
|
||||
|
||||
// branch specific transformations
|
||||
#include "low_precision/concat.hpp"
|
||||
#include "low_precision/concat_multi_channels.hpp"
|
||||
@ -360,6 +362,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
|
||||
return;
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::LPT_LT, "LowPrecisionTransformer", "transform");
|
||||
|
||||
ngraph::pass::ConstantFolding constantFolding;
|
||||
constantFolding.run_on_function(network);
|
||||
|
||||
@ -368,12 +372,16 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
|
||||
|
||||
TransformationContext context(network);
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "TypeRelaxedReplacer");
|
||||
|
||||
// Extend necessary operations with polymorphic semantics
|
||||
{
|
||||
TypeRelaxedReplacer pass;
|
||||
pass.run_on_function(network);
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "BranchSpecificTransformations");
|
||||
|
||||
{
|
||||
// Branch specific transformations
|
||||
GraphRewrite pass;
|
||||
@ -381,6 +389,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
|
||||
pass.run_on_function(network);
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FakeQuantizeDecomposition");
|
||||
|
||||
{
|
||||
// Step #1: FakeQuantize decomposition transformation execution
|
||||
GraphRewrite pass;
|
||||
@ -388,6 +398,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
|
||||
pass.run_on_function(network);
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "LayerTransformations");
|
||||
|
||||
{
|
||||
// Step #2: layer transformations execution
|
||||
GraphRewrite pass;
|
||||
@ -395,6 +407,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
|
||||
pass.run_on_function(network);
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "CleanupTransformations");
|
||||
|
||||
{
|
||||
// Step #3: cleanup transformations execution
|
||||
GraphRewrite pass;
|
||||
@ -402,6 +416,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
|
||||
pass.run_on_function(network);
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "StandaloneCleanupTransformations");
|
||||
|
||||
{
|
||||
// Step #4: standalone cleanup transformations execution
|
||||
|
||||
|
@ -44,7 +44,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
|
||||
_cfg{cfg},
|
||||
_name{network.getName()},
|
||||
_numaNodesWeights(numaNodesWeights) {
|
||||
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet");
|
||||
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet");
|
||||
|
||||
// we are cloning network if we have statistics and we can transform network.
|
||||
_clonedNetwork = cloneNetwork(network);
|
||||
@ -98,7 +98,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
|
||||
}
|
||||
}
|
||||
|
||||
OV_ITT_TASK_NEXT(taskChain, "createConstInputs");
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "createConstInputs");
|
||||
auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, const std::vector<size_t>& shape, const std::string& name) {
|
||||
LayerParams attrs = {layer->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()};
|
||||
auto constLayer = std::make_shared<InferenceEngine::CNNLayer>(attrs);
|
||||
|
@ -97,7 +97,7 @@ template void MKLDNNGraph::ApplyUnrollPasses(CNNNetwork&);
|
||||
template<typename NET>
|
||||
void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& extMgr,
|
||||
MKLDNNWeightsSharing::Ptr &w_cache) {
|
||||
OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph");
|
||||
|
||||
if (IsReady())
|
||||
ForgetGraphData();
|
||||
@ -210,6 +210,7 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx
|
||||
}
|
||||
|
||||
void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {
|
||||
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "MKLDNNGraph::Replicate", "CNNNetwork");
|
||||
InputsDataMap inputs = network.getInputsInfo();
|
||||
|
||||
this->_name = network.getName();
|
||||
@ -234,6 +235,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
|
||||
return -1;
|
||||
};
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AllNodes");
|
||||
|
||||
// Replicate All Nodes in topological order
|
||||
for (const auto layer : CNNNetSortTopologically(network)) {
|
||||
CNNLayerPtr _layer = layer;
|
||||
@ -271,6 +274,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
|
||||
}
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Outputs");
|
||||
|
||||
OutputsDataMap outputs = network.getOutputsInfo();
|
||||
for (const auto &output : outputs) {
|
||||
const auto data = output.second;
|
||||
@ -293,6 +298,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
|
||||
unused_data.erase(data);
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AddStubs");
|
||||
|
||||
// Add stub output node for unused data
|
||||
for (auto to_stub_data : unused_data) {
|
||||
auto parent_layer = getCreatorLayer(to_stub_data).lock();
|
||||
@ -309,6 +316,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
|
||||
graphNodes.push_back(node);
|
||||
}
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Inputs");
|
||||
|
||||
// Replicate input nodes
|
||||
for (const auto& input : inputs) {
|
||||
auto inputLayer = getCreatorLayer(input.second->getInputData()).lock();
|
||||
@ -384,7 +393,7 @@ void MKLDNNGraph::InitGraph() {
|
||||
}
|
||||
|
||||
void MKLDNNGraph::SetOriginalLayerNames() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames");
|
||||
|
||||
// Do it before cleanup. Because it will lose original layers information
|
||||
for (auto &graphNode : graphNodes) {
|
||||
@ -409,14 +418,14 @@ void MKLDNNGraph::SetOriginalLayerNames() {
|
||||
}
|
||||
|
||||
void MKLDNNGraph::InitNodes() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes");
|
||||
for (auto &node : graphNodes) {
|
||||
node->init();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNGraph::InitDescriptors() {
|
||||
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare");
|
||||
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare");
|
||||
|
||||
for (auto &node : graphNodes) {
|
||||
if (node->getType() == Input && _meanImages.find(node->getName()) != _meanImages.end()) {
|
||||
@ -424,18 +433,18 @@ void MKLDNNGraph::InitDescriptors() {
|
||||
if (inputNode)
|
||||
inputNode->withMeanImage();
|
||||
}
|
||||
OV_ITT_TASK_NEXT(taskChain, node->profiling.getSupportedDescriptors);
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.getSupportedDescriptors);
|
||||
node->getSupportedDescriptors();
|
||||
|
||||
OV_ITT_TASK_NEXT(taskChain, node->profiling.initSupportedPrimitiveDescriptors);
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.initSupportedPrimitiveDescriptors);
|
||||
node->initSupportedPrimitiveDescriptors();
|
||||
|
||||
OV_ITT_TASK_NEXT(taskChain, node->profiling.filterSupportedPrimitiveDescriptors);
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.filterSupportedPrimitiveDescriptors);
|
||||
node->filterSupportedPrimitiveDescriptors();
|
||||
}
|
||||
|
||||
for (auto &node : graphNodes) {
|
||||
OV_ITT_TASK_NEXT(taskChain, node->profiling.selectOptimalPrimitiveDescriptor);
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.selectOptimalPrimitiveDescriptor);
|
||||
node->selectOptimalPrimitiveDescriptor();
|
||||
}
|
||||
}
|
||||
@ -443,13 +452,13 @@ void MKLDNNGraph::InitDescriptors() {
|
||||
void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::InitOptimalPrimitiveDescriptors");
|
||||
for (auto &node : graphNodes) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor);
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor);
|
||||
node->initOptimalPrimitiveDescriptor();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNGraph::ExecuteConstantNodesOnly() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
|
||||
mkldnn::stream stream(eng);
|
||||
|
||||
using shared_memory_ptr = MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr;
|
||||
@ -511,7 +520,7 @@ static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& c
|
||||
}
|
||||
|
||||
void MKLDNNGraph::InitEdges() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
|
||||
|
||||
size_t numberOfEdges = graphEdges.size();
|
||||
|
||||
@ -730,7 +739,7 @@ void MKLDNNGraph::AllocateWithReuse() {
|
||||
}
|
||||
|
||||
void MKLDNNGraph::Allocate() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate");
|
||||
|
||||
// resolve edges. Define which will be a view on others
|
||||
// NeedAllocation - real blob
|
||||
@ -750,7 +759,7 @@ void MKLDNNGraph::Allocate() {
|
||||
void MKLDNNGraph::CreatePrimitives() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::CreatePrimitives");
|
||||
for (auto& node : graphNodes) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.createPrimitive);
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, node->profiling.createPrimitive);
|
||||
node->createPrimitive();
|
||||
}
|
||||
}
|
||||
@ -888,7 +897,7 @@ void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sort
|
||||
}
|
||||
|
||||
void MKLDNNGraph::SortTopologically() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically");
|
||||
|
||||
std::vector<MKLDNNNodePtr> unsorted;
|
||||
std::vector<MKLDNNNodePtr> sorted;
|
||||
|
@ -50,88 +50,111 @@ using namespace InferenceEngine;
|
||||
MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}
|
||||
|
||||
void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations");
|
||||
|
||||
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "MergeTwoEqualScaleShifts");
|
||||
MergeTwoEqualScaleShifts(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBroadcastAndEltwise");
|
||||
FuseBroadcastAndEltwise(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseClampAndQuantize");
|
||||
FuseClampAndQuantize(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseScaleShiftAndQuantize");
|
||||
FuseScaleShiftAndQuantize(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
MergeGroupConvolution(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndZeroPoints");
|
||||
FuseConvolutionAndZeroPoints(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise");
|
||||
FuseConvolutionAndDepthwise(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndActivation");
|
||||
FuseConvolutionAndActivation(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise");
|
||||
FuseConvolutionAndDepthwise(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndQuantize");
|
||||
FuseConvolutionAndQuantize(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges");
|
||||
graph.SortTopologically();
|
||||
graph.RemoveDroppedEdges();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise");
|
||||
FuseConvolutionAndDepthwise(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePoolingAndQuantize");
|
||||
FusePoolingAndQuantize(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges");
|
||||
graph.SortTopologically();
|
||||
graph.RemoveDroppedEdges();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDWConvolution");
|
||||
FuseConvolutionAndDWConvolution(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBinaryConvolutionAndQuantize");
|
||||
FuseBinaryConvolutionAndQuantize(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBatchNormWithScale");
|
||||
FuseBatchNormWithScale(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveIdentityOperator");
|
||||
RemoveIdentityOperator(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionSumAndConvolutionSumActivation");
|
||||
FuseConvolutionSumAndConvolutionSumActivation(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperation");
|
||||
FuseConvolutionAndSimpleOperation(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFullyConnectedAndSimpleOperation");
|
||||
FuseFullyConnectedAndSimpleOperation(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMVNAndSimpleOperation");
|
||||
FuseMVNAndSimpleOperation(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseInterpolateAndSimpleOperation");
|
||||
FuseInterpolateAndSimpleOperation(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseNormalizeAndSimpleOperation");
|
||||
FuseNormalizeAndSimpleOperation(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseEltwiseAndSimple");
|
||||
FuseEltwiseAndSimple(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges");
|
||||
graph.RemoveDroppedEdges();
|
||||
}
|
||||
|
||||
void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations");
|
||||
|
||||
RemoveIOScaleShifts(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
@ -299,7 +299,7 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
|
||||
|
||||
using namespace ngraph::pass::low_precision;
|
||||
if (useLpt) {
|
||||
OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations");
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>();
|
||||
@ -363,11 +363,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
|
||||
|
||||
legacyManager.run_passes(nGraphFunc);
|
||||
|
||||
OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");
|
||||
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork");
|
||||
|
||||
clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize));
|
||||
|
||||
OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision");
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConvertIOPrecision");
|
||||
|
||||
// WA: after conversion to CNNNetwork user precision can redefine input/output precisions
|
||||
// so we need to apply additional precision conversion but only for inputs and outputs
|
||||
@ -423,7 +423,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
auto implNetwork = std::dynamic_pointer_cast<details::CNNNetworkImpl>(icnnnet);
|
||||
if (implNetwork) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding");
|
||||
// valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
|
||||
ConstTransformer transformator(implNetwork.get());
|
||||
transformator.fullTrim();
|
||||
|
@ -545,7 +545,7 @@ void XmlDeserializer::on_adapter(
|
||||
|
||||
std::shared_ptr<ngraph::Function> XmlDeserializer::parse_function(
|
||||
const pugi::xml_node& root, const Blob::CPtr& weights) {
|
||||
OV_ITT_TASK_CHAIN(taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse");
|
||||
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse");
|
||||
|
||||
struct FunctionNodes {
|
||||
ngraph::ParameterVector parameters;
|
||||
@ -604,7 +604,7 @@ std::shared_ptr<ngraph::Function> XmlDeserializer::parse_function(
|
||||
};
|
||||
std::for_each(outputs.begin(), outputs.end(), dfs);
|
||||
|
||||
OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphNodes");
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphNodes");
|
||||
|
||||
FunctionNodes func_nodes;
|
||||
|
||||
@ -665,7 +665,7 @@ std::shared_ptr<ngraph::Function> XmlDeserializer::parse_function(
|
||||
func_nodes.all.emplace_back(node);
|
||||
}
|
||||
|
||||
OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphFunction");
|
||||
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphFunction");
|
||||
|
||||
auto function = std::make_shared<ngraph::Function>(
|
||||
func_nodes.results, func_nodes.sinks, func_nodes.parameters, GetStrAttr(root, "name", ""));
|
||||
@ -876,7 +876,7 @@ std::shared_ptr<ICNNNetwork> V10Parser::parse(
|
||||
XmlDeserializer visitor(root, weights, opsets, variables);
|
||||
visitor.on_attribute("net", function);
|
||||
|
||||
OV_ITT_SCOPED_TASK(itt::domains::V10Reader_RT, "ConstructCNNNetwork");
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "ConstructCNNNetwork");
|
||||
|
||||
CNNNetwork net(function, _exts);
|
||||
parsePreProcess(net, root, weights);
|
||||
|
@ -33,14 +33,19 @@ CNNNetwork IRReader::read(std::istream& model, const std::vector<IExtensionPtr>&
|
||||
return read(model, nullptr, exts);
|
||||
}
|
||||
|
||||
CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const std::vector<IExtensionPtr>& exts) const {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::V10Reader, "IRReader::read");
|
||||
|
||||
pugi::xml_document xmlDoc;
|
||||
static void loadXml(pugi::xml_document &xmlDoc, std::istream& model) {
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "loadXml");
|
||||
pugi::xml_parse_result res = xmlDoc.load(model);
|
||||
if (res.status != pugi::status_ok) {
|
||||
IE_THROW() << res.description() << "at offset " << res.offset;
|
||||
}
|
||||
}
|
||||
|
||||
CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const std::vector<IExtensionPtr>& exts) const {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::V10Reader, "IRReader::read");
|
||||
|
||||
pugi::xml_document xmlDoc;
|
||||
loadXml(xmlDoc, model);
|
||||
pugi::xml_node root = xmlDoc.document_element();
|
||||
|
||||
auto version = details::GetIRVersion(root);
|
||||
|
@ -30,6 +30,7 @@ namespace ngraph
|
||||
gen.vpmovzxbd(i32vec, u8vec);
|
||||
gen.vcvtdq2ps(fvec, i32vec);
|
||||
gen.vcvtps2ph(f16vec, fvec, 0);
|
||||
gen.vzeroupper();
|
||||
gen.movdqu(gen.xword[dst], f16vec);
|
||||
}
|
||||
|
||||
|
@ -88,8 +88,8 @@ Function::Function(const OutputVector& results,
|
||||
|
||||
void Function::check_all_parameters_registered() const
|
||||
{
|
||||
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT,
|
||||
"Function::check_all_parameters_registered");
|
||||
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraph, "Function::check_all_parameters_registered");
|
||||
|
||||
std::stringstream unregistered_parameters;
|
||||
for (auto& node : get_ordered_ops())
|
||||
{
|
||||
@ -104,8 +104,7 @@ void Function::check_all_parameters_registered() const
|
||||
|
||||
void Function::validate_nodes_and_infer_types() const
|
||||
{
|
||||
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT,
|
||||
"Function::validate_nodes_and_infer_types");
|
||||
OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraph, "Function::validate_nodes_and_infer_types");
|
||||
|
||||
struct Counter
|
||||
{
|
||||
|
@ -62,14 +62,14 @@ namespace ngraph
|
||||
{
|
||||
namespace pass
|
||||
{
|
||||
namespace
|
||||
namespace internal
|
||||
{
|
||||
PerfCounters& perf_counters_graph_rewrite()
|
||||
{
|
||||
static PerfCounters counters;
|
||||
return counters;
|
||||
}
|
||||
} // namespace
|
||||
} // namespace internal
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
@ -428,7 +428,8 @@ void ngraph::pass::MatcherPass::register_matcher(const std::shared_ptr<ngraph::p
|
||||
|
||||
bool ngraph::pass::MatcherPass::apply(std::shared_ptr<ngraph::Node> node)
|
||||
{
|
||||
OV_ITT_SCOPED_TASK(itt::domains::nGraph, pass::perf_counters_graph_rewrite()[get_type_info()]);
|
||||
OV_ITT_SCOPED_TASK(itt::domains::nGraph,
|
||||
pass::internal::perf_counters_graph_rewrite()[get_type_info()]);
|
||||
m_new_nodes.clear();
|
||||
if (m_handler)
|
||||
return m_handler(node);
|
||||
|
@ -29,14 +29,14 @@ namespace ngraph
|
||||
{
|
||||
namespace pass
|
||||
{
|
||||
namespace
|
||||
namespace internal
|
||||
{
|
||||
PerfCounters& perf_counters_manager()
|
||||
PerfCounters& perf_counters()
|
||||
{
|
||||
static PerfCounters counters;
|
||||
return counters;
|
||||
}
|
||||
} // namespace
|
||||
} // namespace internal
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
@ -72,8 +72,9 @@ void pass::Manager::run_passes(shared_ptr<Function> func)
|
||||
continue;
|
||||
}
|
||||
|
||||
OV_ITT_SCOPED_TASK(itt::domains::nGraphPass_LT,
|
||||
pass::perf_counters_manager()[pass->get_type_info()]);
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE,
|
||||
itt::domains::nGraphPass_LT,
|
||||
pass::internal::perf_counters()[pass->get_type_info()]);
|
||||
|
||||
pass_timer.start();
|
||||
|
||||
|
@ -187,25 +187,10 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs, Cases&&... case
|
||||
|
||||
#define OV_CC_DOMAINS(Module)
|
||||
|
||||
// Placeholder for first macro argument
|
||||
#define OV_CC_SCOPE_ARG_PLACEHOLDER_1 0,
|
||||
|
||||
// This macro returns second argument, first argument is ignored
|
||||
#define OV_CC_SCOPE_SECOND_ARG(...) OV_PP_EXPAND(OV_CC_SCOPE_SECOND_ARG_(__VA_ARGS__, 0))
|
||||
#define OV_CC_SCOPE_SECOND_ARG_(...) OV_PP_EXPAND(OV_CC_SCOPE_SECOND_ARG_GET(__VA_ARGS__))
|
||||
#define OV_CC_SCOPE_SECOND_ARG_GET(ignored, val, ...) val
|
||||
|
||||
// Return macro argument value
|
||||
#define OV_CC_SCOPE_IS_ENABLED(x) OV_CC_SCOPE_IS_ENABLED1(x)
|
||||
|
||||
// Generate junk macro or {0, } sequence if val is 1
|
||||
#define OV_CC_SCOPE_IS_ENABLED1(val) OV_CC_SCOPE_IS_ENABLED2(OV_PP_CAT(OV_CC_SCOPE_ARG_PLACEHOLDER_, val))
|
||||
|
||||
// Return second argument from possible sequences {1, 0}, {0, 1, 0}
|
||||
#define OV_CC_SCOPE_IS_ENABLED2(arg1_or_junk) OV_CC_SCOPE_SECOND_ARG(arg1_or_junk 1, 0)
|
||||
#define OV_CC_SCOPE_IS_ENABLED OV_PP_IS_ENABLED
|
||||
|
||||
#define OV_SCOPE(Module, region) \
|
||||
for (bool ovCCScopeIsEnabled = OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, region)); ovCCScopeIsEnabled; ovCCScopeIsEnabled = false)
|
||||
for (bool ovCCScopeIsEnabled = OV_PP_IS_ENABLED(OV_PP_CAT3(Module, _, region)); ovCCScopeIsEnabled; ovCCScopeIsEnabled = false)
|
||||
|
||||
// Switch is disabled
|
||||
#define OV_CC_SWITCH_0(Module, fn, ctx, val)
|
||||
@ -214,7 +199,7 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs, Cases&&... case
|
||||
#define OV_CC_SWITCH_1(Module, fn, ctx, val) openvino::cc::internal::match<fn>(ctx, val, OV_PP_CAT4(Module, _, fn, _cases));
|
||||
|
||||
#define OV_SWITCH(Module, fn, ctx, val, ...) \
|
||||
OV_PP_EXPAND(OV_PP_CAT(OV_CC_SWITCH_, OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, fn)))(Module, fn, ctx, val))
|
||||
OV_PP_EXPAND(OV_PP_CAT(OV_CC_SWITCH_, OV_PP_IS_ENABLED(OV_PP_CAT3(Module, _, fn)))(Module, fn, ctx, val))
|
||||
|
||||
#define OV_CASE(Case, Type) openvino::cc::internal::make_case_wrapper<Type>(Case)
|
||||
|
||||
|
@ -14,6 +14,16 @@ target_link_libraries(${TARGET_NAME} PUBLIC openvino::pp)
|
||||
|
||||
if(TARGET ittnotify)
|
||||
target_link_libraries(${TARGET_NAME} PUBLIC ittnotify)
|
||||
if(ENABLE_PROFILING_FILTER STREQUAL "ALL")
|
||||
target_compile_definitions(${TARGET_NAME} PUBLIC
|
||||
ENABLE_PROFILING_ALL
|
||||
ENABLE_PROFILING_FIRST_INFERENCE)
|
||||
elseif(ENABLE_PROFILING_FILTER STREQUAL "FIRST_INFERENCE")
|
||||
target_compile_definitions(${TARGET_NAME} PUBLIC
|
||||
ENABLE_PROFILING_FIRST_INFERENCE)
|
||||
else()
|
||||
message(FATAL_ERROR "The ${ENABLE_PROFILING_FILTER} profiling filter isn't supported")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
|
@ -214,6 +214,8 @@ namespace openvino
|
||||
*/
|
||||
#define OV_ITT_DOMAIN(...) OV_PP_OVERLOAD(OV_ITT_DOMAIN, __VA_ARGS__)
|
||||
|
||||
#define OV_ITT_GROUP(group) OV_PP_CAT(ENABLE_PROFILING_, group)
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
@ -232,6 +234,37 @@ inline openvino::itt::domain_t domainName() noexcept
|
||||
return d; \
|
||||
}
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def OV_ITT_SCOPE(group, domainName, handleOrTaskName)
|
||||
* @ingroup ie_dev_profiling
|
||||
* @brief Annotate section of code till scope exit to be profiled using known @p handle or @p taskName as section id.
|
||||
* @details If the handle or taskName is absent, the current function name is used.
|
||||
* @param group [in] ITT counter group name used for enabling/disabling at compile time.
|
||||
* @param domainName [in] Known at compile time name of module or library (the domain name).
|
||||
* @param handleOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
|
||||
*/
|
||||
#define OV_ITT_SCOPE(group, ...) \
|
||||
OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__))
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_SCOPE_IMPL_0(...)
|
||||
#define OV_ITT_SCOPE_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE, __VA_ARGS__)
|
||||
|
||||
#define OV_ITT_SCOPE_1(domain) \
|
||||
openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(ITT_FUNCTION_NAME));
|
||||
|
||||
#define OV_ITT_SCOPE_2(domain, taskOrTaskName) \
|
||||
openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(taskOrTaskName));
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
@ -244,19 +277,97 @@ inline openvino::itt::domain_t domainName() noexcept
|
||||
* @param domainName [in] Known at compile time name of module or library (the domain name).
|
||||
* @param handleOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
|
||||
*/
|
||||
#define OV_ITT_SCOPED_TASK(...) OV_PP_OVERLOAD(OV_ITT_SCOPED_TASK, __VA_ARGS__)
|
||||
#define OV_ITT_SCOPED_TASK(...) OV_ITT_SCOPE(ALL, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* @def OV_ITT_SCOPE_CHAIN(group, chainId, domain, prefix, taskName)
|
||||
* @ingroup ie_dev_profiling
|
||||
* @brief Begins a sequence of annotated sections of code using @p prefix and @p taskName as section id.
|
||||
* @details If prefix is absent, the current function name is used;
|
||||
* if taskName is absent, the first chain index is used, i.e. 1.
|
||||
* @param group [in] ITT counter group name used for enabling/disabling at compile time.
|
||||
* @param chainId [in] The tasks chain identifier.
|
||||
* @param domainName [in] Known at compile time name of module or library (the domain name).
|
||||
* @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional.
|
||||
* @param taskName [in] The annotation name for section of code. Parameter is optional.
|
||||
*/
|
||||
#define OV_ITT_SCOPE_CHAIN(group, ...) \
|
||||
OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_CHAIN_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__))
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_SCOPED_TASK_1(domain) \
|
||||
openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(ITT_FUNCTION_NAME));
|
||||
#define OV_ITT_SCOPE_CHAIN_IMPL_0(...)
|
||||
#define OV_ITT_SCOPE_CHAIN_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE_CHAIN, __VA_ARGS__)
|
||||
|
||||
#define OV_ITT_SCOPED_TASK_2(domain, taskOrTaskName) \
|
||||
openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(taskOrTaskName));
|
||||
#define OV_ITT_SCOPE_CHAIN_2(chainId, domain) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
|
||||
(std::string(ITT_FUNCTION_NAME) + "_1"), \
|
||||
ITT_FUNCTION_NAME);
|
||||
|
||||
#define OV_ITT_SCOPE_CHAIN_3(chainId, domain, prefix) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
|
||||
(std::string(prefix) + "_1"), \
|
||||
prefix);
|
||||
|
||||
#define OV_ITT_SCOPE_CHAIN_4(chainId, domain, prefix, taskName) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
|
||||
(std::string(prefix) + "_" + taskName), \
|
||||
prefix);
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def OV_ITT_SCOPE_NEXT(group, chainId, taskName)
|
||||
* @ingroup ie_dev_profiling
|
||||
* @brief Inserts new annotated section of code to tasks chain using @p taskName as section id.
|
||||
* @details If taskName is missing, the current chain index is used.
|
||||
* @param group [in] ITT counter group name used for enabling/disabling at compile time.
|
||||
* @param chainId [in] The tasks chain identifier.
|
||||
* @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
|
||||
*/
|
||||
#define OV_ITT_SCOPE_NEXT(group, ...) \
|
||||
OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_NEXT_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__))
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_SCOPE_NEXT_IMPL_0(...)
|
||||
#define OV_ITT_SCOPE_NEXT_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE_NEXT, __VA_ARGS__)
|
||||
|
||||
#define OV_ITT_SCOPE_NEXT_1(chainId) \
|
||||
chainId.next(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(chainId.taskName()));
|
||||
|
||||
#define OV_ITT_SCOPE_NEXT_2(chainId, taskOrTaskName) \
|
||||
chainId.next(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(chainId.taskNameOrHandle(taskOrTaskName)));
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
|
||||
/**
|
||||
* @def OV_ITT_SCOPE_SKIP(group, chainId)
|
||||
* @ingroup ie_dev_profiling
|
||||
* @brief Skips the remaining task scope.
|
||||
* @param group [in] ITT counter group name used for enabling/disabling at compile time.
|
||||
* @param chainId [in] The tasks chain identifier.
|
||||
*/
|
||||
#define OV_ITT_SCOPE_SKIP(group, chainId) \
|
||||
OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_SKIP_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(chainId))
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_SCOPE_SKIP_0(chainId)
|
||||
#define OV_ITT_SCOPE_SKIP_1(chainId) chainId.skip();
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
@ -273,33 +384,7 @@ inline openvino::itt::domain_t domainName() noexcept
|
||||
* @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional.
|
||||
* @param taskName [in] The annotation name for section of code. Parameter is optional.
|
||||
*/
|
||||
#define OV_ITT_TASK_CHAIN(...) OV_PP_OVERLOAD(OV_ITT_TASK_CHAIN, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_TASK_CHAIN_2(chainId, domain) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
|
||||
(std::string(ITT_FUNCTION_NAME) + "_1"), \
|
||||
ITT_FUNCTION_NAME);
|
||||
|
||||
#define OV_ITT_TASK_CHAIN_3(chainId, domain, prefix) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
|
||||
(std::string(prefix) + "_1"), \
|
||||
prefix);
|
||||
|
||||
#define OV_ITT_TASK_CHAIN_4(chainId, domain, prefix, taskName) \
|
||||
openvino::itt::TaskChain<domain> chainId \
|
||||
(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)> \
|
||||
(std::string(prefix) + "_" + taskName), \
|
||||
prefix);
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
#define OV_ITT_TASK_CHAIN(...) OV_ITT_SCOPE_CHAIN(ALL, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* @def OV_ITT_TASK_NEXT(chainId, taskName)
|
||||
@ -309,21 +394,7 @@ inline openvino::itt::domain_t domainName() noexcept
|
||||
* @param chainId [in] The tasks chain identifier.
|
||||
* @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
|
||||
*/
|
||||
#define OV_ITT_TASK_NEXT(...) OV_PP_OVERLOAD(OV_ITT_TASK_NEXT, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* @cond
|
||||
*/
|
||||
|
||||
#define OV_ITT_TASK_NEXT_1(chainId) \
|
||||
chainId.next(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(chainId.taskName()));
|
||||
|
||||
#define OV_ITT_TASK_NEXT_2(chainId, taskOrTaskName) \
|
||||
chainId.next(openvino::itt::handle<struct OV_PP_CAT(Task, __LINE__)>(chainId.taskNameOrHandle(taskOrTaskName)));
|
||||
|
||||
/**
|
||||
* @endcond
|
||||
*/
|
||||
#define OV_ITT_TASK_NEXT(...) OV_ITT_SCOPE_NEXT(ALL, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* @def OV_ITT_TASK_SKIP(chainId)
|
||||
@ -331,7 +402,7 @@ inline openvino::itt::domain_t domainName() noexcept
|
||||
* @brief Skips the remaining task scope.
|
||||
* @param chainId [in] The tasks chain identifier.
|
||||
*/
|
||||
#define OV_ITT_TASK_SKIP(chainId) chainId.skip();
|
||||
#define OV_ITT_TASK_SKIP(chainId) OV_ITT_SCOPE_SKIP(ALL, chainId);
|
||||
|
||||
} // namespace itt
|
||||
} // namespace openvino
|
||||
|
@ -40,7 +40,7 @@ void taskBegin(domain_t d, handle_t t) {
|
||||
}
|
||||
|
||||
void taskEnd(domain_t d) {
|
||||
if (!callStackDepth() || call_stack_depth-- > 0)
|
||||
if (!callStackDepth() || --call_stack_depth < callStackDepth())
|
||||
__itt_task_end(reinterpret_cast<__itt_domain*>(d));
|
||||
}
|
||||
|
||||
|
@ -30,3 +30,20 @@
|
||||
#define OV_PP_CAT4(x, y, z, w) OV_PP_CAT4_(x, y, z, w)
|
||||
|
||||
#define OV_PP_OVERLOAD(NAME, ...) OV_PP_EXPAND( OV_PP_CAT3(NAME, _, OV_PP_EXPAND( OV_PP_NARG(OV_PP_NO_ARGS __VA_ARGS__ (NAME)) ))(__VA_ARGS__) )
|
||||
|
||||
// Placeholder for first macro argument
|
||||
#define OV_PP_ARG_PLACEHOLDER_1 0,
|
||||
|
||||
// This macro returns second argument, first argument is ignored
|
||||
#define OV_PP_SECOND_ARG(...) OV_PP_EXPAND(OV_PP_SECOND_ARG_(__VA_ARGS__, 0))
|
||||
#define OV_PP_SECOND_ARG_(...) OV_PP_EXPAND(OV_PP_SECOND_ARG_GET(__VA_ARGS__))
|
||||
#define OV_PP_SECOND_ARG_GET(ignored, val, ...) val
|
||||
|
||||
// Return macro argument value
|
||||
#define OV_PP_IS_ENABLED(x) OV_PP_IS_ENABLED1(x)
|
||||
|
||||
// Generate junk macro or {0, } sequence if val is 1
|
||||
#define OV_PP_IS_ENABLED1(val) OV_PP_IS_ENABLED2(OV_PP_CAT(OV_PP_ARG_PLACEHOLDER_, val))
|
||||
|
||||
// Return second argument from possible sequences {1, 0}, {0, 1, 0}
|
||||
#define OV_PP_IS_ENABLED2(arg1_or_junk) OV_PP_SECOND_ARG(arg1_or_junk 1, 0)
|
||||
|
Loading…
Reference in New Issue
Block a user