[IE CLDNN] Added ITT counters in the plugin (#3719)

Elizaveta Gerashchenko 2021-01-14 09:56:18 +03:00 committed by GitHub
parent 036259481d
commit d2303262a2
8 changed files with 88 additions and 25 deletions

cldnn_config.cpp

@ -11,6 +11,7 @@
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "ie_api.h"
#include "file_utils.h"
#include "cldnn_itt.h"
#ifdef _WIN32
# include <direct.h>
@ -40,6 +41,7 @@ static void createDirectory(std::string _path) {
}
void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::UpdateFromMap");
for (auto& kvp : configMap) {
std::string key = kvp.first;
std::string val = kvp.second;
@ -228,6 +230,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
}
void Config::adjustKeyMapValues() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::adjustKeyMapValues");
if (useProfiling)
key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
else

cldnn_custom_layer.cpp

@ -15,6 +15,7 @@
#endif
#include "simple_math.h"
#include "cldnn_itt.h"
using namespace InferenceEngine;
using namespace XMLParseUtils;
@ -224,6 +225,7 @@ cldnn::format CLDNNCustomLayer::FormatFromString(const std::string & str) {
}
void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLayerMap& customLayers, bool can_be_missed) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNCustomLayer::LoadFromFile");
pugi::xml_document xmlDoc;
pugi::xml_parse_result res = xmlDoc.load_file(configFile.c_str());
if (res.status != pugi::status_ok) {

cldnn_engine.cpp

@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -64,6 +64,7 @@
#include "cldnn_engine.h"
#include "cldnn_executable_network.h"
#include "cldnn_custom_layer.h"
#include "cldnn_itt.h"
#ifdef __linux__
# include <dlfcn.h>
@ -122,9 +123,11 @@ static bool disableReduceDecomposition(const std::shared_ptr<const ngraph::Node>
InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
const CLDNNPlugin::Config& config) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::CloneAndTransformNetwork");
CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);
if (clonedNetwork.getFunction()) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork");
auto nGraphFunc = clonedNetwork.getFunction();
// Disable shape inference (WA for generic operations)
ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
@ -268,6 +271,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
bool enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
if (enableInt8) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT");
using namespace ngraph::pass::low_precision;
ngraph::pass::Manager conversion_manager;
// [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
@ -284,6 +288,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
}
{
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::RunPasses");
ngraph::pass::Manager manager;
// This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation
// TODO: check why we have these reshapes
@ -349,6 +354,7 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) {
};
void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateConfig");
auto device_info = GetDeviceInfo(params);
conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
conf.UpdateFromMap(params);
@ -359,6 +365,7 @@ void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine:
ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
const std::map<std::string, std::string> &config) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl");
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
check_inputs(_networkInputs);
@ -389,6 +396,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
};
{
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateContext");
std::lock_guard<std::mutex> lock(engine_mutex);
if (!canReuseDefaultContext()) {
m_defaultContext.reset(new CLDNNRemoteCLContext(shared_from_this(), ParamMap(), conf));
@ -398,7 +406,10 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
context = m_defaultContext;
auto transformedNetwork = CloneAndTransformNetwork(network, conf);
-    return std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
+    {
+        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateExeNetwork");
+        return std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
+    }
}
ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
@ -451,6 +462,7 @@ void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) {
QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
const std::map<std::string, std::string>& config) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::QueryNetwork");
QueryNetworkResult res;
CLDNNPlugin::Config conf = _impl->m_config;
UpdateConfig(conf, network, config);
@ -664,6 +676,7 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
}
Parameter clDNNEngine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetConfig");
Parameter result;
auto option = _impl->m_config.key_config_map.find(name);
if (option != _impl->m_config.key_config_map.end()) {
@ -692,6 +705,7 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
};
Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetMetric");
auto device_id = GetConfig(CONFIG_KEY(DEVICE_ID), {});
if (options.find(CONFIG_KEY(DEVICE_ID)) != options.end())
device_id = options.at(CONFIG_KEY(DEVICE_ID)).as<std::string>();

cldnn_executable_network.cpp

@ -13,6 +13,7 @@
#include <cmath>
#include <algorithm>
#include "cldnn_graph.h"
#include "cldnn_itt.h"
#include <description_buffer.hpp>
#include <cldnn/cldnn_config.hpp>
@ -63,6 +64,7 @@ CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, RemoteC
InferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
OutputsDataMap networkOutputs) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequestImpl");
if (m_graphs.empty()) {
THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str;
}
@ -90,6 +92,7 @@ InferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMap
}
IInferRequest::Ptr CLDNNExecNetwork::CreateInferRequest() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequest");
return CreateAsyncInferRequestFromSync<CLDNNAsyncInferRequest>();
}
@ -110,6 +113,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetConfig(const std::string &name)
}
InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::GetMetric");
if (name == METRIC_KEY(NETWORK_NAME)) {
IE_ASSERT(!m_graphs.empty());
IE_SET_METRIC_RETURN(NETWORK_NAME, m_graphs[0]->getName());

cldnn_graph.cpp

@ -27,6 +27,7 @@
#include <ie_ngraph_utils.hpp>
#include "generic_ie.hpp"
#include <ngraph/variant.hpp>
#include "cldnn_itt.h"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
@ -52,6 +53,7 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
}
void CLDNNGraph::UpdateLayersMaps() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateLayersMaps");
primitiveIDs = m_program->primitiveIDs;
primitivesToIRLayersMap = m_program->primitivesToIRLayersMap;
IRToNgraphLayersMap = m_program->IRToNgraphLayersMap;
@ -62,6 +64,7 @@ void CLDNNGraph::UpdateLayersMaps() {
}
void CLDNNGraph::Build() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::Build");
UpdateLayersMaps();
if (GetMaxDynamicBatchSize() > 1) {
@ -81,6 +84,7 @@ void CLDNNGraph::Build() {
}
std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::BuildNetwork");
auto network = std::make_shared<cldnn::network>(*program, m_stream_id);
if (!m_config.graph_dumps_dir.empty() && m_stream_id == 0) {
@ -101,6 +105,7 @@ std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::
InferenceEngine::CNNNetwork CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
bool filter_const_primitives) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetExecGraphInfoByPrimitivesInfo");
if (m_config.useProfiling) {
try {
// Update may throw an exception for step-by-step runtime graph dump,
@ -474,6 +479,7 @@ InferenceEngine::CNNNetwork CLDNNGraph::GetExecGraphInfo() {
void CLDNNGraph::UpdatePerfStatistics() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdatePerfStatistics");
if (GetNetworksCount() == 0) {
return;
}
@ -545,6 +551,7 @@ bool CLDNNGraph::IsLoaded() const {
}
void CLDNNGraph::UpdateImplementationsMap() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateImplementationsMap");
if (m_config.useProfiling) {
auto extractImplementationFromInfo = [](const std::string& info) -> std::string {
std::string def_implementation = "undef";
@ -587,6 +594,7 @@ void CLDNNGraph::UpdateImplementationsMap() {
}
void CLDNNGraph::GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &result) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetPerformanceCounts");
bool combinePrimByIRLayers = false;
unsigned i = 0;
auto allIds = GetNetwork()->get_all_primitive_org_ids();

cldnn_infer_request.cpp

@ -23,6 +23,7 @@ const char cannot_set_compound[] = "cannot set compound blob: supported only for
const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format";
Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* mem_ptr) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createInputBlob");
const Precision p = desc.getPrecision();
switch (p) {
@ -77,6 +78,7 @@ Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* me
}
Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* mem_ptr) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createOutputBlob");
const Precision p = desc.getPrecision();
switch (p) {
@ -106,6 +108,7 @@ Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* m
}
void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& inputMem) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach");
auto impl = getContextImpl(m_graph->GetContext());
impl->acquire_lock();
@ -120,6 +123,7 @@ void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& in
}
void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc");
cldnn::memory input_mem = cldnn::memory::allocate(*(m_graph->GetEngine()), layout);
input_attach(name, input_mem);
}
@ -127,6 +131,7 @@ void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layou
void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory,
Blob::Ptr bptr,
buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData");
size_t n = (bi == nullptr) ? bptr->size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
@ -270,6 +275,7 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout,
const Blob &inputBlob, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData");
size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
@ -386,6 +392,7 @@ void checkOutputBlob(const Blob::Ptr &blob,
}
void CLDNNInferRequest::checkBlobs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs");
for (auto const &input : _inputs) {
InputInfo::Ptr foundInput = nullptr;
auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs),
@ -415,7 +422,7 @@ void CLDNNInferRequest::checkBlobs() {
}
void CLDNNInferRequest::GetBlob(const char *name, Blob::Ptr &data) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "GetBlob");
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetBlob");
InputInfo::Ptr foundInput;
DataPtr foundOutput;
bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput);
@ -436,7 +443,7 @@ void CLDNNInferRequest::GetBlob(const char *name, Blob::Ptr &data) {
}
void CLDNNInferRequest::SetBlob(const char *name, const Blob::Ptr &data) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "SetBlob");
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBlob");
// perform all common checks first
if (name == nullptr) {
@ -562,6 +569,7 @@ void CLDNNInferRequest::SetBlob(const char *name, const Blob::Ptr &data) {
}
void CLDNNInferRequest::AllocateInputs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs");
auto inputLayouts = m_graph->GetInputLayouts();
// allocate inputs
for (auto& ni : _networkInputs) {
@ -611,6 +619,7 @@ void CLDNNInferRequest::AllocateInputs() {
}
void CLDNNInferRequest::AllocateInputsDyn() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputsDyn");
// allocate inputs
for (auto &input : m_graph->GetInputLayouts()) {
InputInfo::Ptr ni = _networkInputs.at(input.first);
@ -636,6 +645,7 @@ void CLDNNInferRequest::AllocateInputsDyn() {
}
void CLDNNInferRequest::AllocateOutputs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateOutputs");
// allocate outputs
bool can_reuse_internal_mem = !m_useStreams;
for (auto& no : _networkOutputs) {
@ -661,6 +671,7 @@ void CLDNNInferRequest::AllocateOutputs() {
}
void CLDNNInferRequest::AllocateOutputsDyn() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateOutputsDyn");
// allocate outputs
for (auto& no : _networkOutputs) {
DataPtr oi = no.second;
@ -680,6 +691,7 @@ void CLDNNInferRequest::AllocateOutputsDyn() {
}
void CLDNNInferRequest::SetGraph(std::shared_ptr<CLDNNPlugin::CLDNNGraph> graph) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetGraph");
m_graph = graph;
if (m_graph == nullptr) {
@ -697,6 +709,7 @@ void CLDNNInferRequest::SetGraph(std::shared_ptr<CLDNNPlugin::CLDNNGraph> graph)
}
void CLDNNInferRequest::SetBatch(int new_batch) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBatch");
if (m_graph->GetMaxDynamicBatchSize() < 0)
THROW_IE_EXCEPTION << "Dynamic batch is not enabled.";
@ -774,6 +787,7 @@ CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap
}
void CLDNNInferRequest::execAndParse() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse");
auto networkOutputs = m_graph->GetNetwork()->execute();
// Collect outputs as requested by the model
@ -804,6 +818,7 @@ void CLDNNInferRequest::execAndParse() {
}
void CLDNNInferRequest::execAndParseDyn() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParseDyn");
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> networkOutputs(m_graph->GetNetworksCount());
// set up execution and put all graphs into driver queue
@ -832,7 +847,7 @@ void CLDNNInferRequest::execAndParseDyn() {
}
void CLDNNInferRequest::InferImpl() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNN_INFER");
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::InferImpl");
int streamID = 0;
if (nullptr != streamExecutor) {
streamID = streamExecutor->GetStreamId();
@ -871,6 +886,7 @@ void CLDNNInferRequest::InferImpl() {
void CLDNNInferRequest::GetPerformanceCounts(
std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetPerformanceCounts");
if (!m_useProfiling) {
THROW_IE_EXCEPTION << "Performance counters were not enabled";
} else {
@ -882,6 +898,7 @@ namespace {
template <typename T>
void copyToFloat(float* dst, const InferenceEngine::Blob* src) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "copyToFloat");
if (!dst) {
return;
}
@ -901,6 +918,7 @@ void copyToFloat(float* dst, const InferenceEngine::Blob* src) {
} // namespace
void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const Blob &inputBlob) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::PrepareInput");
// Get input layout
if (m_graph->GetInputLayouts().find(inputName) == m_graph->GetInputLayouts().end()) {
THROW_IE_EXCEPTION << "Input name mismatch.";
@ -959,6 +977,7 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
}
void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, const Blob &inputBlob) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::PrepareInputDyn");
// now try to get execution results
for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) {
unsigned int mask = 1 << nb;

cldnn_program.cpp

@ -5,6 +5,7 @@
#include "cldnn_program.h"
#include "ngraph/ops.hpp"
#include "ngraph_ops/nms_ie_internal.hpp"
#include "cldnn_itt.h"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
@ -175,6 +176,7 @@ void Program::CleanupBuild() {
std::shared_ptr<cldnn::program> Program::BuildProgram(std::vector<std::shared_ptr<ngraph::Node>> ops,
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::BuildProgram");
cldnn::build_options options;
if (!m_config.graph_dumps_dir.empty()) {
options.set_option(cldnn::build_option::graph_dumps_dir(m_config.graph_dumps_dir));
@ -186,14 +188,17 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(std::vector<std::shared_pt
for (auto op : ops) {
CreateSingleLayerPrimitive(*m_topology, op);
}
-    auto program = std::make_shared<cldnn::program>(*m_engine, *m_topology, options);
-    CleanupBuild();
-    return program;
+    {
+        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateProgram");
+        auto program = std::make_shared<cldnn::program>(*m_engine, *m_topology, options);
+        CleanupBuild();
+        return program;
+    }
}
bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::IsOpSupported");
cldnn::topology topology;
try {
// Query mode disables checks that input primitives are created,
@ -220,6 +225,7 @@ bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const st
}
void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateSingleLayerPrimitive");
InitProfileInfo(op->get_friendly_name(), op->get_type_name());
bool is_created = false;

cldnn_remote_context.cpp

@ -4,6 +4,7 @@
#include <memory>
#include "cldnn_remote_context.h"
#include "cldnn_itt.h"
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
@ -80,6 +81,7 @@ bool CLDNNRemoteBlobImpl::is_locked() const noexcept {
}
void CLDNNRemoteBlobImpl::allocate_if_needed() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::allocate_if_needed");
auto _impl = getContextImpl(m_context.lock());
_impl->acquire_lock();
@ -116,6 +118,7 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() {
}
void CLDNNRemoteBlobImpl::allocate() noexcept {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::allocate");
assert(m_memObject == nullptr);
std::shared_ptr<const cldnn::engine> eng = getContextImpl(m_context.lock())->GetEngine();
@ -224,6 +227,7 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
m_type(ContextType::OCL),
m_config(config),
m_va_display(nullptr) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl");
lock.clear(std::memory_order_relaxed);
gpu_handle_param _context_id = nullptr;
gpu_handle_param _va_device = nullptr;
@ -248,22 +252,25 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
auto iter = device_map.find(m_config.device_id);
auto& dev = iter != device_map.end() ? iter->second : device_map.begin()->second;
-    m_engine = std::make_shared<cldnn::engine>(dev,
-        cldnn::engine_configuration((m_config.useProfiling ||
-            (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
-            (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)),
-        false,
-        m_config.dumpCustomKernels,
-        std::string(),
-        std::string(),
-        true,
-        std::string(),
-        m_config.sources_dumps_dir,
-        m_config.queuePriority,
-        m_config.queueThrottle,
-        m_config.memory_pool_on,
-        m_config.throughput_streams,
-        m_config.kernels_cache_dir));
+    {
+        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl::Create");
+        m_engine = std::make_shared<cldnn::engine>(dev,
+            cldnn::engine_configuration((m_config.useProfiling ||
+                (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
+                (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)),
+            false,
+            m_config.dumpCustomKernels,
+            std::string(),
+            std::string(),
+            true,
+            std::string(),
+            m_config.sources_dumps_dir,
+            m_config.queuePriority,
+            m_config.queueThrottle,
+            m_config.memory_pool_on,
+            m_config.throughput_streams,
+            m_config.kernels_cache_dir));
+    }
}
ParamMap CLDNNExecutionContextImpl::getParams() const {