[IE CLDNN] Added ITT counters in the plugin (#3719)

parent 036259481d
commit d2303262a2
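
This commit adds a new header, cldnn_itt.h, and wraps the entry points of the clDNN (GPU) plugin in OV_ITT_SCOPED_TASK counters so that the plugin's load, compile, and inference phases can be timed with ITT-based tools such as Intel VTune. The header itself is not shown in this diff; judging by how every call site references itt::domains::CLDNNPlugin, it most likely follows OpenVINO's usual one-domain-per-plugin pattern. A hypothetical sketch:

```cpp
// Hypothetical sketch of cldnn_itt.h (its actual contents are not part of
// this diff); OpenVINO plugins typically declare a single ITT domain this way.
#pragma once

#include <openvino/itt.hpp>

namespace CLDNNPlugin {
namespace itt {
namespace domains {
    OV_ITT_DOMAIN(CLDNNPlugin);  // the domain every counter below is attributed to
}  // namespace domains
}  // namespace itt
}  // namespace CLDNNPlugin
```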
@@ -11,6 +11,7 @@
 #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "ie_api.h"
 #include "file_utils.h"
+#include "cldnn_itt.h"
 
 #ifdef _WIN32
 # include <direct.h>
@@ -40,6 +41,7 @@ static void createDirectory(std::string _path) {
 }
 
 void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::UpdateFromMap");
     for (auto& kvp : configMap) {
         std::string key = kvp.first;
         std::string val = kvp.second;
@@ -228,6 +230,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
 }
 
 void Config::adjustKeyMapValues() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::adjustKeyMapValues");
     if (useProfiling)
         key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
     else
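
OV_ITT_SCOPED_TASK is an RAII scope: the task begins when the macro's hidden local object is constructed and ends when that object leaves scope, which is why a single line at the top of Config::UpdateFromMap above is enough to time the whole function. A rough, self-contained sketch of the idiom on top of the raw ittnotify API (not OpenVINO's actual implementation, which caches string handles and compiles away entirely when ITT is disabled):

```cpp
#include <ittnotify.h>

// Begin an ITT task on construction, end it on destruction.
struct ScopedTask {
    __itt_domain* domain;
    ScopedTask(__itt_domain* d, const char* name) : domain(d) {
        // Real implementations create the string handle once and cache it.
        __itt_task_begin(domain, __itt_null, __itt_null,
                         __itt_string_handle_create(name));
    }
    ~ScopedTask() { __itt_task_end(domain); }
};

void UpdateFromMapLike() {
    static __itt_domain* domain = __itt_domain_create("CLDNNPlugin");
    ScopedTask task(domain, "Config::UpdateFromMap");  // times the whole body
    // ... function body ...
}
```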
@@ -15,6 +15,7 @@
 #endif
 
 #include "simple_math.h"
+#include "cldnn_itt.h"
 
 using namespace InferenceEngine;
 using namespace XMLParseUtils;
@@ -224,6 +225,7 @@ cldnn::format CLDNNCustomLayer::FormatFromString(const std::string & str) {
 }
 
 void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLayerMap& customLayers, bool can_be_missed) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNCustomLayer::LoadFromFile");
     pugi::xml_document xmlDoc;
     pugi::xml_parse_result res = xmlDoc.load_file(configFile.c_str());
     if (res.status != pugi::status_ok) {
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -64,6 +64,7 @@
 #include "cldnn_engine.h"
 #include "cldnn_executable_network.h"
 #include "cldnn_custom_layer.h"
+#include "cldnn_itt.h"
 
 #ifdef __linux__
 # include <dlfcn.h>
@@ -122,9 +123,11 @@ static bool disableReduceDecomposition(const std::shared_ptr<const ngraph::Node>
 
 InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
                                                                   const CLDNNPlugin::Config& config) const {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::CloneAndTransformNetwork");
     CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);
 
     if (clonedNetwork.getFunction()) {
+        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork");
         auto nGraphFunc = clonedNetwork.getFunction();
         // Disable shape inference (WA for generic operations)
         ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
@@ -268,6 +271,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
 
         bool enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
         if (enableInt8) {
+            OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT");
            using namespace ngraph::pass::low_precision;
            ngraph::pass::Manager conversion_manager;
            // [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
@@ -284,6 +288,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
        }
 
        {
+            OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::RunPasses");
            ngraph::pass::Manager manager;
            // This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation
            // TODO: check why we have these reshapes
@@ -349,6 +354,7 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) {
 };
 
 void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateConfig");
     auto device_info = GetDeviceInfo(params);
     conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
     conf.UpdateFromMap(params);
@@ -359,6 +365,7 @@ void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine:
 
 ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
                                                                const std::map<std::string, std::string> &config) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl");
     // verification of supported input
     InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
     check_inputs(_networkInputs);
@@ -389,6 +396,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
     };
 
     {
+        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateContext");
         std::lock_guard<std::mutex> lock(engine_mutex);
         if (!canReuseDefaultContext()) {
             m_defaultContext.reset(new CLDNNRemoteCLContext(shared_from_this(), ParamMap(), conf));
@@ -398,7 +406,10 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
         context = m_defaultContext;
 
     auto transformedNetwork = CloneAndTransformNetwork(network, conf);
-    return std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
+    {
+        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateExeNetwork");
+        return std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
+    }
 }
 
 ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
@@ -451,6 +462,7 @@ void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) {
 
 QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
                                              const std::map<std::string, std::string>& config) const {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::QueryNetwork");
     QueryNetworkResult res;
     CLDNNPlugin::Config conf = _impl->m_config;
     UpdateConfig(conf, network, config);
@@ -664,6 +676,7 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
 }
 
 Parameter clDNNEngine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetConfig");
     Parameter result;
     auto option = _impl->m_config.key_config_map.find(name);
     if (option != _impl->m_config.key_config_map.end()) {
@@ -692,6 +705,7 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
 };
 
 Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetMetric");
     auto device_id = GetConfig(CONFIG_KEY(DEVICE_ID), {});
     if (options.find(CONFIG_KEY(DEVICE_ID)) != options.end())
         device_id = options.at(CONFIG_KEY(DEVICE_ID)).as<std::string>();
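
Note how CloneAndTransformNetwork and LoadExeNetworkImpl above nest scopes: an outer task covers the whole method, while inner block-scoped tasks (TransformNetwork::LPT, TransformNetwork::RunPasses, LoadExeNetworkImpl::CreateContext, LoadExeNetworkImpl::CreateExeNetwork) break it into phases. Scoped tasks nest naturally because each one is just a local object, and an ITT-aware profiler shows the inner tasks as children of the outer one on the same thread. A minimal sketch of the pattern (function and phase names here are illustrative, not from this diff):

```cpp
#include "cldnn_itt.h"

void CompileLikePhase() {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Outer");  // whole method
    // ... shared setup ...
    {
        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Outer::PhaseA");
        // ... first phase; its task ends at this closing brace ...
    }
    {
        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Outer::PhaseB");
        // ... second phase, reported as a separate child of "Outer" ...
    }
}   // "Outer" ends here
```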
@@ -13,6 +13,7 @@
 #include <cmath>
 #include <algorithm>
 #include "cldnn_graph.h"
+#include "cldnn_itt.h"
 
 #include <description_buffer.hpp>
 #include <cldnn/cldnn_config.hpp>
@@ -63,6 +64,7 @@ CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, RemoteC
 
 InferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
                                                                    OutputsDataMap networkOutputs) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequestImpl");
     if (m_graphs.empty()) {
         THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str;
     }
@@ -90,6 +92,7 @@ InferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMap
 }
 
 IInferRequest::Ptr CLDNNExecNetwork::CreateInferRequest() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequest");
     return CreateAsyncInferRequestFromSync<CLDNNAsyncInferRequest>();
 }
 
@@ -110,6 +113,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetConfig(const std::string &name)
 }
 
 InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) const {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::GetMetric");
     if (name == METRIC_KEY(NETWORK_NAME)) {
         IE_ASSERT(!m_graphs.empty());
         IE_SET_METRIC_RETURN(NETWORK_NAME, m_graphs[0]->getName());
@@ -27,6 +27,7 @@
 #include <ie_ngraph_utils.hpp>
 #include "generic_ie.hpp"
 #include <ngraph/variant.hpp>
+#include "cldnn_itt.h"
 
 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
@@ -52,6 +53,7 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
 }
 
 void CLDNNGraph::UpdateLayersMaps() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateLayersMaps");
     primitiveIDs = m_program->primitiveIDs;
     primitivesToIRLayersMap = m_program->primitivesToIRLayersMap;
     IRToNgraphLayersMap = m_program->IRToNgraphLayersMap;
@@ -62,6 +64,7 @@ void CLDNNGraph::UpdateLayersMaps() {
 }
 
 void CLDNNGraph::Build() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::Build");
     UpdateLayersMaps();
 
     if (GetMaxDynamicBatchSize() > 1) {
@@ -81,6 +84,7 @@ void CLDNNGraph::Build() {
 }
 
 std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::BuildNetwork");
     auto network = std::make_shared<cldnn::network>(*program, m_stream_id);
 
     if (!m_config.graph_dumps_dir.empty() && m_stream_id == 0) {
@@ -101,6 +105,7 @@ std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::
 
 InferenceEngine::CNNNetwork CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
                                                                          bool filter_const_primitives) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetExecGraphInfoByPrimitivesInfo");
     if (m_config.useProfiling) {
         try {
             // Update may throw an exception for step-by-step runtime graph dump,
@@ -474,6 +479,7 @@ InferenceEngine::CNNNetwork CLDNNGraph::GetExecGraphInfo() {
 
 
 void CLDNNGraph::UpdatePerfStatistics() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdatePerfStatistics");
     if (GetNetworksCount() == 0) {
         return;
     }
@@ -545,6 +551,7 @@ bool CLDNNGraph::IsLoaded() const {
 }
 
 void CLDNNGraph::UpdateImplementationsMap() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateImplementationsMap");
     if (m_config.useProfiling) {
         auto extractImplementationFromInfo = [](const std::string& info) -> std::string {
             std::string def_implementation = "undef";
@@ -587,6 +594,7 @@ void CLDNNGraph::UpdateImplementationsMap() {
 }
 
 void CLDNNGraph::GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &result) const {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetPerformanceCounts");
     bool combinePrimByIRLayers = false;
     unsigned i = 0;
     auto allIds = GetNetwork()->get_all_primitive_org_ids();
@@ -23,6 +23,7 @@ const char cannot_set_compound[] = "cannot set compound blob: supported only for
 const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format";
 
 Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* mem_ptr) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createInputBlob");
     const Precision p = desc.getPrecision();
 
     switch (p) {
@@ -77,6 +78,7 @@ Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* me
 }
 
 Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* mem_ptr) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createOutputBlob");
     const Precision p = desc.getPrecision();
 
     switch (p) {
@@ -106,6 +108,7 @@ Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* m
 }
 
 void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& inputMem) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach");
     auto impl = getContextImpl(m_graph->GetContext());
     impl->acquire_lock();
 
@@ -120,6 +123,7 @@ void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& in
 }
 
 void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc");
     cldnn::memory input_mem = cldnn::memory::allocate(*(m_graph->GetEngine()), layout);
     input_attach(name, input_mem);
 }
@@ -127,6 +131,7 @@ void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layou
 void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory,
                                        Blob::Ptr bptr,
                                        buf_info* bi) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData");
     size_t n = (bi == nullptr) ? bptr->size() : bi->buf_size;
     size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
 
@@ -270,6 +275,7 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
                                       const cldnn::primitive_id &inputName,
                                       const cldnn::layout& inputLayout,
                                       const Blob &inputBlob, buf_info* bi) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData");
     size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
     size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
 
@@ -386,6 +392,7 @@ void checkOutputBlob(const Blob::Ptr &blob,
 }
 
 void CLDNNInferRequest::checkBlobs() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs");
     for (auto const &input : _inputs) {
         InputInfo::Ptr foundInput = nullptr;
         auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs),
@@ -415,7 +422,7 @@ void CLDNNInferRequest::checkBlobs() {
 }
 
 void CLDNNInferRequest::GetBlob(const char *name, Blob::Ptr &data) {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "GetBlob");
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetBlob");
     InputInfo::Ptr foundInput;
     DataPtr foundOutput;
     bool is_input = findInputAndOutputBlobByName(name, foundInput, foundOutput);
@@ -436,7 +443,7 @@ void CLDNNInferRequest::GetBlob(const char *name, Blob::Ptr &data) {
 }
 
 void CLDNNInferRequest::SetBlob(const char *name, const Blob::Ptr &data) {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "SetBlob");
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBlob");
 
     // perform all common checks first
     if (name == nullptr) {
@@ -562,6 +569,7 @@ void CLDNNInferRequest::SetBlob(const char *name, const Blob::Ptr &data) {
 }
 
 void CLDNNInferRequest::AllocateInputs() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs");
     auto inputLayouts = m_graph->GetInputLayouts();
     // allocate inputs
     for (auto& ni : _networkInputs) {
@@ -611,6 +619,7 @@ void CLDNNInferRequest::AllocateInputs() {
 }
 
 void CLDNNInferRequest::AllocateInputsDyn() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputsDyn");
     // allocate inputs
     for (auto &input : m_graph->GetInputLayouts()) {
         InputInfo::Ptr ni = _networkInputs.at(input.first);
@@ -636,6 +645,7 @@ void CLDNNInferRequest::AllocateInputsDyn() {
 }
 
 void CLDNNInferRequest::AllocateOutputs() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateOutputs");
     // allocate outputs
     bool can_reuse_internal_mem = !m_useStreams;
     for (auto& no : _networkOutputs) {
@@ -661,6 +671,7 @@ void CLDNNInferRequest::AllocateOutputs() {
 }
 
 void CLDNNInferRequest::AllocateOutputsDyn() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateOutputsDyn");
     // allocate outputs
     for (auto& no : _networkOutputs) {
         DataPtr oi = no.second;
@@ -680,6 +691,7 @@ void CLDNNInferRequest::AllocateOutputsDyn() {
 }
 
 void CLDNNInferRequest::SetGraph(std::shared_ptr<CLDNNPlugin::CLDNNGraph> graph) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetGraph");
     m_graph = graph;
 
     if (m_graph == nullptr) {
@@ -697,6 +709,7 @@ void CLDNNInferRequest::SetGraph(std::shared_ptr<CLDNNPlugin::CLDNNGraph> graph)
 }
 
 void CLDNNInferRequest::SetBatch(int new_batch) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBatch");
     if (m_graph->GetMaxDynamicBatchSize() < 0)
         THROW_IE_EXCEPTION << "Dynamic batch is not enabled.";
 
@@ -774,6 +787,7 @@ CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap
 }
 
 void CLDNNInferRequest::execAndParse() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse");
     auto networkOutputs = m_graph->GetNetwork()->execute();
 
     // Collect outputs as requested by the model
@@ -804,6 +818,7 @@ void CLDNNInferRequest::execAndParse() {
 }
 
 void CLDNNInferRequest::execAndParseDyn() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParseDyn");
     std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> networkOutputs(m_graph->GetNetworksCount());
 
     // set up exection and put all graphs into driver queue
@@ -832,7 +847,7 @@ void CLDNNInferRequest::execAndParseDyn() {
 }
 
 void CLDNNInferRequest::InferImpl() {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNN_INFER");
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::InferImpl");
     int streamID = 0;
     if (nullptr != streamExecutor) {
         streamID = streamExecutor->GetStreamId();
@@ -871,6 +886,7 @@ void CLDNNInferRequest::InferImpl() {
 
 void CLDNNInferRequest::GetPerformanceCounts(
         std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetPerformanceCounts");
     if (!m_useProfiling) {
         THROW_IE_EXCEPTION << "Performance counters were not enabled";
     } else {
@@ -882,6 +898,7 @@ namespace {
 
 template <typename T>
 void copyToFloat(float* dst, const InferenceEngine::Blob* src) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "copyToFloat");
     if (!dst) {
         return;
     }
@@ -901,6 +918,7 @@ void copyToFloat(float* dst, const InferenceEngine::Blob* src) {
 } // namespace
 
 void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const Blob &inputBlob) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::PrepareInput");
     // Get input layout
     if (m_graph->GetInputLayouts().find(inputName) == m_graph->GetInputLayouts().end()) {
         THROW_IE_EXCEPTION << "Input name mismatch.";
@@ -959,6 +977,7 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
 }
 
 void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, const Blob &inputBlob) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::PrepareInputDyn");
     // now try to get execution results
     for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) {
         unsigned int mask = 1 << nb;
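
Besides adding new counters, the hunks above rename three pre-existing ones ("GetBlob", "SetBlob", "CLDNN_INFER") to the fully qualified Class::Method form used everywhere else in this commit, so all of the plugin's tasks sort and group consistently in a trace. Instrumenting the per-request hot path this densely is acceptable because the macro collapses to nothing when ITT support is compiled out. Roughly (the exact macro machinery in openvino/itt.hpp is more elaborate; this is only the shape of it):

```cpp
// Illustrative only, not OpenVINO's verbatim definition: the point is the
// compile-time on/off switch around the RAII scoped-task object.
#ifdef ENABLE_PROFILING_ITT
#    define OV_ITT_SCOPED_TASK(domain, name) \
         openvino::itt::ScopedTask<domain> ittScopedTask_(openvino::itt::handle(name))
#else
#    define OV_ITT_SCOPED_TASK(domain, name)  // expands to nothing: zero overhead
#endif
```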
@@ -5,6 +5,7 @@
 #include "cldnn_program.h"
 #include "ngraph/ops.hpp"
 #include "ngraph_ops/nms_ie_internal.hpp"
+#include "cldnn_itt.h"
 
 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
@@ -175,6 +176,7 @@ void Program::CleanupBuild() {
 std::shared_ptr<cldnn::program> Program::BuildProgram(std::vector<std::shared_ptr<ngraph::Node>> ops,
                                                       InferenceEngine::InputsDataMap networkInputs,
                                                       InferenceEngine::OutputsDataMap networkOutputs) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::BuildProgram");
     cldnn::build_options options;
     if (!m_config.graph_dumps_dir.empty()) {
         options.set_option(cldnn::build_option::graph_dumps_dir(m_config.graph_dumps_dir));
@@ -186,14 +188,17 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(std::vector<std::shared_pt
     for (auto op : ops) {
         CreateSingleLayerPrimitive(*m_topology, op);
     }
-    auto program = std::make_shared<cldnn::program>(*m_engine, *m_topology, options);
-    CleanupBuild();
-
-    return program;
+    {
+        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateProgram");
+        auto program = std::make_shared<cldnn::program>(*m_engine, *m_topology, options);
+        CleanupBuild();
+
+        return program;
+    }
 }
 
 bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::IsOpSupported");
     cldnn::topology topology;
     try {
         // Query mode disables checks that input primitives are created,
@@ -220,6 +225,7 @@ bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const st
 }
 
 void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateSingleLayerPrimitive");
     InitProfileInfo(op->get_friendly_name(), op->get_type_name());
 
     bool is_created = false;
@@ -4,6 +4,7 @@
 
 #include <memory>
 #include "cldnn_remote_context.h"
+#include "cldnn_itt.h"
 
 using namespace InferenceEngine;
 using namespace InferenceEngine::gpu;
@@ -80,6 +81,7 @@ bool CLDNNRemoteBlobImpl::is_locked() const noexcept {
 }
 
 void CLDNNRemoteBlobImpl::allocate_if_needed() {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::allocate_if_needed");
     auto _impl = getContextImpl(m_context.lock());
     _impl->acquire_lock();
 
@@ -116,6 +118,7 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() {
 }
 
 void CLDNNRemoteBlobImpl::allocate() noexcept {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::allocate");
     assert(m_memObject == nullptr);
 
     std::shared_ptr<const cldnn::engine> eng = getContextImpl(m_context.lock())->GetEngine();
@@ -224,6 +227,7 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
     m_type(ContextType::OCL),
     m_config(config),
     m_va_display(nullptr) {
+    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl");
     lock.clear(std::memory_order_relaxed);
     gpu_handle_param _context_id = nullptr;
     gpu_handle_param _va_device = nullptr;
@@ -248,22 +252,25 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
     auto iter = device_map.find(m_config.device_id);
     auto& dev = iter != device_map.end() ? iter->second : device_map.begin()->second;
 
-    m_engine = std::make_shared<cldnn::engine>(dev,
-        cldnn::engine_configuration((m_config.useProfiling ||
-            (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
-            (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)),
-            false,
-            m_config.dumpCustomKernels,
-            std::string(),
-            std::string(),
-            true,
-            std::string(),
-            m_config.sources_dumps_dir,
-            m_config.queuePriority,
-            m_config.queueThrottle,
-            m_config.memory_pool_on,
-            m_config.throughput_streams,
-            m_config.kernels_cache_dir));
+    {
+        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl::Create");
+        m_engine = std::make_shared<cldnn::engine>(dev,
+            cldnn::engine_configuration((m_config.useProfiling ||
+                (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
+                (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)),
+                false,
+                m_config.dumpCustomKernels,
+                std::string(),
+                std::string(),
+                true,
+                std::string(),
+                m_config.sources_dumps_dir,
+                m_config.queuePriority,
+                m_config.queueThrottle,
+                m_config.memory_pool_on,
+                m_config.throughput_streams,
+                m_config.kernels_cache_dir));
+    }
 }
 
 ParamMap CLDNNExecutionContextImpl::getParams() const {
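
Taken together, the CLDNNPlugin ITT domain now covers configuration parsing, custom-layer loading, network transformation, program and graph compilation, context/engine creation, and the per-request hot path (blob allocation, input preparation, execution, and performance-counter retrieval). When OpenVINO is built with ITT profiling enabled (the ENABLE_PROFILING_ITT build option, assuming the standard build setup) and a collector such as Intel VTune is attached, each of these scopes appears as a named task in the trace; otherwise the added macros compile away.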