refactor the perf counters to get really on-demand (rather than on every inference) (#10526)
* refactor the perf counters to get really on-demand (rather than on every inference)

* removed the (now) un-needed needPerfCounters flag
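In short: the async request used to capture a `needPerfCounters` flag at construction and copy the profiling map out of the underlying request at the end of every pipeline run; after this change the counters are pulled only when `GetPerformanceCounts()` is actually called, so the flag and the cached `_perfMap` member can go away. A minimal sketch of the before/after pattern, using simplified stand-in types rather than the actual plugin classes:

```cpp
#include <map>
#include <string>

struct ProfileInfo {};                               // stand-in for InferenceEngineProfileInfo
using PerfMap = std::map<std::string, ProfileInfo>;

struct UnderlyingRequest {                           // stand-in for the real device request
    PerfMap GetPerformanceCounts() const { return {}; }
};

// Before: the completion task copied the counters after every single inference.
struct EagerPerfRequest {
    UnderlyingRequest& _impl;
    bool _needPerfCounters;
    PerfMap _perfMap;
    void OnPipelineDone() {
        if (_needPerfCounters)                        // paid on every Infer(),
            _perfMap = _impl.GetPerformanceCounts();  // even if nobody ever reads it
    }
    PerfMap GetPerformanceCounts() const { return _perfMap; }
};

// After: the counters are fetched from the underlying request only on demand.
struct LazyPerfRequest {
    UnderlyingRequest& _impl;
    PerfMap GetPerformanceCounts() const {
        return _impl.GetPerformanceCounts();          // no per-inference bookkeeping
    }
};
```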
@@ -57,11 +57,9 @@ AutoBatchInferRequest::AutoBatchInferRequest(const std::vector<std::shared_ptr<c
                                              const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                              AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                              int batch_id,
-                                             int num_batch,
-                                             bool needPerfCounters)
+                                             int num_batch)
     : IInferRequestInternal(inputs, outputs),
       _myBatchedRequestWrapper(workerRequest),
-      _needPerfCounters(needPerfCounters),
       _batchId(batch_id),
       _batchSize(num_batch) {
     ShareBlobsWithBatchRequest();
@@ -71,11 +69,9 @@ AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
                                              const OutputsDataMap& networkOutputs,
                                              AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                              int batch_id,
-                                             int num_batch,
-                                             bool needPerfCounters)
+                                             int num_batch)
     : IInferRequestInternal(networkInputs, networkOutputs),
       _myBatchedRequestWrapper(workerRequest),
-      _needPerfCounters(needPerfCounters),
       _batchId(batch_id),
       _batchSize(num_batch) {
     ShareBlobsWithBatchRequest();
@@ -316,13 +312,8 @@ void AutoBatchInferRequest::CopyOutputsIfNeeded() {
     }
 }
 
-std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoBatchInferRequest::GetPerformanceCounts() const {
-    return _perfMap;
-}
-
 AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest(
     const AutoBatchInferRequest::Ptr& inferRequest,
-    const bool needPerfCounters,
     InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch,
     const ITaskExecutor::Ptr& callbackExecutor)
     : AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor),
@@ -345,27 +336,26 @@ AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest(
         };
         AutoBatchAsyncInferRequest* _this = nullptr;
     };
-    _pipeline = {
-        {/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this, needPerfCounters] {
-            if (this->_inferRequest->_exceptionPtr)  // if the exception happened in the batch1 fallback
-                std::rethrow_exception(this->_inferRequest->_exceptionPtr);
-            auto& batchReq = this->_inferRequest->_myBatchedRequestWrapper;
-            if (batchReq._exceptionPtr)  // when the batchN execution failed
-                std::rethrow_exception(batchReq._exceptionPtr);
-            // in the case of non-batched execution the blobs were set explicitly
-            if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == this->_inferRequest->_wasBatchedRequestUsed)
-                this->_inferRequest->CopyOutputsIfNeeded();
-            if (needPerfCounters) {
-                try {
-                    if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
-                        this->_inferRequest->_wasBatchedRequestUsed)
-                        this->_inferRequest->_perfMap = batchReq._inferRequestBatched->GetPerformanceCounts();
-                    else
-                        this->_inferRequest->_perfMap = this->_inferRequestWithoutBatch->GetPerformanceCounts();
-                } catch (...) {
-                }
-            }
-        }}};
+    _pipeline = {{/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
+                      if (this->_inferRequest->_exceptionPtr)  // if the exception happened in the batch1 fallback
+                          std::rethrow_exception(this->_inferRequest->_exceptionPtr);
+                      auto& batchReq = this->_inferRequest->_myBatchedRequestWrapper;
+                      if (batchReq._exceptionPtr)  // when the batchN execution failed
+                          std::rethrow_exception(batchReq._exceptionPtr);
+                      // in the case of non-batched execution the blobs were set explicitly
+                      if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
+                          this->_inferRequest->_wasBatchedRequestUsed)
+                          this->_inferRequest->CopyOutputsIfNeeded();
+                  }}};
 }
 
+std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoBatchAsyncInferRequest::GetPerformanceCounts()
+    const {
+    CheckState();
+    if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == _inferRequest->_wasBatchedRequestUsed)
+        return _inferRequest->_myBatchedRequestWrapper._inferRequestBatched->GetPerformanceCounts();
+    else
+        return _inferRequestWithoutBatch->GetPerformanceCounts();
+}
+
 void AutoBatchAsyncInferRequest::Infer_ThreadUnsafe() {
@@ -381,14 +371,12 @@ AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(
     const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch,
     const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch,
     const DeviceInformation& networkDevice,
-    const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
-    const bool needPerfCounters)
+    const std::unordered_map<std::string, InferenceEngine::Parameter>& config)
     : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr,
                                                           std::make_shared<InferenceEngine::ImmediateExecutor>()),
       _network{networkWithBatch},
       _networkWithoutBatch{networkWithoutBatch},
-      _config{config},
-      _needPerfCounters{needPerfCounters} {
+      _config{config} {
     // WA for gcc 4.8 ( fails compilation with member init-list)
     _device = networkDevice;
     auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT));
@@ -423,8 +411,7 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                   networkOutputs,
                                                   workerRequestPtrAndId.first,
                                                   workerRequestPtrAndId.second,
-                                                  _device.batchForDevice,
-                                                  _needPerfCounters);
+                                                  _device.batchForDevice);
 }
 
 InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
@@ -440,8 +427,7 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                   outputs,
                                                   workerRequestPtrAndId.first,
                                                   workerRequestPtrAndId.second,
-                                                  _device.batchForDevice,
-                                                  _needPerfCounters);
+                                                  _device.batchForDevice);
 }
 
 std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecutableNetwork::GetWorkerInferRequest() {
@@ -537,7 +523,6 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                            _networkWithoutBatch._so};
     return std::make_shared<AutoBatchAsyncInferRequest>(
         std::static_pointer_cast<AutoBatchInferRequest>(syncRequestImpl),
-        _needPerfCounters,
         inferRequestWithoutBatch,
         _callbackExecutor);
 }
@@ -845,11 +830,6 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
         metaDevice.batchForDevice = 1;
     }
 
-    const auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
-    const auto perfConfigInTargetPlugin =
-        core->GetConfig(deviceName, PluginConfigParams::KEY_PERF_COUNT).as<std::string>() == PluginConfigParams::YES;
-    const bool enablePerfCounters = perfConfigInTargetPlugin || ((fullConfig.end() != perfConfig) &&
-                                                                 (perfConfig->second == PluginConfigParams::YES));
     auto report_footprint = [](std::shared_ptr<ICore> pCore, std::string device) -> size_t {
         size_t footprint = 0;
         // TODO: use the per-network metric (22.2) rather than plugin-level
@@ -901,8 +881,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
                                                         executableNetworkWithoutBatch,
                                                         metaDevice,
-                                                        networkConfig,
-                                                        enablePerfCounters);
+                                                        networkConfig);
 }
 
 InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(
@@ -49,8 +49,7 @@ public:
                                const InferenceEngine::SoExecutableNetworkInternal& networkForDevice,
                                const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch,
                                const DeviceInformation& networkDevices,
-                               const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
-                               const bool needPerfCounters = false);
+                               const std::unordered_map<std::string, InferenceEngine::Parameter>& config);
 
     void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
     InferenceEngine::Parameter GetConfig(const std::string& name) const override;
@@ -90,16 +89,13 @@ public:
                                    const InferenceEngine::OutputsDataMap& networkOutputs,
                                    AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
                                    int batch_id,
-                                   int num_batch,
-                                   bool _needPerfCounters = false);
+                                   int num_batch);
     explicit AutoBatchInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                    const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                    AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
                                    int batch_id,
-                                   int num_batch,
-                                   bool _needPerfCounters = false);
+                                   int num_batch);
 
-    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
     // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
     void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req);
     void CopyInputsIfNeeded();
@@ -111,10 +107,8 @@ public:
         BATCH_EXECUTED,
         TIMEOUT_EXECUTED
     } _wasBatchedRequestUsed = eExecutionFlavor::NOT_EXECUTED;
-    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> _perfMap;
 
 protected:
-    bool _needPerfCounters = false;
     void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput);
     void ShareBlobsWithBatchRequest();
     size_t _batchId;
@@ -126,11 +120,11 @@ public:
     using Ptr = std::shared_ptr<AutoBatchAsyncInferRequest>;
 
     explicit AutoBatchAsyncInferRequest(const AutoBatchInferRequest::Ptr& inferRequest,
-                                        const bool needPerfCounters,
                                         InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch,
                                         const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
     void Infer_ThreadUnsafe() override;
     virtual ~AutoBatchAsyncInferRequest();
+    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
 
     InferenceEngine::SoIInferRequestInternal _inferRequestWithoutBatch;
     AutoBatchInferRequest::Ptr _inferRequest;
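For reference, this is roughly how an application-side call exercises the new path with the classic Inference Engine API after this change; the model path is a placeholder, and the device string assumes the auto-batching plugin is addressed as `BATCH:<device>`, as in OpenVINO releases of that period:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder model path
    auto execNet = core.LoadNetwork(network,
                                    "BATCH:GPU",   // auto-batching over GPU
                                    {{InferenceEngine::PluginConfigParams::KEY_PERF_COUNT,
                                      InferenceEngine::PluginConfigParams::YES}});
    auto request = execNet.CreateInferRequest();
    request.Infer();
    // With this commit, the counters are gathered here, on demand, from whichever
    // underlying request (batched or batch-1 fallback) actually executed:
    auto perfCounts = request.GetPerformanceCounts();
    return 0;
}
```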