refactor the perf counters to be computed truly on-demand (rather than on every inference) (#10526)

* refactor the perf counters to be computed truly on-demand (rather than on every inference)

* removed the (now) unneeded needPerfCounters flag
This commit is contained in:
Maxim Shevtsov
2022-02-20 20:56:15 +03:00
committed by GitHub
parent 982942fa5d
commit a52c755d21
2 changed files with 31 additions and 58 deletions

View File

@@ -57,11 +57,9 @@ AutoBatchInferRequest::AutoBatchInferRequest(const std::vector<std::shared_ptr<c
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
int batch_id,
int num_batch,
bool needPerfCounters)
int num_batch)
: IInferRequestInternal(inputs, outputs),
_myBatchedRequestWrapper(workerRequest),
_needPerfCounters(needPerfCounters),
_batchId(batch_id),
_batchSize(num_batch) {
ShareBlobsWithBatchRequest();
@@ -71,11 +69,9 @@ AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
int batch_id,
int num_batch,
bool needPerfCounters)
int num_batch)
: IInferRequestInternal(networkInputs, networkOutputs),
_myBatchedRequestWrapper(workerRequest),
_needPerfCounters(needPerfCounters),
_batchId(batch_id),
_batchSize(num_batch) {
ShareBlobsWithBatchRequest();
@@ -316,13 +312,8 @@ void AutoBatchInferRequest::CopyOutputsIfNeeded() {
}
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoBatchInferRequest::GetPerformanceCounts() const {
return _perfMap;
}
AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest(
const AutoBatchInferRequest::Ptr& inferRequest,
const bool needPerfCounters,
InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch,
const ITaskExecutor::Ptr& callbackExecutor)
: AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor),
@@ -345,27 +336,26 @@ AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest(
};
AutoBatchAsyncInferRequest* _this = nullptr;
};
_pipeline = {
{/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this, needPerfCounters] {
if (this->_inferRequest->_exceptionPtr) // if the exception happened in the batch1 fallback
std::rethrow_exception(this->_inferRequest->_exceptionPtr);
auto& batchReq = this->_inferRequest->_myBatchedRequestWrapper;
if (batchReq._exceptionPtr) // when the batchN execution failed
std::rethrow_exception(batchReq._exceptionPtr);
// in the case of non-batched execution the blobs were set explicitly
if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == this->_inferRequest->_wasBatchedRequestUsed)
this->_inferRequest->CopyOutputsIfNeeded();
if (needPerfCounters) {
try {
if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
this->_inferRequest->_wasBatchedRequestUsed)
this->_inferRequest->_perfMap = batchReq._inferRequestBatched->GetPerformanceCounts();
else
this->_inferRequest->_perfMap = this->_inferRequestWithoutBatch->GetPerformanceCounts();
} catch (...) {
}
}
}}};
_pipeline = {{/*TaskExecutor*/ std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
if (this->_inferRequest->_exceptionPtr) // if the exception happened in the batch1 fallback
std::rethrow_exception(this->_inferRequest->_exceptionPtr);
auto& batchReq = this->_inferRequest->_myBatchedRequestWrapper;
if (batchReq._exceptionPtr) // when the batchN execution failed
std::rethrow_exception(batchReq._exceptionPtr);
// in the case of non-batched execution the blobs were set explicitly
if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED ==
this->_inferRequest->_wasBatchedRequestUsed)
this->_inferRequest->CopyOutputsIfNeeded();
}}};
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoBatchAsyncInferRequest::GetPerformanceCounts()
const {
CheckState();
if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == _inferRequest->_wasBatchedRequestUsed)
return _inferRequest->_myBatchedRequestWrapper._inferRequestBatched->GetPerformanceCounts();
else
return _inferRequestWithoutBatch->GetPerformanceCounts();
}
void AutoBatchAsyncInferRequest::Infer_ThreadUnsafe() {
@@ -381,14 +371,12 @@ AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(
const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch,
const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch,
const DeviceInformation& networkDevice,
const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
const bool needPerfCounters)
const std::unordered_map<std::string, InferenceEngine::Parameter>& config)
: InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr,
std::make_shared<InferenceEngine::ImmediateExecutor>()),
_network{networkWithBatch},
_networkWithoutBatch{networkWithoutBatch},
_config{config},
_needPerfCounters{needPerfCounters} {
_config{config} {
// WA for gcc 4.8 ( fails compilation with member init-list)
_device = networkDevice;
auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT));
@@ -423,8 +411,7 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
networkOutputs,
workerRequestPtrAndId.first,
workerRequestPtrAndId.second,
_device.batchForDevice,
_needPerfCounters);
_device.batchForDevice);
}
InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
@@ -440,8 +427,7 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
outputs,
workerRequestPtrAndId.first,
workerRequestPtrAndId.second,
_device.batchForDevice,
_needPerfCounters);
_device.batchForDevice);
}
std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecutableNetwork::GetWorkerInferRequest() {
@@ -537,7 +523,6 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
_networkWithoutBatch._so};
return std::make_shared<AutoBatchAsyncInferRequest>(
std::static_pointer_cast<AutoBatchInferRequest>(syncRequestImpl),
_needPerfCounters,
inferRequestWithoutBatch,
_callbackExecutor);
}
@@ -845,11 +830,6 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
metaDevice.batchForDevice = 1;
}
const auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
const auto perfConfigInTargetPlugin =
core->GetConfig(deviceName, PluginConfigParams::KEY_PERF_COUNT).as<std::string>() == PluginConfigParams::YES;
const bool enablePerfCounters = perfConfigInTargetPlugin || ((fullConfig.end() != perfConfig) &&
(perfConfig->second == PluginConfigParams::YES));
auto report_footprint = [](std::shared_ptr<ICore> pCore, std::string device) -> size_t {
size_t footprint = 0;
// TODO: use the per-network metric (22.2) rather than plugin-level
@@ -901,8 +881,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
executableNetworkWithoutBatch,
metaDevice,
networkConfig,
enablePerfCounters);
networkConfig);
}
InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(

View File

@@ -49,8 +49,7 @@ public:
const InferenceEngine::SoExecutableNetworkInternal& networkForDevice,
const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch,
const DeviceInformation& networkDevices,
const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
const bool needPerfCounters = false);
const std::unordered_map<std::string, InferenceEngine::Parameter>& config);
void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
InferenceEngine::Parameter GetConfig(const std::string& name) const override;
@@ -90,16 +89,13 @@ public:
const InferenceEngine::OutputsDataMap& networkOutputs,
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
int batch_id,
int num_batch,
bool _needPerfCounters = false);
int num_batch);
explicit AutoBatchInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
int batch_id,
int num_batch,
bool _needPerfCounters = false);
int num_batch);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
// Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req);
void CopyInputsIfNeeded();
@@ -111,10 +107,8 @@ public:
BATCH_EXECUTED,
TIMEOUT_EXECUTED
} _wasBatchedRequestUsed = eExecutionFlavor::NOT_EXECUTED;
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> _perfMap;
protected:
bool _needPerfCounters = false;
void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput);
void ShareBlobsWithBatchRequest();
size_t _batchId;
@@ -126,11 +120,11 @@ public:
using Ptr = std::shared_ptr<AutoBatchAsyncInferRequest>;
explicit AutoBatchAsyncInferRequest(const AutoBatchInferRequest::Ptr& inferRequest,
const bool needPerfCounters,
InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
void Infer_ThreadUnsafe() override;
virtual ~AutoBatchAsyncInferRequest();
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
InferenceEngine::SoIInferRequestInternal _inferRequestWithoutBatch;
AutoBatchInferRequest::Ptr _inferRequest;