Files
openvino/samples/cpp/benchmark_app/infer_request_wrap.hpp
Fedor Zharinov e9874ec1d4 Dynamic reshapes (#7788)
* Merged and compiling

* Fix for dynamic shape type

* review fixes

* renamed blob shape to tensor shape, small improvements

* fix code style

* added parsing of multiple shapes

* store latency per group, add isIdleRequestAvailable() to Infer Queue

* added cached random inputs

* redesign pipeline, added new metrics(avg, max, min), added metrics per groups

* fixed code style

* small improvements

* modified tensor parameters parsing

* modified -i parameter parsing: added possibility to specify input names

* implemented image caching

* added cached blobs creation

* added -pcseq flag, modified batch filling, changes fps formula

* improvements

* code formatting

* code formatting2

* apply suggestions from review

* replaced Buffer class with InferenceEngine Blobs

* use batch size in blobs filling

* added shared blob allocator to handle blob's data

* fixed warnings & code style

* allocate blobs

* fix for networks with image info input

* added comments & fixed codestyle

* clear data in free() in SharedBlobAllocator

* remove unnecessary check

* Delimiter is changed to ::

* stylefix

* added layout from string function, small improvements

* modified parsing to enable : in input parameters

* small fixes

* small fixes

* added missed blob allocation, fixes

* [TEST]added support for remote blobs

* fix remote blobs

* new inputs/files output format

* removed vectors resize which caused bugs

* made cl::Buffer type under ifdef, fix inputs filling

* changed batch() function to not throwing exceptions

* removed unused var

* fix code style

* replace empty name in input files with name from net input

* restored old behaviour for static models

* fix code style

* fix warning - made const iterator

* fix warning - remove reference in loop variable

* added random and image_info input types to -i, fix problem with layout

* replaced batch() with getBatchSize() in main

* fix layout, shape, tensor shape parameters parsing

* upd help messages for input, tensor shape and pcseq command

* added buffer for cl output blobs, small fixes

Signed-off-by: ivikhrev <ivan.vikhrev@intel.com>

* added legacy mode

* restore setBlob

* code style formatting

* move collecting latency for groups under flag

* removed not applicable layouts

* added hint to error message when wrong input name in -tensor_shape was specified

* added new metrics to statistics report

* Apply suggestions from code review

* fix binary blobs filling when layout is CN

* apply suggestions

* moved file in the right place after rebase

* improved -pcseq output

* updated args and readme

* removed TEMPLATE plugin registration

* fix -shape arg description

* enable providing several -i args as input

* renamed legacy_mode to inference_only and made it default for static models, renamed tensor_shape to data_shape

* upd readme

* use getBlob() in inference only mode

* fix old input type for static case

* fix typo

* upd readme

* move log about benchmark mode to the measuring performance step

* added class for latency metrics

* upd readme, fix typos, renamed funcs

* fix warning and upd parsing to avoid error with : in file paths

* fix error on centos : error: use of deleted function ‘std::basic_stringstream<char>::basic_stringstream(const std::basic_stringstream<char>&)

* added check for key in inputs

* renamed input to inputs

* adjust batch size for binary blobs

* replaced warning with exception in bench mode defining

* align measurement cycle with master

Co-authored-by: ivikhrev <ivan.vikhrev@intel.com>
2021-12-17 12:20:43 +03:00

195 lines
6.2 KiB
C++

// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <vector>
// clang-format off
#include "inference_engine.hpp"
#include "remote_blobs_filling.hpp"
#include "statistics_report.hpp"
#include "utils.hpp"
// clang-format on
/// @brief Callback signature invoked when an infer request completes:
///        (request id, latency group id, measured latency in milliseconds).
using QueueCallbackFunction = std::function<void(size_t id, size_t group_id, const double latency)>;
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution
/// time.
class InferReqWrap final {
public:
    using Ptr = std::shared_ptr<InferReqWrap>;
    ~InferReqWrap() = default;
    /// @brief Creates an infer request from the executable network and installs a completion
    ///        callback that records the end time and reports the latency to the owning queue.
    /// @param net            executable network to create the request from
    /// @param id             index of this request inside the owning queue
    /// @param callbackQueue  functor invoked on completion with (id, latency group id, latency in ms)
    explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue)
        : _request(net.CreateInferRequest()),
          _id(id),
          _lat_group_id(0),
          // move instead of copy: copying a std::function may heap-allocate
          _callbackQueue(std::move(callbackQueue)),
          outputClBuffer() {
        // NOTE: the [&] capture effectively captures `this`; this is safe only because
        // _request is a member and therefore never outlives the wrapper itself.
        _request.SetCompletionCallback([&]() {
            _endTime = Time::now();
            _callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
        });
    }
    /// @brief Starts asynchronous inference; the start timestamp is taken just before submission.
    void startAsync() {
        _startTime = Time::now();
        _request.StartAsync();
    }
    /// @brief Blocks until the asynchronous inference has finished.
    void wait() {
        _request.Wait(InferenceEngine::InferRequest::RESULT_READY);
    }
    /// @brief Runs synchronous inference and reports the latency through the callback.
    void infer() {
        _startTime = Time::now();
        _request.Infer();
        _endTime = Time::now();
        _callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
    }
    /// @brief Returns per-layer profiling data collected by the plugin.
    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> getPerformanceCounts() {
        return _request.GetPerformanceCounts();
    }
    /// @brief Returns the blob bound to the given input/output name.
    InferenceEngine::Blob::Ptr getBlob(const std::string& name) {
        return _request.GetBlob(name);
    }
    /// @brief Binds an externally allocated blob to the given input/output name.
    void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
        _request.SetBlob(name, data);
    }
    /// @brief Duration between the last recorded start and end timestamps, in milliseconds.
    double getExecutionTimeInMilliseconds() const {
        auto execTime = std::chrono::duration_cast<ns>(_endTime - _startTime);
        return static_cast<double>(execTime.count()) * 0.000001;  // ns -> ms
    }
    /// @brief Selects which latency group subsequent measurements are attributed to.
    void setLatencyGroupId(size_t id) {
        _lat_group_id = id;
    }
    // in case of using GPU memory we need to allocate CL buffer for
    // output blobs. By encapsulating cl buffer inside InferReqWrap
    // we will control the number of output buffers and access to it.
    std::map<std::string, ::gpu::BufferType>& getOutputClBuffer() {
        return outputClBuffer;
    }

private:
    InferenceEngine::InferRequest _request;
    Time::time_point _startTime;  // set right before Infer()/StartAsync()
    Time::time_point _endTime;    // set on completion (callback or sync return)
    size_t _id;                   // index inside the owning InferRequestsQueue
    size_t _lat_group_id;         // latency group currently attributed to
    QueueCallbackFunction _callbackQueue;
    std::map<std::string, ::gpu::BufferType> outputClBuffer;
};
/// @brief Pool of InferReqWrap objects plus an idle-id queue used to drive the
///        asynchronous benchmark loop; also aggregates latency/duration statistics.
class InferRequestsQueue final {
public:
    /// @param net               executable network the requests are created from
    /// @param nireq             number of infer requests to create
    /// @param lat_group_n       number of latency groups to pre-allocate
    /// @param enable_lat_groups when true, latencies are additionally bucketed per group
    InferRequestsQueue(InferenceEngine::ExecutableNetwork& net,
                       size_t nireq,
                       size_t lat_group_n,
                       bool enable_lat_groups)
        : enable_lat_groups(enable_lat_groups) {
        for (size_t id = 0; id < nireq; id++) {
            // Lambda instead of std::bind: clearer and inlinable (Core Guidelines T.40 / ES.28).
            requests.push_back(std::make_shared<InferReqWrap>(
                net,
                id,
                [this](size_t req_id, size_t group_id, const double latency) {
                    putIdleRequest(req_id, group_id, latency);
                }));
            _idleIds.push(id);
        }
        _latency_groups.resize(lat_group_n);
        resetTimes();
    }
    ~InferRequestsQueue() {
        // Inference Request guarantee that it will wait for all asynchronous internal tasks in destructor
        // So it should be released before any context that the request can use inside internal asynchronous tasks
        // For example all members of InferRequestsQueue would be destroyed before `requests` vector
        // So requests can try to use this members from `putIdleRequest()` that would be called from request callback
        // To avoid this we should move this vector declaration after all members declaration or just clear it manually
        // in destructor
        requests.clear();
    }
    /// @brief Resets the measured time window and clears all collected latencies.
    void resetTimes() {
        // max/min sentinels so that the first putIdleRequest()/getIdleRequest() establish the window
        _startTime = Time::time_point::max();
        _endTime = Time::time_point::min();
        _latencies.clear();
        for (auto& group : _latency_groups) {
            group.clear();
        }
    }
    /// @brief Wall-clock span between the first request start and the last completion, in ms.
    double getDurationInMilliseconds() const {
        return std::chrono::duration_cast<ns>(_endTime - _startTime).count() * 0.000001;
    }
    /// @brief Completion callback target: records the latency and returns the request to the idle pool.
    void putIdleRequest(size_t id, size_t lat_group_id, const double latency) {
        std::unique_lock<std::mutex> lock(_mutex);
        _latencies.push_back(latency);
        if (enable_lat_groups) {
            _latency_groups[lat_group_id].push_back(latency);
        }
        _idleIds.push(id);
        _endTime = std::max(Time::now(), _endTime);
        _cv.notify_one();
    }
    /// @brief Blocks until a request is idle, removes it from the pool and returns it.
    InferReqWrap::Ptr getIdleRequest() {
        std::unique_lock<std::mutex> lock(_mutex);
        _cv.wait(lock, [this] {
            return _idleIds.size() > 0;
        });
        auto request = requests.at(_idleIds.front());
        _idleIds.pop();
        _startTime = std::min(Time::now(), _startTime);
        return request;
    }
    /// @brief Blocks until every request has returned to the idle pool.
    void waitAll() {
        std::unique_lock<std::mutex> lock(_mutex);
        _cv.wait(lock, [this] {
            return _idleIds.size() == requests.size();
        });
    }
    // NOTE(review): the getters below read shared state without taking _mutex;
    // presumably they are only called after waitAll() — confirm at call sites.
    std::vector<double> getLatencies() const {
        return _latencies;
    }
    std::vector<std::vector<double>> getLatencyGroups() const {
        return _latency_groups;
    }
    std::vector<InferReqWrap::Ptr> requests;

private:
    std::queue<size_t> _idleIds;               // ids of requests currently idle
    std::mutex _mutex;                         // guards all mutable state below
    std::condition_variable _cv;
    Time::time_point _startTime;               // earliest observed request start
    Time::time_point _endTime;                 // latest observed request completion
    std::vector<double> _latencies;            // every latency, in ms
    std::vector<std::vector<double>> _latency_groups;  // latencies bucketed per group
    bool enable_lat_groups;
};