* Merged and compiling * Fix for dynamic shape type * review fixes * renamed blob shape to tensor shape, small improvements * fix code style * added parsing of multiple shapes * store latency per group, add isIdleRequestAvailable() to Infer Queue * added cached random inputs * redesign pipeline, added new metrics(avg, max, min), added metrics per groups * fixed code style * small improvements * modified tensor parameters parsing * modified -i parameter parsing: added possibility to specify input names * implemented image caching * added cached blobs creating * added -pcseq flag, modified batch filling, changes fps formula * improvements * code formatting * code formatting2 * apply suggestions from review * replaced Buffer class with InferenceEngine Blobs * use batch size in blobs filling * added shared blob allocator to handle blob's data * fixed warnings & code style * allocate blobs * fix for networks with image info input * added comments & fixed codestyle * clear data in free() in SharedBlobAllocator * remove unnecessary check * Delimiter is changed to :: * stylefix * added layout from string function, small improvements * modified parsing to enable : in input parameters * small fixes * small fixes * added missed blob allocation, fixes * [TEST]added support for remote blobs * fix remote blobs * new inputs/files output format * removed vectors resize which caused bugs * made cl::Buffer type under ifdef, fix inputs filling * changed batch() function to not throwing exceptions * removed unused var * fix code style * replace empty name in input files with name from net input * restored old behaviour for static models * fix code style * fix warning - made const iterator * fix warning - remove reference in loop variable * added random and image_info input types to -i, fix problem with layout * replaced batch() with getBatchSize() in main * fix layout, shape, tensor shape parameters parsing * upd help messages for input, tensor shape and pcseq command * added buffer 
for cl output blobs, small fixes Signed-off-by: ivikhrev <ivan.vikhrev@intel.com> * added legacy mode * restore setBlob * code style formatting * move collecting latency for groups under flag * removed not applicable layouts * added hint to error message when wrong input name in -tensor_shape was specified * added new metrics to statistics report * Apply suggestions from code review * fix binary blobs filling when layout is CN * apply suggestions * moved file in the right place after rebase * improved -pcseq output * updated args and readme * removed TEMPLATE plugin registration * fix -shape arg description * enable providing several -i args as input * renamed legacy_mode to inference_only and made it default for static models, renamed tensor_shape to data_shape * upd readme * use getBlob() in inference only mode * fix old input type for static case * fix typo * upd readme * move log about benchmark mode to the measuring performance step * added class for latency metrics * upd readme, fix typos, renamed funcs * fix warning and upd parsing to avoid error with : in file paths * fix error on centos : error: use of deleted function ‘std::basic_stringstream<char>::basic_stringstream(const std::basic_stringstream<char>&)’ * added check for key in inputs * renamed input to inputs * adjust batch size for binary blobs * replaced warning with exception in bench mode defining * align measurement cycle with master Co-authored-by: ivikhrev <ivan.vikhrev@intel.com>
195 lines
6.2 KiB
C++
195 lines
6.2 KiB
C++
// Copyright (C) 2018-2021 Intel Corporation
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
#pragma once
|
|
|
|
#include <algorithm>
|
|
#include <chrono>
|
|
#include <condition_variable>
|
|
#include <functional>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <queue>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
// clang-format off
|
|
#include "inference_engine.hpp"
|
|
|
|
#include "remote_blobs_filling.hpp"
|
|
#include "statistics_report.hpp"
|
|
#include "utils.hpp"
|
|
// clang-format on
|
|
|
|
/// @brief Signature of the completion callback: (request id, latency group id, latency in ms).
/// `using` alias instead of `typedef` — the file already relies on C++11 features.
using QueueCallbackFunction = std::function<void(size_t id, size_t group_id, const double latency)>;
|
|
|
|
/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution
|
|
/// time.
|
|
class InferReqWrap final {
|
|
public:
|
|
using Ptr = std::shared_ptr<InferReqWrap>;
|
|
|
|
~InferReqWrap() = default;
|
|
|
|
explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue)
|
|
: _request(net.CreateInferRequest()),
|
|
_id(id),
|
|
_lat_group_id(0),
|
|
_callbackQueue(callbackQueue),
|
|
outputClBuffer() {
|
|
_request.SetCompletionCallback([&]() {
|
|
_endTime = Time::now();
|
|
_callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
|
|
});
|
|
}
|
|
|
|
void startAsync() {
|
|
_startTime = Time::now();
|
|
_request.StartAsync();
|
|
}
|
|
|
|
void wait() {
|
|
_request.Wait(InferenceEngine::InferRequest::RESULT_READY);
|
|
}
|
|
|
|
void infer() {
|
|
_startTime = Time::now();
|
|
_request.Infer();
|
|
_endTime = Time::now();
|
|
_callbackQueue(_id, _lat_group_id, getExecutionTimeInMilliseconds());
|
|
}
|
|
|
|
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> getPerformanceCounts() {
|
|
return _request.GetPerformanceCounts();
|
|
}
|
|
|
|
InferenceEngine::Blob::Ptr getBlob(const std::string& name) {
|
|
return _request.GetBlob(name);
|
|
}
|
|
|
|
void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
|
|
_request.SetBlob(name, data);
|
|
}
|
|
|
|
double getExecutionTimeInMilliseconds() const {
|
|
auto execTime = std::chrono::duration_cast<ns>(_endTime - _startTime);
|
|
return static_cast<double>(execTime.count()) * 0.000001;
|
|
}
|
|
|
|
void setLatencyGroupId(size_t id) {
|
|
_lat_group_id = id;
|
|
}
|
|
|
|
// in case of using GPU memory we need to allocate CL buffer for
|
|
// output blobs. By encapsulating cl buffer inside InferReqWrap
|
|
// we will control the number of output buffers and access to it.
|
|
std::map<std::string, ::gpu::BufferType>& getOutputClBuffer() {
|
|
return outputClBuffer;
|
|
}
|
|
|
|
private:
|
|
InferenceEngine::InferRequest _request;
|
|
Time::time_point _startTime;
|
|
Time::time_point _endTime;
|
|
size_t _id;
|
|
size_t _lat_group_id;
|
|
QueueCallbackFunction _callbackQueue;
|
|
std::map<std::string, ::gpu::BufferType> outputClBuffer;
|
|
};
|
|
|
|
class InferRequestsQueue final {
|
|
public:
|
|
InferRequestsQueue(InferenceEngine::ExecutableNetwork& net,
|
|
size_t nireq,
|
|
size_t lat_group_n,
|
|
bool enable_lat_groups)
|
|
: enable_lat_groups(enable_lat_groups) {
|
|
for (size_t id = 0; id < nireq; id++) {
|
|
requests.push_back(std::make_shared<InferReqWrap>(net,
|
|
id,
|
|
std::bind(&InferRequestsQueue::putIdleRequest,
|
|
this,
|
|
std::placeholders::_1,
|
|
std::placeholders::_2,
|
|
std::placeholders::_3)));
|
|
_idleIds.push(id);
|
|
}
|
|
_latency_groups.resize(lat_group_n);
|
|
resetTimes();
|
|
}
|
|
|
|
~InferRequestsQueue() {
|
|
// Inference Request guarantee that it will wait for all asynchronous internal tasks in destructor
|
|
// So it should be released before any context that the request can use inside internal asynchronous tasks
|
|
// For example all members of InferRequestsQueue would be destroyed before `requests` vector
|
|
// So requests can try to use this members from `putIdleRequest()` that would be called from request callback
|
|
// To avoid this we should move this vector declaration after all members declaration or just clear it manually
|
|
// in destructor
|
|
requests.clear();
|
|
}
|
|
|
|
void resetTimes() {
|
|
_startTime = Time::time_point::max();
|
|
_endTime = Time::time_point::min();
|
|
_latencies.clear();
|
|
for (auto& group : _latency_groups) {
|
|
group.clear();
|
|
}
|
|
}
|
|
|
|
double getDurationInMilliseconds() {
|
|
return std::chrono::duration_cast<ns>(_endTime - _startTime).count() * 0.000001;
|
|
}
|
|
|
|
void putIdleRequest(size_t id, size_t lat_group_id, const double latency) {
|
|
std::unique_lock<std::mutex> lock(_mutex);
|
|
_latencies.push_back(latency);
|
|
if (enable_lat_groups) {
|
|
_latency_groups[lat_group_id].push_back(latency);
|
|
}
|
|
_idleIds.push(id);
|
|
_endTime = std::max(Time::now(), _endTime);
|
|
_cv.notify_one();
|
|
}
|
|
|
|
InferReqWrap::Ptr getIdleRequest() {
|
|
std::unique_lock<std::mutex> lock(_mutex);
|
|
_cv.wait(lock, [this] {
|
|
return _idleIds.size() > 0;
|
|
});
|
|
auto request = requests.at(_idleIds.front());
|
|
_idleIds.pop();
|
|
_startTime = std::min(Time::now(), _startTime);
|
|
return request;
|
|
}
|
|
|
|
void waitAll() {
|
|
std::unique_lock<std::mutex> lock(_mutex);
|
|
_cv.wait(lock, [this] {
|
|
return _idleIds.size() == requests.size();
|
|
});
|
|
}
|
|
|
|
std::vector<double> getLatencies() {
|
|
return _latencies;
|
|
}
|
|
|
|
std::vector<std::vector<double>> getLatencyGroups() {
|
|
return _latency_groups;
|
|
}
|
|
|
|
std::vector<InferReqWrap::Ptr> requests;
|
|
|
|
private:
|
|
std::queue<size_t> _idleIds;
|
|
std::mutex _mutex;
|
|
std::condition_variable _cv;
|
|
Time::time_point _startTime;
|
|
Time::time_point _endTime;
|
|
std::vector<double> _latencies;
|
|
std::vector<std::vector<double>> _latency_groups;
|
|
bool enable_lat_groups;
|
|
};
|