enable first_inference_latency (first_inference + time_to_inference) (#14829)

* add time_to_first_inference_result KPI as the sum of the inference latency values

* append the time unit after the value

tested locally; it works

* Revert "append the time unit after the value"

This reverts commit 428b8cafdc.

* remove metric

* revert 42dd271c3b

* enable { SCOPED_TIMER(first_inference_latency); }

* enable first_inference_latency

tested locally; it works

* remove trailing whitespace

Co-authored-by: Daria Ilina <daria.krupnova@intel.com>
Albert Angles authored on 2023-01-23 17:48:07 +01:00, committed by GitHub
parent e2a1bd78a4
commit 711e2cbab0
3 changed files with 85 additions and 80 deletions
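The change works because SCOPED_TIMER reports the elapsed time of the scope it is declared in, so wrapping both the time_to_inference block and the first_inference block in one enclosing first_inference_latency scope yields a value that is, up to scope-entry overhead, the sum of the two inner steps. Below is a minimal sketch of that pattern, assuming a simple RAII timer; ScopedTimer and the results map are illustrative stand-ins, not the OpenVINO time-test implementation.

// Minimal sketch (not the time-test code): ScopedTimer and "results" are
// illustrative stand-ins for what SCOPED_TIMER does in the diffs below.
#include <chrono>
#include <iostream>
#include <map>
#include <string>
#include <thread>

static std::map<std::string, double> results;  // step name -> duration in ms

class ScopedTimer {
public:
    explicit ScopedTimer(std::string name)
        : name_(std::move(name)), start_(std::chrono::steady_clock::now()) {}
    ~ScopedTimer() {
        auto end = std::chrono::steady_clock::now();
        results[name_] = std::chrono::duration<double, std::milli>(end - start_).count();
    }
private:
    std::string name_;
    std::chrono::steady_clock::time_point start_;
};

int main() {
    {
        ScopedTimer outer("first_inference_latency");   // covers both inner steps
        {
            ScopedTimer t("time_to_inference");          // stand-in for plugin + network load
            std::this_thread::sleep_for(std::chrono::milliseconds(30));
        }
        {
            ScopedTimer t("first_inference");            // stand-in for the first Infer() call
            std::this_thread::sleep_for(std::chrono::milliseconds(20));
        }
    }
    // first_inference_latency comes out as (approximately) the sum of the two inner steps.
    for (const auto& [name, ms] : results)
        std::cout << name << ": " << ms << " ms\n";
}

In the actual diffs that follow, this same nesting is applied to both pipeline variants: the Inference Engine 1.0 API file and the OpenVINO 2.0 API file.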

Changed file 1 of 3: Python test-runner script (aggregate_stats / prepare_executable_cmd)

@@ -55,7 +55,6 @@ def aggregate_stats(stats: dict):
                         "stdev": statistics.stdev(duration_list) if len(duration_list) > 1 else 0}
             for step_name, duration_list in stats.items()}
 
 def prepare_executable_cmd(args: dict):
     """Generate common part of cmd from arguments to execute"""
     return [

Changed file 2 of 3: C++ time-test pipeline, Inference Engine 1.0 API

@@ -28,50 +28,53 @@ int runPipeline(const std::string &model, const std::string &device, const bool
         // first_inference_latency = time_to_inference + first_inference
-        {
-            SCOPED_TIMER(time_to_inference);
-            {
-                SCOPED_TIMER(load_plugin);
-                TimeTest::setPerformanceConfig(ie, device);
-                ie.GetVersions(device);
-                if (isCacheEnabled)
-                    ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
-            }
-            {
-                SCOPED_TIMER(create_exenetwork);
-                if (!isCacheEnabled) {
-                    if (TimeTest::fileExt(model) == "blob") {
-                        SCOPED_TIMER(import_network);
-                        exeNetwork = ie.ImportNetwork(model, device);
-                    }
-                    else {
-                        {
-                            SCOPED_TIMER(read_network);
-                            cnnNetwork = ie.ReadNetwork(model);
-                            batchSize = cnnNetwork.getBatchSize();
-                        }
-                        {
-                            SCOPED_TIMER(load_network);
-                            exeNetwork = ie.LoadNetwork(cnnNetwork, device);
-                        }
-                    }
-                }
-                else {
-                    SCOPED_TIMER(load_network_cache);
-                    exeNetwork = ie.LoadNetwork(model, device);
-                }
-            }
-            inferRequest = exeNetwork.CreateInferRequest();
-        }
-        {
-            SCOPED_TIMER(first_inference);
-            {
-                SCOPED_TIMER(fill_inputs);
-                const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
-                batchSize = batchSize != 0 ? batchSize : 1;
-                fillBlobs(inferRequest, inputsInfo, batchSize);
-            }
-            inferRequest.Infer();
-        }
+        {
+            SCOPED_TIMER(first_inference_latency);
+            {
+                SCOPED_TIMER(time_to_inference);
+                {
+                    SCOPED_TIMER(load_plugin);
+                    TimeTest::setPerformanceConfig(ie, device);
+                    ie.GetVersions(device);
+                    if (isCacheEnabled)
+                        ie.SetConfig({ {CONFIG_KEY(CACHE_DIR), "models_cache"} });
+                }
+                {
+                    SCOPED_TIMER(create_exenetwork);
+                    if (!isCacheEnabled) {
+                        if (TimeTest::fileExt(model) == "blob") {
+                            SCOPED_TIMER(import_network);
+                            exeNetwork = ie.ImportNetwork(model, device);
+                        }
+                        else {
+                            {
+                                SCOPED_TIMER(read_network);
+                                cnnNetwork = ie.ReadNetwork(model);
+                                batchSize = cnnNetwork.getBatchSize();
+                            }
+                            {
+                                SCOPED_TIMER(load_network);
+                                exeNetwork = ie.LoadNetwork(cnnNetwork, device);
+                            }
+                        }
+                    }
+                    else {
+                        SCOPED_TIMER(load_network_cache);
+                        exeNetwork = ie.LoadNetwork(model, device);
+                    }
+                }
+                inferRequest = exeNetwork.CreateInferRequest();
+            }
+            {
+                SCOPED_TIMER(first_inference);
+                {
+                    SCOPED_TIMER(fill_inputs);
+                    const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
+                    batchSize = batchSize != 0 ? batchSize : 1;
+                    fillBlobs(inferRequest, inputsInfo, batchSize);
+                }
+                inferRequest.Infer();
+            }
+        }
     };

Changed file 3 of 3: C++ time-test pipeline, OpenVINO 2.0 API

@@ -36,62 +36,65 @@ int runPipeline(const std::string &model, const std::string &device, const bool
         // first_inference_latency = time_to_inference + first_inference
-        {
-            SCOPED_TIMER(time_to_inference);
-            {
-                SCOPED_TIMER(load_plugin);
-                TimeTest::setPerformanceConfig(ie, device);
-                ie.get_versions(device);
-                if (isCacheEnabled)
-                    ie.set_property({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
-            }
-            {
-                SCOPED_TIMER(create_exenetwork);
-                if (!isCacheEnabled) {
-                    if (TimeTest::fileExt(model) == "blob") {
-                        SCOPED_TIMER(import_network);
-                        std::ifstream streamModel{model};
-                        exeNetwork = ie.import_model(streamModel, device);
-                    }
-                    else {
-                        {
-                            SCOPED_TIMER(read_network);
-                            cnnNetwork = ie.read_model(model);
-                        }
-                        if (reshape) {
-                            {
-                                SCOPED_TIMER(reshape);
-                                defaultInputs = getCopyOfDefaultInputs(cnnNetwork->inputs());
-                                cnnNetwork->reshape(reshapeShapes);
-                            }
-                        }
-                        {
-                            SCOPED_TIMER(load_network);
-                            exeNetwork = ie.compile_model(cnnNetwork, device);
-                        }
-                    }
-                }
-                else {
-                    SCOPED_TIMER(load_network_cache);
-                    exeNetwork = ie.compile_model(model, device);
-                }
-            }
-            inferRequest = exeNetwork.create_infer_request();
-        }
-        {
-            SCOPED_TIMER(first_inference);
-            {
-                SCOPED_TIMER(fill_inputs);
-                std::vector<ov::Output<const ov::Node>> inputs = exeNetwork.inputs();
-                if (reshape && dataShapes.empty()) {
-                    fillTensors(inferRequest, defaultInputs);
-                } else if (reshape && !dataShapes.empty()) {
-                    fillTensorsWithSpecifiedShape(inferRequest, inputs, dataShapes);
-                } else {
-                    fillTensors(inferRequest, inputs);
-                }
-            }
-            inferRequest.infer();
-        }
+        {
+            SCOPED_TIMER(first_inference_latency);
+            {
+                SCOPED_TIMER(time_to_inference);
+                {
+                    SCOPED_TIMER(load_plugin);
+                    TimeTest::setPerformanceConfig(ie, device);
+                    ie.get_versions(device);
+                    if (isCacheEnabled)
+                        ie.set_property({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
+                }
+                {
+                    SCOPED_TIMER(create_exenetwork);
+                    if (!isCacheEnabled) {
+                        if (TimeTest::fileExt(model) == "blob") {
+                            SCOPED_TIMER(import_network);
+                            std::ifstream streamModel{model};
+                            exeNetwork = ie.import_model(streamModel, device);
+                        }
+                        else {
+                            {
+                                SCOPED_TIMER(read_network);
+                                cnnNetwork = ie.read_model(model);
+                            }
+                            if (reshape) {
+                                {
+                                    SCOPED_TIMER(reshape);
+                                    defaultInputs = getCopyOfDefaultInputs(cnnNetwork->inputs());
+                                    cnnNetwork->reshape(reshapeShapes);
+                                }
+                            }
+                            {
+                                SCOPED_TIMER(load_network);
+                                exeNetwork = ie.compile_model(cnnNetwork, device);
+                            }
+                        }
+                    }
+                    else {
+                        SCOPED_TIMER(load_network_cache);
+                        exeNetwork = ie.compile_model(model, device);
+                    }
+                }
+                inferRequest = exeNetwork.create_infer_request();
+            }
+            {
+                SCOPED_TIMER(first_inference);
+                {
+                    SCOPED_TIMER(fill_inputs);
+                    std::vector<ov::Output<const ov::Node>> inputs = exeNetwork.inputs();
+                    if (reshape && dataShapes.empty()) {
+                        fillTensors(inferRequest, defaultInputs);
+                    } else if (reshape && !dataShapes.empty()) {
+                        fillTensorsWithSpecifiedShape(inferRequest, inputs, dataShapes);
+                    } else {
+                        fillTensors(inferRequest, inputs);
+                    }
+                }
+                inferRequest.infer();
+            }
+        }
     };