Add benchmark samples (#13388)
* Add sync_bnehcmark
* Fix Unix comilation
* niter->time
* Explain main loop
* samples: factor out common
* Code style
* clang-format -i
* return 0; -> return EXIT_SUCCESS;, +x
* Update throughput_benchmark
* Add READMEs
* Fix READMEs refs
* Add sync_benchmark.py
* Add niter, infer_new_request, -pc
* from datetime import timedelta
* Fix niter and seconds_to_run
* Add disclaimer about benchmark_app performance
* Update samples/cpp/benchmark/sync_benchmark/README.md
* Add dynamic_shape_bert_benhcmark
* Add dynamic_shape_detection_benchmark
* Adopt for detr-resnet50
* Remove sync_benchmark2, throughput_benchmark2, perf counters
* clang-format -i
* Fix flake8
* Add README.md
* Add links to sample_dynamic_shape_bert_benchmark
* Add softmax
* nameless LatencyMetrics
* parent.parent -> parents[2]
* Add bert_benhcmark sample
* Code style
* Add bert_benhcmark/README.md
* rm -r samples/python/benchmark/dynamic_shape_bert_benhcmark/
* rm -r samples/cpp/benchmark/dynamic_shape_detection_benchmark/
* bert_benhcmark/README.md: remove dynamic shape
* Remove add_subdirectory(dynamic_shape_detection_benchmark)
* flake8
* samples: Add a note about CUMULATIVE_THROUGHPUT, don’t expect get_property() to throw, don’t introduce json dependency for samples/cpp/common
* / namespace
* Add article
* namespace -> static
* Update README, seconds_ro_run 10, niter 10, no inter alinment
* percentile->median
* benchmark samples: use generate(), align logs, update READMEs
* benchmakr samples: remove percentile()
* samples/python/benchmark/bert_benhcmark/bert_benhcmark.py: report average sequence length and processing time
* Python samples: move requirements.txt to every sample
* Remove numpy from requirements.txt
* Remove Building section from Python samples, install only required extras from openvino-dev, set up environment for bert_benhcmark, report duration for bert_benhcmark
* Install openvino-dev for Hello Reshape SSD C++ Sample
2022-12-05 15:12:53 +04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
# Copyright (C) 2022 Intel Corporation
|
|
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
|
|
|
|
|
import logging as log
|
|
|
|
|
import statistics
|
|
|
|
|
import sys
|
|
|
|
|
from time import perf_counter
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
from openvino.runtime import Core, get_version
|
|
|
|
|
from openvino.runtime.utils.types import get_dtype
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fill_tensor_random(tensor):
    """Fill *tensor* in place with reproducible pseudo-random values of its own dtype.

    Uses a fixed seed so every run benchmarks identical input data.
    Raises RuntimeError for zero-sized tensors (dynamic shapes not yet resolved).
    """
    dtype = get_dtype(tensor.element_type)
    if dtype == bool:
        low, high = 0, 1
    else:
        low, high = np.iinfo(np.uint8).min, np.iinfo(np.uint8).max
    # np.random.uniform excludes high: add 1 to have it generated
    if np.dtype(dtype).kind in ['i', 'u', 'b']:
        high += 1
    generator = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(0)))
    if 0 == tensor.get_size():
        raise RuntimeError("Models with dynamic shapes aren't supported. Input tensors must have specific shapes before inference")
    tensor.data[:] = generator.uniform(low, high, list(tensor.shape)).astype(dtype)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Benchmark a model with one synchronous InferRequest and report latency stats.

    Expects exactly one CLI argument: the path to the model. Returns 1 on a
    usage error, None on success.
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    log.info('OpenVINO:')
    log.info(f"{'Build ':.<39} {get_version()}")
    if len(sys.argv) != 2:
        log.info(f'Usage: {sys.argv[0]} <path_to_model>')
        return 1
    # Optimize for latency. Most of the devices are configured for latency by default,
    # but there are exceptions like GNA
    hint_config = {'PERFORMANCE_HINT': 'LATENCY'}
    # Create Core and use it to compile a model.
    # Pick a device by replacing CPU, for example AUTO:GPU,CPU.
    # Using MULTI device is pointless in sync scenario
    # because only one instance of openvino.runtime.InferRequest is used
    core = Core()
    compiled_model = core.compile_model(sys.argv[1], 'CPU', hint_config)
    request = compiled_model.create_infer_request()
    # Fill input data for the request
    for port in compiled_model.inputs:
        fill_tensor_random(request.get_tensor(port))
    # Warm up
    request.infer()
    # Benchmark for seconds_to_run seconds and at least niter iterations
    seconds_to_run = 10
    niter = 10
    latencies = []
    start = perf_counter()
    deadline = start + seconds_to_run
    prev = start
    while len(latencies) < niter or prev < deadline:
        request.infer()
        now = perf_counter()
        latencies.append((now - prev) * 1e3)
        prev = now
    duration = prev - start
    # Report results
    fps = len(latencies) / duration
    log.info(f'Count: {len(latencies)} iterations')
    log.info(f'Duration: {duration * 1e3:.2f} ms')
    log.info('Latency:')
    log.info(f' Median: {statistics.median(latencies):.2f} ms')
    log.info(f' Average: {sum(latencies) / len(latencies):.2f} ms')
    log.info(f' Min: {min(latencies):.2f} ms')
    log.info(f' Max: {max(latencies):.2f} ms')
    log.info(f'Throughput: {fps:.2f} FPS')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Propagate main()'s status as the process exit code: main() returns 1 on a
    # usage error (and None -> 0 on success); without sys.exit() that value is
    # silently discarded and the process always exits 0.
    sys.exit(main())
|