# File: openvino/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py
# Snapshot: 2019-04-12 18:25:53 +03:00
# 200 lines, 8.5 KiB, Python
"""
Copyright (C) 2018-2019 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from statistics import median
from openvino.inference_engine import IENetwork, IEPlugin
from .utils.benchmark_utils import *
def main(args=None):
    """Benchmark an IR model (.xml/.bin) on the selected target device.

    Loads the device plugin, reads the network, optionally reshapes it to the
    requested batch size, runs inference either synchronously or asynchronously
    for a fixed number of iterations or a device-specific duration, and prints
    latency/throughput results to stdout.

    :param args: pre-parsed argument namespace; when None, the command line is
                 parsed via parse_args(). All errors are logged, not re-raised.
    """
    try:
        if args is None:
            args = parse_args()
        validate_args(args)

        # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
        # Use the module-level logger consistently (the original mixed in the root logger here).
        logger.info("Loading plugin")
        plugin = IEPlugin(args.target_device)

        config = dict()
        if CPU_DEVICE_NAME in args.target_device:
            if args.path_to_extension:
                plugin.add_cpu_extension(args.path_to_extension)
            # limit threading for CPU portion of inference
            if args.number_threads is not None:
                config.update({'CPU_THREADS_NUM': str(args.number_threads)})
            # pin threads for CPU portion of inference
            config.update({'CPU_BIND_THREAD': args.infer_threads_pinning})
            # for pure CPU execution, more throughput-oriented execution via streams
            # (device check already established by the enclosing branch)
            if args.api_type == 'async':
                config.update({'CPU_THROUGHPUT_STREAMS': str(args.number_infer_requests)})
        elif GPU_DEVICE_NAME in args.target_device:
            if args.path_to_cldnn_config:
                config.update({'CONFIG_FILE': args.path_to_cldnn_config})
                logger.info("GPU extensions is loaded {}".format(args.path_to_cldnn_config))
        elif MYRIAD_DEVICE_NAME in args.target_device:
            config.update({'LOG_LEVEL': 'LOG_INFO'})
            config.update({'VPU_LOG_LEVEL': 'LOG_INFO'})

        plugin.set_config(config)
        logger.info("Device is {}".format(plugin.device))
        logger.info("Plugin version is {}".format(plugin.version))

        # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
        logger.info("Loading network files")
        xml_filename = os.path.abspath(args.path_to_model)
        # The weights file is expected next to the .xml, same basename.
        head, _ = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs
        if len(input_info) == 0:
            raise AttributeError('No inputs info is provided')
        elif len(input_info) != 1:
            raise AttributeError("only one input layer network is supported")

        # -------------------------------------- 3. Change network batch_size -------------------------------------
        batch_size = ie_network.batch_size
        key = list(input_info.keys()).pop()
        precision = input_info[key].precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            # We support models having only one input layer; automatic batch
            # resizing is only safe when the layout is the expected one.
            shape = input_info[key].shape
            if input_info[key].layout != LAYOUT_TYPE:
                raise Exception('Unsupported model for batch size changing in automatic mode')
            shape[BATCH_SIZE_ELEM] = args.batch_size
            ie_network.reshape({key: shape})
            input_info = ie_network.inputs
            batch_size = args.batch_size

        logger_message = "Network batch size was changed to: " if args.batch_size is not None else "Network batch size: "
        logger_message += " {}, precision: {}".format(batch_size, precision)
        logger.info(logger_message)

        # ------------------------------------- 4. Loading model to the plugin -------------------------------------
        logger.info("Loading model to the plugin")
        exe_network = plugin.load(ie_network, args.number_infer_requests)

        # ------------------------------------ 5. Performance measurements stuff -----------------------------------
        inputs = get_images(os.path.abspath(args.path_to_images), batch_size)

        if batch_size < len(inputs):
            # Only batch_size images fit into one input blob; the rest are dropped.
            # FIX: logger.warn is a deprecated alias; message typo "then" -> "than".
            logger.warning("Network batch size {} is less than images count {}"
                           ", some input files will be ignored".format(batch_size, len(inputs)))

        input_images = {key: fill_blob_with_image(inputs, input_info[key].shape)}

        times = list()
        duration = 0
        if args.number_iterations is None:
            duration = get_duration_in_secs(args.target_device)

        if args.api_type == 'sync':
            # warming up - out of scope
            exe_network.infer(input_images)

            if args.number_iterations is not None:
                logger.info(
                    "Start inference synchronously ({}) sync inference executions".format(args.number_iterations))
                for iteration in range(args.number_iterations):
                    sync_infer_request(exe_network, times, input_images)
            else:
                logger.info("Start inference synchronously ({} s duration)".format(duration))
                start_time = datetime.now()
                current_time = start_time
                while (current_time - start_time).total_seconds() < duration:
                    current_time = sync_infer_request(exe_network, times, input_images)

            times.sort()
            latency = median(times)
            fps = batch_size / latency

            print("[BENCHMARK RESULT] Latency is {:.4f} msec".format(latency * 1e3))
            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))
        else:
            infer_requests = exe_network.requests

            if args.number_iterations is not None:
                # FIX: the original message was missing its closing parenthesis.
                logger.info("Start inference asynchronously ({}"
                            " async inference executions, {} "
                            " inference requests in parallel)".format(args.number_iterations,
                                                                      args.number_infer_requests))
            else:
                logger.info("Start inference asynchronously ({} s duration, "
                            "{} inference requests in parallel)".format(duration, args.number_infer_requests))

            # Pipeline bookkeeping: start request `current_inference`, wait on the
            # request issued number_infer_requests steps earlier (`previous_inference`).
            current_inference = 0
            required_inference_requests_were_executed = False
            previous_inference = 1 - args.number_infer_requests
            step = 0
            steps_count = args.number_infer_requests - 1
            if args.number_iterations is not None:
                steps_count += args.number_iterations

            # warming up - out of scope
            infer_requests[0].async_infer(input_images)
            infer_requests[0].wait()

            start_time = datetime.now()
            while not required_inference_requests_were_executed or step < steps_count or \
                    args.number_iterations is None and (datetime.now() - start_time).total_seconds() < duration:
                exe_network.start_async(current_inference, input_images)

                if previous_inference >= 0:
                    status = infer_requests[previous_inference].wait()
                    # FIX: compare by value, not identity — "status is not 0"
                    # relied on CPython small-int caching and is a
                    # SyntaxWarning on Python 3.8+.
                    if status != 0:
                        raise Exception("Infer request not completed successfully")

                current_inference += 1
                if current_inference >= args.number_infer_requests:
                    current_inference = 0
                    required_inference_requests_were_executed = True

                previous_inference += 1
                if previous_inference >= args.number_infer_requests:
                    previous_inference = 0

                step += 1

            # wait the latest inference executions
            for not_completed_index in range(args.number_infer_requests):
                if infer_requests[not_completed_index].wait(0) != 0:
                    infer_requests[not_completed_index].wait()

            total_duration = (datetime.now() - start_time).total_seconds()
            fps = batch_size * step / total_duration

            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))

        del exe_network
        del plugin

    except Exception as e:
        # FIX: use the module logger instead of the root logger for consistency.
        logger.exception(e)