"""
|
|
Copyright (C) 2018-2019 Intel Corporation
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
"""

from statistics import median
from openvino.inference_engine import IENetwork, IEPlugin

from .utils.benchmark_utils import *


def main(args=None):
    try:
        if args is None:
            args = parse_args()

        validate_args(args)

        # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
        logger.info("Loading plugin")
        plugin = IEPlugin(args.target_device)

        config = dict()
        if CPU_DEVICE_NAME in args.target_device:
            if args.path_to_extension:
                plugin.add_cpu_extension(args.path_to_extension)
            # Limit threading for the CPU portion of inference
            if args.number_threads is not None:
                config.update({'CPU_THREADS_NUM': str(args.number_threads)})
            # Pin threads for the CPU portion of inference
            config.update({'CPU_BIND_THREAD': args.infer_threads_pinning})
            # For pure CPU execution in async mode, use streams for throughput-oriented execution
            if args.api_type == 'async' and CPU_DEVICE_NAME in args.target_device:
                config.update({'CPU_THROUGHPUT_STREAMS': str(args.number_infer_requests)})
        elif GPU_DEVICE_NAME in args.target_device:
            if args.path_to_cldnn_config:
                config.update({'CONFIG_FILE': args.path_to_cldnn_config})
                logger.info("GPU extensions library loaded from {}".format(args.path_to_cldnn_config))
        elif MYRIAD_DEVICE_NAME in args.target_device:
            config.update({'LOG_LEVEL': 'LOG_INFO'})
            config.update({'VPU_LOG_LEVEL': 'LOG_INFO'})

        plugin.set_config(config)

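        # The keys above (CPU_THREADS_NUM, CPU_BIND_THREAD, CPU_THROUGHPUT_STREAMS, CONFIG_FILE,
        # LOG_LEVEL, VPU_LOG_LEVEL) are plugin-specific options; only the branch matching the
        # selected target device contributes entries, so set_config() receives keys relevant
        # to that plugin only.
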
logger.info("Device is {}".format(plugin.device))
|
|
logger.info("Plugin version is {}".format(plugin.version))
|
|
|
|
        # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
        logger.info("Loading network files")

        xml_filename = os.path.abspath(args.path_to_model)
        head, _ = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No input info is provided')
        elif len(input_info) != 1:
            raise AttributeError("Only networks with a single input layer are supported")

        # -------------------------------------- 3. Change network batch_size -------------------------------------
        batch_size = ie_network.batch_size
        key = list(input_info.keys()).pop()
        precision = input_info[key].precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            # Only single-input networks are supported, so reshape that one input
            shape = input_info[key].shape
            if input_info[key].layout != LAYOUT_TYPE:
                raise Exception('Unsupported model for batch size changing in automatic mode')
            shape[BATCH_SIZE_ELEM] = args.batch_size
            ie_network.reshape({key: shape})

            input_info = ie_network.inputs

            batch_size = args.batch_size

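        # Illustrative note (assuming an NCHW-layout image input): a blob originally shaped
        # [1, 3, 224, 224] would be reshaped above to [args.batch_size, 3, 224, 224]; the
        # spatial dimensions are hypothetical, the real ones come from the IR.
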
logger_message = "Network batch size was changed to: " if args.batch_size is not None else "Network batch size: "
|
|
logger_message += " {}, precision: {}".format(batch_size, precision)
|
|
logger.info(logger_message)
|
|
|
|
        # ------------------------------------- 4. Loading model to the plugin -------------------------------------
        logger.info("Loading model to the plugin")
        exe_network = plugin.load(ie_network, args.number_infer_requests)

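        # The load() call above returns an ExecutableNetwork with args.number_infer_requests
        # request slots (exposed as exe_network.requests); the async path below cycles through them.
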
        # ------------------------------------ 5. Performance measurements stuff -----------------------------------
        inputs = get_images(os.path.abspath(args.path_to_images), batch_size)

        if batch_size < len(inputs):
            logger.warning("Network batch size {} is less than images count {}"
                           ", some input files will be ignored".format(batch_size, len(inputs)))

        input_images = {key: fill_blob_with_image(inputs, input_info[key].shape)}

        times = list()
        duration = 0

        if args.number_iterations is None:
            duration = get_duration_in_secs(args.target_device)

        if args.api_type == 'sync':

            # Warming up - out of scope
            exe_network.infer(input_images)

            if args.number_iterations is not None:
                logger.info(
                    "Start inference synchronously ({} sync inference executions)".format(args.number_iterations))
                for iteration in range(args.number_iterations):
                    sync_infer_request(exe_network, times, input_images)

            else:
                logger.info("Start inference synchronously ({} s duration)".format(duration))
                start_time = datetime.now()
                current_time = start_time
                while (current_time - start_time).total_seconds() < duration:
                    current_time = sync_infer_request(exe_network, times, input_images)

            times.sort()
            latency = median(times)
            fps = batch_size / latency

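            # The median of the collected per-request times is reported as latency; each sync
            # request processes one full batch, so throughput is batch_size / latency.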
print("[BENCHMARK RESULT] Latency is {:.4f} msec".format(latency * 1e3))
|
|
print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))
|
|
        else:
            infer_requests = exe_network.requests

            if args.number_iterations is not None:
                logger.info("Start inference asynchronously ({} async inference executions, "
                            "{} inference requests in parallel)".format(args.number_iterations,
                                                                        args.number_infer_requests))
            else:
                logger.info("Start inference asynchronously ({} s duration, "
                            "{} inference requests in parallel)".format(duration, args.number_infer_requests))

            current_inference = 0
            required_inference_requests_were_executed = False
            previous_inference = 1 - args.number_infer_requests
            step = 0
            steps_count = args.number_infer_requests - 1
            if args.number_iterations is not None:
                steps_count += args.number_iterations

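            # The requests are used round-robin: each loop iteration starts request
            # `current_inference` and waits on `previous_inference`, which was started
            # number_infer_requests - 1 iterations earlier, so all slots stay in flight.
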
            # Warming up - out of scope
            infer_requests[0].async_infer(input_images)
            infer_requests[0].wait()

            start_time = datetime.now()
            while not required_inference_requests_were_executed or step < steps_count or \
                    args.number_iterations is None and (datetime.now() - start_time).total_seconds() < duration:
                exe_network.start_async(current_inference, input_images)

                if previous_inference >= 0:
                    status = infer_requests[previous_inference].wait()
                    if status != 0:
                        raise Exception("Infer request not completed successfully")

                current_inference += 1
                if current_inference >= args.number_infer_requests:
                    current_inference = 0
                    required_inference_requests_were_executed = True

                previous_inference += 1
                if previous_inference >= args.number_infer_requests:
                    previous_inference = 0

                step += 1

            # Wait for the remaining inference requests to complete
            for not_completed_index in range(args.number_infer_requests):
                if infer_requests[not_completed_index].wait(0) != 0:
                    infer_requests[not_completed_index].wait()

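            # `step` counts the async requests started in the loop above; each one processes a
            # full batch, so throughput is batch_size * step / wall-clock time. No latency is
            # reported for the async path because executions overlap.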
            total_duration = (datetime.now() - start_time).total_seconds()
            fps = batch_size * step / total_duration

            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))

        del exe_network
        del plugin

    except Exception as e:
        logging.exception(e)
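

# A minimal usage sketch, assuming this module lives in a package whose utils/benchmark_utils.py
# provides parse_args(), validate_args(), logger, the *_DEVICE_NAME constants and the other
# helpers used above (the relative import means the file cannot be run as a standalone script):
#
#     python -m <your_benchmark_package> -m model.xml -i images/ -d CPU
#
# The flag names shown here are hypothetical; the real ones are defined by parse_args().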