import os
import sys
from datetime import datetime

from openvino.tools.benchmark.benchmark import Benchmark
from openvino.tools.benchmark.parameters import parse_args
from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, \
    GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, GNA_DEVICE_NAME, BLOB_EXTENSION
from openvino.tools.benchmark.utils.inputs_filling import set_inputs
from openvino.tools.benchmark.utils.logging import logger
from openvino.tools.benchmark.utils.progress_bar import ProgressBar
from openvino.tools.benchmark.utils.utils import next_step, config_network_inputs, get_number_iterations, \
    process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \
    get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, update_shapes, \
    adjust_shapes_batch, load_config, dump_config
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport
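
# Typical invocation through the benchmark_app entry point, using the standard
# flags defined in parameters.parse_args:
#   python benchmark_app.py -m <model>.xml -d CPU -niter 100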


def main():
    # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
    next_step()
    run(parse_args())


def run(args):
    statistics = None
    try:
        if args.number_streams is None:
            logger.warn(" -nstreams default value is determined automatically for a device. "
                        "Although the automatic selection usually provides a reasonable performance, "
                        "it still may be non-optimal for some cases; for more information look at the README. ")

        command_line_arguments = get_command_line_arguments(sys.argv)
        if args.report_type:
            statistics = StatisticsReport(StatisticsReport.Config(args.report_type, args.report_folder))
            statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)

        def is_flag_set_in_command_line(flag):
            return any(x.strip('-') == flag for x, y in command_line_arguments)
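
        # Note: get_command_line_arguments yields (argument, value) pairs, so the
        # helper above distinguishes flags the user passed explicitly from parser defaults.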

        device_name = args.target_device

        devices = parse_devices(device_name)
        device_number_streams = parse_nstreams_value_per_device(devices, args.number_streams)
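
        # For composite targets the device list is split out, e.g. -d MULTI:CPU,GPU
        # gives ['CPU', 'GPU'], and -nstreams CPU:4,GPU:2 maps each device to its
        # requested stream count.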

        config = {}
        if args.load_config:
            load_config(args.load_config, config)

        is_network_compiled = False
        _, ext = os.path.splitext(args.path_to_model)

        if ext == BLOB_EXTENSION:
            is_network_compiled = True
            print("Network is compiled")
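        # A BLOB_EXTENSION (.blob) file is an already compiled network: the read /
        # reshape / load steps below are skipped and benchmark.import_network is used instead.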

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step(step_id=2)

        benchmark = Benchmark(args.target_device, args.number_infer_requests,
                              args.number_iterations, args.time, args.api_type)

        ## CPU (MKLDNN) extensions
        if CPU_DEVICE_NAME in device_name and args.path_to_extension:
            benchmark.add_extension(path_to_extension=args.path_to_extension)

        ## GPU (clDNN) Extensions
        if GPU_DEVICE_NAME in device_name and args.path_to_cldnn_config:
            if GPU_DEVICE_NAME not in config.keys():
                config[GPU_DEVICE_NAME] = {}
            config[GPU_DEVICE_NAME]['CONFIG_FILE'] = args.path_to_cldnn_config

        if GPU_DEVICE_NAME in config.keys() and 'CONFIG_FILE' in config[GPU_DEVICE_NAME].keys():
            cldnn_config = config[GPU_DEVICE_NAME]['CONFIG_FILE']
            benchmark.add_extension(path_to_cldnn_config=cldnn_config)

        version = benchmark.get_version_info()

        logger.info(version)

        # --------------------- 3. Setting device configuration --------------------------------------------------------
        next_step()
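
        # PERF_COUNT precedence per device: an explicit -pc flag wins, then a value
        # taken from -load_config, then report type / exec-graph dumping force it on,
        # otherwise the parser default is used.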
        perf_counts = False
        for device in devices:
            if device not in config.keys():
                config[device] = {}
            ## Set performance counter
            if is_flag_set_in_command_line('pc'):
                ## set to user defined value
                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
            elif 'PERF_COUNT' in config[device].keys() and config[device]['PERF_COUNT'] == 'YES':
                logger.warn("Performance counters for {} device are turned on. ".format(device) +
                            "To print results use -pc option.")
            elif args.report_type in [averageCntReport, detailedCntReport]:
                logger.warn("Turning on performance counters for {} device ".format(device) +
                            "since report type is {}.".format(args.report_type))
                config[device]['PERF_COUNT'] = 'YES'
            elif args.exec_graph_path is not None:
                logger.warn("Turning on performance counters for {} device ".format(device) +
                            "due to execution graph dumping.")
                config[device]['PERF_COUNT'] = 'YES'
            else:
                ## set to default value
                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
            perf_counts = True if config[device]['PERF_COUNT'] == 'YES' else perf_counts

            def set_throughput_streams():
                key = device + "_THROUGHPUT_STREAMS"
                if device in device_number_streams.keys():
                    ## set to user defined value
                    supported_config_keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS')
                    if key not in supported_config_keys:
                        raise Exception("Device {} doesn't support config key '{}'! ".format(device, key) +
                                        "Please specify -nstreams for correct devices in format <dev1>:<nstreams1>,<dev2>:<nstreams2>")
                    config[device][key] = device_number_streams[device]
                elif key not in config[device].keys() and args.api_type == "async":
                    logger.warn("-nstreams default value is determined automatically for {} device. ".format(device) +
                                "Although the automatic selection usually provides a reasonable performance, "
                                "it still may be non-optimal for some cases; for more information look at the README.")
                    config[device][key] = device + "_THROUGHPUT_AUTO"
                if key in config[device].keys():
                    device_number_streams[device] = config[device][key]
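
            # e.g. for CPU the helper sets config['CPU']['CPU_THROUGHPUT_STREAMS'] to
            # the -nstreams value, or to 'CPU_THROUGHPUT_AUTO' for async runs.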
            if device == CPU_DEVICE_NAME:  # CPU supports few special performance-oriented keys
                # limit threading for CPU portion of inference
                if args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['CPU_THREADS_NUM'] = str(args.number_threads)

                if is_flag_set_in_command_line("enforcebf16") or is_flag_set_in_command_line("enforce_bfloat16"):
                    config[device]['ENFORCE_BF16'] = 'YES' if args.enforce_bfloat16 else 'NO'

                if is_flag_set_in_command_line('pin'):
                    ## set to user defined value
                    config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning
                elif 'CPU_BIND_THREAD' not in config[device].keys():
                    if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
                        logger.warn("Turning off threads pinning for {} ".format(device) +
                                    "device since multi-scenario with GPU device is used.")
                        config[device]['CPU_BIND_THREAD'] = 'NO'
                    else:
                        ## set to default value
                        config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning

                ## for CPU execution, more throughput-oriented execution via streams
                set_throughput_streams()
            elif device == GPU_DEVICE_NAME:
                ## for GPU execution, more throughput-oriented execution via streams
                set_throughput_streams()

                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
                    logger.warn("Turning on GPU throttling. Multi-device execution with the CPU + GPU performs best with the GPU throttling hint, " +
                                "which releases another CPU thread (that is otherwise used by the GPU driver for active polling)")
                    config[device]['CLDNN_PLUGIN_THROTTLE'] = '1'
            elif device == MYRIAD_DEVICE_NAME:
                config[device]['LOG_LEVEL'] = 'LOG_INFO'
            elif device == GNA_DEVICE_NAME:
                if is_flag_set_in_command_line('qb'):
                    if args.qb == 8:
                        config[device]['GNA_PRECISION'] = 'I8'
                    else:
                        config[device]['GNA_PRECISION'] = 'I16'
                if args.number_threads and is_flag_set_in_command_line("nthreads"):
                    config[device]['GNA_LIB_N_THREADS'] = str(args.number_threads)

        benchmark.set_config(config)
        batch_size = args.batch_size
        if not is_network_compiled:
            # --------------------- 4. Read the Intermediate Representation of the network -----------------------------
            next_step()

            start_time = datetime.utcnow()
            ie_network = benchmark.read_network(args.path_to_model)
            duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
            logger.info("Read network took {} ms".format(duration_ms))
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('read network time (ms)', duration_ms)
                                          ])

            # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
            next_step()

            shapes = {k: v.input_data.shape.copy() for k, v in ie_network.input_info.items()}
            reshape = False
            if args.shape:
                reshape |= update_shapes(shapes, args.shape, ie_network.input_info)
            if args.batch_size and args.batch_size != ie_network.batch_size:
                reshape |= adjust_shapes_batch(shapes, args.batch_size, ie_network.input_info)
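
            # -shape follows the usual benchmark_app syntax, e.g. -shape [1,3,224,224]
            # for a single input or -shape "data[1,3,224,224],info[1,3]" for named inputs.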

            if reshape:
                start_time = datetime.utcnow()
                logger.info(
                    'Reshaping network: {}'.format(', '.join("'{}': {}".format(k, v) for k, v in shapes.items())))
                ie_network.reshape(shapes)
                duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
                logger.info("Reshape network took {} ms".format(duration_ms))
                if statistics:
                    statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                              [
                                                  ('reshape network time (ms)', duration_ms)
                                              ])

            batch_size = ie_network.batch_size
            logger.info('Network batch size: {}'.format(ie_network.batch_size))

            # --------------------- 6. Configuring input of the model --------------------------------------------------
            next_step()

            config_network_inputs(ie_network)

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_time = datetime.utcnow()
            exe_network = benchmark.load_network(ie_network)
            duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
            logger.info("Load network took {} ms".format(duration_ms))
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('load network time (ms)', duration_ms)
                                          ])
        else:
            next_step()
            print("Skipping the step for compiled network")
            next_step()
            print("Skipping the step for compiled network")
            next_step()
            print("Skipping the step for compiled network")

            # --------------------- 7. Loading the model to the device -------------------------------------------------
            next_step()

            start_time = datetime.utcnow()
            exe_network = benchmark.import_network(args.path_to_model)
            duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
            logger.info("Import network took {} ms".format(duration_ms))
            if statistics:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('import network time (ms)', duration_ms)
                                          ])
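
            # a compiled blob skips the resize step above, so default the batch
            # size to 1 when the user did not pass -b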
            if batch_size == 0:
                batch_size = 1

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        # Update number of streams
        for device in device_number_streams.keys():
            key = device + '_THROUGHPUT_STREAMS'
            device_number_streams[device] = benchmark.ie.get_config(device, key)

        # Number of requests
        infer_requests = exe_network.requests

        # Iteration limit
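        # (for the async API, get_number_iterations aligns the requested count to a
        # multiple of the number of infer requests so every request runs equally often)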
        benchmark.niter = get_number_iterations(benchmark.niter, benchmark.nireq, args.api_type)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        paths_to_input = list()
        if args.paths_to_input:
            for path in args.paths_to_input:
                paths_to_input.append(os.path.abspath(*path))
        set_inputs(paths_to_input, batch_size, exe_network.input_info, infer_requests)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                      [
                                          ('topology', ie_network.name),
                                          ('target device', device_name),
                                          ('API', args.api_type),
                                          ('precision', "UNSPECIFIED"),
                                          ('batch size', str(batch_size)),
                                          ('number of iterations', str(benchmark.niter) if benchmark.niter else "0"),
                                          ('number of parallel infer requests', str(benchmark.nireq)),
                                          ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
                                      ])

            for nstreams in device_number_streams.items():
                statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                          [
                                              ("number of {} streams".format(nstreams[0]), str(nstreams[1])),
                                          ])

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        output_string = process_help_inference_string(benchmark)

        next_step(additional_info=output_string)
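        # time-bounded runs do not know the iteration count up front, so the
        # progress bar falls back to a fixed 10000-step scale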
        progress_bar_total_count = 10000
        if benchmark.niter and not benchmark.duration_seconds:
            progress_bar_total_count = benchmark.niter

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress) if args.progress else None
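
        # the first inference is measured separately since it typically includes
        # one-off warm-up costs (e.g. lazy allocations) that would skew the stats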
        duration_ms = "{:.2f}".format(benchmark.first_infer(exe_network))
        logger.info("First inference took {} ms".format(duration_ms))
        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('first inference time (ms)', duration_ms)
                                      ])
        fps, latency_ms, total_duration_sec, iteration = benchmark.infer(exe_network, batch_size, progress_bar)

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.dump_config:
            dump_config(args.dump_config, config)
            logger.info("Inference Engine configuration settings were dumped to {}".format(args.dump_config))

        if args.exec_graph_path:
            dump_exec_graph(exe_network, args.exec_graph_path)

        if perf_counts:
            perfs_count_list = []
            for ni in range(int(benchmark.nireq)):
                perfs_count_list.append(exe_network.requests[ni].get_perf_counts())
            if args.perf_counts:
                print_perf_counters(perfs_count_list)
            if statistics:
                statistics.dump_performance_counters(perfs_count_list)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('total execution time (ms)', '{:.2f}'.format(get_duration_in_milliseconds(total_duration_sec))),
                                          ('total number of iterations', str(iteration)),
                                      ])
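            # a single latency figure is not meaningful when requests complete on
            # different devices, so latency is only reported for non-MULTI targets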
            if MULTI_DEVICE_NAME not in device_name:
                statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                          [
                                              ('latency (ms)', '{:.2f}'.format(latency_ms)),
                                          ])

            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('throughput', '{:.2f}'.format(fps)),
                                      ])

        if statistics:
            statistics.dump()

        print('Count: {} iterations'.format(iteration))
        print('Duration: {:.2f} ms'.format(get_duration_in_milliseconds(total_duration_sec)))
        if MULTI_DEVICE_NAME not in device_name:
            print('Latency: {:.2f} ms'.format(latency_ms))
        print('Throughput: {:.2f} FPS'.format(fps))

        del exe_network
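
        # next_step keeps its counter as a function attribute; reset it so run()
        # can be called again in the same process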
        next_step.step_id = 0
    except Exception as e:
        logger.exception(e)

        if statistics:
            statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
                                      [
                                          ('error', str(e)),
                                      ])
            statistics.dump()
        sys.exit(1)