"""
|
|
Copyright (C) 2018-2019 Intel Corporation
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
"""

from statistics import median
from openvino.inference_engine import IENetwork, IEPlugin

from .utils.benchmark_utils import *


def main(args=None):
    try:
        if args is None:
            args = parse_args()

        validate_args(args)

        # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
        logger.info("Loading plugin")
        plugin = IEPlugin(args.target_device)

        config = dict()
        if CPU_DEVICE_NAME in args.target_device:
            if args.path_to_extension:
                plugin.add_cpu_extension(args.path_to_extension)
            # Limit threading for the CPU portion of inference
            if args.number_threads is not None:
                config.update({'CPU_THREADS_NUM': str(args.number_threads)})
            # Pin threads for the CPU portion of inference
            config.update({'CPU_BIND_THREAD': args.infer_threads_pinning})
            # For pure CPU execution in async mode, use streams for throughput-oriented execution
            if args.api_type == 'async' and CPU_DEVICE_NAME in args.target_device:
                config.update({'CPU_THROUGHPUT_STREAMS': str(args.number_infer_requests)})
        elif GPU_DEVICE_NAME in args.target_device:
            if args.path_to_cldnn_config:
                config.update({'CONFIG_FILE': args.path_to_cldnn_config})
                logger.info("GPU extensions library loaded from {}".format(args.path_to_cldnn_config))
        elif MYRIAD_DEVICE_NAME in args.target_device:
            config.update({'LOG_LEVEL': 'LOG_INFO'})
            config.update({'VPU_LOG_LEVEL': 'LOG_INFO'})

        plugin.set_config(config)

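        # The keys above (CPU_THREADS_NUM, CPU_BIND_THREAD, CPU_THROUGHPUT_STREAMS, CONFIG_FILE,
        # LOG_LEVEL, VPU_LOG_LEVEL) are plugin-specific options; only the branch matching the
        # selected target device contributes entries, so set_config() receives keys relevant
        # to that plugin only.
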
logger.info("Device is {}".format(plugin.device))
|
|
logger.info("Plugin version is {}".format(plugin.version))
|
|
|
|
        # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
        logger.info("Loading network files")

        xml_filename = os.path.abspath(args.path_to_model)
        head, _ = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No input info is provided')
        elif len(input_info) != 1:
            raise AttributeError("Only networks with a single input layer are supported")

        # -------------------------------------- 3. Change network batch_size -------------------------------------
        batch_size = ie_network.batch_size
        key = list(input_info.keys()).pop()
        precision = input_info[key].precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            # Only single-input networks are supported, so reshape that one input
            shape = input_info[key].shape
            if input_info[key].layout != LAYOUT_TYPE:
                raise Exception('Unsupported model for batch size changing in automatic mode')
            shape[BATCH_SIZE_ELEM] = args.batch_size
            ie_network.reshape({key: shape})

            input_info = ie_network.inputs

            batch_size = args.batch_size

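        # Illustrative note (assuming an NCHW-layout image input): a blob originally shaped
        # [1, 3, 224, 224] would be reshaped above to [args.batch_size, 3, 224, 224]; the
        # spatial dimensions are hypothetical, the real ones come from the IR.
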
logger_message = "Network batch size was changed to: " if args.batch_size is not None else "Network batch size: "
|
|
logger_message += " {}, precision: {}".format(batch_size, precision)
|
|
logger.info(logger_message)
|
|
|
|
        # ------------------------------------- 4. Loading model to the plugin -------------------------------------
        logger.info("Loading model to the plugin")
        exe_network = plugin.load(ie_network, args.number_infer_requests)

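        # The load() call above returns an ExecutableNetwork with args.number_infer_requests
        # request slots (exposed as exe_network.requests); the async path below cycles through them.
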
        # ------------------------------------ 5. Performance measurements stuff -----------------------------------
        inputs = get_images(os.path.abspath(args.path_to_images), batch_size)

        if batch_size < len(inputs):
            logger.warning("Network batch size {} is less than images count {}"
                           ", some input files will be ignored".format(batch_size, len(inputs)))

        input_images = {key: fill_blob_with_image(inputs, input_info[key].shape)}

        times = list()
        duration = 0

        if args.number_iterations is None:
            duration = get_duration_in_secs(args.target_device)

        if args.api_type == 'sync':

            # Warming up - out of scope
            exe_network.infer(input_images)

            if args.number_iterations is not None:
                logger.info(
                    "Start inference synchronously ({} sync inference executions)".format(args.number_iterations))
                for iteration in range(args.number_iterations):
                    sync_infer_request(exe_network, times, input_images)

            else:
                logger.info("Start inference synchronously ({} s duration)".format(duration))
                start_time = datetime.now()
                current_time = start_time
                while (current_time - start_time).total_seconds() < duration:
                    current_time = sync_infer_request(exe_network, times, input_images)

            times.sort()
            latency = median(times)
            fps = batch_size / latency

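            # The median of the collected per-request times is reported as latency; each sync
            # request processes one full batch, so throughput is batch_size / latency.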
print("[BENCHMARK RESULT] Latency is {:.4f} msec".format(latency * 1e3))
|
|
print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))
|
|
        else:
            infer_requests = exe_network.requests

            if args.number_iterations is not None:
                logger.info("Start inference asynchronously ({} async inference executions, "
                            "{} inference requests in parallel)".format(args.number_iterations,
                                                                        args.number_infer_requests))
            else:
                logger.info("Start inference asynchronously ({} s duration, "
                            "{} inference requests in parallel)".format(duration, args.number_infer_requests))

            current_inference = 0
            required_inference_requests_were_executed = False
            previous_inference = 1 - args.number_infer_requests
            step = 0
            steps_count = args.number_infer_requests - 1
            if args.number_iterations is not None:
                steps_count += args.number_iterations

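            # The requests are used round-robin: each loop iteration starts request
            # `current_inference` and waits on `previous_inference`, which was started
            # number_infer_requests - 1 iterations earlier, so all slots stay in flight.
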
            # Warming up - out of scope
            infer_requests[0].async_infer(input_images)
            infer_requests[0].wait()

            start_time = datetime.now()
            while not required_inference_requests_were_executed or step < steps_count or \
                    args.number_iterations is None and (datetime.now() - start_time).total_seconds() < duration:
                exe_network.start_async(current_inference, input_images)

                if previous_inference >= 0:
                    status = infer_requests[previous_inference].wait()
                    if status != 0:
                        raise Exception("Infer request not completed successfully")

                current_inference += 1
                if current_inference >= args.number_infer_requests:
                    current_inference = 0
                    required_inference_requests_were_executed = True

                previous_inference += 1
                if previous_inference >= args.number_infer_requests:
                    previous_inference = 0

                step += 1

            # Wait for the remaining inference requests to complete
            for not_completed_index in range(args.number_infer_requests):
                if infer_requests[not_completed_index].wait(0) != 0:
                    infer_requests[not_completed_index].wait()

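            # `step` counts the async requests started in the loop above; each one processes a
            # full batch, so throughput is batch_size * step / wall-clock time. No latency is
            # reported for the async path because executions overlap.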
            total_duration = (datetime.now() - start_time).total_seconds()
            fps = batch_size * step / total_duration

            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))

        del exe_network
        del plugin

    except Exception as e:
        logging.exception(e)
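

# A minimal usage sketch, assuming this module lives in a package whose utils/benchmark_utils.py
# provides parse_args(), validate_args(), logger, the *_DEVICE_NAME constants and the other
# helpers used above (the relative import means the file cannot be run as a standalone script):
#
#     python -m <your_benchmark_package> -m model.xml -i images/ -d CPU
#
# The flag names shown here are hypothetical; the real ones are defined by parse_args().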