diff --git a/samples/cpp/benchmark_app/README.md b/samples/cpp/benchmark_app/README.md
index c18cb061042..0b354e6946b 100644
--- a/samples/cpp/benchmark_app/README.md
+++ b/samples/cpp/benchmark_app/README.md
@@ -113,51 +113,51 @@ Running the application with the `-h` or `--help` option yields the following us
 benchmark_app [OPTION]
 Options:
 
-    -h, --help                Print a usage message
-    -m ""                     Required. Path to an .xml/.onnx file with a trained model or to a .blob files with a trained compiled model.
-    -i ""                     Optional. Path to a folder with images and/or binaries or to specific image or binary file.
+    -h, --help                    Print the usage message
+    -m                            Required. Path to an .xml/.onnx file with a trained model or to a .blob files with a trained compiled model.
+    -i                            Optional. Path to a folder with images and/or binaries or to specific image or binary file.
                                   In case of dynamic shapes models with several inputs provide the same number of files for each input (except cases with single file for any input):"input1:1.jpg input2:1.bin", "input1:1.bin,2.bin input2:3.bin input3:4.bin,5.bin ".
                                   Also you can pass specific keys for inputs: "random" - for fillling input with random data, "image_info" - for filling input with image size.
                                   You should specify either one files set to be used for all inputs (without providing input names) or separate files sets for every input of model (providing inputs names).
-    -d ""                     Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU. Use "-d HETERO:" format to specify HETERO plugin. Use "-d MULTI:" format to specify MULTI plugin. The application looks for a suitable plugin for the specified device.
-    -extensions ""            Required for custom layers (extensions). Absolute path to a shared library with the kernels implementations.
-    -c ""                     Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
-    -hint "performance hint (latency or throughput or cumulative_throughput or none)"   Optional. Performance hint allows the OpenVINO device to select the right model-specific settings.
+    -d                            Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU. Use "-d HETERO:" format to specify HETERO plugin. Use "-d MULTI:" format to specify MULTI plugin. The application looks for a suitable plugin for the specified device.
+    -extensions                   Required for custom layers (extensions). Absolute path to a shared library with the kernels implementations.
+    -c                            Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
+    -hint (latency or throughput or cumulative_throughput or none)   Optional. Performance hint allows the OpenVINO device to select the right model-specific settings.
                                   'throughput' or 'tput': device performance mode will be set to THROUGHPUT.
                                   'cumulative_throughput' or 'ctput': device performance mode will be set to CUMULATIVE_THROUGHPUT.
                                   'latency': device performance mode will be set to LATENCY.
                                   'none': no device performance mode will be set.
                                   Using explicit 'nstreams' or other device-specific options, please set hint to 'none'
-    -api ""                   Optional (deprecated). Enable Sync/Async API. Default value is "async".
-    -niter ""                 Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
-    -nireq ""                 Optional. Number of infer requests. Default value is determined automatically for device.
-    -b ""                     Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation.
-    -t                        Optional. Time in seconds to execute topology.
-    -shape                    Optional. Set shape for model input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size. This parameter affect model input shape and can be dynamic. For dynamic dimensions use symbol `?` or '-1'. Ex. [?,3,?,?]. For bounded dimensions specify range 'min..max'. Ex. [1..10,3,?,?].
-    -data_shape               Required for models with dynamic shapes. Set shape for input blobs. In case of one input size: "[1,3,224,224]" or "input1[1,3,224,224],input2[1,4]". In case of several input sizes provide the same number for each input (except cases with single shape for any input): "[1,3,128,128][3,3,128,128][1,3,320,320]", "input1[1,1,128,128][1,1,256,256],input2[80,1]" or "input1[1,192][1,384],input2[1,192][1,384],input3[1,192][1,384],input4[1,192][1,384]". If model shapes are all static specifying the option will cause an exception.
-    -layout                   Optional. Prompts how model layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
-    -cache_dir ""             Optional. Enables caching of loaded models to specified directory. List of devices which support caching is shown at the end of this message.
-    -load_from_file           Optional. Loads model from file directly without read_model. All CNNNetwork options (like re-shape) will be ignored
-    -latency_percentile       Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).
+    -api                          Optional (deprecated). Enable Sync/Async API. Default value is "async".
+    -niter                        Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
+    -nireq                        Optional. Number of infer requests. Default value is determined automatically for device.
+    -b                            Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation.
+    -t                            Optional. Time in seconds to execute topology.
+    -shape                        Optional. Set shape for model input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size. This parameter affect model input shape and can be dynamic. For dynamic dimensions use symbol `?` or '-1'. Ex. [?,3,?,?]. For bounded dimensions specify range 'min..max'. Ex. [1..10,3,?,?].
+    -data_shape                   Required for models with dynamic shapes. Set shape for input blobs. In case of one input size: "[1,3,224,224]" or "input1[1,3,224,224],input2[1,4]". In case of several input sizes provide the same number for each input (except cases with single shape for any input): "[1,3,128,128][3,3,128,128][1,3,320,320]", "input1[1,1,128,128][1,1,256,256],input2[80,1]" or "input1[1,192][1,384],input2[1,192][1,384],input3[1,192][1,384],input4[1,192][1,384]". If model shapes are all static specifying the option will cause an exception.
+    -layout                       Optional. Prompts how model layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
+    -cache_dir                    Optional. Enables caching of loaded models to specified directory. List of devices which support caching is shown at the end of this message.
+    -load_from_file               Optional. Loads model from file directly without read_model. All CNNNetwork options (like re-shape) will be ignored
+    -latency_percentile           Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).
 
   device-specific performance options:
-    -nstreams ""              Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices (for HETERO and MULTI device cases use format :,: or just ). Default value is determined automatically for a device.Please note that although the automatic selection usually provides a reasonable performance, it still may be non - optimal for some cases, especially for very small models. See sample's README for more details. Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency estimations the number of streams should be set to 1.
-    -nthreads ""              Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
-    -pin ("YES"|"CORE")/"HYBRID_AWARE"/("NO"|"NONE")/"NUMA"   Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):
+    -nstreams                     Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices (for HETERO and MULTI device cases use format :,: or just ). Default value is determined automatically for a device.Please note that although the automatic selection usually provides a reasonable performance, it still may be non - optimal for some cases, especially for very small models. See sample's README for more details. Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency estimations the number of streams should be set to 1.
+    -nthreads                     Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
+    -pin ("YES"|"CORE") / "HYBRID_AWARE" / ("NO"|"NONE") / "NUMA"   Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice):
                                   enabling threads->cores pinning("YES", which is already default for any conventional CPU),
                                   letting the runtime to decide on the threads->different core types("HYBRID_AWARE", which is default on the hybrid CPUs)
                                   threads->(NUMA)nodes("NUMA")
                                   or completely disable("NO") CPU inference threads pinning
 
   Statistics dumping options:
-    -report_type ""           Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the model. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
-    -report_folder            Optional. Path to a folder where statistics report is stored.
-    -json_stats               Optional. Enables JSON-based statistics output (by default reporting system will use CSV format). Should be used together with -report_folder option.
-    -exec_graph_path          Optional. Path to a file where to store executable graph information serialized.
-    -pc                       Optional. Report performance counters.
-    -pcsort                   Optional. Report performance counters and analysis the sort hotpoint opts. "sort" Analysis opts time cost, print by hotpoint order "no_sort" Analysis opts time cost, print by normal order "simple_sort" Analysis opts time cost, only print EXECUTED opts by normal order
-    -pcseq                    Optional. Report latencies for each shape in -data_shape sequence.
-    -dump_config              Optional. Path to JSON file to dump IE parameters, which were set by application.
-    -load_config              Optional. Path to JSON file to load custom IE parameters. Please note, command line parameters have higher priority then parameters from configuration file.
+    -report_type                  Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the model. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
+    -report_folder                Optional. Path to a folder where statistics report is stored.
+    -json_stats                   Optional. Enables JSON-based statistics output (by default reporting system will use CSV format). Should be used together with -report_folder option.
+    -exec_graph_path              Optional. Path to a file where to store executable graph information serialized.
+    -pc                           Optional. Report performance counters.
+    -pcsort                       Optional. Report performance counters and analysis the sort hotpoint opts. "sort" Analysis opts time cost, print by hotpoint order "no_sort" Analysis opts time cost, print by normal order "simple_sort" Analysis opts time cost, only print EXECUTED opts by normal order
+    -pcseq                        Optional. Report latencies for each shape in -data_shape sequence.
+    -dump_config                  Optional. Path to JSON file to dump IE parameters, which were set by application.
+    -load_config                  Optional. Path to JSON file to load custom IE parameters. Please note, command line parameters have higher priority then parameters from configuration file.
                                   Example 1: a simple JSON file for HW device with primary properties.
                                       {
                                          "CPU": {"NUM_STREAMS": "3", "PERF_COUNT": "NO"}
                                       }
@@ -180,16 +180,16 @@ Options:
                                       }
                                   }
 
-    -infer_precision ""Optional. Inference precision
-    -ip                       Optional. Specifies precision for all input layers of the model.
-    -op                       Optional. Specifies precision for all output layers of the model.
-    -iop ""                   Optional. Specifies precision for input and output layers by name.
+    -infer_precision              Optional. Specifies the inference precision. Example #1: '-infer_precision bf16'. Example #2: '-infer_precision CPU:bf16,GPU:f32'
+    -ip                           Optional. Specifies precision for all input layers of the model.
+    -op                           Optional. Specifies precision for all output layers of the model.
+    -iop                          Optional. Specifies precision for input and output layers by name.
                                   Example: -iop "input:FP16, output:FP16".
                                   Notice that quotes are required.
                                   Overwrites precision from ip and op options for specified layers.
-    -mean_values [R,G,B]      Optional. Mean values to be used for the input image per channel. Values to be provided in the [R,G,B] format. Can be defined for desired input of the model, for example: "--mean_values data[255,255,255],info[255,255,255]". The exact meaning and order of channels depend on how the original model was trained. Applying the values affects performance and may cause type conversion
-    -scale_values [R,G,B]     Optional. Scale values to be used for the input image per channel. Values are provided in the [R,G,B] format. Can be defined for desired input of the model, for example: "--scale_values data[255,255,255],info[255,255,255]". The exact meaning and order of channels depend on how the original model was trained. If both --mean_values and --scale_values are specified, the mean is subtracted first and then scale is applied regardless of the order of options in command line. Applying the values affects performance and may cause type conversion
-    -inference_only           Optional. Measure only inference stage. Default option for static models. Dynamic models are measured in full mode which includes inputs setup stage, inference only mode available for them with single input data shape only. To enable full mode for static models pass "false" value to this argument: ex. "-inference_only=false".
+    -mean_values [R,G,B]          Optional. Mean values to be used for the input image per channel. Values to be provided in the [R,G,B] format. Can be defined for desired input of the model, for example: "--mean_values data[255,255,255],info[255,255,255]". The exact meaning and order of channels depend on how the original model was trained. Applying the values affects performance and may cause type conversion
+    -scale_values [R,G,B]         Optional. Scale values to be used for the input image per channel. Values are provided in the [R,G,B] format. Can be defined for desired input of the model, for example: "--scale_values data[255,255,255],info[255,255,255]". The exact meaning and order of channels depend on how the original model was trained. If both --mean_values and --scale_values are specified, the mean is subtracted first and then scale is applied regardless of the order of options in command line. Applying the values affects performance and may cause type conversion
+    -inference_only               Optional. Measure only inference stage. Default option for static models. Dynamic models are measured in full mode which includes inputs setup stage, inference only mode available for them with single input data shape only. To enable full mode for static models pass "false" value to this argument: ex. "-inference_only=false".
 ```
 
 Running the application with the empty list of options yields the usage message given above and an error message.
diff --git a/samples/cpp/benchmark_app/benchmark_app.hpp b/samples/cpp/benchmark_app/benchmark_app.hpp
index 1a18097509f..bd73b88122d 100644
--- a/samples/cpp/benchmark_app/benchmark_app.hpp
+++ b/samples/cpp/benchmark_app/benchmark_app.hpp
@@ -24,7 +24,7 @@
 #endif
 
 /// @brief message for help argument
-static const char help_message[] = "Print a usage message";
+static const char help_message[] = "Print the usage message";
 
 /// @brief message for images argument
 static const char input_message[] =
@@ -121,7 +121,7 @@ static const char batch_size_message[] =
 
 // @brief message for CPU threads pinning option
 static const char infer_threads_pinning_message[] =
-    "Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):\n"
+    "Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice):\n"
     "\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n"
     "\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on "
     "the hybrid CPUs) \n"
@@ -226,7 +226,9 @@ static const char load_from_file_message[] = "Optional. Loads model from file di
                                              " All CNNNetwork options (like re-shape) will be ignored";
 
 // @brief message for inference_precision
-static const char inference_precision_message[] = "Optional. Inference precision";
+static const char inference_precision_message[] =
+    "Optional. Specifies the inference precision. Example #1: '-infer_precision bf16'. Example #2: '-infer_precision "
+    "CPU:bf16,GPU:f32'";
 
 static constexpr char inputs_precision_message[] =
     "Optional. Specifies precision for all input layers of the model.";
@@ -402,48 +404,48 @@ static void show_usage() {
     std::cout << "benchmark_app [OPTION]" << std::endl;
     std::cout << "Options:" << std::endl;
     std::cout << std::endl;
-    std::cout << "    -h, --help                " << help_message << std::endl;
-    std::cout << "    -m \"\"                     " << model_message << std::endl;
-    std::cout << "    -i \"\"                     " << input_message << std::endl;
-    std::cout << "    -d \"\"                     " << target_device_message << std::endl;
-    std::cout << "    -extensions \"\"            " << custom_extensions_library_message << std::endl;
-    std::cout << "    -c \"\"                     " << custom_cldnn_message << std::endl;
-    std::cout << "    -hint \"performance hint (latency or throughput or cumulative_throughput or none)\"   "
+    std::cout << "    -h, --help                    " << help_message << std::endl;
+    std::cout << "    -m                            " << model_message << std::endl;
+    std::cout << "    -i                            " << input_message << std::endl;
+    std::cout << "    -d                            " << target_device_message << std::endl;
+    std::cout << "    -extensions                   " << custom_extensions_library_message << std::endl;
+    std::cout << "    -c                            " << custom_cldnn_message << std::endl;
+    std::cout << "    -hint (latency or throughput or cumulative_throughput or none)   "
               << hint_message << std::endl;
-    std::cout << "    -api \"\"                   " << api_message << std::endl;
-    std::cout << "    -niter \"\"                 " << iterations_count_message << std::endl;
-    std::cout << "    -nireq \"\"                 " << infer_requests_count_message << std::endl;
-    std::cout << "    -b \"\"                     " << batch_size_message << std::endl;
-    std::cout << "    -t                        " << execution_time_message << std::endl;
-    std::cout << "    -shape                    " << shape_message << std::endl;
-    std::cout << "    -data_shape               " << data_shape_message << std::endl;
-    std::cout << "    -layout                   " << layout_message << std::endl;
-    std::cout << "    -cache_dir \"\"             " << cache_dir_message << std::endl;
-    std::cout << "    -load_from_file           " << load_from_file_message << std::endl;
-    std::cout << "    -latency_percentile       " << infer_latency_percentile_message << std::endl;
+    std::cout << "    -api                          " << api_message << std::endl;
+    std::cout << "    -niter                        " << iterations_count_message << std::endl;
+    std::cout << "    -nireq                        " << infer_requests_count_message << std::endl;
+    std::cout << "    -b                            " << batch_size_message << std::endl;
+    std::cout << "    -t                            " << execution_time_message << std::endl;
+    std::cout << "    -shape                        " << shape_message << std::endl;
+    std::cout << "    -data_shape                   " << data_shape_message << std::endl;
+    std::cout << "    -layout                       " << layout_message << std::endl;
+    std::cout << "    -cache_dir                    " << cache_dir_message << std::endl;
+    std::cout << "    -load_from_file               " << load_from_file_message << std::endl;
+    std::cout << "    -latency_percentile           " << infer_latency_percentile_message << std::endl;
     std::cout << std::endl << "  device-specific performance options:" << std::endl;
-    std::cout << "    -nstreams \"\"              " << infer_num_streams_message << std::endl;
-    std::cout << "    -nthreads \"\"              " << infer_num_threads_message << std::endl;
-    std::cout << "    -pin (\"YES\"|\"CORE\")/\"HYBRID_AWARE\"/(\"NO\"|\"NONE\")/\"NUMA\"   "
+    std::cout << "    -nstreams                     " << infer_num_streams_message << std::endl;
+    std::cout << "    -nthreads                     " << infer_num_threads_message << std::endl;
+    std::cout << "    -pin (\"YES\"|\"CORE\") / \"HYBRID_AWARE\" / (\"NO\"|\"NONE\") / \"NUMA\"   "
              << infer_threads_pinning_message << std::endl;
 #ifdef HAVE_DEVICE_MEM_SUPPORT
     std::cout << "    -use_device_mem           " << use_device_mem_message << std::endl;
 #endif
     std::cout << std::endl << "  Statistics dumping options:" << std::endl;
-    std::cout << "    -report_type \"\"           " << report_type_message << std::endl;
-    std::cout << "    -report_folder            " << report_folder_message << std::endl;
-    std::cout << "    -json_stats               " << json_stats_message << std::endl;
-    std::cout << "    -exec_graph_path          " << exec_graph_path_message << std::endl;
-    std::cout << "    -pc                       " << pc_message << std::endl;
-    std::cout << "    -pcsort                   " << pc_sort_message << std::endl;
-    std::cout << "    -pcseq                    " << pcseq_message << std::endl;
-    std::cout << "    -dump_config              " << dump_config_message << std::endl;
-    std::cout << "    -load_config              " << load_config_message << std::endl;
-    std::cout << "    -infer_precision \"\"" << inference_precision_message << std::endl;
-    std::cout << "    -ip                       " << inputs_precision_message << std::endl;
-    std::cout << "    -op                       " << outputs_precision_message << std::endl;
-    std::cout << "    -iop \"\"                   " << iop_message << std::endl;
-    std::cout << "    -mean_values [R,G,B]      " << mean_values_message << std::endl;
-    std::cout << "    -scale_values [R,G,B]     " << scale_values_message << std::endl;
-    std::cout << "    -inference_only           " << inference_only_message << std::endl;
+    std::cout << "    -report_type                  " << report_type_message << std::endl;
+    std::cout << "    -report_folder                " << report_folder_message << std::endl;
+    std::cout << "    -json_stats                   " << json_stats_message << std::endl;
+    std::cout << "    -exec_graph_path              " << exec_graph_path_message << std::endl;
+    std::cout << "    -pc                           " << pc_message << std::endl;
+    std::cout << "    -pcsort                       " << pc_sort_message << std::endl;
+    std::cout << "    -pcseq                        " << pcseq_message << std::endl;
+    std::cout << "    -dump_config                  " << dump_config_message << std::endl;
+    std::cout << "    -load_config                  " << load_config_message << std::endl;
+    std::cout << "    -infer_precision              " << inference_precision_message << std::endl;
+    std::cout << "    -ip                           " << inputs_precision_message << std::endl;
+    std::cout << "    -op                           " << outputs_precision_message << std::endl;
+    std::cout << "    -iop                          " << iop_message << std::endl;
+    std::cout << "    -mean_values [R,G,B]          " << mean_values_message << std::endl;
+    std::cout << "    -scale_values [R,G,B]         " << scale_values_message << std::endl;
+    std::cout << "    -inference_only               " << inference_only_message << std::endl;
 }
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
index b83d88bda12..31038ecd963 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
@@ -161,7 +161,7 @@ def parse_args():
                           "    }\n"
                           "  }\n")
     args.add_argument('-infer_precision', type=str, required=False,
-                      help='Optional. Hint to specifies inference precision. Example: -infer_precision CPU:bf16,GPU:f32')
+                      help='Optional. Specifies the inference precision. Example #1: \'-infer_precision bf16\'. Example #2: \'-infer_precision CPU:bf16,GPU:f32\'')
     args.add_argument('-ip', '--input_precision', type=str, required=False, choices=['u8', 'U8', 'f16','FP16', 'f32','FP32'],
                       help='Optional. Specifies precision for all input layers of the model.')
     args.add_argument('-op', '--output_precision', type=str, required=False, choices=['u8', 'U8', 'f16','FP16', 'f32','FP32'],
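
For reviewers, a minimal invocation sketch that exercises the options whose help text this patch touches; the model path and report folder below are illustrative placeholders, not values taken from the patch:

```sh
# Illustrative only: model.xml and ./reports are hypothetical paths.
./benchmark_app -m model.xml -d CPU \
    -hint throughput \
    -infer_precision bf16 \
    -report_type no_counters -report_folder ./reports -json_stats
```

The Python benchmark_app in tools/benchmark_tool accepts the same two forms shown in the updated help text, '-infer_precision bf16' and '-infer_precision CPU:bf16,GPU:f32'.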