Merge branch 'itikhono/ts/fix_performance_issues' of https://github.com/itikhono/openvino into itikhono/ts/fix_performance_issues
Commit: 9333c1cac5
@@ -172,7 +172,7 @@ jobs:
cmakeArgs: >
-GNinja
-DCMAKE_VERBOSE_MAKEFILE=ON
-DCMAKE_COMPILE_WARNING_AS_ERROR=OFF
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
-DOpenCV_DIR=$(INSTALL_OPENCV)/cmake
-DENABLE_PYTHON=OFF
-DENABLE_TESTS=ON
@@ -263,9 +263,6 @@ jobs:
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_conditional_compilation_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ConditionalCompilation.xml
displayName: 'Conditional Compilation Tests'

- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\paddle_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-PaddleTests.xml
displayName: 'Paddle Tests'

- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_ir_frontend_tests --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-IRFrontend.xml
displayName: 'IR Frontend Tests'
@@ -155,6 +155,7 @@ jobs:
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) ^
-DENABLE_PROFILING_ITT=OFF ^
-DSELECTIVE_BUILD=ON ^
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON ^
-DSELECTIVE_BUILD_STAT=$(BUILD_DIR)\*.csv ^
-DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" ^
-DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" ^
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:774f53500c6ed360001ca0478c96452c1037fa2c42eb39459d13c836ebcaeee1
size 22041
oid sha256:68d5003431670cea03abc68eba89ffc9c566e08782ae6f5a80dd4a2a20766847
size 21883
@@ -11,7 +11,7 @@ idna==3.4
imagesize==1.2.0
importlib-metadata==4.4.0
iniconfig==1.1.1
ipython==8.5.0
ipython==8.10.0
Jinja2==3.1.2
lxml>=4.9.2
MarkupSafe==2.1.1
@@ -96,7 +96,7 @@ int readBmpImage(const char* fileName, BitMap* image) {
int i;
int image_height = image->height;
for (i = 0; i < image_height; i++) {
unsigned int storeAt = image->infoHeader.height < 0 ? i : (unsigned int)image_height - 1 - i;
int storeAt = image->infoHeader.height < 0 ? i : image_height - 1 - i;
cnt = fread(image->data + row_size * storeAt, row_size, sizeof(unsigned char), input);
if (cnt != sizeof(unsigned char)) {
printf("[BMP] file read error\n");
@@ -150,7 +150,7 @@ size_t read_image_from_file(const char* img_path, unsigned char* img_data, size_

if (fp) {
fseek(fp, 0, SEEK_END);
if (ftell(fp) >= size) {
if ((size_t)ftell(fp) >= size) {
fseek(fp, 0, SEEK_SET);
read_size = fread(img_data, 1, size, fp);
}
@@ -109,78 +109,45 @@ Running the application with the `-h` or `--help` option yields the following us
|
||||
```
|
||||
[Step 1/11] Parsing and validating input arguments
|
||||
[ INFO ] Parsing input parameters
|
||||
usage: benchmark_app [OPTION]
|
||||
|
||||
benchmark_app [OPTION]
|
||||
Options:
|
||||
|
||||
-h, --help Print the usage message
|
||||
-m <path> Required. Path to an .xml/.onnx file with a trained model or to a .blob files with a trained compiled model.
|
||||
-i <path> Optional. Path to a folder with images and/or binaries or to specific image or binary file.
|
||||
In case of dynamic shapes models with several inputs provide the same number of files for each input (except cases with single file for any input):"input1:1.jpg input2:1.bin", "input1:1.bin,2.bin input2:3.bin input3:4.bin,5.bin ". Also you can pass specific keys for inputs: "random" - for filling input with random data, "image_info" - for filling input with image size.
|
||||
You should specify either one files set to be used for all inputs (without providing input names) or separate files sets for every input of model (providing inputs names).
|
||||
Currently supported data types: bmp, bin, npy.
|
||||
If OPENCV is enabled, this functionality is extended with the following data types:
|
||||
dib, jpeg, jpg, jpe, jp2, png, pbm, pgm, ppm, sr, ras, tiff, tif.
|
||||
-d <device> Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU. Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. Use "-d MULTI:<comma-separated_devices_list>" format to specify MULTI plugin. The application looks for a suitable plugin for the specified device.
|
||||
-extensions <absolute_path> Required for custom layers (extensions). Absolute path to a shared library with the kernels implementations.
|
||||
-c <absolute_path> Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
|
||||
-hint <performance hint> (latency or throughput or cumulative_throughput or none) Optional. Performance hint allows the OpenVINO device to select the right model-specific settings.
|
||||
'throughput' or 'tput': device performance mode will be set to THROUGHPUT.
|
||||
'cumulative_throughput' or 'ctput': device performance mode will be set to CUMULATIVE_THROUGHPUT.
|
||||
'latency': device performance mode will be set to LATENCY.
|
||||
'none': no device performance mode will be set.
|
||||
Using explicit 'nstreams' or other device-specific options, please set hint to 'none'
|
||||
-api <sync/async> Optional (deprecated). Enable Sync/Async API. Default value is "async".
|
||||
-niter <integer> Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
|
||||
-nireq <integer> Optional. Number of infer requests. Default value is determined automatically for device.
|
||||
-b <integer> Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation.
|
||||
-t Optional. Time in seconds to execute topology.
|
||||
|
||||
Input shapes
|
||||
-b <integer> Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation.
|
||||
-shape Optional. Set shape for model input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size. This parameter affects model input shape and can be dynamic. For dynamic dimensions use symbol `?` or '-1'. Ex. [?,3,?,?]. For bounded dimensions specify range 'min..max'. Ex. [1..10,3,?,?].
|
||||
-data_shape Required for models with dynamic shapes. Set shape for input blobs. In case of one input size: "[1,3,224,224]" or "input1[1,3,224,224],input2[1,4]". In case of several input sizes provide the same number for each input (except cases with single shape for any input): "[1,3,128,128][3,3,128,128][1,3,320,320]", "input1[1,1,128,128][1,1,256,256],input2[80,1]" or "input1[1,192][1,384],input2[1,192][1,384],input3[1,192][1,384],input4[1,192][1,384]". If model shapes are all static specifying the option will cause an exception.
|
||||
-layout Optional. Prompts how model layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
|
||||
|
||||
Advanced options
|
||||
-extensions <absolute_path> Required for custom layers (extensions). Absolute path to a shared library with the kernels implementations.
|
||||
-c <absolute_path> Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
|
||||
-cache_dir <path> Optional. Enables caching of loaded models to specified directory. List of devices which support caching is shown at the end of this message.
|
||||
-load_from_file Optional. Loads model from file directly without read_model. All CNNNetwork options (like re-shape) will be ignored
|
||||
-latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).
|
||||
|
||||
device-specific performance options:
|
||||
-api <sync/async> Optional (deprecated). Enable Sync/Async API. Default value is "async".
|
||||
-nireq <integer> Optional. Number of infer requests. Default value is determined automatically for device.
|
||||
-nstreams <integer> Optional. Number of streams to use for inference on the CPU or GPU devices (for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just <nstreams>). Default value is determined automatically for a device.Please note that although the automatic selection usually provides a reasonable performance, it still may be non - optimal for some cases, especially for very small models. See sample's README for more details. Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency estimations the number of streams should be set to 1.
|
||||
-nthreads <integer> Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
|
||||
-pin <string> ("YES"|"CORE") / "HYBRID_AWARE" / ("NO"|"NONE") / "NUMA" Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice):
|
||||
enabling threads->cores pinning("YES", which is already default for any conventional CPU),
|
||||
letting the runtime to decide on the threads->different core types("HYBRID_AWARE", which is default on the hybrid CPUs)
|
||||
threads->(NUMA)nodes("NUMA") or
|
||||
completely disable("NO") CPU inference threads pinning
|
||||
|
||||
Statistics dumping options:
|
||||
-report_type <type> Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the model. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
|
||||
-report_folder Optional. Path to a folder where statistics report is stored.
|
||||
-json_stats Optional. Enables JSON-based statistics output (by default reporting system will use CSV format). Should be used together with -report_folder option.
|
||||
-exec_graph_path Optional. Path to a file where to store executable graph information serialized.
|
||||
-pc Optional. Report performance counters.
|
||||
-pcsort Optional. Report performance counters and analysis the sort hotpoint opts. "sort" Analysis opts time cost, print by hotpoint order "no_sort" Analysis opts time cost, print by normal order "simple_sort" Analysis opts time cost, only print EXECUTED opts by normal order
|
||||
-pcseq Optional. Report latencies for each shape in -data_shape sequence.
|
||||
-dump_config Optional. Path to JSON file to dump IE parameters, which were set by application.
|
||||
-load_config Optional. Path to JSON file to load custom IE parameters. Please note, command line parameters have higher priority than parameters from configuration file.
|
||||
Example 1: a simple JSON file for HW device with primary properties.
{
   "CPU": {"NUM_STREAMS": "3", "PERF_COUNT": "NO"}
}
Example 2: a simple JSON file for meta device(AUTO/MULTI) with HW device properties.
{
   "AUTO": {
      "PERFORMANCE_HINT": "",
      "PERF_COUNT": "NO",
      "DEVICE_PROPERTIES": {
         "CPU": {
            "INFERENCE_PRECISION_HINT": "f32",
            "NUM_STREAMS": "3"
         },
         "GPU": {
            "INFERENCE_PRECISION_HINT": "f32",
            "NUM_STREAMS": "5"
         }
      }
   }
}
|
||||
-inference_only Optional. Measure only inference stage. Default option for static models. Dynamic models are measured in full mode which includes inputs setup stage, inference only mode available for them with single input data shape only. To enable full mode for static models pass "false" value to this argument: ex. "-inference_only=false".
|
||||
-infer_precision Optional. Specifies the inference precision. Example #1: '-infer_precision bf16'. Example #2: '-infer_precision CPU:bf16,GPU:f32'
|
||||
|
||||
Preprocessing options:
|
||||
-ip <value> Optional. Specifies precision for all input layers of the model.
|
||||
-op <value> Optional. Specifies precision for all output layers of the model.
|
||||
-iop <value> Optional. Specifies precision for input and output layers by name.
|
||||
@@ -189,13 +156,53 @@ Options:
|
||||
Overwrites precision from ip and op options for specified layers.
|
||||
-mean_values [R,G,B] Optional. Mean values to be used for the input image per channel. Values to be provided in the [R,G,B] format. Can be defined for desired input of the model, for example: "--mean_values data[255,255,255],info[255,255,255]". The exact meaning and order of channels depend on how the original model was trained. Applying the values affects performance and may cause type conversion
|
||||
-scale_values [R,G,B] Optional. Scale values to be used for the input image per channel. Values are provided in the [R,G,B] format. Can be defined for desired input of the model, for example: "--scale_values data[255,255,255],info[255,255,255]". The exact meaning and order of channels depend on how the original model was trained. If both --mean_values and --scale_values are specified, the mean is subtracted first and then scale is applied regardless of the order of options in command line. Applying the values affects performance and may cause type conversion
|
||||
-inference_only Optional. Measure only inference stage. Default option for static models. Dynamic models are measured in full mode which includes inputs setup stage, inference only mode available for them with single input data shape only. To enable full mode for static models pass "false" value to this argument: ex. "-inference_only=false".
|
||||
|
||||
Device-specific performance options:
|
||||
-nthreads <integer> Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
|
||||
-pin <string> ("YES"|"CORE") / "HYBRID_AWARE" / ("NO"|"NONE") / "NUMA" Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice):
|
||||
enabling threads->cores pinning("YES", which is already default for any conventional CPU),
|
||||
letting the runtime to decide on the threads->different core types("HYBRID_AWARE", which is default on the hybrid CPUs)
|
||||
threads->(NUMA)nodes("NUMA") or
|
||||
completely disable("NO") CPU inference threads pinning
|
||||
|
||||
Statistics dumping options:
|
||||
-latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).
|
||||
-report_type <type> Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the model. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
|
||||
-report_folder Optional. Path to a folder where statistics report is stored.
|
||||
-json_stats Optional. Enables JSON-based statistics output (by default reporting system will use CSV format). Should be used together with -report_folder option.
|
||||
-pc Optional. Report performance counters.
|
||||
-pcsort Optional. Report performance counters and analysis the sort hotpoint opts. "sort" Analysis opts time cost, print by hotpoint order "no_sort" Analysis opts time cost, print by normal order "simple_sort" Analysis opts time cost, only print EXECUTED opts by normal order
|
||||
-pcseq Optional. Report latencies for each shape in -data_shape sequence.
|
||||
-exec_graph_path Optional. Path to a file where to store executable graph information serialized.
|
||||
-dump_config Optional. Path to JSON file to dump IE parameters, which were set by application.
|
||||
-load_config Optional. Path to JSON file to load custom IE parameters. Please note, command line parameters have higher priority than parameters from configuration file.
|
||||
Example 1: a simple JSON file for HW device with primary properties.
{
   "CPU": {"NUM_STREAMS": "3", "PERF_COUNT": "NO"}
}
Example 2: a simple JSON file for meta device(AUTO/MULTI) with HW device properties.
{
   "AUTO": {
      "PERFORMANCE_HINT": "",
      "PERF_COUNT": "NO",
      "DEVICE_PROPERTIES": {
         "CPU": {
            "INFERENCE_PRECISION_HINT": "f32",
            "NUM_STREAMS": "3"
         },
         "GPU": {
            "INFERENCE_PRECISION_HINT": "f32",
            "NUM_STREAMS": "5"
         }
      }
   }
}
```
|
||||
|
||||
Running the application with the empty list of options yields the usage message given above and an error message.
|
||||
|
||||
### More information on inputs
|
||||
The benchmark tool supports topologies with one or more inputs. If a topology is not data sensitive, you can skip the input parameter, and the inputs will be filled with random values. If a model has only image input(s), provide a folder with images or a path to an image as input. If a model has some specific input(s) (besides images), please prepare a binary file(s) that is filled with data of appropriate precision and provide a path to it as input. If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary inputs one by one.
|
||||
The benchmark tool supports topologies with one or more inputs. If a topology is not data sensitive, you can skip the input parameter, and the inputs will be filled with random values. If a model has only image input(s), provide a folder with images or a path to an image as input. If a model has some specific input(s) (besides images), please prepare a binary file(s) or numpy array(s) that is filled with data of appropriate precision and provide a path to it as input. If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary inputs one by one.
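For illustration, a hypothetical invocation that maps separate files to two named inputs (the model and file names here are made up) could look like this:

```
./benchmark_app -m model.xml -d CPU -i "input1:1.jpg input2:1.bin"
```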
|
||||
|
||||
## <a name="examples-of-running-the-tool-cpp"></a> Examples of Running the Tool
|
||||
This section provides step-by-step instructions on how to run the Benchmark Tool with the `asl-recognition` model from the [Open Model Zoo](@ref model_zoo) on CPU or GPU devices. It uses random data as the input.
|
||||
|
@@ -27,7 +27,10 @@ static const char input_message[] =
|
||||
" \"image_info\" - for filling input with image size.\n"
|
||||
" You should specify either one files set to be used for all inputs (without "
|
||||
"providing "
|
||||
"input names) or separate files sets for every input of model (providing inputs names).";
|
||||
"input names) or separate files sets for every input of model (providing inputs names).\n"
|
||||
"Currently supported data types: bmp, bin, npy.\n"
|
||||
"If OPENCV is enabled, this functionality is extended with the following data types:\n"
|
||||
"dib, jpeg, jpg, jpe, jp2, png, pbm, pgm, ppm, sr, ras, tiff, tif.";
|
||||
|
||||
/// @brief message for model argument
|
||||
static const char model_message[] =
|
||||
@@ -45,9 +48,6 @@ static const char hint_message[] =
|
||||
" Using explicit 'nstreams' or other device-specific options, please set hint to "
|
||||
"'none'";
|
||||
|
||||
/// @brief message for execution mode
|
||||
static const char api_message[] = "Optional (deprecated). Enable Sync/Async API. Default value is \"async\".";
|
||||
|
||||
/// @brief message for assigning cnn calculation to device
|
||||
static const char target_device_message[] =
|
||||
"Optional. Specify a target device to infer on (the list of available devices is shown below). "
|
||||
@@ -61,134 +61,21 @@ static const char iterations_count_message[] =
|
||||
"Optional. Number of iterations. "
|
||||
"If not specified, the number of iterations is calculated depending on a device.";
|
||||
|
||||
/// @brief message for requests count
|
||||
static const char infer_requests_count_message[] =
|
||||
"Optional. Number of infer requests. Default value is determined automatically for device.";
|
||||
// @brief message for enabling caching
|
||||
static const char cache_dir_message[] = "Optional. Enables caching of loaded models to specified directory. "
|
||||
"List of devices which support caching is shown at the end of this message.";
|
||||
|
||||
// @brief message for single load network
|
||||
static const char load_from_file_message[] = "Optional. Loads model from file directly without read_model."
|
||||
" All CNNNetwork options (like re-shape) will be ignored";
|
||||
|
||||
/// @brief message for execution time
|
||||
static const char execution_time_message[] = "Optional. Time in seconds to execute topology.";
|
||||
|
||||
/// @brief message for #threads for CPU inference
|
||||
static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU "
|
||||
"(including HETERO and MULTI cases).";
|
||||
|
||||
/// @brief message for #streams for CPU inference
|
||||
static const char infer_num_streams_message[] =
|
||||
"Optional. Number of streams to use for inference on the CPU or GPU devices "
|
||||
"(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just "
|
||||
"<nstreams>). "
|
||||
"Default value is determined automatically for a device.Please note that although the "
|
||||
"automatic selection "
|
||||
"usually provides a reasonable performance, it still may be non - optimal for some cases, "
|
||||
"especially for "
|
||||
"very small models. See sample's README for more details. "
|
||||
"Also, using nstreams>1 is inherently throughput-oriented option, "
|
||||
"while for the best-latency estimations the number of streams should be set to 1.";
|
||||
|
||||
/// @brief message for latency percentile settings
|
||||
static const char infer_latency_percentile_message[] =
|
||||
"Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value "
|
||||
"is 50 (median).";
|
||||
|
||||
/// @brief message for enforcing of BF16 execution where it is possible
|
||||
static const char enforce_bf16_message[] =
|
||||
"Optional. By default floating point operations execution in bfloat16 precision are enforced "
|
||||
"if supported by platform.\n"
|
||||
" 'true' - enable bfloat16 regardless of platform support\n"
|
||||
" 'false' - disable bfloat16 regardless of platform support";
|
||||
|
||||
/// @brief message for user library argument
|
||||
static const char custom_extensions_library_message[] =
|
||||
"Required for custom layers (extensions). Absolute path to a shared library with the kernels "
|
||||
"implementations.";
|
||||
|
||||
/// @brief message for clDNN custom kernels desc
|
||||
static const char custom_cldnn_message[] =
|
||||
"Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
|
||||
|
||||
static const char batch_size_message[] =
|
||||
"Optional. Batch size value. If not specified, the batch size value is determined from "
|
||||
"Intermediate Representation.";
|
||||
|
||||
// @brief message for CPU threads pinning option
|
||||
static const char infer_threads_pinning_message[] =
|
||||
"Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice):\n"
|
||||
"\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n"
|
||||
"\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on "
|
||||
"the hybrid CPUs) \n"
|
||||
"\t\t\t\tthreads->(NUMA)nodes(\"NUMA\") or \n"
|
||||
"\t\t\t\tcompletely disable(\"NO\") CPU inference threads pinning";
|
||||
|
||||
// @brief message for report_type option
|
||||
static const char report_type_message[] =
|
||||
"Optional. Enable collecting statistics report. \"no_counters\" report contains "
|
||||
"configuration options specified, resulting FPS and latency. \"average_counters\" "
|
||||
"report extends \"no_counters\" report and additionally includes average PM "
|
||||
"counters values for each layer from the model. \"detailed_counters\" report "
|
||||
"extends \"average_counters\" report and additionally includes per-layer PM "
|
||||
"counters and latency for each executed infer request.";
|
||||
|
||||
// @brief message for report_folder option
|
||||
static const char report_folder_message[] = "Optional. Path to a folder where statistics report is stored.";
|
||||
|
||||
// @brief message for json_stats option
|
||||
static const char json_stats_message[] = "Optional. Enables JSON-based statistics output (by default reporting system "
|
||||
"will use CSV format). Should be used together with -report_folder option.";
|
||||
|
||||
// @brief message for exec_graph_path option
|
||||
static const char exec_graph_path_message[] =
|
||||
"Optional. Path to a file where to store executable graph information serialized.";
|
||||
|
||||
// @brief message for performance counters option
|
||||
static const char pc_message[] = "Optional. Report performance counters.";
|
||||
|
||||
// @brief message for sorted performance counters option
|
||||
static const char pc_sort_message[] =
|
||||
"Optional. Report performance counters and analysis the sort hotpoint opts. "
|
||||
" \"sort\" Analysis opts time cost, print by hotpoint order "
|
||||
" \"no_sort\" Analysis opts time cost, print by normal order "
|
||||
" \"simple_sort\" Analysis opts time cost, only print EXECUTED opts by normal order";
|
||||
|
||||
// @brief message for performance counters for sequence option
|
||||
static const char pcseq_message[] = "Optional. Report latencies for each shape in -data_shape sequence.";
|
||||
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
// @brief message for switching memory allocation type option
|
||||
static const char use_device_mem_message[] =
|
||||
"Optional. Switch between host and device memory allocation for input and output buffers.";
|
||||
#endif
|
||||
|
||||
// @brief message for load config option
|
||||
static const char load_config_message[] =
|
||||
"Optional. Path to JSON file to load custom IE parameters."
|
||||
" Please note, command line parameters have higher priority then parameters from configuration file.\n"
|
||||
" Example 1: a simple JSON file for HW device with primary properties.\n"
|
||||
" {\n"
|
||||
" \"CPU\": {\"NUM_STREAMS\": \"3\", \"PERF_COUNT\": \"NO\"}\n"
|
||||
" }\n"
|
||||
" Example 2: a simple JSON file for meta device(AUTO/MULTI) with HW device "
|
||||
"properties.\n"
|
||||
" {\n"
|
||||
" \"AUTO\": {\n"
|
||||
" \"PERFORMANCE_HINT\": \"\",\n"
|
||||
" \"PERF_COUNT\": \"NO\",\n"
|
||||
" \"DEVICE_PROPERTIES\": {\n"
|
||||
" \"CPU\": {\n"
|
||||
" \"INFERENCE_PRECISION_HINT\": \"f32\",\n"
|
||||
" \"NUM_STREAMS\": \"3\"\n"
|
||||
" },\n"
|
||||
" \"GPU\": {\n"
|
||||
" \"INFERENCE_PRECISION_HINT\": \"f32\",\n"
|
||||
" \"NUM_STREAMS\": \"5\"\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n";
|
||||
|
||||
// @brief message for dump config option
|
||||
static const char dump_config_message[] =
|
||||
"Optional. Path to JSON file to dump IE parameters, which were set by application.";
|
||||
|
||||
static const char shape_message[] =
|
||||
"Optional. Set shape for model input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
|
||||
" in case of one input size. This parameter affect model input shape and can be dynamic."
|
||||
@@ -208,13 +95,48 @@ static const char layout_message[] =
|
||||
"Optional. Prompts how model layouts should be treated by application. "
|
||||
"For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
|
||||
|
||||
// @brief message for enabling caching
|
||||
static const char cache_dir_message[] = "Optional. Enables caching of loaded models to specified directory. "
|
||||
"List of devices which support caching is shown at the end of this message.";
|
||||
/// @brief message for execution mode
|
||||
static const char api_message[] = "Optional (deprecated). Enable Sync/Async API. Default value is \"async\".";
|
||||
|
||||
// @brief message for single load network
|
||||
static const char load_from_file_message[] = "Optional. Loads model from file directly without read_model."
|
||||
" All CNNNetwork options (like re-shape) will be ignored";
|
||||
/// @brief message for #streams for CPU inference
|
||||
static const char infer_num_streams_message[] =
|
||||
"Optional. Number of streams to use for inference on the CPU or GPU devices "
|
||||
"(for HETERO and MULTI device cases use format <dev1>:<nstreams1>,<dev2>:<nstreams2> or just "
|
||||
"<nstreams>). "
|
||||
"Default value is determined automatically for a device.Please note that although the "
|
||||
"automatic selection "
|
||||
"usually provides a reasonable performance, it still may be non - optimal for some cases, "
|
||||
"especially for "
|
||||
"very small models. See sample's README for more details. "
|
||||
"Also, using nstreams>1 is inherently throughput-oriented option, "
|
||||
"while for the best-latency estimations the number of streams should be set to 1.";
|
||||
|
||||
/// @brief message for requests count
|
||||
static const char infer_requests_count_message[] =
|
||||
"Optional. Number of infer requests. Default value is determined automatically for device.";
|
||||
|
||||
/// @brief message for enforcing of BF16 execution where it is possible
|
||||
static const char enforce_bf16_message[] =
|
||||
"Optional. By default floating point operations execution in bfloat16 precision are enforced "
|
||||
"if supported by platform.\n"
|
||||
" 'true' - enable bfloat16 regardless of platform support\n"
|
||||
" 'false' - disable bfloat16 regardless of platform support";
|
||||
|
||||
/// @brief message for user library argument
|
||||
static const char custom_extensions_library_message[] =
|
||||
"Required for custom layers (extensions). Absolute path to a shared library with the kernels "
|
||||
"implementations.";
|
||||
|
||||
/// @brief message for clDNN custom kernels desc
|
||||
static const char custom_cldnn_message[] =
|
||||
"Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
|
||||
|
||||
static constexpr char inference_only_message[] =
|
||||
"Optional. Measure only inference stage. Default option for static models. Dynamic models"
|
||||
" are measured in full mode which includes inputs setup stage,"
|
||||
" inference only mode available for them with single input data shape only."
|
||||
" To enable full mode for static models pass \"false\" value to this argument:"
|
||||
" ex. \"-inference_only=false\".";
|
||||
|
||||
// @brief message for inference_precision
|
||||
static const char inference_precision_message[] =
|
||||
@@ -250,12 +172,93 @@ static constexpr char scale_values_message[] =
|
||||
"--scale_values are specified, the mean is subtracted first and then scale is applied regardless of the order of "
|
||||
"options in command line. Applying the values affects performance and may cause type conversion";
|
||||
|
||||
static constexpr char inference_only_message[] =
|
||||
"Optional. Measure only inference stage. Default option for static models. Dynamic models"
|
||||
" are measured in full mode which includes inputs setup stage,"
|
||||
" inference only mode available for them with single input data shape only."
|
||||
" To enable full mode for static models pass \"false\" value to this argument:"
|
||||
" ex. \"-inference_only=false\".\n";
|
||||
/// @brief message for #threads for CPU inference
|
||||
static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU "
|
||||
"(including HETERO and MULTI cases).";
|
||||
|
||||
// @brief message for CPU threads pinning option
|
||||
static const char infer_threads_pinning_message[] =
|
||||
"Optional. Explicit inference threads binding options (leave empty to let the OpenVINO make a choice):\n"
|
||||
"\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n"
|
||||
"\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on "
|
||||
"the hybrid CPUs) \n"
|
||||
"\t\t\t\tthreads->(NUMA)nodes(\"NUMA\") or \n"
|
||||
"\t\t\t\tcompletely disable(\"NO\") CPU inference threads pinning";
|
||||
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
// @brief message for switching memory allocation type option
|
||||
static const char use_device_mem_message[] =
|
||||
"Optional. Switch between host and device memory allocation for input and output buffers.";
|
||||
#endif
|
||||
|
||||
/// @brief message for latency percentile settings
|
||||
static const char infer_latency_percentile_message[] =
|
||||
"Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value "
|
||||
"is 50 (median).";
|
||||
|
||||
// @brief message for report_type option
|
||||
static const char report_type_message[] =
|
||||
"Optional. Enable collecting statistics report. \"no_counters\" report contains "
|
||||
"configuration options specified, resulting FPS and latency. \"average_counters\" "
|
||||
"report extends \"no_counters\" report and additionally includes average PM "
|
||||
"counters values for each layer from the model. \"detailed_counters\" report "
|
||||
"extends \"average_counters\" report and additionally includes per-layer PM "
|
||||
"counters and latency for each executed infer request.";
|
||||
|
||||
// @brief message for report_folder option
|
||||
static const char report_folder_message[] = "Optional. Path to a folder where statistics report is stored.";
|
||||
|
||||
// @brief message for json_stats option
|
||||
static const char json_stats_message[] = "Optional. Enables JSON-based statistics output (by default reporting system "
|
||||
"will use CSV format). Should be used together with -report_folder option.";
|
||||
|
||||
// @brief message for performance counters option
|
||||
static const char pc_message[] = "Optional. Report performance counters.";
|
||||
|
||||
// @brief message for sorted performance counters option
|
||||
static const char pc_sort_message[] =
|
||||
"Optional. Report performance counters and analysis the sort hotpoint opts. "
|
||||
" \"sort\" Analysis opts time cost, print by hotpoint order "
|
||||
" \"no_sort\" Analysis opts time cost, print by normal order "
|
||||
" \"simple_sort\" Analysis opts time cost, only print EXECUTED opts by normal order";
|
||||
|
||||
// @brief message for performance counters for sequence option
|
||||
static const char pcseq_message[] = "Optional. Report latencies for each shape in -data_shape sequence.";
|
||||
|
||||
// @brief message for exec_graph_path option
|
||||
static const char exec_graph_path_message[] =
|
||||
"Optional. Path to a file where to store executable graph information serialized.";
|
||||
|
||||
// @brief message for dump config option
|
||||
static const char dump_config_message[] =
|
||||
"Optional. Path to JSON file to dump IE parameters, which were set by application.";
|
||||
|
||||
// @brief message for load config option
|
||||
static const char load_config_message[] =
|
||||
"Optional. Path to JSON file to load custom IE parameters."
|
||||
" Please note, command line parameters have higher priority then parameters from configuration file.\n"
|
||||
" Example 1: a simple JSON file for HW device with primary properties.\n"
|
||||
" {\n"
|
||||
" \"CPU\": {\"NUM_STREAMS\": \"3\", \"PERF_COUNT\": \"NO\"}\n"
|
||||
" }\n"
|
||||
" Example 2: a simple JSON file for meta device(AUTO/MULTI) with HW device "
|
||||
"properties.\n"
|
||||
" {\n"
|
||||
" \"AUTO\": {\n"
|
||||
" \"PERFORMANCE_HINT\": \"\",\n"
|
||||
" \"PERF_COUNT\": \"NO\",\n"
|
||||
" \"DEVICE_PROPERTIES\": {\n"
|
||||
" \"CPU\": {\n"
|
||||
" \"INFERENCE_PRECISION_HINT\": \"f32\",\n"
|
||||
" \"NUM_STREAMS\": \"3\"\n"
|
||||
" },\n"
|
||||
" \"GPU\": {\n"
|
||||
" \"INFERENCE_PRECISION_HINT\": \"f32\",\n"
|
||||
" \"NUM_STREAMS\": \"5\"\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }";
|
||||
|
||||
/// @brief Define flag for showing help message <br>
|
||||
DEFINE_bool(h, false, help_message);
|
||||
@@ -274,20 +277,9 @@ DEFINE_string(m, "", model_message);
|
||||
/// @brief Define execution mode
|
||||
DEFINE_string(hint, "", hint_message);
|
||||
|
||||
/// @brief Define execution mode
|
||||
DEFINE_string(api, "async", api_message);
|
||||
|
||||
/// @brief device the target device to infer on <br>
|
||||
DEFINE_string(d, "CPU", target_device_message);
|
||||
|
||||
/// @brief Absolute path to extensions library with user layers <br>
|
||||
/// It is a required parameter
|
||||
DEFINE_string(extensions, "", custom_extensions_library_message);
|
||||
|
||||
/// @brief Define parameter for clDNN custom kernels path <br>
|
||||
/// Default is ./lib
|
||||
DEFINE_string(c, "", custom_cldnn_message);
|
||||
|
||||
/// @brief Iterations count (default 0)
|
||||
/// Sync mode: iterations count
|
||||
/// Async mode: StartAsync counts
|
||||
@@ -296,58 +288,10 @@ DEFINE_uint64(niter, 0, iterations_count_message);
|
||||
/// @brief Time to execute topology in seconds
|
||||
DEFINE_uint64(t, 0, execution_time_message);
|
||||
|
||||
/// @brief Number of infer requests in parallel
|
||||
DEFINE_uint64(nireq, 0, infer_requests_count_message);
|
||||
|
||||
/// @brief Number of threads to use for inference on the CPU in throughput mode (also affects Hetero
|
||||
/// cases)
|
||||
DEFINE_uint64(nthreads, 0, infer_num_threads_message);
|
||||
|
||||
/// @brief Number of streams to use for inference on the CPU (also affects Hetero cases)
|
||||
DEFINE_string(nstreams, "", infer_num_streams_message);
|
||||
|
||||
/// @brief The percentile which will be reported in latency metric
|
||||
DEFINE_uint64(latency_percentile, 50, infer_latency_percentile_message);
|
||||
|
||||
/// @brief Define parameter for batch size <br>
|
||||
/// Default is 0 (that means don't specify)
|
||||
DEFINE_uint64(b, 0, batch_size_message);
|
||||
|
||||
// @brief Enable plugin messages
|
||||
DEFINE_string(pin, "", infer_threads_pinning_message);
|
||||
|
||||
/// @brief Enables statistics report collecting
|
||||
DEFINE_string(report_type, "", report_type_message);
|
||||
|
||||
/// @brief Path to a folder where statistics report is stored
|
||||
DEFINE_string(report_folder, "", report_folder_message);
|
||||
|
||||
/// @brief Enables JSON-based statistics reporting
|
||||
DEFINE_bool(json_stats, false, json_stats_message);
|
||||
|
||||
/// @brief Path to a file where to store executable graph information serialized
|
||||
DEFINE_string(exec_graph_path, "", exec_graph_path_message);
|
||||
|
||||
/// @brief Define flag for showing performance counters <br>
|
||||
DEFINE_bool(pc, false, pc_message);
|
||||
|
||||
/// @brief Define flag for showing sorted performance counters <br>
|
||||
DEFINE_string(pcsort, "", pc_sort_message);
|
||||
|
||||
/// @brief Define flag for showing performance sequence counters <br>
|
||||
DEFINE_bool(pcseq, false, pcseq_message);
|
||||
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
/// @brief Define flag for switching between host and device memory allocation for input and output buffers
|
||||
DEFINE_bool(use_device_mem, false, use_device_mem_message);
|
||||
#endif
|
||||
|
||||
/// @brief Define flag for loading configuration file <br>
|
||||
DEFINE_string(load_config, "", load_config_message);
|
||||
|
||||
/// @brief Define flag for dumping configuration file <br>
|
||||
DEFINE_string(dump_config, "", dump_config_message);
|
||||
|
||||
/// @brief Define flag for input shape <br>
|
||||
DEFINE_string(shape, "", shape_message);
|
||||
|
||||
@@ -357,6 +301,32 @@ DEFINE_string(data_shape, "", data_shape_message);
|
||||
/// @brief Define flag for layout shape <br>
|
||||
DEFINE_string(layout, "", layout_message);
|
||||
|
||||
/// @brief Absolute path to extensions library with user layers <br>
|
||||
/// It is a required parameter
|
||||
DEFINE_string(extensions, "", custom_extensions_library_message);
|
||||
|
||||
/// @brief Define parameter for clDNN custom kernels path <br>
|
||||
/// Default is ./lib
|
||||
DEFINE_string(c, "", custom_cldnn_message);
|
||||
|
||||
/// @brief Define parameter for cache model dir <br>
|
||||
DEFINE_string(cache_dir, "", cache_dir_message);
|
||||
|
||||
/// @brief Define flag for load network from model file by name without ReadNetwork <br>
|
||||
DEFINE_bool(load_from_file, false, load_from_file_message);
|
||||
|
||||
/// @brief Define execution mode
|
||||
DEFINE_string(api, "async", api_message);
|
||||
|
||||
/// @brief Number of infer requests in parallel
|
||||
DEFINE_uint64(nireq, 0, infer_requests_count_message);
|
||||
|
||||
/// @brief Number of streams to use for inference on the CPU (also affects Hetero cases)
|
||||
DEFINE_string(nstreams, "", infer_num_streams_message);
|
||||
|
||||
/// @brief Define flag for inference only mode <br>
|
||||
DEFINE_bool(inference_only, true, inference_only_message);
|
||||
|
||||
/// @brief Define flag for inference precision
|
||||
DEFINE_string(infer_precision, "", inference_precision_message);
|
||||
|
||||
@@ -372,71 +342,111 @@ DEFINE_string(op, "", outputs_precision_message);
|
||||
/// Overwrites layout from ip and op options for specified layers.";
|
||||
DEFINE_string(iop, "", iop_message);
|
||||
|
||||
/// @brief Define parameter for cache model dir <br>
|
||||
DEFINE_string(cache_dir, "", cache_dir_message);
|
||||
|
||||
/// @brief Define flag for load network from model file by name without ReadNetwork <br>
|
||||
DEFINE_bool(load_from_file, false, load_from_file_message);
|
||||
|
||||
/// @brief Define flag for using input image mean <br>
|
||||
DEFINE_string(mean_values, "", mean_values_message);
|
||||
|
||||
/// @brief Define flag for using input image scale <br>
|
||||
DEFINE_string(scale_values, "", scale_values_message);
|
||||
|
||||
/// @brief Define flag for inference only mode <br>
|
||||
DEFINE_bool(inference_only, true, inference_only_message);
|
||||
/// @brief Number of threads to use for inference on the CPU in throughput mode (also affects Hetero
|
||||
/// cases)
|
||||
DEFINE_uint64(nthreads, 0, infer_num_threads_message);
|
||||
|
||||
// @brief Enable plugin messages
|
||||
DEFINE_string(pin, "", infer_threads_pinning_message);
|
||||
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
/// @brief Define flag for switching between host and device memory allocation for input and output buffers
|
||||
DEFINE_bool(use_device_mem, false, use_device_mem_message);
|
||||
#endif
|
||||
|
||||
/// @brief The percentile which will be reported in latency metric
|
||||
DEFINE_uint64(latency_percentile, 50, infer_latency_percentile_message);
|
||||
|
||||
/// @brief Enables statistics report collecting
|
||||
DEFINE_string(report_type, "", report_type_message);
|
||||
|
||||
/// @brief Path to a folder where statistics report is stored
|
||||
DEFINE_string(report_folder, "", report_folder_message);
|
||||
|
||||
/// @brief Enables JSON-based statistics reporting
|
||||
DEFINE_bool(json_stats, false, json_stats_message);
|
||||
|
||||
/// @brief Define flag for showing performance counters <br>
|
||||
DEFINE_bool(pc, false, pc_message);
|
||||
|
||||
/// @brief Define flag for showing sorted performance counters <br>
|
||||
DEFINE_string(pcsort, "", pc_sort_message);
|
||||
|
||||
/// @brief Define flag for showing performance sequence counters <br>
|
||||
DEFINE_bool(pcseq, false, pcseq_message);
|
||||
|
||||
/// @brief Path to a file where to store executable graph information serialized
|
||||
DEFINE_string(exec_graph_path, "", exec_graph_path_message);
|
||||
|
||||
/// @brief Define flag for loading configuration file <br>
|
||||
DEFINE_string(load_config, "", load_config_message);
|
||||
|
||||
/// @brief Define flag for dumping configuration file <br>
|
||||
DEFINE_string(dump_config, "", dump_config_message);
|
||||
|
||||
/**
|
||||
* @brief This function shows a help message
|
||||
*/
|
||||
static void show_usage() {
|
||||
std::cout << "usage: benchmark_app [OPTION]" << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "benchmark_app [OPTION]" << std::endl;
|
||||
std::cout << "Options:" << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << " -h, --help " << help_message << std::endl;
|
||||
std::cout << " -m <path> " << model_message << std::endl;
|
||||
std::cout << " -i <path> " << input_message << std::endl;
|
||||
std::cout << " -d <device> " << target_device_message << std::endl;
|
||||
std::cout << " -extensions <absolute_path> " << custom_extensions_library_message << std::endl;
|
||||
std::cout << " -c <absolute_path> " << custom_cldnn_message << std::endl;
|
||||
std::cout << " -hint <performance hint> (latency or throughput or cumulative_throughput or none) "
|
||||
<< hint_message << std::endl;
|
||||
std::cout << " -api <sync/async> " << api_message << std::endl;
|
||||
std::cout << " -niter <integer> " << iterations_count_message << std::endl;
|
||||
std::cout << " -nireq <integer> " << infer_requests_count_message << std::endl;
|
||||
std::cout << " -b <integer> " << batch_size_message << std::endl;
|
||||
std::cout << " -t " << execution_time_message << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Input shapes" << std::endl;
|
||||
std::cout << " -b <integer> " << batch_size_message << std::endl;
|
||||
std::cout << " -shape " << shape_message << std::endl;
|
||||
std::cout << " -data_shape " << data_shape_message << std::endl;
|
||||
std::cout << " -layout " << layout_message << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Advanced options" << std::endl;
|
||||
std::cout << " -extensions <absolute_path> " << custom_extensions_library_message << std::endl;
|
||||
std::cout << " -c <absolute_path> " << custom_cldnn_message << std::endl;
|
||||
std::cout << " -cache_dir <path> " << cache_dir_message << std::endl;
|
||||
std::cout << " -load_from_file " << load_from_file_message << std::endl;
|
||||
std::cout << " -latency_percentile " << infer_latency_percentile_message << std::endl;
|
||||
std::cout << std::endl << " device-specific performance options:" << std::endl;
|
||||
std::cout << " -api <sync/async> " << api_message << std::endl;
|
||||
std::cout << " -nireq <integer> " << infer_requests_count_message << std::endl;
|
||||
std::cout << " -nstreams <integer> " << infer_num_streams_message << std::endl;
|
||||
std::cout << " -inference_only " << inference_only_message << std::endl;
|
||||
std::cout << " -infer_precision " << inference_precision_message << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Preprocessing options:" << std::endl;
|
||||
std::cout << " -ip <value> " << inputs_precision_message << std::endl;
|
||||
std::cout << " -op <value> " << outputs_precision_message << std::endl;
|
||||
std::cout << " -iop <value> " << iop_message << std::endl;
|
||||
std::cout << " -mean_values [R,G,B] " << mean_values_message << std::endl;
|
||||
std::cout << " -scale_values [R,G,B] " << scale_values_message << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Device-specific performance options:" << std::endl;
|
||||
std::cout << " -nthreads <integer> " << infer_num_threads_message << std::endl;
|
||||
std::cout << " -pin <string> (\"YES\"|\"CORE\") / \"HYBRID_AWARE\" / (\"NO\"|\"NONE\") / \"NUMA\" "
|
||||
<< infer_threads_pinning_message << std::endl;
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
std::cout << " -use_device_mem " << use_device_mem_message << std::endl;
|
||||
#endif
|
||||
std::cout << std::endl << " Statistics dumping options:" << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Statistics dumping options:" << std::endl;
|
||||
std::cout << " -latency_percentile " << infer_latency_percentile_message << std::endl;
|
||||
std::cout << " -report_type <type> " << report_type_message << std::endl;
|
||||
std::cout << " -report_folder " << report_folder_message << std::endl;
|
||||
std::cout << " -json_stats " << json_stats_message << std::endl;
|
||||
std::cout << " -exec_graph_path " << exec_graph_path_message << std::endl;
|
||||
std::cout << " -pc " << pc_message << std::endl;
|
||||
std::cout << " -pcsort " << pc_sort_message << std::endl;
|
||||
std::cout << " -pcseq " << pcseq_message << std::endl;
|
||||
std::cout << " -exec_graph_path " << exec_graph_path_message << std::endl;
|
||||
std::cout << " -dump_config " << dump_config_message << std::endl;
|
||||
std::cout << " -load_config " << load_config_message << std::endl;
|
||||
std::cout << " -infer_precision " << inference_precision_message << std::endl;
|
||||
std::cout << " -ip <value> " << inputs_precision_message << std::endl;
|
||||
std::cout << " -op <value> " << outputs_precision_message << std::endl;
|
||||
std::cout << " -iop <value> " << iop_message << std::endl;
|
||||
std::cout << " -mean_values [R,G,B] " << mean_values_message << std::endl;
|
||||
std::cout << " -scale_values [R,G,B] " << scale_values_message << std::endl;
|
||||
std::cout << " -inference_only " << inference_only_message << std::endl;
|
||||
}
|
||||
|
@@ -14,6 +14,8 @@
|
||||
#include <vector>
|
||||
|
||||
#include "format_reader_ptr.h"
|
||||
#include "npy.h"
|
||||
#include "samples/slog.hpp"
|
||||
#include "shared_tensor_allocator.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
@@ -93,6 +95,65 @@ ov::Tensor create_tensor_from_image(const std::vector<std::string>& files,
|
||||
return tensor;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ov::Tensor create_tensor_from_numpy(const std::vector<std::string>& files,
|
||||
size_t inputId,
|
||||
size_t batchSize,
|
||||
const benchmark_app::InputInfo& inputInfo,
|
||||
const std::string& inputName,
|
||||
std::string* filenames_used = nullptr) {
|
||||
size_t tensor_size =
|
||||
std::accumulate(inputInfo.dataShape.begin(), inputInfo.dataShape.end(), 1, std::multiplies<size_t>());
|
||||
auto allocator = std::make_shared<SharedTensorAllocator>(tensor_size * sizeof(T));
|
||||
auto data = reinterpret_cast<T*>(allocator->get_buffer());
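// Note: the tensor created at the end of this function is constructed with this shared allocator, so it is expected to reuse this buffer rather than copy the data.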
|
||||
|
||||
std::vector<std::shared_ptr<unsigned char>> numpy_array_pointers;
|
||||
numpy_array_pointers.reserve(batchSize);
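// The next block decides how many numpy files to read: one file per batch element when the layout defines a batch dimension, otherwise a single file is assumed to cover the whole batch (a warning is printed in that case).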
|
||||
|
||||
size_t numpy_batch_size = 1;
|
||||
if (!inputInfo.layout.empty() && ov::layout::has_batch(inputInfo.layout)) {
|
||||
numpy_batch_size = batchSize;
|
||||
} else {
|
||||
slog::warn << inputName
|
||||
<< ": layout is not set or does not contain batch dimension. Assuming that numpy array "
|
||||
"contains data for all batches."
|
||||
<< slog::endl;
|
||||
}
|
||||
|
||||
for (size_t b = 0; b < numpy_batch_size; ++b) {
|
||||
auto inputIndex = (inputId + b) % files.size();
|
||||
if (filenames_used) {
|
||||
*filenames_used += (filenames_used->empty() ? "" : ", ") + files[inputIndex];
|
||||
}
|
||||
FormatReader::ReaderPtr numpy_array_reader(files[inputIndex].c_str());
|
||||
if (numpy_array_reader.get() == nullptr) {
|
||||
slog::warn << "Numpy array " << files[inputIndex] << " cannot be read!" << slog::endl << slog::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
std::shared_ptr<unsigned char> numpy_array_data_pointer(numpy_array_reader->getData());
|
||||
if (numpy_array_data_pointer) {
|
||||
numpy_array_pointers.push_back(numpy_array_data_pointer);
|
||||
}
|
||||
}
|
||||
|
||||
size_t type_bytes_size = sizeof(T);
|
||||
std::unique_ptr<unsigned char[]> bytes_buffer(new unsigned char[type_bytes_size]);
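// The nested loops below copy each element byte-by-byte from the parsed numpy buffers into the typed tensor data, using a small per-element staging buffer.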
|
||||
|
||||
for (size_t batch_nr = 0; batch_nr < numpy_batch_size; ++batch_nr) {
|
||||
for (size_t input_tensor_nr = 0; input_tensor_nr < tensor_size; ++input_tensor_nr) {
|
||||
size_t offset = batch_nr * tensor_size + input_tensor_nr;
|
||||
for (size_t byte_nr = 0; byte_nr < type_bytes_size; ++byte_nr) {
|
||||
bytes_buffer.get()[byte_nr] =
|
||||
numpy_array_pointers.at(batch_nr).get()[offset * type_bytes_size + byte_nr];
|
||||
}
|
||||
data[offset] = *((T*)(bytes_buffer.get()));
|
||||
}
|
||||
}
|
||||
|
||||
return ov::Tensor(inputInfo.type, inputInfo.dataShape, ov::Allocator(allocator));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ov::Tensor create_tensor_im_info(const std::pair<size_t, size_t>& image_size,
|
||||
size_t batchSize,
|
||||
@@ -154,17 +215,23 @@ ov::Tensor create_tensor_from_binary(const std::vector<std::string>& files,
|
||||
std::ifstream binaryFile(files[inputIndex], std::ios_base::binary | std::ios_base::ate);
|
||||
OPENVINO_ASSERT(binaryFile, "Cannot open ", files[inputIndex]);
|
||||
|
||||
auto fileSize = static_cast<std::size_t>(binaryFile.tellg());
|
||||
binaryFile.seekg(0, std::ios_base::beg);
|
||||
OPENVINO_ASSERT(binaryFile.good(), "Can not read ", files[inputIndex]);
|
||||
auto inputSize = tensor_size * sizeof(T) / binaryBatchSize;
|
||||
OPENVINO_ASSERT(fileSize == inputSize,
|
||||
"File ",
|
||||
files[inputIndex],
|
||||
" contains ",
|
||||
fileSize,
|
||||
" bytes, but the model expects ",
|
||||
inputSize);
|
||||
|
||||
std::string extension = get_extension(files[inputIndex]);
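// Only raw .bin files are handled here: their size is checked against the expected per-batch input size, while any other extension is rejected below.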
|
||||
if (extension == "bin") {
|
||||
auto fileSize = static_cast<std::size_t>(binaryFile.tellg());
|
||||
binaryFile.seekg(0, std::ios_base::beg);
|
||||
OPENVINO_ASSERT(binaryFile.good(), "Can not read ", files[inputIndex]);
|
||||
OPENVINO_ASSERT(fileSize == inputSize,
|
||||
"File ",
|
||||
files[inputIndex],
|
||||
" contains ",
|
||||
fileSize,
|
||||
" bytes, but the model expects ",
|
||||
inputSize);
|
||||
} else {
|
||||
throw ov::Exception("Unsupported binary file type: " + extension);
|
||||
}
|
||||
|
||||
if (inputInfo.layout != "CN") {
|
||||
binaryFile.read(&data[b * inputSize], inputSize);
|
||||
@@ -208,20 +275,20 @@ ov::Tensor get_image_tensor(const std::vector<std::string>& files,
|
||||
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo,
|
||||
std::string* filenames_used = nullptr) {
|
||||
auto type = inputInfo.second.type;
|
||||
if (type == ov::element::f32) {
|
||||
return create_tensor_from_image<float>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::f16) {
|
||||
if (type == ov::element::f16) {
|
||||
return create_tensor_from_image<ov::float16>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::f32) {
|
||||
return create_tensor_from_image<float>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::f64) {
|
||||
return create_tensor_from_image<double>(files,
|
||||
inputId,
|
||||
@@ -229,6 +296,20 @@ ov::Tensor get_image_tensor(const std::vector<std::string>& files,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i8) {
|
||||
return create_tensor_from_image<int8_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i16) {
|
||||
return create_tensor_from_image<int16_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i32) {
|
||||
return create_tensor_from_image<int32_t>(files,
|
||||
inputId,
|
||||
@@ -243,13 +324,34 @@ ov::Tensor get_image_tensor(const std::vector<std::string>& files,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u8) {
|
||||
} else if ((type == ov::element::u8) || (type == ov::element::boolean)) {
|
||||
return create_tensor_from_image<uint8_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u16) {
|
||||
return create_tensor_from_image<uint16_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u32) {
|
||||
return create_tensor_from_image<uint32_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u64) {
|
||||
return create_tensor_from_image<uint64_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else {
|
||||
throw ov::Exception("Input type is not supported for " + inputInfo.first);
|
||||
}
|
||||
@ -259,16 +361,116 @@ ov::Tensor get_im_info_tensor(const std::pair<size_t, size_t>& image_size,
|
||||
size_t batchSize,
|
||||
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo) {
|
||||
auto type = inputInfo.second.type;
|
||||
if (type == ov::element::f32) {
|
||||
if (type == ov::element::f16) {
|
||||
return create_tensor_im_info<ov::float16>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::f32) {
|
||||
return create_tensor_im_info<float>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::f64) {
|
||||
return create_tensor_im_info<double>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::f16) {
|
||||
return create_tensor_im_info<short>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::i8) {
|
||||
return create_tensor_im_info<int8_t>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::i16) {
|
||||
return create_tensor_im_info<int16_t>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::i32) {
|
||||
return create_tensor_im_info<int32_t>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::i64) {
|
||||
return create_tensor_im_info<int64_t>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if ((type == ov::element::u8) || (type == ov::element::boolean)) {
|
||||
return create_tensor_im_info<uint8_t>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::u16) {
|
||||
return create_tensor_im_info<uint16_t>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::u32) {
|
||||
return create_tensor_im_info<uint32_t>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else if (type == ov::element::u64) {
|
||||
return create_tensor_im_info<uint64_t>(image_size, batchSize, inputInfo.second, inputInfo.first);
|
||||
} else {
|
||||
throw ov::Exception("Input type is not supported for " + inputInfo.first);
|
||||
}
|
||||
}
|
||||
|
||||
ov::Tensor get_numpy_tensor(const std::vector<std::string>& files,
|
||||
size_t inputId,
|
||||
size_t batchSize,
|
||||
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo,
|
||||
std::string* filenames_used = nullptr) {
|
||||
auto type = inputInfo.second.type;
|
||||
if (type == ov::element::f16) {
|
||||
return create_tensor_from_numpy<ov::float16>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::f32) {
|
||||
return create_tensor_from_numpy<float>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::f64) {
|
||||
return create_tensor_from_numpy<double>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i8) {
|
||||
return create_tensor_from_numpy<int8_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i16) {
|
||||
return create_tensor_from_numpy<int16_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i32) {
|
||||
return create_tensor_from_numpy<int32_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i64) {
|
||||
return create_tensor_from_numpy<int64_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if ((type == ov::element::u8) || (type == ov::element::boolean)) {
|
||||
return create_tensor_from_numpy<uint8_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u16) {
|
||||
return create_tensor_from_numpy<uint16_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u32) {
|
||||
return create_tensor_from_numpy<uint32_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u64) {
|
||||
return create_tensor_from_numpy<uint64_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else {
|
||||
throw ov::Exception("Input type is not supported for " + inputInfo.first);
|
||||
}
|
||||
@ -280,7 +482,14 @@ ov::Tensor get_binary_tensor(const std::vector<std::string>& files,
|
||||
const std::pair<std::string, benchmark_app::InputInfo>& inputInfo,
|
||||
std::string* filenames_used = nullptr) {
|
||||
const auto& type = inputInfo.second.type;
|
||||
if (type == ov::element::f32) {
|
||||
if (type == ov::element::f16) {
|
||||
return create_tensor_from_binary<ov::float16>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::f32) {
|
||||
return create_tensor_from_binary<float>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
@ -294,13 +503,20 @@ ov::Tensor get_binary_tensor(const std::vector<std::string>& files,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::f16) {
|
||||
return create_tensor_from_binary<short>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i8) {
|
||||
return create_tensor_from_binary<int8_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i16) {
|
||||
return create_tensor_from_binary<int16_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::i32) {
|
||||
return create_tensor_from_binary<int32_t>(files,
|
||||
inputId,
|
||||
@ -322,6 +538,27 @@ ov::Tensor get_binary_tensor(const std::vector<std::string>& files,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u16) {
|
||||
return create_tensor_from_binary<uint16_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u32) {
|
||||
return create_tensor_from_binary<uint32_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else if (type == ov::element::u64) {
|
||||
return create_tensor_from_binary<uint64_t>(files,
|
||||
inputId,
|
||||
batchSize,
|
||||
inputInfo.second,
|
||||
inputInfo.first,
|
||||
filenames_used);
|
||||
} else {
|
||||
throw ov::Exception("Input type is not supported for " + inputInfo.first);
|
||||
}
|
||||
@ -339,7 +576,7 @@ ov::Tensor get_random_tensor(const std::pair<std::string, benchmark_app::InputIn
|
||||
return create_tensor_random<int32_t, int32_t>(inputInfo.second);
|
||||
} else if (type == ov::element::i64) {
|
||||
return create_tensor_random<int64_t, int64_t>(inputInfo.second);
|
||||
} else if (type == ov::element::u8) {
|
||||
} else if ((type == ov::element::u8) || (type == ov::element::boolean)) {
|
||||
// uniform_int_distribution<uint8_t> is not allowed in the C++17
|
||||
// standard and vs2017/19
|
||||
return create_tensor_random<uint8_t, uint32_t>(inputInfo.second);
|
||||
@ -403,8 +640,13 @@ std::map<std::string, ov::TensorVector> get_tensors(std::map<std::string, std::v
|
||||
std::string input_name = files.first.empty() ? app_inputs_info[0].begin()->first : files.first;
|
||||
auto input = app_inputs_info[0].at(input_name);
|
||||
if (!files.second.empty() && files.second[0] != "random" && files.second[0] != "image_info") {
|
||||
if (input.is_image()) {
|
||||
files.second = filter_files_by_extensions(files.second, supported_image_extensions);
|
||||
auto filtered_numpy_files = filter_files_by_extensions(files.second, supported_numpy_extensions);
|
||||
auto filtered_image_files = filter_files_by_extensions(files.second, supported_image_extensions);
|
||||
|
||||
if (!filtered_numpy_files.empty()) {
|
||||
files.second = filtered_numpy_files;
|
||||
} else if (!filtered_image_files.empty() && input.is_image()) {
|
||||
files.second = filtered_image_files;
|
||||
} else if (input.is_image_info() && net_input_im_sizes.size() == app_inputs_info.size()) {
|
||||
slog::info << "Input '" << input_name
|
||||
<< "' probably is image info. All files for this input will"
|
||||
@ -486,8 +728,9 @@ std::map<std::string, ov::TensorVector> get_tensors(std::map<std::string, std::v
|
||||
std::string tensor_src_info;
|
||||
if (files.second[0] == "random") {
|
||||
// Fill random
|
||||
tensor_src_info =
|
||||
"random (" + std::string((input_info.is_image() ? "image" : "binary data")) + " is expected)";
|
||||
tensor_src_info = "random (" +
|
||||
std::string((input_info.is_image() ? "image/numpy array" : "binary data")) +
|
||||
" is expected)";
|
||||
tensors[input_name].push_back(get_random_tensor({input_name, input_info}));
|
||||
} else if (files.second[0] == "image_info") {
|
||||
// Most likely it is image info: fill with image information
|
||||
@ -495,6 +738,10 @@ std::map<std::string, ov::TensorVector> get_tensors(std::map<std::string, std::v
|
||||
tensor_src_info =
|
||||
"Image size tensor " + std::to_string(image_size.first) + " x " + std::to_string(image_size.second);
|
||||
tensors[input_name].push_back(get_im_info_tensor(image_size, batchSize, {input_name, input_info}));
|
||||
} else if (supported_numpy_extensions.count(get_extension(files.second[0]))) {
|
||||
// Fill with Numpy arrays
|
||||
tensors[input_name].push_back(
|
||||
get_numpy_tensor(files.second, inputId, batchSize, {input_name, input_info}, &tensor_src_info));
|
||||
} else if (input_info.is_image()) {
|
||||
// Fill with Images
|
||||
tensors[input_name].push_back(
|
||||
@ -549,45 +796,26 @@ std::map<std::string, ov::TensorVector> get_tensors_static_case(const std::vecto
|
||||
}
|
||||
}
|
||||
|
||||
size_t imageInputsNum = net_input_im_sizes.size();
|
||||
size_t binaryInputsNum = app_inputs_info.size() - imageInputsNum;
|
||||
std::vector<std::string> binaryFiles = filter_files_by_extensions(inputFiles, supported_binary_extensions);
|
||||
std::vector<std::string> numpyFiles = filter_files_by_extensions(inputFiles, supported_numpy_extensions);
|
||||
std::vector<std::string> imageFiles = filter_files_by_extensions(inputFiles, supported_image_extensions);
|
||||
|
||||
std::vector<std::string> binaryFiles;
|
||||
std::vector<std::string> imageFiles;
|
||||
size_t imageInputsNum = imageFiles.size();
|
||||
size_t numpyInputsNum = numpyFiles.size();
|
||||
size_t binaryInputsNum = binaryFiles.size();
|
||||
size_t totalInputsNum = imageInputsNum + numpyInputsNum + binaryInputsNum;
|
||||
|
||||
if (inputFiles.empty()) {
|
||||
slog::warn << "No input files were given: all inputs will be filled with "
|
||||
"random values!"
|
||||
<< slog::endl;
|
||||
} else {
|
||||
binaryFiles = filter_files_by_extensions(inputFiles, supported_binary_extensions);
|
||||
std::sort(std::begin(binaryFiles), std::end(binaryFiles));
|
||||
|
||||
auto binaryToBeUsed = binaryInputsNum * batchSize * requestsNum;
|
||||
if (binaryToBeUsed > 0 && binaryFiles.empty()) {
|
||||
std::stringstream ss;
|
||||
for (auto& ext : supported_binary_extensions) {
|
||||
if (!ss.str().empty()) {
|
||||
ss << ", ";
|
||||
}
|
||||
ss << ext;
|
||||
}
|
||||
slog::warn << "No supported binary inputs found! Please check your file "
|
||||
"extensions: "
|
||||
<< ss.str() << slog::endl;
|
||||
} else if (binaryToBeUsed > binaryFiles.size()) {
|
||||
slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed
|
||||
<< " files are required but only " << binaryFiles.size() << " are provided" << slog::endl;
|
||||
} else if (binaryToBeUsed < binaryFiles.size()) {
|
||||
slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << " are required from "
|
||||
<< binaryFiles.size() << slog::endl;
|
||||
}
|
||||
|
||||
imageFiles = filter_files_by_extensions(inputFiles, supported_image_extensions);
|
||||
std::sort(std::begin(numpyFiles), std::end(numpyFiles));
|
||||
std::sort(std::begin(imageFiles), std::end(imageFiles));
|
||||
|
||||
auto imagesToBeUsed = imageInputsNum * batchSize * requestsNum;
|
||||
if (imagesToBeUsed > 0 && imageFiles.empty()) {
|
||||
auto filesToBeUsed = totalInputsNum * batchSize * requestsNum;
|
||||
if (filesToBeUsed == 0 && !inputFiles.empty()) {
|
||||
std::stringstream ss;
|
||||
for (auto& ext : supported_image_extensions) {
|
||||
if (!ss.str().empty()) {
|
||||
@ -595,23 +823,43 @@ std::map<std::string, ov::TensorVector> get_tensors_static_case(const std::vecto
|
||||
}
|
||||
ss << ext;
|
||||
}
|
||||
slog::warn << "No supported image inputs found! Please check your file "
|
||||
for (auto& ext : supported_numpy_extensions) {
|
||||
if (!ss.str().empty()) {
|
||||
ss << ", ";
|
||||
}
|
||||
ss << ext;
|
||||
}
|
||||
for (auto& ext : supported_binary_extensions) {
|
||||
if (!ss.str().empty()) {
|
||||
ss << ", ";
|
||||
}
|
||||
ss << ext;
|
||||
}
|
||||
slog::warn << "Inputs of unsupported type found! Please check your file "
|
||||
"extensions: "
|
||||
<< ss.str() << slog::endl;
|
||||
} else if (imagesToBeUsed > imageFiles.size()) {
|
||||
slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed
|
||||
<< " files are required but only " << imageFiles.size() << " are provided" << slog::endl;
|
||||
} else if (imagesToBeUsed < imageFiles.size()) {
|
||||
slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << " are required from "
|
||||
<< imageFiles.size() << slog::endl;
|
||||
} else if (app_inputs_info.size() > totalInputsNum) {
|
||||
slog::warn << "Some input files will be duplicated: " << filesToBeUsed << " files are required but only "
|
||||
<< totalInputsNum << " are provided" << slog::endl;
|
||||
} else if (filesToBeUsed < app_inputs_info.size()) {
|
||||
slog::warn << "Some input files will be ignored: only " << filesToBeUsed << " are required from "
|
||||
<< totalInputsNum << slog::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<std::string>> mappedFiles;
|
||||
size_t imageInputsCount = 0;
|
||||
size_t numpyInputsCount = 0;
|
||||
size_t binaryInputsCount = 0;
|
||||
for (auto& input : app_inputs_info) {
|
||||
if (input.second.is_image()) {
|
||||
if (numpyInputsNum) {
|
||||
mappedFiles[input.first] = {};
|
||||
for (size_t i = 0; i < numpyFiles.size(); i += numpyInputsNum) {
|
||||
mappedFiles[input.first].push_back(
|
||||
numpyFiles[(numpyInputsCount + i) * numpyInputsNum % numpyFiles.size()]);
|
||||
}
|
||||
++numpyInputsCount;
|
||||
} else if (input.second.is_image()) {
|
||||
mappedFiles[input.first] = {};
|
||||
for (size_t i = 0; i < imageFiles.size(); i += imageInputsNum) {
|
||||
mappedFiles[input.first].push_back(
|
||||
@ -643,13 +891,26 @@ std::map<std::string, ov::TensorVector> get_tensors_static_case(const std::vecto
|
||||
std::vector<std::map<std::string, std::string>> logOutput(test_configs_num);
|
||||
for (const auto& files : mappedFiles) {
|
||||
size_t imageInputId = 0;
|
||||
size_t numpyInputId = 0;
|
||||
size_t binaryInputId = 0;
|
||||
auto input_name = files.first;
|
||||
auto input_info = app_inputs_info.at(files.first);
|
||||
|
||||
for (size_t i = 0; i < test_configs_num; ++i) {
|
||||
std::string blob_src_info;
|
||||
if (input_info.is_image()) {
|
||||
if (files.second.size() && supported_numpy_extensions.count(get_extension(files.second[0]))) {
|
||||
if (!numpyFiles.empty()) {
|
||||
// Fill with Numpy arrays
|
||||
blobs[input_name].push_back(get_numpy_tensor(files.second,
|
||||
imageInputId,
|
||||
batchSize,
|
||||
{input_name, input_info},
|
||||
&blob_src_info));
|
||||
numpyInputId = (numpyInputId + batchSize) % files.second.size();
|
||||
logOutput[i][input_name] += get_test_info_stream_header(input_info) + blob_src_info;
|
||||
continue;
|
||||
}
|
||||
} else if (input_info.is_image()) {
|
||||
if (!imageFiles.empty()) {
|
||||
// Fill with Images
|
||||
blobs[input_name].push_back(get_image_tensor(files.second,
|
||||
@ -684,8 +945,8 @@ std::map<std::string, ov::TensorVector> get_tensors_static_case(const std::vecto
|
||||
}
|
||||
}
|
||||
// Fill random
|
||||
blob_src_info =
|
||||
"random (" + std::string((input_info.is_image() ? "image" : "binary data")) + " is expected)";
|
||||
blob_src_info = "random (" + std::string((input_info.is_image() ? "image" : "binary data")) +
|
||||
"/numpy array is expected)";
|
||||
blobs[input_name].push_back(get_random_tensor({input_name, input_info}));
|
||||
logOutput[i][input_name] += get_test_info_stream_header(input_info) + blob_src_info;
|
||||
}
|
||||
|
@ -686,7 +686,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
const auto& inputInfo = std::const_pointer_cast<const ov::Model>(model)->inputs();
|
||||
if (inputInfo.empty()) {
|
||||
throw std::logic_error("no inputs info is provided");
|
||||
throw std::logic_error("No inputs info is provided");
|
||||
}
|
||||
|
||||
// ----------------- 5. Resizing network to match image sizes and given
|
||||
|
@ -543,8 +543,9 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_
|
||||
}
|
||||
}
|
||||
|
||||
size_t w = 0;
|
||||
size_t h = 0;
|
||||
size_t w = 0;
|
||||
std::vector<size_t> shape;
|
||||
size_t fileIdx = currentFileCounters[item.get_any_name()];
|
||||
for (; fileIdx < currentFileCounters[item.get_any_name()] + tensorBatchSize; fileIdx++) {
|
||||
if (fileIdx >= namesVector.size()) {
|
||||
@ -553,28 +554,47 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_
|
||||
"size if -data_shape parameter is omitted and shape is dynamic)");
|
||||
}
|
||||
FormatReader::ReaderPtr reader(namesVector[fileIdx].c_str());
|
||||
if ((w && w != reader->width()) || (h && h != reader->height())) {
|
||||
throw std::logic_error("Image sizes putting into one batch should be of the same size if input "
|
||||
"shape is dynamic and -data_shape is omitted. Problem file: " +
|
||||
namesVector[fileIdx]);
|
||||
if ((w && w != reader->width()) || (h && h != reader->height()) ||
|
||||
(!shape.empty() && shape != reader->shape())) {
|
||||
throw std::logic_error(
|
||||
"File dimensions putting into one batch should be of the same dimensionality if input "
|
||||
"shape is dynamic and -data_shape is omitted. Problem file: " +
|
||||
namesVector[fileIdx]);
|
||||
}
|
||||
w = reader->width();
|
||||
h = reader->height();
|
||||
w = reader->width();
|
||||
shape = reader->shape();
|
||||
}
|
||||
currentFileCounters[item.get_any_name()] = fileIdx;
|
||||
|
||||
if (!info.dataShape[ov::layout::height_idx(info.layout)]) {
|
||||
info.dataShape[ov::layout::height_idx(info.layout)] = h;
|
||||
}
|
||||
if (!info.dataShape[ov::layout::width_idx(info.layout)]) {
|
||||
info.dataShape[ov::layout::width_idx(info.layout)] = w;
|
||||
if (shape.size() == 2) { // Has only h and w
|
||||
if (!info.dataShape[ov::layout::height_idx(info.layout)]) {
|
||||
info.dataShape[ov::layout::height_idx(info.layout)] = h;
|
||||
}
|
||||
if (!info.dataShape[ov::layout::width_idx(info.layout)]) {
|
||||
info.dataShape[ov::layout::width_idx(info.layout)] = w;
|
||||
}
|
||||
} else { // Is numpy array
|
||||
size_t shape_idx = 0;
|
||||
if (info.dataShape.size() != shape.size()) {
|
||||
throw std::logic_error("Shape required by the input and file shape do not have the same rank. "
|
||||
"Input: " +
|
||||
item.get_any_name() + ", File name: " + namesVector[fileIdx - 1]);
|
||||
}
|
||||
for (size_t i = ov::layout::batch_idx(info.layout);
|
||||
i < ov::layout::batch_idx(info.layout) + info.dataShape.size();
|
||||
++i) {
|
||||
if (!info.dataShape[i]) {
|
||||
info.dataShape[i] = shape.at(shape_idx);
|
||||
}
|
||||
shape_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
if (std::any_of(info.dataShape.begin(), info.dataShape.end(), [](size_t d) {
|
||||
return d == 0;
|
||||
})) {
|
||||
throw std::logic_error("Not enough information in shape and image to determine tensor shape "
|
||||
"automatically autmatically. Input: " +
|
||||
throw std::logic_error("Not enough information in shape and file to determine tensor shape "
|
||||
"autmatically. Input: " +
|
||||
item.get_any_name() + ", File name: " + namesVector[fileIdx - 1]);
|
||||
}
|
||||
|
||||
@ -736,14 +756,6 @@ void load_config(const std::string& filename, std::map<std::string, ov::AnyMap>&
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_OPENCV
|
||||
const std::vector<std::string> supported_image_extensions =
|
||||
{"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png", "pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"};
|
||||
#else
|
||||
const std::vector<std::string> supported_image_extensions = {"bmp"};
|
||||
#endif
|
||||
const std::vector<std::string> supported_binary_extensions = {"bin"};
|
||||
|
||||
std::string get_extension(const std::string& name) {
|
||||
auto extensionPosition = name.rfind('.', name.size());
|
||||
return extensionPosition == std::string::npos ? "" : name.substr(extensionPosition + 1, name.size() - 1);
|
||||
@ -752,36 +764,38 @@ std::string get_extension(const std::string& name) {
bool is_binary_file(const std::string& filePath) {
auto extension = get_extension(filePath);
std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
return std::find(supported_binary_extensions.begin(), supported_binary_extensions.end(), extension) !=
supported_binary_extensions.end();
return supported_binary_extensions.find(extension) != supported_binary_extensions.end();
}

bool is_numpy_file(const std::string& filePath) {
auto extension = get_extension(filePath);
std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
return supported_numpy_extensions.find(extension) != supported_numpy_extensions.end();
}

bool is_image_file(const std::string& filePath) {
auto extension = get_extension(filePath);
std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
return std::find(supported_binary_extensions.begin(), supported_binary_extensions.end(), extension) !=
supported_binary_extensions.end();
return supported_image_extensions.find(extension) != supported_image_extensions.end();
}

bool contains_binaries(const std::vector<std::string>& filePaths) {
std::vector<std::string> filtered;
for (auto& filePath : filePaths) {
auto extension = get_extension(filePath);
std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
if (std::find(supported_binary_extensions.begin(), supported_binary_extensions.end(), extension) !=
supported_binary_extensions.end()) {
if (is_binary_file(filePath)) {
return true;
}
}
return false;
}

std::vector<std::string> filter_files_by_extensions(const std::vector<std::string>& filePaths,
const std::vector<std::string>& extensions) {
const std::unordered_set<std::string>& extensions) {
std::vector<std::string> filtered;
for (auto& filePath : filePaths) {
auto extension = get_extension(filePath);
std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower);
if (std::find(extensions.begin(), extensions.end(), extension) != extensions.end()) {
if (extensions.find(extension) != extensions.end()) {
filtered.push_back(filePath);
}
}
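As a quick illustration of the new set-based signature (a sketch only; the input file names are made up, while filter_files_by_extensions and the supported_* sets are the ones declared in this patch):

// Only the lower-cased extension matters for the filter.
std::vector<std::string> inputs = {"cat.npy", "dog.bmp", "calib.bin"};
auto numpy_files = filter_files_by_extensions(inputs, supported_numpy_extensions);   // -> {"cat.npy"}
auto image_files = filter_files_by_extensions(inputs, supported_image_extensions);   // -> {"dog.bmp"}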
@ -10,8 +10,18 @@
|
||||
#include <openvino/openvino.hpp>
|
||||
#include <samples/slog.hpp>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#ifdef USE_OPENCV
|
||||
const std::unordered_set<std::string> supported_image_extensions =
|
||||
{"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png", "pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"};
|
||||
#else
|
||||
const std::unordered_set<std::string> supported_image_extensions = {"bmp"};
|
||||
#endif
|
||||
const std::unordered_set<std::string> supported_numpy_extensions = {"npy"};
|
||||
const std::unordered_set<std::string> supported_binary_extensions = {"bin"};
|
||||
|
||||
typedef std::chrono::high_resolution_clock Time;
|
||||
typedef std::chrono::nanoseconds ns;
|
||||
|
||||
@ -117,14 +127,13 @@ std::vector<benchmark_app::InputsInfo> get_inputs_info(const std::string& shape_
|
||||
void dump_config(const std::string& filename, const std::map<std::string, ov::AnyMap>& config);
|
||||
void load_config(const std::string& filename, std::map<std::string, ov::AnyMap>& config);
|
||||
|
||||
extern const std::vector<std::string> supported_image_extensions;
|
||||
extern const std::vector<std::string> supported_binary_extensions;
|
||||
|
||||
std::string get_extension(const std::string& name);
|
||||
bool is_binary_file(const std::string& filePath);
|
||||
bool is_numpy_file(const std::string& filePath);
|
||||
bool is_image_file(const std::string& filePath);
|
||||
bool contains_binaries(const std::vector<std::string>& filePaths);
|
||||
std::vector<std::string> filter_files_by_extensions(const std::vector<std::string>& filePaths,
|
||||
const std::vector<std::string>& extensions);
|
||||
const std::unordered_set<std::string>& extensions);
|
||||
|
||||
std::string parameter_name_to_tensor_name(
|
||||
const std::string& name,
|
||||
|
@ -49,6 +49,8 @@ MnistUbyte::MnistUbyte(const std::string& filename) {
|
||||
}
|
||||
|
||||
size_t size = _width * _height * 1;
|
||||
_shape.push_back(_height);
|
||||
_shape.push_back(_width);
|
||||
|
||||
_data.reset(new unsigned char[size], std::default_delete<unsigned char[]>());
|
||||
size_t count = 0;
|
||||
|
@ -37,6 +37,8 @@ BitMap::BitMap(const string& filename) {
|
||||
bool rowsReversed = infoHeader.height < 0;
|
||||
_width = infoHeader.width;
|
||||
_height = abs(infoHeader.height);
|
||||
_shape.push_back(_height);
|
||||
_shape.push_back(_width);
|
||||
|
||||
if (infoHeader.bits != 24) {
|
||||
cerr << "[BMP] 24bpp only supported. But input has:" << infoHeader.bits << "\n";
|
||||
|
@ -6,9 +6,11 @@
|
||||
|
||||
// clang-format off
|
||||
#include "bmp.h"
|
||||
#include "npy.h"
|
||||
#include "MnistUbyte.h"
|
||||
#include "yuv_nv12.h"
|
||||
#include "opencv_wrapper.h"
|
||||
|
||||
#include "format_reader.h"
|
||||
// clang-format on
|
||||
|
||||
@ -18,6 +20,7 @@ std::vector<Registry::CreatorFunction> Registry::_data;
|
||||
|
||||
Register<MnistUbyte> MnistUbyte::reg;
|
||||
Register<YUV_NV12> YUV_NV12::reg;
|
||||
Register<NumpyArray> NumpyArray::reg;
|
||||
#ifdef USE_OPENCV
|
||||
Register<OCVReader> OCVReader::reg;
|
||||
#else
|
||||
|
@ -42,6 +42,8 @@ protected:
|
||||
size_t _width = 0;
|
||||
/// \brief data
|
||||
std::shared_ptr<unsigned char> _data;
|
||||
/// \brief shape - data shape
|
||||
std::vector<size_t> _shape;
|
||||
|
||||
public:
|
||||
virtual ~Reader() = default;
|
||||
@ -62,6 +64,14 @@ public:
|
||||
return _height;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get full shape vector
|
||||
* @return vector of size_t values determining data shape
|
||||
*/
|
||||
std::vector<size_t> shape() const {
|
||||
return _shape;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get input data ptr
|
||||
* @return shared pointer with input data
|
||||
|
samples/cpp/common/format_reader/npy.cpp
Normal file
@ -0,0 +1,117 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

// clang-format off
#include <fstream>
#include <iterator>
#include <sstream>
#include <string>
#include <iostream>
#include <algorithm>

#include "npy.h"
// clang-format on

using namespace FormatReader;

NumpyArray::NumpyArray(const std::string& filename) {
auto pos = filename.rfind('.');
if (pos == std::string::npos)
return;
if (filename.substr(pos + 1) != "npy")
return;

std::ifstream file(filename, std::ios::binary);
if (!file.is_open()) {
return;
}

file.seekg(0, std::ios_base::end);
if (!file.good()) {
return;
}
auto full_file_size = static_cast<std::size_t>(file.tellg());
file.seekg(0, std::ios_base::beg);

std::string magic_string(6, ' ');
file.read(&magic_string[0], magic_string.size());
if (magic_string != "\x93NUMPY") {
return;
}

file.ignore(2);
unsigned short header_size;
file.read((char*)&header_size, sizeof(header_size));

std::string header(header_size, ' ');
file.read(&header[0], header.size());

int idx, from, to;

// Verify fortran order is false
const std::string fortran_key = "'fortran_order':";
idx = header.find(fortran_key);
if (idx == -1) {
return;
}

from = header.find_last_of(' ', idx + fortran_key.size()) + 1;
to = header.find(',', from);
auto fortran_value = header.substr(from, to - from);
if (fortran_value != "False") {
return;
}

// Verify array shape matches the input's
const std::string shape_key = "'shape':";
idx = header.find(shape_key);
if (idx == -1) {
return;
}

from = header.find('(', idx + shape_key.size()) + 1;
to = header.find(')', from);

std::string shape_data = header.substr(from, to - from);

if (!shape_data.empty()) {
shape_data.erase(std::remove(shape_data.begin(), shape_data.end(), ','), shape_data.end());

std::istringstream shape_data_stream(shape_data);
size_t value;
while (shape_data_stream >> value) {
_shape.push_back(value);
}
}

// Batch / Height / Width / Other dims
// If batch is present, height and width are at least 1
if (_shape.size()) {
_height = _shape.size() >= 2 ? _shape.at(1) : 1;
_width = _shape.size() >= 3 ? _shape.at(2) : 1;
} else {
_height = 0;
_width = 0;
}

// Verify array data type matches input's
std::string dataTypeKey = "'descr':";
idx = header.find(dataTypeKey);
if (idx == -1) {
return;
}

from = header.find('\'', idx + dataTypeKey.size()) + 1;
to = header.find('\'', from);
type = header.substr(from, to - from);

_size = full_file_size - static_cast<std::size_t>(file.tellg());

_data.reset(new unsigned char[_size], std::default_delete<unsigned char[]>());
for (size_t i = 0; i < _size; i++) {
unsigned char buffer = 0;
file.read(reinterpret_cast<char*>(&buffer), sizeof(buffer));
_data.get()[i] = buffer;
}
}
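Note: the parsing above appears to assume NPY format version 1.0 headers (that is what the two-byte length field it reads implies). As an illustration of the on-disk layout the constructor expects, with made-up dict values:

// offset 0:  "\x93NUMPY"               magic string, compared against magic_string
// offset 6:  0x01 0x00                 major/minor version bytes, skipped via file.ignore(2)
// offset 8:  uint16 (little-endian)    header length, read into header_size
// offset 10: {'descr': '<f4', 'fortran_order': False, 'shape': (1, 3, 224, 224), }
//            a Python dict literal, padded with spaces and terminated by '\n'
// then:      raw array bytes; _size is the file size minus everything consumed so far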
samples/cpp/common/format_reader/npy.h
Normal file
@ -0,0 +1,52 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
 * \brief NumpyArray reader
 * \file npy.h
 */
#pragma once

#include <memory>
#include <string>
#include <vector>

// clang-format off
#include "format_reader.h"
#include "register.h"
// clang-format on

namespace FormatReader {
/**
 * \class NumpyArray
 * \brief Reader for NPY files
 */
class NumpyArray : public Reader {
private:
static Register<NumpyArray> reg;
std::string type;
size_t _size = 0;

public:
/**
 * \brief Constructor of NumpyArray reader
 * @param filename - path to input data
 * @return NumpyArray reader object
 */
explicit NumpyArray(const std::string& filename);
virtual ~NumpyArray() {}

/**
 * \brief Get size
 * @return size
 */
size_t size() const override {
return _size;
}

std::shared_ptr<unsigned char> getData(size_t width = 0, size_t height = 0) override {
return _data;
}
};
}  // namespace FormatReader
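A minimal sketch of how a sample could consume the new reader through the common FormatReader registry. The include name, the get() null check, and the file path are assumptions made for this example; shape(), getData() and size() are the accessors shown in this patch:

#include <format_reader_ptr.h>

#include <memory>
#include <vector>

int main() {
    // The constructor dispatches to NumpyArray thanks to its Register<NumpyArray> entry.
    FormatReader::ReaderPtr reader("input.npy");
    if (reader.get() == nullptr) {
        return 1;  // unsupported or unreadable file
    }
    std::vector<size_t> dims = reader->shape();                 // full tensor shape from the NPY header
    std::shared_ptr<unsigned char> bytes = reader->getData();   // raw payload that follows the header
    size_t byte_count = reader->size();                         // payload size in bytes
    return (bytes && byte_count > 0 && !dims.empty()) ? 0 : 1;
}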
@ -27,6 +27,8 @@ OCVReader::OCVReader(const string& filename) {
|
||||
_size = img.size().width * img.size().height * img.channels();
|
||||
_width = img.size().width;
|
||||
_height = img.size().height;
|
||||
_shape.push_back(_height);
|
||||
_shape.push_back(_width);
|
||||
}
|
||||
|
||||
std::shared_ptr<unsigned char> OCVReader::getData(size_t width = 0, size_t height = 0) {
|
||||
|
@ -106,9 +106,11 @@ class TorchScriptPythonDecoder (Decoder):
|
||||
else:
|
||||
self.graph_element = graph_element
|
||||
self.pt_module = pt_module
|
||||
self.raw_inputs = list(self.graph_element.inputs())
|
||||
self.raw_outputs = list(self.graph_element.outputs())
|
||||
|
||||
def inputs(self) -> list:
|
||||
return [x.unique() for x in self.graph_element.inputs()]
|
||||
return [x.unique() for x in self.raw_inputs]
|
||||
|
||||
def get_input(self, index: int):
|
||||
return self.inputs()[index]
|
||||
@ -207,22 +209,16 @@ class TorchScriptPythonDecoder (Decoder):
|
||||
return self.graph_element.schema()
|
||||
|
||||
def outputs(self) -> list:
|
||||
return [x.unique() for x in self.graph_element.outputs()]
|
||||
|
||||
def _raw_outputs(self) -> list:
|
||||
return list(self.graph_element.outputs())
|
||||
return [x.unique() for x in self.raw_outputs]
|
||||
|
||||
def _raw_output(self, index: int):
|
||||
return self._raw_outputs()[index]
|
||||
|
||||
def _raw_inputs(self) -> list:
|
||||
return list(self.graph_element.inputs())
|
||||
return self.raw_outputs[index]
|
||||
|
||||
def _raw_input(self, index: int):
|
||||
return self._raw_inputs()[index]
|
||||
return self.raw_inputs[index]
|
||||
|
||||
def num_of_outputs(self):
|
||||
return len(self.outputs())
|
||||
return len(self.raw_outputs)
|
||||
|
||||
def output(self, index: int):
|
||||
return self.outputs()[index]
|
||||
|
@ -18,10 +18,6 @@ class PyDecoder : public ov::frontend::pytorch::TorchDecoder {
|
||||
PYBIND11_OVERRIDE_PURE(ov::Any, TorchDecoder, const_input, index);
|
||||
}
|
||||
|
||||
size_t input(size_t index) const override {
|
||||
PYBIND11_OVERRIDE_PURE(size_t, TorchDecoder, get_input, index);
|
||||
}
|
||||
|
||||
const std::vector<size_t>& inputs() const override {
|
||||
PYBIND11_OVERRIDE_PURE(const std::vector<size_t>&, TorchDecoder, inputs);
|
||||
}
|
||||
|
@ -132,15 +132,6 @@ public:
|
||||
const ngraph::Output<ngraph::Node>& parent,
|
||||
const ngraph::Output<ngraph::Node>& subtract_constant);
|
||||
|
||||
static FakeQuantizeDequantization createDequantizationFromFakeQuantize(
|
||||
std::shared_ptr<opset1::FakeQuantize> fq,
|
||||
element::Type precision,
|
||||
float min,
|
||||
float max,
|
||||
const bool hasZeroPoint,
|
||||
const bool updatePrecision,
|
||||
const element::Type deqPrecision = element::f32);
|
||||
|
||||
static bool areQuantizeAndDequantizeSupportedForSubtract(const std::shared_ptr<const ngraph::Node>& node,
|
||||
const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support);
|
||||
|
||||
@ -259,7 +250,7 @@ public:
|
||||
|
||||
static ov::Output<ov::Node> getSingleConsumerConstant(const ov::Output<ov::Node>& output);
|
||||
|
||||
static bool checkConstantOnInf(const std::shared_ptr<Node> constant_node);
|
||||
static bool checkConstantNotInf(const std::shared_ptr<Node> constant_node);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Node> foldFakeQuantize(
|
||||
|
@ -189,8 +189,8 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
||||
auto newMultiplyFullPathValues = fold<opset1::Divide>(multiplyFullPathValues, multiplyEmptyPathValues);
|
||||
|
||||
// Transformation can't be applied if new full path values break accuracy because of Inf values
|
||||
if (!NetworkHelper::checkConstantOnInf(newSubtractFullPathValues) ||
|
||||
!NetworkHelper::checkConstantOnInf(newMultiplyFullPathValues)) {
|
||||
if (!NetworkHelper::checkConstantNotInf(newSubtractFullPathValues) ||
|
||||
!NetworkHelper::checkConstantNotInf(newMultiplyFullPathValues)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -179,8 +179,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
||||
|
||||
inputLowConst_f32 = fold<opset1::Divide>(inputLowConst_f32, value);
|
||||
inputHighConst_f32 = fold<opset1::Divide>(inputHighConst_f32, value);
|
||||
if (!NetworkHelper::checkConstantOnInf(inputLowConst_f32) ||
|
||||
!NetworkHelper::checkConstantOnInf(inputHighConst_f32)) {
|
||||
if (!NetworkHelper::checkConstantNotInf(inputLowConst_f32) ||
|
||||
!NetworkHelper::checkConstantNotInf(inputHighConst_f32)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1200,72 +1200,6 @@ std::shared_ptr<ov::Node> NetworkHelper::makeDequantizationSubtract(
|
||||
: std::make_shared<opset1::Subtract>(parent, subtract_constant);
|
||||
}
|
||||
|
||||
FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
|
||||
std::shared_ptr<opset1::FakeQuantize> fq,
|
||||
element::Type precision,
|
||||
float min,
|
||||
float max,
|
||||
const bool hasZeroPoint,
|
||||
const bool updatePrecision,
|
||||
const element::Type deqPrecision) {
|
||||
const ngraph::element::Type_t fqPrecision = fq->get_output_element_type(0);
|
||||
auto newMin = std::make_shared<opset1::Constant>(fqPrecision, Shape{}, min);
|
||||
auto newMax = std::make_shared<opset1::Constant>(fqPrecision, Shape{}, max);
|
||||
|
||||
auto outputLow = fq->input_value(3);
|
||||
auto outputHigh = fq->input_value(4);
|
||||
|
||||
// TODO: threshold values have to used here to avoid shifts
|
||||
|
||||
const std::shared_ptr<opset1::Constant> scale = ov::as_type_ptr<opset1::Constant>(foldConvert(fold<opset1::Divide>(
|
||||
fold<opset1::Subtract>(outputHigh, outputLow),
|
||||
fold<opset1::Subtract>(newMax->output(0), newMin->output(0))), deqPrecision));
|
||||
assert(scale != nullptr);
|
||||
|
||||
std::shared_ptr<opset1::Constant> shift = hasZeroPoint ?
|
||||
ov::as_type_ptr<opset1::Constant>(foldConvert(fold<opset1::Divide>(
|
||||
fold<opset1::Subtract>(fold<opset1::Multiply>(newMin->output(0), outputHigh), fold<opset1::Multiply>(newMax->output(0), outputLow)),
|
||||
fold<opset1::Subtract>(outputHigh, outputLow)), deqPrecision)) :
|
||||
nullptr;
|
||||
assert((!hasZeroPoint) || (hasZeroPoint && shift != nullptr));
|
||||
|
||||
if (shift != nullptr) {
|
||||
std::shared_ptr<opset1::Constant> shiftConst = ov::as_type_ptr<opset1::Constant>(shift);
|
||||
if (isScalarLike(shiftConst)) {
|
||||
auto scalar = toScalar(shiftConst);
|
||||
if (ov::op::util::constantIsEqualTo(scalar, 0)) {
|
||||
shift = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto input = std::make_shared<ngraph::opset1::Parameter>(
|
||||
updatePrecision ? precision : fq->get_output_element_type(0),
|
||||
fq->get_output_partial_shape(0));
|
||||
std::shared_ptr<ngraph::Node> parent = input;
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Convert> convert;
|
||||
if (updatePrecision || (parent->output(0).get_element_type() != deqPrecision)) {
|
||||
convert = std::make_shared<opset1::Convert>(parent, deqPrecision);
|
||||
parent = convert;
|
||||
} else {
|
||||
convert = nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Subtract> subtract;
|
||||
if (shift != nullptr) {
|
||||
subtract = std::make_shared<ov::op::TypeRelaxed<opset1::Subtract>>(parent, shift);
|
||||
subtract->set_output_type(0, deqPrecision, subtract->get_output_partial_shape(0));
|
||||
parent = subtract;
|
||||
} else {
|
||||
subtract = nullptr;
|
||||
}
|
||||
const std::shared_ptr<ngraph::opset1::Multiply> multiply = std::make_shared<opset1::Multiply>(parent, scale);
|
||||
multiply->set_output_type(0, fq->get_output_element_type(0), multiply->get_output_partial_shape(0));
|
||||
|
||||
return FakeQuantizeDequantization(fq, convert, subtract, nullptr, shift, multiply, scale);
|
||||
}
|
||||
|
||||
bool NetworkHelper::areQuantizeAndDequantizeSupportedForSubtract(const std::shared_ptr<const ngraph::Node>& node,
|
||||
const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
||||
if (!ov::is_type<opset1::Subtract>(node)) {
|
||||
@ -2011,7 +1945,7 @@ ov::Output<ov::Node> NetworkHelper::getSingleConsumerConstant(const ov::Output<o
|
||||
: node->clone_with_new_inputs(node->input_values())->output(0);
|
||||
}
|
||||
|
||||
bool NetworkHelper::checkConstantOnInf(const std::shared_ptr<Node> constant_node) {
|
||||
bool NetworkHelper::checkConstantNotInf(const std::shared_ptr<Node> constant_node) {
|
||||
const auto constant = ov::as_type_ptr<opset1::Constant>(constant_node);
|
||||
if (constant == nullptr)
|
||||
return false;
|
||||
|
@ -346,3 +346,126 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstantWeights) {
|
||||
// Input graph:
|
||||
//
|
||||
// Parameter
|
||||
// |F32
|
||||
// |
|
||||
// FakeQuantize Constant
|
||||
// |F32 |I8
|
||||
// | |
|
||||
// Convert Constant Clamp Constant
|
||||
// |U8 |U8 |I8 |I8
|
||||
// | | | |
|
||||
// Convert Convert(DCF) Convert(DCF) Convert(DCF)
|
||||
// \FP32 /FP32 |FP32 /F32
|
||||
// \ / | /
|
||||
// Subtract Constant Subtract Constant
|
||||
// \FP32 /FP32 |FP32 /FP32
|
||||
// \ / | /
|
||||
// Multiply Multiply
|
||||
// \FP32 /FP32
|
||||
// \ /
|
||||
// Convolution
|
||||
//
|
||||
// After MarkDequantizationSubgraph all Subtract and Multiply nodes from above graph
|
||||
// are marked with 'DequantizationNode' attribute.
|
||||
// Also all 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute
|
||||
|
||||
{
|
||||
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
|
||||
std::shared_ptr<Node> activations =
|
||||
std::make_shared<opset10::FakeQuantize>(parameter,
|
||||
opset10::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {254}),
|
||||
255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset10::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
|
||||
auto zero_point = opset10::Constant::create(element::u8, Shape{}, {127});
|
||||
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
|
||||
auto subtract = std::make_shared<opset10::Subtract>(second_convert, convert_on_zero_point);
|
||||
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset10::Multiply>(subtract, scale);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-3});
|
||||
{
|
||||
auto clamp = std::make_shared<opset10::Clamp>(weights, -2, 2);
|
||||
auto convert = std::make_shared<opset10::Convert>(clamp, element::f32);
|
||||
auto zero_point = opset10::Constant::create(element::i8, Shape{}, {127});
|
||||
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
|
||||
auto subtract = std::make_shared<opset10::Subtract>(convert, convert_on_zero_point);
|
||||
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset10::Multiply>(subtract, scale);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset10::Convolution>(activations,
|
||||
weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(conv, ParameterVector{parameter});
|
||||
}
|
||||
|
||||
manager.register_pass<pass::MarkDequantizationSubgraph>(element::TypeVector{element::u8, element::i8});
|
||||
manager.register_pass<pass::ConstantFolding>();
|
||||
|
||||
{
|
||||
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
|
||||
std::shared_ptr<Node> activations =
|
||||
std::make_shared<opset10::FakeQuantize>(parameter,
|
||||
opset10::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {254}),
|
||||
255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset10::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
|
||||
auto zero_point = opset10::Constant::create(element::u8, Shape{}, {127});
|
||||
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
|
||||
pass::disable_constant_folding(convert_on_zero_point);
|
||||
auto subtract = std::make_shared<opset10::Subtract>(second_convert, convert_on_zero_point);
|
||||
mark_as_dequantization_node(subtract);
|
||||
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset10::Multiply>(subtract, scale);
|
||||
mark_as_dequantization_node(multiply);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
|
||||
{
|
||||
// Clamp was constantfolded
|
||||
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
|
||||
pass::disable_constant_folding(convert);
|
||||
auto zero_point = opset10::Constant::create(element::i8, Shape{}, {127});
|
||||
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
|
||||
pass::disable_constant_folding(convert_on_zero_point);
|
||||
auto subtract = std::make_shared<opset10::Subtract>(convert, convert_on_zero_point);
|
||||
mark_as_dequantization_node(subtract);
|
||||
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset10::Multiply>(subtract, scale);
|
||||
mark_as_dequantization_node(multiply);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset10::Convolution>(activations,
|
||||
weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function_ref = std::make_shared<Model>(conv, ParameterVector{parameter});
|
||||
}
|
||||
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS);
|
||||
}
|
||||
|
@ -10,9 +10,26 @@
|
||||
#include <transformations/rt_info/dequantization_node.hpp>
|
||||
#include <transformations/rt_info/disable_constant_folding.hpp>
|
||||
|
||||
#include "bound_evaluation_util.hpp"
|
||||
static bool is_constfoldable(const ov::Output<ov::Node>& output) {
|
||||
auto status = true;
|
||||
std::deque<ov::Node*> nodes_to_calculate = {output.get_node()};
|
||||
|
||||
using namespace ngraph;
|
||||
while (status && !nodes_to_calculate.empty()) {
|
||||
auto current_node = nodes_to_calculate.front();
|
||||
nodes_to_calculate.pop_front();
|
||||
|
||||
if (current_node->get_input_size() == 0 && !ov::is_type<ov::op::v0::Constant>(current_node)) {
|
||||
status = false;
|
||||
} else {
|
||||
// not a leaf, not a shape_of -- continue to search
|
||||
for (const auto& input_value : current_node->input_values()) {
|
||||
const auto& input_node = input_value.get_node();
|
||||
nodes_to_calculate.push_front(input_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::TypeVector& precisions) {
|
||||
// Dequantization subgraph may have two forms: with and without Subtract
|
||||
@ -51,14 +68,13 @@ ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::
|
||||
}
|
||||
}
|
||||
|
||||
// validation by Convert operation input precisions
|
||||
const auto& input_precision = input->get_output_element_type(0);
|
||||
// validation by Convert operation input precisions
|
||||
if (std::find(precisions.begin(), precisions.end(), input_precision) == precisions.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<Node*> tmp;
|
||||
if (ov::could_propagate(input, tmp)) {
|
||||
if (is_constfoldable(input)) {
|
||||
// disable ConstantFolding if dequantization subgraph is on constant data
|
||||
ov::disable_constant_folding(convert);
|
||||
}
|
||||
|
@ -4,94 +4,82 @@
|
||||
|
||||
#include "transformations/op_conversions/convert_batch_to_space.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <memory>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <openvino/opsets/opset3.hpp>
|
||||
#include <openvino/opsets/opset10.hpp>
|
||||
#include <vector>
|
||||
|
||||
#include "itt.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace ov::opset10;
|
||||
using namespace ov::element;
|
||||
|
||||
void ov::pass::ConvertBatchToSpace::convert_batch_to_space() {
|
||||
MATCHER_SCOPE(ConvertBatchToSpace_convert_batch_to_space);
|
||||
auto batch_to_space = ngraph::pattern::wrap_type<ov::opset3::BatchToSpace>();
|
||||
matcher_pass_callback callback = [](pattern::Matcher& m) {
|
||||
auto batch_to_space = std::dynamic_pointer_cast<ov::opset3::BatchToSpace>(m.get_match_root());
|
||||
if (!batch_to_space) {
|
||||
const auto batch_to_space = pattern::wrap_type<BatchToSpace>();
|
||||
matcher_pass_callback callback = [this](pattern::Matcher& m) {
|
||||
const auto batch_to_space = dynamic_pointer_cast<BatchToSpace>(m.get_match_root());
|
||||
if (!batch_to_space || transformation_callback(batch_to_space)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
NodeVector new_ops;
|
||||
auto data = batch_to_space->input_value(0);
|
||||
auto block = batch_to_space->input_value(1);
|
||||
auto crops_begin = batch_to_space->input_value(2);
|
||||
auto crops_end = batch_to_space->input_value(3);
|
||||
NodeRegistry rg;
|
||||
const auto data = batch_to_space->input_value(0);
|
||||
const auto block = batch_to_space->input_value(1);
|
||||
const auto crops_begin = batch_to_space->input_value(2);
|
||||
const auto crops_end = batch_to_space->input_value(3);
|
||||
|
||||
if (data.get_partial_shape().is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
const auto& data_shape = data.get_shape();
|
||||
|
||||
const auto block_const = std::dynamic_pointer_cast<opset3::Constant>(block.get_node_shared_ptr());
|
||||
const auto crops_begin_const = std::dynamic_pointer_cast<opset3::Constant>(crops_begin.get_node_shared_ptr());
|
||||
const auto crops_end_const = std::dynamic_pointer_cast<opset3::Constant>(crops_end.get_node_shared_ptr());
|
||||
|
||||
if (!block_const || !crops_begin_const || !crops_end_const) {
|
||||
return false;
|
||||
const auto data_shape_rank = data.get_partial_shape().rank();
|
||||
if (data_shape_rank.is_dynamic()) {
|
||||
return false; // because StridedSlice masks are std::vector
|
||||
}
|
||||
|
||||
const std::vector<int64_t>& block_values = block_const->cast_vector<int64_t>();
|
||||
const std::vector<int64_t>& crops_end_values = crops_end_const->cast_vector<int64_t>();
|
||||
if (block.get_partial_shape().is_dynamic() || block.get_shape().size() == 0) {
|
||||
return false;
|
||||
}
|
||||
const auto block_length = static_cast<int64_t>(block.get_shape()[0]);
|
||||
|
||||
// First we have to disperse the data from batch, then rearrange them
|
||||
// so that appropriate chunks of data were close to their destination place.
|
||||
// Finally squeeze data from respective dimensions.ss
|
||||
std::vector<int64_t> dispersed_shape;
|
||||
int64_t b_dim_divider = 1;
|
||||
for (const auto& el : block_values) {
|
||||
b_dim_divider *= el;
|
||||
}
|
||||
// Finally squeeze data from respective dimensions
const auto zero = rg.make<Constant>(i64, Shape{1}, 0);
const auto shape_of_data = rg.make<ShapeOf>(data, block.get_element_type());
const auto batch = rg.make<Gather>(shape_of_data, zero, zero);
const auto block_prod = rg.make<ReduceProd>(block, zero);
const auto batch_div = rg.make<Divide>(batch, block_prod);

// note: B_0 is expected to be 1.
// x' = reshape(`data`, [B_1, ..., B_{N - 1}, batch / (B_1 * ... B_{N - 1}), D_1, D_2, ...,
// D_{N - 1}]),
// where B_i = block_shape[i]
dispersed_shape.insert(dispersed_shape.begin(), block_values.begin() + 1, block_values.end());
dispersed_shape.push_back(data_shape.at(0) / b_dim_divider);
for (size_t i = 1; i < data_shape.size(); ++i) {
dispersed_shape.push_back(data_shape.at(i));
}

const auto out_pattern_1 =
opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape);
const auto one = rg.make<Constant>(i64, Shape{1}, 1);
const auto end = rg.make<Constant>(i64, Shape{1}, block_length);
const auto block_tail = rg.make<Slice>(block, one, end, one);
const auto data_shape_tail = rg.make<Slice>(shape_of_data, one, end, one);
const auto dispersed_shape = rg.make<Concat>(OutputVector{block_tail, batch_div, data_shape_tail}, 0);
const bool special_zero = false;
std::shared_ptr<Node> flat_node = std::make_shared<ov::opset3::Reshape>(data, out_pattern_1, special_zero);
new_ops.push_back(flat_node);
shared_ptr<Node> flat_node = rg.make<Reshape>(data, dispersed_shape, special_zero);

// calculate axes to transpose
// x'' = transpose(x', [N, N + 1, 0, N + 2, 1, ..., N + N - 1, N - 1])
std::vector<size_t> axes_order{block_values.size() - 1};
for (size_t i = 0; i < block_values.size() - 1; ++i) {
axes_order.push_back(i + block_values.size());
vector<int64_t> axes_order{block_length - 1};
for (int64_t i = 0; i < block_length - 1; ++i) {
axes_order.push_back(i + block_length);
axes_order.push_back(i);
}
const auto axes_order_const = rg.make<Constant>(i64, Shape{axes_order.size()}, axes_order);
flat_node = rg.make<Transpose>(flat_node, axes_order_const);

const auto axes_order_const =
opset3::Constant::create(element::i64,
Shape{axes_order.size()},
std::vector<int64_t>(axes_order.begin(), axes_order.end()));
flat_node = std::make_shared<ov::opset3::Transpose>(flat_node, axes_order_const);
new_ops.push_back(flat_node);
// x''' = reshape(x'', [batch / (B_1 * ... * B_{N - 1}), D_1 * B_1, D_2 * B_2, ... , D_{N - 1}
// * B_{N - 1}])
std::vector<int64_t> squeezed_shape;
squeezed_shape.push_back(data_shape.at(0) / b_dim_divider);
for (size_t i = 1; i < block_values.size(); ++i) {
squeezed_shape.push_back(data_shape.at(i) * block_values.at(i));
}

const auto out_pattern_2 = opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape);
flat_node = std::make_shared<opset3::Reshape>(flat_node, out_pattern_2, special_zero);
new_ops.push_back(flat_node);
const auto squeezed_shape_tail = rg.make<Multiply>(block_tail, data_shape_tail);
const auto squeezed_shape = rg.make<Concat>(OutputVector{batch_div, squeezed_shape_tail}, 0);
flat_node = rg.make<Reshape>(flat_node, squeezed_shape, special_zero);

// Crop the start and end of dimensions according to `crops_begin`, `crops_end` to produce
// the output of shape:
@ -99,129 +87,133 @@ void ov::pass::ConvertBatchToSpace::convert_batch_to_space() {
// `y = [batch / (B_1 * ... * B_{N - 1}), crop(D_1 * B_1, crops_begin[1], crops_end[1]),
// crop(D_2 * B_2, crops_begin[2], crops_end[2]), ... ,
// crop(D_{N - 1} * B_{N - 1}, crops_begin[N - 1], crops_end[N - 1])]`
std::vector<int64_t> upperbounds_values;
auto flat_node_shape = flat_node->get_shape();
for (size_t i = 0; i < flat_node_shape.size(); ++i) {
upperbounds_values.push_back(flat_node_shape.at(i) - crops_end_values.at(i));
}
const auto shape_of_flat_node = rg.make<ShapeOf>(flat_node, crops_end.get_element_type());
const auto upperbounds = rg.make<Subtract>(shape_of_flat_node, crops_end);

const auto upperbounds = opset3::Constant::create(crops_end.get_element_type(),
Shape{upperbounds_values.size()},
upperbounds_values);

std::vector<int64_t> begin_mask(data_shape.size(), 0);
std::vector<int64_t> end_mask(data_shape.size(), 0);
flat_node =
std::make_shared<opset3::StridedSlice>(flat_node, crops_begin_const, upperbounds, begin_mask, end_mask);
new_ops.push_back(flat_node);
const auto begin_mask = vector<int64_t>(data_shape_rank.get_length(), 0);
const auto& end_mask = begin_mask;
flat_node = rg.make<StridedSlice>(flat_node, crops_begin, upperbounds, begin_mask, end_mask);

flat_node->set_friendly_name(batch_to_space->get_friendly_name());
ngraph::copy_runtime_info(batch_to_space, new_ops);
ngraph::replace_node(batch_to_space, flat_node);
copy_runtime_info(batch_to_space, rg.get());
replace_node(batch_to_space, flat_node);
return true;
};

auto m = std::make_shared<ngraph::pattern::Matcher>(batch_to_space, matcher_name);
const auto m = make_shared<pattern::Matcher>(batch_to_space, matcher_name);
this->register_matcher(m, callback);
}
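As a concrete check of the shape arithmetic in the comments above (using a parameter set that also appears in the decomposition tests later in this diff): for data of shape [6, 5, 7] with block_shape [1, 2, 3] and crops_begin = crops_end = [0, 1, 2], the reshape/transpose/reshape chain produces an intermediate shape of [6 / (2 * 3), 5 * 2, 7 * 3] = [1, 10, 21], and the final StridedSlice crops it to [1, 10 - 1 - 1, 21 - 2 - 2] = [1, 8, 17].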
void ov::pass::ConvertBatchToSpace::convert_batch_to_space_by_elements() {
|
||||
MATCHER_SCOPE(ConvertBatchToSpace_convert_batch_to_space_by_elements);
|
||||
auto batch_to_space = ngraph::pattern::wrap_type<ov::opset3::BatchToSpace>();
|
||||
const auto batch_to_space = pattern::wrap_type<BatchToSpace>();
|
||||
matcher_pass_callback callback = [this](pattern::Matcher& m) {
|
||||
auto batch_to_space = std::dynamic_pointer_cast<ov::opset3::BatchToSpace>(m.get_match_root());
|
||||
if (!batch_to_space) {
|
||||
const auto batch_to_space = dynamic_pointer_cast<BatchToSpace>(m.get_match_root());
|
||||
if (!batch_to_space || transformation_callback(batch_to_space)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto data = batch_to_space->input_value(0);
|
||||
const auto data = batch_to_space->input_value(0);
|
||||
|
||||
if (data.get_partial_shape().is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
auto data_shape = data.get_shape();
|
||||
|
||||
if (transformation_callback(batch_to_space) && (data_shape.size() == 4 || data_shape.size() == 5)) {
|
||||
return false;
|
||||
}
|
||||
auto block = batch_to_space->input_value(1);
|
||||
auto crops_begin = batch_to_space->input_value(2);
|
||||
auto crops_end = batch_to_space->input_value(3);
|
||||
|
||||
const auto block_const = ov::as_type_ptr<opset3::Constant>(block.get_node_shared_ptr());
|
||||
const auto crops_begin_const = ov::as_type_ptr<opset3::Constant>(crops_begin.get_node_shared_ptr());
|
||||
const auto crops_end_const = ov::as_type_ptr<opset3::Constant>(crops_end.get_node_shared_ptr());
|
||||
|
||||
const std::vector<int64_t>& block_values = block_const->cast_vector<int64_t>();
|
||||
const std::vector<int64_t>& crops_end_values = crops_end_const->cast_vector<int64_t>();
|
||||
|
||||
std::vector<int64_t> dispersed_shape(1);
|
||||
dispersed_shape.insert(dispersed_shape.end(), data_shape.begin(), data_shape.end());
|
||||
std::vector<size_t> axes_order(block_values.size() + 1);
|
||||
std::vector<int64_t> squeezed_shape(data_shape.begin(), data_shape.end());
|
||||
if (squeezed_shape.size() > block_values.size()) {
|
||||
return false;
|
||||
const auto data_shape_rank = data.get_partial_shape().rank();
|
||||
if (data_shape_rank.is_dynamic()) {
|
||||
return false; // because StridedSlice masks are std::vector
|
||||
}
|
||||
|
||||
NodeVector new_ops;
|
||||
const auto block = batch_to_space->input_value(1);
|
||||
const auto crops_begin = batch_to_space->input_value(2);
|
||||
const auto crops_end = batch_to_space->input_value(3);
|
||||
|
||||
std::shared_ptr<Node> flat_node = data.get_node_shared_ptr();
|
||||
for (size_t block_idx = 1; block_idx < block_values.size(); ++block_idx) {
|
||||
dispersed_shape[0] = block_values[block_idx];
|
||||
dispersed_shape[1] /= block_values[block_idx];
|
||||
const auto out_pattern_1 =
|
||||
opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape);
|
||||
const bool special_zero = false;
|
||||
flat_node = std::make_shared<ov::opset3::Reshape>(flat_node, out_pattern_1, special_zero);
|
||||
new_ops.push_back(flat_node);
|
||||
if (block.get_partial_shape().is_dynamic() || block.get_shape().size() == 0) {
|
||||
return false;
|
||||
}
|
||||
const auto block_length = static_cast<int64_t>(block.get_shape()[0]);
|
||||
|
||||
size_t val = 1;
|
||||
for (size_t axis_idx = 0; axis_idx <= block_values.size(); ++axis_idx) {
|
||||
if ((block_idx + 1) == axis_idx) {
|
||||
NodeRegistry rg;
|
||||
const auto zero = rg.make<Constant>(i64, Shape{1}, 0);
|
||||
const auto one = rg.make<Constant>(i64, Shape{1}, 1);
|
||||
const auto two = rg.make<Constant>(i64, Shape{1}, 2);
|
||||
const auto int_max = rg.make<Constant>(i64, Shape{1}, INT_MAX);
|
||||
|
||||
const auto shape_of_data = rg.make<ShapeOf>(data, block.get_element_type());
|
||||
const auto et_zero = rg.make<Constant>(block.get_element_type(), Shape{1}, 0);
|
||||
shared_ptr<Node> dispersed_shape = rg.make<Concat>(OutputVector{et_zero, shape_of_data}, 0);
|
||||
shared_ptr<Node> squeezed_shape = shape_of_data;
|
||||
|
||||
shared_ptr<Node> flat_node = data.get_node_shared_ptr();
|
||||
|
||||
const auto make_concat = [&](OutputVector nodes) {
|
||||
nodes.erase(remove_if(nodes.begin(),
|
||||
nodes.end(),
|
||||
[](const Output<Node>& n) {
|
||||
return n.get_partial_shape().is_static() && n.get_shape().size() > 0 &&
|
||||
n.get_shape()[0] == 0;
|
||||
}),
|
||||
nodes.end());
|
||||
return rg.make<Concat>(nodes, 0);
|
||||
};
|
||||
|
||||
shared_ptr<Node> div;
|
||||
for (int64_t b_idx = 1; b_idx < block_length; ++b_idx) {
|
||||
const auto block_index = rg.make<Constant>(i64, Shape{1}, b_idx);
|
||||
const auto block_index_next = rg.make<Constant>(i64, Shape{1}, b_idx + 1);
|
||||
const auto block_value = rg.make<Gather>(block, block_index, zero);
|
||||
|
||||
// dispersed_shape[0] = block[b_idx];
|
||||
// dispersed_shape[1] /= block[b_idx];
|
||||
if (!div) {
|
||||
const auto batch = rg.make<Gather>(shape_of_data, zero, zero);
|
||||
div = rg.make<Divide>(batch, block_value);
|
||||
} else {
|
||||
div = rg.make<Divide>(div, block_value);
|
||||
}
|
||||
auto ds_tail = rg.make<Slice>(dispersed_shape, two, int_max, one);
|
||||
dispersed_shape = make_concat({block_value, div, ds_tail});
|
||||
constexpr auto special_zero = false;
|
||||
flat_node = rg.make<Reshape>(flat_node, dispersed_shape, special_zero);
|
||||
|
||||
vector<int64_t> axes_order(block_length + 1);
|
||||
int64_t val = 1;
|
||||
for (int64_t axis_idx = 0; axis_idx <= block_length; ++axis_idx) {
|
||||
if ((b_idx + 1) == axis_idx) {
|
||||
axes_order[axis_idx] = 0;
|
||||
} else {
|
||||
axes_order[axis_idx] = val;
|
||||
val++;
|
||||
}
|
||||
}
|
||||
const auto axes_order_const = rg.make<Constant>(i64, Shape{axes_order.size()}, axes_order);
|
||||
flat_node = rg.make<Transpose>(flat_node, axes_order_const);
|
||||
|
||||
const auto axes_order_const =
|
||||
ov::opset3::Constant::create(element::i64,
|
||||
Shape{axes_order.size()},
|
||||
std::vector<int64_t>(axes_order.begin(), axes_order.end()));
|
||||
flat_node = std::make_shared<ov::opset3::Transpose>(flat_node, axes_order_const);
|
||||
new_ops.push_back(flat_node);
|
||||
// squeezed_shape[0] = dispersed_shape[1];
|
||||
// squeezed_shape[b_idx] *= block[b_idx];
|
||||
const auto sq_slice = rg.make<Slice>(squeezed_shape, one, block_index, one);
|
||||
const auto sq_bidx_dim = rg.make<Gather>(squeezed_shape, block_index, zero);
|
||||
const auto sq_mul = rg.make<Multiply>(sq_bidx_dim, block_value);
|
||||
const auto sq_shape_tail = rg.make<Slice>(squeezed_shape, block_index_next, int_max, one);
|
||||
squeezed_shape.reset();
|
||||
squeezed_shape = make_concat({div, sq_slice, sq_mul, sq_shape_tail});
|
||||
flat_node = rg.make<Reshape>(flat_node, squeezed_shape, special_zero);
|
||||
|
||||
squeezed_shape[0] = dispersed_shape[1];
|
||||
squeezed_shape[block_idx] *= block_values[block_idx];
|
||||
dispersed_shape[block_idx + 1] = squeezed_shape[block_idx];
|
||||
const auto out_pattern_2 =
|
||||
opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape);
|
||||
flat_node = std::make_shared<ov::opset3::Reshape>(flat_node, out_pattern_2, special_zero);
|
||||
new_ops.push_back(flat_node);
|
||||
// dispersed_shape[b_idx + 1] = squeezed_shape[b_idx];
|
||||
const auto ds_front = rg.make<Slice>(dispersed_shape, zero, block_index_next, one);
|
||||
ds_tail = rg.make<Slice>(dispersed_shape, rg.make<Constant>(i64, Shape{1}, b_idx + 2), int_max, one);
|
||||
dispersed_shape = make_concat({ds_front, sq_mul, ds_tail});
|
||||
}
|
||||
|
||||
std::vector<int64_t> upperbounds_values;
|
||||
auto flat_node_shape = flat_node->get_shape();
|
||||
for (size_t i = 0; i < flat_node_shape.size(); ++i) {
|
||||
upperbounds_values.push_back(flat_node_shape.at(i) - crops_end_values.at(i));
|
||||
}
|
||||
const auto upperbounds = opset3::Constant::create(crops_end.get_element_type(),
|
||||
Shape{upperbounds_values.size()},
|
||||
upperbounds_values);
|
||||
const auto shape_of_flat_node = rg.make<ShapeOf>(flat_node, crops_end.get_element_type());
|
||||
const auto upperbounds = rg.make<Subtract>(shape_of_flat_node, crops_end);
|
||||
|
||||
std::vector<int64_t> begin_mask(data_shape.size(), 0);
|
||||
std::vector<int64_t> end_mask(data_shape.size(), 0);
|
||||
flat_node =
|
||||
std::make_shared<opset3::StridedSlice>(flat_node, crops_begin_const, upperbounds, begin_mask, end_mask);
|
||||
new_ops.push_back(flat_node);
|
||||
const auto begin_mask = vector<int64_t>(data_shape_rank.get_length(), 0);
|
||||
const auto& end_mask = begin_mask;
|
||||
flat_node = rg.make<StridedSlice>(flat_node, crops_begin, upperbounds, begin_mask, end_mask);
|
||||
|
||||
flat_node->set_friendly_name(batch_to_space->get_friendly_name());
|
||||
ngraph::copy_runtime_info(batch_to_space, new_ops);
|
||||
ngraph::replace_node(batch_to_space, flat_node);
|
||||
copy_runtime_info(batch_to_space, rg.get());
|
||||
replace_node(batch_to_space, flat_node);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(batch_to_space, matcher_name);
|
||||
const auto m = make_shared<pattern::Matcher>(batch_to_space, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
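Both callbacks above follow the same NodeRegistry pattern: every replacement node is created through rg.make<...>() so that runtime info can be copied to the whole new subgraph with a single copy_runtime_info(original, rg.get()) call. A minimal fragment illustrating the pattern; `original` is a placeholder for the matched node, the includes and using-directives are assumed from the surrounding file, and this sketch is not part of the change itself:

// Sketch only: the NodeRegistry bookkeeping used by the conversions above.
ov::pass::NodeRegistry rg;
const auto zero = rg.make<Constant>(i64, Shape{1}, 0);               // recorded in rg
const auto shape = rg.make<ShapeOf>(original->input_value(0), i64);  // recorded in rg
const auto batch = rg.make<Gather>(shape, zero, zero);               // recorded in rg
// ... build the rest of the replacement subgraph via rg.make<...>() ...
copy_runtime_info(original, rg.get());  // propagate rt_info to every registered node at once
replace_node(original, batch);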
@ -4,42 +4,43 @@

#include "transformations/op_conversions/convert_space_to_batch.hpp"

#include <climits>
#include <memory>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <openvino/opsets/opset3.hpp>
#include <openvino/opsets/opset10.hpp>
#include <vector>

#include "itt.hpp"

using namespace std;
using namespace ov::opset10;
using namespace ov::element;

void ov::pass::ConvertSpaceToBatch::convert_space_to_batch() {
|
||||
MATCHER_SCOPE(ConvertSpaceToBatch_convert_space_to_batch);
|
||||
auto space_to_batch = ngraph::pattern::wrap_type<ov::opset3::SpaceToBatch>();
|
||||
matcher_pass_callback callback = [](pattern::Matcher& m) {
|
||||
auto space_to_batch = std::dynamic_pointer_cast<ov::opset3::SpaceToBatch>(m.get_match_root());
|
||||
if (!space_to_batch) {
|
||||
const auto space_to_batch = pattern::wrap_type<SpaceToBatch>();
|
||||
matcher_pass_callback callback = [this](pattern::Matcher& m) {
|
||||
const auto space_to_batch = dynamic_pointer_cast<SpaceToBatch>(m.get_match_root());
|
||||
if (!space_to_batch || transformation_callback(space_to_batch)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
NodeVector new_ops;
|
||||
auto data = space_to_batch->input_value(0);
|
||||
auto block = space_to_batch->input_value(1);
|
||||
auto pads_begin = space_to_batch->input_value(2);
|
||||
auto pads_end = space_to_batch->input_value(3);
|
||||
|
||||
if (data.get_partial_shape().is_dynamic()) {
|
||||
const auto data = space_to_batch->input_value(0);
|
||||
if (data.get_partial_shape().rank().is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto block_const = std::dynamic_pointer_cast<opset3::Constant>(block.get_node_shared_ptr());
|
||||
const auto pads_begin_const = std::dynamic_pointer_cast<opset3::Constant>(pads_begin.get_node_shared_ptr());
|
||||
const auto pads_end_const = std::dynamic_pointer_cast<opset3::Constant>(pads_end.get_node_shared_ptr());
|
||||
const auto block = space_to_batch->input_value(1);
|
||||
const auto pads_begin = space_to_batch->input_value(2);
|
||||
const auto pads_end = space_to_batch->input_value(3);
|
||||
|
||||
if (!block_const || !pads_begin_const || !pads_end_const) {
|
||||
if (block.get_partial_shape().is_dynamic() || block.get_shape().size() == 0) {
|
||||
return false;
|
||||
}
|
||||
const auto block_length = static_cast<int64_t>(block.get_shape()[0]);
|
||||
|
||||
const std::vector<int64_t>& block_values = block_const->cast_vector<int64_t>();
|
||||
NodeRegistry rg;
|
||||
|
||||
// Zero-pad the start and end of dimensions [D_0, ..., D_{N - 1}] of the input according to
|
||||
// `pads_begin`
|
||||
@ -47,162 +48,159 @@ void ov::pass::ConvertSpaceToBatch::convert_space_to_batch() {
|
||||
// note: P_0 for batch dimension is expected to be 0 (no-padding).
|
||||
// x = [batch + P_0, D_1 + P_1, D_2 + P_2, ..., D_{N - 1} + P_{N - 1}], where P_i =
|
||||
// pads_begin[i] + pads_end[i]
|
||||
std::shared_ptr<Node> flat_node =
|
||||
std::make_shared<opset3::Pad>(data, pads_begin_const, pads_end_const, ngraph::op::PadMode::CONSTANT);
|
||||
auto out_shape = flat_node->get_shape();
|
||||
new_ops.push_back(flat_node);
|
||||
shared_ptr<Node> flat_node = rg.make<Pad>(data, pads_begin, pads_end, op::PadMode::CONSTANT);
|
||||
const auto out_shape = rg.make<ShapeOf>(flat_node, block.get_element_type());
|
||||
|
||||
const auto zero = rg.make<Constant>(i64, Shape{1}, 0);
|
||||
const auto one = rg.make<Constant>(i64, Shape{1}, 1);
|
||||
const auto int_max = rg.make<Constant>(i64, Shape{1}, INT_MAX);
|
||||
|
||||
// First we have to disperse the data from spatial dimensions, then
|
||||
// rearrange them so that appropriate chunks of data are close to their
|
||||
// destination place. Finally squeeze data from respective dimensions.
|
||||
Shape dispersed_shape{out_shape.at(0)};
|
||||
|
||||
// note: B_0 for batch is ignored.
|
||||
// x' = reshape(x, [batch, (D_1 + P_1) / B_1, B_1, (D_2 + P_2) / B_2, B_2, ...,
|
||||
// (D_{N - 1} + P_{N - 1}) / B_{N - 1}, B_{N - 1}]), where B_i = block_shape[i]
|
||||
for (size_t i = 1; i < block_values.size(); ++i) {
|
||||
dispersed_shape.push_back(out_shape.at(i) / block_values.at(i));
|
||||
dispersed_shape.push_back(block_values.at(i));
|
||||
}
|
||||
const auto batch = rg.make<Gather>(out_shape, zero, zero);
|
||||
const auto out_shape_tail = rg.make<Slice>(out_shape, one, int_max, one);
|
||||
const auto block_tail = rg.make<Slice>(block, one, int_max, one);
|
||||
const auto os_tail_div = rg.make<Divide>(out_shape_tail, block_tail);
|
||||
|
||||
const auto out_pattern = opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape);
|
||||
flat_node = std::make_shared<ov::opset3::Reshape>(flat_node, out_pattern, false);
|
||||
new_ops.push_back(flat_node);
|
||||
// interleave os_tail_div with block_tail
|
||||
const auto c = rg.make<Concat>(NodeVector{os_tail_div, block_tail}, 0);
|
||||
const auto r =
|
||||
rg.make<Reshape>(c, rg.make<Constant>(i64, Shape{2}, vector<int64_t>{2, block_length - 1}), false);
|
||||
const auto t = rg.make<Transpose>(r, rg.make<Constant>(i64, Shape{2}, vector<int64_t>{1, 0}));
|
||||
const auto interleaved = rg.make<Reshape>(t, rg.make<Constant>(i64, Shape{1}, 2 * (block_length - 1)), false);
|
||||
|
||||
const auto dispersed_shape = rg.make<Concat>(NodeVector{batch, interleaved}, 0);
|
||||
flat_node = rg.make<Reshape>(flat_node, dispersed_shape, false);
|
||||
|
||||
// x'' = transpose(x', [2, 4, ..., (N - 1) + (N - 1), 0, 1, 3, ..., N + (N - 1)])
|
||||
std::vector<size_t> axes_order;
|
||||
for (size_t i = 0, j = 2; i < block_values.size() - 1; ++i, j += 2) {
|
||||
vector<int64_t> axes_order;
|
||||
for (int64_t i = 0, j = 2; i < block_length - 1; ++i, j += 2) {
|
||||
axes_order.push_back(j);
|
||||
}
|
||||
axes_order.push_back(0);
|
||||
for (size_t i = 0, j = 1; i < block_values.size() - 1; ++i, j += 2) {
|
||||
for (int64_t i = 0, j = 1; i < block_length - 1; ++i, j += 2) {
|
||||
axes_order.push_back(j);
|
||||
}
|
||||
|
||||
const auto axes_order_const =
|
||||
opset3::Constant::create(element::i64,
|
||||
Shape{axes_order.size()},
|
||||
std::vector<int64_t>(axes_order.begin(), axes_order.end()));
|
||||
flat_node = std::make_shared<ov::opset3::Transpose>(flat_node, axes_order_const);
|
||||
new_ops.push_back(flat_node);
|
||||
const auto axes_order_const = rg.make<Constant>(i64, Shape{axes_order.size()}, axes_order);
|
||||
flat_node = rg.make<Transpose>(flat_node, axes_order_const);
|
||||
|
||||
Shape squeezed_shape;
|
||||
int64_t prod = 1;
|
||||
for (const auto& el : block_values) {
|
||||
prod *= el;
|
||||
}
|
||||
|
||||
// y = reshape(x'', [batch * B_1 * ... * B_{N - 1}, (D_1 + P_1) / B_1, (D_2 + P_2) / B_2, ...
|
||||
// ,
|
||||
// y = reshape(x'', [batch * B_1 * ... * B_{N - 1}, (D_1 + P_1) / B_1, (D_2 + P_2) / B_2, ...,
|
||||
// (D_{N - 1} + P_{N - 1}) / B_{N - 1}])
|
||||
squeezed_shape.push_back(out_shape.at(0) * prod);
|
||||
for (size_t i = 1; i < block_values.size(); ++i) {
|
||||
squeezed_shape.push_back(out_shape.at(i) / block_values.at(i));
|
||||
}
|
||||
|
||||
const auto out_pattern_2 = opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape);
|
||||
flat_node = std::make_shared<ov::opset3::Reshape>(flat_node, out_pattern_2, false);
|
||||
new_ops.push_back(flat_node);
|
||||
// note: B_0 is assumed to be 1 by op definition
|
||||
const auto block_prod = rg.make<ReduceProd>(block, zero);
|
||||
const auto squeezed_shape = rg.make<Concat>(NodeVector{rg.make<Multiply>(batch, block_prod), os_tail_div}, 0);
|
||||
flat_node = rg.make<Reshape>(flat_node, squeezed_shape, false);
|
||||
|
||||
flat_node->set_friendly_name(space_to_batch->get_friendly_name());
|
||||
ngraph::copy_runtime_info(space_to_batch, new_ops);
|
||||
ngraph::replace_node(space_to_batch, flat_node);
|
||||
copy_runtime_info(space_to_batch, rg.get());
|
||||
replace_node(space_to_batch, flat_node);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(space_to_batch, matcher_name);
|
||||
const auto m = make_shared<pattern::Matcher>(space_to_batch, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
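For the inverse direction the same arithmetic runs in reverse (again matching a parameter set from the tests later in this diff): data of shape [1, 8, 17] with block_shape [1, 2, 3] and pads_begin = pads_end = [0, 1, 2] is first padded to [1, 8 + 1 + 1, 17 + 2 + 2] = [1, 10, 21], and the reshape/transpose/reshape chain then yields [1 * 2 * 3, 10 / 2, 21 / 3] = [6, 5, 7].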
|
||||
|
||||
void ov::pass::ConvertSpaceToBatch::convert_space_to_batch_by_elements() {
|
||||
MATCHER_SCOPE(ConvertSpaceToBatch_convert_space_to_batch_by_elements);
|
||||
auto space_to_batch = ngraph::pattern::wrap_type<ov::opset3::SpaceToBatch>();
|
||||
const auto space_to_batch = pattern::wrap_type<SpaceToBatch>();
|
||||
matcher_pass_callback callback = [this](pattern::Matcher& m) {
|
||||
auto space_to_batch = std::dynamic_pointer_cast<ov::opset3::SpaceToBatch>(m.get_match_root());
|
||||
if (!space_to_batch) {
|
||||
const auto space_to_batch = dynamic_pointer_cast<SpaceToBatch>(m.get_match_root());
|
||||
if (!space_to_batch || transformation_callback(space_to_batch)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto data = space_to_batch->input_value(0);
|
||||
|
||||
if (data.get_partial_shape().is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
const auto& data_shape = data.get_shape();
|
||||
|
||||
if (transformation_callback(space_to_batch) && (data_shape.size() == 4 || data_shape.size() == 5)) {
|
||||
const auto data = space_to_batch->input_value(0);
|
||||
if (data.get_partial_shape().rank().is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto block = space_to_batch->input_value(1);
|
||||
auto pads_begin = space_to_batch->input_value(2);
|
||||
auto pads_end = space_to_batch->input_value(3);
|
||||
const auto block = space_to_batch->input_value(1);
|
||||
const auto pads_begin = space_to_batch->input_value(2);
|
||||
const auto pads_end = space_to_batch->input_value(3);
|
||||
|
||||
const auto block_const = ov::as_type_ptr<opset3::Constant>(block.get_node_shared_ptr());
|
||||
const auto pads_begin_const = ov::as_type_ptr<opset3::Constant>(pads_begin.get_node_shared_ptr());
|
||||
const auto pads_end_const = ov::as_type_ptr<opset3::Constant>(pads_end.get_node_shared_ptr());
|
||||
|
||||
if (!block_const || !pads_begin_const || !pads_end_const) {
|
||||
if (block.get_partial_shape().is_dynamic() || block.get_shape().size() == 0) {
|
||||
return false;
|
||||
}
|
||||
const std::vector<int64_t>& block_values = block_const->cast_vector<int64_t>();
|
||||
const auto block_length = static_cast<int64_t>(block.get_shape()[0]);
|
||||
|
||||
NodeVector new_ops;
|
||||
NodeRegistry rg;
|
||||
|
||||
std::shared_ptr<Node> flat_node =
|
||||
std::make_shared<opset3::Pad>(data, pads_begin_const, pads_end_const, ngraph::op::PadMode::CONSTANT);
|
||||
new_ops.push_back(flat_node);
|
||||
auto out_shape = flat_node->get_shape();
|
||||
shared_ptr<Node> flat_node = rg.make<Pad>(data, pads_begin, pads_end, op::PadMode::CONSTANT);
|
||||
|
||||
std::vector<int64_t> dispersed_shape(block_values.size() + 1);
|
||||
std::vector<size_t> axes_order(block_values.size() + 1);
|
||||
std::vector<int64_t> squeezed_shape(out_shape.begin(), out_shape.end());
|
||||
for (int64_t block_idx = block_values.size() - 1; block_idx >= 0; --block_idx) {
|
||||
int64_t sq_shape_idx = block_values.size() - 1;
|
||||
shared_ptr<Node> squeezed_shape = rg.make<ShapeOf>(flat_node, block.get_element_type());
|
||||
|
||||
const auto zero = rg.make<Constant>(i64, Shape{1}, 0);
|
||||
const auto one = rg.make<Constant>(i64, Shape{1}, 1);
|
||||
const auto int_max = rg.make<Constant>(i64, Shape{1}, INT_MAX);
|
||||
|
||||
for (int64_t b_idx = block_length - 1; b_idx >= 0; --b_idx) {
|
||||
const auto block_index = rg.make<Constant>(i64, Shape{1}, b_idx);
|
||||
const auto block_index_next = rg.make<Constant>(i64, Shape{1}, b_idx + 1);
|
||||
const auto block_value = rg.make<Gather>(block, block_index, zero);
|
||||
|
||||
NodeVector dispersed_shape_prep;
|
||||
dispersed_shape_prep.reserve(block_length + 1);
|
||||
if (b_idx > 0) // avoid adding empty Slice into Concat
|
||||
dispersed_shape_prep.push_back(rg.make<Slice>(squeezed_shape, zero, block_index, one));
|
||||
const auto squeezed_element = rg.make<Gather>(squeezed_shape, block_index, zero);
|
||||
dispersed_shape_prep.push_back(rg.make<Divide>(squeezed_element, block_value));
|
||||
dispersed_shape_prep.push_back(block_value);
|
||||
if (b_idx + 1 < block_length) // avoid adding empty Slice into Concat
|
||||
dispersed_shape_prep.push_back(rg.make<Slice>(squeezed_shape, block_index_next, int_max, one));
|
||||
|
||||
const auto dispersed_shape = rg.make<Concat>(dispersed_shape_prep, 0);
|
||||
constexpr auto special_zero = false;
|
||||
flat_node = rg.make<Reshape>(flat_node, dispersed_shape, special_zero);
|
||||
|
||||
vector<int64_t> axes_order(block_length + 1);
|
||||
int64_t axis_idx = axes_order.size() - 1;
|
||||
for (int64_t shape_idx = dispersed_shape.size() - 1; shape_idx >= 0; --shape_idx) {
|
||||
if (shape_idx == (block_idx + 1)) {
|
||||
dispersed_shape[shape_idx] = block_values[block_idx];
|
||||
axes_order[0] = shape_idx;
|
||||
} else if (shape_idx == block_idx) {
|
||||
dispersed_shape[shape_idx] = squeezed_shape[sq_shape_idx] / block_values[block_idx];
|
||||
axes_order[axis_idx] = shape_idx;
|
||||
for (int64_t ds_idx = block_length; ds_idx >= 0; --ds_idx) {
|
||||
if (ds_idx == (b_idx + 1)) {
|
||||
axes_order[0] = ds_idx;
|
||||
} else if (ds_idx == b_idx) {
|
||||
axes_order[axis_idx] = ds_idx;
|
||||
axis_idx--;
|
||||
sq_shape_idx--;
|
||||
} else {
|
||||
dispersed_shape[shape_idx] = squeezed_shape[sq_shape_idx];
|
||||
axes_order[axis_idx] = shape_idx;
|
||||
axes_order[axis_idx] = ds_idx;
|
||||
axis_idx--;
|
||||
sq_shape_idx--;
|
||||
}
|
||||
}
|
||||
const auto axes_order_const = rg.make<Constant>(i64, Shape{axes_order.size()}, axes_order);
|
||||
flat_node = rg.make<Transpose>(flat_node, axes_order_const);
|
||||
|
||||
const auto out_pattern_1 =
|
||||
opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape);
|
||||
const bool special_zero = false;
|
||||
flat_node = std::make_shared<ov::opset3::Reshape>(flat_node, out_pattern_1, special_zero);
|
||||
new_ops.push_back(flat_node);
|
||||
// don't change squeezed_shape at the last iteration, block[0] is assumed to be 1 by op definition
|
||||
if (b_idx > 0) {
|
||||
NodeVector squeezed_shape_prep;
|
||||
squeezed_shape_prep.reserve(block_length);
|
||||
squeezed_shape_prep.push_back(
|
||||
rg.make<Multiply>(rg.make<Gather>(squeezed_shape, zero, zero), block_value));
|
||||
if (b_idx > 1) { // avoid adding empty Slice into Concat
|
||||
squeezed_shape_prep.push_back(rg.make<Slice>(squeezed_shape, one, block_index, one));
|
||||
}
|
||||
squeezed_shape_prep.push_back(
|
||||
rg.make<Divide>(rg.make<Gather>(squeezed_shape, block_index, zero), block_value));
|
||||
if (b_idx + 1 < block_length) { // avoid adding empty Slice into Concat
|
||||
squeezed_shape_prep.push_back(rg.make<Slice>(squeezed_shape, block_index_next, int_max, one));
|
||||
}
|
||||
|
||||
const auto axes_order_const =
|
||||
opset3::Constant::create(element::i64,
|
||||
Shape{axes_order.size()},
|
||||
std::vector<int64_t>(axes_order.begin(), axes_order.end()));
|
||||
flat_node = std::make_shared<ov::opset3::Transpose>(flat_node, axes_order_const);
|
||||
new_ops.push_back(flat_node);
|
||||
squeezed_shape[0] *= block_values[block_idx];
|
||||
squeezed_shape[block_idx] /= block_values[block_idx];
|
||||
const auto out_pattern_2 =
|
||||
opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape);
|
||||
flat_node = std::make_shared<ov::opset3::Reshape>(flat_node, out_pattern_2, special_zero);
|
||||
new_ops.push_back(flat_node);
|
||||
squeezed_shape = rg.make<Concat>(squeezed_shape_prep, 0);
|
||||
}
|
||||
flat_node = rg.make<Reshape>(flat_node, squeezed_shape, special_zero);
|
||||
}
|
||||
|
||||
flat_node->set_friendly_name(space_to_batch->get_friendly_name());
|
||||
ngraph::copy_runtime_info(space_to_batch, new_ops);
|
||||
ngraph::replace_node(space_to_batch, flat_node);
|
||||
copy_runtime_info(space_to_batch, rg.get());
|
||||
replace_node(space_to_batch, flat_node);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(space_to_batch, matcher_name);
|
||||
const auto m = make_shared<pattern::Matcher>(space_to_batch, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/opsets/opset3.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <openvino/opsets/opset10.hpp>
|
||||
#include <queue>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@ -19,6 +20,7 @@
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
|
||||
@ -35,6 +37,7 @@ TEST_F(TransformationTestsF, BatchToSpaceDecompositionByElements) {
|
||||
std::make_shared<ngraph::Function>(ngraph::NodeVector{batch_to_space}, ngraph::ParameterVector{data});
|
||||
|
||||
manager.register_pass<ov::pass::ConvertBatchToSpace>();
|
||||
manager.register_pass<ov::pass::ConstantFolding>();
|
||||
}
|
||||
|
||||
{
|
||||
@ -93,6 +96,7 @@ TEST_F(TransformationTestsF, SpaceToBatchDecompositionByElements) {
|
||||
std::make_shared<ngraph::Function>(ngraph::NodeVector{batch_to_space}, ngraph::ParameterVector{data});
|
||||
|
||||
manager.register_pass<ov::pass::ConvertSpaceToBatch>();
|
||||
manager.register_pass<ov::pass::ConstantFolding>();
|
||||
}
|
||||
|
||||
{
|
||||
@ -159,6 +163,7 @@ TEST_F(TransformationTestsF, SpaceToBatchDecomposition) {
|
||||
std::make_shared<ngraph::Function>(ngraph::NodeVector{batch_to_space}, ngraph::ParameterVector{data});
|
||||
|
||||
manager.register_pass<ov::pass::ConvertSpaceToBatch>(false);
|
||||
manager.register_pass<ov::pass::ConstantFolding>();
|
||||
}
|
||||
|
||||
{
|
||||
@ -195,6 +200,7 @@ TEST_F(TransformationTestsF, BatchToSpaceDecomposition) {
|
||||
std::make_shared<ngraph::Function>(ngraph::NodeVector{batch_to_space}, ngraph::ParameterVector{data});
|
||||
|
||||
manager.register_pass<ov::pass::ConvertBatchToSpace>(false);
|
||||
manager.register_pass<ov::pass::ConstantFolding>();
|
||||
}
|
||||
|
||||
{
|
||||
@ -218,3 +224,156 @@ TEST_F(TransformationTestsF, BatchToSpaceDecomposition) {
|
||||
function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ss}, ngraph::ParameterVector{data});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Op, typename Conversion, typename Params>
|
||||
void op_convertion_type_test(const Params& params) {
|
||||
using namespace ov::opset10;
|
||||
using namespace ov::pass;
|
||||
|
||||
const auto by_elements = get<0>(params);
|
||||
const auto block_elem_type = get<1>(params);
|
||||
|
||||
const auto data = make_shared<Parameter>(element::f32, Shape{1, 1});
|
||||
const auto block_p = Constant::create(block_elem_type, Shape{2}, {1, 1});
|
||||
const auto input_2_p = Constant::create(block_elem_type, Shape{2}, {0, 0});
|
||||
const auto input_3_p = Constant::create(block_elem_type, Shape{2}, {0, 0});
|
||||
const auto bts_or_stb = make_shared<Op>(data, block_p, input_2_p, input_3_p);
|
||||
const auto f = make_shared<Function>(NodeVector{bts_or_stb}, ParameterVector{data});
|
||||
|
||||
Manager m;
|
||||
m.register_pass<Conversion>(by_elements);
|
||||
m.register_pass<ConstantFolding>();
|
||||
ASSERT_NO_THROW(m.run_passes(f));
|
||||
EXPECT_EQ(f->get_result()->get_input_shape(0), (Shape{1, 1}));
|
||||
}
|
||||
|
||||
using ElementTypeParams = tuple<bool, // by_elements
|
||||
element::Type // block element type
|
||||
>;
|
||||
|
||||
class BatchToSpaceDecomposition2D : public testing::WithParamInterface<ElementTypeParams>,
|
||||
public TransformationTests {};
|
||||
|
||||
TEST_P(BatchToSpaceDecomposition2D, BlockElemType) {
|
||||
op_convertion_type_test<ov::opset10::BatchToSpace, ov::pass::ConvertBatchToSpace>(GetParam());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(TransformationTests,
|
||||
BatchToSpaceDecomposition2D,
|
||||
::testing::Combine(::testing::ValuesIn({false, true}),
|
||||
::testing::ValuesIn({element::i32, element::i64})));
|
||||
|
||||
class SpaceToBatchDecomposition2D : public testing::WithParamInterface<ElementTypeParams>,
|
||||
public TransformationTests {};
|
||||
|
||||
TEST_P(SpaceToBatchDecomposition2D, BlockElemType) {
|
||||
op_convertion_type_test<ov::opset10::SpaceToBatch, ov::pass::ConvertSpaceToBatch>(GetParam());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(TransformationTests,
|
||||
SpaceToBatchDecomposition2D,
|
||||
::testing::Combine(::testing::ValuesIn({false, true}),
|
||||
::testing::ValuesIn({element::i32, element::i64})));
|
||||
|
||||
template <typename Op, typename Conversion, typename Params>
|
||||
void op_convertion_test(const Params& params) {
|
||||
using namespace ov::opset10;
|
||||
using namespace ov::pass;
|
||||
|
||||
const bool by_elements = get<0>(params);
|
||||
Shape data_shape;
|
||||
Shape expected_output_shape;
|
||||
vector<int64_t> block;
|
||||
vector<int64_t> input_2; // crops_begin or pads_begin
|
||||
vector<int64_t> input_3; // crops_end or pads_end
|
||||
tie(data_shape, block, input_2, input_3, expected_output_shape) = get<1>(params);
|
||||
|
||||
const auto data = make_shared<Parameter>(element::f32, PartialShape::dynamic(data_shape.size()));
|
||||
const auto block_p = Constant::create(element::i64, Shape{block.size()}, block);
|
||||
const auto input_2_p = Constant::create(element::i64, Shape{input_2.size()}, input_2);
|
||||
const auto input_3_p = Constant::create(element::i64, Shape{input_3.size()}, input_3);
|
||||
const auto bts_or_stb = make_shared<Op>(data, block_p, input_2_p, input_3_p);
|
||||
const auto f = make_shared<Function>(NodeVector{bts_or_stb}, ParameterVector{data});
|
||||
|
||||
Manager m;
|
||||
m.set_per_pass_validation(false);
|
||||
m.register_pass<Conversion>(by_elements);
|
||||
m.run_passes(f);
|
||||
ASSERT_EQ(count_ops_of_type<Op>(f), 0);
|
||||
EXPECT_TRUE(f->get_result()->get_input_partial_shape(0).is_dynamic());
|
||||
|
||||
data->set_partial_shape(data_shape);
|
||||
f->validate_nodes_and_infer_types();
|
||||
ASSERT_EQ(f->get_result()->get_input_shape(0), expected_output_shape);
|
||||
}
|
||||
|
||||
template <typename Params>
|
||||
string get_test_name(testing::TestParamInfo<Params> obj) {
|
||||
const auto& params = obj.param;
|
||||
const bool by_elements = get<0>(params);
|
||||
const auto& data_shape = get<0>(get<1>(params));
|
||||
|
||||
ostringstream result;
|
||||
result << data_shape.size() << "D" << (by_elements ? "_by_elements" : "");
|
||||
return result.str();
|
||||
}
|
||||
|
||||
using BatchToSpaceParams = tuple<Shape, // data_shape
|
||||
vector<int64_t>, // block
|
||||
vector<int64_t>, // crops_begin
|
||||
vector<int64_t>, // crops_end
|
||||
Shape // expected_output_shape
|
||||
>;
|
||||
|
||||
using BatchToSpaceDecomposeParams = tuple<bool, // by_elements
|
||||
BatchToSpaceParams>;
|
||||
|
||||
class BatchToSpaceDecompositionWithParams : public testing::WithParamInterface<BatchToSpaceDecomposeParams>,
|
||||
public TransformationTests {};
|
||||
|
||||
TEST_P(BatchToSpaceDecompositionWithParams, DynamicInputs) {
|
||||
op_convertion_test<ov::opset10::BatchToSpace, ov::pass::ConvertBatchToSpace>(GetParam());
|
||||
}
|
||||
|
||||
static vector<BatchToSpaceParams> batch_to_space_params = {
|
||||
{{4, 3}, {1, 2}, {0, 0}, {0, 0}, {2, 6}},
|
||||
{{6, 5, 7}, {1, 2, 3}, {0, 1, 2}, {0, 1, 2}, {1, 8, 17}},
|
||||
{{30, 4, 1, 1}, {1, 5, 3, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 20, 3, 2}},
|
||||
{{96, 3, 5, 7, 1}, {1, 4, 3, 2, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {4, 12, 15, 14, 1}},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(TransformationTests,
|
||||
BatchToSpaceDecompositionWithParams,
|
||||
::testing::Combine(::testing::ValuesIn({false, true}),
|
||||
::testing::ValuesIn(batch_to_space_params)),
|
||||
get_test_name<BatchToSpaceDecomposeParams>);
|
||||
|
||||
using SpaceToBatchParams = tuple<Shape, // data_shape
|
||||
vector<int64_t>, // block
|
||||
vector<int64_t>, // pads_begin
|
||||
vector<int64_t>, // pads_end
|
||||
Shape // expected_output_shape
|
||||
>;
|
||||
|
||||
using SpaceToBatchDecomposeParams = tuple<bool, // by_elements
|
||||
SpaceToBatchParams>;
|
||||
|
||||
class SpaceToBatchDecompositionWithParams : public testing::WithParamInterface<SpaceToBatchDecomposeParams>,
|
||||
public TransformationTests {};
|
||||
|
||||
TEST_P(SpaceToBatchDecompositionWithParams, DynamicInputs) {
|
||||
op_convertion_test<ov::opset10::SpaceToBatch, ov::pass::ConvertSpaceToBatch>(GetParam());
|
||||
}
|
||||
|
||||
static vector<SpaceToBatchParams> space_to_batch_params = {
|
||||
{{2, 6}, {1, 2}, {0, 0}, {0, 0}, {4, 3}},
|
||||
{{1, 8, 17}, {1, 2, 3}, {0, 1, 2}, {0, 1, 2}, {6, 5, 7}},
|
||||
{{1, 20, 3, 2}, {1, 5, 3, 2}, {0, 0, 0, 0}, {0, 0, 0, 0}, {30, 4, 1, 1}},
|
||||
{{4, 12, 15, 14, 1}, {1, 4, 3, 2, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {96, 3, 5, 7, 1}},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(TransformationTests,
|
||||
SpaceToBatchDecompositionWithParams,
|
||||
::testing::Combine(::testing::ValuesIn({false, true}),
|
||||
::testing::ValuesIn(space_to_batch_params)),
|
||||
get_test_name<SpaceToBatchDecomposeParams>);
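Outside of the test fixtures, the converted passes are applied through ov::pass::Manager in the usual way. A minimal sketch (headers and model construction abbreviated; the shapes are taken from batch_to_space_params above, everything else is illustrative rather than part of this change):

// Illustrative sketch mirroring the parametrized tests above.
const auto data = std::make_shared<ov::opset10::Parameter>(ov::element::f32, ov::PartialShape::dynamic(4));
const auto block = ov::opset10::Constant::create(ov::element::i64, ov::Shape{4}, {1, 5, 3, 2});
const auto crops_begin = ov::opset10::Constant::create(ov::element::i64, ov::Shape{4}, {0, 0, 0, 0});
const auto crops_end = ov::opset10::Constant::create(ov::element::i64, ov::Shape{4}, {0, 0, 0, 0});
const auto b2s = std::make_shared<ov::opset10::BatchToSpace>(data, block, crops_begin, crops_end);
const auto model = std::make_shared<ov::Model>(ov::OutputVector{b2s}, ov::ParameterVector{data});

ov::pass::Manager manager;
manager.register_pass<ov::pass::ConvertBatchToSpace>(false);  // false selects the non-by-elements decomposition, as in the tests
manager.run_passes(model);
// BatchToSpace is now decomposed; once the input is reset to the static shape {30, 4, 1, 1},
// shape inference gives the expected output {1, 20, 3, 2}.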
|
||||
|
@ -30,6 +30,10 @@ public:
|
||||
/// \param tensor The tensor with data
|
||||
Constant(const std::shared_ptr<ngraph::runtime::Tensor>& tensor);
|
||||
|
||||
/// \brief Initialize a constant from ov::Tensor
|
||||
/// \param tensor The ov::Tensor with data
|
||||
Constant(const ov::Tensor& tensor);
|
||||
|
||||
/// \brief Constructs a tensor constant.
|
||||
///
|
||||
/// \param type The element type of the tensor constant.
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
namespace InferenceEngine {
|
||||
class Blob;
|
||||
class IAsyncInferRequestWrapper;
|
||||
} // namespace InferenceEngine
|
||||
|
||||
namespace ov {
|
||||
@ -28,6 +29,7 @@ class CoreImpl;
|
||||
class InferRequest;
|
||||
class RemoteContext;
|
||||
class VariableState;
|
||||
class IInferRequestInternalWrapper;
|
||||
|
||||
/**
|
||||
* @brief Tensor API holding host memory
|
||||
@ -52,6 +54,8 @@ protected:
|
||||
friend class ov::InferRequest;
|
||||
friend class ov::RemoteContext;
|
||||
friend class ov::VariableState;
|
||||
friend class ov::IInferRequestInternalWrapper;
|
||||
friend class InferenceEngine::IAsyncInferRequestWrapper;
|
||||
|
||||
public:
|
||||
/// @brief Default constructor
|
||||
|
@ -794,8 +794,8 @@ bool ov::Node::constant_fold(OutputVector& output_values, const OutputVector& in
for (const auto& input : input_values) {
nodes.push_back(input.get_node_shared_ptr());
auto constant = ov::as_type_ptr<ngraph::op::v0::Constant>(input.get_node_shared_ptr());
auto tensor = ov::Tensor(input.get_element_type(), input.get_shape());
std::copy_n(constant->get_data_ptr<uint8_t>(), constant->get_byte_size(), static_cast<uint8_t*>(tensor.data()));
void* data = (void*)constant->get_data_ptr();
auto tensor = ov::Tensor(input.get_element_type(), input.get_shape(), data);
input_tensors.push_back(tensor);
}

@ -807,9 +807,7 @@ bool ov::Node::constant_fold(OutputVector& output_values, const OutputVector& in
OPENVINO_SUPPRESS_DEPRECATED_START
if (evaluate(output_tensors, input_tensors)) {
for (size_t i = 0; i < output_tensors.size(); ++i) {
output_values[i] = make_shared<ngraph::op::Constant>(output_tensors[i].get_element_type(),
output_tensors[i].get_shape(),
output_tensors[i].data());
output_values[i] = make_shared<ngraph::op::Constant>(output_tensors[i]);
copy_runtime_info(nodes, output_values[i].get_node_shared_ptr());
}
return true;
@ -50,6 +50,17 @@ ov::op::v0::Constant::Constant(const shared_ptr<ngraph::runtime::Tensor>& tensor
constructor_validate_and_infer_types();
}

ov::op::v0::Constant::Constant(const ov::Tensor& tensor) {
m_element_type = tensor.get_element_type();
m_shape = tensor.get_shape();
// Share data from ov::Tensor
m_data = make_shared<ngraph::runtime::SharedBuffer<ov::Tensor>>(static_cast<char*>(tensor.data()),
tensor.get_byte_size(),
tensor);

constructor_validate_and_infer_types();
}

ov::op::v0::Constant::Constant(const element::Type& type,
const ov::Shape& shape,
const std::vector<std::string>& values)
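A small usage sketch for the Constant(const ov::Tensor&) constructor added above (the values are arbitrary; the point is that the buffer is shared via SharedBuffer rather than copied, and the SharedBuffer keeps the ov::Tensor handle alive together with the constant):

// Illustrative only: wrap an existing ov::Tensor into a Constant without copying.
ov::Tensor tensor(ov::element::f32, ov::Shape{2, 2});
float* data = tensor.data<float>();
for (size_t i = 0; i < tensor.get_size(); ++i)
    data[i] = static_cast<float>(i);
const auto constant = std::make_shared<ov::op::v0::Constant>(tensor);
// constant->get_vector<float>() now returns {0, 1, 2, 3}; the data still lives in `tensor`.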
@ -2,9 +2,6 @@
# SPDX-License-Identifier: Apache-2.0
#

if(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-sign-compare)
endif()

add_subdirectory(src)

@ -275,7 +275,7 @@ std::map<int32_t, std::shared_ptr<ov::Model>> FrontEnd::convert_each_node_recurs
// TODO: figure a way to safely handle unused outputs
if (named_outputs.count(port.parameter())) {
const auto& ng_outputs = named_outputs.at(port.parameter());
FRONT_END_OP_CONVERSION_CHECK(ng_outputs.size() == port.arguments_size(),
FRONT_END_OP_CONVERSION_CHECK(ng_outputs.size() == (size_t)port.arguments_size(),
"The number of output tensors must be equal to "
"the number of outputs of the OV node.");
for (size_t idx = 0; idx < ng_outputs.size(); ++idx) {
@ -163,9 +163,7 @@ bool read_tensor(std::istream& is, char* data, size_t len) {
std::vector<char> dims_struct(dims_len);
is.read(&dims_struct[0], dims_len);
is.read(data, len);
if (is.gcount() != len)
return false;
return true;
return (size_t)is.gcount() == len;
}

template <typename T>
@ -365,7 +363,7 @@ void InputModel::InputModelImpl::createTempConsts() {
var_place->set_partial_shape(tensor_ps);

Shape shape(tensor_ps.size(), 0);
for (auto i = 0; i < tensor_ps.size(); i++) {
for (size_t i = 0; i < tensor_ps.size(); i++) {
const auto& dim = tensor_ps[i];
if (dim.is_static()) {
shape[i] = dim.get_length();
@ -48,7 +48,7 @@ std::shared_ptr<Node> op::internal::ConditionalBlock::clone_with_new_inputs(cons
return make_shared<ConditionalBlock>(new_args.at(0), m_is_scalar_condition, m_sub_block_index, m_output_infos);
} else {
OutputVector inputs_args;
for (auto i = 0; i < new_args.size() - 1; i++) {
for (size_t i = 0; i < new_args.size() - 1; i++) {
inputs_args.push_back(new_args[i]);
}
return make_shared<ConditionalBlock>(inputs_args,
@ -66,7 +66,7 @@ bool op::internal::ConditionalBlock::visit_attributes(AttributeVisitor& visitor)
}

void op::internal::ConditionalBlock::validate_and_infer_types() {
for (auto i = 0; i < m_output_infos.size(); i++) {
for (size_t i = 0; i < m_output_infos.size(); i++) {
set_output_type(i, m_output_infos[i].first, m_output_infos[i].second);
}
}
@ -32,7 +32,7 @@ bool op::internal::While::visit_attributes(AttributeVisitor& visitor) {
|
||||
}
|
||||
|
||||
void op::internal::While::validate_and_infer_types() {
|
||||
for (auto i = 0; i < m_output_infos.size(); i++) {
|
||||
for (size_t i = 0; i < m_output_infos.size(); i++) {
|
||||
set_output_type(i, m_output_infos[i].first, m_output_infos[i].second);
|
||||
}
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ ov::frontend::paddle::pass::TransformIf::TransformIf(std::vector<std::shared_ptr
|
||||
// openvino If requires both then and else branch at the same time.
|
||||
ParameterVector params;
|
||||
ResultVector results;
|
||||
for (auto i = 0; i < then_branch->get_output_size(); i++) {
|
||||
for (size_t i = 0; i < then_branch->get_output_size(); i++) {
|
||||
const auto param = std::make_shared<Parameter>(then_branch->get_output_element_type(i),
|
||||
then_branch->get_output_partial_shape(i));
|
||||
param->set_friendly_name(then_branch->get_output_op(i)->get_output_tensor(0).get_any_name());
|
||||
@ -96,7 +96,7 @@ ov::frontend::paddle::pass::TransformIf::TransformIf(std::vector<std::shared_ptr
|
||||
|
||||
auto else_results = else_branch->get_results();
|
||||
auto then_results = then_branch->get_results();
|
||||
for (auto i = 0; i < else_results.size(); i++) {
|
||||
for (size_t i = 0; i < else_results.size(); i++) {
|
||||
if_node->set_output(then_results[i], else_results[i]);
|
||||
}
|
||||
replace_node(conditional_block, if_node);
|
||||
|
@ -22,7 +22,7 @@ NamedOutputs meshgrid(const NodeContext& node) {
|
||||
}
|
||||
const auto out_shape = std::make_shared<default_opset::Concat>(dims, 0);
|
||||
OutputVector outs;
|
||||
for (auto i = 0; i < inputs.size(); i++) {
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
const auto& input = inputs[i];
|
||||
const auto out =
|
||||
std::make_shared<default_opset::Broadcast>(input,
|
||||
|
@ -103,7 +103,7 @@ NamedOutputs slice_op(const NodeContext& node, const bool& stride_input) {
|
||||
const auto decreased_node = std::make_shared<default_opset::Squeeze>(stride_slice_node, squeeze_index_node);
|
||||
|
||||
const auto input_rank = input_shape.rank().get_length();
|
||||
if (input_rank == decrease_axis.size()) {
|
||||
if ((size_t)input_rank == decrease_axis.size()) {
|
||||
auto restore_node = std::make_shared<default_opset::Reshape>(
|
||||
decreased_node,
|
||||
std::make_shared<default_opset::Constant>(element::i64, Shape{1}, 1),
|
||||
|
@ -12,7 +12,7 @@ namespace op {
|
||||
NamedOutputs sum(const NodeContext& node) {
|
||||
auto data = node.get_ng_inputs("X");
|
||||
auto sum = data[0].get_node_shared_ptr();
|
||||
for (int i = 1; i < data.size(); i++) {
|
||||
for (size_t i = 1; i < data.size(); i++) {
|
||||
sum = std::make_shared<default_opset::Add>(sum, data[i]);
|
||||
}
|
||||
return node.default_single_output_mapping({sum}, {"Out"});
|
||||
|
@ -47,13 +47,14 @@ const std::map<std::string, std::vector<std::shared_ptr<InPortPlace>>>& OpPlace:
|
||||
|
||||
std::shared_ptr<OutPortPlace> OpPlace::get_output_port_paddle(const std::string& outputName,
|
||||
int outputPortIndex) const {
|
||||
FRONT_END_GENERAL_CHECK(outputPortIndex <= m_output_ports.at(outputName).size(),
|
||||
FRONT_END_GENERAL_CHECK((size_t)outputPortIndex <= m_output_ports.at(outputName).size(),
|
||||
"outputPortIndex is out of bounds.");
|
||||
return m_output_ports.at(outputName)[outputPortIndex];
|
||||
}
|
||||
|
||||
std::shared_ptr<InPortPlace> OpPlace::get_input_port_paddle(const std::string& inputName, int inputPortIndex) const {
|
||||
FRONT_END_GENERAL_CHECK(inputPortIndex <= m_input_ports.at(inputName).size(), "inputPortIndex is out of bounds.");
|
||||
FRONT_END_GENERAL_CHECK((size_t)inputPortIndex <= m_input_ports.at(inputName).size(),
|
||||
"inputPortIndex is out of bounds.");
|
||||
return m_input_ports.at(inputName)[inputPortIndex];
|
||||
}
|
||||
|
||||
@ -145,13 +146,14 @@ std::vector<Place::Ptr> OpPlace::get_consuming_ports() const {
|
||||
}
|
||||
|
||||
Place::Ptr OpPlace::get_output_port(const std::string& outputName, int outputPortIndex) const {
|
||||
FRONT_END_GENERAL_CHECK(outputPortIndex <= m_output_ports.at(outputName).size(),
|
||||
FRONT_END_GENERAL_CHECK((size_t)outputPortIndex <= m_output_ports.at(outputName).size(),
|
||||
"outputPortIndex is Out of bounds.");
|
||||
return m_output_ports.at(outputName)[outputPortIndex];
|
||||
}
|
||||
|
||||
Place::Ptr OpPlace::get_input_port(const std::string& inputName, int inputPortIndex) const {
|
||||
FRONT_END_GENERAL_CHECK(inputPortIndex <= m_input_ports.at(inputName).size(), "inputPortIndex is out of bounds.");
|
||||
FRONT_END_GENERAL_CHECK((size_t)inputPortIndex <= m_input_ports.at(inputName).size(),
|
||||
"inputPortIndex is out of bounds.");
|
||||
return m_input_ports.at(inputName)[inputPortIndex];
|
||||
}
|
||||
|
||||
|
@ -2,10 +2,4 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
# 'argument': conversion from 'size_t' to 'int', possible loss of data
|
||||
ie_add_compiler_flags(/wd4267)
|
||||
ie_add_compiler_flags(/wd4244)
|
||||
endif()
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@ -29,9 +29,6 @@ public:
|
||||
|
||||
// TODO: set of input and output methods are not aligned; also they are not aligned with the rest of FEs
|
||||
|
||||
// Input tensor id
|
||||
virtual size_t input(size_t index) const = 0;
|
||||
|
||||
virtual const std::vector<size_t>& inputs() const = 0;
|
||||
|
||||
// ------------------------------
|
||||
|
@ -26,21 +26,23 @@ public:
|
||||
m_decoder(decoder),
|
||||
m_tensor_map(tensor_map),
|
||||
m_ext_tensor_map(ext_tensor_map),
|
||||
m_external_parameters(external_parameters) {}
|
||||
m_external_parameters(external_parameters),
|
||||
m_decoder_inputs(decoder->inputs()),
|
||||
m_decoder_outputs(decoder->outputs()) {}
|
||||
|
||||
// Do not search for input in tensor map; try to access it as a constant of specified type T and return its value
|
||||
template <typename T>
|
||||
T const_input(size_t index) const;
|
||||
|
||||
size_t get_input_size() const override {
|
||||
return m_decoder->inputs().size();
|
||||
return m_decoder_inputs.size();
|
||||
};
|
||||
|
||||
// Search for input in tensor map and return an output port for already converted op
|
||||
// TODO: int due to base class uses it, but naturally it should be size_t for PT
|
||||
Output<Node> get_input(int index) const override {
|
||||
FRONT_END_GENERAL_CHECK(!m_decoder->input_is_none(index), "Input is none with index: ", index);
|
||||
auto input = m_decoder->input(index);
|
||||
auto input = m_decoder_inputs.at(index);
|
||||
FRONT_END_GENERAL_CHECK(m_tensor_map->count(input), "No tensor corresponding input: ", input, " exist.");
|
||||
return m_tensor_map->at(input);
|
||||
}
|
||||
@ -48,7 +50,7 @@ public:
|
||||
// TODO: upstream to base class
|
||||
OutputVector inputs() const {
|
||||
OutputVector res;
|
||||
for (size_t input : m_decoder->inputs()) {
|
||||
for (auto input : m_decoder_inputs) {
|
||||
FRONT_END_GENERAL_CHECK(m_tensor_map->count(input), "No tensor corresponding index: ", input, " exist.");
|
||||
res.push_back(m_tensor_map->at(input));
|
||||
}
|
||||
@ -63,29 +65,22 @@ public:
|
||||
return m_decoder->input_is_none(index);
|
||||
}
|
||||
|
||||
size_t get_output_size() const {
|
||||
return m_decoder_outputs.size();
|
||||
}
|
||||
|
||||
std::vector<size_t> outputs() const {
|
||||
return m_decoder_outputs;
|
||||
}
|
||||
|
||||
// Convert the resulting value of this node to ov Constant; works correctly only for nodes that produce
|
||||
// constant value, naturally for prim::Constant
|
||||
OutputVector as_constant() const;
|
||||
|
||||
/*
|
||||
TODO: Should be uncommented when explicit NodeContext ctor won't require passing op_type
|
||||
const std::string& get_op_type() const override {
|
||||
return m_decoder->get_op_type();
|
||||
}
|
||||
*/
|
||||
|
||||
std::string get_schema() const {
|
||||
return m_decoder->get_schema();
|
||||
}
|
||||
|
||||
size_t num_of_outputs() const {
|
||||
return m_decoder->num_of_outputs();
|
||||
}
|
||||
|
||||
std::vector<size_t> outputs() const {
|
||||
return m_decoder->outputs();
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> mark_node(std::shared_ptr<Node> ov_node) const {
|
||||
return m_decoder->mark_node(ov_node);
|
||||
}
|
||||
@ -105,7 +100,7 @@ public:
|
||||
|
||||
void mutate_input(size_t index, Output<Node> ov_output) {
|
||||
FRONT_END_GENERAL_CHECK(!m_decoder->input_is_none(index), "Input is none with index: ", index);
|
||||
auto input = m_decoder->input(index);
|
||||
auto input = m_decoder_inputs.at(index);
|
||||
FRONT_END_GENERAL_CHECK(m_tensor_map->count(input), "No tensor corresponding input: ", input, " exist.");
|
||||
m_tensor_map->at(input).get_tensor().set_names({std::to_string(input) + "_"});
|
||||
// TODO: find out why this doesn't work
|
||||
@ -148,6 +143,8 @@ private:
|
||||
TensorMap* m_tensor_map;
|
||||
const TensorMap& m_ext_tensor_map;
|
||||
ParameterVector* m_external_parameters;
|
||||
const std::vector<size_t> m_decoder_inputs;
|
||||
const std::vector<size_t> m_decoder_outputs;
|
||||
};
|
||||
|
||||
} // namespace pytorch
|
||||
|
@ -3,7 +3,13 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/adaptive_avg_pool.hpp"
|
||||
#include "openvino/op/concat.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/shape_of.hpp"
|
||||
#include "openvino/op/slice.hpp"
|
||||
#include "openvino/op/tile.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,23 +17,26 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_adaptive_avg_pool3d(NodeContext& context) {
|
||||
auto const_tile_params = context.mark_node(opset10::Constant::create(element::i32, Shape{5}, {1, 1, 1, 1, 1}));
|
||||
auto const_0 = context.mark_node(opset10::Constant::create(element::i32, Shape{1}, {0}));
|
||||
auto const_1 = context.mark_node(opset10::Constant::create(element::i32, Shape{1}, {1}));
|
||||
auto const_neg_3 = context.mark_node(opset10::Constant::create(element::i32, Shape{1}, {-3}));
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto const_tile_params = context.mark_node(v0::Constant::create(element::i32, Shape{5}, {1, 1, 1, 1, 1}));
|
||||
auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
|
||||
auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
|
||||
auto const_neg_3 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-3}));
|
||||
|
||||
auto input_tensor = context.get_input(0);
|
||||
auto given_shape = context.get_input(1);
|
||||
|
||||
auto input_shape = context.mark_node(std::make_shared<opset10::ShapeOf>(input_tensor, element::i32));
|
||||
auto input_shape = context.mark_node(std::make_shared<v3::ShapeOf>(input_tensor, element::i32));
|
||||
auto shape_begin =
|
||||
context.mark_node(std::make_shared<opset10::Slice>(input_shape, const_0, const_neg_3, const_1, const_0));
|
||||
auto output_shape = context.mark_node(std::make_shared<opset10::Concat>(OutputVector{shape_begin, given_shape}, 0));
|
||||
context.mark_node(std::make_shared<v8::Slice>(input_shape, const_0, const_neg_3, const_1, const_0));
|
||||
auto output_shape = context.mark_node(std::make_shared<v0::Concat>(OutputVector{shape_begin, given_shape}, 0));
|
||||
|
||||
auto tile = context.mark_node(std::make_shared<opset10::Tile>(input_tensor, const_tile_params));
|
||||
auto adaptive_avg_pool = context.mark_node(std::make_shared<opset10::AdaptiveAvgPool>(tile, given_shape));
|
||||
auto reshape = context.mark_node(std::make_shared<opset10::Reshape>(adaptive_avg_pool, output_shape, false));
|
||||
auto tile = context.mark_node(std::make_shared<v0::Tile>(input_tensor, const_tile_params));
|
||||
auto adaptive_avg_pool = context.mark_node(std::make_shared<v8::AdaptiveAvgPool>(tile, given_shape));
|
||||
auto reshape = context.mark_node(std::make_shared<v1::Reshape>(adaptive_avg_pool, output_shape, false));
|
||||
|
||||
return {reshape};
|
||||
};
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/adaptive_max_pool.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,9 +12,10 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_adaptive_max_pool2d(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto x = context.get_input(0);
|
||||
auto y = context.get_input(1);
|
||||
auto adaptive_max_pool = context.mark_node(std::make_shared<opset10::AdaptiveMaxPool>(x, y, ov::element::i32));
|
||||
auto adaptive_max_pool = context.mark_node(std::make_shared<ov::op::v8::AdaptiveMaxPool>(x, y, ov::element::i32));
|
||||
return {adaptive_max_pool->output(0), adaptive_max_pool->output(1)};
|
||||
};
|
||||
|
||||
|
@ -16,6 +16,7 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_add(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 3);
|
||||
auto lhs = context.get_input(0);
|
||||
auto rhs = context.get_input(1);
|
||||
auto dtype0 = context.get_input_type(0);
|
||||
|
@ -5,7 +5,9 @@
#include <climits>

#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/convert_like.hpp"
#include "openvino/op/multiply.hpp"
#include "utils.hpp"

namespace ov {
@ -13,13 +15,16 @@ namespace frontend {
namespace pytorch {
namespace op {

using namespace ov::op;

OutputVector translate_addcmul(NodeContext& context) {
const auto eltwise_mult = std::make_shared<opset10::Multiply>(context.get_input(1), context.get_input(2));
num_inputs_check(context, 4, 4);
const auto eltwise_mult = std::make_shared<v1::Multiply>(context.get_input(1), context.get_input(2));
const auto value = context.get_input(3);
const auto converted_value = std::make_shared<opset10::ConvertLike>(value, context.get_input(1));
const auto scalar_mult = std::make_shared<opset10::Multiply>(eltwise_mult, converted_value);
const auto converted_value = std::make_shared<v1::ConvertLike>(value, context.get_input(1));
const auto scalar_mult = std::make_shared<v1::Multiply>(eltwise_mult, converted_value);
context.mark_nodes({eltwise_mult, converted_value, scalar_mult});
return {context.mark_node(std::make_shared<opset10::Add>(context.get_input(0), scalar_mult))};
return {context.mark_node(std::make_shared<v1::Add>(context.get_input(0), scalar_mult))};
};

} // namespace op
@ -3,7 +3,10 @@
//
#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/convert_like.hpp"
#include "openvino/op/matmul.hpp"
#include "openvino/op/multiply.hpp"
#include "utils.hpp"
namespace ov {
@ -11,18 +14,21 @@ namespace frontend {
namespace pytorch {
namespace op {
using namespace ov::op;
OutputVector translate_addmm(NodeContext& context) {
num_inputs_check(context, 5, 5);
auto input = context.get_input(0);
auto m1 = context.get_input(1);
auto m2 = context.get_input(2);
auto beta = context.get_input(3);
auto alpha = context.get_input(4);
auto beta_converted = context.mark_node(std::make_shared<opset10::ConvertLike>(beta, input));
auto mm = context.mark_node(std::make_shared<opset10::MatMul>(m1, m2));
auto alpha_converted = context.mark_node(std::make_shared<opset10::ConvertLike>(alpha, mm));
auto input_beta = context.mark_node(std::make_shared<opset10::Multiply>(input, beta_converted));
auto mm_alpha = context.mark_node(std::make_shared<opset10::Multiply>(mm, alpha_converted));
return {context.mark_node(std::make_shared<opset10::Add>(input_beta, mm_alpha))};
auto beta_converted = context.mark_node(std::make_shared<v1::ConvertLike>(beta, input));
auto mm = context.mark_node(std::make_shared<v0::MatMul>(m1, m2));
auto alpha_converted = context.mark_node(std::make_shared<v1::ConvertLike>(alpha, mm));
auto input_beta = context.mark_node(std::make_shared<v1::Multiply>(input, beta_converted));
auto mm_alpha = context.mark_node(std::make_shared<v1::Multiply>(mm, alpha_converted));
return {context.mark_node(std::make_shared<v1::Add>(input_beta, mm_alpha))};
};
} // namespace op
@ -3,7 +3,10 @@
//
#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/convert_like.hpp"
#include "openvino/op/range.hpp"
#include "utils.hpp"
namespace ov {
@ -11,9 +14,11 @@ namespace frontend {
namespace pytorch {
namespace op {
using namespace ov::op;
OutputVector translate_arange(NodeContext& context) {
auto zero = context.mark_node(opset10::Constant::create(element::i32, Shape{}, {0}));
auto one = context.mark_node(opset10::Constant::create(element::i32, Shape{}, {1}));
auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
auto one = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));
auto dtype = element::f32;
bool dtype_applied = false;
auto num_inputs = context.get_input_size();
@ -22,29 +27,26 @@ OutputVector translate_arange(NodeContext& context) {
ov::Output<Node> start = zero;
ov::Output<Node> step = one;
// aten::arange(Scalar end, tensor out)
if (num_inputs == 2) {
// aten::arange(Scalar end, tensor out)
end = context.get_input(0);
out_tensor = context.input_is_none(1) ? end : context.get_input(1);
}
// aten::arange(Scalar start, Scalar end, Scalar step, Tensor out)
if (num_inputs == 4) {
} else if (num_inputs == 4) {
// aten::arange(Scalar start, Scalar end, Scalar step, Tensor out)
start = context.get_input(0);
end = context.get_input(1);
step = context.get_input(2);
out_tensor = context.input_is_none(3) ? end : context.get_input(3);
}
// aten::arange(Scalar end, ScalarType dtype, Layout, Device, bool pin_memory)
if (num_inputs == 5) {
} else if (num_inputs == 5) {
// aten::arange(Scalar end, ScalarType dtype, Layout, Device, bool pin_memory)
end = context.get_input(0);
out_tensor = end;
if (!context.input_is_none(1)) {
dtype = convert_dtype(context.const_input<int64_t>(1));
dtype_applied = true;
}
}
// aten::arange(Scalar start, Scalar end, ScalarType dtype, Layout, Device, bool pin_memory)
if (num_inputs == 6) {
} else if (num_inputs == 6) {
// aten::arange(Scalar start, Scalar end, ScalarType dtype, Layout, Device, bool pin_memory)
start = context.get_input(0);
end = context.get_input(1);
out_tensor = end;
@ -52,9 +54,8 @@ OutputVector translate_arange(NodeContext& context) {
dtype = convert_dtype(context.const_input<int64_t>(2));
dtype_applied = true;
}
}
// aten::arange(Scalar start, Scalar end, Scalar step, ScalarType dtype, Layout, Device, bool pin_memory)
if (num_inputs == 7) {
} else if (num_inputs == 7) {
// aten::arange(Scalar start, Scalar end, Scalar step, ScalarType dtype, Layout, Device, bool pin_memory)
start = context.get_input(0);
end = context.get_input(1);
step = context.get_input(2);
@ -63,13 +64,15 @@ OutputVector translate_arange(NodeContext& context) {
dtype = convert_dtype(context.const_input<int64_t>(3));
dtype_applied = true;
}
} else {
FRONT_END_OP_CONVERSION_CHECK(false, "Not expected number of inputs for ", context.get_op_type());
}
auto r_end = context.mark_node(std::make_shared<opset10::Convert>(end, dtype));
auto r_start = context.mark_node(std::make_shared<opset10::Convert>(start, dtype));
auto r_step = context.mark_node(std::make_shared<opset10::Convert>(step, dtype));
auto range = context.mark_node(std::make_shared<opset10::Range>(r_start, r_end, r_step, dtype));
auto r_end = context.mark_node(std::make_shared<v0::Convert>(end, dtype));
auto r_start = context.mark_node(std::make_shared<v0::Convert>(start, dtype));
auto r_step = context.mark_node(std::make_shared<v0::Convert>(step, dtype));
auto range = context.mark_node(std::make_shared<v4::Range>(r_start, r_end, r_step, dtype));
if (!dtype_applied) {
range = context.mark_node(std::make_shared<opset10::ConvertLike>(range, out_tensor));
range = context.mark_node(std::make_shared<v1::ConvertLike>(range, out_tensor));
}
return {range};
};
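A minimal standalone C++ sketch of the dispatch implemented in the arange hunk above: the number of node inputs selects the aten::arange overload and therefore which input indices hold start, end, step and dtype. The names ArangeInputs and arange_inputs are illustrative only; they are not part of this commit or of the OpenVINO sources.

#include <cstddef>
#include <stdexcept>

struct ArangeInputs {
    int start;  // index of the "start" input, -1 means the default of 0
    int end;    // index of the "end" input
    int step;   // index of the "step" input, -1 means the default of 1
    int dtype;  // index of the "dtype" input, -1 means derive the type from the out/end tensor
};

// Maps context.get_input_size() to the overload handled in translate_arange.
ArangeInputs arange_inputs(std::size_t num_inputs) {
    switch (num_inputs) {
    case 2:  return {-1, 0, -1, -1};  // arange(end, out)
    case 4:  return {0, 1, 2, -1};    // arange(start, end, step, out)
    case 5:  return {-1, 0, -1, 1};   // arange(end, dtype, layout, device, pin_memory)
    case 6:  return {0, 1, -1, 2};    // arange(start, end, dtype, layout, device, pin_memory)
    case 7:  return {0, 1, 2, 3};     // arange(start, end, step, dtype, layout, device, pin_memory)
    default: throw std::invalid_argument("unexpected number of inputs for aten::arange");
    }
}

All five cases end in the same v4::Range construction; when no dtype input is given, the result is converted back with ConvertLike, which is what the dtype_applied flag above tracks.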
@ -3,7 +3,9 @@
//
#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/convert_like.hpp"
#include "pt_framework_node.hpp"
#include "utils.hpp"
@ -12,24 +14,28 @@ namespace frontend {
namespace pytorch {
namespace op {
using namespace ov::op;
OutputVector translate_as_tensor(NodeContext& context) {
// aten::tensor(t[] data, *, ScalarType? dtype=None, Device? device=None, bool requires_grad=False) -> Tensor
num_inputs_check(context, 1, 4);
auto dtype = element::f32;
Output<Node> cast;
if (!context.input_is_none(1)) {
auto dtype_ext_node = context.get_input_from_visible_context(1).get_node_shared_ptr();
auto dtype_fw_node = std::dynamic_pointer_cast<PtFrameworkNode>(dtype_ext_node);
if (dtype_fw_node && dtype_fw_node->get_op_type() == "prim::dtype") {
auto type_input = dtype_fw_node->input_value(0);
return {context.mark_node(std::make_shared<opset10::ConvertLike>(context.get_input(0), type_input))};
return {context.mark_node(std::make_shared<v1::ConvertLike>(context.get_input(0), type_input))};
}
if (auto dtype_const = std::dynamic_pointer_cast<opset10::Constant>(dtype_ext_node)) {
if (auto dtype_const = std::dynamic_pointer_cast<v0::Constant>(dtype_ext_node)) {
auto pt_type = dtype_const->cast_vector<int64_t>()[0];
dtype = convert_dtype(pt_type);
}
}
cast = context.mark_node(std::make_shared<opset10::Convert>(context.get_input(0), dtype));
auto cast = context.mark_node(std::make_shared<v0::Convert>(context.get_input(0), dtype));
// Input with index 2 is device, we skip this input
// Input with index 3 is flag requires_grad, we skip this input
return {cast};
};
@ -3,7 +3,12 @@
//
#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/op/avg_pool.hpp"
#include "openvino/op/broadcast.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/pad.hpp"
#include "openvino/op/subtract.hpp"
#include "utils.hpp"
namespace ov {
@ -11,7 +16,10 @@ namespace frontend {
namespace pytorch {
namespace op {
using namespace ov::op;
OutputVector translate_avg_poolnd(NodeContext& context) {
num_inputs_check(context, 6, 7);
auto input = context.get_input(0);
auto kernel = context.const_input<Shape>(1);
auto strides = context.const_input<Strides>(2);
@ -25,23 +33,22 @@ OutputVector translate_avg_poolnd(NodeContext& context) {
// PyTorch allows sliding window go off bound, which leads to this accommodation.
// More detail on https://github.com/pytorch/pytorch/issues/57178
if (count_include_pad) {
auto zero = context.mark_node(opset10::Constant::create(element::f32, Shape{}, {0}));
auto zero_i32 = context.mark_node(opset10::Constant::create(element::i32, Shape{}, {0}));
auto shape = context.mark_node(std::make_shared<opset10::ShapeOf>(input, element::i32));
auto rank = context.mark_node(std::make_shared<opset10::ShapeOf>(shape, element::i32));
auto zero = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
auto zero_i32 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
Output<Node> rank;
std::tie(std::ignore, rank) = get_shape_rank(context, input);
auto pad_values = context.get_input(3);
auto pads_len = context.mark_node(opset10::Constant::create(element::i32, Shape{}, {pads.size()}));
auto pads_diff = context.mark_node(std::make_shared<opset10::Subtract>(rank, pads_len));
auto pads_remaining = context.mark_node(std::make_shared<opset10::Broadcast>(zero_i32, pads_diff));
auto pads_len = context.mark_node(v0::Constant::create(element::i32, Shape{}, {pads.size()}));
auto pads_diff = context.mark_node(std::make_shared<v1::Subtract>(rank, pads_len));
auto pads_remaining = context.mark_node(std::make_shared<v3::Broadcast>(zero_i32, pads_diff));
auto padding = context.mark_node(
std::make_shared<opset10::Concat>(NodeVector{pads_remaining, pad_values.get_node_shared_ptr()}, 0));
input =
context.mark_node(std::make_shared<opset10::Pad>(input, padding, padding, zero, ov::op::PadMode::CONSTANT));
std::make_shared<v0::Concat>(NodeVector{pads_remaining, pad_values.get_node_shared_ptr()}, 0));
input = context.mark_node(std::make_shared<v1::Pad>(input, padding, padding, zero, ov::op::PadMode::CONSTANT));
pads = Shape(pads.size(), 0);
}
return {context.mark_node(
std::make_shared<opset10::AvgPool>(input, strides, pads, pads, kernel, !count_include_pad, rounding_type))};
std::make_shared<v1::AvgPool>(input, strides, pads, pads, kernel, !count_include_pad, rounding_type))};
};
} // namespace op
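A small standalone sketch (an assumed helper, not from this commit) of the padding layout built in the count_include_pad branch above: the Concat of pads_remaining and the pad values yields a per-axis vector with zeros for the batch and channel axes, so only the spatial axes are pre-padded before AvgPool, and pads is then reset to zeros so the padding is not applied twice.

#include <cstddef>
#include <cstdint>
#include <vector>

// Builds the per-axis begin/end padding passed to Pad: (rank - spatial_pads.size())
// leading zeros for the batch/channel axes, followed by the spatial pads.
std::vector<int64_t> full_padding(std::size_t rank, const std::vector<int64_t>& spatial_pads) {
    std::vector<int64_t> padding(rank - spatial_pads.size(), 0);
    padding.insert(padding.end(), spatial_pads.begin(), spatial_pads.end());
    return padding;
}

// Example: full_padding(4, {1, 1}) yields {0, 0, 1, 1} for an NCHW input.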
@ -2,8 +2,14 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/op/batch_norm.hpp"
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/broadcast.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/gather.hpp"
|
||||
#include "openvino/op/shape_of.hpp"
|
||||
#include "openvino/op/unsqueeze.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,33 +17,38 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
namespace {
|
||||
Output<Node> broadcast_const_to_channel_dim(NodeContext& context, Output<Node> input, Output<Node> value) {
|
||||
auto input_shape = context.mark_node(std::make_shared<opset10::ShapeOf>(input));
|
||||
auto zero_i = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto one_i = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {1}));
|
||||
auto channel_dim = context.mark_node(std::make_shared<opset10::Gather>(input_shape, one_i, zero_i));
|
||||
auto channel_dim_exp = context.mark_node(std::make_shared<opset10::Unsqueeze>(channel_dim, zero_i));
|
||||
return context.mark_node(std::make_shared<opset10::Broadcast>(value, channel_dim_exp));
|
||||
Output<Node> broadcast_const_to_channel_dim(const NodeContext& context,
|
||||
const Output<Node>& input,
|
||||
const Output<Node>& value) {
|
||||
auto input_shape = context.mark_node(std::make_shared<v3::ShapeOf>(input));
|
||||
auto zero_i = context.mark_node(v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto one_i = context.mark_node(v0::Constant::create(element::i64, Shape{}, {1}));
|
||||
auto channel_dim = context.mark_node(std::make_shared<v8::Gather>(input_shape, one_i, zero_i));
|
||||
auto channel_dim_exp = context.mark_node(std::make_shared<v0::Unsqueeze>(channel_dim, zero_i));
|
||||
return context.mark_node(std::make_shared<v3::Broadcast>(value, channel_dim_exp));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
OutputVector translate_batch_norm(NodeContext& context) {
|
||||
// Schema: aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var,
|
||||
// bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
|
||||
num_inputs_check(context, 8, 9);
|
||||
auto input = context.get_input(0);
|
||||
Output<Node> weight;
|
||||
Output<Node> bias;
|
||||
if (!context.input_is_none(1)) {
|
||||
weight = context.get_input(1);
|
||||
} else {
|
||||
auto one_f = context.mark_node(opset10::Constant::create(element::f32, Shape{}, {1}));
|
||||
auto one_f = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}));
|
||||
weight = broadcast_const_to_channel_dim(context, input, one_f);
|
||||
}
|
||||
if (!context.input_is_none(2)) {
|
||||
bias = context.get_input(2);
|
||||
} else {
|
||||
auto zero_f = context.mark_node(opset10::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto zero_f = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
bias = broadcast_const_to_channel_dim(context, input, zero_f);
|
||||
}
|
||||
// index 3 running_mean and index 4 running_var can be none for training case only, check that not training before
|
||||
@ -45,10 +56,11 @@ OutputVector translate_batch_norm(NodeContext& context) {
|
||||
FRONT_END_OP_CONVERSION_CHECK(!training, "Translation for aten::batch_norm do not support training mode.");
|
||||
auto running_mean = context.get_input(3);
|
||||
auto running_var = context.get_input(4);
|
||||
// Index with index 6 is momentum, it is used only in training mode
|
||||
// Input with index 6 is momentum, it is used only in training mode
|
||||
auto epsilon = context.const_input<float>(7);
|
||||
// Input with index 8 is flag "cudnn_enabled" we can ignore it
|
||||
return {context.mark_node(
|
||||
std::make_shared<opset10::BatchNormInference>(input, weight, bias, running_mean, running_var, epsilon))};
|
||||
std::make_shared<v5::BatchNormInference>(input, weight, bias, running_mean, running_var, epsilon))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
22
src/frontends/pytorch/src/op/bool.cpp
Normal file
@ -0,0 +1,22 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_bool(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 1);
|
||||
return {context.mark_node(std::make_shared<ov::op::v0::Convert>(context.get_input(0), element::boolean))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
} // namespace pytorch
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
@ -3,7 +3,9 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/convert_like.hpp"
|
||||
#include "openvino/op/maximum.hpp"
|
||||
#include "openvino/op/minimum.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,17 +13,20 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_clamp(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 3);
|
||||
auto x = context.get_input(0);
|
||||
if (!context.input_is_none(1)) {
|
||||
auto min_clip = context.get_input(1);
|
||||
min_clip = context.mark_node(std::make_shared<opset10::ConvertLike>(min_clip, x));
|
||||
x = context.mark_node(std::make_shared<opset10::Maximum>(x, min_clip));
|
||||
min_clip = context.mark_node(std::make_shared<v1::ConvertLike>(min_clip, x));
|
||||
x = context.mark_node(std::make_shared<v1::Maximum>(x, min_clip));
|
||||
}
|
||||
if (!context.input_is_none(2)) {
|
||||
auto max_clip = context.get_input(2);
|
||||
max_clip = context.mark_node(std::make_shared<opset10::ConvertLike>(max_clip, x));
|
||||
x = context.mark_node(std::make_shared<opset10::Minimum>(x, max_clip));
|
||||
max_clip = context.mark_node(std::make_shared<v1::ConvertLike>(max_clip, x));
|
||||
x = context.mark_node(std::make_shared<v1::Minimum>(x, max_clip));
|
||||
}
|
||||
return {x};
|
||||
};
|
||||
|
@ -13,9 +13,10 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_conv_transposend(NodeContext& context) {
|
||||
auto num_inputs = context.get_input_size();
|
||||
FRONT_END_OP_CONVERSION_CHECK(num_inputs == 8, "Unsupported number of inputs: ", num_inputs);
|
||||
num_inputs_check(context, 8, 8);
|
||||
auto strides = context.const_input<Strides>(3);
|
||||
// PyTorch support only symmetric padding, padding sizes are the same for begins and ends for each dimension
|
||||
auto pads = context.const_input<CoordinateDiff>(4);
|
||||
@ -27,16 +28,16 @@ OutputVector translate_conv_transposend(NodeContext& context) {
|
||||
|
||||
std::shared_ptr<ov::Node> conv;
|
||||
if (groups == 1) {
|
||||
conv = std::make_shared<ov::op::v1::ConvolutionBackpropData>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations,
|
||||
pad_type,
|
||||
output_padding);
|
||||
conv = std::make_shared<v1::ConvolutionBackpropData>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations,
|
||||
pad_type,
|
||||
output_padding);
|
||||
} else {
|
||||
conv = std::make_shared<ov::op::v1::GroupConvolutionBackpropData>(
|
||||
conv = std::make_shared<v1::GroupConvolutionBackpropData>(
|
||||
context.get_input(0),
|
||||
reshape_kernel_for_group(context, context.get_input(1), groups),
|
||||
strides,
|
||||
@ -52,7 +53,7 @@ OutputVector translate_conv_transposend(NodeContext& context) {
|
||||
if (bias_rank == 1) {
|
||||
bias = reshape_channelwise(context, bias, conv);
|
||||
}
|
||||
conv = context.mark_node(std::make_shared<ov::op::v1::Add>(conv, bias));
|
||||
conv = context.mark_node(std::make_shared<v1::Add>(conv, bias));
|
||||
}
|
||||
|
||||
return {conv};
|
||||
|
@ -3,7 +3,9 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/convolution.hpp"
|
||||
#include "openvino/op/group_conv.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,7 +13,10 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_convnd(NodeContext& context) {
|
||||
num_inputs_check(context, 7, 7);
|
||||
auto strides = context.const_input<Strides>(3);
|
||||
// In torch pads at beginning are same as at end
|
||||
auto pads = CoordinateDiff(strides.size(), 0);
|
||||
@ -28,22 +33,21 @@ OutputVector translate_convnd(NodeContext& context) {
|
||||
|
||||
std::shared_ptr<ov::Node> conv;
|
||||
if (groups == 1) {
|
||||
conv = std::make_shared<opset10::Convolution>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
conv = std::make_shared<v1::Convolution>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations,
|
||||
pad_type);
|
||||
} else {
|
||||
conv = std::make_shared<v1::GroupConvolution>(context.get_input(0),
|
||||
reshape_kernel_for_group(context, context.get_input(1), groups),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations,
|
||||
pad_type);
|
||||
} else {
|
||||
conv =
|
||||
std::make_shared<opset10::GroupConvolution>(context.get_input(0),
|
||||
reshape_kernel_for_group(context, context.get_input(1), groups),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations,
|
||||
pad_type);
|
||||
}
|
||||
if (!context.input_is_none(2)) {
|
||||
auto bias = context.get_input(2);
|
||||
@ -51,7 +55,7 @@ OutputVector translate_convnd(NodeContext& context) {
|
||||
if (bias_rank == 1) {
|
||||
bias = reshape_channelwise(context, bias, conv);
|
||||
}
|
||||
conv = context.mark_node(std::make_shared<opset10::Add>(conv, bias));
|
||||
conv = context.mark_node(std::make_shared<v1::Add>(conv, bias));
|
||||
}
|
||||
|
||||
return {conv};
|
||||
|
@ -2,8 +2,11 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/op/convolution.hpp"
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/group_conv.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,11 +14,14 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_convolution(NodeContext& context) {
|
||||
// Schema: aten::_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[]
|
||||
// dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool
|
||||
// cudnn_enabled, bool allow_tf32) -> Tensor
|
||||
|
||||
num_inputs_check(context, 9, 13);
|
||||
auto strides = context.const_input<Strides>(3);
|
||||
auto pads = context.const_input<CoordinateDiff>(4);
|
||||
auto dilations = context.const_input<Strides>(5);
|
||||
@ -26,25 +32,25 @@ OutputVector translate_convolution(NodeContext& context) {
|
||||
std::shared_ptr<ov::Node> conv;
|
||||
if (groups == 1) {
|
||||
if (!transposed) {
|
||||
conv = context.mark_node(std::make_shared<opset10::Convolution>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations));
|
||||
conv = context.mark_node(std::make_shared<v1::Convolution>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations));
|
||||
} else {
|
||||
conv = context.mark_node(std::make_shared<opset10::ConvolutionBackpropData>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations,
|
||||
ov::op::PadType::EXPLICIT,
|
||||
output_padding));
|
||||
conv = context.mark_node(std::make_shared<v1::ConvolutionBackpropData>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pads,
|
||||
pads,
|
||||
dilations,
|
||||
ov::op::PadType::EXPLICIT,
|
||||
output_padding));
|
||||
}
|
||||
} else {
|
||||
if (!transposed) {
|
||||
conv = context.mark_node(std::make_shared<opset10::GroupConvolution>(
|
||||
conv = context.mark_node(std::make_shared<v1::GroupConvolution>(
|
||||
context.get_input(0),
|
||||
context.mark_output(reshape_kernel_for_group(context, context.get_input(1), groups)),
|
||||
strides,
|
||||
@ -52,7 +58,7 @@ OutputVector translate_convolution(NodeContext& context) {
|
||||
pads,
|
||||
dilations));
|
||||
} else {
|
||||
conv = context.mark_node(std::make_shared<opset10::GroupConvolutionBackpropData>(
|
||||
conv = context.mark_node(std::make_shared<v1::GroupConvolutionBackpropData>(
|
||||
context.get_input(0),
|
||||
context.mark_output(reshape_kernel_for_group(context, context.get_input(1), groups)),
|
||||
strides,
|
||||
@ -70,7 +76,7 @@ OutputVector translate_convolution(NodeContext& context) {
|
||||
bias = reshape_channelwise(context, bias, conv);
|
||||
}
|
||||
|
||||
conv = context.mark_node(std::make_shared<opset10::Add>(conv, bias));
|
||||
conv = context.mark_node(std::make_shared<v1::Add>(conv, bias));
|
||||
}
|
||||
|
||||
return {context.mark_output(conv)};
|
||||
|
@ -3,7 +3,9 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/convolution.hpp"
|
||||
#include "openvino/op/group_conv.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,9 +13,12 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_convolution_mode(NodeContext& context) {
|
||||
// Schema: aten::_convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[]
|
||||
// dilation, int groups) -> Tensor
|
||||
num_inputs_check(context, 7, 7);
|
||||
auto strides = context.const_input<Strides>(3);
|
||||
auto pad_mode = context.const_input<std::string>(4);
|
||||
auto dilations = context.const_input<Strides>(5);
|
||||
@ -24,15 +29,15 @@ OutputVector translate_convolution_mode(NodeContext& context) {
|
||||
|
||||
std::shared_ptr<ov::Node> conv;
|
||||
if (groups == 1) {
|
||||
conv = context.mark_node(std::make_shared<opset10::Convolution>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pad_const,
|
||||
pad_const,
|
||||
dilations,
|
||||
auto_pad_mode));
|
||||
conv = context.mark_node(std::make_shared<v1::Convolution>(context.get_input(0),
|
||||
context.get_input(1),
|
||||
strides,
|
||||
pad_const,
|
||||
pad_const,
|
||||
dilations,
|
||||
auto_pad_mode));
|
||||
} else {
|
||||
conv = context.mark_node(std::make_shared<opset10::GroupConvolution>(
|
||||
conv = context.mark_node(std::make_shared<v1::GroupConvolution>(
|
||||
context.get_input(0),
|
||||
context.mark_output(reshape_kernel_for_group(context, context.get_input(1), groups)),
|
||||
strides,
|
||||
@ -49,7 +54,7 @@ OutputVector translate_convolution_mode(NodeContext& context) {
|
||||
bias = reshape_channelwise(context, bias, conv);
|
||||
}
|
||||
|
||||
conv = context.mark_node(std::make_shared<opset10::Add>(conv, bias));
|
||||
conv = context.mark_node(std::make_shared<v1::Add>(conv, bias));
|
||||
}
|
||||
return {context.mark_output(conv)};
|
||||
};
|
||||
|
@ -3,7 +3,6 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,12 +10,13 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_dim(NodeContext& context) {
|
||||
auto shape = std::make_shared<opset10::ShapeOf>(context.get_input(0), element::i32);
|
||||
auto rank = std::make_shared<opset10::ShapeOf>(shape, element::i32);
|
||||
auto squeeze = std::make_shared<opset10::Squeeze>(rank);
|
||||
context.mark_nodes({shape, rank, squeeze});
|
||||
return squeeze->outputs();
|
||||
num_inputs_check(context, 1, 1);
|
||||
Output<Node> rank;
|
||||
std::tie(std::ignore, rank) = get_shape_rank(context, context.get_input(0), true);
|
||||
return {rank};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -18,6 +18,7 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_div(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 3);
|
||||
auto x = context.get_input(0);
|
||||
auto y = context.get_input(1);
|
||||
std::string rounding_mode = "";
|
||||
|
@ -2,8 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/op/elu.hpp"
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,9 +13,16 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_elu(NodeContext& context) {
|
||||
// aten::elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
|
||||
num_inputs_check(context, 2, 4);
|
||||
auto x = context.get_input(0);
|
||||
auto alpha = context.const_input<float>(1);
|
||||
return {context.mark_node(std::make_shared<opset10::Elu>(x, alpha))};
|
||||
// TODO: Figure out what scale and input_scale do
|
||||
FRONT_END_OP_CONVERSION_CHECK(context.input_is_none(2) || context.const_input<int64_t>(2) == 1,
|
||||
"Unexpected value of scale input for elu operation");
|
||||
FRONT_END_OP_CONVERSION_CHECK(context.input_is_none(3) || context.const_input<int64_t>(3) == 1,
|
||||
"Unexpected value of input_scale input for elu operation");
|
||||
return {context.mark_node(std::make_shared<ov::op::v0::Elu>(x, alpha))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,8 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/gather.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,14 +13,15 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_embedding(NodeContext& context) {
|
||||
num_inputs_check(context, 5, 5);
|
||||
auto data = context.get_input(0);
|
||||
auto indices = context.get_input(1);
|
||||
// TODO: find out the meaning of input idx 2
|
||||
FRONT_END_OP_CONVERSION_CHECK(
|
||||
context.const_input<bool>(3) == false && context.const_input<bool>(4) == false,
|
||||
"Only False is supported on inputs with indexes 3 and 4 for aten::embedding translation");
|
||||
auto axis_0 = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {0}));
|
||||
return {context.mark_node(std::make_shared<opset10::Gather>(data, indices, axis_0))};
|
||||
auto axis_0 = context.mark_node(ov::op::v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
return {context.mark_node(std::make_shared<ov::op::v8::Gather>(data, indices, axis_0))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,11 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/broadcast.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/equal.hpp"
|
||||
#include "openvino/op/select.hpp"
|
||||
#include "openvino/op/shape_of.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,29 +15,37 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
namespace {
|
||||
OutputVector base_expand(NodeContext& context, ov::Output<ov::Node> x, ov::Output<ov::Node> sizes) {
|
||||
auto one = context.mark_node(opset10::Constant::create(element::i32, Shape{}, {1}));
|
||||
auto sizes_shape = context.mark_node(std::make_shared<opset10::ShapeOf>(sizes, element::i32));
|
||||
auto neg_one = context.mark_node(opset10::Constant::create(element::i32, Shape{}, {-1}));
|
||||
auto neg_ones = context.mark_node(std::make_shared<opset10::Broadcast>(neg_one, sizes_shape));
|
||||
auto ones = context.mark_node(std::make_shared<opset10::Broadcast>(one, sizes_shape));
|
||||
auto neg_sizes = context.mark_node(std::make_shared<opset10::Equal>(sizes, neg_ones));
|
||||
auto shape = context.mark_node(std::make_shared<opset10::Select>(neg_sizes, ones, sizes));
|
||||
return {std::make_shared<opset10::Broadcast>(x, shape, ov::op::BroadcastType::BIDIRECTIONAL)};
|
||||
OutputVector base_expand(const NodeContext& context, const Output<Node>& x, const Output<Node>& sizes) {
|
||||
auto one = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));
|
||||
auto sizes_shape = context.mark_node(std::make_shared<v3::ShapeOf>(sizes, element::i32));
|
||||
auto neg_one = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1}));
|
||||
auto neg_ones = context.mark_node(std::make_shared<v3::Broadcast>(neg_one, sizes_shape));
|
||||
auto ones = context.mark_node(std::make_shared<v3::Broadcast>(one, sizes_shape));
|
||||
auto neg_sizes = context.mark_node(std::make_shared<v1::Equal>(sizes, neg_ones));
|
||||
auto shape = context.mark_node(std::make_shared<v1::Select>(neg_sizes, ones, sizes));
|
||||
return {context.mark_node(std::make_shared<v3::Broadcast>(x, shape, BroadcastType::BIDIRECTIONAL))};
|
||||
};
|
||||
} // namespace
|
||||
|
||||
OutputVector translate_expand(NodeContext& context) {
|
||||
// aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
|
||||
num_inputs_check(context, 2, 3);
|
||||
auto x = context.get_input(0);
|
||||
auto sizes = context.get_input(1);
|
||||
// TODO: figure out what implicit means
|
||||
FRONT_END_OP_CONVERSION_CHECK(context.input_is_none(2) || context.const_input<bool>(2) == false,
|
||||
"Unexpected value of implicit for expand operation");
|
||||
return base_expand(context, x, sizes);
|
||||
};
|
||||
|
||||
OutputVector translate_expand_as(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto x = context.get_input(0);
|
||||
auto y = context.get_input(1);
|
||||
auto sizes = context.mark_node(std::make_shared<opset10::ShapeOf>(y, element::i32));
|
||||
auto sizes = context.mark_node(std::make_shared<v3::ShapeOf>(y, element::i32));
|
||||
return base_expand(context, x, sizes);
|
||||
};
|
||||
|
||||
|
@ -14,16 +14,18 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_eye(NodeContext& context) {
|
||||
size_t num_inputs = context.get_input_size();
|
||||
auto x = context.get_input(0);
|
||||
// num rows and cols should be integer, but at the moment conversion their data type can be unknown yet
|
||||
x = context.mark_node(std::make_shared<ov::op::v0::Convert>(x, element::i64));
|
||||
x = context.mark_node(std::make_shared<v0::Convert>(x, element::i64));
|
||||
Output<Node> y;
|
||||
size_t dtype_id;
|
||||
int dtype_id;
|
||||
auto dtype = element::f32;
|
||||
// aten::eye support only main diagonal
|
||||
auto diagonal = context.mark_node(ov::op::v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto diagonal = context.mark_node(v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
if (num_inputs == 5) {
|
||||
// aten::eye(n, dtype, layout, device, pin_memory)
|
||||
y = x;
|
||||
@ -31,7 +33,7 @@ OutputVector translate_eye(NodeContext& context) {
|
||||
} else if (num_inputs == 6) {
|
||||
// aten::eye(n, m, dtype, layout, device, pin_memory)
|
||||
y = context.get_input(1);
|
||||
y = context.mark_node(std::make_shared<ov::op::v0::Convert>(y, element::i64));
|
||||
y = context.mark_node(std::make_shared<v0::Convert>(y, element::i64));
|
||||
dtype_id = 2;
|
||||
} else {
|
||||
FRONT_END_OP_CONVERSION_CHECK(false, "Unsupported number of inputs: ", num_inputs, " for aten::eye");
|
||||
@ -39,8 +41,8 @@ OutputVector translate_eye(NodeContext& context) {
|
||||
if (!context.input_is_none(dtype_id)) {
|
||||
dtype = convert_dtype(context.const_input<int64_t>(dtype_id));
|
||||
}
|
||||
auto eye = context.mark_node(std::make_shared<ov::op::v9::Eye>(x, y, diagonal, element::i32));
|
||||
return {context.mark_node(std::make_shared<ov::op::v0::Convert>(eye, dtype))};
|
||||
auto eye = context.mark_node(std::make_shared<v9::Eye>(x, y, diagonal, element::i32));
|
||||
return {context.mark_node(std::make_shared<v0::Convert>(eye, dtype))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -19,17 +19,32 @@ namespace op {
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_flatten(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 3);
|
||||
num_inputs_check(context, 1, 3);
|
||||
auto x = context.get_input(0);
|
||||
auto start_dim = context.const_input<int64_t>(1);
|
||||
auto end_dim = context.const_input<int64_t>(2);
|
||||
|
||||
int64_t start_dim = 0;
|
||||
int64_t end_dim = -1;
|
||||
if (!context.input_is_none(1)) {
|
||||
start_dim = context.const_input<int64_t>(1);
|
||||
}
|
||||
if (!context.input_is_none(2)) {
|
||||
end_dim = context.const_input<int64_t>(2);
|
||||
}
|
||||
Output<Node> shape;
|
||||
Output<Node> rank;
|
||||
std::tie(shape, rank) = get_shape_rank(context, x, true);
|
||||
// Use opset::If for dim normalization. For now we only have flatten with constant start and end
|
||||
auto start_dim_node = context.get_input(1);
|
||||
auto end_dim_node = context.get_input(2);
|
||||
Output<Node> start_dim_node;
|
||||
Output<Node> end_dim_node;
|
||||
if (!context.input_is_none(1)) {
|
||||
start_dim_node = context.get_input(1);
|
||||
} else {
|
||||
start_dim_node = v0::Constant::create(element::i32, Shape{}, {start_dim});
|
||||
}
|
||||
if (!context.input_is_none(2)) {
|
||||
end_dim_node = context.get_input(2);
|
||||
} else {
|
||||
end_dim_node = v0::Constant::create(element::i32, Shape{}, {end_dim});
|
||||
}
|
||||
if (start_dim < 0) {
|
||||
start_dim_node = context.mark_node(std::make_shared<v1::Add>(rank, start_dim_node));
|
||||
}
|
||||
@ -51,7 +66,7 @@ OutputVector translate_flatten(NodeContext& context) {
|
||||
|
||||
context.mark_nodes({zero, one, int_max, start_dim_u, end_dim_u, slice_begin, slice_end, neg_1_const, new_shape});
|
||||
|
||||
return {context.mark_node(std::make_shared<v1::Reshape>(context.get_input(0), new_shape, true))};
|
||||
return {context.mark_node(std::make_shared<v1::Reshape>(x, new_shape, true))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
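A standalone sketch (illustrative only, not part of the commit) of the shape that the flatten hunk above produces, using the new defaults start_dim=0 and end_dim=-1 and normalizing negative dims by adding the rank, assuming start_dim <= end_dim after normalization.

#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

std::vector<int64_t> flatten_shape(const std::vector<int64_t>& shape, int64_t start_dim, int64_t end_dim) {
    const int64_t rank = static_cast<int64_t>(shape.size());
    if (start_dim < 0) start_dim += rank;  // same normalization as the Add(rank, start_dim) above
    if (end_dim < 0) end_dim += rank;
    std::vector<int64_t> result(shape.begin(), shape.begin() + start_dim);
    const int64_t flat = std::accumulate(shape.begin() + start_dim, shape.begin() + end_dim + 1,
                                         int64_t{1}, std::multiplies<int64_t>());
    result.push_back(flat);
    result.insert(result.end(), shape.begin() + end_dim + 1, shape.end());
    return result;
}

// Example: flatten_shape({2, 3, 4, 5}, 1, -1) yields {2, 60}.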
@ -3,7 +3,8 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/divide.hpp"
|
||||
#include "openvino/op/floor.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,11 +12,14 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_floor_divide(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto x = context.get_input(0);
|
||||
auto y = context.get_input(1);
|
||||
auto div = context.mark_node(std::make_shared<opset10::Divide>(x, y, true));
|
||||
return {context.mark_node(std::make_shared<opset10::Floor>(div))};
|
||||
auto div = context.mark_node(std::make_shared<v1::Divide>(x, y, true));
|
||||
return {context.mark_node(std::make_shared<v0::Floor>(div))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/divide.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,9 +12,10 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_floordiv(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto x = context.get_input(0);
|
||||
auto y = context.get_input(1);
|
||||
return {context.mark_node(std::make_shared<opset10::Divide>(x, y, true))};
|
||||
return {context.mark_node(std::make_shared<ov::op::v1::Divide>(x, y, true))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -15,33 +15,36 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
namespace {
|
||||
ov::Output<Node> base_translate_full(NodeContext& context, ov::Output<Node> sizes, ov::Output<Node> value) {
|
||||
return context.mark_node(std::make_shared<ov::op::v3::Broadcast>(value, sizes));
|
||||
Output<Node> base_translate_full(const NodeContext& context, const Output<Node>& sizes, const Output<Node>& value) {
|
||||
return context.mark_node(std::make_shared<v3::Broadcast>(value, sizes));
|
||||
}
|
||||
|
||||
ov::Output<Node> base_translate_full_with_convert(NodeContext& context,
|
||||
ov::Output<Node> sizes,
|
||||
ov::Output<Node> value,
|
||||
size_t dtype_id) {
|
||||
Output<Node> base_translate_full_with_convert(const NodeContext& context,
|
||||
const Output<Node>& sizes,
|
||||
const Output<Node>& value,
|
||||
size_t dtype_id) {
|
||||
auto filled_tensor = base_translate_full(context, sizes, value);
|
||||
if (!context.input_is_none(dtype_id)) {
|
||||
auto dtype = convert_dtype(context.const_input<int64_t>(dtype_id));
|
||||
filled_tensor = context.mark_node(std::make_shared<ov::op::v0::Convert>(filled_tensor, dtype));
|
||||
filled_tensor = context.mark_node(std::make_shared<v0::Convert>(filled_tensor, dtype));
|
||||
}
|
||||
return filled_tensor;
|
||||
}
|
||||
|
||||
ov::Output<Node> base_translate_full_with_convertlike(NodeContext& context,
|
||||
ov::Output<Node> sizes,
|
||||
ov::Output<Node> value,
|
||||
ov::Output<Node> out) {
|
||||
Output<Node> base_translate_full_with_convertlike(const NodeContext& context,
|
||||
const Output<Node>& sizes,
|
||||
const Output<Node>& value,
|
||||
const Output<Node>& out) {
|
||||
auto filled_tensor = base_translate_full(context, sizes, value);
|
||||
return context.mark_node(std::make_shared<ov::op::v1::ConvertLike>(filled_tensor, out));
|
||||
return context.mark_node(std::make_shared<v1::ConvertLike>(filled_tensor, out));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
OutputVector translate_full(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 6);
|
||||
auto sizes = context.get_input(0);
|
||||
auto value = context.get_input(1);
|
||||
auto num_inputs = context.get_input_size();
|
||||
@ -58,9 +61,10 @@ OutputVector translate_full(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_full_like(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 7);
|
||||
auto input = context.get_input(0);
|
||||
auto value = context.get_input(1);
|
||||
auto sizes = context.mark_node(std::make_shared<ov::op::v3::ShapeOf>(input));
|
||||
auto sizes = context.mark_node(std::make_shared<v3::ShapeOf>(input));
|
||||
if (context.get_input_size() == 7) {
|
||||
return {base_translate_full_with_convert(context, sizes, value, 2)};
|
||||
}
|
||||
@ -69,13 +73,15 @@ OutputVector translate_full_like(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_fill_(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto input = context.get_input(0);
|
||||
auto value = context.get_input(1);
|
||||
auto sizes = context.mark_node(std::make_shared<ov::op::v3::ShapeOf>(input));
|
||||
auto sizes = context.mark_node(std::make_shared<v3::ShapeOf>(input));
|
||||
return {base_translate_full_with_convertlike(context, sizes, value, input)};
|
||||
};
|
||||
|
||||
OutputVector translate_new_full(NodeContext& context) {
|
||||
num_inputs_check(context, 3, 7);
|
||||
auto input = context.get_input(0);
|
||||
auto sizes = context.get_input(1);
|
||||
auto value = context.get_input(2);
|
||||
@ -86,8 +92,9 @@ OutputVector translate_new_full(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_zeros(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 5);
|
||||
auto sizes = context.get_input(0);
|
||||
auto value = context.mark_node(ov::op::v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto num_inputs = context.get_input_size();
|
||||
if (num_inputs < 5) {
|
||||
int out_id = num_inputs == 2 ? 1 : 2;
|
||||
@ -102,9 +109,10 @@ OutputVector translate_zeros(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_zeros_like(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 6);
|
||||
auto input = context.get_input(0);
|
||||
auto value = context.mark_node(ov::op::v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto sizes = context.mark_node(std::make_shared<ov::op::v3::ShapeOf>(input));
|
||||
auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto sizes = context.mark_node(std::make_shared<v3::ShapeOf>(input));
|
||||
if (context.get_input_size() == 6) {
|
||||
return {base_translate_full_with_convert(context, sizes, value, 1)};
|
||||
}
|
||||
@ -113,9 +121,10 @@ OutputVector translate_zeros_like(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_new_zeros(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 6);
|
||||
auto input = context.get_input(0);
|
||||
auto sizes = context.get_input(1);
|
||||
auto value = context.mark_node(ov::op::v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
if (context.get_input_size() == 6 && !context.input_is_none(2)) {
|
||||
return {base_translate_full_with_convert(context, sizes, value, 2)};
|
||||
}
|
||||
@ -123,8 +132,9 @@ OutputVector translate_new_zeros(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_ones(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 5);
|
||||
auto sizes = context.get_input(0);
|
||||
auto value = context.mark_node(ov::op::v0::Constant::create(element::f32, Shape{}, {1}));
|
||||
auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}));
|
||||
auto num_inputs = context.get_input_size();
|
||||
if (num_inputs < 5) {
|
||||
int out_id = num_inputs == 2 ? 1 : 2;
|
||||
@ -139,9 +149,10 @@ OutputVector translate_ones(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_ones_like(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 6);
|
||||
auto input = context.get_input(0);
|
||||
auto value = context.mark_node(ov::op::v0::Constant::create(element::f32, Shape{}, {1}));
|
||||
auto sizes = context.mark_node(std::make_shared<ov::op::v3::ShapeOf>(input));
|
||||
auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}));
|
||||
auto sizes = context.mark_node(std::make_shared<v3::ShapeOf>(input));
|
||||
if (context.get_input_size() == 6) {
|
||||
return {base_translate_full_with_convert(context, sizes, value, 1)};
|
||||
}
|
||||
@ -150,9 +161,10 @@ OutputVector translate_ones_like(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_new_ones(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 6);
|
||||
auto input = context.get_input(0);
|
||||
auto sizes = context.get_input(1);
|
||||
auto value = context.mark_node(ov::op::v0::Constant::create(element::f32, Shape{}, {1}));
|
||||
auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}));
|
||||
if (context.get_input_size() == 6 && !context.input_is_none(2)) {
|
||||
return {base_translate_full_with_convert(context, sizes, value, 2)};
|
||||
}
|
||||
@ -160,11 +172,12 @@ OutputVector translate_new_ones(NodeContext& context) {
|
||||
};
|
||||
|
||||
OutputVector translate_empty(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 2);
|
||||
auto sizes = context.get_input(0);
|
||||
// In OV uninitialised data is not supported, so we create a tensor filled with zeros with a given shape and type.
|
||||
auto value = context.mark_node(ov::op::v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
int dtype_id = 1;
|
||||
ov::Output<ov::Node> empty;
|
||||
Output<Node> empty;
|
||||
if (!context.input_is_none(dtype_id)) {
|
||||
empty = base_translate_full_with_convert(context, sizes, value, dtype_id);
|
||||
} else {
|
||||
|
@ -2,8 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/op/gelu.hpp"
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,11 +13,12 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_gelu(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto x = context.get_input(0);
|
||||
auto approximate = context.const_input<std::string>(1);
|
||||
// TODO: Add support for "tanh" approximate
|
||||
FRONT_END_OP_CONVERSION_CHECK(approximate == "none", "Unsupported approximate for Gelu: ", approximate);
|
||||
return {context.mark_node(std::make_shared<opset10::Gelu>(x))};
|
||||
return {context.mark_node(std::make_shared<ov::op::v7::Gelu>(x))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -13,6 +13,7 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_getitem(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto input = context.get_input(0);
|
||||
FRONT_END_OP_CONVERSION_CHECK(cast_fw_node(input.get_node_shared_ptr(), "prim::ListConstruct") == nullptr,
|
||||
"unsupported case for aten::getitem");
|
||||
|
@ -7,21 +7,25 @@
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/sigmoid.hpp"
|
||||
#include "openvino/op/split.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_glu(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 2);
|
||||
auto x = context.get_input(0);
|
||||
auto dim = context.input_is_none(1) ? context.mark_node(ov::op::v0::Constant::create(element::i64, Shape{}, {-1}))
|
||||
auto dim = context.input_is_none(1) ? context.mark_node(v0::Constant::create(element::i64, Shape{}, {-1}))
|
||||
: context.get_input(1);
|
||||
auto split = context.mark_node(std::make_shared<ov::op::v1::Split>(x, dim, 2));
|
||||
auto split = context.mark_node(std::make_shared<v1::Split>(x, dim, 2));
|
||||
auto first = split->output(0);
|
||||
auto second = split->output(1);
|
||||
auto sigmoid = context.mark_node(std::make_shared<ov::op::v0::Sigmoid>(second));
|
||||
return {context.mark_node(std::make_shared<ov::op::v1::Multiply>(first, sigmoid))};
|
||||
auto sigmoid = context.mark_node(std::make_shared<v0::Sigmoid>(second));
|
||||
return {context.mark_node(std::make_shared<v1::Multiply>(first, sigmoid))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -4,25 +4,29 @@
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/op/grid_sample.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_grid_sampler(NodeContext& context) {
|
||||
num_inputs_check(context, 4, 5);
|
||||
auto x = context.get_input(0);
|
||||
auto grid = context.get_input(1);
|
||||
ov::op::v9::GridSample::Attributes attrs{};
|
||||
const std::unordered_map<int64_t, ov::op::v9::GridSample::InterpolationMode> grid_sample_mode_map{
|
||||
{0, ov::op::v9::GridSample::InterpolationMode::BILINEAR},
|
||||
{1, ov::op::v9::GridSample::InterpolationMode::NEAREST},
|
||||
{2, ov::op::v9::GridSample::InterpolationMode::BICUBIC},
|
||||
v9::GridSample::Attributes attrs{};
|
||||
const std::unordered_map<int64_t, v9::GridSample::InterpolationMode> grid_sample_mode_map{
|
||||
{0, v9::GridSample::InterpolationMode::BILINEAR},
|
||||
{1, v9::GridSample::InterpolationMode::NEAREST},
|
||||
{2, v9::GridSample::InterpolationMode::BICUBIC},
|
||||
};
|
||||
const std::unordered_map<int64_t, ov::op::v9::GridSample::PaddingMode> grid_sample_padding_mode_map{
|
||||
{0, ov::op::v9::GridSample::PaddingMode::ZEROS},
|
||||
{1, ov::op::v9::GridSample::PaddingMode::BORDER},
|
||||
{2, ov::op::v9::GridSample::PaddingMode::REFLECTION}};
|
||||
const std::unordered_map<int64_t, v9::GridSample::PaddingMode> grid_sample_padding_mode_map{
|
||||
{0, v9::GridSample::PaddingMode::ZEROS},
|
||||
{1, v9::GridSample::PaddingMode::BORDER},
|
||||
{2, v9::GridSample::PaddingMode::REFLECTION}};
|
||||
auto mode = context.const_input<int64_t>(2);
|
||||
FRONT_END_OP_CONVERSION_CHECK(grid_sample_mode_map.count(mode), "Unknown interpolation mode: ", mode);
|
||||
attrs.mode = grid_sample_mode_map.at(mode);
|
||||
@ -37,7 +41,7 @@ OutputVector translate_grid_sampler(NodeContext& context) {
|
||||
}
|
||||
attrs.align_corners = align_corners;
|
||||
|
||||
return {context.mark_node(std::make_shared<ov::op::v9::GridSample>(x, grid, attrs))};
|
||||
return {context.mark_node(std::make_shared<v9::GridSample>(x, grid, attrs))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,14 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/mvn.hpp"
|
||||
#include "openvino/op/range.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/subtract.hpp"
|
||||
#include "openvino/op/unsqueeze.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,35 +18,40 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_group_norm(NodeContext& context) {
|
||||
// aten::group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float
|
||||
// eps=1.0000000000000001e-05, bool cudnn_enabled=True) -> Tensor
|
||||
num_inputs_check(context, 2, 6);
|
||||
auto data = context.get_input(0);
|
||||
auto num_groups = context.const_input<int64_t>(1);
|
||||
// input 2 - weights and input 3 - bias are optional without default value, we handle them later
|
||||
auto eps = static_cast<float>(context.const_input<double>(4));
|
||||
auto input_shape = context.mark_node(std::make_shared<opset10::ShapeOf>(data, element::i64));
|
||||
auto scalar_one = context.mark_node(opset10::Constant::create(element::i64, {}, {1}));
|
||||
Output<Node> input_shape;
|
||||
Output<Node> input_rank;
|
||||
std::tie(input_shape, input_rank) = get_shape_rank(context, data, true, element::i64);
|
||||
auto scalar_one = context.mark_node(v0::Constant::create(element::i64, {}, {1}));
|
||||
auto shape = context.mark_node(
|
||||
std::make_shared<opset10::Constant>(element::i64, Shape({3}), std::vector<int64_t>{0, num_groups, -1}));
|
||||
auto reshaped_input = context.mark_node(std::make_shared<opset10::Reshape>(data, shape, true));
|
||||
auto reduction_axes =
|
||||
context.mark_node(opset10::Constant::create(element::i64, Shape({1}), std::vector<int64_t>(1, 2)));
|
||||
std::make_shared<v0::Constant>(element::i64, Shape({3}), std::vector<int64_t>{0, num_groups, -1}));
|
||||
auto reshaped_input = context.mark_node(std::make_shared<v1::Reshape>(data, shape, true));
|
||||
auto reduction_axes = context.mark_node(v0::Constant::create(element::i64, Shape({1}), std::vector<int64_t>(1, 2)));
|
||||
auto reshaped_norm = context.mark_node(
|
||||
std::make_shared<opset10::MVN>(reshaped_input, reduction_axes, true, eps, ov::op::MVNEpsMode::INSIDE_SQRT));
|
||||
auto norm = context.mark_node(std::make_shared<opset10::Reshape>(reshaped_norm, input_shape, true));
|
||||
auto input_rank2d = context.mark_node(std::make_shared<opset10::ShapeOf>(input_shape, element::i64));
|
||||
auto input_rank = context.mark_node(std::make_shared<opset10::Squeeze>(input_rank2d));
|
||||
auto skip_last = context.mark_node(std::make_shared<opset10::Subtract>(input_rank, scalar_one));
|
||||
auto axes = context.mark_node(std::make_shared<opset10::Range>(scalar_one, skip_last, scalar_one, element::i64));
|
||||
std::make_shared<v6::MVN>(reshaped_input, reduction_axes, true, eps, MVNEpsMode::INSIDE_SQRT));
|
||||
auto norm = context.mark_node(std::make_shared<v1::Reshape>(reshaped_norm, input_shape, true));
|
||||
auto skip_last = context.mark_node(std::make_shared<v1::Subtract>(input_rank, scalar_one));
|
||||
auto axes = context.mark_node(std::make_shared<v4::Range>(scalar_one, skip_last, scalar_one, element::i64));
|
||||
if (!context.input_is_none(2)) {
|
||||
auto weights = context.get_input(2);
|
||||
weights = context.mark_node(std::make_shared<opset10::Unsqueeze>(weights, axes));
|
||||
norm = context.mark_node(std::make_shared<opset10::Multiply>(norm, weights));
|
||||
weights = context.mark_node(std::make_shared<v0::Unsqueeze>(weights, axes));
|
||||
norm = context.mark_node(std::make_shared<v1::Multiply>(norm, weights));
|
||||
}
|
||||
if (!context.input_is_none(3)) {
|
||||
auto bias = context.get_input(3);
|
||||
bias = context.mark_node(std::make_shared<opset10::Unsqueeze>(bias, axes));
|
||||
norm = context.mark_node(std::make_shared<opset10::Add>(norm, bias));
|
||||
bias = context.mark_node(std::make_shared<v0::Unsqueeze>(bias, axes));
|
||||
norm = context.mark_node(std::make_shared<v1::Add>(norm, bias));
|
||||
}
|
||||
// Input with index 5 is the "cudnn_enabled" flag, which we can ignore
|
||||
return {norm};
|
||||
};
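To make the shape arithmetic above concrete, a small self-contained sketch with assumed example sizes (the translator performs the same reshape symbolically on the graph):

// Hedged sketch of the aten::group_norm reshape trick on concrete shapes (assumed example).
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    // Assume input [N, C, H, W] = [2, 6, 4, 4] and num_groups = 3.
    std::vector<int64_t> input_shape{2, 6, 4, 4};
    int64_t num_groups = 3;

    // The translator reshapes to [N, num_groups, -1]; the -1 resolves to (C / G) * H * W.
    int64_t tail = input_shape[1] / num_groups * input_shape[2] * input_shape[3];  // 2 * 16 = 32
    std::vector<int64_t> grouped{input_shape[0], num_groups, tail};                // [2, 3, 32]

    // MVN then normalizes over axis 2 (the flattened group); the result is reshaped
    // back to [2, 6, 4, 4] before the unsqueezed weight and bias are applied.
    std::cout << "[" << grouped[0] << ", " << grouped[1] << ", " << grouped[2] << "]\n";
}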
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/clamp.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,6 +12,7 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_hardtanh(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 3);
|
||||
float min = -1;
|
||||
float max = 1;
|
||||
if (!context.input_is_none(1)) {
|
||||
@ -20,7 +21,7 @@ OutputVector translate_hardtanh(NodeContext& context) {
|
||||
if (!context.input_is_none(2)) {
|
||||
max = context.const_input<float>(2);
|
||||
}
|
||||
return {context.mark_node(std::make_shared<opset10::Clamp>(context.get_input(0), min, max))};
|
||||
return {context.mark_node(std::make_shared<ov::op::v0::Clamp>(context.get_input(0), min, max))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -56,7 +56,7 @@ OutputVector translate_if(NodeContext& context) {
|
||||
}
|
||||
}
|
||||
OutputVector res;
|
||||
const auto num_outs = context.num_of_outputs();
|
||||
const auto num_outs = context.get_output_size();
|
||||
const auto then_results = then_body->get_results();
|
||||
const auto else_results = else_body->get_results();
|
||||
FRONT_END_OP_CONVERSION_CHECK(then_results.size() >= num_outs && else_results.size() >= num_outs,
|
||||
|
@ -3,7 +3,20 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/concat.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/gather.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/pad.hpp"
|
||||
#include "openvino/op/range.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/shape_of.hpp"
|
||||
#include "openvino/op/split.hpp"
|
||||
#include "openvino/op/squeeze.hpp"
|
||||
#include "openvino/op/subtract.hpp"
|
||||
#include "openvino/op/transpose.hpp"
|
||||
#include "openvino/op/unsqueeze.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,37 +24,40 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
namespace {
|
||||
std::shared_ptr<Node> get_im2col_indices_along_dim(NodeContext& context,
|
||||
ov::Output<Node> input_d,
|
||||
std::shared_ptr<Node> get_im2col_indices_along_dim(const NodeContext& context,
|
||||
const Output<Node>& input_d,
|
||||
int64_t kernel_size_d,
|
||||
int64_t dilation_d,
|
||||
int64_t padding_d,
|
||||
int64_t stride_d) {
|
||||
auto zero = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto minus_one = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {-1}));
|
||||
auto kernel_size = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {kernel_size_d}));
|
||||
auto padding_2 = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {padding_d * 2}));
|
||||
auto stride = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {stride_d}));
|
||||
auto input_d_squeezed = context.mark_node(std::make_shared<opset10::Squeeze>(input_d, zero));
|
||||
auto blocks_d = context.mark_node(std::make_shared<opset10::Add>(input_d_squeezed, padding_2));
|
||||
auto zero = context.mark_node(v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto minus_one = context.mark_node(v0::Constant::create(element::i64, Shape{}, {-1}));
|
||||
auto kernel_size = context.mark_node(v0::Constant::create(element::i64, Shape{}, {kernel_size_d}));
|
||||
auto padding_2 = context.mark_node(v0::Constant::create(element::i64, Shape{}, {padding_d * 2}));
|
||||
auto stride = context.mark_node(v0::Constant::create(element::i64, Shape{}, {stride_d}));
|
||||
auto input_d_squeezed = context.mark_node(std::make_shared<v0::Squeeze>(input_d, zero));
|
||||
auto blocks_d = context.mark_node(std::make_shared<v1::Add>(input_d_squeezed, padding_2));
|
||||
auto subtrahend =
|
||||
context.mark_node(opset10::Constant::create(element::i64, Shape{}, {dilation_d * (kernel_size_d - 1)}));
|
||||
blocks_d = context.mark_node(std::make_shared<opset10::Subtract>(blocks_d, subtrahend));
|
||||
auto blocks_d_indices = context.mark_node(std::make_shared<opset10::Range>(zero, blocks_d, stride, element::i64));
|
||||
blocks_d_indices = context.mark_node(std::make_shared<opset10::Unsqueeze>(blocks_d_indices, zero));
|
||||
context.mark_node(v0::Constant::create(element::i64, Shape{}, {dilation_d * (kernel_size_d - 1)}));
|
||||
blocks_d = context.mark_node(std::make_shared<v1::Subtract>(blocks_d, subtrahend));
|
||||
auto blocks_d_indices = context.mark_node(std::make_shared<v4::Range>(zero, blocks_d, stride, element::i64));
|
||||
blocks_d_indices = context.mark_node(std::make_shared<v0::Unsqueeze>(blocks_d_indices, zero));
|
||||
std::vector<int64_t> rng;
|
||||
for (int64_t i = 0; i < kernel_size_d * dilation_d; i += dilation_d) {
|
||||
rng.push_back(i);
|
||||
}
|
||||
|
||||
auto kernel_grid = context.mark_node(opset10::Constant::create(element::i64, Shape{rng.size()}, rng));
|
||||
auto kernel_mask = context.mark_node(std::make_shared<opset10::Unsqueeze>(kernel_grid, minus_one));
|
||||
return context.mark_node(std::make_shared<opset10::Add>(blocks_d_indices, kernel_mask));
|
||||
auto kernel_grid = context.mark_node(v0::Constant::create(element::i64, Shape{rng.size()}, rng));
|
||||
auto kernel_mask = context.mark_node(std::make_shared<v0::Unsqueeze>(kernel_grid, minus_one));
|
||||
return context.mark_node(std::make_shared<v1::Add>(blocks_d_indices, kernel_mask));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
OutputVector translate_im2col(NodeContext& context) {
|
||||
num_inputs_check(context, 5, 5);
|
||||
auto input = context.get_input(0);
|
||||
auto kernel_size = context.const_input<std::vector<int64_t>>(1);
|
||||
FRONT_END_OP_CONVERSION_CHECK(kernel_size.size() == 2, "kernel size should contains 2 elements");
|
||||
@ -51,13 +67,13 @@ OutputVector translate_im2col(NodeContext& context) {
|
||||
FRONT_END_OP_CONVERSION_CHECK(kernel_size.size() == 2, "padding should contains 2 elements");
|
||||
auto stride = context.const_input<std::vector<int64_t>>(4);
|
||||
FRONT_END_OP_CONVERSION_CHECK(kernel_size.size() == 2, "stride should contains 2 elements");
|
||||
auto zero = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto input_shape = context.mark_node(std::make_shared<opset10::ShapeOf>(input));
|
||||
auto zero_f = context.mark_node(opset10::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto minus_one = context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {-1}));
|
||||
auto two = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {2}));
|
||||
auto four = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {4}));
|
||||
auto input_shape_split = context.mark_node(std::make_shared<opset10::Split>(input_shape, zero, 4));
|
||||
auto zero = context.mark_node(v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto input_shape = context.mark_node(std::make_shared<v3::ShapeOf>(input));
|
||||
auto zero_f = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
|
||||
auto minus_one = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-1}));
|
||||
auto two = context.mark_node(v0::Constant::create(element::i64, Shape{}, {2}));
|
||||
auto four = context.mark_node(v0::Constant::create(element::i64, Shape{}, {4}));
|
||||
auto input_shape_split = context.mark_node(std::make_shared<v1::Split>(input_shape, zero, 4));
|
||||
auto input_b = input_shape_split->output(0);
|
||||
auto input_c = input_shape_split->output(1);
|
||||
auto input_h = input_shape_split->output(2);
|
||||
@ -72,22 +88,22 @@ OutputVector translate_im2col(NodeContext& context) {
|
||||
auto kernel_w = kernel_size[1];
|
||||
auto blocks_row_indices = get_im2col_indices_along_dim(context, input_h, kernel_h, dilation_h, padding_h, stride_h);
|
||||
auto blocks_col_indices = get_im2col_indices_along_dim(context, input_w, kernel_w, dilation_w, padding_w, stride_w);
|
||||
auto kernel_window = context.mark_node(opset10::Constant::create(element::i64, Shape{}, {kernel_h * kernel_w}));
|
||||
auto input_c_squeezed = context.mark_node(std::make_shared<opset10::Squeeze>(input_c, zero));
|
||||
auto channel_unfolded = context.mark_node(std::make_shared<opset10::Multiply>(input_c_squeezed, kernel_window));
|
||||
auto channel_unfolded_unsqueezed = context.mark_node(std::make_shared<opset10::Unsqueeze>(channel_unfolded, zero));
|
||||
auto kernel_window = context.mark_node(v0::Constant::create(element::i64, Shape{}, {kernel_h * kernel_w}));
|
||||
auto input_c_squeezed = context.mark_node(std::make_shared<v0::Squeeze>(input_c, zero));
|
||||
auto channel_unfolded = context.mark_node(std::make_shared<v1::Multiply>(input_c_squeezed, kernel_window));
|
||||
auto channel_unfolded_unsqueezed = context.mark_node(std::make_shared<v0::Unsqueeze>(channel_unfolded, zero));
|
||||
auto output_shape = context.mark_node(
|
||||
std::make_shared<opset10::Concat>(OutputVector{input_b, channel_unfolded_unsqueezed, minus_one}, 0));
|
||||
std::make_shared<v0::Concat>(OutputVector{input_b, channel_unfolded_unsqueezed, minus_one}, 0));
|
||||
auto pads = context.mark_node(
|
||||
opset10::Constant::create(element::i64, Shape{4}, std::vector<int64_t>{0, 0, padding_h, padding_w}));
|
||||
v0::Constant::create(element::i64, Shape{4}, std::vector<int64_t>{0, 0, padding_h, padding_w}));
|
||||
auto padded_input =
|
||||
context.mark_node(std::make_shared<opset10::Pad>(input, pads, pads, zero_f, ov::op::PadMode::CONSTANT));
|
||||
auto output = context.mark_node(std::make_shared<opset10::Gather>(padded_input, blocks_row_indices, two));
|
||||
output = context.mark_node(std::make_shared<opset10::Gather>(output, blocks_col_indices, four));
|
||||
context.mark_node(std::make_shared<v1::Pad>(input, pads, pads, zero_f, ov::op::PadMode::CONSTANT));
|
||||
auto output = context.mark_node(std::make_shared<v8::Gather>(padded_input, blocks_row_indices, two));
|
||||
output = context.mark_node(std::make_shared<v8::Gather>(output, blocks_col_indices, four));
|
||||
auto permutation_dims =
|
||||
context.mark_node(opset10::Constant::create(element::i64, Shape{6}, std::vector<int64_t>{0, 1, 2, 4, 3, 5}));
|
||||
output = context.mark_node(std::make_shared<opset10::Transpose>(output, permutation_dims));
|
||||
return {context.mark_node(std::make_shared<opset10::Reshape>(output, output_shape, false))};
|
||||
context.mark_node(v0::Constant::create(element::i64, Shape{6}, std::vector<int64_t>{0, 1, 2, 4, 3, 5}));
|
||||
output = context.mark_node(std::make_shared<v1::Transpose>(output, permutation_dims));
|
||||
return {context.mark_node(std::make_shared<v1::Reshape>(output, output_shape, false))};
|
||||
};
|
||||
|
||||
} // namespace op
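A hedged re-derivation of get_im2col_indices_along_dim with plain integers, using assumed sizes, to show what the Range/Unsqueeze/Add combination computes along one spatial dimension:

// Recompute the im2col gather indices for one dimension with plain integers (illustrative sizes).
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    int64_t input_d = 5, kernel_d = 3, dilation_d = 1, padding_d = 1, stride_d = 2;

    // blocks_d = input_d + 2*padding - dilation*(kernel - 1); window starts step by stride.
    int64_t blocks_d = input_d + 2 * padding_d - dilation_d * (kernel_d - 1);
    std::vector<int64_t> starts;
    for (int64_t s = 0; s < blocks_d; s += stride_d)
        starts.push_back(s);  // {0, 2, 4}

    // Kernel offsets 0, dilation, ..., (kernel - 1) * dilation are broadcast-added to the starts,
    // yielding one row of indices per kernel tap, exactly like Add(blocks_d_indices, kernel_mask).
    for (int64_t k = 0; k < kernel_d * dilation_d; k += dilation_d) {
        for (int64_t s : starts)
            std::cout << s + k << ' ';
        std::cout << '\n';
    }
}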
@ -24,65 +24,66 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
namespace {
|
||||
OutputVector translate_instance_norm_inference(const NodeContext& context,
|
||||
const Output<Node>& input,
|
||||
const Output<Node>& reduction_axes,
|
||||
float eps) {
|
||||
auto norm = context.mark_node(
|
||||
std::make_shared<ov::op::v6::MVN>(input, reduction_axes, true, eps, ov::op::MVNEpsMode::INSIDE_SQRT));
|
||||
auto norm = context.mark_node(std::make_shared<v6::MVN>(input, reduction_axes, true, eps, MVNEpsMode::INSIDE_SQRT));
|
||||
if (!context.input_is_none(1)) {
|
||||
auto weight = context.get_input(1);
|
||||
weight = reshape_channelwise(context, weight, norm);
|
||||
norm = context.mark_node(std::make_shared<ov::op::v1::Multiply>(norm, weight));
|
||||
norm = context.mark_node(std::make_shared<v1::Multiply>(norm, weight));
|
||||
}
|
||||
if (!context.input_is_none(2)) {
|
||||
auto bias = context.get_input(2);
|
||||
bias = reshape_channelwise(context, bias, norm);
|
||||
norm = context.mark_node(std::make_shared<ov::op::v1::Add>(norm, bias));
|
||||
norm = context.mark_node(std::make_shared<v1::Add>(norm, bias));
|
||||
}
|
||||
return {norm};
|
||||
}
|
||||
|
||||
OutputVector translate_instance_norm_train(NodeContext& context,
|
||||
OutputVector translate_instance_norm_train(const NodeContext& context,
|
||||
const Output<Node>& input,
|
||||
const Output<Node>& reduction_axes,
|
||||
float eps) {
|
||||
auto zero = context.mark_node(ov::op::v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto one = context.mark_node(ov::op::v0::Constant::create(element::i64, Shape{}, {1}));
|
||||
auto input_shape = context.mark_node(std::make_shared<ov::op::v3::ShapeOf>(input));
|
||||
auto batch_dim = context.mark_node(std::make_shared<ov::op::v8::Gather>(input_shape, zero, zero));
|
||||
auto channel_dim = context.mark_node(std::make_shared<ov::op::v8::Gather>(input_shape, one, zero));
|
||||
auto batch_dim_1d = context.mark_node(std::make_shared<ov::op::v0::Unsqueeze>(batch_dim, zero));
|
||||
auto batch_norm_channels_1d = context.mark_node(std::make_shared<ov::op::v1::Multiply>(batch_dim_1d, channel_dim));
|
||||
auto one_1d = context.mark_node(ov::op::v0::Constant::create(element::i64, Shape{1}, {1}));
|
||||
auto tail_shape = context.mark_node(std::make_shared<ov::op::v8::Gather>(input_shape, reduction_axes, zero));
|
||||
auto reshape_shape = context.mark_node(
|
||||
std::make_shared<ov::op::v0::Concat>(OutputVector{one_1d, batch_norm_channels_1d, tail_shape}, 0));
|
||||
auto reshaped_input = context.mark_node(std::make_shared<ov::op::v1::Reshape>(input, reshape_shape, false));
|
||||
auto zero = context.mark_node(v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto one = context.mark_node(v0::Constant::create(element::i64, Shape{}, {1}));
|
||||
auto input_shape = context.mark_node(std::make_shared<v3::ShapeOf>(input));
|
||||
auto batch_dim = context.mark_node(std::make_shared<v8::Gather>(input_shape, zero, zero));
|
||||
auto channel_dim = context.mark_node(std::make_shared<v8::Gather>(input_shape, one, zero));
|
||||
auto batch_dim_1d = context.mark_node(std::make_shared<v0::Unsqueeze>(batch_dim, zero));
|
||||
auto batch_norm_channels_1d = context.mark_node(std::make_shared<v1::Multiply>(batch_dim_1d, channel_dim));
|
||||
auto one_1d = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {1}));
|
||||
auto tail_shape = context.mark_node(std::make_shared<v8::Gather>(input_shape, reduction_axes, zero));
|
||||
auto reshape_shape =
|
||||
context.mark_node(std::make_shared<v0::Concat>(OutputVector{one_1d, batch_norm_channels_1d, tail_shape}, 0));
|
||||
auto reshaped_input = context.mark_node(std::make_shared<v1::Reshape>(input, reshape_shape, false));
|
||||
Output<Node> weight;
|
||||
Output<Node> bias;
|
||||
if (context.input_is_none(1)) {
|
||||
weight = context.mark_node(std::make_shared<ov::op::v3::Broadcast>(one, batch_norm_channels_1d));
|
||||
weight = context.mark_node(std::make_shared<ov::op::v1::ConvertLike>(weight, input));
|
||||
weight = context.mark_node(std::make_shared<v3::Broadcast>(one, batch_norm_channels_1d));
|
||||
weight = context.mark_node(std::make_shared<v1::ConvertLike>(weight, input));
|
||||
} else {
|
||||
weight = context.get_input(1);
|
||||
weight = context.mark_node(std::make_shared<ov::op::v0::Tile>(weight, batch_dim_1d));
|
||||
weight = context.mark_node(std::make_shared<v0::Tile>(weight, batch_dim_1d));
|
||||
}
|
||||
if (context.input_is_none(2)) {
|
||||
bias = context.mark_node(std::make_shared<ov::op::v3::Broadcast>(zero, batch_norm_channels_1d));
|
||||
bias = context.mark_node(std::make_shared<ov::op::v1::ConvertLike>(bias, input));
|
||||
bias = context.mark_node(std::make_shared<v3::Broadcast>(zero, batch_norm_channels_1d));
|
||||
bias = context.mark_node(std::make_shared<v1::ConvertLike>(bias, input));
|
||||
} else {
|
||||
bias = context.get_input(2);
|
||||
bias = context.mark_node(std::make_shared<ov::op::v0::Tile>(bias, batch_dim_1d));
|
||||
bias = context.mark_node(std::make_shared<v0::Tile>(bias, batch_dim_1d));
|
||||
}
|
||||
auto running_mean = context.get_input(3);
|
||||
running_mean = context.mark_node(std::make_shared<ov::op::v0::Tile>(running_mean, batch_dim_1d));
|
||||
running_mean = context.mark_node(std::make_shared<v0::Tile>(running_mean, batch_dim_1d));
|
||||
auto running_var = context.get_input(4);
|
||||
running_var = context.mark_node(std::make_shared<ov::op::v0::Tile>(running_var, batch_dim_1d));
|
||||
running_var = context.mark_node(std::make_shared<v0::Tile>(running_var, batch_dim_1d));
|
||||
auto batch_norm = context.mark_node(
|
||||
std::make_shared<ov::op::v5::BatchNormInference>(reshaped_input, weight, bias, running_mean, running_var, eps));
|
||||
return {context.mark_node(std::make_shared<ov::op::v1::Reshape>(batch_norm, input_shape, true))};
|
||||
std::make_shared<v5::BatchNormInference>(reshaped_input, weight, bias, running_mean, running_var, eps));
|
||||
return {context.mark_node(std::make_shared<v1::Reshape>(batch_norm, input_shape, true))};
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -91,12 +92,11 @@ OutputVector translate_instance_norm(NodeContext& context) {
|
||||
num_inputs_check(context, 8, 9);
|
||||
auto input = context.get_input(0);
|
||||
auto eps = context.const_input<float>(7);
|
||||
auto input_shape = context.mark_node(std::make_shared<ov::op::v3::ShapeOf>(input));
|
||||
auto rank_1d = context.mark_node(std::make_shared<ov::op::v3::ShapeOf>(input_shape));
|
||||
auto rank = context.mark_node(std::make_shared<ov::op::v0::Squeeze>(rank_1d));
|
||||
auto one = context.mark_node(ov::op::v0::Constant::create(element::i64, Shape{}, {1}));
|
||||
auto two = context.mark_node(ov::op::v0::Constant::create(element::i64, Shape{}, {2}));
|
||||
auto reduction_axes = context.mark_node(std::make_shared<ov::op::v4::Range>(two, rank, one, element::i64));
|
||||
Output<Node> rank;
|
||||
std::tie(std::ignore, rank) = get_shape_rank(context, input, true, element::i64);
|
||||
auto one = context.mark_node(v0::Constant::create(element::i64, Shape{}, {1}));
|
||||
auto two = context.mark_node(v0::Constant::create(element::i64, Shape{}, {2}));
|
||||
auto reduction_axes = context.mark_node(std::make_shared<v4::Range>(two, rank, one, element::i64));
|
||||
if (context.input_is_none(3) && context.input_is_none(4)) {
|
||||
return translate_instance_norm_inference(context, input, reduction_axes, eps);
|
||||
}
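A hedged sketch of the reshape used by the training-statistics path above: folding the batch axis into the channel axis lets BatchNormInference normalize each (n, c) slice independently (shapes assumed for illustration):

// Sketch of instance-norm-as-batch-norm with assumed concrete shapes.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    std::vector<int64_t> input_shape{2, 3, 8, 8};  // [N, C, H, W]
    int64_t n = input_shape[0], c = input_shape[1];

    // Folding N into the channel axis makes every (n, c) pair its own "channel",
    // so BatchNormInference over [1, N*C, H, W] normalizes each instance separately.
    std::vector<int64_t> reshaped{1, n * c, input_shape[2], input_shape[3]};  // [1, 6, 8, 8]

    // weight, bias, running_mean and running_var (all of length C) are tiled N times to length N*C.
    std::cout << "reshaped channels: " << reshaped[1] << ", tiled stats length: " << n * c << "\n";
}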
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,7 +12,8 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_int(NodeContext& context) {
|
||||
return {context.mark_node(std::make_shared<opset10::Convert>(context.get_input(0), element::i32))};
|
||||
num_inputs_check(context, 1, 1);
|
||||
return {context.mark_node(std::make_shared<ov::op::v0::Convert>(context.get_input(0), element::i32))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,10 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/mvn.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,22 +14,26 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_layer_norm(NodeContext& context) {
|
||||
num_inputs_check(context, 5, 6);
|
||||
auto eps = context.const_input<float>(4);
|
||||
auto normalized_shape = context.const_input<Shape>(1);
|
||||
FRONT_END_OP_CONVERSION_CHECK(normalized_shape.size() == 1,
|
||||
"Translation for aten::layer_norm supports only single normalized_shape value, "
|
||||
"which means normalizing over the last dimension.");
|
||||
// TODO: support any dimension
|
||||
auto axes = context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {-1}));
|
||||
auto out_node = context.mark_node(
|
||||
std::make_shared<opset10::MVN>(context.get_input(0), axes, true, eps, ov::op::MVNEpsMode::INSIDE_SQRT));
|
||||
auto axes = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-1}));
|
||||
auto out_node =
|
||||
context.mark_node(std::make_shared<v6::MVN>(context.get_input(0), axes, true, eps, MVNEpsMode::INSIDE_SQRT));
|
||||
if (!context.input_is_none(2)) {
|
||||
out_node = context.mark_node(std::make_shared<opset10::Multiply>(out_node, context.get_input(2)));
|
||||
out_node = context.mark_node(std::make_shared<v1::Multiply>(out_node, context.get_input(2)));
|
||||
}
|
||||
if (!context.input_is_none(3)) {
|
||||
out_node = context.mark_node(std::make_shared<opset10::Add>(out_node, context.get_input(3)));
|
||||
out_node = context.mark_node(std::make_shared<v1::Add>(out_node, context.get_input(3)));
|
||||
}
|
||||
// Input with index 5 is the "cudnn_enabled" flag, which we can ignore
|
||||
return {out_node};
|
||||
};
|
||||
|
||||
|
@ -3,7 +3,10 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/shape_of.hpp"
|
||||
#include "openvino/op/slice.hpp"
|
||||
#include "openvino/op/squeeze.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,14 +14,17 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_len(NodeContext& context) {
|
||||
auto const_0 = context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {0}));
|
||||
auto const_1 = context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {1}));
|
||||
auto input = context.get_input(0);
|
||||
auto input_shape = context.mark_node(std::make_shared<opset10::ShapeOf>(input, element::i64));
|
||||
using namespace ov::op;
|
||||
|
||||
auto slice = context.mark_node(std::make_shared<opset10::Slice>(input_shape, const_0, const_1, const_1));
|
||||
auto squeeze = std::make_shared<opset10::Squeeze>(slice, const_0);
|
||||
OutputVector translate_len(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 1);
|
||||
auto const_0 = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {0}));
|
||||
auto const_1 = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {1}));
|
||||
auto input = context.get_input(0);
|
||||
auto input_shape = context.mark_node(std::make_shared<v3::ShapeOf>(input, element::i64));
|
||||
|
||||
auto slice = context.mark_node(std::make_shared<v8::Slice>(input_shape, const_0, const_1, const_1));
|
||||
auto squeeze = std::make_shared<v0::Squeeze>(slice, const_0);
|
||||
return {context.mark_node(squeeze)};
|
||||
};
|
||||
|
||||
|
@ -13,6 +13,7 @@ namespace op {
|
||||
|
||||
OutputVector translate_linear(NodeContext& context) {
|
||||
// schema: aten::linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
|
||||
num_inputs_check(context, 2, 3);
|
||||
auto x = context.get_input(0);
|
||||
auto y = context.get_input(1);
|
||||
auto matmul = context.mark_node(std::make_shared<ov::op::v0::MatMul>(x, y, false, true));
|
||||
|
@ -3,7 +3,8 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/concat.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,19 +12,21 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_list_construct(NodeContext& context) {
|
||||
// Process the case when prim::ListConstruct has all inputs constant
|
||||
ov::OutputVector consts;
|
||||
for (size_t i = 0; i < context.get_input_size(); i++) {
|
||||
auto input = context.get_input_from_visible_context(i);
|
||||
auto c_node = std::dynamic_pointer_cast<opset10::Constant>(input.get_node_shared_ptr());
|
||||
auto c_node = std::dynamic_pointer_cast<v0::Constant>(input.get_node_shared_ptr());
|
||||
FRONT_END_OP_CONVERSION_CHECK(c_node, "Translation for prim::ListConstruct supports only constant inputs");
|
||||
if (c_node->get_shape().size() == 0) {
|
||||
c_node = std::make_shared<opset10::Constant>(c_node->get_element_type(), Shape{1}, c_node->get_data_ptr());
|
||||
c_node = std::make_shared<v0::Constant>(c_node->get_element_type(), Shape{1}, c_node->get_data_ptr());
|
||||
}
|
||||
consts.push_back(c_node);
|
||||
}
|
||||
auto list_construct = std::make_shared<opset10::Concat>(consts, 0);
|
||||
auto list_construct = std::make_shared<v0::Concat>(consts, 0);
|
||||
if (list_construct->has_evaluate()) {
|
||||
OutputVector replacements(list_construct->get_output_size());
|
||||
|
||||
|
@ -8,28 +8,33 @@
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/divide.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_log(NodeContext& context) {
|
||||
// torch.log returns a tensor with the natural logarithm of the elements of input.
|
||||
num_inputs_check(context, 1, 1);
|
||||
auto x = context.get_input(0);
|
||||
x = context.mark_node(std::make_shared<ov::op::v0::Convert>(x, element::f32));
|
||||
auto log = context.mark_node(std::make_shared<ov::op::v0::Log>(x));
|
||||
x = context.mark_node(std::make_shared<v0::Convert>(x, element::f32));
|
||||
auto log = context.mark_node(std::make_shared<v0::Log>(x));
|
||||
return {log};
|
||||
};
|
||||
|
||||
OutputVector translate_log2(NodeContext& context) {
|
||||
// torch.log2 returns a tensor with the logarithm to the base 2 of the elements of input.
|
||||
num_inputs_check(context, 1, 1);
|
||||
auto x = context.get_input(0);
|
||||
auto two = context.mark_node(ov::op::v0::Constant::create(element::f32, Shape{}, {2}));
|
||||
x = context.mark_node(std::make_shared<ov::op::v0::Convert>(x, element::f32));
|
||||
auto log2 = context.mark_node(std::make_shared<ov::op::v0::Log>(two));
|
||||
auto log = context.mark_node(std::make_shared<ov::op::v0::Log>(x));
|
||||
auto res = context.mark_node(std::make_shared<ov::op::v1::Divide>(log, log2));
|
||||
auto two = context.mark_node(v0::Constant::create(element::f32, Shape{}, {2}));
|
||||
x = context.mark_node(std::make_shared<v0::Convert>(x, element::f32));
|
||||
auto log2 = context.mark_node(std::make_shared<v0::Log>(two));
|
||||
auto log = context.mark_node(std::make_shared<v0::Log>(x));
|
||||
auto res = context.mark_node(std::make_shared<v1::Divide>(log, log2));
|
||||
return {res};
|
||||
};
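The log2 branch relies on the change-of-base identity, applied elementwise after the cast to f32:

\log_2 x = \frac{\ln x}{\ln 2}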
@ -2,8 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/op/loop.hpp"
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,41 +13,30 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_loop(NodeContext& context) {
|
||||
auto loop = std::make_shared<opset10::Loop>(context.get_input(0), context.get_input(1));
|
||||
const auto& inputs = context.inputs();
|
||||
FRONT_END_OP_CONVERSION_CHECK(inputs.size() >= 2, "Loop must have at least 2 inputs.");
|
||||
auto loop = std::make_shared<ov::op::v5::Loop>(inputs[0], inputs[1]);
|
||||
auto decoder = context.get_decoder();
|
||||
FRONT_END_OP_CONVERSION_CHECK(decoder->get_subgraph_size() == 1, "Loop must have 1 subgraph.");
|
||||
auto subgraph_decoder = decoder->get_subgraph_decoder(0);
|
||||
auto body = context.convert_subgraph(0);
|
||||
loop->set_function(body);
|
||||
opset10::Loop::SpecialBodyPorts spec_ports{0, 0};
|
||||
ov::op::v5::Loop::SpecialBodyPorts spec_ports{0, 0};
|
||||
loop->set_special_body_ports(spec_ports);
|
||||
|
||||
auto inputs = subgraph_decoder->inputs();
|
||||
std::set<size_t> input_idxs(inputs.begin(), inputs.end());
|
||||
std::map<size_t, ParameterVector> inputs_map;
|
||||
|
||||
auto body_parameters = body->get_parameters();
|
||||
// #0 parameter is counter
|
||||
for (size_t i = 1; i < body_parameters.size(); i++) {
|
||||
// #0 body parameter is counter; #0 loop input is counter, #1 loop input is condition
|
||||
// Connect other inputs
|
||||
for (size_t i = 2; i < inputs.size(); i++) {
|
||||
loop->set_invariant_inputs(inputs[i], {body_parameters[i - 1]});
|
||||
}
|
||||
// Connect inputs from external context
|
||||
for (auto i = inputs.size() - 1; i < body_parameters.size(); i++) {
|
||||
auto param = body_parameters[i];
|
||||
auto name = param->get_output_tensor(0).get_any_name();
|
||||
size_t input_idx = (size_t)std::stoll(name);
|
||||
if (inputs_map.count(input_idx)) {
|
||||
inputs_map[input_idx] = {param};
|
||||
} else {
|
||||
inputs_map[input_idx].push_back(param);
|
||||
}
|
||||
}
|
||||
for (const auto& input : inputs_map) {
|
||||
if (!input_idxs.count(input.first)) {
|
||||
auto external_output = context.get_tensor_from_model_or_create_input(input.first);
|
||||
loop->set_invariant_inputs(external_output, input.second);
|
||||
} else {
|
||||
auto external_output = context.get_tensor_from_model(input.first);
|
||||
if (external_output.get_node()) {
|
||||
loop->set_invariant_inputs(external_output, input.second);
|
||||
}
|
||||
}
|
||||
auto external_output = context.get_tensor_from_model_or_create_input(input_idx);
|
||||
loop->set_invariant_inputs(external_output, {param});
|
||||
}
|
||||
// TODO: Connect back edges (merged inputs)
|
||||
auto body_results = body->get_results();
|
||||
@ -69,4 +59,4 @@ OutputVector translate_loop(NodeContext& context) {
|
||||
} // namespace op
|
||||
} // namespace pytorch
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
} // namespace ov
|
||||
|
@ -3,7 +3,11 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/broadcast.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/select.hpp"
|
||||
#include "openvino/op/shape_of.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,15 +15,18 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_masked_fill(NodeContext& context) {
|
||||
num_inputs_check(context, 3, 3);
|
||||
auto data = context.get_input(0);
|
||||
auto mask = context.get_input(1);
|
||||
auto value = context.const_input<float>(2);
|
||||
auto data_shape = context.mark_node(std::make_shared<opset10::ShapeOf>(data));
|
||||
auto value_const = context.mark_node(opset10::Constant::create(element::f32, Shape({}), {value}));
|
||||
auto broadcasted_value = context.mark_node(std::make_shared<opset10::Broadcast>(value_const, data_shape));
|
||||
auto bool_mask = context.mark_node(std::make_shared<opset10::Convert>(mask, element::boolean));
|
||||
return {context.mark_node(std::make_shared<opset10::Select>(bool_mask, broadcasted_value, data))};
|
||||
auto data_shape = context.mark_node(std::make_shared<v3::ShapeOf>(data));
|
||||
auto value_const = context.mark_node(v0::Constant::create(element::f32, Shape({}), {value}));
|
||||
auto broadcasted_value = context.mark_node(std::make_shared<v3::Broadcast>(value_const, data_shape));
|
||||
auto bool_mask = context.mark_node(std::make_shared<v0::Convert>(mask, element::boolean));
|
||||
return {context.mark_node(std::make_shared<v1::Select>(bool_mask, broadcasted_value, data))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/max_pool.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,20 +11,18 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_max_poolnd(NodeContext& context) {
|
||||
num_inputs_check(context, 6, 6);
|
||||
auto kernel = context.const_input<Shape>(1);
|
||||
auto strides = context.const_input<Strides>(2);
|
||||
auto pads = context.const_input<Shape>(3); // pytorch supports only symmetric paddings
|
||||
auto dilations = context.const_input<Strides>(4);
|
||||
auto rounding_type = context.const_input<bool>(5) ? ov::op::RoundingType::CEIL : ov::op::RoundingType::FLOOR;
|
||||
auto rounding_type = context.const_input<bool>(5) ? RoundingType::CEIL : RoundingType::FLOOR;
|
||||
|
||||
return {context.mark_node(std::make_shared<opset10::MaxPool>(context.get_input(0),
|
||||
strides,
|
||||
dilations,
|
||||
pads,
|
||||
pads,
|
||||
kernel,
|
||||
rounding_type))};
|
||||
return {context.mark_node(
|
||||
std::make_shared<v8::MaxPool>(context.get_input(0), strides, dilations, pads, pads, kernel, rounding_type))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/reduce_mean.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,12 +12,13 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_mean(NodeContext& context) {
|
||||
num_inputs_check(context, 3, 4);
|
||||
auto x = context.get_input(0);
|
||||
auto y = context.get_input(1);
|
||||
auto keep_dims = context.const_input<bool>(2);
|
||||
FRONT_END_OP_CONVERSION_CHECK(context.input_is_none(3),
|
||||
"Only False is supported for input with index 3 for aten::mean");
|
||||
return {context.mark_node(std::make_shared<opset10::ReduceMean>(x, y, keep_dims))};
|
||||
return {context.mark_node(std::make_shared<ov::op::v1::ReduceMean>(x, y, keep_dims))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -11,12 +11,11 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_meshgrid(NodeContext& context) {
|
||||
OutputVector inputs{context.get_input(0)};
|
||||
std::string indexing = "ij";
|
||||
if (!context.input_is_none(1)) {
|
||||
indexing = context.const_input<std::string>(1);
|
||||
}
|
||||
auto node = std::make_shared<PtFrameworkNode>(context.get_decoder(), inputs);
|
||||
auto node = std::make_shared<PtFrameworkNode>(context.get_decoder(), context.inputs());
|
||||
auto attrs = node->get_attrs();
|
||||
attrs["indexing"] = indexing;
|
||||
node->set_attrs(attrs);
|
||||
|
@ -3,7 +3,14 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/maximum.hpp"
|
||||
#include "openvino/op/minimum.hpp"
|
||||
#include "openvino/op/reduce_max.hpp"
|
||||
#include "openvino/op/reduce_min.hpp"
|
||||
#include "openvino/op/squeeze.hpp"
|
||||
#include "openvino/op/topk.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,31 +18,33 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_max(NodeContext& context) {
|
||||
// torch.max (same for torch.min) actually has two interfaces smashed together:
|
||||
// torch.max(x, dim, keepdim) and torch.max(x, y)
|
||||
num_inputs_check(context, 1, 3);
|
||||
auto x = context.get_input(0);
|
||||
// torch.max(input)
|
||||
if (context.input_is_none(1) && context.input_is_none(2)) {
|
||||
auto axes = get_axes_range(context, 0);
|
||||
return {context.mark_node(std::make_shared<opset10::ReduceMax>(x, axes, false))};
|
||||
return {context.mark_node(std::make_shared<v1::ReduceMax>(x, axes, false))};
|
||||
}
|
||||
// torch.max(input, other)
|
||||
if (context.input_is_none(2)) {
|
||||
auto y = context.get_input(1);
|
||||
return {context.mark_node(std::make_shared<opset10::Maximum>(x, y))};
|
||||
return {context.mark_node(std::make_shared<v1::Maximum>(x, y))};
|
||||
}
|
||||
// torch.max(input, dim, keepdim) returns values and indices
|
||||
auto axes_node = context.get_input(1);
|
||||
auto axis_const = context.const_input<int64_t>(1);
|
||||
auto keepdims = context.const_input<bool>(2);
|
||||
auto values = context.mark_node(std::make_shared<opset10::ReduceMax>(x, axes_node, keepdims));
|
||||
auto k = context.mark_node(std::make_shared<opset10::Constant>(element::i64, Shape{}, 1));
|
||||
auto topk =
|
||||
std::make_shared<opset10::TopK>(x, k, axis_const, opset10::TopK::Mode::MAX, opset10::TopK::SortType::NONE);
|
||||
auto indicies = context.mark_node(std::make_shared<opset10::Convert>(topk->output(1), element::i64));
|
||||
auto values = context.mark_node(std::make_shared<v1::ReduceMax>(x, axes_node, keepdims));
|
||||
auto k = context.mark_node(std::make_shared<v0::Constant>(element::i64, Shape{}, 1));
|
||||
auto topk = std::make_shared<v3::TopK>(x, k, axis_const, v3::TopK::Mode::MAX, v3::TopK::SortType::NONE);
|
||||
auto indicies = context.mark_node(std::make_shared<v0::Convert>(topk->output(1), element::i64));
|
||||
if (!keepdims) {
|
||||
indicies = std::make_shared<opset10::Squeeze>(indicies, axes_node);
|
||||
indicies = std::make_shared<v0::Squeeze>(indicies, axes_node);
|
||||
}
|
||||
return {values, indicies};
|
||||
};
|
||||
@ -43,29 +52,28 @@ OutputVector translate_max(NodeContext& context) {
|
||||
OutputVector translate_min(NodeContext& context) {
|
||||
// torch.min (same for torch.max) actually has two interfaces smashed together:
|
||||
// torch.min(x, dim, keepdim) and torch.min(x, y)
|
||||
num_inputs_check(context, 1, 3);
|
||||
auto x = context.get_input(0);
|
||||
// torch.min(input)
|
||||
if (context.input_is_none(1) && context.input_is_none(2)) {
|
||||
auto axes = get_axes_range(context, 0);
|
||||
return {context.mark_node(std::make_shared<opset10::ReduceMin>(x, axes, false))};
|
||||
return {context.mark_node(std::make_shared<v1::ReduceMin>(x, axes, false))};
|
||||
}
|
||||
// torch.min(input, other)
|
||||
if (context.input_is_none(2)) {
|
||||
auto y = context.get_input(1);
|
||||
return {context.mark_node(std::make_shared<opset10::Minimum>(x, y))};
|
||||
return {context.mark_node(std::make_shared<v1::Minimum>(x, y))};
|
||||
}
|
||||
// torch.min(input, dim, keepdim) returns values and indices
|
||||
auto axes_node = context.get_input(1);
|
||||
auto axis_const = context.const_input<int64_t>(1);
|
||||
auto keepdims = context.const_input<bool>(2);
|
||||
auto values = context.mark_node(std::make_shared<opset10::ReduceMin>(x, axes_node, keepdims));
|
||||
auto k = context.mark_node(std::make_shared<opset10::Constant>(element::i64, Shape{}, 1));
|
||||
auto topk =
|
||||
std::make_shared<opset10::TopK>(x, k, axis_const, opset10::TopK::Mode::MIN, opset10::TopK::SortType::NONE);
|
||||
auto indicies = context.mark_node(std::make_shared<opset10::Convert>(topk->output(1), element::i64));
|
||||
|
||||
auto values = context.mark_node(std::make_shared<v1::ReduceMin>(x, axes_node, keepdims));
|
||||
auto k = context.mark_node(std::make_shared<v0::Constant>(element::i64, Shape{}, 1));
|
||||
auto topk = std::make_shared<v3::TopK>(x, k, axis_const, v3::TopK::Mode::MIN, v3::TopK::SortType::NONE);
|
||||
auto indicies = context.mark_node(std::make_shared<v0::Convert>(topk->output(1), element::i64));
|
||||
if (!keepdims) {
|
||||
indicies = std::make_shared<opset10::Squeeze>(indicies, axes_node);
|
||||
indicies = std::make_shared<v0::Squeeze>(indicies, axes_node);
|
||||
}
|
||||
return {values, indicies};
|
||||
};
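A hedged summary of how the overload is disambiguated from which optional inputs are absent; the booleans below stand in for context.input_is_none(), and translate_min is symmetric with ReduceMin/Minimum:

// Illustration (not frontend API) of the torch.max dispatch on missing optional inputs.
#include <iostream>
#include <string>

std::string dispatch_max(bool dim_is_none, bool keepdim_is_none) {
    if (dim_is_none && keepdim_is_none)
        return "ReduceMax over all axes";         // torch.max(input)
    if (keepdim_is_none)
        return "elementwise Maximum";             // torch.max(input, other)
    return "ReduceMax + TopK indices along dim";  // torch.max(input, dim, keepdim)
}

int main() {
    std::cout << dispatch_max(true, true) << '\n';
    std::cout << dispatch_max(false, true) << '\n';
    std::cout << dispatch_max(false, false) << '\n';
}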
@ -3,7 +3,9 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert_like.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,11 +13,14 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_neg(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 1);
|
||||
auto x = context.get_input(0);
|
||||
auto const_neg_1 = context.mark_node(opset10::Constant::create(element::i32, Shape{}, {-1}));
|
||||
auto cast = context.mark_node(std::make_shared<opset10::ConvertLike>(const_neg_1, x));
|
||||
return {context.mark_node(std::make_shared<opset10::Multiply>(x, cast))};
|
||||
auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1}));
|
||||
auto cast = context.mark_node(std::make_shared<v1::ConvertLike>(const_neg_1, x));
|
||||
return {context.mark_node(std::make_shared<v1::Multiply>(x, cast))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,12 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset9.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/gather.hpp"
|
||||
#include "openvino/op/non_max_suppression.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/squeeze.hpp"
|
||||
#include "openvino/op/unsqueeze.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,27 +16,29 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_nms(NodeContext& context) {
|
||||
auto const_0 = context.mark_node(opset9::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto const_1 = context.mark_node(opset9::Constant::create(element::i64, Shape{}, {1}));
|
||||
auto const_2 = context.mark_node(opset9::Constant::create(element::i64, Shape{1}, {2}));
|
||||
// the shape required by the PyTorch operator differs from the shape required in OpenVINO
|
||||
auto boxes_shape = context.mark_node(opset9::Constant::create(element::i64, Shape{3}, {1, -1, 4}));
|
||||
using namespace ov::op;
|
||||
|
||||
auto boxes = context.mark_node(std::make_shared<opset9::Reshape>(context.get_input(0), boxes_shape, false));
|
||||
OutputVector translate_nms(NodeContext& context) {
|
||||
num_inputs_check(context, 3, 3);
|
||||
auto const_0 = context.mark_node(v0::Constant::create(element::i64, Shape{}, {0}));
|
||||
auto const_1 = context.mark_node(v0::Constant::create(element::i64, Shape{}, {1}));
|
||||
auto const_2 = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {2}));
|
||||
// the shape that is required by PyTorch operator differs from the shape required in OpenVino
|
||||
auto boxes_shape = context.mark_node(v0::Constant::create(element::i64, Shape{3}, {1, -1, 4}));
|
||||
|
||||
auto boxes = context.mark_node(std::make_shared<v1::Reshape>(context.get_input(0), boxes_shape, false));
|
||||
// Unsqueeze operator is also used to align shapes required by PyTorch and OpenVino
|
||||
auto axis_01 = context.mark_node(opset9::Constant::create(element::i64, Shape{2}, {0, 1}));
|
||||
auto scores = context.mark_node(std::make_shared<opset9::Unsqueeze>(context.get_input(1), axis_01));
|
||||
auto axis_01 = context.mark_node(v0::Constant::create(element::i64, Shape{2}, {0, 1}));
|
||||
auto scores = context.mark_node(std::make_shared<v0::Unsqueeze>(context.get_input(1), axis_01));
|
||||
auto max_output_per_class =
|
||||
context.mark_node(opset9::Constant::create(element::i64, Shape{1}, {std::numeric_limits<int64_t>::max()}));
|
||||
context.mark_node(v0::Constant::create(element::i64, Shape{1}, {std::numeric_limits<int64_t>::max()}));
|
||||
auto iou_threshold = context.get_input(2);
|
||||
|
||||
auto nms_out = context.mark_node(
|
||||
std::make_shared<opset9::NonMaxSuppression>(boxes, scores, max_output_per_class, iou_threshold));
|
||||
auto select = context.mark_node(std::make_shared<opset9::Gather>(nms_out, const_2, const_1));
|
||||
auto squeeze = std::make_shared<opset9::Squeeze>(select, const_1);
|
||||
auto nms_out =
|
||||
context.mark_node(std::make_shared<v9::NonMaxSuppression>(boxes, scores, max_output_per_class, iou_threshold));
|
||||
auto select = context.mark_node(std::make_shared<v8::Gather>(nms_out, const_2, const_1));
|
||||
|
||||
return {context.mark_node(squeeze)};
|
||||
return {context.mark_node(std::make_shared<v0::Squeeze>(select, const_1))};
|
||||
};
|
||||
|
||||
} // namespace op
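For reference, a small sketch of the shape bridging between torchvision.ops.nms and NonMaxSuppression that the Reshape/Unsqueeze/Gather above implements (sizes assumed for illustration):

// Assumed-shape illustration of the NMS layout conversion.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    int64_t num_boxes = 100;
    std::vector<int64_t> pt_boxes{num_boxes, 4};      // PyTorch: boxes [N, 4], scores [N]
    std::vector<int64_t> ov_boxes{1, num_boxes, 4};   // OV: [batch, N, 4] -> Reshape to {1, -1, 4}
    std::vector<int64_t> ov_scores{1, 1, num_boxes};  // OV: [batch, classes, N] -> Unsqueeze axes {0, 1}
    // NonMaxSuppression emits [batch, class, box] triplets; the translator gathers
    // column 2 and squeezes it to recover the flat box indices PyTorch expects.
    std::cout << ov_boxes[1] << ' ' << ov_scores[2] << '\n';
}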
@ -3,7 +3,9 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/non_zero.hpp"
|
||||
#include "openvino/op/transpose.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,11 +13,14 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_nonzero(NodeContext& context) {
|
||||
num_inputs_check(context, 1, 1);
|
||||
auto cond = context.get_input(0);
|
||||
auto non_zero = context.mark_node(std::make_shared<opset10::NonZero>(cond));
|
||||
auto input_order = context.mark_node(opset10::Constant::create(element::i64, Shape{2}, {1, 0}));
|
||||
return {context.mark_node(std::make_shared<opset10::Transpose>(non_zero, input_order))};
|
||||
auto non_zero = context.mark_node(std::make_shared<v3::NonZero>(cond));
|
||||
auto input_order = context.mark_node(v0::Constant::create(element::i64, Shape{2}, {1, 0}));
|
||||
return {context.mark_node(std::make_shared<v1::Transpose>(non_zero, input_order))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -3,7 +3,14 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/abs.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/power.hpp"
|
||||
#include "openvino/op/reduce_l1.hpp"
|
||||
#include "openvino/op/reduce_l2.hpp"
|
||||
#include "openvino/op/reduce_max.hpp"
|
||||
#include "openvino/op/reduce_min.hpp"
|
||||
#include "openvino/op/reduce_sum.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -11,39 +18,35 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_norm(NodeContext& context) {
|
||||
num_inputs_check(context, 4, 4);
|
||||
auto input_tensor = context.get_input(0);
|
||||
auto p = context.const_input<float>(1);
|
||||
auto dim = context.get_input(2);
|
||||
auto keep_dim = context.const_input<bool>(3);
|
||||
|
||||
OutputVector res;
|
||||
|
||||
Output<Node> res;
|
||||
if (p == 1) {
|
||||
auto reduce_l1 = context.mark_node(std::make_shared<opset10::ReduceL1>(input_tensor, dim, keep_dim));
|
||||
res.push_back(reduce_l1);
|
||||
res = context.mark_node(std::make_shared<v4::ReduceL1>(input_tensor, dim, keep_dim));
|
||||
} else if (p == 2) {
|
||||
auto reduce_l2 = context.mark_node(std::make_shared<opset10::ReduceL2>(input_tensor, dim, keep_dim));
|
||||
res.push_back(reduce_l2);
|
||||
res = context.mark_node(std::make_shared<v4::ReduceL2>(input_tensor, dim, keep_dim));
|
||||
} else if (p == std::numeric_limits<float>::infinity()) {
|
||||
auto abs = context.mark_node(std::make_shared<opset10::Abs>(input_tensor));
|
||||
auto max = context.mark_node(std::make_shared<opset10::ReduceMax>(abs, dim, keep_dim));
|
||||
res.push_back(max);
|
||||
auto abs = context.mark_node(std::make_shared<v0::Abs>(input_tensor));
|
||||
res = context.mark_node(std::make_shared<v1::ReduceMax>(abs, dim, keep_dim));
|
||||
} else if (p == -std::numeric_limits<float>::infinity()) {
|
||||
auto abs = context.mark_node(std::make_shared<opset10::Abs>(input_tensor));
|
||||
auto min = context.mark_node(std::make_shared<opset10::ReduceMin>(abs, dim, keep_dim));
|
||||
res.push_back(min);
|
||||
auto abs = context.mark_node(std::make_shared<v0::Abs>(input_tensor));
|
||||
res = context.mark_node(std::make_shared<v1::ReduceMin>(abs, dim, keep_dim));
|
||||
} else {
|
||||
auto const_p = context.mark_node(opset10::Constant::create(element::f64, Shape{1}, {p}));
|
||||
auto const_p_inv = context.mark_node(opset10::Constant::create(element::f64, Shape{1}, {1.0 / p}));
|
||||
auto abs = context.mark_node(std::make_shared<opset10::Abs>(input_tensor));
|
||||
auto pow = context.mark_node(std::make_shared<opset10::Power>(abs, const_p));
|
||||
auto sum = context.mark_node(std::make_shared<opset10::ReduceSum>(pow, dim, keep_dim));
|
||||
auto pow_inv = context.mark_node(std::make_shared<opset10::Power>(sum, const_p_inv));
|
||||
res.push_back(pow_inv);
|
||||
auto const_p = context.mark_node(v0::Constant::create(element::f64, Shape{1}, {p}));
|
||||
auto const_p_inv = context.mark_node(v0::Constant::create(element::f64, Shape{1}, {1.0 / p}));
|
||||
auto abs = context.mark_node(std::make_shared<v0::Abs>(input_tensor));
|
||||
auto pow = context.mark_node(std::make_shared<v1::Power>(abs, const_p));
|
||||
auto sum = context.mark_node(std::make_shared<v1::ReduceSum>(pow, dim, keep_dim));
|
||||
res = context.mark_node(std::make_shared<v1::Power>(sum, const_p_inv));
|
||||
}
|
||||
|
||||
return res;
|
||||
return {res};
|
||||
};
|
||||
|
||||
} // namespace op
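For reference, the fallback branch computes the elementwise p-norm along the reduced dimensions; the dedicated ReduceL1/ReduceL2/ReduceMax/ReduceMin branches are the usual special cases:

\lVert x \rVert_p = \Bigl(\sum_i \lvert x_i \rvert^p\Bigr)^{1/p}, \qquad
\lVert x \rVert_{\infty} = \max_i \lvert x_i \rvert, \qquad
\lVert x \rVert_{-\infty} = \min_i \lvert x_i \rvert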
@ -3,7 +3,6 @@
|
||||
//
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,7 +11,8 @@ namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
OutputVector translate_numel(NodeContext& context) {
|
||||
return {numel(context, 0)};
|
||||
num_inputs_check(context, 1, 1);
|
||||
return {numel(context, context.get_input(0))};
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -2,9 +2,16 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/op/pad.hpp"
|
||||
|
||||
#include "openvino/core/coordinate_diff.hpp"
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/op/broadcast.hpp"
|
||||
#include "openvino/op/concat.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/gather.hpp"
|
||||
#include "openvino/op/slice.hpp"
|
||||
#include "openvino/op/subtract.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -12,15 +19,17 @@ namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
OutputVector translate_pad(NodeContext& context) {
|
||||
num_inputs_check(context, 2, 4);
|
||||
auto data = context.get_input(0);
|
||||
auto paddings = context.const_input<std::vector<int64_t>>(1);
|
||||
std::string mode = "constant";
|
||||
    auto shape = context.mark_node(std::make_shared<opset10::ShapeOf>(data, element::i32));
    auto rank = context.mark_node(std::make_shared<opset10::ShapeOf>(shape, element::i32));
    auto reduced_rank = context.mark_node(std::make_shared<opset10::Squeeze>(rank));
    auto zero = context.mark_node(opset10::Constant::create(element::i32, Shape{}, {0}));
    auto zero_f = context.mark_node(opset10::Constant::create(element::f32, Shape{}, {0}));
    Output<Node> shape;
    Output<Node> rank;
    std::tie(shape, rank) = get_shape_rank(context, data);
    auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
    size_t pad_size_half = paddings.size() / 2;
    std::vector<int64_t> pad_b(pad_size_half, 0);
    std::vector<int64_t> pad_e(pad_size_half, 0);
@ -28,15 +37,13 @@ OutputVector translate_pad(NodeContext& context) {
        pad_b[i] = paddings[paddings.size() - 2 - 2 * i];
        pad_e[i] = paddings[paddings.size() - 1 - 2 * i];
    }
    auto pads_begin_short = context.mark_node(opset10::Constant::create(element::i32, Shape{pad_size_half}, pad_b));
    auto pads_end_short = context.mark_node(opset10::Constant::create(element::i32, Shape{pad_size_half}, pad_e));
    auto pads_short_len = context.mark_node(opset10::Constant::create(element::i32, Shape{1}, {pad_size_half}));
    auto pads_diff = context.mark_node(std::make_shared<opset10::Subtract>(rank, pads_short_len));
    auto pads_remaining = context.mark_node(std::make_shared<opset10::Broadcast>(zero, pads_diff));
    auto pads_begins =
        context.mark_node(std::make_shared<opset10::Concat>(NodeVector{pads_remaining, pads_begin_short}, 0));
    auto pads_ends =
        context.mark_node(std::make_shared<opset10::Concat>(NodeVector{pads_remaining, pads_end_short}, 0));
    auto pads_begin_short = context.mark_node(v0::Constant::create(element::i32, Shape{pad_size_half}, pad_b));
    auto pads_end_short = context.mark_node(v0::Constant::create(element::i32, Shape{pad_size_half}, pad_e));
    auto pads_short_len = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {pad_size_half}));
    auto pads_diff = context.mark_node(std::make_shared<v1::Subtract>(rank, pads_short_len));
    auto pads_remaining = context.mark_node(std::make_shared<v3::Broadcast>(zero, pads_diff));
    auto pads_begins = context.mark_node(std::make_shared<v0::Concat>(NodeVector{pads_remaining, pads_begin_short}, 0));
    auto pads_ends = context.mark_node(std::make_shared<v0::Concat>(NodeVector{pads_remaining, pads_end_short}, 0));
    if (!context.input_is_none(2)) {
        mode = context.const_input<std::string>(2);
    }
@ -45,64 +52,54 @@ OutputVector translate_pad(NodeContext& context) {
        int64_t pad_r;
        auto pad_last_id = paddings.size();
        auto cur = data.get_node_shared_ptr();
        auto step = context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {1}));
        auto step = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {1}));
        auto zero_1d = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
        for (size_t i = 0; i < pad_size_half; i++) {
            ov::NodeVector tensors;
            pad_r = paddings[pad_last_id - (2 * i + 1)];
            pad_l = paddings[pad_last_id - (2 * i + 2)];
            auto axes = context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {2 + i}));
            auto axes = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {2 + i}));
            if (pad_l > 0) {
                auto start =
                    context.mark_node(context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {-pad_l})));
                auto end = context.mark_node(std::make_shared<opset10::Gather>(
                    shape,
                    context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {2 + i})),
                    context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {0}))));
                auto start = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {-pad_l}));
                auto end = context.mark_node(std::make_shared<v8::Gather>(shape, axes, zero_1d));

                auto left = context.mark_node(std::make_shared<opset10::Slice>(cur, start, end, step, axes));
                auto left = context.mark_node(std::make_shared<v8::Slice>(cur, start, end, step, axes));
                tensors.push_back(left);
            }
            if (pad_l < 0 || pad_r < 0) {
                auto start = context.mark_node(
                    context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {pad_l < 0 ? -pad_l : 0})));
                auto end = context.mark_node(
                    context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {pad_r < 0 ? pad_r : 0})));
                auto middle = context.mark_node(std::make_shared<opset10::Slice>(cur, start, end, step, axes));
                auto start = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {pad_l < 0 ? -pad_l : 0}));
                auto end = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {pad_r < 0 ? pad_r : 0}));
                auto middle = context.mark_node(std::make_shared<v8::Slice>(cur, start, end, step, axes));
                tensors.push_back(middle);
            } else {
                tensors.push_back(cur);
            }
            if (pad_r > 0) {
                auto start = context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {0}));
                auto end = context.mark_node(opset10::Constant::create(element::i64, Shape{1}, {pad_r}));
                auto right = context.mark_node(std::make_shared<opset10::Slice>(cur, start, end, step, axes));
                auto end = context.mark_node(v0::Constant::create(element::i64, Shape{1}, {pad_r}));
                auto right = context.mark_node(std::make_shared<v8::Slice>(cur, zero_1d, end, step, axes));
                tensors.push_back(right);
            }
            if (tensors.size()) {
                cur = context.mark_node(std::make_shared<opset10::Concat>(tensors, 2 + i));
                cur = context.mark_node(std::make_shared<v0::Concat>(tensors, 2 + i));
            }
        }
        return {cur};
    }
    if (mode == "constant") {
        if (!context.input_is_none(3)) {
            auto pad_value = context.get_input(3);
            return {context.mark_node(
                std::make_shared<opset10::Pad>(data, pads_begins, pads_ends, pad_value, ov::op::PadMode::CONSTANT))};
        }
        return {context.mark_node(
            std::make_shared<opset10::Pad>(data, pads_begins, pads_ends, zero_f, ov::op::PadMode::CONSTANT))};
    const std::map<std::string, PadMode> pt_to_ov_pad{
        {"constant", PadMode::CONSTANT},
        {"reflect", PadMode::REFLECT},
        {"replicate", PadMode::EDGE},
    };
    Output<Node> pad_value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
    if (mode == "constant" && !context.input_is_none(3)) {
        pad_value = context.get_input(3);
    }
    if (mode == "reflect") {
        return {context.mark_node(
            std::make_shared<opset10::Pad>(data, pads_begins, pads_ends, zero_f, ov::op::PadMode::REFLECT))};
    }
    if (mode == "replicate") {
        return {context.mark_node(
            std::make_shared<opset10::Pad>(data, pads_begins, pads_ends, zero_f, ov::op::PadMode::EDGE))};
    }

    FRONT_END_OP_CONVERSION_CHECK(false, "aten::pad conversion doesn't support [ " + mode + " ] padding mode");
    auto ov_mode = pt_to_ov_pad.find(mode);
    FRONT_END_OP_CONVERSION_CHECK(ov_mode != pt_to_ov_pad.end(),
                                  "aten::pad conversion doesn't support [ ",
                                  mode,
                                  " ] padding mode");
    return {context.mark_node(std::make_shared<v1::Pad>(data, pads_begins, pads_ends, pad_value, ov_mode->second))};
}

}  // namespace op
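For readers skimming this hunk: the refactored translate_pad replaces the per-mode if/else branches (constant, reflect, replicate) with a single string-to-PadMode lookup table followed by one v1::Pad construction. The standalone sketch below illustrates that table-driven pattern only; the PadMode enum, the to_pad_mode helper, and main are illustrative stand-ins, not OpenVINO API.

```
// Standalone sketch of the table-driven mode mapping (stand-in types, not OpenVINO code).
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

enum class PadMode { CONSTANT, REFLECT, EDGE };

PadMode to_pad_mode(const std::string& mode) {
    // One lookup table replaces the per-mode if/else chain from the old converter.
    static const std::map<std::string, PadMode> pt_to_ov_pad{
        {"constant", PadMode::CONSTANT},
        {"reflect", PadMode::REFLECT},
        {"replicate", PadMode::EDGE},  // PyTorch "replicate" corresponds to edge padding
    };
    auto it = pt_to_ov_pad.find(mode);
    if (it == pt_to_ov_pad.end()) {
        // Mirrors the FRONT_END_OP_CONVERSION_CHECK failure path for unsupported modes.
        throw std::runtime_error("aten::pad conversion doesn't support [ " + mode + " ] padding mode");
    }
    return it->second;
}

int main() {
    std::cout << static_cast<int>(to_pad_mode("replicate")) << "\n";  // prints 2 (EDGE)
    return 0;
}
```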