Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-05-18 08:46:00 +09:00
commit e07337d533
723 changed files with 18992 additions and 13364 deletions

View File

@@ -125,6 +125,7 @@ jobs:
     displayName: 'nGraph UT'
     continueOnError: false
+  # python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(BIN_DIR)/InferenceEngineUnitTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
   - script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
     displayName: 'IE UT old'
     continueOnError: false
@@ -161,14 +162,6 @@ jobs:
     displayName: 'CPU FuncTests'
     continueOnError: false
-  - script: |
-      export DATA_PATH=$(MODELS_PATH)
-      export MODELS_PATH=$(MODELS_PATH)
-      python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
-    workingDirectory: $(WORK_DIR)
-    displayName: 'MklDnnFunctionalTests'
-    continueOnError: false
   - script: |
       export DATA_PATH=$(MODELS_PATH)
       export MODELS_PATH=$(MODELS_PATH)

View File

@@ -105,7 +105,7 @@ jobs:
     workingDirectory: $(BUILD_DIR)
     displayName: 'Install'
-  - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml
+  - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru*:IE_CPU.exp_* --gtest_output=xml:TEST-NGraphUT.xml
     displayName: 'nGraph UT'
     continueOnError: false
@@ -137,14 +137,6 @@ jobs:
     displayName: 'CPU FuncTests'
     continueOnError: false
-  - script: |
-      export DATA_PATH=$(MODELS_PATH)
-      export MODELS_PATH=$(MODELS_PATH)
-      python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke*:-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric* -- --gtest_print_time=1
-    workingDirectory: $(WORK_DIR)
-    displayName: 'MklDnnFunctionalTests'
-    continueOnError: false
   - script: |
       export DATA_PATH=$(MODELS_PATH)
       export MODELS_PATH=$(MODELS_PATH)

View File

@@ -167,17 +167,6 @@ jobs:
     displayName: 'CPU FuncTests - IB'
     continueOnError: false
-  # Add for gtest-parallel, it hangs now (CVS-33386)
-  #python $(WORK_DIR)\gtest-parallel\gtest-parallel $(BIN_DIR)\MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
-  - script: |
-      set PATH=$(TEST_ENV_PATH)
-      set DATA_PATH=$(MODELS_PATH)
-      set MODELS_PATH=$(MODELS_PATH)
-      rem "$(IB_TESTCONSOLE)" $(BIN_DIR)\MklDnnFunctionalTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-MklDnnFunctionalTests-IB.xml
-      $(BIN_DIR)\MklDnnFunctionalTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-MklDnnFunctionalTests.xml
-    displayName: 'MklDnnFunctionalTests'
-    continueOnError: false
   - script: |
       set PATH=$(TEST_ENV_PATH)
       set DATA_PATH=$(MODELS_PATH)

View File

@@ -104,6 +104,7 @@ When specifying key values as raw strings (that is, when using Python API), omit
 | Parameter Name | Parameter Values | Default | Description |
 |---------------------|-----------------------------|-----------------|-----------------------------------------------------------|
+| `KEY_CACHE_DIR` | `"<cache_dir>"` | `""` | Specifies a directory where compiled OCL binaries can be cached. First model loading generates the cache, and all subsequent LoadNetwork calls use precompiled kernels which significantly improves load time. If empty - caching is disabled |
 | `KEY_PERF_COUNT` | `YES` / `NO` | `NO` | Collect performance counters during inference |
 | `KEY_CONFIG_FILE` | `"<file1> [<file2> ...]"` | `""` | Load custom layer configuration files |
 | `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers |
@@ -115,7 +116,7 @@ When specifying key values as raw strings (that is, when using Python API), omit
 | `KEY_CLDNN_SOURCES_DUMPS_DIR` | `"<dump_dir>"` | `""` | Final optimized clDNN OpenCL sources dump output directory |
 | `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).<br>This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams). <br> A positive integer value creates the requested number of streams. |
 | `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|
+| `KEY_CLDNN_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for clDNN engine, e.g, JIT compilation of clDNN kernels or clDNN cpu kernel processing. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the clDNN kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while clDNN plugin is running. Note that setting this value with lower number will affect not only the network loading time but also the cpu layers of clDNN networks that are optimized with multi-threading. |
 ## Note on Debug Capabilities of the GPU Plugin
 Inference Engine GPU plugin provides possibility to dump the user custom OpenCL&trade; kernels to a file to allow you to properly debug compilation issues in your custom kernels.
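For context, a minimal sketch of how the keys above can be passed as raw strings through the Python API (not part of this diff; the cache directory, model paths and values are illustrative, and the 2021.x `openvino.inference_engine` interface is assumed):

```python
from openvino.inference_engine import IECore

ie = IECore()

# Raw-string keys (no KEY_ prefix), as described in the table above; values are illustrative.
ie.set_config({"CACHE_DIR": "/tmp/cl_cache",          # cache compiled OCL kernels between runs
               "PERF_COUNT": "NO",                    # per-layer performance counters off
               "GPU_THROUGHPUT_STREAMS": "2",         # two execution streams for throughput mode
               "CLDNN_MAX_NUM_THREADS": "4"},         # limit CPU threads used for clDNN kernel builds
              "GPU")

net = ie.read_network(model="model.xml", weights="model.bin")
exec_net = ie.load_network(network=net, device_name="GPU")
```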

View File

@@ -35,7 +35,7 @@ Thus we can define:
 **Note**: During the quantization process the values `input_low`, `input_high`, `output_low`, `output_high` are selected so that to map a floating-point zero exactly to an integer value (zero-point) and vice versa.
 ## Quantization specifics and restrictions
-In general, OpenVINO can represent and execute quantized models from different sources. However, the Post-training Optimization Toolkit (POT)
+In general, OpenVINO can represent and execute quantized models from different sources. However, the Post-training Optimization Tool (POT)
 is considered the default way to get optimized models. Since the POT supports HW-aware quantization it means that specific rules can be implemented in it for
 the particular HW. However, it is reasonable to have compatibility with general-purpose HW such as CPU and GPU and support their quantization schemes.
 Below we define these rules as follows:
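As a rough illustration of the zero-point note above (a generic sketch, not taken from the POT documentation), the quantization range can be nudged so that floating-point zero lands exactly on an integer level:

```python
def quant_params(fmin, fmax, levels=256):
    """Pick scale and zero_point so that float 0.0 maps exactly to an integer level."""
    fmin, fmax = min(fmin, 0.0), max(fmax, 0.0)        # the range must contain zero
    scale = (fmax - fmin) / (levels - 1)
    zero_point = round(-fmin / scale)                  # integer level representing 0.0
    input_low = -zero_point * scale                    # adjusted range ends that are
    input_high = (levels - 1 - zero_point) * scale     # exactly representable
    return scale, zero_point, input_low, input_high

print(quant_params(-1.3, 2.6))   # scale ~= 0.0153, zero_point = 85
```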

View File

@@ -122,10 +122,12 @@ virtualenv -p /usr/bin/python3.6 .env3 --system-site-packages
 virtualenv -p /usr/bin/python3.6 .env3/bin/activate
 ```
 3. Install all dependencies or only the dependencies for a specific framework:
-* To install dependencies for all frameworks except TensorFlow* 2.x:
+* To install dependencies for all frameworks except TensorFlow* 1.x:
 ```shell
 pip3 install -r requirements.txt
 ```
+> **NOTE**: TensorFlow 1.x and 2.x are incompatible. Use separate virtual environments if you want to install multiple TensorFlow versions.
 * To install dependencies only for Caffe:
 ```shell
 pip3 install -r requirements_caffe.txt

View File

@@ -0,0 +1,107 @@
# Convert PyTorch\* RNN-T Model to the Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT}
This instruction covers conversion of the RNN-T model from the [MLCommons](https://github.com/mlcommons) repository. Follow
the steps below to export a PyTorch* model to ONNX* before converting it to IR:
**Step 1**. Clone RNN-T PyTorch implementation from MLCommons repository (revision r1.0). Make a shallow clone to pull
only RNN-T model without full repository. If you already have a full repository, skip this and go to **Step 2**:
```bash
git clone -b r1.0 -n https://github.com/mlcommons/inference rnnt_for_openvino --depth 1
cd rnnt_for_openvino
git checkout HEAD speech_recognition/rnnt
```
**Step 2**. If you already have a full clone of MLCommons inference repository, create a folder for
pretrained PyTorch model, where conversion into IR will take place. You will also need to specify the path to
your full clone at **Step 5**. Skip this step if you have a shallow clone.
```bash
mkdir rnnt_for_openvino
cd rnnt_for_openvino
```
**Step 3**. Download pretrained weights for PyTorch implementation from https://zenodo.org/record/3662521#.YG21DugzZaQ.
For UNIX*-like systems you can use wget:
```bash
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt
```
The link was taken from `setup.sh` in the `speech_recognition/rnnt` subfolder. You will get exactly the same weights as
if you were following the steps from https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt.
**Step 4**. Install the required Python* packages:
```bash
pip3 install torch toml
```
**Step 5**. Export RNN-T model into ONNX with the script below. Copy the code below into a file named
`export_rnnt_to_onnx.py` and run it in the current directory `rnnt_for_openvino`:
> **NOTE**: If you already have a full clone of MLCommons inference repository, you need to
> specify `mlcommons_inference_path` variable.
```python
import toml
import torch
import sys
def load_and_migrate_checkpoint(ckpt_path):
checkpoint = torch.load(ckpt_path, map_location="cpu")
migrated_state_dict = {}
for key, value in checkpoint['state_dict'].items():
key = key.replace("joint_net", "joint.net")
migrated_state_dict[key] = value
del migrated_state_dict["audio_preprocessor.featurizer.fb"]
del migrated_state_dict["audio_preprocessor.featurizer.window"]
return migrated_state_dict
mlcommons_inference_path = './'  # specify relative path for MLCommons inference
checkpoint_path = 'DistributedDataParallel_1576581068.9962234-epoch-100.pt'
config_toml = 'speech_recognition/rnnt/pytorch/configs/rnnt.toml'
config = toml.load(config_toml)
rnnt_vocab = config['labels']['labels']
sys.path.insert(0, mlcommons_inference_path + 'speech_recognition/rnnt/pytorch')
from model_separable_rnnt import RNNT
model = RNNT(config['rnnt'], len(rnnt_vocab) + 1, feature_config=config['input_eval'])
model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path))
seq_length, batch_size, feature_length = 157, 1, 240
inp = torch.randn([seq_length, batch_size, feature_length])
feature_length = torch.LongTensor([seq_length])
x_padded, x_lens = model.encoder(inp, feature_length)
torch.onnx.export(model.encoder, (inp, feature_length), "rnnt_encoder.onnx", opset_version=12,
input_names=['input.1', '1'], dynamic_axes={'input.1': {0: 'seq_len', 1: 'batch'}})
symbol = torch.LongTensor([[20]])
hidden = torch.randn([2, batch_size, 320]), torch.randn([2, batch_size, 320])
g, hidden = model.prediction.forward(symbol, hidden)
torch.onnx.export(model.prediction, (symbol, hidden), "rnnt_prediction.onnx", opset_version=12,
input_names=['input.1', '1', '2'],
dynamic_axes={'input.1': {0: 'batch'}, '1': {1: 'batch'}, '2': {1: 'batch'}})
f = torch.randn([batch_size, 1, 1024])
model.joint.forward(f, g)
torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12,
input_names=['0', '1'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}})
```
```bash
python3 export_rnnt_to_onnx.py
```
After completing this step, the files rnnt_encoder.onnx, rnnt_prediction.onnx, and rnnt_joint.onnx will be saved in
the current directory.
**Step 6**. Run the conversion command:
```bash
python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input.1[157 1 240],1->157"
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]"
python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]"
```
Please note that the hardcoded sequence length value of 157 was taken from the MLCommons implementation, but conversion to IR preserves
network [reshapeability](../../../../IE_DG/ShapeInference.md); this means you can change the input shapes manually to any value either during conversion or
at inference time.
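As a sketch of that reshapeability (not part of this guide; the input name comes from the export script above, the 2021.x Python API is assumed, and the names should be verified against the generated IR):

```python
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="rnnt_encoder.xml", weights="rnnt_encoder.bin")
print({name: info.input_data.shape for name, info in net.input_info.items()})

# The IR above was frozen with seq_len = 157, but the network stays reshapeable:
net.reshape({"input.1": [300, 1, 240]})          # hypothetical longer sequence
exec_net = ie.load_network(network=net, device_name="CPU")
```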

View File

@@ -56,6 +56,7 @@ limitations under the License.
 <tab type="user" title="Convert DLRM ONNX* Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM"/>
 <tab type="usergroup" title="Converting Your PyTorch* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch">
 <tab type="user" title="Convert PyTorch* QuartzNet Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet"/>
+<tab type="user" title="Convert PyTorch* RNN-T Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
 <tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
 <tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
 </tab>

View File

@@ -13,7 +13,7 @@ OpenVINO™ toolkit components:
 with pre-trained models for a range of different tasks
 * [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) to transform models into
 the Intermediate Representation (IR) format
-* [Post-Training Optimization toolkit](@ref pot_README) to calibrate a model and then execute it in the
+* [Post-training Optimization Tool](@ref pot_README) to calibrate a model and then execute it in the
 INT8 precision
 * [Accuracy Checker](@ref omz_tools_accuracy_checker) to determine the accuracy of a model
 * [Benchmark Tool](@ref openvino_inference_engine_samples_benchmark_app_README) to estimate inference performance on supported devices

View File

@@ -29,11 +29,14 @@ If your neural network model contains layers that are not in the list of known l
 Run the [Accuracy Checker utility](@ref omz_tools_accuracy_checker) either against source topologies or against the output representation to evaluate the accuracy of inference. The Accuracy Checker is also part of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), an integrated web-based performance analysis studio.
+Use the [Post-training Optimization Tool](@ref pot_README) to accelerate the inference of a deep learning model by quantizing it to INT8.
 Useful documents for model optimization:
 * [Model Optimizer Developer Guide](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
 * [Intermediate Representation and Opsets](MO_DG/IR_and_opsets.md)
 * [Custom Layers Guide](HOWTO/Custom_Layers_Guide.md)
 * [Accuracy Checker utility](@ref omz_tools_accuracy_checker)
+* [Post-training Optimization Tool](@ref pot_README)
 * [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction)
 * [Model Downloader](@ref omz_tools_downloader) utility
 * [Intel's Pretrained Models (Open Model Zoo)](@ref omz_models_group_intel)
@@ -42,7 +45,7 @@ Useful documents for model optimization:
 ### Running and Tuning Inference
 The other core component of OpenVINO™ is the [Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md), which manages the loading and compiling of the optimized neural network model, runs inference operations on input data, and outputs the results. Inference Engine can execute synchronously or asynchronously, and its plugin architecture manages the appropriate compilations for execution on multiple Intel® devices, including both workhorse CPUs and specialized graphics and video processing platforms (see below, Packaging and Deployment).
-You can use OpenVINO™ Tuning Utilities with the Inference Engine to trial and test inference on your model. The Benchmark utility uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares performance of differently configured inferences. The [Post-Training Optimization Tool](@ref pot_README) integrates a suite of quantization- and calibration-based tools to further streamline performance.
+You can use OpenVINO™ Tuning Utilities with the Inference Engine to trial and test inference on your model. The Benchmark utility uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares performance of differently configured inferences.
 For a full browser-based studio integrating these other key tuning utilities, try the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction).
 ![](img/OV-diagram-step3.png)
@@ -56,7 +59,7 @@ Useful documents for inference tuning:
 * [Inference Engine API References](./api_references.html)
 * [Inference Code Samples](IE_DG/Samples_Overview.md)
 * [Application Demos](@ref omz_demos)
-* [Post-Training Optimization Tool Guide](@ref pot_README)
+* [Low Precision Optimization Guide] (@ref pot_docs_LowPrecisionOptimizationGuide)
 * [Deep Learning Workbench Guide](@ref workbench_docs_Workbench_DG_Introduction)
 * [Intel Media SDK](https://github.com/Intel-Media-SDK/MediaSDK)
 * [DL Streamer Samples](@ref gst_samples_README)
@@ -86,7 +89,7 @@ Intel® Distribution of OpenVINO™ toolkit includes the following components:
 - [Deep Learning Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md): A unified API to allow high performance inference on many hardware types including Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, Intel® Vision Accelerator Design with Intel® Movidius™ vision processing unit (VPU).
 - [Inference Engine Samples](IE_DG/Samples_Overview.md): A set of simple console applications demonstrating how to use the Inference Engine in your applications.
 - [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction): A web-based graphical environment that allows you to easily use various sophisticated OpenVINO™ toolkit components.
-- [Post-Training Optimization tool](@ref pot_README): A tool to calibrate a model and then execute it in the INT8 precision.
+- [Post-training Optimization Tool](@ref pot_README): A tool to calibrate a model and then execute it in the INT8 precision.
 - Additional Tools: A set of tools to work with your models including [Benchmark App](../inference-engine/tools/benchmark_tool/README.md), [Cross Check Tool](../inference-engine/tools/cross_check_tool/README.md), [Compile tool](../inference-engine/tools/compile_tool/README.md).
 - [Open Model Zoo](@ref omz_models_group_intel)
 - [Demos](@ref omz_demos): Console applications that provide robust application templates to help you implement specific deep learning scenarios.

View File

@@ -4,15 +4,15 @@
 **Category**: Convolution
-**Short description**: Computes the gradients of a Convolution operation with respect to the input. Also known as a Deconvolution or a Transposed Convolution.
+**Short description**: Computes 1D, 2D or 3D *ConvolutionBackpropData* operation with respect to the input and kernel tensors. Also known as a Transposed Convolution.
 **Detailed description**:
-ConvolutionBackpropData takes the input tensor, weights tensor and output shape and computes the output tensor of a given shape. The shape of the output can be specified as an input 1D integer tensor explicitly or determined by other attributes implicitly. If output shape is specified as an explicit input, shape of the output exactly matches the specified size and required amount of padding is computed.
+ConvolutionBackpropData takes the input tensor, weights tensor and output shape and computes the output tensor of a given shape. The shape of the output can be specified as an input 1D integer tensor explicitly or determined by other attributes implicitly. If output shape is specified as an explicit input, shape of the output exactly matches the specified size and required amount of padding is computed. More thorough explanation can be found in [Transposed Convolutions](https://arxiv.org/abs/1603.07285).
-ConvolutionBackpropData accepts the same set of attributes as a regular Convolution operation, but they are interpreted in a "backward way", so they are applied to the output of ConvolutionBackpropData, but not to the input. Refer to a regular Convolution operation for detailed description of each attribute.
+ConvolutionBackpropData accepts the same set of attributes as a regular Convolution operation and additionally `output_padding` attribute, but they are interpreted in a "backward way", so they are applied to the output of ConvolutionBackpropData, but not to the input. Refer to a regular [Convolution](Convolution_1.md) operation for detailed description of each Convolution attribute.
-Output shape when specified as an input `output_shape`, specifies only spatial dimensions. No batch or channel dimension should be passed along with H, W or other spatial dimensions. If `output_shape` is omitted, then `pads_begin`, `pads_end` or `auto_pad` are used to determine output spatial shape `[Y_1, Y_2, ..., Y_D]` by input spatial shape `[X_1, X_2, ..., X_D]` in the following way:
+When output shape is specified as an input tensor `output_shape` then it specifies only spatial dimensions. No batch or channel dimension should be passed along with spatial dimensions. If `output_shape` is omitted, then `pads_begin`, `pads_end` or `auto_pad` are used to determine output spatial shape `[O_z, O_y, O_x]` by input spatial shape `[I_z, I_y, I_x]` in the following way:
 ```
 if auto_pads != None:
@@ -24,7 +24,7 @@ Y_i = stride[i] * (X_i - 1) + ((K_i - 1) * dilations[i] + 1) - pads_begin[i] - p
 where `K_i` filter kernel dimension along spatial axis `i`.
-If `output_shape` is specified, `pads_begin` and `pads_end` are ignored, and `auto_pad` defines how to distribute padding amount around the tensor. In this case pads are determined based on the next formulas to correctly align input and output tensors (similar to ONNX definition at https://github.com/onnx/onnx/blob/master/docs/Operators.md#convtranspose):
+If `output_shape` is specified, `pads_begin` and `pads_end` are ignored, and `auto_pad` defines how to distribute padding amount around the tensor. In this case pads are determined based on the next formulas to correctly align input and output tensors:
 ```
 total_padding[i] = stride[i] * (X_i - 1) + ((K_i - 1) * dilations[i] + 1) - output_shape[i] + output_padding[i]
@@ -42,7 +42,7 @@ else:
 * **Description**: *strides* has the same definition as *strides* for a regular Convolution but applied in the backward way, for the output tensor.
 * **Range of values**: positive integers
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: None
 * **Required**: *yes*
@@ -50,7 +50,7 @@ else:
 * **Description**: *pads_begin* has the same definition as *pads_begin* for a regular Convolution but applied in the backward way, for the output tensor. May be omitted specified, in which case pads are calculated automatically.
 * **Range of values**: non-negative integers
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: None
 * **Required**: *yes*
 * **Note**: the attribute is ignored when *auto_pad* attribute is specified.
@@ -59,7 +59,7 @@ else:
 * **Description**: *pads_end* has the same definition as *pads_end* for a regular Convolution but applied in the backward way, for the output tensor. May be omitted, in which case pads are calculated automatically.
 * **Range of values**: non-negative integers
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: None
 * **Required**: *yes*
 * **Note**: the attribute is ignored when *auto_pad* attribute is specified.
@@ -68,7 +68,7 @@ else:
 * **Description**: *dilations* has the same definition as *dilations* for a regular Convolution but applied in the backward way, for the output tensor.
 * **Range of values**: positive integers
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: None
 * **Required**: *yes*
@@ -76,9 +76,10 @@ else:
 * **Description**: *auto_pad* has the same definition as *auto_pad* for a regular Convolution but applied in the backward way, for the output tensor.
 * *explicit*: use explicit padding values from `pads_begin` and `pads_end`.
-* *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value an extra padding is added at the end (at the beginning).
+* *same_upper* the input is padded to match the output size. In case of odd padding value an extra padding is added at the end.
+* *same_lower* the input is padded to match the output size. In case of odd padding value an extra padding is added at the beginning.
 * *valid* - do not use padding.
-* **Type**: string
+* **Type**: `string`
 * **Default value**: None
 * **Required**: *no*
 * **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified.
@@ -87,27 +88,38 @@ else:
 * **Description**: *output_padding* adds additional amount of paddings per each spatial axis in the `output` tensor. It unlocks more elements in the output allowing them to be computed. Elements are added at the higher coordinate indices for the spatial dimensions. Number of elements in *output_padding* list matches the number of spatial dimensions in `data` and `output` tensors.
 * **Range of values**: non-negative integer values
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: all zeros
 * **Required**: *no*
 **Inputs**:
-* **1**: `data` -- input tensor of rank 3 or greater. Layout is `[N, C_INPUT, X1, ..., XD]`. *Required*.
+* **1**: Input tensor of type *T1* and rank 3, 4 or 5. Layout is `[N, C_INPUT, Z, Y, X]` (number of batches, number of input channels, spatial axes Z, Y, X). *Required*.
-* **2**: `filter` -- convolution kernel tensor. Weights have shape `[C_INPUT, C_OUTPUT, K_D, ..., K_1]`. `C_INPUT` is the number of channels in input `data` tensor shape, and `C_OUTPUT` is the number of channels in the `output` tensor. Spatial size of the kernel `[K_D, ..., K_1]` is derived from the shape of this input and aren't specified by any attribute. *Required*.
+* **2**: Convolution kernel tensor of type *T1* and rank 3, 4 or 5. Layout is `[C_INPUT, C_OUTPUT, Z, Y, X]` (number of input channels, number of output channels, spatial axes Z, Y, X). Spatial size of the kernel is derived from the shape of this input and aren't specified by any attribute. *Required*.
-* **3**: `output_shape` is 1D integer tensor that specifies spatial shape of the output. *Optional*. If specified, *padding amount* is deduced from relation of input and output spatial shapes according to formulas in the description. If not specified, *output shape* is calculated based on the `pads_begin` and `pads_end` or completely according to `auto_pad`.
+* **3**: `output_shape` is 1D tensor of type *T2* that specifies spatial shape of the output. If specified, *padding amount* is deduced from relation of input and output spatial shapes according to formulas in the description. If not specified, *output shape* is calculated based on the `pads_begin` and `pads_end` or completely according to `auto_pad`. *Optional*.
+* **Note**: Type of the convolution (1D, 2D or 3D) is derived from the rank of the input tensors and not specified by any attribute:
+* 1D convolution (input tensors rank 3) means that there is only one spatial axis X,
+* 2D convolution (input tensors rank 4) means that there are two spatial axes Y, X,
+* 3D convolution (input tensors rank 5) means that there are three spatial axes Z, Y, X.
 **Outputs**:
-* **1**: `output` -- output tensor of the same rank as input `data` tensor and shape `[N, C_OUTPUT, Y1, ..., YD]`.
+* **1**: Output tensor of type *T1* and rank 3, 4 or 5. Layout is `[N, C_OUTPUT, Z, Y, X]` (number of batches, number of kernel output channels, spatial axes Z, Y, X).
-**Example**
+**Types**:
+* *T1*: any numeric type.
+* *T2*: any integer type.
+**Examples**
+*Example 1: 2D ConvolutionBackpropData*
 ```xml
 <layer id="5" name="upsampling_node" type="ConvolutionBackpropData">
-<data dilations="1,1" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
+<data dilations="1,1" pads_begin="1,1" pads_end="1,1" strides="2,2" output_padding="0,0" auto_pad="explicit"/>
 <input>
 <port id="0">
 <dim>1</dim>
@@ -132,3 +144,66 @@ else:
 </output>
 </layer>
 ```
*Example 2: 2D ConvolutionBackpropData with output_padding*
```xml
<layer id="5" name="upsampling_node" type="ConvolutionBackpropData">
<data dilations="1,1" pads_begin="0,0" pads_end="0,0" strides="3,3" output_padding="2,2" auto_pad="explicit"/>
<input>
<port id="0">
<dim>1</dim>
<dim>20</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>20</dim>
<dim>10</dim>
<dim>3</dim>
<dim>3</dim>
</port>
</input>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>10</dim>
<dim>8</dim>
<dim>8</dim>
</port>
</output>
</layer>
```
*Example 3: 2D ConvolutionBackpropData with output_shape input*
```xml
<layer id="5" name="upsampling_node" type="ConvolutionBackpropData">
<data dilations="1,1" pads_begin="1,1" pads_end="1,1" strides="1,1" output_padding="0,0" auto_pad="valid"/>
<input>
<port id="0">
<dim>1</dim>
<dim>20</dim>
<dim>224</dim>
<dim>224</dim>
</port>
<port id="1">
<dim>20</dim>
<dim>10</dim>
<dim>3</dim>
<dim>3</dim>
</port>
<port id="2">
<dim>2</dim> <!-- output_shape value is: [450, 450]-->
</port>
</input>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>10</dim>
<dim>450</dim>
<dim>450</dim>
</port>
</output>
</layer>
```
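As a quick check of the output-shape formula quoted earlier, the numbers of *Example 2* can be plugged in directly (an illustrative sketch, not part of the specification):

```python
def deconv_out(x, stride, kernel, dilation=1, pad_begin=0, pad_end=0, output_padding=0):
    # Y_i = stride[i] * (X_i - 1) + ((K_i - 1) * dilations[i] + 1) - pads_begin[i] - pads_end[i] + output_padding[i]
    return stride * (x - 1) + ((kernel - 1) * dilation + 1) - pad_begin - pad_end + output_padding

# Example 2 above: 2x2 input, 3x3 kernel, strides 3,3, zero pads, output_padding 2,2
print(deconv_out(2, stride=3, kernel=3, output_padding=2))   # 8 -> matches the declared 8x8 output
```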

View File

@@ -22,7 +22,7 @@ the number of batch dimensions. `N` and `M` are numbers of dimensions of `data`
 representing the batches, and *Gather* starts to gather from the `b` dimension. It requires the first `b`
 dimensions in `data` and `indices` tensors to be equal. If `batch_dims` is less than zero, normalized value is used
 `batch_dims = indices.rank + batch_dims`.
-* **Range of values**: `[-min(data.rank, indices.rank); min(data.rank, indices.rank))` and `batch_dims' <= axis'`.
+* **Range of values**: `[-min(data.rank, indices.rank); min(data.rank, indices.rank)]` and `batch_dims' <= axis'`.
 Where `batch_dims'` and `axis'` stand for normalized `batch_dims` and `axis` values.
 * **Type**: *T_AXIS*
 * **Default value**: 0
@@ -136,8 +136,9 @@ output_shape = (2, 3)
 * **1**: `data` tensor of type *T* with arbitrary data. **Required**.
-* **2**: `indices` tensor of type *T_IND* with indices to gather. The values for indices are in the range `[0, data[axis] - 1]`.
+* **2**: `indices` tensor of type *T_IND* with indices to gather. 0D tensor (scalar) for indices is also allowed.
+The values for indices are in the range `[0, data[axis] - 1]`.
 **Required**.
 * **3**: Scalar or 1D tensor `axis` of *T_AXIS* type is a dimension index to gather data from. For example,
 *axis* equal to 1 means that gathering is performed over the first dimension. Negative `axis` means reverse indexing and
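A small NumPy analogy of the indices behaviour described above, including the newly allowed scalar (0D) indices (illustrative only, not the reference implementation):

```python
import numpy as np

data = np.arange(2 * 3 * 4).reshape(2, 3, 4)

# 1D indices along axis=1: the indices' shape replaces that axis in the output
print(np.take(data, [0, 2], axis=1).shape)   # (2, 2, 4)

# 0D (scalar) index along axis=1: the gathered axis disappears, as for a scalar `indices` input
print(np.take(data, 1, axis=1).shape)        # (2, 4)
```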

View File

@@ -26,7 +26,7 @@ Where D is the rank of input tensor `data`. The sum of elements in `split_length
 * **2**: `axis`. Axis along `data` to split. A scalar of type `T2` with value from range `-rank(data) .. rank(data)-1`. Negative values address dimensions from the end.
 **Required.**
-* **3**: `split_lengths`. A list containing the dimension values of each output tensor shape along the split `axis`. A 1D tensor of type `T2`. The number of elements in `split_lengths` determines the number of outputs. The sum of elements in `split_lengths` must match `data.shape[axis]`. In addition `split_lenghts` can contain a single `-1` element, which means, all remaining items along specified `axis` that are not consumed by other parts. **Required.**
+* **3**: `split_lengths`. A list containing the dimension values of each output tensor shape along the split `axis`. A 1D tensor of type `T2`. The number of elements in `split_lengths` determines the number of outputs. The sum of elements in `split_lengths` must match `data.shape[axis]`. In addition `split_lengths` can contain a single `-1` element, which means, all remaining items along specified `axis` that are not consumed by other parts. **Required.**
 **Outputs**
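A short sketch of how a single `-1` entry in `split_lengths` resolves to the remaining size along `axis` (illustrative, using NumPy as a stand-in):

```python
import numpy as np

data = np.zeros((4, 10))
axis, split_lengths = 1, [2, -1, 3]

# Resolve the single -1 entry to whatever remains along `axis`
remainder = data.shape[axis] - sum(l for l in split_lengths if l != -1)
lengths = [remainder if l == -1 else l for l in split_lengths]   # [2, 5, 3]

parts = np.split(data, np.cumsum(lengths)[:-1], axis=axis)
print([p.shape for p in parts])   # [(4, 2), (4, 5), (4, 3)]
```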

View File

@@ -8,7 +8,7 @@
 **Detailed description**:
-*Reshape* layer takes two input tensors: the tensor to be resized and the output tensor shape. The values in the second tensor could be -1, 0 and any positive integer number. The two special values -1 and 0:
+*Reshape* takes two input tensors: `data` to be resized and `shape` of the new output. The values in the `shape` could be `-1`, `0` and any positive integer number. The two special values `-1` and `0`:
 * `0` means "copy the respective dimension *(left aligned)* of the input tensor" if `special_zero` is set to `true`; otherwise it is a normal dimension and is applicable to empty tensors.
 * `-1` means that this dimension is calculated to keep the overall elements count the same as in the input tensor. Not more than one `-1` can be used in a reshape operation.
@@ -18,30 +18,31 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 * *special_zero*
-* **Description**: *special_zero* controls how zero values in `shape` are interpreted. If *special_zero* is `false`, then 0 is interpreted as-is which means that output shape will contain a zero dimension at the specified location. Input and output tensors are empty in this case. If *special_zero* is `true`, then all zeros in `shape` implies the copying of corresponding dimensions from `data.shape` into the output shape *(left aligned)*.
+* **Description**: *special_zero* controls how zero values in `shape` are interpreted. If *special_zero* is `false`, then `0` is interpreted as-is which means that output shape will contain a zero dimension at the specified location. Input and output tensors are empty in this case. If *special_zero* is `true`, then all zeros in `shape` implies the copying of corresponding dimensions from `data.shape` into the output shape *(left aligned)*.
 * **Range of values**: `false` or `true`
-* **Type**: boolean
+* **Type**: `boolean`
 * **Default value**: None
 * **Required**: *yes*
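The `0` and `-1` special values are easiest to see in a small sketch (illustrative only; the shapes are made up, this is not the plugin implementation):

```python
from math import prod

def reshape_output_shape(data_shape, shape, special_zero):
    out = list(shape)
    if special_zero:
        # `0` copies the respective (left-aligned) input dimension
        out = [data_shape[i] if d == 0 else d for i, d in enumerate(out)]
    if -1 in out:
        # the single `-1` is chosen to keep the total element count unchanged
        rest = prod(d for d in out if d != -1)
        out[out.index(-1)] = prod(data_shape) // rest
    return out

print(reshape_output_shape((2, 3, 4), (0, -1), special_zero=True))   # [2, 12]
print(reshape_output_shape((2, 3, 4), (4, -1), special_zero=False))  # [4, 6]
```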
 **Inputs**:
-* **1**: `data` -- multidimensional input tensor of type *T*. *Required*.
+* **1**: `data` a tensor of type T and arbitrary shape. **Required**.
-* **2**: `shape` -- 1D tensor of type *T_SHAPE* describing output shape. *Required*.
+* **2**: `shape` 1D tensor of type *T_SHAPE* describing output shape. **Required**.
 **Outputs**:
-* **1**: Output tensor with the same content as a tensor at input `data` but with shape defined by input `shape`.
+* **1**: Output tensor of type *T* with the same content as `data` input tensor but with shape defined by `shape` input tensor.
 **Types**
-* *T*: supported type.
+* *T*: any numeric type.
-* *T_SHAPE*: supported integer type.
+* *T_SHAPE*: any supported integer type.
 **Examples**
+*Example 1: reshape empty tensor*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="false"/>
@@ -65,6 +66,7 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 </layer>
 ```
+*Example 2: reshape tensor - preserve first dim, calculate second and fix value for third dim*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="true"/>
@@ -89,6 +91,7 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 </layer>
 ```
+*Example 3: reshape tensor - preserve first two dims, fix value for third dim and calculate fourth*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="true"/>
@@ -113,6 +116,7 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 </layer>
 ```
+*Example 4: reshape tensor - calculate first dim and preserve second dim*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="true"/>
@@ -135,6 +139,7 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 </layer>
 ```
+*Example 5: reshape tensor - preserve first dim and calculate second dim*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="true"/>

View File

@@ -67,7 +67,7 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
 // Example: register CommonOptimizations transformation from transformations library
 passManager.register_pass<ngraph::pass::CommonOptimizations>();
 // Template plugin handles only FP32 networks
-passManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+passManager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
 // Example: register plugin specific transformation
 passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
 passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();

View File

@@ -122,6 +122,7 @@ if(SPEECH_LIBS_AND_DEMOS)
 install(DIRECTORY ${TEMP}/deployment_tools
                   ${TEMP}/data_processing
         DESTINATION .
+        USE_SOURCE_PERMISSIONS
         COMPONENT speech_demo_files)
 endif()

View File

@@ -38,62 +38,6 @@ if (ENABLE_MYRIAD)
 include(cmake/vpu_dependencies.cmake)
 endif()
-## enable cblas_gemm from OpenBLAS package
-if (ENABLE_MKL_DNN AND GEMM STREQUAL "OPENBLAS")
-if(AARCH64)
-if(DEFINED ENV{THIRDPARTY_SERVER_PATH})
-set(IE_PATH_TO_DEPS "$ENV{THIRDPARTY_SERVER_PATH}")
-elseif(DEFINED THIRDPARTY_SERVER_PATH)
-set(IE_PATH_TO_DEPS "${THIRDPARTY_SERVER_PATH}")
-else()
-message(WARNING "OpenBLAS is not found!")
-endif()
-if(DEFINED IE_PATH_TO_DEPS)
-reset_deps_cache(OpenBLAS_DIR)
-RESOLVE_DEPENDENCY(OpenBLAS
-ARCHIVE_LIN "keembay/openblas_0.3.7_yocto_kmb.tar.xz"
-TARGET_PATH "${TEMP}/openblas_0.3.7_yocto_kmb"
-ENVIRONMENT "OpenBLAS_DIR"
-SHA256 "c75aac901d5297d6d60a4b1f941f0335d8fd7f52e0dff8c445f644e2e45e6fba")
-update_deps_cache(OpenBLAS_DIR "${OpenBLAS}/lib/cmake/openblas" "Path to OpenBLAS package folder")
-find_package(OpenBLAS QUIET)
-if(OpenBLAS_FOUND)
-set(BLAS_FOUND TRUE)
-set(BLAS_INCLUDE_DIRS ${OpenBLAS_INCLUDE_DIRS})
-set(BLAS_LIBRARIES ${OpenBLAS_LIBRARIES})
-endif()
-unset(IE_PATH_TO_DEPS)
-endif()
-endif()
-if(NOT BLAS_LIBRARIES OR NOT BLAS_INCLUDE_DIRS)
-find_package(BLAS REQUIRED)
-if(BLAS_FOUND)
-find_path(BLAS_INCLUDE_DIRS cblas.h)
-else()
-message(ERROR "OpenBLAS not found: install OpenBLAS or set -DBLAS_INCLUDE_DIRS=<path to dir with cblas.h> and -DBLAS_LIBRARIES=<path to libopenblas.so or openblas.lib>")
-endif()
-endif()
-debug_message(STATUS "openblas=" ${BLAS_LIBRARIES})
-endif ()
-## MKL-ML package
-if (GEMM STREQUAL "MKL")
-if(NOT MKLROOT)
-message(FATAL_ERROR "MKLROOT not found: install MKL and set -DMKLROOT=<path_to_MKL>")
-endif()
-set(MKL ${MKLROOT})
-debug_message(STATUS "mkl_ml=" ${MKLROOT})
-endif ()
 ## Intel OMP package
 if (THREADING STREQUAL "OMP")
 reset_deps_cache(OMP)
@@ -145,10 +89,10 @@ if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
 ENVIRONMENT "TBBROOT"
 SHA256 "f1c9b9e2861efdaa01552bd25312ccbc5feeb45551e5f91ae61e29221c5c1479")
 RESOLVE_DEPENDENCY(TBBBIND_2_4
-ARCHIVE_WIN "tbbbind_2_4_static_win.zip"
+ARCHIVE_WIN "tbbbind_2_4_static_win_v2.zip"
 TARGET_PATH "${TEMP}/tbbbind_2_4"
 ENVIRONMENT "TBBBIND_2_4_ROOT"
-SHA256 "1a3a05082cc5ef1a764d635793be347b82c795f0e9ced771515fc3706a4dc4f0")
+SHA256 "90dc165652f6ac2ed3014c71e57f797fcc4b11e1498a468e3d2c85deb2a4186a")
 elseif(ANDROID) # Should be before LINUX due LINUX is detected as well
 RESOLVE_DEPENDENCY(TBB
 ARCHIVE_ANDROID "tbb2020_20200404_android.tgz"
@@ -159,11 +103,13 @@ if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
 RESOLVE_DEPENDENCY(TBB
 ARCHIVE_LIN "tbb2020_20200415_lin_strip.tgz"
 TARGET_PATH "${TEMP}/tbb"
+ENVIRONMENT "TBBROOT"
 SHA256 "95b2f3b0b70c7376a0c7de351a355c2c514b42c4966e77e3e34271a599501008")
 RESOLVE_DEPENDENCY(TBBBIND_2_4
-ARCHIVE_LIN "tbbbind_2_4_static_lin.tgz"
+ARCHIVE_LIN "tbbbind_2_4_static_lin_v2.tgz"
 TARGET_PATH "${TEMP}/tbbbind_2_4"
-SHA256 "888582a94f81821f9894cc089db36d5a6c2e0b6998cfa1fec0c027f28c597ada")
+ENVIRONMENT "TBBBIND_2_4_ROOT"
+SHA256 "6dc926258c6cd3cba0f5c2cc672fd2ad599a1650fe95ab11122e8f361a726cb6")
 elseif(LINUX AND AARCH64)
 RESOLVE_DEPENDENCY(TBB
 ARCHIVE_LIN "keembay/tbb2020_38404_kmb_lic.tgz"

View File

@@ -8,23 +8,6 @@ ie_dependent_option (ENABLE_GNA "GNA support for inference engine" ON "NOT APPLE
 ie_dependent_option (ENABLE_CLDNN_TESTS "Enable clDNN unit tests" OFF "ENABLE_CLDNN" OFF)
-# "MKL-DNN library might use MKL-ML or OpenBLAS for gemm tasks: MKL|OPENBLAS|JIT"
-if (ENABLE_MKL_DNN)
-if(AARCH64)
-set(GEMM_DEFAULT "OPENBLAS")
-else()
-set(GEMM_DEFAULT "JIT")
-endif()
-set(GEMM "${GEMM_DEFAULT}" CACHE STRING "GEMM implementation")
-set_property(CACHE GEMM PROPERTY STRINGS "MKL" "OPENBLAS" "JIT")
-list (APPEND IE_OPTIONS GEMM)
-if (NOT GEMM STREQUAL "MKL" AND
-NOT GEMM STREQUAL "OPENBLAS" AND
-NOT GEMM STREQUAL "JIT")
-message(FATAL_ERROR "GEMM should be set to MKL, OPENBLAS or JIT. Default option is ${GEMM_DEFAULT}")
-endif()
-endif()
 # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ"
 if(X86 OR ARM OR (MSVC AND (ARM OR AARCH64)) )
 set(THREADING_DEFAULT "SEQ")

View File

@@ -25,9 +25,9 @@ function(set_ie_threading_interface_for TARGET_NAME)
 else()
 find_dependency(TBB COMPONENTS tbb tbbmalloc)
 endif()
-set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE)
+set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE)
-set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
+set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
-set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE)
+set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE)
 if (NOT TBB_FOUND)
 ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\
 SEQ method will be used.")

View File

@ -19,15 +19,44 @@
@PACKAGE_INIT@ @PACKAGE_INIT@
include(CMakeFindDependencyMacro) macro(_ie_find_dependency dep)
set(cmake_fd_quiet_arg)
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
set(cmake_fd_quiet_arg QUIET)
endif()
set(cmake_fd_required_arg)
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
set(cmake_fd_required_arg REQUIRED)
endif()
get_property(cmake_fd_alreadyTransitive GLOBAL PROPERTY
_CMAKE_${dep}_TRANSITIVE_DEPENDENCY)
find_package(${dep} ${ARGN}
${cmake_fd_quiet_arg}
${cmake_fd_required_arg})
if(NOT DEFINED cmake_fd_alreadyTransitive OR cmake_fd_alreadyTransitive)
set_property(GLOBAL PROPERTY _CMAKE_${dep}_TRANSITIVE_DEPENDENCY TRUE)
endif()
if(NOT ${dep}_FOUND)
set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.")
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False)
return()
endif()
set(cmake_fd_required_arg)
set(cmake_fd_quiet_arg)
endmacro()
# need to store current PACKAGE_PREFIX_DIR, because it's overwritten by ngraph one # need to store current PACKAGE_PREFIX_DIR, because it's overwritten by ngraph one
set(IE_PACKAGE_PREFIX_DIR "${PACKAGE_PREFIX_DIR}") set(IE_PACKAGE_PREFIX_DIR "${PACKAGE_PREFIX_DIR}")
set(THREADING "@THREADING@") set(THREADING "@THREADING@")
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND)
set_and_check(_tbb_dir "@PACKAGE_IE_TBB_DIR@") set_and_check(_tbb_dir "@PACKAGE_IE_TBB_DIR@")
find_dependency(TBB _ie_find_dependency(TBB
COMPONENTS tbb tbbmalloc COMPONENTS tbb tbbmalloc
CONFIG CONFIG
PATHS ${TBBROOT}/cmake PATHS ${TBBROOT}/cmake
@ -37,7 +66,7 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
endif() endif()
set_and_check(_ngraph_dir "@PACKAGE_IE_NGRAPH_DIR@") set_and_check(_ngraph_dir "@PACKAGE_IE_NGRAPH_DIR@")
find_dependency(ngraph _ie_find_dependency(ngraph
CONFIG CONFIG
PATHS ${_ngraph_dir} PATHS ${_ngraph_dir}
NO_CMAKE_FIND_ROOT_PATH NO_CMAKE_FIND_ROOT_PATH

View File

@ -1,2 +1 @@
numpy>=1.16.3 numpy~=1.19.5
cython>=0.29.17

View File

@ -1,2 +1,2 @@
opencv-python>=3.4.4.19 opencv-python==4.5.*
numpy>=1.16.3 numpy~=1.19.5

View File

@ -6,6 +6,7 @@ from ..inference_engine.ie_api cimport IENetwork
from libcpp cimport bool from libcpp cimport bool
from libcpp.string cimport string from libcpp.string cimport string
from libc.stdint cimport int64_t
def ApplyMOCTransformations(IENetwork network, bool cf): def ApplyMOCTransformations(IENetwork network, bool cf):
@ -16,8 +17,8 @@ def ApplyPOTTransformations(IENetwork network, string device):
C.ApplyPOTTransformations(network.impl, device) C.ApplyPOTTransformations(network.impl, device)
def ApplyLowLatencyTransformation(IENetwork network): def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1):
C.ApplyLowLatencyTransformation(network.impl) C.ApplyLowLatencyTransformation(network.impl, num_iterations)
def ApplyPruningTransformation(IENetwork network): def ApplyPruningTransformation(IENetwork network):

View File

@ -26,8 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
manager.run_passes(network.actual->getFunction()); manager.run_passes(network.actual->getFunction());
} }
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network) { void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) {
ngraph::pass::Manager manager; ngraph::pass::Manager manager;
// TODO: pass num_iterations to LowLatency
manager.register_pass<ngraph::pass::LowLatency>(); manager.register_pass<ngraph::pass::LowLatency>();
manager.register_pass<ngraph::pass::UnrollTensorIterator>(); manager.register_pass<ngraph::pass::UnrollTensorIterator>();

View File

@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf);
void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device); void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device);
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network); void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations);
void ApplyPruningTransformation(InferenceEnginePython::IENetwork network); void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);

View File

@ -3,6 +3,7 @@
from libcpp cimport bool from libcpp cimport bool
from libcpp.string cimport string from libcpp.string cimport string
from libc.stdint cimport int64_t
from ..inference_engine.ie_api_impl_defs cimport IENetwork from ..inference_engine.ie_api_impl_defs cimport IENetwork
@ -11,7 +12,7 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi
cdef void ApplyPOTTransformations(IENetwork network, string device) cdef void ApplyPOTTransformations(IENetwork network, string device)
cdef void ApplyLowLatencyTransformation(IENetwork network) cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations)
cdef void ApplyPruningTransformation(IENetwork network) cdef void ApplyPruningTransformation(IENetwork network)

View File

@ -2,3 +2,4 @@ opencv-python>=3.4.4.19
pytest==4.0.1 pytest==4.0.1
attrs==19.1.0 attrs==19.1.0
pytest-html==1.19.0 pytest-html==1.19.0
cython>=0.29.22

View File

@ -1,28 +1,28 @@
defusedxml>=0.5.0 defusedxml>=0.7.1
scipy==1.5.4 scipy~=1.5.4
jstyleson==0.0.2 jstyleson~=0.0.2
numpy~=1.18.5 numpy~=1.19.5
addict==2.2.1 addict>=2.4.0
pandas~=1.1.5 pandas~=1.1.5
hyperopt==0.1.2 hyperopt~=0.1.2
networkx==2.2 networkx~=2.5
tqdm==4.31.1 tqdm>=4.54.1
texttable==1.6.3 texttable~=1.6.3
py-cpuinfo!=5.0,!=6.0 py-cpuinfo>=7.0.0
PyYAML>=5.4.1 PyYAML>=5.4.1
pillow>=8.1.0 pillow>=8.1.2
scikit-image>=0.17 scikit-image~=0.17.2
scikit-learn>=0.23 scikit-learn>=0.24.1
yamlloader>=0.5 yamlloader>=0.5
shapely>=1.7 shapely>=1.7.1
nibabel>=3.1 nibabel>=3.2.1
pydicom>=2.0 pydicom>=2.1.2
sentencepiece>=0.1.91 sentencepiece>=0.1.95
tokenizers>=0.8 tokenizers>=0.10.1
editdistance>=0.5 editdistance>=0.5.3
parasail>=1.2 parasail>=1.2.4
fast-ctc-decode>=0.2 fast-ctc-decode>=0.2.5
rawpy>=0.15 rawpy>=0.16.0
nltk>=3.5 nltk>=3.5
opencv-python>=4.4 opencv-python==4.5.*
progress==1.5 progress>=1.5

View File

@ -8,20 +8,10 @@ py_modules =
mo_kaldi mo_kaldi
[options.package_data] [options.package_data]
mo = *.txt * = *
compression.configs.hardware = *.json
mo.extensions.front.mxnet = *.json
mo.extensions.front.onnx = *.json
mo.extensions.front.tf = *.json
mo.mo.front.caffe.proto = *.proto
[options.entry_points] [options.entry_points]
console_scripts = console_scripts =
mo=mo.__main__:main
pot=app.run:main
accuracy_check=accuracy_checker.main:main
convert_annotation=accuracy_checker.annotation_converters.convert:main
benchmark_app=openvino.tools.benchmark.main:main
[metadata] [metadata]
license_files = license_files =

View File

@ -1 +1 @@
numpy>=1.16.3 numpy~=1.19.5

View File

@ -35,7 +35,7 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
std::shared_ptr<IExecutableNetworkInternal> _impl; std::shared_ptr<IExecutableNetworkInternal> _impl;
std::shared_ptr<details::SharedObjectLoader> _so; std::shared_ptr<details::SharedObjectLoader> _so;
explicit ExecutableNetwork(const std::shared_ptr<IExecutableNetworkInternal>& impl, ExecutableNetwork(const std::shared_ptr<IExecutableNetworkInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so); const std::shared_ptr<details::SharedObjectLoader>& so);
friend class InferencePlugin; friend class InferencePlugin;

View File

@ -36,7 +36,7 @@ class INFERENCE_ENGINE_API_CLASS(InferRequest) {
std::shared_ptr<IInferRequestInternal> _impl; std::shared_ptr<IInferRequestInternal> _impl;
std::shared_ptr<details::SharedObjectLoader> _so; std::shared_ptr<details::SharedObjectLoader> _so;
explicit InferRequest(const std::shared_ptr<IInferRequestInternal>& impl, InferRequest(const std::shared_ptr<IInferRequestInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so); const std::shared_ptr<details::SharedObjectLoader>& so);
friend class ExecutableNetwork; friend class ExecutableNetwork;
@ -191,7 +191,7 @@ public:
*/ */
template<typename F> template<typename F>
void SetCompletionCallback(F callbackToSet) { void SetCompletionCallback(F callbackToSet) {
return SetCallback<F>{*this}(std::move(callbackToSet)); SetCallback<F>{*this}(std::move(callbackToSet));
} }
/** /**

View File

@ -11,9 +11,10 @@
#pragma once #pragma once
#include <string> #include <string>
#include <memory>
#include "ie_api.h"
#include "ie_blob.h" #include "ie_blob.h"
#include "details/ie_so_loader.h"
namespace InferenceEngine { namespace InferenceEngine {
@ -28,15 +29,15 @@ class IVariableStateInternal;
*/ */
class INFERENCE_ENGINE_API_CLASS(VariableState) { class INFERENCE_ENGINE_API_CLASS(VariableState) {
std::shared_ptr<IVariableStateInternal> _impl = nullptr; std::shared_ptr<IVariableStateInternal> _impl = nullptr;
details::SharedObjectLoader::Ptr _so = nullptr; std::shared_ptr<details::SharedObjectLoader> _so = nullptr;
/** /**
* @brief Constructs VariableState from the initialized std::shared_ptr * @brief Constructs VariableState from the initialized std::shared_ptr
* @param impl Initialized shared pointer * @param impl Initialized shared pointer
* @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed. * @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed.
*/ */
explicit VariableState(const std::shared_ptr<IVariableStateInternal>& impl, VariableState(const std::shared_ptr<IVariableStateInternal>& impl,
const details::SharedObjectLoader::Ptr& so = {}); const std::shared_ptr<details::SharedObjectLoader>& so);
friend class InferRequest; friend class InferRequest;
friend class ExecutableNetwork; friend class ExecutableNetwork;

View File

@ -1,50 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief header file for no_copy class
*
* @file ie_no_copy.hpp
*/
#pragma once
namespace InferenceEngine {
namespace details {
/**
* @brief This class is used for objects returned from the shared library factory to prevent copying
*/
class no_copy {
protected:
/**
* @brief A default constructor
*/
no_copy() = default;
/**
* @brief A default destructor
*/
virtual ~no_copy() = default;
/**
* @brief A removed copy constructor
*/
no_copy(no_copy const&) = delete;
/**
* @brief A removed assign operator
*/
no_copy& operator=(no_copy const&) = delete;
/**
* @brief A removed move constructor
*/
no_copy(no_copy&&) = delete;
/**
* @brief A removed move operator
*/
no_copy& operator=(no_copy&&) = delete;
};
} // namespace details
} // namespace InferenceEngine

View File

@ -19,7 +19,6 @@
#include "ie_layouts.h" #include "ie_layouts.h"
#include "ie_blob.h" #include "ie_blob.h"
#include "ie_version.hpp" #include "ie_version.hpp"
#include "details/ie_no_copy.hpp"
/** /**
* @def INFERENCE_EXTENSION_API(TYPE) * @def INFERENCE_EXTENSION_API(TYPE)

View File

@ -14,7 +14,6 @@
#include "ie_blob.h" #include "ie_blob.h"
#include "ie_common.h" #include "ie_common.h"
#include "details/ie_no_copy.hpp"
namespace InferenceEngine { namespace InferenceEngine {
@ -23,7 +22,7 @@ namespace InferenceEngine {
* @interface IVariableState * @interface IVariableState
* @brief Manages data for reset operations * @brief Manages data for reset operations
*/ */
class IVariableState : public details::no_copy { class IVariableState {
public: public:
IE_SUPPRESS_DEPRECATED_START IE_SUPPRESS_DEPRECATED_START
/** /**

View File

@ -93,6 +93,8 @@ Options:
-progress Optional. Show progress bar (can affect performance measurement). Default value is "false". -progress Optional. Show progress bar (can affect performance measurement). Default value is "false".
-shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size. -shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
-layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size. -layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
-cache_dir "<path>" Optional. Enables caching of loaded models to specified directory.
-load_from_file Optional. Loads model from file directly without ReadNetwork.
CPU-specific performance options: CPU-specific performance options:
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices -nstreams "<integer>" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices

View File

@ -122,6 +122,14 @@ static const char shape_message[] = "Optional. Set shape for input. For example,
static const char layout_message[] = "Optional. Prompts how network layouts should be treated by application. " static const char layout_message[] = "Optional. Prompts how network layouts should be treated by application. "
"For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size."; "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
// @brief message for enabling caching
static const char cache_dir_message[] = "Optional. Enables caching of loaded models to specified directory. "
"List of devices which support caching is shown at the end of this message.";
// @brief message for single load network
static const char load_from_file_message[] = "Optional. Loads model from file directly without ReadNetwork. "
"All CNNNetwork options (like re-shape) will be ignored.";
// @brief message for quantization bits // @brief message for quantization bits
static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)"; static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";
@ -238,6 +246,12 @@ DEFINE_string(op, "", outputs_precision_message);
/// Overwrites layout from ip and op options for specified layers."; /// Overwrites layout from ip and op options for specified layers.";
DEFINE_string(iop, "", iop_message); DEFINE_string(iop, "", iop_message);
/// @brief Define parameter for cache model dir <br>
DEFINE_string(cache_dir, "", cache_dir_message);
/// @brief Define flag for load network from model file by name without ReadNetwork <br>
DEFINE_bool(load_from_file, false, load_from_file_message);
/** /**
* @brief This function show a help message * @brief This function show a help message
*/ */
@ -262,6 +276,8 @@ static void showUsage() {
std::cout << " -progress " << progress_message << std::endl; std::cout << " -progress " << progress_message << std::endl;
std::cout << " -shape " << shape_message << std::endl; std::cout << " -shape " << shape_message << std::endl;
std::cout << " -layout " << layout_message << std::endl; std::cout << " -layout " << layout_message << std::endl;
std::cout << " -cache_dir \"<path>\" " << cache_dir_message << std::endl;
std::cout << " -load_from_file " << load_from_file_message << std::endl;
std::cout << std::endl << " device-specific performance options:" << std::endl; std::cout << std::endl << " device-specific performance options:" << std::endl;
std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl; std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl;
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl; std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;

View File

@ -330,7 +330,29 @@ int main(int argc, char* argv[]) {
std::string topology_name = ""; std::string topology_name = "";
benchmark_app::InputsInfo app_inputs_info; benchmark_app::InputsInfo app_inputs_info;
std::string output_name; std::string output_name;
if (!isNetworkCompiled) {
// Takes priority over config from file
if (!FLAGS_cache_dir.empty()) {
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), FLAGS_cache_dir}});
}
if (FLAGS_load_from_file && !isNetworkCompiled) {
next_step();
slog::info << "Skipping the step for loading network from file" << slog::endl;
next_step();
slog::info << "Skipping the step for loading network from file" << slog::endl;
next_step();
slog::info << "Skipping the step for loading network from file" << slog::endl;
auto startTime = Time::now();
exeNetwork = ie.LoadNetwork(FLAGS_m, device_name);
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});
if (batchSize == 0) {
batchSize = 1;
}
} else if (!isNetworkCompiled) {
// ----------------- 4. Reading the Intermediate Representation network // ----------------- 4. Reading the Intermediate Representation network
// ---------------------------------------- // ----------------------------------------
next_step(); next_step();
@ -363,7 +385,7 @@ int main(int argc, char* argv[]) {
slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl; slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl;
startTime = Time::now(); startTime = Time::now();
cnnNetwork.reshape(shapes); cnnNetwork.reshape(shapes);
auto duration_ms = double_to_string(get_total_ms_time(startTime)); duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl; slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
if (statistics) if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"reshape network time (ms)", duration_ms}}); statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"reshape network time (ms)", duration_ms}});
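The new -cache_dir and -load_from_file flags map onto existing public Core calls: the flag value becomes a CACHE_DIR config entry, and the path-based LoadNetwork overload replaces the explicit ReadNetwork/reshape steps. A minimal standalone sketch of the same flow, assuming placeholder paths model.xml and model_cache and the CPU device:

#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    // Equivalent of -cache_dir "<path>": compiled networks are cached under this directory.
    // "model_cache" is a placeholder directory name.
    ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "model_cache"}});
    // Equivalent of -load_from_file: hand the model path straight to LoadNetwork,
    // skipping ReadNetwork and any CNNNetwork-level options such as reshape.
    // "model.xml" is a placeholder model path.
    auto exeNetwork = ie.LoadNetwork("model.xml", "CPU");
    auto request = exeNetwork.CreateInferRequest();
    request.Infer();
    return 0;
}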

View File

@ -30,6 +30,8 @@ endif()
add_subdirectory(hetero_plugin) add_subdirectory(hetero_plugin)
add_subdirectory(auto_plugin)
add_subdirectory(multi_device) add_subdirectory(multi_device)
add_subdirectory(transformations) add_subdirectory(transformations)

View File

@ -0,0 +1,19 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set (TARGET_NAME "AutoPlugin")
file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
ie_add_plugin(NAME ${TARGET_NAME}
DEVICE_NAME "AUTO"
SOURCES ${SOURCES} ${HEADERS}
VERSION_DEFINES_FOR auto_plugin.cpp)
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})

View File

@ -0,0 +1,59 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <unordered_map>
#include "ie_metric_helpers.hpp"
#include "auto_exec_network.hpp"
#include "auto_infer_request.hpp"
namespace AutoPlugin {
using namespace InferenceEngine;
AutoExecutableNetwork::AutoExecutableNetwork(const ExecutableNetwork& network,
const DeviceInformation& deviceInfo,
const bool needPerfCounters) :
_deviceInfo(deviceInfo),
_network(network),
_config(deviceInfo.config.begin(), deviceInfo.config.end()),
_needPerfCounters(needPerfCounters) {
}
AutoExecutableNetwork::~AutoExecutableNetwork() = default;
IInferRequestInternal::Ptr AutoExecutableNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
OutputsDataMap networkOutputs) {
auto inferRequest = _network.CreateInferRequest();
return std::make_shared<AutoInferRequest>(networkInputs, networkOutputs, inferRequest);
}
void AutoExecutableNetwork::Export(std::ostream& networkModel) {
_network.Export(networkModel);
}
RemoteContext::Ptr AutoExecutableNetwork::GetContext() const {
return _network.GetContext();
}
InferenceEngine::CNNNetwork AutoExecutableNetwork::GetExecGraphInfo() {
return _network.GetExecGraphInfo();
}
Parameter AutoExecutableNetwork::GetMetric(const std::string &name) const {
return _network.GetMetric(name);
}
void AutoExecutableNetwork::SetConfig(const std::map<std::string, Parameter>& config) {
_network.SetConfig(config);
}
Parameter AutoExecutableNetwork::GetConfig(const std::string& name) const {
return _network.GetConfig(name);
}
} // namespace AutoPlugin

View File

@ -0,0 +1,51 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <atomic>
#include <mutex>
#include <queue>
#include <unordered_map>
#include <map>
#include <vector>
#include <string>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include <threading/ie_itask_executor.hpp>
namespace AutoPlugin {
using DeviceName = std::string;
struct DeviceInformation {
DeviceName deviceName;
std::map<std::string, std::string> config;
};
class AutoExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
using Ptr = std::shared_ptr<AutoExecutableNetwork>;
AutoExecutableNetwork(const InferenceEngine::ExecutableNetwork& network,
const DeviceInformation& deviceInfo,
const bool needPerfCounters = false);
void Export(std::ostream& networkModel) override;
InferenceEngine::RemoteContext::Ptr GetContext() const override;
InferenceEngine::CNNNetwork GetExecGraphInfo() override;
InferenceEngine::Parameter GetMetric(const std::string &name) const override;
void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
InferenceEngine::Parameter GetConfig(const std::string& name) const override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override;
~AutoExecutableNetwork() override;
DeviceInformation _deviceInfo;
InferenceEngine::ExecutableNetwork _network;
std::unordered_map<std::string, InferenceEngine::Parameter> _config;
bool _needPerfCounters = false;
};
} // namespace AutoPlugin

View File

@ -0,0 +1,39 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "auto_infer_request.hpp"
#include <ie_input_info.hpp>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
namespace AutoPlugin {
using namespace InferenceEngine;
AutoInferRequest::AutoInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
const InferRequest& inferRequest)
: IInferRequestInternal(networkInputs, networkOutputs)
, _inferRequest(inferRequest) {
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoInferRequest::GetPerformanceCounts() const {
return _inferRequest.GetPerformanceCounts();
}
void AutoInferRequest::InferImpl() {
_inferRequest.Infer();
}
void AutoInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
_inferRequest.SetBlob(name, data);
}
Blob::Ptr AutoInferRequest::GetBlob(const std::string& name) {
return _inferRequest.GetBlob(name);
}
void AutoInferRequest::Cancel() {
_inferRequest.Cancel();
}
} // namespace AutoPlugin

View File

@ -0,0 +1,40 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <atomic>
#include <cpp/ie_executable_network.hpp>
#include <cpp/ie_infer_request.hpp>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include <ie_blob.h>
#include <ie_common.h>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace AutoPlugin {
class AutoInferRequest : public InferenceEngine::IInferRequestInternal {
public:
using Ptr = std::shared_ptr<AutoInferRequest>;
explicit AutoInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const InferenceEngine::InferRequest& inferRequest);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
void InferImpl() override;
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
void Cancel() override;
private:
InferenceEngine::InferRequest _inferRequest;
};
} // namespace AutoPlugin

View File

@ -0,0 +1,199 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <unordered_set>
#include <ie_metric_helpers.hpp>
#include <ie_core.hpp>
#include <threading/ie_executor_manager.hpp>
#include <ie_algorithm.hpp>
#include "auto_plugin.hpp"
namespace AutoPlugin {
namespace {
ConfigType mergeConfigs(ConfigType config, const ConfigType& local) {
for (auto && kvp : local) {
config[kvp.first] = kvp.second;
}
return config;
}
DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices) {
for (auto& item : metaDevices) {
if (item.deviceName.find("CPU") == 0) {
return item;
}
}
IE_THROW(NotFound) << "No available device could be used";
}
} // namespace
AutoInferencePlugin::AutoInferencePlugin() {
_pluginName = "AUTO";
}
IE::ExecutableNetworkInternal::Ptr AutoInferencePlugin::LoadExeNetworkImpl(const IE::CNNNetwork& network,
const ConfigType& config) {
if (GetCore() == nullptr) {
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
}
if (network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto fullConfig = mergeConfigs(_config, config);
auto metaDevices = GetDeviceChoice(fullConfig);
// FIXME: always select CPU device now
DeviceInformation selectedDevice = SelectDevice(metaDevices);
IE::ExecutableNetwork executableNetwork;
try {
executableNetwork = GetCore()->LoadNetwork(network, selectedDevice.deviceName, selectedDevice.config);
} catch(const IE::Exception &iie) {
IE_THROW() << "Failed to load network to device named " << selectedDevice.deviceName
<< " with exception " << iie.what();
}
bool enablePerfCounters = false;
try {
enablePerfCounters =
executableNetwork.GetConfig(IE::PluginConfigParams::KEY_PERF_COUNT).as<std::string>() ==
IE::PluginConfigParams::YES;
} catch (...) {
}
return std::make_shared<AutoExecutableNetwork>(executableNetwork,
selectedDevice,
enablePerfCounters);
}
IE::QueryNetworkResult AutoInferencePlugin::QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const {
IE::QueryNetworkResult queryResult = {};
if (GetCore() == nullptr) {
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
}
if (network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto fullConfig = mergeConfigs(_config, config);
auto metaDevices = GetDeviceChoice(fullConfig);
std::unordered_set<std::string> supportedLayers;
for (auto&& value : metaDevices) {
try {
auto deviceQr = GetCore()->QueryNetwork(network, value.deviceName, value.config);
std::unordered_set<std::string> deviceSupportedLayers;
for (auto &&layerQr : deviceQr.supportedLayersMap) {
deviceSupportedLayers.emplace(layerQr.first);
}
supportedLayers = supportedLayers.empty()
? deviceSupportedLayers : (deviceSupportedLayers.empty()
? supportedLayers : IE::details::Intersection(
supportedLayers, deviceSupportedLayers));
break;
} catch (...) {
}
}
for (auto&& supportedLayer : supportedLayers) {
queryResult.supportedLayersMap[supportedLayer] = GetName();
}
return queryResult;
}
IE::Parameter AutoInferencePlugin::GetConfig(const std::string& name,
const std::map<std::string, IE::Parameter> & options) const {
auto it = _config.find(name);
if (it == _config.end()) {
IE_THROW() << "Unsupported config key: " << name;
} else {
return { it->second };
}
}
void AutoInferencePlugin::SetConfig(const ConfigType& config) {
for (auto && kvp : config) {
_config[kvp.first] = kvp.second;
}
}
IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name,
const std::map<std::string, IE::Parameter> & options) const {
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics;
metrics.emplace_back(METRIC_KEY(SUPPORTED_METRICS));
metrics.emplace_back(METRIC_KEY(FULL_DEVICE_NAME));
metrics.emplace_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
metrics.emplace_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
std::string device_name = {"Inference Engine AUTO device"};
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, device_name);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys;
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
std::vector<std::string> capabilities = { "" };
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
} else {
IE_THROW() << "Unsupported metric key " << name;
}
}
std::vector<AutoPlugin::DeviceInformation> AutoInferencePlugin::GetDeviceChoice(const ConfigType& config) const {
std::vector<DeviceInformation> metaDevices;
std::vector<std::string> availableDevices = GetCore()->GetAvailableDevices();
auto getDeviceConfig = [&] (const DeviceName & deviceWithID) {
IE::DeviceIDParser deviceParser(deviceWithID);
std::string deviceName = deviceParser.getDeviceName();
ConfigType tconfig = mergeConfigs(_config, config);
// set device ID if any
std::string deviceIDLocal = deviceParser.getDeviceID();
if (!deviceIDLocal.empty()) {
tconfig[IE::PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal;
}
return GetSupportedConfig(tconfig, deviceName);
};
for (auto && d : availableDevices) {
if (d != _pluginName) {
metaDevices.push_back({ d, getDeviceConfig(d)});
}
}
if (metaDevices.empty()) {
IE_THROW() << "Please, check environment due to no supported devices can be used";
}
return metaDevices;
}
//////////////////////////////////// private & protected functions ///////////////////
ConfigType AutoInferencePlugin::GetSupportedConfig(const ConfigType& config,
const std::string& deviceName) const {
std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
ConfigType supportedConfig;
for (auto&& key : supportedConfigKeys) {
auto itKey = config.find(key);
if (config.end() != itKey) {
supportedConfig[key] = itKey->second;
}
}
return supportedConfig;
}
// define CreatePluginEngine to create plugin instance
static const IE::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoPlugin"};
IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoInferencePlugin, version)
} // namespace AutoPlugin
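Taken together these files implement the new AUTO device: the plugin enumerates the available devices via GetDeviceChoice(), picks one (currently always a CPU device, as SelectDevice() above shows), and forwards the executable network and infer requests to it. A minimal usage sketch, assuming the plugin is registered in the build's plugins.xml and using a placeholder model.xml:

#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    // "model.xml" is a placeholder path; any IR accepted by ReadNetwork works here.
    auto network = ie.ReadNetwork("model.xml");
    // "AUTO" resolves to the new plugin: it collects the available devices and
    // loads the network on the one SelectDevice() picks (currently always CPU).
    auto exeNetwork = ie.LoadNetwork(network, "AUTO");
    auto request = exeNetwork.CreateInferRequest();
    request.Infer();
    return 0;
}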

View File

@ -0,0 +1,37 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <vector>
#include <string>
#include <unordered_set>
#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include "auto_exec_network.hpp"
namespace AutoPlugin {
namespace IE = InferenceEngine;
using ConfigType = std::map<std::string, std::string>;
class AutoInferencePlugin : public IE::InferencePluginInternal {
public:
AutoInferencePlugin();
~AutoInferencePlugin() = default;
IE::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const IE::CNNNetwork& network, const ConfigType& config) override;
IE::QueryNetworkResult QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const override;
IE::Parameter GetMetric(const std::string& name, const std::map<std::string, IE::Parameter>& options) const override;
IE::Parameter GetConfig(const std::string& name, const std::map<std::string, IE::Parameter> & options) const override;
void SetConfig(const ConfigType& config) override;
private:
std::vector<AutoPlugin::DeviceInformation> GetDeviceChoice(const ConfigType& config) const;
protected:
ConfigType GetSupportedConfig(const ConfigType& config, const AutoPlugin::DeviceName & deviceName) const;
};
} // namespace AutoPlugin

View File

@ -40,8 +40,6 @@ target_include_directories(${TARGET_NAME} PRIVATE
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
set_ie_threading_interface_for(clDNN_lib)
# Failed because of OpenCL # Failed because of OpenCL
# ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) # ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})

View File

@ -70,6 +70,7 @@
#include <low_precision/pull_reshape_through_dequantization.hpp> #include <low_precision/pull_reshape_through_dequantization.hpp>
#include <low_precision/pull_transpose_through_dequantization.hpp> #include <low_precision/pull_transpose_through_dequantization.hpp>
#include <low_precision/transformer.hpp> #include <low_precision/transformer.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/mat_mul.hpp> #include <low_precision/mat_mul.hpp>
#include <low_precision/strided_slice.hpp> #include <low_precision/strided_slice.hpp>
#include <low_precision/network_helper.hpp> #include <low_precision/network_helper.hpp>
@ -175,7 +176,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>(); manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConvertGather0D>(); manager.register_pass<ngraph::pass::ConvertGather0D>();
std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list { static const precisions_array convert_precision_list {
{ngraph::element::i64, ngraph::element::i32}, {ngraph::element::i64, ngraph::element::i32},
{ngraph::element::u64, ngraph::element::i32}, {ngraph::element::u64, ngraph::element::i32},
{ngraph::element::u16, ngraph::element::i32}, {ngraph::element::u16, ngraph::element::i32},
@ -185,9 +186,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
{ngraph::element::u4, ngraph::element::u8}, {ngraph::element::u4, ngraph::element::u8},
}; };
for (auto& precision : convert_precision_list) { manager.register_pass<ngraph::pass::ConvertPrecision>(convert_precision_list);
manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
}
auto pass_config = manager.get_pass_config(); auto pass_config = manager.get_pass_config();
@ -366,7 +365,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
// Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
// With this key users can work-around such issues // With this key users can work-around such issues
if (!config.enable_fp16_for_quantized_models) { if (!config.enable_fp16_for_quantized_models) {
manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32); manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
} }
auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>(); auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>();
const std::vector<ngraph::element::Type> supportedTypes = { ngraph::element::i8, ngraph::element::u8 }; const std::vector<ngraph::element::Type> supportedTypes = { ngraph::element::i8, ngraph::element::u8 };
@ -383,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params) .add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false) .setSupportAsymmetricQuantization(false)
.setSupport3DTensorOnActivations(false)) .setSupport3DTensorOnActivations(false))
.add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false)
.setDeconvolutionSpecificChannelsRatio(true))
// INT8 StridedSlice not supported // INT8 StridedSlice not supported
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>()); .remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());

View File

@ -90,7 +90,8 @@ public:
std::map<std::string, InferenceEngine::SizeVector> outputDims; std::map<std::string, InferenceEngine::SizeVector> outputDims;
std::map<std::string, cldnn::layout> inputLayouts; std::map<std::string, cldnn::layout> inputLayouts;
std::map<const char *, cldnn::primitive_id> blobMemCache; using BlobCacheKey = std::pair<const char*, std::vector<size_t>>;
std::map<BlobCacheKey, cldnn::primitive_id> blobMemCache;
int m_max_batch; int m_max_batch;
int m_curBatch; int m_curBatch;

View File

@ -163,7 +163,8 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
cldnn::primitive_id constPrimID; cldnn::primitive_id constPrimID;
auto data = op->get_data_ptr<char>(); auto data = op->get_data_ptr<char>();
auto bufIter = p.blobMemCache.find(data);
auto bufIter = p.blobMemCache.find(std::make_pair(data, constDims));
if (bufIter != p.blobMemCache.end()) { if (bufIter != p.blobMemCache.end()) {
constPrimID = bufIter->second; constPrimID = bufIter->second;
@ -198,7 +199,7 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
std::memcpy(&buf[0], &data[0], bufSize); std::memcpy(&buf[0], &data[0], bufSize);
} }
p.AddPrimitive(cldnn::data(initialconstPrimID, mem)); p.AddPrimitive(cldnn::data(initialconstPrimID, mem));
p.blobMemCache[data] = initialconstPrimID; p.blobMemCache[std::make_pair(data, constDims)] = initialconstPrimID;
constPrimID = initialconstPrimID; constPrimID = initialconstPrimID;
} }

View File

@ -60,8 +60,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
auto shape_a = op->get_input_shape(0); auto shape_a = op->get_input_shape(0);
auto shape_b = op->get_input_shape(1); auto shape_b = op->get_input_shape(1);
bool is_fc = ngraph::is_type<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) || bool is_fc = IsNodeOnConstPath(op->get_input_node_shared_ptr(1));
ngraph::is_type<ngraph::op::v0::FakeQuantize>(op->get_input_node_shared_ptr(1));
is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2; is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2;
if (is_fc) { if (is_fc) {

View File

@ -154,7 +154,7 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
auto data = static_cast<const char *>(meanBlobPtr->buffer()); auto data = static_cast<const char *>(meanBlobPtr->buffer());
auto bufIter = p.blobMemCache.find(data); auto bufIter = p.blobMemCache.find(std::make_pair(data, meanDims));
if (bufIter != p.blobMemCache.end()) { if (bufIter != p.blobMemCache.end()) {
meanBlobID = bufIter->second; meanBlobID = bufIter->second;
} else { } else {
@ -166,7 +166,7 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
std::memcpy(&buf[0], &data[0], bufSize); std::memcpy(&buf[0], &data[0], bufSize);
p.AddPrimitive(cldnn::data(meanBlobID, mem)); p.AddPrimitive(cldnn::data(meanBlobID, mem));
p.blobMemCache[data] = meanBlobID; p.blobMemCache[std::make_pair(data, meanDims)] = meanBlobID;
} }
break; break;
} }

View File

@ -14,6 +14,7 @@
#include <legacy/ie_layers.h> #include <legacy/ie_layers.h>
#include "gna_upstream_iterator.hpp" #include "gna_upstream_iterator.hpp"
#include "layers/gna_layer_info.hpp" #include "layers/gna_layer_info.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "gna_plugin_log.hpp" #include "gna_plugin_log.hpp"
#include "gna_slope_scale.h" #include "gna_slope_scale.h"
#include "runtime/pwl.h" #include "runtime/pwl.h"
@ -834,6 +835,7 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
THROW_GNA_EXCEPTION << "Two Input layers " << (*sourceLayerIt)->name THROW_GNA_EXCEPTION << "Two Input layers " << (*sourceLayerIt)->name
<< " and " << (*nextInputIt)->name << " have different scales in concat!!! \n"; << " and " << (*nextInputIt)->name << " have different scales in concat!!! \n";
} }
++nextInputIt;
} }
} }
@ -1107,8 +1109,9 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
double weights_reducer = 1.0; double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl); auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
if (conv) { if (conv) {
auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C); const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits<int32_t>::max(); weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer); weights_reducer = std::max(1.0, weights_reducer);
} }
quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer); quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);

View File

@ -30,6 +30,7 @@
#include "frontend/model_quantizer.hpp" #include "frontend/model_quantizer.hpp"
#include "layers/layers_builder.hpp" #include "layers/layers_builder.hpp"
#include "layers/gna_concat_layer.hpp" #include "layers/gna_concat_layer.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "layers/gna_crop_layer.hpp" #include "layers/gna_crop_layer.hpp"
#include "layers/gna_fake_quantize_layer.hpp" #include "layers/gna_fake_quantize_layer.hpp"
#include "round_float_define.hpp" #include "round_float_define.hpp"
@ -265,7 +266,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
} }
// Map 2d convolution to 1d if it's possible // Map 2d convolution to 1d if it's possible
if (in_height > 1 && in_width > 1 && in_width == convolution._kernel_x && convolution._stride_x == 1) { if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) {
in_width *= in_height; in_width *= in_height;
in_height = 1; in_height = 1;
out_width *= out_height; out_width *= out_height;
@ -298,9 +299,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
dnn->new_num_conv_columns = 0; dnn->new_num_conv_columns = 0;
} }
// TODO: refine following condition if (GNAConvolutionLayer::isConv2D(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input
(convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel
in_height != 1) { in_height != 1) {
// TensorFlow default layout is NHWC // TensorFlow default layout is NHWC
// OpenVino Default layout is NCHW // OpenVino Default layout is NCHW

View File

@ -110,6 +110,8 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetwork(const InferenceEngine::CNNNetwork &network, InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetwork(const InferenceEngine::CNNNetwork &network,
const std::map<std::string, std::string> &config_map, const std::map<std::string, std::string> &config_map,
InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; } InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; }
InferenceEngine::ExecutableNetwork LoadNetwork(const std::string &modelPath,
const std::map<std::string, std::string> &config_map) override { THROW_GNA_EXCEPTION << "Not implemented"; }
bool Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result); bool Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result);
void SetCore(InferenceEngine::ICore*) noexcept override {} void SetCore(InferenceEngine::ICore*) noexcept override {}
InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;} InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;}

View File

@ -0,0 +1,49 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
#include <legacy/ie_layers.h>
#include "../gna_graph_tools.hpp"
namespace GNAPluginNS {
struct GNAConvolutionLayer {
static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
}
// 3D input or 2D kernel
static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth) {
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
}
static double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
using KRT = std::pair<uint32_t, double>;
// Empirically determined weights reducers for 2D Convolution
// i.e.:
// for kernelSize >= 9 -> 1.3
// for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
auto reducer = 1.0;
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
const auto kernelSize = conv._kernel_x * conv._kernel_y;
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
if (r != reducers.end())
reducer = r->second;
}
return reducer;
}
};
} // namespace GNAPluginNS
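The reducers table above is kept sorted by kernel size in descending order, so the std::lower_bound call with a ">" comparator returns the first entry whose threshold does not exceed the kernel size. A self-contained restatement of just that lookup, handy for sanity-checking the thresholds (lookupReducer is an illustrative helper, not part of the plugin):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Illustrative helper: same lookup as getWeightsReducer(), minus the layer plumbing.
double lookupReducer(uint32_t kernelSize) {
    using KRT = std::pair<uint32_t, double>;
    const std::vector<KRT> reducers{ {9, 1.3}, {7, 1.2} };  // sorted by kernel size, descending
    // lower_bound with a ">" comparator returns the first entry whose threshold <= kernelSize.
    auto it = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
                               [](const KRT& l, const uint32_t& r) { return l.first > r; });
    return it != reducers.end() ? it->second : 1.0;
}

int main() {
    for (uint32_t k : {3u, 7u, 8u, 9u, 12u})
        std::cout << "kernelSize=" << k << " -> reducer=" << lookupReducer(k) << std::endl;
    // Prints 1, 1.2, 1.2, 1.3, 1.3 respectively.
    return 0;
}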

View File

@ -8,11 +8,6 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
find_package(TBBBIND_2_4 QUIET) find_package(TBBBIND_2_4 QUIET)
if (TBBBIND_2_4_FOUND) if (TBBBIND_2_4_FOUND)
message(STATUS "Static tbbbind_2_4 package was found") message(STATUS "Static tbbbind_2_4 package was found")
# WA: need to update TBBBind_2_4 package
set_target_properties(TBBbind::tbbbind_2_4 PROPERTIES
MAP_IMPORTED_CONFIG_MINSIZEREL Release
MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release)
endif() endif()
endif() endif()
@ -32,6 +27,9 @@ set(LEGACY_LIBRARY_SHARED_SRCS
"${LEGACY_SRC_ROOT}/ngraph_ops/nms_ie.cpp" "${LEGACY_SRC_ROOT}/ngraph_ops/nms_ie.cpp"
"${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp") "${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp")
set_source_files_properties(${LEGACY_LIBRARY_SHARED_SRCS} PROPERTIES
COMPILE_DEFINITIONS "USE_STATIC_IE")
set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp) set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp)
list(REMOVE_ITEM LIBRARY_SRC ${IE_STATIC_DEPENDENT_FILES}) list(REMOVE_ITEM LIBRARY_SRC ${IE_STATIC_DEPENDENT_FILES})
@ -203,7 +201,6 @@ if(WIN32)
endif() endif()
target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES} target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES}
inference_engine_snippets
inference_engine_transformations pugixml) inference_engine_transformations pugixml)
target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)

View File

@ -94,6 +94,28 @@ void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph::
} }
} }
void CNNNetworkNGraphImpl::validateFunctionNames() const {
// nGraph function parameters and pre-Results operations should have unique names
std::unordered_set<std::string> unique_names;
for (const auto& param : _ngraph_function->get_parameters()) {
if (unique_names.count(param->get_friendly_name())) {
IE_THROW() << "Function contains several inputs with one friendly name!";
}
unique_names.insert(param->get_friendly_name());
}
for (const auto& result : _ngraph_function->get_results()) {
const auto& parent = result->get_input_node_shared_ptr(0);
auto name = parent->get_friendly_name();
if (parent->get_output_size() > 1) {
name += "." + std::to_string(result->get_input_source_output(0).get_index());
}
if (unique_names.count(name) && !ngraph::op::is_parameter(parent)) {
IE_THROW() << "Function contains several inputs and outputs with one friendly name!";
}
unique_names.insert(name);
}
}
CNNNetworkNGraphImpl::CNNNetworkNGraphImpl( CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(
const std::shared_ptr<Function>& nGraph, const std::shared_ptr<Function>& nGraph,
const std::vector<IExtensionPtr>& exts) const std::vector<IExtensionPtr>& exts)
@ -113,6 +135,8 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(
network.setInputInfo(info); network.setInputInfo(info);
}; };
validateFunctionNames();
reshape(); reshape();
for (const auto& layer : _ngraph_function->get_parameters()) { for (const auto& layer : _ngraph_function->get_parameters()) {
std::string outName = layer->get_friendly_name(); std::string outName = layer->get_friendly_name();
@ -148,6 +172,7 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const CNNNetwork& network) {
} }
_ngraph_function = copyFunction(network.getFunction(), false); _ngraph_function = copyFunction(network.getFunction(), false);
validateFunctionNames();
InputsDataMap inputs = network.getInputsInfo(); InputsDataMap inputs = network.getInputsInfo();
OutputsDataMap outputs = network.getOutputsInfo(); OutputsDataMap outputs = network.getOutputsInfo();
@ -231,6 +256,13 @@ StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t
auto result = make_shared<::ngraph::op::Result>(layer->output(outputIndex)); auto result = make_shared<::ngraph::op::Result>(layer->output(outputIndex));
result->set_friendly_name(outputName); result->set_friendly_name(outputName);
_ngraph_function->add_results({result}); _ngraph_function->add_results({result});
// Check that we cannot add Result to layer with non unique friendly name
try {
validateFunctionNames();
} catch (...) {
_ngraph_function->remove_result(result);
throw;
}
if (_outputData.count(outputName) == 0) { if (_outputData.count(outputName) == 0) {
reshape(); reshape();
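The new validateFunctionNames() check rejects nGraph functions in which two parameters, or two pre-Result nodes, share a friendly name, both at CNNNetwork construction and when addOutput would create such a clash. A small sketch of the rejected case, assuming the 2021-era public nGraph/Inference Engine headers (shapes and names are arbitrary):

#include <memory>
#include <iostream>
#include <inference_engine.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

int main() {
    using namespace ngraph;
    auto a = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
    auto b = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
    a->set_friendly_name("input");
    b->set_friendly_name("input");  // deliberately duplicated friendly name
    auto add = std::make_shared<opset1::Add>(a, b);
    auto fn = std::make_shared<Function>(NodeVector{add}, ParameterVector{a, b});
    try {
        InferenceEngine::CNNNetwork net(fn);  // validateFunctionNames() rejects the duplicate
    } catch (const std::exception& ex) {
        std::cout << ex.what() << std::endl;
    }
    return 0;
}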

View File

@ -105,6 +105,7 @@ private:
*/ */
void reshape(); void reshape();
void reshape(const std::map<std::string, std::vector<size_t>>& inputShapes); void reshape(const std::map<std::string, std::vector<size_t>>& inputShapes);
void validateFunctionNames() const;
}; };
} // namespace details } // namespace details
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -74,11 +74,11 @@ InferRequest::Ptr ExecutableNetwork::CreateInferRequestPtr() {
} }
void ExecutableNetwork::Export(const std::string& modelFileName) { void ExecutableNetwork::Export(const std::string& modelFileName) {
EXEC_NET_CALL_STATEMENT(return _impl->Export(modelFileName)); EXEC_NET_CALL_STATEMENT(_impl->Export(modelFileName));
} }
void ExecutableNetwork::Export(std::ostream& networkModel) { void ExecutableNetwork::Export(std::ostream& networkModel) {
EXEC_NET_CALL_STATEMENT(return _impl->Export(networkModel)); EXEC_NET_CALL_STATEMENT(_impl->Export(networkModel));
} }
CNNNetwork ExecutableNetwork::GetExecGraphInfo() { CNNNetwork ExecutableNetwork::GetExecGraphInfo() {

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include "details/ie_so_loader.h"
#include "cpp/ie_memory_state.hpp" #include "cpp/ie_memory_state.hpp"
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp" #include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
#include "exception2status.hpp" #include "exception2status.hpp"
@ -19,7 +20,7 @@
namespace InferenceEngine { namespace InferenceEngine {
VariableState::VariableState(const std::shared_ptr<IVariableStateInternal>& impl, VariableState::VariableState(const std::shared_ptr<IVariableStateInternal>& impl,
const details::SharedObjectLoader::Ptr& so) : _impl(impl), _so(so) { const std::shared_ptr<details::SharedObjectLoader>& so) : _impl(impl), _so(so) {
if (impl == nullptr) { if (impl == nullptr) {
IE_THROW(NotAllocated) << "VariableState wrapper was not initialized."; IE_THROW(NotAllocated) << "VariableState wrapper was not initialized.";
} }

View File

@ -51,6 +51,9 @@ Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma
deviceName_ = "MULTI"; deviceName_ = "MULTI";
config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6); config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
} else { } else {
if (deviceName_.empty()) {
deviceName_ = "AUTO";
}
DeviceIDParser parser(deviceName_); DeviceIDParser parser(deviceName_);
deviceName_ = parser.getDeviceName(); deviceName_ = parser.getDeviceName();
std::string deviceIDLocal = parser.getDeviceID(); std::string deviceIDLocal = parser.getDeviceID();
@ -493,9 +496,8 @@ public:
return res; return res;
} }
// TODO: In future this method can be added to ICore interface
ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::string& deviceName, ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::string& deviceName,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config) override {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path"); OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path");
auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto parsed = parseDeviceNameIntoConfig(deviceName, config);
auto plugin = GetCPPPluginByName(parsed._deviceName); auto plugin = GetCPPPluginByName(parsed._deviceName);
@ -511,6 +513,8 @@ public:
auto cnnNetwork = ReadNetwork(modelPath, std::string()); auto cnnNetwork = ReadNetwork(modelPath, std::string());
res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, hash, modelPath); res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, hash, modelPath);
} }
} else if (cacheManager) {
res = plugin.LoadNetwork(modelPath, parsed._config);
} else { } else {
auto cnnNetwork = ReadNetwork(modelPath, std::string()); auto cnnNetwork = ReadNetwork(modelPath, std::string());
res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, {}, modelPath); res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, {}, modelPath);
@ -582,6 +586,15 @@ public:
} }
} }
// AUTO case
{
if (deviceName.find("AUTO:") == 0) {
IE_THROW()
<< "You can get specific metrics with the GetMetric only for the AUTO itself (without devices). "
"To get individual devices's metrics call GetMetric for each device separately";
}
}
auto parsed = parseDeviceNameIntoConfig(deviceName); auto parsed = parseDeviceNameIntoConfig(deviceName);
// we need to return a copy of Parameter object which is created on Core side, // we need to return a copy of Parameter object which is created on Core side,
@ -752,7 +765,7 @@ public:
* @brief Sets config values for a plugin or set of plugins * @brief Sets config values for a plugin or set of plugins
* @param deviceName A device name to set config to * @param deviceName A device name to set config to
* If empty, config is set for all the plugins / plugin's meta-data * If empty, config is set for all the plugins / plugin's meta-data
* @note `deviceName` is not allowed in form of MULTI:CPU, HETERO:FPGA,CPU * @note `deviceName` is not allowed in form of MULTI:CPU, HETERO:FPGA,CPU, AUTO:CPU
* just simple forms like CPU, GPU, MULTI, GPU.0, etc * just simple forms like CPU, GPU, MULTI, GPU.0, etc
*/ */
void SetConfigForPlugins(const std::map<std::string, std::string>& configMap, const std::string& deviceName) { void SetConfigForPlugins(const std::map<std::string, std::string>& configMap, const std::string& deviceName) {
@ -908,6 +921,10 @@ RemoteContext::Ptr Core::CreateContext(const std::string& deviceName, const Para
if (deviceName.find("MULTI") == 0) { if (deviceName.find("MULTI") == 0) {
IE_THROW() << "MULTI device does not support remote context"; IE_THROW() << "MULTI device does not support remote context";
} }
if (deviceName.find("AUTO") == 0) {
IE_THROW() << "AUTO device does not support remote context";
}
auto parsed = parseDeviceNameIntoConfig(deviceName, params); auto parsed = parseDeviceNameIntoConfig(deviceName, params);
return _impl->GetCPPPluginByName(parsed._deviceName).CreateContext(parsed._config); return _impl->GetCPPPluginByName(parsed._deviceName).CreateContext(parsed._config);
@ -920,6 +937,9 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) {
if (deviceName.find("MULTI") == 0) { if (deviceName.find("MULTI") == 0) {
IE_THROW() << "MULTI device does not support remote context"; IE_THROW() << "MULTI device does not support remote context";
} }
if (deviceName.find("AUTO") == 0) {
IE_THROW() << "AUTO device does not support remote context";
}
auto parsed = parseDeviceNameIntoConfig(deviceName, ParamMap()); auto parsed = parseDeviceNameIntoConfig(deviceName, ParamMap());
return _impl->GetCPPPluginByName(parsed._deviceName).GetDefaultContext(parsed._config); return _impl->GetCPPPluginByName(parsed._deviceName).GetDefaultContext(parsed._config);
@ -934,6 +954,10 @@ void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_)
IE_THROW() IE_THROW()
<< "MULTI device does not support extensions. Please, set extensions directly to fallback devices"; << "MULTI device does not support extensions. Please, set extensions directly to fallback devices";
} }
if (deviceName_.find("AUTO") == 0) {
IE_THROW()
<< "AUTO device does not support extensions. Please, set extensions directly to fallback devices";
}
_impl->AddExtension(extension); _impl->AddExtension(extension);
} }
@ -953,6 +977,9 @@ ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const st
if (deviceName.find("MULTI") == 0) { if (deviceName.find("MULTI") == 0) {
IE_THROW() << "MULTI device does not support ImportNetwork"; IE_THROW() << "MULTI device does not support ImportNetwork";
} }
if (deviceName.find("AUTO") == 0) {
IE_THROW() << "AUTO device does not support ImportNetwork";
}
auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto parsed = parseDeviceNameIntoConfig(deviceName, config);
return _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config); return _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config);
@ -998,6 +1025,12 @@ void Core::SetConfig(const std::map<std::string, std::string>& config, const std
"You can configure the devices with SetConfig before creating the MULTI on top."; "You can configure the devices with SetConfig before creating the MULTI on top.";
} }
// AUTO case
if (deviceName.find("AUTO:") == 0) {
IE_THROW() << "SetConfig is supported only for AUTO itself (without devices). "
"You can configure the devices with SetConfig before creating the AUTO on top.";
}
// GPU.0, FPGA.1 cases // GPU.0, FPGA.1 cases
if (deviceName.find(".") != std::string::npos) { if (deviceName.find(".") != std::string::npos) {
IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). " IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). "
@ -1029,6 +1062,14 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name
"GetConfig is also possible for the individual devices before creating the MULTI on top."; "GetConfig is also possible for the individual devices before creating the MULTI on top.";
} }
} }
// AUTO case
{
if (deviceName.find("AUTO:") == 0) {
IE_THROW()
<< "You can only GetConfig of the AUTO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the AUTO on top.";
}
}
auto parsed = parseDeviceNameIntoConfig(deviceName); auto parsed = parseDeviceNameIntoConfig(deviceName);
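Taken together, the Core changes above make AUTO behave like MULTI at the API boundary: the "AUTO:<device>" form is rejected for GetMetric, GetConfig and SetConfig, and remote contexts, extensions and ImportNetwork are rejected for AUTO altogether. A minimal sketch of the resulting behaviour from the application side, assuming the public InferenceEngine::Core class, the METRIC_KEY macro and the 2021.x InferenceEngine::Exception type (illustrative only, not part of this patch):

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core ie;

    // Allowed: plain "AUTO" without a device list.
    auto supported = ie.GetMetric("AUTO", METRIC_KEY(SUPPORTED_METRICS));
    (void)supported;

    // Rejected by the checks added above: the "AUTO:<device>" form throws,
    // as do CreateContext / ImportNetwork / AddExtension for AUTO.
    try {
        ie.GetMetric("AUTO:CPU", METRIC_KEY(SUPPORTED_METRICS));
    } catch (const InferenceEngine::Exception&) {
        // "You can get specific metrics with the GetMetric only for the AUTO itself ..."
    }
    return 0;
}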

View File

@ -88,6 +88,10 @@ public:
PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->LoadNetwork(network, config, context), actual)); PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->LoadNetwork(network, config, context), actual));
} }
ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::map<std::string, std::string>& config) {
PLUGIN_CALL_STATEMENT(return actual->LoadNetwork(modelPath, config));
}
QueryNetworkResult QueryNetwork(const CNNNetwork& network, QueryNetworkResult QueryNetwork(const CNNNetwork& network,
const std::map<std::string, std::string>& config) const { const std::map<std::string, std::string>& config) const {
QueryNetworkResult res; QueryNetworkResult res;
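The LoadNetwork(modelPath, config) wrapper added above forwards a model path straight to the plugin; the cacheManager branch added earlier in Core::LoadNetwork uses it to skip the explicit ReadNetwork step. A hedged usage sketch from the application side, assuming the public path-based Core::LoadNetwork overload and a hypothetical model file name:

#include <ie_core.hpp>

int main() {
    InferenceEngine::Core ie;
    // Path-based load: depending on the branches above, the network is served
    // from the cache, handed to the plugin as a path, or read via ReadNetwork.
    auto execNet = ie.LoadNetwork("model.xml", "CPU", {});
    (void)execNet;
    return 0;
}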

View File

@ -41,7 +41,7 @@ inline float asfloat(uint32_t v) {
return f; return f;
} }
// Function to convert F32 into F16 // Function to convert F16 into F32
float f16tof32(ie_fp16 x) { float f16tof32(ie_fp16 x) {
// this is storage for output result // this is storage for output result
uint32_t u = static_cast<uint32_t>(x); uint32_t u = static_cast<uint32_t>(x);

View File

@ -40,7 +40,6 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
${PUBLIC_HEADERS_DIR} ${PUBLIC_HEADERS_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/src
${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl ${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl
$<TARGET_PROPERTY:inference_engine_snippets,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES> $<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES> $<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_INCLUDE_DIRECTORIES> $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_INCLUDE_DIRECTORIES>
@ -61,7 +60,7 @@ add_library(${TARGET_NAME} SHARED
ie_add_vs_version_file(NAME ${TARGET_NAME} ie_add_vs_version_file(NAME ${TARGET_NAME}
FILEDESCRIPTION "Inference Engine Legacy library") FILEDESCRIPTION "Inference Engine Legacy library")
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets target_link_libraries(${TARGET_NAME} PUBLIC inference_engine
PRIVATE pugixml openvino::itt PRIVATE pugixml openvino::itt
${NGRAPH_LIBRARIES} inference_engine_transformations) ${NGRAPH_LIBRARIES} inference_engine_transformations)

View File

@ -24,7 +24,7 @@ public:
LSTMSequenceIE(const Output <Node> &X, LSTMSequenceIE(const Output <Node> &X,
const Output <Node> &H_t, const Output <Node> &H_t,
const Output <Node> &C_t, const Output <Node> &C_t,
const Output <Node> &seq_lenghts, const Output <Node> &seq_lengths,
const Output <Node> &WR, const Output <Node> &WR,
const Output <Node> &B, const Output <Node> &B,
size_t hidden_size, size_t hidden_size,

View File

@ -39,7 +39,6 @@
#include "legacy/ngraph_ops/rnn_sequence_ie.hpp" #include "legacy/ngraph_ops/rnn_sequence_ie.hpp"
#include "legacy/ngraph_ops/lstm_sequence_ie.hpp" #include "legacy/ngraph_ops/lstm_sequence_ie.hpp"
#include "legacy/ngraph_ops/gru_sequence_ie.hpp" #include "legacy/ngraph_ops/gru_sequence_ie.hpp"
#include "snippets/op/subgraph.hpp"
#include "exec_graph_info.hpp" #include "exec_graph_info.hpp"
#include "caseless.hpp" #include "caseless.hpp"
@ -1979,15 +1978,6 @@ void convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function
cnnLayer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames; cnnLayer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames;
} }
if (auto subgraph = ::ngraph::as_type_ptr<ngraph::snippets::op::Subgraph>(layer)) {
std::string names = "";
for (const auto& op : subgraph->get_body()->get_ordered_ops()) {
names += ", " + op->get_friendly_name();
}
cnnLayer->params["originalLayersNames"] += names;
}
std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer); std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer);
if (!primitivesPriority.empty()) { if (!primitivesPriority.empty()) {
cnnLayer->params["PrimitivesPriority"] = primitivesPriority; cnnLayer->params["PrimitivesPriority"] = primitivesPriority;

View File

@ -16,7 +16,7 @@ NGRAPH_RTTI_DEFINITION(op::GRUSequenceIE, "GRUSequenceIE", 4);
op::GRUSequenceIE::GRUSequenceIE(const Output<Node>& X, op::GRUSequenceIE::GRUSequenceIE(const Output<Node>& X,
const Output<Node>& H_t, const Output<Node>& H_t,
const Output<Node>& seq_lenghts, const Output<Node>& seq_lengths,
const Output<Node>& WR, const Output<Node>& WR,
const Output<Node>& B, const Output<Node>& B,
std::size_t hidden_size, std::size_t hidden_size,
@ -27,7 +27,7 @@ op::GRUSequenceIE::GRUSequenceIE(const Output<Node>& X,
float clip, float clip,
bool linear_before_reset, bool linear_before_reset,
int64_t seq_axis) int64_t seq_axis)
: RNNCellBase({X, H_t, seq_lenghts, WR, B}, hidden_size, clip, activations, activations_alpha, activations_beta), : RNNCellBase({X, H_t, seq_lengths, WR, B}, hidden_size, clip, activations, activations_alpha, activations_beta),
m_direction(direction), m_direction(direction),
m_linear_before_reset(linear_before_reset), m_linear_before_reset(linear_before_reset),
m_seq_axis(seq_axis) { m_seq_axis(seq_axis) {
@ -50,7 +50,7 @@ void op::GRUSequenceIE::validate_and_infer_types() {
auto b_pshape = get_input_partial_shape(4); auto b_pshape = get_input_partial_shape(4);
std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, seq_lengths_pshape, wr_pshape, b_pshape}; std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, seq_lengths_pshape, wr_pshape, b_pshape};
std::vector<std::string> in_names = {"X", "H", "seq_lenghts", "WR", "B"}; std::vector<std::string> in_names = {"X", "H", "seq_lengths", "WR", "B"};
// num_direction dimension should be squeezed, we don't support bidirectional case // num_direction dimension should be squeezed, we don't support bidirectional case
std::vector<size_t> ranks = {3, 2, 1, 2, 1}; std::vector<size_t> ranks = {3, 2, 1, 2, 1};
for (size_t i = 0; i < pshapes.size(); ++i) { for (size_t i = 0; i < pshapes.size(); ++i) {

View File

@ -17,7 +17,7 @@ NGRAPH_RTTI_DEFINITION(op::LSTMSequenceIE, "LSTMSequenceIE", 5);
op::LSTMSequenceIE::LSTMSequenceIE(const Output<Node> &X, op::LSTMSequenceIE::LSTMSequenceIE(const Output<Node> &X,
const Output<Node> &H_t, const Output<Node> &H_t,
const Output<Node> &C_t, const Output<Node> &C_t,
const Output<Node> &seq_lenghts, const Output<Node> &seq_lengths,
const Output<Node> &WR, const Output<Node> &WR,
const Output<Node> &B, const Output<Node> &B,
std::size_t hidden_size, std::size_t hidden_size,
@ -27,7 +27,7 @@ op::LSTMSequenceIE::LSTMSequenceIE(const Output<Node> &X,
const std::vector<float> &activations_beta, const std::vector<float> &activations_beta,
float clip, float clip,
int64_t seq_axis) int64_t seq_axis)
: RNNCellBase({X, H_t, C_t, seq_lenghts, WR, B}, hidden_size, clip, activations, activations_alpha, activations_beta), : RNNCellBase({X, H_t, C_t, seq_lengths, WR, B}, hidden_size, clip, activations, activations_alpha, activations_beta),
m_direction(direction), m_direction(direction),
m_seq_axis(seq_axis) { m_seq_axis(seq_axis) {
constructor_validate_and_infer_types(); constructor_validate_and_infer_types();
@ -52,7 +52,7 @@ void op::LSTMSequenceIE::validate_and_infer_types() {
std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, c_state_pshape, std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, c_state_pshape,
seq_lengths_pshape, wr_pshape, b_pshape}; seq_lengths_pshape, wr_pshape, b_pshape};
std::vector<std::string> in_names = {"X", "H", "C", "seq_lenghts", "WR", "B"}; std::vector<std::string> in_names = {"X", "H", "C", "seq_lengths", "WR", "B"};
// num_direction dimension should be squeezed, we don't support bidirectional case // num_direction dimension should be squeezed, we don't support bidirectional case
std::vector<size_t> ranks = {3, 2, 2, 1, 2, 1}; std::vector<size_t> ranks = {3, 2, 2, 1, 2, 1};
for (size_t i = 0; i < pshapes.size(); ++i) { for (size_t i = 0; i < pshapes.size(); ++i) {

View File

@ -48,7 +48,7 @@ void op::RNNSequenceIE::validate_and_infer_types() {
auto b_pshape = get_input_partial_shape(4); auto b_pshape = get_input_partial_shape(4);
std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, seq_lengths_pshape, wr_pshape, b_pshape}; std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, seq_lengths_pshape, wr_pshape, b_pshape};
std::vector<std::string> in_names = {"X", "H", "seq_lenghts", "WR", "B"}; std::vector<std::string> in_names = {"X", "H", "seq_lengths", "WR", "B"};
// num_direction dimension should be squeezed, we don't support bidirectional case // num_direction dimension should be squeezed, we don't support bidirectional case
std::vector<size_t> ranks = {3, 2, 1, 2, 1}; std::vector<size_t> ranks = {3, 2, 1, 2, 1};
for (size_t i = 0; i < pshapes.size(); ++i) { for (size_t i = 0; i < pshapes.size(); ++i) {

View File

@ -140,6 +140,12 @@ ngraph::matcher_pass_callback get_callback() {
} }
const ngraph::Shape constShape = constant->get_output_shape(0); const ngraph::Shape constShape = constant->get_output_shape(0);
const ngraph::Shape shape = partialShape.to_shape();
if (constShape.size() == 1ul && constShape[0] != 1 && constShape[0] != shape[1]) {
return false;
}
if ((constShape.size() > 5ul)) { if ((constShape.size() > 5ul)) {
return false; return false;
} }
@ -148,7 +154,6 @@ ngraph::matcher_pass_callback get_callback() {
return true; return true;
} }
const ngraph::Shape shape = partialShape.to_shape();
if (constShape.size() == shape.size()) { if (constShape.size() == shape.size()) {
if ((constShape[0] != 1ul) || (constShape[1] != shape[1])) { if ((constShape[0] != 1ul) || (constShape[1] != shape[1])) {
return false; return false;

View File

@ -0,0 +1,27 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <utility>
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
namespace ngraph {
namespace pass {
namespace low_precision {
class TRANSFORMATIONS_API ConvertSubtractConstant;
} // namespace low_precision
} // namespace pass
} // namespace ngraph
class ngraph::pass::low_precision::ConvertSubtractConstant : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertSubtractConstant(const std::vector<ngraph::element::Type>& constantPrecisions = {});
};

View File

@ -0,0 +1,25 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/ngraph.hpp>
#include "weightable_layer_transformation.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
public:
ConvolutionBackpropDataTransformation(const Params& params);
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision {
public: public:
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {} DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
explicit DataPrecision(const element::Type& precision) {
this->precision = precision;
min = getMinValue(precision, 256);
max = getMaxValue(precision, 256);
hasZeroPoint = false;
}
DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) : DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
precision(precision), precision(precision),
min(min), min(min),
@ -70,6 +77,10 @@ public:
return -1.0e15f; return -1.0e15f;
} else if (precision == element::f32) { } else if (precision == element::f32) {
return std::numeric_limits<float>::lowest(); return std::numeric_limits<float>::lowest();
} else if (precision == element::i4) {
return -8.f;
} else if (precision == element::u4) {
return 0.f;
} else { } else {
NGRAPH_CHECK(false, "unexpected precision ", precision); NGRAPH_CHECK(false, "unexpected precision ", precision);
} }
@ -88,6 +99,10 @@ public:
return 1.0e15f; return 1.0e15f;
} else if (precision == element::f32) { } else if (precision == element::f32) {
return std::numeric_limits<float>::max(); return std::numeric_limits<float>::max();
} else if (precision == element::i4) {
return 7.f;
} else if (precision == element::u4) {
return 15.f;
} else { } else {
THROW_TRANSFORMATION_EXCEPTION << "unexpected precision " << precision; THROW_TRANSFORMATION_EXCEPTION << "unexpected precision " << precision;
} }
@ -114,29 +129,6 @@ public:
static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) { static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
return signedInterval ? element::i8 : element::u8; return signedInterval ? element::i8 : element::u8;
} }
static float getMin(const size_t quantizationLevels, const bool signedInterval) {
if (quantizationLevels == 255) {
return signedInterval ? -127.0f : 0.0f;
} else if (quantizationLevels == 256) {
return signedInterval ? -128.0f : 0.0f;
} else {
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
// FIXME: not completed
return signedInterval ? -128.0f : 0.0f;
}
}
static float getMax(const size_t quantizationLevels, const bool signedInterval) {
if ((quantizationLevels == 255) || (quantizationLevels == 256)) {
return signedInterval ? 127.0f : 255.0f;
} else {
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
// FIXME: not completed
// return quantizationLevels - 1.0;
return signedInterval ? 127.0f : 255.0f;
}
}
}; };
inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) { inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
@ -173,7 +165,8 @@ public:
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 }, std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
std::vector<element::Type> precisionsOnWeights = { element::i8 }, std::vector<element::Type> precisionsOnWeights = { element::i8 },
element::Type deqPrecision = element::f32, element::Type deqPrecision = element::f32,
bool support3DTensorOnActivations = true) : bool support3DTensorOnActivations = true,
bool deconvolutionSpecificChannelsRatio = false) :
updatePrecisions(updatePrecisions), updatePrecisions(updatePrecisions),
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations), quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights), quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
@ -181,7 +174,8 @@ public:
precisionsOnActivations(precisionsOnActivations), precisionsOnActivations(precisionsOnActivations),
precisionsOnWeights(precisionsOnWeights), precisionsOnWeights(precisionsOnWeights),
deqPrecision(deqPrecision), deqPrecision(deqPrecision),
support3DTensorOnActivations(support3DTensorOnActivations) { support3DTensorOnActivations(support3DTensorOnActivations),
deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
if (precisionsOnActivations.size() == 0ul) { if (precisionsOnActivations.size() == 0ul) {
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed"; THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
} }
@ -226,6 +220,11 @@ public:
return *this; return *this;
} }
Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
return *this;
}
bool updatePrecisions; bool updatePrecisions;
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations; QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights; QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
@ -234,6 +233,7 @@ public:
std::vector<element::Type> precisionsOnWeights; std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision; element::Type deqPrecision;
bool support3DTensorOnActivations; bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;
}; };
class PrecisionDetails { class PrecisionDetails {
@ -310,6 +310,7 @@ protected:
std::vector<element::Type> precisionsOnWeights; std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision; element::Type deqPrecision;
bool support3DTensorOnActivations; bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;
// absolute value, used to determine quantization interval asymmetry // absolute value, used to determine quantization interval asymmetry
float quantizationIntervalAsymmetryThreshold; float quantizationIntervalAsymmetryThreshold;
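The DataPrecision changes above add 4-bit integer bounds and a single-argument constructor that fills min/max from the precision at 256 quantization levels. A small standalone sketch of the ranges those branches imply (hypothetical re-implementation, for illustration only; the real code uses ngraph::element::Type):

#include <cassert>
#include <string>

struct Range { float min; float max; };

// Mirrors the i4/u4/i8/u8 branches of DataPrecision::getMinValue / getMaxValue.
Range lowPrecisionRange(const std::string& precision) {
    if (precision == "i4") return { -8.f,    7.f };
    if (precision == "u4") return {  0.f,   15.f };
    if (precision == "i8") return { -128.f, 127.f };
    if (precision == "u8") return {  0.f,  255.f };
    return { 0.f, 0.f };
}

int main() {
    // DataPrecision(element::i4) would therefore carry [-8, 7] and hasZeroPoint = false.
    assert(lowPrecisionRange("i4").min == -8.f && lowPrecisionRange("i4").max == 7.f);
    assert(lowPrecisionRange("u4").max == 15.f);
    return 0;
}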

View File

@ -109,7 +109,8 @@ public:
const float max, const float max,
const bool hasZeroPoint, const bool hasZeroPoint,
const bool updatePrecision, const bool updatePrecision,
const element::Type deqPrecision = element::f32); const element::Type deqPrecision = element::f32,
const size_t outChannelsShapeIndex = 0);
static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize( static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
std::shared_ptr<opset1::FakeQuantize> fq, std::shared_ptr<opset1::FakeQuantize> fq,
@ -183,7 +184,7 @@ public:
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node); static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq); static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues); static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, int outChannelsShapeIndex = 0);
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false); static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false);
@ -191,8 +192,16 @@ public:
static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize); static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
static std::vector<element::Type> precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) noexcept;
private: private:
static std::shared_ptr<Node> foldFakeQuantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, const bool roundValuesWasSet); static std::shared_ptr<Node> foldFakeQuantize(
const std::shared_ptr<opset1::FakeQuantize>& fq,
const bool roundValues,
const bool roundValuesWasSet,
int outChannelsShapeIndex = 0);
// 1 - on weights // 1 - on weights
// 0 - weightable layer was not found // 0 - weightable layer was not found
@ -255,6 +264,8 @@ std::shared_ptr<Node> fold(Args&&... args) {
return node; return node;
} }
std::shared_ptr<Node> foldConvert(const Output<Node>& node, const element::Type targetPrecision);
template <typename T, typename... Args> template <typename T, typename... Args>
std::shared_ptr<Node> fold_reshape(Args&&... args) { std::shared_ptr<Node> fold_reshape(Args&&... args) {
std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...); std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...);

View File

@ -303,10 +303,6 @@ private:
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations, std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
GraphRewrite& pass, GraphRewrite& pass,
TransformationContext& context); TransformationContext& context);
std::vector<element::Type> precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) const noexcept;
}; };
class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite { class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {

View File

@ -22,7 +22,7 @@ public:
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override; bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
protected: protected:
void decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> weightableLayer) const; void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
static bool isGroup(const std::shared_ptr<Node>& node); static bool isGroup(const std::shared_ptr<Node>& node);
static bool isDepthwise(const std::shared_ptr<Node>& node); static bool isDepthwise(const std::shared_ptr<Node>& node);

View File

@ -42,6 +42,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
const auto parent = add->get_input_node_shared_ptr(dataBranchIndex); const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
if (is_type<opset1::Convolution>(parent) || if (is_type<opset1::Convolution>(parent) ||
is_type<opset1::GroupConvolution>(parent) || is_type<opset1::GroupConvolution>(parent) ||
is_type<opset1::ConvolutionBackpropData>(parent) ||
(is_type<opset1::MatMul>(parent) && (is_type<opset1::MatMul>(parent) &&
(is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) { (is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
return nullptr; return nullptr;

View File

@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
return false; return false;
} }
DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); std::vector<element::Type> concatParentsChildrensPrecisions = precisionsOnActivations;
if (dataPrecision.precision == ngraph::element::undefined) { fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions);
if (concatParentsChildrensPrecisions.empty()) {
return false; return false;
} }
std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]); fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
if (fq == nullptr) { if (fq == nullptr) {
return false; return false;
} }
@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
if (quantizationDetails.inputHighValues.size() != 1ul) { if (quantizationDetails.inputHighValues.size() != 1ul) {
return false; return false;
} }
std::vector<element::Type> fqChildrensPrecisions = precisionsOnActivations;
fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions);
concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions);
const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false); if (concatParentsChildrensPrecisions.empty()) {
if (dataPrecision2.precision == ngraph::element::undefined) {
return false; return false;
} }
if (dataPrecision.precision != dataPrecision2.precision) {
// quantization levels are the same, difference can be in sign
// wider interval (precision) is preferable: use signed if least one interval is signed
dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
}
} }
if (dataPrecision.precision == ngraph::element::undefined) { DataPrecision dataPrecision;
return false; if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) {
dataPrecision = DataPrecision(element::i8);
} else {
dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]);
} }
std::vector<QuantizationDetails> quantizationLayersDetails; std::vector<QuantizationDetails> quantizationLayersDetails;
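With the changes above, ConcatTransformation no longer takes the data precision of the first FakeQuantize: it intersects the precisions available around every FakeQuantize feeding the concat (via the new NetworkHelper::precisionIntersection) and then prefers i8 when it survives the intersection, otherwise the first remaining precision. A minimal stand-in sketch of that selection rule, using strings instead of ngraph::element::Type (illustrative only):

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical stand-in for NetworkHelper::precisionIntersection declared above.
std::vector<std::string> precisionIntersection(const std::vector<std::string>& v1,
                                               const std::vector<std::string>& v2) {
    std::vector<std::string> result;
    for (const auto& p : v1)
        if (std::find(v2.begin(), v2.end(), p) != v2.end())
            result.push_back(p);
    return result;
}

int main() {
    // One FakeQuantize allows {u8, i8}, another only {i8}: the intersection is {i8}.
    const auto common = precisionIntersection({"u8", "i8"}, {"i8"});
    if (common.empty()) return 1;  // the transform bails out in this case
    // Selection rule from ConcatTransformation::transform: prefer i8 if present.
    const std::string chosen =
        std::find(common.begin(), common.end(), "i8") != common.end() ? "i8" : common.front();
    return chosen == "i8" ? 0 : 1;
}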

View File

@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::sh
for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) { for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat); const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
for (const std::shared_ptr<ngraph::Node>& child : children) { for (const std::shared_ptr<ngraph::Node>& child : children) {
if (is_type<ngraph::opset1::Convolution>(child.get())) { if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
this->layerTransformationsManager->isQuantized(child)) {
return false; return false;
} }
} }

View File

@ -0,0 +1,98 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/convert_subtract_constant.hpp"
#include <memory>
#include <vector>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "low_precision/network_helper.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvertSubtractConstant, "ConvertSubtractConstant", 0);
// Original (FP16 as example, I8 in constantPrecisions):
//
// Constant
// | I8
// Convert Constant
// \ FP16 / FP16
// Subtract Constant
// \ FP16 / FP16
// Multiply
//
// Result:
//
// Constant Constant
// | I8 | I8
// Convert Convert
// \ FP16 / FP16
// Subtract Constant
// \ FP16 / FP16
// Multiply
//
ngraph::pass::low_precision::ConvertSubtractConstant::ConvertSubtractConstant(const std::vector<ngraph::element::Type>& constantPrecisions) {
auto weightsConstantWrapper = ngraph::pattern::wrap_type<opset1::Constant>(pattern::consumers_count(1));
auto weightsConvertWrapper = ngraph::pattern::wrap_type<opset1::Convert>({ weightsConstantWrapper }, pattern::consumers_count(1));
auto subtractConstantWrapper = ngraph::pattern::wrap_type<opset1::Constant>(pattern::consumers_count(1));
auto subtractWrapper = ngraph::pattern::wrap_type<opset1::Subtract>({ weightsConvertWrapper, subtractConstantWrapper }, pattern::consumers_count(1));
auto multiplyConstantWrapper = ngraph::pattern::wrap_type<opset1::Constant>(pattern::consumers_count(1));
auto multiplyWrapper = ngraph::pattern::wrap_type<opset1::Multiply>({ subtractWrapper, multiplyConstantWrapper }, pattern::consumers_count(1));
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool {
const auto& opsMap = m.get_pattern_value_map();
const auto weightsConvert = opsMap.at(weightsConvertWrapper).get_node_shared_ptr();
const auto quantizePrecision = weightsConvert->get_input_element_type(0);
const auto dequantizationPrecision = weightsConvert->get_output_element_type(0);
// validation by Convert operation input precisions
if (!constantPrecisions.empty()) {
const ngraph::element::Type inputPrecision = quantizePrecision;
if (std::find(constantPrecisions.begin(), constantPrecisions.end(), inputPrecision) == constantPrecisions.end()) {
return false;
}
}
const auto subtract = opsMap.at(subtractWrapper).get_node_shared_ptr();
if (!NetworkHelper::checkZeroPoint(subtract)) {
return false;
}
const auto subtractConstant = opsMap.at(subtractConstantWrapper).get_node_shared_ptr();
auto resultSubtractConstant = NetworkHelper::round(subtractConstant, quantizePrecision);
if (NetworkHelper::isScalarLike(resultSubtractConstant)) {
resultSubtractConstant = NetworkHelper::toScalar(resultSubtractConstant);
if (op::util::constantIsEqualTo(resultSubtractConstant, 0.f)) {
resultSubtractConstant = nullptr;
}
}
if (resultSubtractConstant == nullptr) {
const auto multiply = opsMap.at(multiplyWrapper).get_node_shared_ptr();
const auto newMultiply = std::make_shared<opset1::Multiply>(weightsConvert, opsMap.at(multiplyConstantWrapper).get_node_shared_ptr());
NetworkHelper::copyInfo(multiply, newMultiply);
replace_node(multiply, newMultiply);
} else {
NetworkHelper::copyInfo(subtractConstant, resultSubtractConstant);
const auto resultConvert = std::make_shared<opset1::Convert>(resultSubtractConstant, dequantizationPrecision);
NetworkHelper::copyInfo(subtractConstant, resultConvert);
resultConvert->set_friendly_name(subtractConstant->get_friendly_name() + "/Convert");
auto& rtInfo = resultConvert->get_rt_info();
rtInfo["DISABLED_CONSTANT_FOLDING"] = std::make_shared<VariantWrapper<std::string>>("");
const auto newSubtract = std::make_shared<opset1::Subtract>(opsMap.at(weightsConvertWrapper).get_node_shared_ptr(), resultConvert);
NetworkHelper::copyInfo(subtract, newSubtract);
replace_node(subtract, newSubtract);
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(multiplyWrapper, "ConvertSubtractConstant");
this->register_matcher(m, callback);
}
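ConvertSubtractConstant, defined above, rewrites the FP16/FP32 Subtract constant on the weights path into a low-precision constant followed by a dedicated Convert that is marked with DISABLED_CONSTANT_FOLDING, or removes the Subtract entirely when the rounded zero point is 0. A minimal registration sketch, assuming the standard ngraph::pass::Manager API (the helper name and the i8-only precision list are illustrative, not taken from this patch):

#include <vector>
#include <ngraph/pass/manager.hpp>
#include "low_precision/convert_subtract_constant.hpp"

// Hypothetical helper: schedules the new matcher pass so that it only touches
// Subtract constants whose weights dequantize from i8.
void registerConvertSubtractConstant(ngraph::pass::Manager& manager) {
    manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(
        std::vector<ngraph::element::Type>{ ngraph::element::i8 });
}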

View File

@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto convolution = m.get_match_root(); auto convolution = m.get_match_root();
if (!canConvolutionBeTransformed(context, convolution)) { if (!canConvolutionBeTransformed(context, convolution)) {
return false; auto weightInput = convolution->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>(
resultConstant,
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
replace_node(weightInput, resultConstant);
}
} else {
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
}
return true;
} }
convolution = NetworkHelper::separateInStandaloneBranch(convolution); convolution = NetworkHelper::separateInStandaloneBranch(convolution);
@ -79,6 +99,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
const auto newSubtract = as_type_ptr<opset1::Subtract>(subtract->clone_with_new_inputs({ const auto newSubtract = as_type_ptr<opset1::Subtract>(subtract->clone_with_new_inputs({
subtract->input_value(0).get_node_shared_ptr(), subtract->input_value(0).get_node_shared_ptr(),
newShift })); newShift }));
NetworkHelper::copyInfo(subtract, newSubtract);
replace_node(subtract, newSubtract); replace_node(subtract, newSubtract);
newSubtract->set_output_type(0, subtract->get_output_element_type(0), newSubtract->get_output_partial_shape(0)); newSubtract->set_output_type(0, subtract->get_output_element_type(0), newSubtract->get_output_partial_shape(0));
@ -203,7 +224,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
reshapeFromWeights : reshapeFromWeights :
multiplyFromWeights->input_value(0) multiplyFromWeights->input_value(0)
}), }),
fold<opset1::Convert>( foldConvert(
fold_reshape<opset1::Reshape>( fold_reshape<opset1::Reshape>(
multiplyFromWeights->input_value(1), multiplyFromWeights->input_value(1),
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape), std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
@ -230,6 +251,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto zeroPointConstant = fold<opset1::Broadcast>( auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->get_input_node_shared_ptr(1), subtractFromWeights->get_input_node_shared_ptr(1),
std::make_shared<opset1::Constant>(element::i32, Shape{ zeroPointShape.size() }, zeroPointShape)); std::make_shared<opset1::Constant>(element::i32, Shape{ zeroPointShape.size() }, zeroPointShape));
NetworkHelper::copyInfo(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant); replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
} }
} }

View File

@ -0,0 +1,218 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/convolution_backprop_data.hpp"
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include <cassert>
#include "low_precision/network_helper.hpp"
#include "low_precision/common/dequantization_op.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) {
}
void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>(
{ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>(), make_op_label<opset1::Constant>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>(
{ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>(), make_op_label<opset1::Constant>() }));
}
bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
if (deconvolutionSpecificChannelsRatio) {
size_t inputChannels = layer->get_input_shape(0)[1];
size_t outputChannels = layer->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return false;
}
}
return WeightableLayerTransformation::isQuantized(layer, false);
}
bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
auto convolutionBackpropData = m.get_match_root();
if (!canBeTransformed(context, convolutionBackpropData)) {
auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightsInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolutionBackpropData, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>(
resultConstant,
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
replace_node(weightsInput, resultConstant);
}
} else {
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
}
return true;
}
convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
{
if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract;
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
NetworkHelper::optimizeSubtract(dequantization.subtract);
}
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0),
Shape{ 1 },
reducedConstant->cast_vector<float>()[0]);
auto inputs = convolutionBackpropData->input_values();
inputs[0] = dequantization.multiply->input_value(0);
const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
const auto relaxedConvolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
*as_type_ptr<opset1::ConvolutionBackpropData>(copyNode),
std::vector<element::Type>{deqPrecision, deqPrecision},
std::vector<element::Type>{deqPrecision});
const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
std::vector<element::Type>{ deqPrecision, deqPrecision },
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
replace_node(convolutionBackpropData, newConvolution);
convolutionBackpropData = newConvolution;
}
}
{
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);
if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
const std::shared_ptr<opset1::FakeQuantize> fq = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
std::shared_ptr<ngraph::Node> newFQ = NetworkHelper::fold_fake_quantize(fq, true);
NetworkHelper::copyInfo(fq, newFQ);
replace_node(fq, newFQ);
}
std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
convolutionBackpropData->input_value(1).get_node_shared_ptr());
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
{
Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
auto inputs = convolutionBackpropData->input_values();
inputs[1] = multiplyFromWeights->input_value(0);
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
convolutionBackpropData->copy_with_new_inputs(inputs),
foldConvert(
fold_reshape<opset1::Reshape>(
multiplyFromWeights->input_value(1),
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
false),
convolutionBackpropData->get_output_element_type(0)));
replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
}
if (subtractFromWeights != nullptr) {
// optimize zero point on weights
auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights);
if (optimizedSubtract == nullptr) {
subtractFromWeights = nullptr;
} else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
const Shape weightsShape = subtractFromWeights->input(0).get_shape();
Shape zeroPointShape(weightsShape.size(), 1ul);
zeroPointShape[1] = weightsShape[1];
auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->get_input_node_shared_ptr(1),
std::make_shared<opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
}
}
std::shared_ptr<opset1::Convert> convertFromWeights =
as_type_ptr<opset1::Convert>(
subtractFromWeights == nullptr ?
multiplyFromWeights->get_input_node_shared_ptr(0) :
subtractFromWeights->get_input_node_shared_ptr(0));
if (convertFromWeights != nullptr) {
auto inputs = convolutionBackpropData->input_values();
inputs[1] = convolutionBackpropData->get_input_node_ptr(1)->input_value(0);
// remove Convert on weights
auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
replace_node(convolutionBackpropData, newConvolution);
convolutionBackpropData = newConvolution;
}
}
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
updateOutput(context, finalDequantization, convolutionBackpropData);
auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
if (is_type<opset1::Reshape>(onWeights)) {
onWeights = onWeights->get_input_node_shared_ptr(0);
}
if (is_type<opset1::Subtract>(onWeights)) {
auto& rt = onWeights->get_rt_info();
rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
}
return true;
}
bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
if (deconvolutionSpecificChannelsRatio) {
size_t inputChannels = op->get_input_shape(0)[1];
size_t outputChannels = op->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return false;
}
}
return canConvolutionBeTransformed(context, op);
}
} // namespace low_precision
} // namespace pass
} // namespace ngraph
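Both isQuantized and canBeTransformed above gate the deconvolution path behind a channel-count heuristic when deconvolutionSpecificChannelsRatio is enabled: input channels must be a multiple of 4 and output channels a multiple of 16, otherwise the layer is left unquantized. A tiny standalone sketch of that guard (hypothetical function name, for illustration only):

#include <cstddef>

// Mirrors the channel-ratio check used by ConvolutionBackpropDataTransformation
// when deconvolutionSpecificChannelsRatio is enabled.
bool deconvChannelsSupported(std::size_t inputChannels, std::size_t outputChannels) {
    return (inputChannels % 4 == 0) && (outputChannels % 16 == 0);
}

int main() {
    // 8 input / 32 output channels pass the heuristic; 6 / 32 does not.
    return deconvChannelsSupported(8, 32) && !deconvChannelsSupported(6, 32) ? 0 : 1;
}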

View File

@ -20,7 +20,7 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform
bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root()); std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root());
if (!NetworkHelper::isQuantizeSupported(layer)) { if (!QuantizationDetails::outputLayoutIsSupported(layer)) {
return false; return false;
} }
@ -114,15 +114,14 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const { const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const {
const std::shared_ptr<Node> eltwise = fakeQuantize->get_input_node_shared_ptr(0); const std::shared_ptr<Node> eltwise = fakeQuantize->get_input_node_shared_ptr(0);
std::shared_ptr<Node> inputLowConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(1), deqPrecision); std::shared_ptr<Node> inputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(1), deqPrecision);
std::shared_ptr<Node> inputHighConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(2), deqPrecision); std::shared_ptr<Node> inputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(2), deqPrecision);
std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise); std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
if (is_type<opset1::Multiply>(eltwise) && checkElementwise(eltwise)) { if (is_type<opset1::Multiply>(eltwise) && checkElementwise(eltwise)) {
const auto value = constant->get_output_element_type(0) == deqPrecision ? const auto value = constant->get_output_element_type(0) == deqPrecision ?
constant : constant :
fold<opset1::Convert>(constant, deqPrecision); foldConvert(constant, deqPrecision);
const auto valueVec = as_type_ptr<opset1::Constant>(value)->cast_vector<float>(); const auto valueVec = as_type_ptr<opset1::Constant>(value)->cast_vector<float>();
@ -144,19 +143,21 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
} else if (is_type<opset1::Subtract>(eltwise) && checkElementwise(eltwise)) { } else if (is_type<opset1::Subtract>(eltwise) && checkElementwise(eltwise)) {
const auto value = constant->get_output_element_type(0) == deqPrecision ? const auto value = constant->get_output_element_type(0) == deqPrecision ?
constant : constant :
fold<opset1::Convert>(constant, deqPrecision); foldConvert(constant, deqPrecision);
inputLowConst_f32 = fq::updateShape(fold<opset1::Add>(inputLowConst_f32, value), fakeQuantize->get_output_shape(0)); inputLowConst_f32 = fq::updateShape(fold<opset1::Add>(inputLowConst_f32, value), fakeQuantize->get_output_shape(0));
inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0)); inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
} else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) { } else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
if (is_type<opset1::Convolution>(fq::getData(eltwise)) || if (is_type<opset1::Convolution>(fq::getData(eltwise)) ||
is_type<opset1::GroupConvolution>(fq::getData(eltwise))) { is_type<opset1::GroupConvolution>(fq::getData(eltwise)) ||
is_type<opset1::ConvolutionBackpropData>(fq::getData(eltwise)) ||
is_type<opset1::GroupConvolutionBackpropData>(fq::getData(eltwise))) {
return nullptr; return nullptr;
} }
const auto value = constant->get_output_element_type(0) == deqPrecision ? const auto value = constant->get_output_element_type(0) == deqPrecision ?
constant : constant :
fold<opset1::Convert>(constant, deqPrecision); foldConvert(constant, deqPrecision);
inputLowConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputLowConst_f32, value), fakeQuantize->get_output_shape(0)); inputLowConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputLowConst_f32, value), fakeQuantize->get_output_shape(0));
inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0)); inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
@ -176,8 +177,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
data->output(outputIdx), data->output(outputIdx),
inputLowConst_f32, inputLowConst_f32,
inputHighConst_f32, inputHighConst_f32,
fold<opset1::Convert>(fakeQuantize->input_value(3), deqPrecision), foldConvert(fakeQuantize->input_value(3), deqPrecision),
fold<opset1::Convert>(fakeQuantize->input_value(4), deqPrecision) })); foldConvert(fakeQuantize->input_value(4), deqPrecision) }));
replace_node(fakeQuantize, newFakeQuantize); replace_node(fakeQuantize, newFakeQuantize);
ngraph::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize); ngraph::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize);

View File

@ -22,18 +22,31 @@ bool FoldConvertTransformation::transform(TransformationContext& context, ngraph
return false; return false;
} }
const auto convert = subtract->get_input_node_shared_ptr(1); auto foldConvert = [&](const size_t branch) {
const auto resultConstant = fold<opset1::Convert>(convert->get_input_node_shared_ptr(0), convert->output(0).get_element_type()); const auto convert = subtract->get_input_node_shared_ptr(branch);
if (!is_type<opset1::Convert>(convert) || !is_type<opset1::Constant>(convert->get_input_node_shared_ptr(0))) {
return;
}
const auto resultConstant = ngraph::pass::low_precision::foldConvert(convert->get_input_node_shared_ptr(0), convert->output(0).get_element_type());
assert(is_type<opset1::Constant>(resultConstant));
replace_node(convert, resultConstant); replace_node(convert, resultConstant);
updateOutput(context, resultConstant, convert); updateOutput(context, resultConstant, convert);
};
foldConvert(0ul);
foldConvert(1ul);
return true; return true;
} }
bool FoldConvertTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const { bool FoldConvertTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
return return
is_type<opset1::Convert>(operation->get_input_node_ptr(1)) && (is_type<opset1::Convert>(operation->get_input_node_ptr(1)) &&
is_type<opset1::Constant>(operation->get_input_node_ptr(1)->get_input_node_ptr(0)); is_type<opset1::Constant>(operation->get_input_node_ptr(1)->get_input_node_ptr(0))) ||
(is_type<opset1::Convert>(operation->get_input_node_ptr(0)) &&
is_type<opset1::Constant>(operation->get_input_node_ptr(0)->get_input_node_ptr(0)));
} }
bool FoldConvertTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept { bool FoldConvertTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {

View File

@ -60,7 +60,7 @@ bool FuseConvertTransformation::transform(TransformationContext& context, ngraph
std::shared_ptr<Node> parent = convert->get_input_node_shared_ptr(0); std::shared_ptr<Node> parent = convert->get_input_node_shared_ptr(0);
if (is_type<opset1::Constant>(parent)) { if (is_type<opset1::Constant>(parent)) {
auto convertedConstant = fold<opset1::Convert>(parent, convert->get_convert_element_type()); auto convertedConstant = foldConvert(parent, convert->get_convert_element_type());
NetworkHelper::copyInfo(parent, convertedConstant); NetworkHelper::copyInfo(parent, convertedConstant);
replace_node(convert, convertedConstant); replace_node(convert, convertedConstant);
} else { } else {

View File

@@ -102,21 +102,21 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
    if (is_type<opset1::Multiply>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
        const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
            constant :
-            fold<opset1::Convert>(constant, eltwise->get_output_element_type(0));
+            foldConvert(constant, eltwise->get_output_element_type(0));

        inputLowConst = fuse_fq::updateShape(fold<opset1::Divide>(inputLowConst, value), fakeQuantize->get_output_shape(0));
        inputHightConst = fuse_fq::updateShape(fold<opset1::Divide>(inputHightConst, value), fakeQuantize->get_output_shape(0));
    } else if (is_type<opset1::Divide>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
        const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
            constant :
-            fold<opset1::Convert>(constant, eltwise->get_output_element_type(0));
+            foldConvert(constant, eltwise->get_output_element_type(0));

        inputLowConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputLowConst, value), fakeQuantize->get_output_shape(0));
        inputHightConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputHightConst, value), fakeQuantize->get_output_shape(0));
    } else if (is_type<opset1::Subtract>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
        const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
            constant :
-            fold<opset1::Convert>(constant, eltwise->get_output_element_type(0));
+            foldConvert(constant, eltwise->get_output_element_type(0));

        inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_shape(0));
        inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_shape(0));

@@ -128,7 +128,7 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
        const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
            constant :
-            fold<opset1::Convert>(constant, eltwise->get_output_element_type(0));
+            foldConvert(constant, eltwise->get_output_element_type(0));

        inputLowConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputLowConst, value), fakeQuantize->get_output_shape(0));
        inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_shape(0));

@@ -32,12 +32,12 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
    const auto multiplyConstant = multiply->get_input_node_shared_ptr(1);

-    auto outputLowConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
-    auto outputHighConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);
+    auto outputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
+    auto outputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);

    const auto value = multiplyConstant->get_output_element_type(0) == element::f32 ?
        multiplyConstant :
-        fold<opset1::Convert>(multiplyConstant, deqPrecision);
+        foldConvert(multiplyConstant, deqPrecision);

    outputLowConst_f32 = fold<opset1::Multiply>(outputLowConst_f32, value);
    outputHighConst_f32 = fold<opset1::Multiply>(outputHighConst_f32, value);

@@ -45,11 +45,18 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
    const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
    const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);

+    const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
+    const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
+
    auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
        opset1::FakeQuantize(
            fakeQuantizeParent->output(parentIndex),
-            fold<opset1::Convert>(fakeQuantize->input_value(1), deqPrecision),
-            fold<opset1::Convert>(fakeQuantize->input_value(2), deqPrecision),
+            inputLow,
+            inputHigh,
            outputLowConst_f32,
            outputHighConst_f32,
            fakeQuantize->get_levels()),

@@ -32,12 +32,12 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
    const auto subtractConstant = subtract->get_input_node_shared_ptr(1);

-    auto outputLowConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
-    auto outputHighConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);
+    auto outputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
+    auto outputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);

    const auto value = subtractConstant->get_output_element_type(0) == element::f32 ?
        subtractConstant :
-        fold<opset1::Convert>(subtractConstant, deqPrecision);
+        foldConvert(subtractConstant, deqPrecision);

    outputLowConst_f32 = fold<opset1::Subtract>(outputLowConst_f32, value);
    outputHighConst_f32 = fold<opset1::Subtract>(outputHighConst_f32, value);

@@ -45,11 +45,18 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
    const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
    const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);

+    const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
+    const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
+
    auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
        opset1::FakeQuantize(
            fakeQuantizeParent->output(parentIndex),
-            fold<opset1::Convert>(fakeQuantize->input_value(1), deqPrecision),
-            fold<opset1::Convert>(fakeQuantize->input_value(2), deqPrecision),
+            inputLow,
+            inputHigh,
            outputLowConst_f32,
            outputHighConst_f32,
            fakeQuantize->get_levels()),

@@ -76,7 +83,8 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma
    for (const auto& target : children) {
        const auto convolution = is_type<opset1::Convolution>(target.get_node());
        const auto groupConvolution = is_type<opset1::GroupConvolution>(target.get_node());
-        if (convolution || groupConvolution) {
+        const auto convolutionBackpropData = is_type<opset1::ConvolutionBackpropData>(target.get_node());
+        if (convolution || groupConvolution || convolutionBackpropData) {
            return false;
        }
    }

@@ -32,6 +32,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
    precisionsOnWeights(params.precisionsOnWeights),
    deqPrecision(params.deqPrecision),
    support3DTensorOnActivations(params.support3DTensorOnActivations),
+    deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio),
    quantizationIntervalAsymmetryThreshold(0.002f),
    zeroThreshold(1.e-6f),
    minQuantizationLevels(2ul),

@@ -80,7 +80,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
        // multiply by weights: [1, ..., 1, Y] x [Y, Z] => [1, ..., 1, Z]
        const auto newSubConst = NetworkHelper::toScalarIfPossible(fold<opset1::MatMul>(
            broadcastedConst,
-            fold<opset1::Convert>(newMatMul->get_input_node_shared_ptr(1), newMatMul->get_element_type()),
+            foldConvert(newMatMul->get_input_node_shared_ptr(1), newMatMul->get_element_type()),
            newMatMul->get_transpose_a(),
            newMatMul->get_transpose_b()));

@@ -128,7 +128,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
    const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<ngraph::opset1::Multiply>(
        mulConst1,
-        fold<opset1::Convert>(mulConst2, element::f32)));
+        foldConvert(mulConst2, element::f32)));

    const auto newMultiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
        std::vector<element::Type>{ deqPrecision, deqPrecision },

@@ -74,8 +74,8 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
            ngraph::op::TemporaryReplaceOutputType(multiplyParentParent, element::f32).get(),
            ngraph::op::TemporaryReplaceOutputType(
                fold<opset1::Multiply>(
-                    fold<opset1::Convert>(multiplyParentConst, element::f32),
-                    fold<opset1::Convert>(constParent, element::f32)),
+                    foldConvert(multiplyParentConst, element::f32),
+                    foldConvert(constParent, element::f32)),
                element::f32).get());

        NetworkHelper::copyInfo(multiplyParent.get_node_shared_ptr(), newMultiply);

@@ -91,7 +91,7 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext&
    if (dequantization.subtract != nullptr) {
        lastNode = std::make_shared<opset1::Add>(
            convolution,
-            fold<opset1::Negative>(fold<opset1::Convert>(dequantization.subtractConstant, element::f32)));
+            fold<opset1::Negative>(foldConvert(dequantization.subtractConstant, element::f32)));
        lastNode->set_friendly_name(convolution->get_friendly_name() + "/Add");
    }

@@ -69,7 +69,8 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr<Node>& op) {
        return is_type<opset1::Parameter>(node) ||
            is_type<opset1::Convolution>(node) ||
            is_type<opset1::GroupConvolution>(node) ||
-            is_type<opset1::MatMul>(node);
+            is_type<opset1::MatMul>(node) ||
+            is_type<opset1::ConvolutionBackpropData>(node);
    };

    if (isNotConstantPathOperation(op)) {

@@ -262,11 +263,11 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
            aBroadcasted ? b->get_output_shape(0) : a->get_output_shape(0),
            bDivAValues);
    } else {
-        b = fold<opset1::Convert>(b, element::f32);
-        a = fold<opset1::Convert>(a, element::f32);
+        b = foldConvert(b, element::f32);
+        a = foldConvert(a, element::f32);
        bDivA = fold<opset1::Divide>(b, a);
        // TODO: issue #49868
-        bDivA = fold<opset1::Convert>(bDivA, a->get_output_element_type(0));
+        bDivA = foldConvert(bDivA, a->get_output_element_type(0));
    }

    OutputVector inputs{ {}, {} };

@@ -440,8 +441,11 @@ std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<op
    return foldFakeQuantize(fq, false, false);
}

-std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues) {
-    return foldFakeQuantize(fq, roundValues, true);
+std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(
+    const std::shared_ptr<opset1::FakeQuantize>& fq,
+    const bool roundValues,
+    const int outChannelsShapeIndex) {
+    return foldFakeQuantize(fq, roundValues, true, outChannelsShapeIndex);
}

FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace) {

@@ -451,7 +455,7 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
    }

    if (dequantization.convert != nullptr) {
-        const std::shared_ptr<Node> result = fold<opset1::Convert>(dequantization.data, dequantization.convert->get_element_type());
+        const std::shared_ptr<Node> result = foldConvert(dequantization.data, dequantization.convert->get_element_type());
        if (is_type<opset1::Constant>(result)) {
            if (inPlace) {
                copyInfo(dequantization.convert, result);

@@ -467,7 +471,7 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
    }

    if (dequantization.subtractConvert != nullptr) {
-        const auto convertionResult = fold<opset1::Convert>(
+        const auto convertionResult = foldConvert(
            dequantization.subtractConstant,
            dequantization.subtractConvert->get_element_type());
        if (is_type<opset1::Constant>(convertionResult)) {

@@ -502,7 +506,7 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
            return dequantization;
        }
        if (dequantization.multiply->get_output_element_type(0) != result->get_element_type()) {
-            result = fold<opset1::Convert>(result, dequantization.multiply->get_output_element_type(0));
+            result = foldConvert(result, dequantization.multiply->get_output_element_type(0));
        }
        if (inPlace) {
            copyInfo(dequantization.multiply, result);

@@ -591,7 +595,8 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
    const std::shared_ptr<opset1::FakeQuantize>& fq,
    const bool roundValuesArg,
-    const bool roundValuesWasSet) {
+    const bool roundValuesWasSet,
+    const int outChannelsShapeIndex) {
    if (is_type<opset1::Constant>(fq->get_input_node_shared_ptr(0)) &&
        is_type<opset1::Constant>(fq->get_input_node_shared_ptr(1)) &&
        is_type<opset1::Constant>(fq->get_input_node_shared_ptr(2)) &&

@@ -609,16 +614,16 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
        if (type1.is_real() && !type2.is_real()) {
            return fold<opset1::Add>(
                fq->input_value(0),
-                fold<opset1::Convert>(fq->input_value(3), type1));
+                foldConvert(fq->input_value(3), type1));
        }
        if (!type1.is_real() && type2.is_real()) {
            return fold<opset1::Add>(
-                fold<opset1::Convert>(fq->input_value(0), type2),
+                foldConvert(fq->input_value(0), type2),
                fq->input_value(3));
        }
        return fold<opset1::Add>(
-            fold<opset1::Convert>(fq->input_value(0), element::f32),
-            fold<opset1::Convert>(fq->input_value(3), element::f32));
+            foldConvert(fq->input_value(0), element::f32),
+            foldConvert(fq->input_value(3), element::f32));
    }

    auto constant = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(0));
@@ -630,10 +635,20 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
        if (constShape.empty() || constShape.size() > 5lu) {
            THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
        }
+        if (outChannelsShapeIndex != 0 && outChannelsShapeIndex != 1) {
+            THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected outChannelsShapeIndex " << outChannelsShapeIndex;
+        }

-        // OIDHW
-        const size_t OC = constShape[0];
-        const size_t IC = constShape.size() > 1lu ? constShape[1] : 1;
+        size_t OC;
+        size_t IC;
+        // OIDHW or IODHW
+        if (constShape.size() == 1) {
+            OC = constShape[0];
+            IC = 1;
+        } else {
+            OC = constShape[outChannelsShapeIndex];
+            IC = constShape[outChannelsShapeIndex == 0 ? 1 : 0];
+        }
        const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1;
        const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1;
        const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1;

@@ -667,20 +682,25 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
        auto levels_1 = fq->get_levels() - 1.f;

-        //const size_t DHW = D * H * W;
+        const size_t DHW = D * H * W;
        const size_t IDHW = IC * D * H * W;

        const auto values = constant->cast_vector<float>();
        std::vector<float> quantizedValues(OC * IC * D * H * W);

        for (size_t oc = 0; oc < OC; ++oc) {
-            for (size_t iidx = 0; iidx < IDHW; ++iidx) {
            const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
            const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
            const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
            const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];

-                const size_t idx = oc * IDHW + iidx;
+            for (size_t ic = 0; ic < IC; ++ic) {
+                for (size_t iidx = 0; iidx < DHW; ++iidx) {
+                    size_t idx;
+                    if (outChannelsShapeIndex == 0) {
+                        idx = oc * IDHW + ic * DHW + iidx;
+                    } else {
+                        idx = ic * IDHW + oc * DHW + iidx;
+                    }

                    if (values[idx] <= inputLow) {
                        quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;

@@ -693,6 +713,7 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
                    }
                }
            }
+        }

        return std::make_shared<opset1::Constant>(fq->get_output_element_type(0), constShape, quantizedValues);
    }
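
The new outChannelsShapeIndex argument exists because Convolution weights keep output channels in the first dimension (OIDHW) while ConvolutionBackpropData weights keep them in the second (IODHW), so the flat offset of a given weight element differs between the two layouts. A generic, standalone illustration of the two row-major offsets (independent of the helper above; names and sizes are illustrative only):

#include <cstddef>

// Flat offset of weight element (oc, ic, s), where s indexes the D*H*W spatial block.
// OIDHW keeps output channels outermost; IODHW keeps input channels outermost.
size_t offsetOIDHW(size_t oc, size_t ic, size_t s, size_t IC, size_t DHW) {
    return (oc * IC + ic) * DHW + s;
}

size_t offsetIODHW(size_t oc, size_t ic, size_t s, size_t OC, size_t DHW) {
    return (ic * OC + oc) * DHW + s;
}
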
@@ -755,7 +776,7 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::composeFakeQuantize(const s
    if (dequantization.subtract != nullptr) {
        const auto subtractValue = (dequantization.subtractConvert == nullptr) ?
            dequantization.subtractConstant :
-            fold<opset1::Convert>(dequantization.subtractConstant, dequantization.subtractConvert->output(0).get_element_type());
+            foldConvert(dequantization.subtractConstant, dequantization.subtractConvert->output(0).get_element_type());

        const std::shared_ptr<opset1::FakeQuantize> replacement = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
            newFakeQuantize->input_value(0),

@@ -782,11 +803,11 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::composeFakeQuantize(const s
    assert((precision2.is_real() == precision1.is_real()) && (precision2.bitwidth() >= precision1.bitwidth()));

    auto output = fold<opset1::Multiply>(
-        precision2 != precision1 ? fold<opset1::Convert>(value1, precision2) : value1,
+        precision2 != precision1 ? foldConvert(value1, precision2) : value1,
        value2);

    if (output->output(0).get_element_type() != precision1) {
-        output = fold<opset1::Convert>(output, precision1);
+        output = foldConvert(output, precision1);
    }

    return output;

@@ -818,7 +839,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
    const float max,
    const bool hasZeroPoint,
    const bool updatePrecision,
-    const element::Type deqPrecision) {
+    const element::Type deqPrecision,
+    const size_t outChannelsShapeIndex) {
    using std::make_shared;

    const auto outputLow = fq->input_value(3);

@@ -898,7 +920,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
            newMax->output(0),
            fq->get_levels(),
            fq->get_auto_broadcast()),
-        true);
+        true,
+        outChannelsShapeIndex);
    NetworkHelper::copyInfo(fq, newFQ);

    std::shared_ptr<ngraph::Node> convert2;

@@ -907,7 +930,7 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
        std::shared_ptr<opset1::Constant> newFqConstant = as_type_ptr<opset1::Constant>(newFQ);

        if (is_type<opset1::Constant>(newFQ)) {
-            convert = fold<opset1::Convert>(newFQ, precision);
+            convert = foldConvert(newFQ, precision);
        } else if (is_type<opset1::FakeQuantize>(newFQ)) {
            newFQ = setOutDataPrecision(as_type_ptr<opset1::FakeQuantize>(newFQ), precision);
            convert = newFQ;

@@ -1032,13 +1055,13 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
    // TODO: threshold values have to used here to avoid shifts

-    const std::shared_ptr<opset1::Constant> scale = as_type_ptr<opset1::Constant>(fold<opset1::Convert>(fold<opset1::Divide>(
+    const std::shared_ptr<opset1::Constant> scale = as_type_ptr<opset1::Constant>(foldConvert(fold<opset1::Divide>(
        fold<opset1::Subtract>(outputHigh, outputLow),
        fold<opset1::Subtract>(newMax, newMin)), deqPrecision));
    assert(scale != nullptr);

    std::shared_ptr<opset1::Constant> shift = hasZeroPoint ?
-        as_type_ptr<opset1::Constant>(fold<opset1::Convert>(fold<opset1::Divide>(
+        as_type_ptr<opset1::Constant>(foldConvert(fold<opset1::Divide>(
            fold<opset1::Subtract>(fold<opset1::Multiply>(newMin, outputHigh), fold<opset1::Multiply>(newMax, outputLow)),
            fold<opset1::Subtract>(outputHigh, outputLow)), deqPrecision)) :
        nullptr;

@@ -1298,7 +1321,7 @@ FakeQuantizeDequantizationValues NetworkHelper::createEmptyValues(const FakeQuan
    std::shared_ptr<Node> subtract1Const = dequantization.subtract ?
        (dequantization.subtractConvert == nullptr ?
            dequantization.subtractConstant->clone_with_new_inputs({}) :
-            fold<opset1::Convert>(dequantization.subtractConstant, dequantization.subtractConvert->get_element_type())) :
+            foldConvert(dequantization.subtractConstant, dequantization.subtractConvert->get_element_type())) :
        std::make_shared<opset1::Constant>(parent->get_output_element_type(0), Shape({}), std::vector<float>({ 0.f }));

    subtract1Const->set_output_type(0, multiply1Const->get_output_element_type(0), subtract1Const->get_output_partial_shape(0));
@@ -1357,6 +1380,8 @@ std::shared_ptr<Node> NetworkHelper::optimizeSubtract(std::shared_ptr<opset1::Su
        }

        if (roundedShift) {
+            NetworkHelper::copyInfo(shift, roundedShift);
+
            // Propagate convertInputType down
            replacement = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(data, roundedShift);
            NetworkHelper::copyInfo(subtract, replacement);

@@ -1364,22 +1389,10 @@ std::shared_ptr<Node> NetworkHelper::optimizeSubtract(std::shared_ptr<opset1::Su
            replace_node(subtract, replacement);
        }

-        // We lose the tail conversion here; not needed if the next node is a TypeRelaxed
-        // TODO: check cases when Convert should be preserved
-        // Try to optimize Add out if constant is zero
-        // TODO: don't remove operation here: don't create this Subtraction operation in FQ decomposition
-        // if (isScalarLike(roundedShift)) {
-        //     auto scalar = distillToScalar(roundedShift);
-        //     if (op::util::constantIsEqualTo(scalar, 0)) {
-        //         replace_node(replacement, replacement->input_value(0).get_node_shared_ptr());
-        //         replacement = nullptr;
-        //     }
-        // }
        return replacement;
-    } else if (is_type<opset1::Convert>(subtractParent) || is_type<opset1::Constant>(subtractParent->get_input_node_shared_ptr(0))) {
+    } else if (is_type<opset1::Convert>(subtractParent) && is_type<opset1::Constant>(subtractParent->get_input_node_shared_ptr(0))) {
        auto replacement = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(data, subtractParent->get_input_node_shared_ptr(0));
+        NetworkHelper::copyInfo(subtract, replacement);
        NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, convertOutputType);
        replace_node(subtract, replacement);
        return replacement;

@@ -1453,7 +1466,7 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
                parent,
                dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ?
                    dequantization.subtractConstant :
-                    fold<opset1::Convert>(dequantization.subtractConstant, parentPrecision));
+                    foldConvert(dequantization.subtractConstant, parentPrecision));
            ngraph::copy_runtime_info({ newOperation, parent }, parent);
        } else {
            parent = std::make_shared<DequantizationSubtract>(parent, dequantization.subtractConvert);

@@ -1474,7 +1487,7 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
            DequantizationMultiply(parent,
                multiplyConstant->output(0).get_element_type() == parentPrecision ?
                    multiplyConstant :
-                    fold<opset1::Convert>(multiplyConstant->output(0), parentPrecision)),
+                    foldConvert(multiplyConstant->output(0), parentPrecision)),
            dequantization.multiply->get_output_element_type(0));
        ngraph::copy_runtime_info({ newOperation, parent }, parent);
    }

@@ -1541,6 +1554,14 @@ std::shared_ptr<Node> NetworkHelper::toScalarIfPossible(std::shared_ptr<Node> no
    return NetworkHelper::toScalar(constant);
}

+std::shared_ptr<Node> foldConvert(const Output<Node>& node, const element::Type targetPrecision) {
+    if (is_type<opset1::Constant>(node.get_node_shared_ptr()) && (node.get_element_type() == targetPrecision)) {
+        return node.get_node_shared_ptr();
+    }
+
+    return fold<opset1::Convert>(node, targetPrecision);
+}
+
bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const DataPrecision& dataPrecision) {
    if (!node) {
        return true;
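
foldConvert, added above, is why the fold<opset1::Convert>(...) call sites in this commit could be replaced mechanically: it behaves like fold<opset1::Convert> except that a Constant which already has the target precision is returned unchanged, so no redundant constant folding is performed. A minimal usage sketch, assuming the declaration accompanying the definition above lives in low_precision/network_helper.hpp:

#include "low_precision/network_helper.hpp"
#include <ngraph/opsets/opset1.hpp>

void foldConvertSketch() {
    using namespace ngraph;
    const auto constant = std::make_shared<opset1::Constant>(element::f32, Shape({}), std::vector<float>({ 0.5f }));

    // Already f32: the constant itself is returned, nothing is folded.
    const auto same = pass::low_precision::foldConvert(constant, element::f32);

    // Different precision: equivalent to fold<opset1::Convert>(constant, element::f16).
    const auto converted = pass::low_precision::foldConvert(constant, element::f16);
}
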
@@ -1550,12 +1571,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
    if (is_type<opset1::Subtract>(node)) {
        const auto parent = node->get_input_node_shared_ptr(0);
        const auto intNode = is_type<opset1::Convert>(parent) ? parent : node;
-        const auto intType = intNode->get_input_element_type(0);
-        if (intType == element::u8 || intType == element::i8) {
-            min = DataPrecision::getMinValue(intType, 256) - 0.5f;
-            max = DataPrecision::getMaxValue(intType, 256) + 0.5f;
+        const auto type = intNode->get_input_element_type(0);
+        if (type == element::u8 || type == element::i8) {
+            min = DataPrecision::getMinValue(type, 256) - 0.5f;
+            max = DataPrecision::getMaxValue(type, 256) + 0.5f;
        } else {
-            return false;
+            return type == element::f32 || type == element::f16;
        }
        auto subtract1input = node->get_input_node_shared_ptr(1);
        if (is_type<opset1::Convert>(subtract1input)) {

@@ -1597,6 +1618,23 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
    return true;
}

+std::vector<element::Type> NetworkHelper::precisionIntersection(
+    const std::vector<element::Type>& v1,
+    const std::vector<element::Type>& v2) noexcept {
+    std::vector<element::Type> v3;
+
+    auto v1Copy = v1;
+    auto v2Copy = v2;
+
+    std::sort(v1Copy.begin(), v1Copy.end());
+    std::sort(v2Copy.begin(), v2Copy.end());
+    std::set_intersection(v1Copy.begin(), v1Copy.end(),
+        v2Copy.begin(), v2Copy.end(),
+        std::back_inserter(v3));
+
+    return v3;
+}
+
} // namespace low_precision
} // namespace pass
} // namespace ngraph
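
precisionIntersection used to be a private helper of LowPrecisionTransformer (its old definition is removed further below in transformer.cpp) and is now a static member of NetworkHelper: it sorts copies of the two precision lists and returns their common subset. A small usage sketch (the element types are arbitrary examples):

#include <vector>
#include "low_precision/network_helper.hpp"

void precisionIntersectionSketch() {
    const std::vector<ngraph::element::Type> onActivations = { ngraph::element::u8, ngraph::element::i8 };
    const std::vector<ngraph::element::Type> supported = { ngraph::element::u8 };

    // result == { ngraph::element::u8 }
    const auto result = ngraph::pass::low_precision::NetworkHelper::precisionIntersection(onActivations, supported);
}
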

@@ -23,13 +23,14 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& reshap
    const auto reshapeValues = reshape->get_input_node_shared_ptr(1);
    NGRAPH_CHECK(reshapeValues != nullptr, "Reshape constant was not found");

-    const auto constantIndex = ngraph::pass::low_precision::NetworkHelper::getConstantInputIndex(elementwise);
-    NGRAPH_CHECK(constantIndex != -1);
-    const auto elementwiseValues = elementwise->get_input_node_shared_ptr(constantIndex);
+    auto elementwiseValuesConvert = as_type_ptr<opset1::Convert>(elementwise->get_input_node_shared_ptr(1ul));
+    auto elementwiseValues = elementwiseValuesConvert == nullptr ?
+        elementwise->get_input_node_shared_ptr(1ul) :
+        elementwiseValuesConvert->get_input_node_shared_ptr(0ul);
+    assert(is_type<opset1::Constant>(elementwiseValues));

    const std::shared_ptr<opset1::Reshape> newReshape = as_type_ptr<opset1::Reshape>(reshape->clone_with_new_inputs({
-        elementwise->get_input_node_shared_ptr(constantIndex == 1 ? 0ul : 1ul),
+        elementwise->get_input_node_shared_ptr(0ul),
        reshapeValues }));

    std::shared_ptr<Node> newElementwiseValues;

@@ -54,10 +55,15 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& reshap
            elementwiseValues->output(0),
            newReshapeValues->output(0),
            as_type_ptr<opset1::Reshape>(reshape)->get_special_zero());
+        assert(is_type<opset1::Constant>(newElementwiseValues));
    } else {
        newElementwiseValues = elementwiseValues;
    }

-    const auto newElementwise = elementwise->clone_with_new_inputs({ newReshape, newElementwiseValues });
+    const auto newElementwise = elementwise->clone_with_new_inputs({
+        newReshape,
+        elementwiseValuesConvert == nullptr ?
+            newElementwiseValues :
+            std::make_shared<opset1::Convert>(newElementwiseValues, elementwiseValuesConvert->get_destination_type()) });

    replace_node(reshape, newElementwise);
    copy_runtime_info({ elementwise, reshape }, { newReshape, newElementwise });
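
The helper now tolerates a Convert sitting on top of the elementwise constant: the fold is applied to the underlying Constant and the Convert is re-created over the folded result, so the dequantization subgraph keeps its original precisions. A self-contained sketch of that unwrap/fold/re-wrap idiom (the function name and arguments are illustrative, not part of the patch):

#include <memory>
#include "low_precision/network_helper.hpp"
#include <ngraph/opsets/opset1.hpp>

// constantBranch is either Constant or Convert(Constant); newShape is the Reshape pattern.
std::shared_ptr<ngraph::Node> foldConstantBranch(
        const std::shared_ptr<ngraph::Node>& constantBranch,
        const std::shared_ptr<ngraph::opset1::Constant>& newShape) {
    using namespace ngraph;
    const auto convertOnTop = as_type_ptr<opset1::Convert>(constantBranch);
    const auto constant = convertOnTop == nullptr ? constantBranch : convertOnTop->get_input_node_shared_ptr(0);

    // Fold the constant through the Reshape, much as the pass does above.
    const auto folded = pass::low_precision::fold<opset1::Reshape>(constant->output(0), newShape->output(0), false);

    // Restore the Convert (and therefore the original destination precision) if one was present.
    return convertOnTop == nullptr
        ? folded
        : std::make_shared<opset1::Convert>(folded, convertOnTop->get_destination_type());
}
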
@@ -87,8 +93,12 @@ ngraph::pass::low_precision::PullReshapeThroughDequantization::PullReshapeThroug
    const std::vector<ngraph::element::Type>& inputPrecisions) {
    const auto weights = ngraph::pattern::wrap_type<ngraph::opset1::Constant>(pattern::type_matches_any(inputPrecisions));
    const auto convert = ngraph::pattern::wrap_type<ngraph::opset1::Convert>({ weights });
-    const auto subtractConvert = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
-    const auto subtract = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>({ convert, subtractConvert });
+
+    const auto subtractValues = std::make_shared<pattern::op::Or>(OutputVector{
+        ngraph::pattern::wrap_type<ngraph::opset1::Constant>(),
+        ngraph::pattern::wrap_type<ngraph::opset1::Convert>({ngraph::pattern::wrap_type<ngraph::opset1::Constant>()})
+    });
+    const auto subtract = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>({ convert, subtractValues });

    const auto subtractOrConvert = std::make_shared<pattern::op::Or>(OutputVector{ convert, subtract });
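
With the pattern::op::Or alternative, the matcher accepts the zero-point branch of the weights dequantization either as a bare Constant or as Convert(Constant), i.e. both Subtract(Convert(weights), zeroPoint) and Subtract(Convert(weights), Convert(zeroPoint)) are pulled through. The same pattern reduced to a standalone sketch (assuming the usual ngraph pattern headers):

#include <memory>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

// Matches Subtract(Convert(Constant), Constant) as well as Subtract(Convert(Constant), Convert(Constant)).
std::shared_ptr<ngraph::Node> makeWeightsSubtractPattern() {
    using namespace ngraph;
    const auto weights = pattern::wrap_type<opset1::Constant>();
    const auto convert = pattern::wrap_type<opset1::Convert>({ weights });
    const auto zeroPoint = std::make_shared<pattern::op::Or>(OutputVector{
        pattern::wrap_type<opset1::Constant>(),
        pattern::wrap_type<opset1::Convert>({ pattern::wrap_type<opset1::Constant>() })
    });
    return pattern::wrap_type<opset1::Subtract>({ convert, zeroPoint });
}
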

@@ -24,10 +24,12 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
    const auto transposeValues = transpose->get_input_node_shared_ptr(1);
    NGRAPH_CHECK(transposeValues != nullptr, "transpose constant was not found");

-    const auto constantIndex = ngraph::pass::low_precision::NetworkHelper::getConstantInputIndex(elementwise);
-    NGRAPH_CHECK(constantIndex != -1);
-    auto elementwiseValues = elementwise->get_input_node_shared_ptr(constantIndex);
+    auto elementwiseValuesConvert = as_type_ptr<opset1::Convert>(elementwise->get_input_node_shared_ptr(1ul));
+    auto elementwiseValues = elementwiseValuesConvert == nullptr ?
+        elementwise->get_input_node_shared_ptr(1ul) :
+        elementwiseValuesConvert->get_input_node_shared_ptr(0ul);
+    assert(is_type<opset1::Constant>(elementwiseValues));
    const auto transposeValuesShape = transposeValues->output(0).get_shape();
    const auto elementwiseValuesShape = elementwiseValues->output(0).get_shape();
    if (elementwiseValuesShape.size() != shape_size(transposeValuesShape)) {

@@ -45,7 +47,7 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
    }

    const std::shared_ptr<opset1::Transpose> newTranspose = as_type_ptr<opset1::Transpose>(transpose->clone_with_new_inputs({
-        elementwise->get_input_node_shared_ptr(constantIndex == 1 ? 0ul : 1ul),
+        elementwise->get_input_node_shared_ptr(0ul),
        transposeValues }));

    const auto newElementwiseValues = ngraph::pass::low_precision::fold<opset1::Transpose>(

@@ -53,7 +55,11 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
        transposeValues->output(0));
    assert(is_type<opset1::Constant>(newElementwiseValues));

-    const auto newElementwise = elementwise->clone_with_new_inputs({ newTranspose, newElementwiseValues });
+    const auto newElementwise = elementwise->clone_with_new_inputs({
+        newTranspose,
+        elementwiseValuesConvert == nullptr ?
+            newElementwiseValues :
+            std::make_shared<opset1::Convert>(newElementwiseValues, elementwiseValuesConvert->get_destination_type()) });

    replace_node(transpose, newElementwise);
    copy_runtime_info({ elementwise, transpose }, { newTranspose, newElementwise });

@@ -85,8 +91,12 @@ ngraph::pass::low_precision::PullTransposeThroughDequantization::PullTransposeTh
    const std::vector<ngraph::element::Type>& inputPrecisions) {
    const auto weights = ngraph::pattern::wrap_type<ngraph::opset1::Constant>(pattern::type_matches_any(inputPrecisions));
    const auto convert = ngraph::pattern::wrap_type<ngraph::opset1::Convert>({ weights });
-    const auto subtractConvert = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
-    const auto subtract = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>({ convert, subtractConvert });
+
+    const auto subtractValues = std::make_shared<pattern::op::Or>(OutputVector{
+        ngraph::pattern::wrap_type<ngraph::opset1::Constant>(),
+        ngraph::pattern::wrap_type<ngraph::opset1::Convert>({ngraph::pattern::wrap_type<ngraph::opset1::Constant>()})
+    });
+    const auto subtract = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>({ convert, subtractValues });

    const auto subtractOrConvert = std::make_shared<pattern::op::Or>(OutputVector{ convert, subtract });

@@ -83,14 +83,20 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
            parent = subtract;
        }

-        const auto multiply = std::make_shared<DequantizationMultiply>(parent, splitedMul[i]);
+        const auto multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(parent, splitedMul[i]);
+        NetworkHelper::setOutDataPrecisionForTypeRelaxed(multiply, dequantization.multiply->get_output_element_type(0));
        copy_runtime_info({ newSplit, multiply }, multiply);
        lastNodes.push_back(multiply);
        replacement.push_back(multiply);
    }

-    replace_node(split, replacement);
+    for (size_t i = 0ul; i < newSplit->get_output_size(); ++i) {
+        for (auto input : split->output(i).get_target_inputs()) {
+            input.replace_source_output(replacement[i]);
+        }
+    }
+
    updateOutputs(context, lastNodes, newSplit);
    return true;
}
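
Because Split produces several outputs, the transformed subgraph can no longer be swapped in with a single replace_node(split, replacement) call; instead every consumer of every original output is re-pointed to the replacement node with the same index. The re-wiring pattern in isolation (a hypothetical helper, not part of the patch):

#include <memory>
#include <vector>
#include <ngraph/node.hpp>

// Re-point every consumer of each output of oldNode to output 0 of the
// replacement node with the same index.
void replacePerOutput(const std::shared_ptr<ngraph::Node>& oldNode,
                      const std::vector<std::shared_ptr<ngraph::Node>>& replacement) {
    for (size_t i = 0; i < oldNode->get_output_size(); ++i) {
        for (auto input : oldNode->output(i).get_target_inputs()) {
            input.replace_source_output(replacement[i]->output(0));
        }
    }
}
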

@@ -101,9 +101,9 @@ bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContex
    std::shared_ptr<Node> subtractConstant = fold<opset1::Multiply>(
        fold<opset1::Multiply>(
-            fold<opset1::Convert>(originalSubtractConstant, deqPrecision),
+            foldConvert(originalSubtractConstant, deqPrecision),
            std::make_shared<opset1::Constant>(deqPrecision, Shape{}, std::vector<float>{ -1.f })),
-        fold<opset1::Convert>(dequantization.multiply->get_input_node_shared_ptr(1), deqPrecision));
+        foldConvert(dequantization.multiply->get_input_node_shared_ptr(1), deqPrecision));

    if (is_type<opset1::Constant>(subtractConstant)) {
        std::shared_ptr<opset1::Constant> constant = as_type_ptr<opset1::Constant>(subtractConstant);

@@ -34,6 +34,7 @@
#include "low_precision/avg_pool.hpp"
#include "low_precision/clamp.hpp"
#include "low_precision/convolution.hpp"
+#include "low_precision/convolution_backprop_data.hpp"
#include "low_precision/depth_to_space.hpp"
#include "low_precision/fake_quantize.hpp"
#include "low_precision/group_convolution.hpp"

@@ -220,6 +221,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const
        add<AvgPoolTransformation, opset1::AvgPool>(params).
        add<ClampTransformation, opset1::Clamp>(params).
        add<ConvolutionTransformation, opset1::Convolution>(params).
+        add<ConvolutionBackpropDataTransformation, opset1::ConvolutionBackpropData>(params).
        add<DepthToSpaceTransformation, opset1::DepthToSpace>(params).
        add<FakeQuantizeTransformation, opset1::FakeQuantize>(params).
        add<GroupConvolutionTransformation, opset1::GroupConvolution>(params).

@@ -338,6 +340,7 @@ TypeRelaxedReplacer::TypeRelaxedReplacer() {
    make_matcher_type_relaxed<opset1::Clamp>(this);
    make_matcher_type_relaxed<opset1::Concat>(this);
    make_matcher_type_relaxed<opset1::Convolution>(this);
+    make_matcher_type_relaxed<opset1::ConvolutionBackpropData>(this);
    make_matcher_type_relaxed<opset1::DepthToSpace>(this);
    make_matcher_type_relaxed<opset1::FakeQuantize>(this);
    make_matcher_type_relaxed<opset1::GroupConvolution>(this);

@@ -430,23 +433,6 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
    network->validate_nodes_and_infer_types();
}

-std::vector<element::Type> LowPrecisionTransformer::precisionIntersection(
-    const std::vector<element::Type>& v1,
-    const std::vector<element::Type>& v2) const noexcept {
-    std::vector<element::Type> v3;
-
-    auto v1Copy = v1;
-    auto v2Copy = v2;
-
-    std::sort(v1Copy.begin(), v1Copy.end());
-    std::sort(v2Copy.begin(), v2Copy.end());
-    std::set_intersection(v1Copy.begin(), v1Copy.end(),
-        v2Copy.begin(), v2Copy.end(),
-        std::back_inserter(v3));
-
-    return v3;
-}
-
std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept {
    const std::string operantionType = LowPrecisionTransformations::getType(op);
    const std::vector<LayerTransformationPtr> transformation = transformations.find(operantionType);

@@ -456,7 +442,7 @@ std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(c
    std::vector<element::Type> precisions = transformation[0]->getPrecisionsOnActivations();

    for (const auto& transform : transformation) {
-        precisions = precisionIntersection(precisions, transform->getPrecisionsOnActivations());
+        precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations());
    }

    return precisions;
}

Some files were not shown because too many files have changed in this diff.