Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Steve Yoo 2021-05-10 08:11:10 +09:00
commit d4b251678e
105 changed files with 2693 additions and 640 deletions

View File

@ -61,6 +61,9 @@ jobs:
- script: | - script: |
sudo apt --assume-yes install libusb-1.0-0-dev sudo apt --assume-yes install libusb-1.0-0-dev
# For opencv-python: setuptools and upgrade
sudo apt-get install python3-setuptools
python3 -m pip install --upgrade pip
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt
# For running Python API tests # For running Python API tests
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt

View File

@ -30,7 +30,7 @@ message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID
message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE}) message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
# remove file with exported developer targets to force its regeneration # remove file with exported developer targets to force its regeneration
file(REMOVE "${CMAKE_BINARY_DIR}/inference_engine_targets.cmake") file(REMOVE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake")
foreach(component IN LISTS openvino_export_components) foreach(component IN LISTS openvino_export_components)
file(REMOVE "${CMAKE_BINARY_DIR}/${component}_dev_targets.cmake") file(REMOVE "${CMAKE_BINARY_DIR}/${component}_dev_targets.cmake")
unset(${component} CACHE) unset(${component} CACHE)

View File

@ -4,7 +4,7 @@
set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}") set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}")
if(CMAKE_CROSSCOMPILING AND LINUX AND X86_64) if(CMAKE_CROSSCOMPILING AND CMAKE_HOST_SYSTEM_NAME MATCHES Linux AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(protoc_version "3.7.1") set(protoc_version "3.7.1")
RESOLVE_DEPENDENCY(SYSTEM_PROTOC_ROOT RESOLVE_DEPENDENCY(SYSTEM_PROTOC_ROOT

View File

@ -12,7 +12,7 @@
# 2) ${TBBROOT} with IE own version of TBBConfig.cmake (actual for TBB < 2017.7) # 2) ${TBBROOT} with IE own version of TBBConfig.cmake (actual for TBB < 2017.7)
# #
## Path to IE own version of TBBConfig.cmake old TBB version without cmake config. # Path to IE own version of TBBConfig.cmake old TBB version without cmake config.
if(APPLE) if(APPLE)
set(IE_OWN_TBB_CONFIG tbb/mac) set(IE_OWN_TBB_CONFIG tbb/mac)
elseif(UNIX) elseif(UNIX)
@ -27,6 +27,7 @@ find_package(TBB
CONFIG CONFIG
PATHS ${TBBROOT}/cmake PATHS ${TBBROOT}/cmake
${IEDevScripts_DIR}/${IE_OWN_TBB_CONFIG} ${IEDevScripts_DIR}/${IE_OWN_TBB_CONFIG}
NO_CMAKE_FIND_ROOT_PATH
NO_DEFAULT_PATH NO_DEFAULT_PATH
) )

View File

@ -56,7 +56,7 @@ ie_option (VERBOSE_BUILD "shows extra information about build" OFF)
ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF) ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF)
ie_option (ENABLE_ALTERNATIVE_TEMP "in case of dependency conflict, to avoid modification in master, use local copy of dependency" ON) ie_option (ENABLE_ALTERNATIVE_TEMP "in case of dependency conflict, to avoid modification in master, use local copy of dependency" OFF)
ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "CMAKE_CXX_COMPILER_ID MATCHES ^(Apple)?Clang$; NOT WIN32" OFF) ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "CMAKE_CXX_COMPILER_ID MATCHES ^(Apple)?Clang$; NOT WIN32" OFF)

View File

@ -139,6 +139,8 @@ for more details and command line parameters used for the model conversion.
```bash ```bash
./<MO_INSTALL_DIR>/mo.py --input_model <PATH_TO_MODEL>/wnet_20.pb -b 1 ./<MO_INSTALL_DIR>/mo.py --input_model <PATH_TO_MODEL>/wnet_20.pb -b 1
``` ```
> **NOTE:** This conversion guide is applicable for the 2021.3 release of OpenVINO and that starting from 2021.4
> the OpenVINO supports this model out of the box.
Model Optimizer produces the following error: Model Optimizer produces the following error:
```bash ```bash

View File

@ -160,6 +160,9 @@ Standard TensorFlow\* operations:
| EuclideanNorm | No | | EuclideanNorm | No |
| FakeQuantWithMinMaxVars | No | | FakeQuantWithMinMaxVars | No |
| FakeQuantWithMinMaxVarsPerChannel | No | | FakeQuantWithMinMaxVarsPerChannel | No |
| FFT | Supported only when it is part of a sub-graph of the special form |
| FFT2D | Supported only when it is part of a sub-graph of the special form |
| FFT3D | Supported only when it is part of a sub-graph of the special form |
| Fill | No | | Fill | No |
| Floor | No | | Floor | No |
| FloorDiv | No | | FloorDiv | No |
@ -172,6 +175,9 @@ Standard TensorFlow\* operations:
| Greater | No | | Greater | No |
| GreaterEqual | No | | GreaterEqual | No |
| Identity | Not needed for shape inference | | Identity | Not needed for shape inference |
| IFFT | Supported only when it is part of a sub-graph of the special form |
| IFFT2D | Supported only when it is part of a sub-graph of the special form |
| IFFT3D | Supported only when it is part of a sub-graph of the special form |
| LRN | No | | LRN | No |
| Less | No | | Less | No |
| Log | No | | Log | No |

View File

@ -12,48 +12,57 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
| Component | Description | | Component | Description |
|-----------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |-----------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Model Optimizer](https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | This tool imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine. <br>Popular frameworks include Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. | | [Model Optimizer](https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | This tool imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine. <br>Popular frameworks include Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](https://docs.openvinotoolkit.org/latest/omz_tools_accuracy_checker.html), [Post-Training Optimization Tool](https://docs.openvinotoolkit.org/latest/pot_README.html) | | Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](https://docs.openvinotoolkit.org/latest/omz_tools_accuracy_checker.html), [Post-Training Optimization Tool](https://docs.openvinotoolkit.org/latest/pot_README.html), [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md) |
**The Runtime Package Includes the Following Components Installed by Dependency:** **The Runtime Package Includes the Following Components Installed by Dependency:**
| Component | Description | | Component | Description |
|-----------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |-----------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Inference Engine](https://pypi.org/project/openvino) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. | | [Inference Engine](https://pypi.org/project/openvino) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
## System Requirements ## System Requirements
The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html#inpage-nav-8).
The table below lists the supported operating systems and Python* versions required to run the installation. The table below lists the supported operating systems and Python* versions required to run the installation.
| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) | | Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
| :------------------------------------------------------------| :---------------------------------------------------| | :------------------------------------------------------------| :---------------------------------------------------|
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 | | Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 | | Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
| Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.7 | | Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.8 |
| CentOS* 7, 64-bit | 3.6, 3.7 | | CentOS* 7, 64-bit | 3.6, 3.7, 3.8 |
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 | | macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
| Windows 10*, 64-bit | 3.6, 3.7, 3.8 | | Windows 10*, 64-bit | 3.6, 3.7, 3.8 |
> **NOTE**: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated. > **NOTE**: This package can be installed on other versions of macOS, Linux and Windows, but only the specific versions above are fully validated.
## Install the Developer Package ## Install the Developer Package
### Step 1. Set Up Python Virtual Environment ### Step 1. Install External Software Dependencies
On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications.
### Step 2. Set Up Python Virtual Environment
To avoid dependency conflicts, use a virtual environment. Skip this To avoid dependency conflicts, use a virtual environment. Skip this
step only if you do want to install all dependencies globally. step only if you do want to install all dependencies globally.
Create virtual environment: Create virtual environment:
On Linux and macOS:
```sh
# Depending on your OS, this step may require installing python3-venv
python3 -m venv openvino_env
```
On Windows:
```sh ```sh
python -m pip install --user virtualenv
python -m venv openvino_env python -m venv openvino_env
``` ```
> **NOTE**: On Linux and macOS, you may need to type `python3` instead of ### Step 3. Activate Virtual Environment
`python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).
### Step 2. Activate Virtual Environment
On Linux and macOS: On Linux and macOS:
```sh ```sh
@ -64,14 +73,14 @@ On Windows:
openvino_env\Scripts\activate openvino_env\Scripts\activate
``` ```
### Step 3. Set Up and Update pip to the Highest Version ### Step 4. Set Up and Update pip to the Highest Version
Run the command below: Run the command below:
```sh ```sh
python -m pip install --upgrade pip python -m pip install --upgrade pip
``` ```
### Step 4. Install the Package ### Step 5. Install the Package
Run the command below: <br> Run the command below: <br>
@ -79,7 +88,7 @@ Run the command below: <br>
pip install openvino-dev pip install openvino-dev
``` ```
### Step 5. Verify that the Package is Installed ### Step 6. Verify that the Package is Installed
Run the command below (this may take a few seconds): Run the command below (this may take a few seconds):
```sh ```sh
@ -92,4 +101,3 @@ You will see the help message for Post-Training Optimization Tool if installatio
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) - Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
- OpenVINO™ toolkit online documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org) - OpenVINO™ toolkit online documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org)

View File

@ -8,7 +8,7 @@ license terms for third party or open source software included in or with the So
OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. Based on latest generations of artificial neural networks, including Convolutional Neural Networks (CNNs), recurrent and attention-based networks, the toolkit extends computer vision and non-vision workloads across Intel® hardware, maximizing performance. It accelerates applications with high-performance, AI and deep learning inference deployed from edge to cloud. OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. Based on latest generations of artificial neural networks, including Convolutional Neural Networks (CNNs), recurrent and attention-based networks, the toolkit extends computer vision and non-vision workloads across Intel® hardware, maximizing performance. It accelerates applications with high-performance, AI and deep learning inference deployed from edge to cloud.
The Intel® Distribution of OpenVINO™ toolkit for Linux\*: The Intel® Distribution of OpenVINO™ toolkit\*:
- Enables CNN-based deep learning inference on the edge - Enables CNN-based deep learning inference on the edge
- Supports heterogeneous execution across Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs - Supports heterogeneous execution across Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels - Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
@ -20,15 +20,16 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
| [Inference Engine](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_inference_engine_intro.html) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. | | [Inference Engine](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_inference_engine_intro.html) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
## System Requirements ## System Requirements
The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html#inpage-nav-8).
The table below lists the supported operating systems and Python* versions required to run the installation. The table below lists supported operating systems and Python* versions required to run the installation.
| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) | | Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
| :------------------------------------------------------------| :---------------------------------------------------| | :------------------------------------------------------------| :---------------------------------------------------|
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 | | Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 | | Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
| Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.7 | | Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.8 |
| CentOS* 7, 64-bit | 3.6, 3.7 | | CentOS* 7, 64-bit | 3.6, 3.7, 3.8 |
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 | | macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
| Windows 10*, 64-bit | 3.6, 3.7, 3.8 | | Windows 10*, 64-bit | 3.6, 3.7, 3.8 |
@ -36,7 +37,11 @@ The table below lists the supported operating systems and Python* versions requi
## Install the Runtime Package ## Install the Runtime Package
### Step 1. Set Up Python Virtual Environment ### Step 1. Install External Software Dependencies
On Windows* OS you are required to install [Microsoft* Visual C++ Redistributable Package (x64)](https://visualstudio.microsoft.com/downloads/#microsoft-visual-c-redistributable-for-visual-studio-2019) to be able to run OpenVINO™ applications.
### Step 2. Set Up Python Virtual Environment
To avoid dependency conflicts, use a virtual environment. Skip this To avoid dependency conflicts, use a virtual environment. Skip this
step only if you do want to install all dependencies globally. step only if you do want to install all dependencies globally.
@ -50,7 +55,7 @@ python -m venv openvino_env
> **NOTE**: On Linux and macOS, you may need to type `python3` instead of > **NOTE**: On Linux and macOS, you may need to type `python3` instead of
`python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/). `python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).
### Step 2. Activate Virtual Environment ### Step 3. Activate Virtual Environment
On Linux and macOS: On Linux and macOS:
```sh ```sh
@ -61,14 +66,14 @@ On Windows:
openvino_env\Scripts\activate openvino_env\Scripts\activate
``` ```
### Step 3. Set Up and Update pip to the Highest Version ### Step 4. Set Up and Update pip to the Highest Version
Run the command below: Run the command below:
```sh ```sh
python -m pip install --upgrade pip python -m pip install --upgrade pip
``` ```
### Step 4. Install the Package ### Step 5. Install the Package
Run the command below: <br> Run the command below: <br>
@ -76,7 +81,7 @@ Run the command below: <br>
pip install openvino pip install openvino
``` ```
### Step 5. Verify that the Package is Installed ### Step 6. Verify that the Package is Installed
Run the command below: Run the command below:
```sh ```sh

View File

@ -4,39 +4,33 @@
**Category**: *Normalization* **Category**: *Normalization*
**Short description**: *BatchNormInference* layer normalizes a `input` tensor by `mean` and `variance`, and applies a scale (`gamma`) to it, as well as an offset (`beta`). **Short description**: *BatchNormInference* performs Batch Normalization operation described in the [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167v2) article.
**Attributes**: **Detailed Description**
* *epsilon* *BatchNormInference* performs the following operations on a given data batch input tensor `data`:
* **Description**: *epsilon* is the number to be added to the variance to avoid division by zero when normalizing a value. For example, *epsilon* equal to 0.001 means that 0.001 is added to the variance.
* **Range of values**: a positive floating-point number
* **Type**: `float`
* **Default value**: None
* **Required**: *yes*
**Inputs** * Normalizes each activation \f$x^{(k)}\f$ by the mean and variance.
\f[
\hat{x}^{(k)}=\frac{x^{(k)} - E[x^{(k)}]}{\sqrt{Var(x^{(k)}) + \epsilon}}
\f]
where \f$E[x^{(k)}]\f$ and \f$Var(x^{(k)})\f$ are the mean and variance, calculated per channel axis of `data` input, and correspond to `mean` and `variance` inputs, respectively. Additionally, \f$\epsilon\f$ is a value added to the variance for numerical stability and corresponds to `epsilon` attribute.
* **1**: `input` - input tensor with data for normalization. At least a 2D tensor of type T, the second dimension represents the channel axis and must have a span of at least 1. **Required.** * Performs linear transformation of each normalized activation based on `gamma` and `beta` input, representing the scaling factor and shift, respectively.
* **2**: `gamma` - gamma scaling for normalized value. A 1D tensor of type T with the same span as input's channel axis. **Required.** \f[
* **3**: `beta` - bias added to the scaled normalized value. A 1D tensor of type T with the same span as input's channel axis.. **Required.** \hat{y}^{(k)}=\gamma^{(k)}\hat{x}^{(k)} + \beta^{(k)}
* **4**: `mean` - value for mean normalization. A 1D tensor of type T with the same span as input's channel axis.. **Required.** \f]
* **5**: `variance` - value for variance normalization. A 1D tensor of type T with the same span as input's channel axis.. **Required.** where \f$\gamma^{(k)}\f$ and \f$\beta^{(k)}\f$ are learnable parameters, calculated per channel axis, and correspond to `gamma` and `beta` inputs.
**Outputs**
* **1**: The result of normalization. A tensor of the same type and shape with 1st input tensor.
**Types**
* *T*: any numeric type.
**Mathematical Formulation** **Mathematical Formulation**
*BatchNormInference* normalizes the output in each hidden layer. Let `x` be a *d*-dimensional input, \f$x=(x_{1}\dotsc x_{d})\f$. Since normalization is applied to each activation \f$E[x^{(k)}]\f$, you can focus on a particular activation and omit k.
For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values. *BatchNormInference* performs Batch Normalization algorithm as follows:
* **Input**: Values of \f$x\f$ over a mini-batch: * **Input**: Values of \f$x\f$ over a mini-batch:
\f[ \f[
\beta = \{ x_{1...m} \} \mathcal{B} = \{ x_{1...m} \}
\f] \f]
* **Parameters to learn**: \f$ \gamma, \beta\f$ * **Parameters to learn**: \f$ \gamma, \beta\f$
* **Output**: * **Output**:
@ -45,22 +39,81 @@
\f] \f]
* **Mini-batch mean**: * **Mini-batch mean**:
\f[ \f[
\mu_{\beta} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i} \mu_{\mathcal{B}} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i}
\f] \f]
* **Mini-batch variance**: * **Mini-batch variance**:
\f[ \f[
\sigma_{\beta }^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\beta} )^{2} \sigma_{\mathcal{B}}^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\mathcal{B}})^{2}
\f] \f]
* **Normalize**: * **Normalize**:
\f[ \f[
\hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\beta}}{\sqrt{\sigma_{\beta }^{2} + \epsilon }} \hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\mathcal{B}}}{\sqrt{\sigma_{\mathcal{B}}^{2} + \epsilon }}
\f] \f]
* **Scale and shift**: * **Scale and shift**:
\f[ \f[
o_{i} \leftarrow \gamma\hat{b_{i}} + \beta = BN_{\gamma ,\beta } ( b_{i} ) o_{i} \leftarrow \gamma\hat{b_{i}} + \beta = BN_{\gamma ,\beta } ( b_{i} )
\f] \f]
**Example** **Attributes**:
* *epsilon*
* **Description**: *epsilon* is a constant added to the variance for numerical stability.
* **Range of values**: a positive floating-point number
* **Type**: `float`
* **Default value**: none
* **Required**: *yes*
**Inputs**
* **1**: `data` - A tensor of type *T* and at least rank 2. The second dimension represents the channel axis and must have a span of at least 1. **Required.**
* **2**: `gamma` - Scaling factor for normalized value. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
* **3**: `beta` - Bias added to the scaled normalized value. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
* **4**: `mean` - Value for mean normalization. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
* **5**: `variance` - Value for variance normalization. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
**Outputs**
* **1**: The result of element-wise Batch Normalization operation applied to the input tensor `data`. A tensor of type *T* and the same shape as `data` input tensor.
**Types**
* *T*: any supported floating-point type.
**Examples**
*Example: 2D input tensor `data`*
```xml
<layer ... type="BatchNormInference" ...>
<data epsilon="9.99e-06" />
<input>
<port id="0"> <!-- input -->
<dim>10</dim>
<dim>128</dim>
</port>
<port id="1"> <!-- gamma -->
<dim>128</dim>
</port>
<port id="2"> <!-- beta -->
<dim>128</dim>
</port>
<port id="3"> <!-- mean -->
<dim>128</dim>
</port>
<port id="4"> <!-- variance -->
<dim>128</dim>
</port>
</input>
<output>
<port id="5">
<dim>10</dim>
<dim>128</dim>
</port>
</output>
</layer>
```
*Example: 4D input tensor `data`*
```xml ```xml
<layer ... type="BatchNormInference" ...> <layer ... type="BatchNormInference" ...>

View File

@ -1,42 +1,36 @@
## BatchNormInference <a name="BatchNormInference"></a> {#openvino_docs_ops_normalization_BatchNormInference_5} ## BatchNormInference <a name="BatchNormInference"></a> {#openvino_docs_ops_normalization_BatchNormInference_5}
**Versioned name**: *BatchNormInference-5 **Versioned name**: *BatchNormInference-5*
**Category**: *Normalization* **Category**: *Normalization*
**Short description**: *BatchNormInference* layer normalizes a `input` tensor by `mean` and `variance`, and applies a scale (`gamma`) to it, as well as an offset (`beta`). **Short description**: *BatchNormInference* performs Batch Normalization operation described in the [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167v2) article.
**Attributes**: **Detailed Description**
* *epsilon* *BatchNormInference* performs the following operations on a given data batch input tensor `data`:
* **Description**: *epsilon* is the number to be added to the variance to avoid division by zero when normalizing a value. For example, *epsilon* equal to 0.001 means that 0.001 is added to the variance.
* **Range of values**: a positive floating-point number
* **Type**: `float`
* **Default value**: None
* **Required**: *yes*
**Inputs** * Normalizes each activation \f$x^{(k)}\f$ by the mean and variance.
\f[
\hat{x}^{(k)}=\frac{x^{(k)} - E[x^{(k)}]}{\sqrt{Var(x^{(k)}) + \epsilon}}
\f]
where \f$E[x^{(k)}]\f$ and \f$Var(x^{(k)})\f$ are the mean and variance, calculated per channel axis of `data` input, and correspond to `mean` and `variance` inputs, respectively. Additionally, \f$\epsilon\f$ is a value added to the variance for numerical stability and corresponds to `epsilon` attribute.
* **1**: `input` - input tensor with data for normalization. At least a 2D tensor of type T, the second dimension represents the channel axis and must have a span of at least 1. **Required.** * Performs linear transformation of each normalized activation based on `gamma` and `beta` input, representing the scaling factor and shift, respectively.
* **2**: `gamma` - gamma scaling for normalized value. A 1D tensor of type T with the same span as input's channel axis. **Required.** \f[
* **3**: `beta` - bias added to the scaled normalized value. A 1D tensor of type T with the same span as input's channel axis.. **Required.** \hat{y}^{(k)}=\gamma^{(k)}\hat{x}^{(k)} + \beta^{(k)}
* **4**: `mean` - value for mean normalization. A 1D tensor of type T with the same span as input's channel axis.. **Required.** \f]
* **5**: `variance` - value for variance normalization. A 1D tensor of type T with the same span as input's channel axis.. **Required.** where \f$\gamma^{(k)}\f$ and \f$\beta^{(k)}\f$ are learnable parameters, calculated per channel axis, and correspond to `gamma` and `beta` inputs.
**Outputs**
* **1**: The result of normalization. A tensor of the same type and shape with 1st input tensor.
**Types**
* *T*: any numeric type.
**Mathematical Formulation** **Mathematical Formulation**
*BatchNormInference* normalizes the output in each hidden layer. Let `x` be a *d*-dimensional input, \f$x=(x_{1}\dotsc x_{d})\f$. Since normalization is applied to each activation \f$E[x^{(k)}]\f$, you can focus on a particular activation and omit k.
For a particular activation, consider a mini-batch \f$\mathcal{B}\f$ of m values. *BatchNormInference* performs Batch Normalization algorithm as follows:
* **Input**: Values of \f$x\f$ over a mini-batch: * **Input**: Values of \f$x\f$ over a mini-batch:
\f[ \f[
\beta = \{ x_{1...m} \} \mathcal{B} = \{ x_{1...m} \}
\f] \f]
* **Parameters to learn**: \f$ \gamma, \beta\f$ * **Parameters to learn**: \f$ \gamma, \beta\f$
* **Output**: * **Output**:
@ -45,22 +39,81 @@
\f] \f]
* **Mini-batch mean**: * **Mini-batch mean**:
\f[ \f[
\mu_{\beta} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i} \mu_{\mathcal{B}} \leftarrow \frac{1}{m}\sum_{i=1}^{m}b_{i}
\f] \f]
* **Mini-batch variance**: * **Mini-batch variance**:
\f[ \f[
\sigma_{\beta }^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\beta} )^{2} \sigma_{\mathcal{B}}^{2}\leftarrow \frac{1}{m}\sum_{i=1}^{m} ( b_{i} - \mu_{\mathcal{B}})^{2}
\f] \f]
* **Normalize**: * **Normalize**:
\f[ \f[
\hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\beta}}{\sqrt{\sigma_{\beta }^{2} + \epsilon }} \hat{b_{i}} \leftarrow \frac{b_{i} - \mu_{\mathcal{B}}}{\sqrt{\sigma_{\mathcal{B}}^{2} + \epsilon }}
\f] \f]
* **Scale and shift**: * **Scale and shift**:
\f[ \f[
o_{i} \leftarrow \gamma\hat{b_{i}} + \beta = BN_{\gamma ,\beta } ( b_{i} ) o_{i} \leftarrow \gamma\hat{b_{i}} + \beta = BN_{\gamma ,\beta } ( b_{i} )
\f] \f]
**Example** **Attributes**:
* *epsilon*
* **Description**: *epsilon* is a constant added to the variance for numerical stability.
* **Range of values**: a positive floating-point number
* **Type**: `float`
* **Default value**: none
* **Required**: *yes*
**Inputs**
* **1**: `data` - A tensor of type *T* and at least rank 2. The second dimension represents the channel axis and must have a span of at least 1. **Required.**
* **2**: `gamma` - Scaling factor for normalized value. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
* **3**: `beta` - Bias added to the scaled normalized value. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
* **4**: `mean` - Value for mean normalization. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
* **5**: `variance` - Value for variance normalization. A 1D tensor of type *T* with the same span as `data` channel axis. **Required.**
**Outputs**
* **1**: The result of element-wise Batch Normalization operation applied to the input tensor `data`. A tensor of type *T* and the same shape as `data` input tensor.
**Types**
* *T*: any supported floating-point type.
**Examples**
*Example: 2D input tensor `data`*
```xml
<layer ... type="BatchNormInference" ...>
<data epsilon="9.99e-06" />
<input>
<port id="0"> <!-- input -->
<dim>10</dim>
<dim>128</dim>
</port>
<port id="1"> <!-- gamma -->
<dim>128</dim>
</port>
<port id="2"> <!-- beta -->
<dim>128</dim>
</port>
<port id="3"> <!-- mean -->
<dim>128</dim>
</port>
<port id="4"> <!-- variance -->
<dim>128</dim>
</port>
</input>
<output>
<port id="5">
<dim>10</dim>
<dim>128</dim>
</port>
</output>
</layer>
```
*Example: 4D input tensor `data`*
```xml ```xml
<layer ... type="BatchNormInference" ...> <layer ... type="BatchNormInference" ...>
@ -95,4 +148,3 @@
</output> </output>
</layer> </layer>
``` ```

View File

@ -4,7 +4,21 @@
**Category**: *Reduction* **Category**: *Reduction*
**Short description**: *ReduceSum* operation performs reduction with addition of the 1st input tensor in slices specified by the 2nd input. **Short description**: *ReduceSum* operation performs reduction with addition, on a given input `data`, along dimensions specified by `axes` input.
**Detailed Description**
*ReduceSum* operation performs reduction with addition, on a given input `data`, along dimensions specified by `axes` additional input.
Each element in the output is calculated as follows:
output[i0, i1, ..., iN] = sum[j0,..., jN](x[j0, ..., jN]))
where indices i0, ..., iN run through all valid indices for input `data` and summation `sum[j0, ..., jN]` has `jk = ik` for those dimensions `k` that are not in the set of indices specified by `axes` input.
Particular cases:
1. If `axes` is an empty list, then *ReduceSum* corresponds to identity operation.
2. If `axes` contains all dimensions of input `data`, a single reduction value is calculated for entire input tensor.
**Attributes** **Attributes**
@ -18,32 +32,20 @@
**Inputs** **Inputs**
* **1**: Input tensor x of type *T1*. **Required.** * **1**: `data` - A tensor of type *T* and arbitrary shape. **Required.**
* **2**: Scalar or 1D tensor of type *T_IND* with axis indices for the 1st input along which reduction is performed. Accepted range is `[-r, r-1]` where where `r` is the rank of input tensor, all values must be unique, repeats are not allowed. **Required.** * **2**: `axes` - Axis indices of `data` input tensor, along which reduction is performed. A scalar or 1D tensor of unique elements and type *T_IND*. The range of elements is `[-r, r-1]` where `r` is the rank of `data` input tensor. **Required.**
**Outputs** **Outputs**
* **1**: Tensor of the same type as the 1st input tensor and `shape[i] = shapeOf(input1)[i]` for all `i` that is not in the list of axes from the 2nd input. For dimensions from the 2nd input tensor, `shape[i] == 1` if `keep_dims == true`, or `i`-th dimension is removed from the output otherwise. * **1**: A tensor of type *T* and `shape[i] = shapeOf(data)[i]` for all `i` dimensions not in `axes` input tensor. For dimensions in `axes`, `shape[i] == 1` if `keep_dims == true`, otherwise the `i`-th dimension is removed from the output.
**Types** **Types**
* *T1*: any supported numeric type. * *T*: any supported numeric type.
* *T_IND*: `int64` or `int32`. * *T_IND*: `int64` or `int32`.
**Detailed Description** **Examples**
Each element in the output is the result of reduction with addition operation along dimensions specified by the 2nd input:
output[i0, i1, ..., iN] = sum[j0,..., jN](x[j0, ..., jN]))
Where indices i0, ..., iN run through all valid indices for the 1st input and summation `sum[j0, ..., jN]` have `jk = ik` for those dimensions `k` that are not in the set of indices specified by the 2nd input of the operation.
Corner cases:
1. When the 2nd input is an empty list, then this operation does nothing, it is an identity.
2. When the 2nd input contains all dimensions of the 1st input, this means that a single reduction value is calculated for entire input tensor.
**Example**
```xml ```xml
<layer id="1" type="ReduceSum" ...> <layer id="1" type="ReduceSum" ...>

View File

@ -10,9 +10,9 @@
**Inputs**: **Inputs**:
* **1**: Multidimensional input tensor of type *T*. *Required*. * **1**: Tensor of type *T* and arbitrary shape. **Required**.
* **2**: OD or 1D tensor of type *T_SHAPE* with dimensions indices to be set to 1. Values could be negative. *Required*. * **2**: Scalar or 1D tensor of type *T_INT* with indices of dimensions to unsqueeze. Values could be negative (have to be from range `[-R, R-1]`, where `R` is the rank of the output). **Required**.
**Outputs**: **Outputs**:
@ -20,13 +20,13 @@
**Types** **Types**
* *T*: supported type. * *T*: any numeric type.
* *T_SHAPE*: supported integer type. * *T_INT*: any supported integer type.
**Example** **Example**
*Example 1:* *Example 1: unsqueeze 2D tensor to a 4D tensor*
```xml ```xml
<layer ... type="Unsqueeze"> <layer ... type="Unsqueeze">
<input> <input>
@ -51,7 +51,7 @@
</layer> </layer>
``` ```
*Example 2: (unsqueeze 0D tensor (constant) to 1D tensor)* *Example 2: unsqueeze 0D tensor (constant) to 1D tensor*
```xml ```xml
<layer ... type="Unsqueeze"> <layer ... type="Unsqueeze">
<input> <input>

View File

@ -2,41 +2,37 @@
**Versioned name**: *ConvertLike-1* **Versioned name**: *ConvertLike-1*
**Category**: type conversion **Category**: *Type conversion*
**Short description**: Operation converts all elements of the 1st input tensor to a type of elements of 2nd input tensor. **Short description**: *ConvertLike* operation performs element-wise conversion on a given input tensor `data` to the element type of an additional input tensor `like`.
**Attributes**:
No attributes available.
**Inputs**
* **1**: `data` - A tensor of type T1. **Required.**
* **2**: `like` - A tensor of type T2. **Required.**
**Outputs**
* **1**: The result of element-wise *"ConvertLike"* operation. A tensor of the same type with `like` tensor and the same shape with `data` tensor.
**Types**
* *T1*: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16
* *T2*: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16
**Detailed description** **Detailed description**
Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float *3.141592* may be round to a 32-bit int *3*. Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float *3.141592* may be round to a 32-bit int *3*. The result of unsupported conversions is undefined, e.g. conversion of negative signed integer value to any unsigned integer type.
*a* - `data` input tensor, *b* - `like` input tensor. Output elements are represented as follows:
\f[ o[i] = Convert[destination_type=type(b)](a[i])
o_{i} = Convert[destination_type=type(b)](a_{i})
\f]
**Examples** where `a` and `b` correspond to `data` and `like` input tensors, respectively.
*Example 1* **Attributes**: *ConvertLike* operation has no attributes.
**Inputs**
* **1**: `data` - A tensor of type *T1* and arbitrary shape. **Required.**
* **2**: `like` - A tensor of type *T2* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of element-wise *ConvertLike* operation applied to input tensor `data`. A tensor of type *T2* and the same shape as `data` input tensor.
**Types**
* *T1*: any supported type
* *T2*: any supported type
**Example**
```xml ```xml
<layer ... type="ConvertLike"> <layer ... type="ConvertLike">

View File

@ -8,18 +8,21 @@
**Detailed description** **Detailed description**
Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float `3.141592` may be round to a 32-bit int `3`. The result of unsupported conversions is undefined, e.g. convertion of negative signed integer value to any unsigned integer type. Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float `3.141592` may be round to a 32-bit int `3`. The result of unsupported conversions is undefined, e.g. conversion of negative signed integer value to any unsigned integer type.
Output elements are represented as follows:
\f[ \f[
o_{i} = convert(a_{i}) o_{i} = Convert(a_{i})
\f] \f]
where `a` corresponds to the input tensor.
**Attributes**: **Attributes**:
* *destination_type* * *destination_type*
* **Description**: the destination type * **Description**: the destination type.
* **Range of values**: one of the supported types *T* * **Range of values**: one of the supported types *T*
* **Type**: `string` * **Type**: `string`
* **Default value**: None * **Default value**: None
@ -35,11 +38,9 @@ o_{i} = convert(a_{i})
**Types** **Types**
* *T*: `u8`, `u16`, `u32`, `u64`, `i8`, `i16`, `i32`, `i64`, `f16`, `f32`, `boolean`, `bf16` * *T*: any supported type
**Examples** **Example**
*Example 1*
```xml ```xml
<layer ... type="Convert"> <layer ... type="Convert">

View File

@ -32,6 +32,8 @@ add_subdirectory(thirdparty)
add_subdirectory(src) add_subdirectory(src)
add_subdirectory(ie_bridges/c)
if(ENABLE_TESTS) if(ENABLE_TESTS)
add_subdirectory(tests_deprecated) add_subdirectory(tests_deprecated)
add_subdirectory(tests) add_subdirectory(tests)
@ -58,12 +60,10 @@ endfunction()
# they must be built even if samples build is disabled (required for tests and tools). # they must be built even if samples build is disabled (required for tests and tools).
ie_build_samples() ie_build_samples()
if (ENABLE_PYTHON) if(ENABLE_PYTHON)
add_subdirectory(ie_bridges/python) add_subdirectory(ie_bridges/python)
endif() endif()
add_subdirectory(ie_bridges/c)
# #
# Install # Install
# #

View File

@ -18,7 +18,6 @@ else()
set(MODELS_BRANCH "master") set(MODELS_BRANCH "master")
endif() endif()
if (ENABLE_DATA) if (ENABLE_DATA)
add_models_repo(${ENABLE_DATA} "data:https://github.com/openvinotoolkit/testdata.git") add_models_repo(${ENABLE_DATA} "data:https://github.com/openvinotoolkit/testdata.git")
set(MODELS_PATH "${TEMP}/models/src/data") set(MODELS_PATH "${TEMP}/models/src/data")
@ -294,8 +293,6 @@ else()
reset_deps_cache(OpenCV_DIR) reset_deps_cache(OpenCV_DIR)
endif() endif()
# TODO: remove global CMAKE_MODULE_PATH
list(APPEND CMAKE_MODULE_PATH "${IEDevScripts_DIR}")
include(cmake/ie_parallel.cmake) include(cmake/ie_parallel.cmake)
if (ENABLE_GNA) if (ENABLE_GNA)

View File

@ -3,8 +3,28 @@
# #
function(set_ie_threading_interface_for TARGET_NAME) function(set_ie_threading_interface_for TARGET_NAME)
macro(ext_message TRACE_LEVEL)
if (TRACE_LEVEL STREQUAL FATAL_ERROR)
if(InferenceEngine_FIND_REQUIRED)
message(FATAL_ERROR "${ARGN}")
elseif(NOT InferenceEngine_FIND_QUIETLY)
message(WARNING "${ARGN}")
endif()
return()
elseif(NOT InferenceEngine_FIND_QUIETLY)
message(${TRACE_LEVEL} "${ARGN}")
endif ()
endmacro()
if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND) if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND)
find_package(TBB COMPONENTS tbb tbbmalloc) if(IEDevScripts_DIR)
find_package(TBB COMPONENTS tbb tbbmalloc
PATHS IEDevScripts_DIR
NO_CMAKE_FIND_ROOT_PATH
NO_DEFAULT_PATH)
else()
find_dependency(TBB COMPONENTS tbb tbbmalloc)
endif()
set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE) set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE)
set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE) set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE)
@ -15,12 +35,22 @@ function(set_ie_threading_interface_for TARGET_NAME)
endif() endif()
get_target_property(target_type ${TARGET_NAME} TYPE) get_target_property(target_type ${TARGET_NAME} TYPE)
if(target_type STREQUAL "INTERFACE_LIBRARY") if(target_type STREQUAL "INTERFACE_LIBRARY")
set(LINK_TYPE "INTERFACE") set(LINK_TYPE "INTERFACE")
elseif(target_type STREQUAL "EXECUTABLE" OR target_type STREQUAL "OBJECT_LIBRARY") elseif(target_type STREQUAL "EXECUTABLE" OR target_type STREQUAL "OBJECT_LIBRARY" OR
target_type STREQUAL "MODULE_LIBRARY")
set(LINK_TYPE "PRIVATE")
elseif(target_type STREQUAL "STATIC_LIBRARY")
# Affected libraries: inference_engine_s, inference_engine_preproc_s
# they don't have TBB in public headers => PRIVATE
set(LINK_TYPE "PRIVATE")
elseif(target_type STREQUAL "SHARED_LIBRARY")
# TODO: inference_engine only
# Why TBB propogates its headers to inference_engine?
set(LINK_TYPE "PRIVATE") set(LINK_TYPE "PRIVATE")
else() else()
set(LINK_TYPE "PUBLIC") ext_message(WARNING "Unknown target type")
endif() endif()
function(ie_target_link_libraries TARGET_NAME LINK_TYPE) function(ie_target_link_libraries TARGET_NAME LINK_TYPE)

View File

@ -1,62 +0,0 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
#
# FindIE
# ------
#
# This will define the following variables:
#
# InferenceEngine_FOUND - True if the system has the Inference Engine library
# InferenceEngine_INCLUDE_DIRS - Inference Engine include directories
# InferenceEngine_LIBRARIES - Inference Engine libraries
#
# and the following imported targets:
#
# IE::inference_engine - The Inference Engine library
# IE::inference_engine_c_api - The Inference Engine C API library
#
if(DEFINED IE_MAIN_SOURCE_DIR AND TARGET inference_engine)
set(InferenceEngine_LIBRARIES inference_engine inference_engine_c_api)
if(NOT TARGET IE::inference_engine)
add_library(IE::inference_engine ALIAS inference_engine)
endif()
if(TARGET inference_engine_c_api AND NOT TARGET IE::inference_engine_c_api)
add_library(IE::inference_engine_c_api ALIAS inference_engine_c_api)
endif()
else()
include("${CMAKE_CURRENT_LIST_DIR}/inference_engine_targets.cmake")
file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path)
set (ie_options THREADING)
load_cache("${cache_path}" READ_WITH_PREFIX "" ${ie_options})
message(STATUS "The following CMake options are exported from the Inference Engine build tree")
message("")
foreach(option IN LISTS ie_options)
message(" ${option}: ${${option}}")
endforeach()
message("")
# inherit TBB from main IE project if enabled
if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
load_cache("${cache_path}" READ_WITH_PREFIX "" TBB_DIR;ENABLE_TBB_RELEASE_ONLY)
set(TBB_FIND_RELEASE_ONLY ${ENABLE_TBB_RELEASE_ONLY})
find_package(TBB)
endif()
get_target_property(InferenceEngine_INCLUDE_DIRS IE::inference_engine INTERFACE_INCLUDE_DIRECTORIES)
set(InferenceEngine_LIBRARIES IE::inference_engine IE::inference_engine_c_api)
foreach(library IN LISTS InferenceEngine_LIBRARIES)
if(CMAKE_CROSSCOMPILING AND NOT MSVC)
set_property(TARGET ${library} PROPERTY
INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined")
endif()
endforeach()
if(NOT MSVC)
set_target_properties(${InferenceEngine_LIBRARIES} PROPERTIES
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
endif()
endif()

View File

@ -2,17 +2,23 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
set(InferenceEngine_VERSION 2.1.0) # TODO: hardcode will be fixed separatelly
set(PACKAGE_VERSION ${InferenceEngine_VERSION}) set(PACKAGE_VERSION_MAJOR 2)
set(PACKAGE_VERSION_MINOR 1)
set(PACKAGE_VERSION_PATCH 0)
set(PACKAGE_VERSION_COUNT 3)
set(PACKAGE_VERSION "${PACKAGE_VERSION_MAJOR}.${PACKAGE_VERSION_MINOR}.${PACKAGE_VERSION_PATCH}")
set(PACKAGE_VERSION_EXACT False) set(PACKAGE_VERSION_EXACT False)
set(PACKAGE_VERSION_COMPATIBLE False) set(PACKAGE_VERSION_COMPATIBLE False)
if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION) if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
set(PACKAGE_VERSION_EXACT True) set(PACKAGE_VERSION_EXACT True)
set(PACKAGE_VERSION_COMPATIBLE True) set(PACKAGE_VERSION_COMPATIBLE True)
endif() endif()
if(PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION) if(PACKAGE_FIND_VERSION_MAJOR EQUAL PACKAGE_VERSION_MAJOR AND
set(PACKAGE_VERSION_COMPATIBLE True) PACKAGE_FIND_VERSION VERSION_LESS PACKAGE_VERSION)
set(PACKAGE_VERSION_COMPATIBLE True)
endif() endif()

View File

@ -1,12 +1,10 @@
# Copyright (C) 2018-2020 Intel Corporation # Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
# #
# FindIE # Inference Engine cmake config
# ------ # ------
# #
# You can specify the path to Inference Engine files in IE_ROOT_DIR
#
# This will define the following variables: # This will define the following variables:
# #
# InferenceEngine_FOUND - True if the system has the Inference Engine library # InferenceEngine_FOUND - True if the system has the Inference Engine library
@ -19,150 +17,55 @@
# IE::inference_engine_c_api - The Inference Engine C API library # IE::inference_engine_c_api - The Inference Engine C API library
# #
macro(ext_message TRACE_LEVEL) @PACKAGE_INIT@
if (${TRACE_LEVEL} STREQUAL FATAL_ERROR)
if(InferenceEngine_FIND_REQUIRED)
message(FATAL_ERROR "${ARGN}")
elseif(NOT InferenceEngine_FIND_QUIETLY)
message(WARNING "${ARGN}")
endif()
return()
elseif(NOT InferenceEngine_FIND_QUIETLY)
message(${TRACE_LEVEL} "${ARGN}")
endif ()
endmacro()
set(InferenceEngine_FOUND FALSE) include(CMakeFindDependencyMacro)
if(TARGET IE::inference_engine) # need to store current PACKAGE_PREFIX_DIR, because it's overwritten by ngraph one
set(InferenceEngine_FOUND TRUE) set(IE_PACKAGE_PREFIX_DIR "${PACKAGE_PREFIX_DIR}")
get_target_property(InferenceEngine_INCLUDE_DIRS IE::inference_engine INTERFACE_INCLUDE_DIRECTORIES)
set(InferenceEngine_LIBRARIES IE::inference_engine
IE::inference_engine_c_api)
else()
if (WIN32)
set(_ARCH intel64)
else()
string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} _ARCH)
if(_ARCH STREQUAL "x86_64" OR _ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64
set(_ARCH intel64)
elseif(_ARCH STREQUAL "i386")
set(_ARCH ia32)
endif()
endif()
set(THREADING "@THREADING@") set(THREADING "@THREADING@")
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
# check whether setvars.sh is sourced set_and_check(_tbb_dir "@PACKAGE_IE_TBB_DIR@")
if(NOT IE_ROOT_DIR AND (DEFINED ENV{InferenceEngine_DIR} OR InferenceEngine_DIR OR DEFINED ENV{INTEL_OPENVINO_DIR})) find_dependency(TBB
if (EXISTS "${InferenceEngine_DIR}") COMPONENTS tbb tbbmalloc
# InferenceEngine_DIR manually set via command line params CONFIG
set(IE_ROOT_DIR "${InferenceEngine_DIR}/..") PATHS ${TBBROOT}/cmake
elseif (EXISTS "$ENV{InferenceEngine_DIR}") ${_tbb_dir}
# InferenceEngine_DIR manually set via env NO_CMAKE_FIND_ROOT_PATH
set(IE_ROOT_DIR "$ENV{InferenceEngine_DIR}/..") NO_DEFAULT_PATH)
elseif (EXISTS "$ENV{INTEL_OPENVINO_DIR}/inference_engine")
# if we installed DL SDK
set(IE_ROOT_DIR "$ENV{INTEL_OPENVINO_DIR}/inference_engine")
elseif (EXISTS "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine")
# CV SDK is installed
set(IE_ROOT_DIR "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine")
endif()
endif()
if(NOT IE_ROOT_DIR)
ext_message(FATAL_ERROR "inference_engine root directory is not found")
endif()
find_path(IE_INCLUDE_DIR inference_engine.hpp "${IE_ROOT_DIR}/include" NO_DEFAULT_PATH)
set(IE_LIB_DIR "${IE_ROOT_DIR}/lib/${_ARCH}")
set(IE_LIB_REL_DIR "${IE_LIB_DIR}/Release")
set(IE_LIB_DBG_DIR "${IE_LIB_DIR}/Debug")
include(FindPackageHandleStandardArgs)
if(WIN32)
find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_WIN@ "${IE_LIB_REL_DIR}" NO_DEFAULT_PATH)
find_library(IE_C_API_RELEASE_LIBRARY inference_engine_c_api@IE_RELEASE_POSTFIX_WIN@ "${IE_LIB_REL_DIR}" NO_DEFAULT_PATH)
elseif(APPLE)
find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
find_library(IE_C_API_RELEASE_LIBRARY inference_engine_c_api@IE_RELEASE_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
else()
find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_LIN@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
find_library(IE_C_API_RELEASE_LIBRARY inference_engine_c_api@IE_RELEASE_POSTFIX_LIN@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
endif()
find_package_handle_standard_args( InferenceEngine
FOUND_VAR INFERENCEENGINE_FOUND
REQUIRED_VARS IE_RELEASE_LIBRARY IE_C_API_RELEASE_LIBRARY IE_INCLUDE_DIR
FAIL_MESSAGE "Some of mandatory Inference Engine components are not found. Please consult InferenceEgnineConfig.cmake module's help page.")
if(INFERENCEENGINE_FOUND)
# to keep this line for successful execution in CMake 2.8
set(InferenceEngine_FOUND TRUE)
foreach(ie_library_suffix "" "_c_api")
string(TOUPPER "${ie_library_suffix}" ie_library_usuffix)
add_library(IE::inference_engine${ie_library_suffix} SHARED IMPORTED GLOBAL)
if (WIN32)
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
IMPORTED_CONFIGURATIONS RELEASE
IMPORTED_IMPLIB_RELEASE "${IE${ie_library_usuffix}_RELEASE_LIBRARY}"
MAP_IMPORTED_CONFIG_RELEASE Release
MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
# Debug binaries are optional
find_library(IE${ie_library_usuffix}_DEBUG_LIBRARY inference_engine${ie_library_suffix}@IE_DEBUG_POSTFIX_WIN@
"${IE_LIB_DBG_DIR}" NO_DEFAULT_PATH)
if (IE${ie_library_usuffix}_DEBUG_LIBRARY)
set_property(TARGET IE::inference_engine${ie_library_suffix} APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
IMPORTED_IMPLIB_DEBUG "${IE${ie_library_usuffix}_DEBUG_LIBRARY}"
MAP_IMPORTED_CONFIG_DEBUG Debug)
else()
ext_message(WARNING "Inference Engine DEBUG binaries are missed.")
endif()
elseif (APPLE)
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
IMPORTED_LOCATION_RELEASE "${IE${ie_library_usuffix}_RELEASE_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}"
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
# Debug binaries are optional
find_library(IE${ie_library_usuffix}_DEBUG_LIBRARY inference_engine${ie_library_suffix}@IE_DEBUG_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH)
if (IE${ie_library_usuffix}_DEBUG_LIBRARY)
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
IMPORTED_LOCATION_DEBUG "${IE${ie_library_usuffix}_DEBUG_LIBRARY}")
else()
ext_message(WARNING "Inference Engine DEBUG binaries are missed")
endif()
else()
# Only Release binaries are distributed for Linux systems
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
IMPORTED_LOCATION "${IE${ie_library_usuffix}_RELEASE_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
INTERFACE_COMPILE_OPTIONS "-diag-warning=1786")
else()
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
INTERFACE_COMPILE_OPTIONS "-Wno-error=deprecated-declarations")
if(CMAKE_CROSSCOMPILING AND NOT MSVC)
set_property(TARGET IE::inference_engine${ie_library_suffix} PROPERTY
INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined")
endif()
endif()
endif()
endforeach()
set(InferenceEngine_INCLUDE_DIRS ${IE_INCLUDE_DIR})
set(InferenceEngine_LIBRARIES IE::inference_engine
IE::inference_engine_c_api)
set(IE_EXTERNAL_DIR "${IE_ROOT_DIR}/external")
include("${IE_ROOT_DIR}/share/ie_parallel.cmake")
endif()
endif() endif()
set_and_check(_ngraph_dir "@PACKAGE_IE_NGRAPH_DIR@")
find_dependency(ngraph
CONFIG
PATHS ${_ngraph_dir}
NO_CMAKE_FIND_ROOT_PATH
NO_DEFAULT_PATH)
function(_ie_target_no_deprecation_error)
if(NOT MSVC)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(flags "-diag-warning=1786")
else()
set(flags "-Wno-error=deprecated-declarations")
endif()
set_target_properties(${ARGV} PROPERTIES INTERFACE_COMPILE_OPTIONS ${flags})
endif()
endfunction()
if(TARGET inference_engine)
set(InferenceEngine_LIBRARIES inference_engine inference_engine_c_api)
else()
include("${CMAKE_CURRENT_LIST_DIR}/InferenceEngineTargets.cmake")
set(InferenceEngine_LIBRARIES IE::inference_engine IE::inference_engine_c_api)
_ie_target_no_deprecation_error(${InferenceEngine_LIBRARIES})
endif()
# restore PACKAGE_PREFIX_DIR
set(PACKAGE_PREFIX_DIR ${IE_PACKAGE_PREFIX_DIR})
set_and_check(InferenceEngine_INCLUDE_DIRS "@PACKAGE_IE_INCLUDE_DIR@")
check_required_components(InferenceEngine)

View File

@ -13,7 +13,9 @@ add_library(${TARGET_NAME} SHARED ${HEADERS} ${SOURCES})
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine) target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
target_include_directories(${TARGET_NAME} PUBLIC "${InferenceEngine_C_API_SOURCE_DIR}/include") target_include_directories(${TARGET_NAME} PUBLIC
$<INSTALL_INTERFACE:${IE_CPACK_IE_DIR}/include>
$<BUILD_INTERFACE:${InferenceEngine_C_API_SOURCE_DIR}/include>)
add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
@ -25,13 +27,17 @@ ie_add_vs_version_file(NAME ${TARGET_NAME}
# export # export
export(TARGETS ${TARGET_NAME} NAMESPACE IE:: export(TARGETS ${TARGET_NAME} NAMESPACE IE::
APPEND FILE "${CMAKE_BINARY_DIR}/inference_engine_targets.cmake") APPEND FILE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake")
# WA for CI issue
export(TARGETS ${TARGET_NAME} NAMESPACE IE::
APPEND FILE "${CMAKE_BINARY_DIR}/share/InferenceEngineTargets.cmake")
# install # install
ie_cpack_add_component(core_c DEPENDS core) ie_cpack_add_component(core_c DEPENDS core)
install(TARGETS ${TARGET_NAME} install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core_c RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core_c
ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core_c ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core_c
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core_c) LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core_c)

View File

@ -77,6 +77,27 @@ DECLARE_GNA_CONFIG_VALUE(AVX1_EXACT);
DECLARE_GNA_CONFIG_VALUE(AVX2); DECLARE_GNA_CONFIG_VALUE(AVX2);
DECLARE_GNA_CONFIG_VALUE(AVX2_EXACT); DECLARE_GNA_CONFIG_VALUE(AVX2_EXACT);
/**
* @brief The option to override the GNA HW execution target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
* By default (in case of no value set) the behavior depends on GNA HW availability:
* If GNA HW is present, use the option corresponding to this HW.
* If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
* A fully supported GNA HW generation means it must be supported by booth the OV GNA Plugin and the core GNA Library.
* For the GNA Library 2.0.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_2_0.
* For the GNA Library 2.1.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
* For the OV GNA Plugin 2021.4, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
*/
DECLARE_GNA_CONFIG_KEY(EXEC_TARGET);
DECLARE_GNA_CONFIG_VALUE(TARGET_2_0);
DECLARE_GNA_CONFIG_VALUE(TARGET_3_0);
/**
* @brief The option to override the GNA HW compile target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
* By default the same as GNA_EXEC_TARGET.
*/
DECLARE_GNA_CONFIG_KEY(COMPILE_TARGET);
/** /**
* @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES * @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES
*/ */

View File

@ -133,15 +133,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
endif() endif()
if(IE_NOT_FOUND_MESSAGE) find_package(InferenceEngine 2.1.0 EXACT REQUIRED)
# the flag is used to throw a custom message in case if the IE package is not found.
find_package(InferenceEngine 2.1 QUIET)
if (NOT(InferenceEngine_FOUND))
message(FATAL_ERROR ${IE_NOT_FOUND_MESSAGE})
endif()
else()
find_package(InferenceEngine 2.1 REQUIRED)
endif()
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/utils") if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/utils")
add_subdirectory(common/utils) add_subdirectory(common/utils)

View File

@ -760,6 +760,8 @@ int main(int argc, char* argv[]) {
gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16"; gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16";
} }
gnaPluginConfig[GNAConfigParams::KEY_GNA_EXEC_TARGET] = FLAGS_exec_target;
gnaPluginConfig[GNAConfigParams::KEY_GNA_COMPILE_TARGET] = FLAGS_compile_target;
gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads); gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads);
gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO); gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO);
gnaPluginConfig[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(FLAGS_pwl_me); gnaPluginConfig[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(FLAGS_pwl_me);

View File

@ -27,6 +27,20 @@ static const char target_device_message[] = "Optional. Specify a target device t
"below. " "below. "
"The sample will look for a suitable plugin for device specified."; "The sample will look for a suitable plugin for device specified.";
/// @brief message for execution target
static const char execution_target_message[] = "Optional. Specify GNA execution target generation. "
"May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. "
"By default, generation corresponds to the GNA HW available in the system "
"or the latest fully supported generation by the software. "
"See the GNA Plugin's GNA_EXEC_TARGET config option description.";
/// @brief message for execution target
static const char compile_target_message[] = "Optional. Specify GNA compile target generation. "
"May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. "
"By default, generation corresponds to the GNA HW available in the system "
"or the latest fully supported generation by the software. "
"See the GNA Plugin's GNA_COMPILE_TARGET config option description.";
/// @brief message for performance counters /// @brief message for performance counters
static const char performance_counter_message[] = "Optional. Enables per-layer performance report."; static const char performance_counter_message[] = "Optional. Enables per-layer performance report.";
@ -109,6 +123,12 @@ DEFINE_string(m, "", model_message);
/// \brief device the target device to infer on (default CPU) <br> /// \brief device the target device to infer on (default CPU) <br>
DEFINE_string(d, "CPU", target_device_message); DEFINE_string(d, "CPU", target_device_message);
/// \brief GNA execution target <br>
DEFINE_string(exec_target, "", execution_target_message);
/// \brief GNA compile target <br>
DEFINE_string(compile_target, "", compile_target_message);
/// \brief Enable per-layer performance report /// \brief Enable per-layer performance report
DEFINE_bool(pc, false, performance_counter_message); DEFINE_bool(pc, false, performance_counter_message);

View File

@ -21,7 +21,7 @@ if(ENABLE_CLDNN)
endif() endif()
if(ENABLE_VPU) if(ENABLE_VPU)
add_subdirectory(vpu) add_subdirectory(vpu)
endif() endif()
if(ENABLE_GNA) if(ENABLE_GNA)

View File

@ -1736,8 +1736,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
outputTensor.Shape.Dimensions[beginOfHInNHWC + dimHW] = outputTensor.Shape.Dimensions[beginOfHInNHWC + dimHW] =
outputFromPooling(outFromConv, poolWindow->Dimensions[beginOfHInHW + dimHW], poolStride->Dimensions[beginOfHInHW + dimHW]); outputFromPooling(outFromConv, poolWindow->Dimensions[beginOfHInHW + dimHW], poolStride->Dimensions[beginOfHInHW + dimHW]);
} }
AdvanceOperationIfAllApplied(component, i, gnaOperation);
} }
AdvanceOperationIfAllApplied(component, i, gnaOperation);
} }
#else #else
} else if (pLayer->nLayerKind == INTEL_CONVOLUTIONAL) { } else if (pLayer->nLayerKind == INTEL_CONVOLUTIONAL) {

View File

@ -6,12 +6,58 @@
#include <iostream> #include <iostream>
#include <cmath> #include <cmath>
#include <runtime/pwl.h> #include "runtime/pwl.h"
#include <gna_slope_scale.h> #include "gna_slope_scale.h"
#include "dnn_types.h" #include "dnn_types.h"
#include "backend/gna_types.h" #include "backend/gna_types.h"
#include "round_float_define.hpp" #include "round_float_define.hpp"
// This function performes emulatation of HW saturation of PWL segments in SW
// by inserting additional segments when overflow would happen
static void insert_extra_pwl_segments(std::vector<gna_pwl_segment_t>& gna_pwl,
const int16_t y_min,
const int16_t y_max) {
std::map<size_t, gna_pwl_segment_t> extra_segments;
gna_pwl_segment_t extra_segment;
size_t gna_pwl_size = gna_pwl.size();
if (gna_pwl_size == 0)
return;
// We're adding a segment at the beginning if the first one doesn't cover min value
if ((gna_pwl[0].xBase & XBASEMASK) != INT32_MIN) {
extra_segment.xBase = INT32_MIN & XBASEMASK;
extra_segment.yBase = gna_pwl[0].yBase;
extra_segment.slope = 0;
extra_segments[0] = extra_segment;
}
// We're checking here if saturation could potentially happen at the trailing segments
if (gna_pwl[gna_pwl_size - 1].slope != 0) {
int16_t slope = gna_pwl[gna_pwl_size - 1].slope;
int32_t xBase = gna_pwl[gna_pwl_size - 1].xBase & XBASEMASK;
int16_t yBase = gna_pwl[gna_pwl_size - 1].yBase;
float scale = pow(2, ((gna_pwl[gna_pwl_size - 1].xBase & ~XBASEMASK) + 1) * 8);
float y_value = ((static_cast<float>(INT32_MAX) - xBase) * slope) / scale + yBase;
if (y_value > static_cast<float>(INT16_MAX) || y_value < static_cast<float>(INT16_MIN)) {
float x_value = ((static_cast<float>(y_max) - yBase) * scale) / slope + xBase;
extra_segment.xBase = FLOAT_TO_INT32(x_value) & XBASEMASK;
extra_segment.yBase = slope > 0 ? y_max : y_min;
extra_segment.slope = 0;
extra_segments[gna_pwl_size] = extra_segment;
}
}
if (!extra_segments.empty())
gnalog() << "Additional segment(s) added to protect against saturation\n";
for (auto i = extra_segments.rbegin(); i != extra_segments.rend(); i++) {
gna_pwl.insert(gna_pwl.begin() + i->first, i->second);
}
}
void make_gna_pwl(const DnnActivation fun, void make_gna_pwl(const DnnActivation fun,
const std::vector<pwl_t>& pwl, const std::vector<pwl_t>& pwl,
const double l_bound, const double l_bound,
@ -583,6 +629,7 @@ void make_gna_pwl(const DnnActivation fun,
} }
default: default:
gnalog() << "Unexpected function activation!\n"; gnalog() << "Unexpected function activation!\n";
std::cerr << "Unexpected function activation!\n"; THROW_GNA_EXCEPTION << "Unexpected function activation!" << fun;
} }
insert_extra_pwl_segments(gna_pwl, y_min, y_max);
} }

View File

@ -5,9 +5,7 @@
#pragma once #pragma once
#include <vector> #include <vector>
#include <runtime/pwl.h> #include "runtime/pwl.h"
#include "backend/gna_types.h"
void make_gna_pwl(const DnnActivation fun, void make_gna_pwl(const DnnActivation fun,
const std::vector<pwl_t>& pwl, const std::vector<pwl_t>& pwl,

View File

@ -24,6 +24,7 @@
#include "gna-api.h" #include "gna-api.h"
#endif #endif
#include "gna/gna_config.hpp"
#include "gna_plugin_log.hpp" #include "gna_plugin_log.hpp"
//#define MODEL_DUMP //#define MODEL_DUMP
@ -130,7 +131,7 @@ void GNADeviceHelper::enforceLegacyCnns(Gna2Model& gnaModel) {
uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const { uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync }; std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t modelId; uint32_t modelId;
if (isUpTo20GnaHwDevice() && isGnaLibVersion2_1) { if (enforceLegacyCnnNeeded()) {
enforceLegacyCnns(gnaModel); enforceLegacyCnns(gnaModel);
} }
#if GNA_LIB_VER == 2 && defined MODEL_DUMP #if GNA_LIB_VER == 2 && defined MODEL_DUMP
@ -154,15 +155,40 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) {
checkGna2Status(status, "Gna2ModelRelease"); checkGna2Status(status, "Gna2ModelRelease");
} }
bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
auto devVersion = getExecutionTargetDevice();
return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(devVersion);
}
Gna2DeviceVersion GNADeviceHelper::getExecutionTargetDevice() const {
const volatile auto Gna2DeviceVersion3_0 = static_cast<Gna2DeviceVersion>(0x30);
if (executionTarget.empty()) {
if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
return detectedGnaDevVersion;
} else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
if (!isGnaLibVersion2_1)
THROW_GNA_EXCEPTION << "Unsupported GNA execution target " << executionTarget << " when GNA Library version is 2.0.X.Y";
return Gna2DeviceVersion3_0;
} else if (executionTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
return Gna2DeviceVersion2_0;
}
THROW_GNA_EXCEPTION << "Unknown execution target: \"" << executionTarget << "\"";
}
uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) { uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync }; std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqConfId; uint32_t reqConfId;
auto status = Gna2RequestConfigCreate(model_id, &reqConfId); auto status = Gna2RequestConfigCreate(model_id, &reqConfId);
checkGna2Status(status, "Gna2RequestConfigCreate"); checkGna2Status(status, "Gna2RequestConfigCreate");
if (gna2HwConsistency != Gna2DeviceVersionSoftwareEmulation && !isGnaLibVersion2_1) {
status = Gna2RequestConfigEnableHardwareConsistency(reqConfId, // When the GNA_SW_EXACT mode is chosen inference results should be computed exactly the same way
isUpTo20GnaDevice() ? gna2HwConsistency : detectedGnaDevVersion); // (bit exactly) as on the selected GNA execution target generation.
checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency"); // See the GNA Plugin's GNA_EXEC_TARGET config option description.
if (swExactMode) {
const auto consistentDevice = getExecutionTargetDevice();
status = Gna2RequestConfigEnableHardwareConsistency(reqConfId, consistentDevice);
checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency(" + std::to_string(static_cast<long>(consistentDevice)) + ")");
} }
status = Gna2InstrumentationConfigAssignToRequestConfig(instrumentationConfigId, reqConfId); status = Gna2InstrumentationConfigAssignToRequestConfig(instrumentationConfigId, reqConfId);
checkGna2Status(status, "Gna2InstrumentationConfigAssignToRequestConfig"); checkGna2Status(status, "Gna2InstrumentationConfigAssignToRequestConfig");

View File

@ -51,8 +51,10 @@ class GNADeviceHelper {
intel_gna_perf_t nGNAPerfResultsTotal; intel_gna_perf_t nGNAPerfResultsTotal;
#else #else
uint32_t nGnaDeviceIndex = 0; uint32_t nGnaDeviceIndex = 0;
Gna2DeviceVersion gna2HwConsistency = Gna2DeviceVersionSoftwareEmulation; bool swExactMode = false;
Gna2DeviceVersion detectedGnaDevVersion = Gna2DeviceVersionSoftwareEmulation; Gna2DeviceVersion detectedGnaDevVersion = Gna2DeviceVersionSoftwareEmulation;
std::string executionTarget;
std::string compileTarget;
bool isGnaLibVersion2_1 = false; bool isGnaLibVersion2_1 = false;
static const uint32_t TotalGna2InstrumentationPoints = 2; static const uint32_t TotalGna2InstrumentationPoints = 2;
@ -75,11 +77,15 @@ public:
bool isPerformanceMeasuring = false) : bool isPerformanceMeasuring = false) :
isPerformanceMeasuring(isPerformanceMeasuring) { isPerformanceMeasuring(isPerformanceMeasuring) {
#else #else
explicit GNADeviceHelper(Gna2DeviceVersion gna2HwConsistency = Gna2DeviceVersionSoftwareEmulation, explicit GNADeviceHelper(std::string executionTargetIn = "",
std::string compileTargetIn = "",
bool swExactModeIn = false,
uint8_t lib_async_n_threads = 1, uint8_t lib_async_n_threads = 1,
bool use_openmp = false, bool use_openmp = false,
bool isPerformanceMeasuring = false) : bool isPerformanceMeasuring = false) :
gna2HwConsistency(gna2HwConsistency), swExactMode(swExactModeIn),
executionTarget(executionTargetIn),
compileTarget(compileTargetIn),
isPerformanceMeasuring(isPerformanceMeasuring), isPerformanceMeasuring(isPerformanceMeasuring),
nGnaDeviceIndex{selectGnaDevice()} { nGnaDeviceIndex{selectGnaDevice()} {
#endif #endif
@ -129,15 +135,17 @@ public:
uint32_t createRequestConfig(const uint32_t model_id); uint32_t createRequestConfig(const uint32_t model_id);
static uint32_t getNumberOfGnaDevices(); static uint32_t getNumberOfGnaDevices();
static uint32_t selectGnaDevice(); static uint32_t selectGnaDevice();
static bool isGnaHw(const Gna2DeviceVersion dev) {
return Gna2DeviceVersionSoftwareEmulation != dev;
}
bool hasGnaHw() const { bool hasGnaHw() const {
return Gna2DeviceVersionSoftwareEmulation != detectedGnaDevVersion; return isGnaHw(detectedGnaDevVersion);
} }
bool isUpTo20GnaDevice() const { static bool isUpTo20HwGnaDevice(const Gna2DeviceVersion dev) {
return detectedGnaDevVersion <= Gna2DeviceVersion2_0; return dev <= Gna2DeviceVersion2_0 && isGnaHw(dev);
}
bool isUpTo20GnaHwDevice() const {
return isUpTo20GnaDevice() && detectedGnaDevVersion != Gna2DeviceVersionSoftwareEmulation;
} }
bool enforceLegacyCnnNeeded() const;
Gna2DeviceVersion getExecutionTargetDevice() const;
static void checkGna2Status(Gna2Status status, const std::string& from); static void checkGna2Status(Gna2Status status, const std::string& from);
static void checkGna2Status(Gna2Status status, const Gna2Model& gnaModel); static void checkGna2Status(Gna2Status status, const Gna2Model& gnaModel);
#endif #endif

View File

@ -397,7 +397,9 @@ void GNAPlugin::InitGNADevice() {
gnaFlags->gna_openmp_multithreading, gnaFlags->gna_openmp_multithreading,
gnaFlags->performance_counting); gnaFlags->performance_counting);
#else #else
gnadevice = std::make_shared<GNADeviceHelper>(config.pluginGna2DeviceConsistent, gnadevice = std::make_shared<GNADeviceHelper>(config.gnaExecTarget,
config.gnaCompileTarget,
config.swExactMode,
gnaFlags->gna_lib_async_threads_num, gnaFlags->gna_lib_async_threads_num,
gnaFlags->gna_openmp_multithreading, gnaFlags->gna_openmp_multithreading,
gnaFlags->performance_counting); gnaFlags->performance_counting);

View File

@ -33,22 +33,28 @@ static const std::vector<std::string> supported_values_on_gna2 = {
GNAConfigParams::GNA_AVX2_EXACT GNAConfigParams::GNA_AVX2_EXACT
}; };
#else #else
static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, Gna2DeviceVersion>> supported_values = { static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, bool>> supported_values = {
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, Gna2DeviceVersionSoftwareEmulation}}, {GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, Gna2DeviceVersionSoftwareEmulation}}, {GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, Gna2DeviceVersionSoftwareEmulation}}, {GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, Gna2DeviceVersion1_0}}, {GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, Gna2DeviceVersionSoftwareEmulation}}, {GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, Gna2DeviceVersion1_0}}, {GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, Gna2DeviceVersionSoftwareEmulation}}, {GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, Gna2DeviceVersion1_0}}, {GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, Gna2DeviceVersionSoftwareEmulation}}, {GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, Gna2DeviceVersion1_0}}, {GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, Gna2DeviceVersionSoftwareEmulation}}, {GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, Gna2DeviceVersion1_0}}, {GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
}; };
#endif #endif
static const std::set<std::string> supportedTargets = {
GNAConfigParams::GNA_TARGET_2_0,
GNAConfigParams::GNA_TARGET_3_0,
""
};
void Config::UpdateFromMap(const std::map<std::string, std::string>& config) { void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
for (auto&& item : config) { for (auto&& item : config) {
auto key = item.first; auto key = item.first;
@ -116,9 +122,14 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
gna_proc_type = static_cast<intel_gna_proc_t>(procType->second); gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
#else #else
pluginGna2AccMode = procType->second.first; pluginGna2AccMode = procType->second.first;
pluginGna2DeviceConsistent = procType->second.second; swExactMode = procType->second.second;
#endif #endif
} }
} else if (key == GNA_CONFIG_KEY(EXEC_TARGET) || key == GNA_CONFIG_KEY(COMPILE_TARGET)) {
if (supportedTargets.count(value) == 0) {
THROW_GNA_EXCEPTION << "Unsupported GNA config value (key, value): (" << key << ", " << value << ")";
}
(key == GNA_CONFIG_KEY(EXEC_TARGET) ? gnaExecTarget : gnaCompileTarget) = value;
} else if (key == GNA_CONFIG_KEY(COMPACT_MODE)) { } else if (key == GNA_CONFIG_KEY(COMPACT_MODE)) {
if (value == PluginConfigParams::YES) { if (value == PluginConfigParams::YES) {
gnaFlags.compact_mode = true; gnaFlags.compact_mode = true;
@ -255,15 +266,17 @@ void Config::AdjustKeyMapValues() {
} }
#else #else
if (value.second.first == pluginGna2AccMode && if (value.second.first == pluginGna2AccMode &&
value.second.second == pluginGna2DeviceConsistent) { value.second.second == swExactMode) {
device_mode = value.first; device_mode = value.first;
break; break;
} }
#endif #endif
} }
} }
IE_ASSERT(!device_mode.empty()); IE_ASSERT(!device_mode.empty());
keyConfigMap[GNA_CONFIG_KEY(DEVICE_MODE)] = device_mode; keyConfigMap[GNA_CONFIG_KEY(DEVICE_MODE)] = device_mode;
keyConfigMap[GNA_CONFIG_KEY(EXEC_TARGET)] = gnaExecTarget;
keyConfigMap[GNA_CONFIG_KEY(COMPILE_TARGET)] = gnaCompileTarget;
keyConfigMap[GNA_CONFIG_KEY(COMPACT_MODE)] = keyConfigMap[GNA_CONFIG_KEY(COMPACT_MODE)] =
gnaFlags.compact_mode ? PluginConfigParams::YES: PluginConfigParams::NO; gnaFlags.compact_mode ? PluginConfigParams::YES: PluginConfigParams::NO;
keyConfigMap[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] = keyConfigMap[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =

View File

@ -37,7 +37,7 @@ struct Config {
gna_proc_type = r.gna_proc_type; gna_proc_type = r.gna_proc_type;
#else #else
pluginGna2AccMode = r.pluginGna2AccMode; pluginGna2AccMode = r.pluginGna2AccMode;
pluginGna2DeviceConsistent = r.pluginGna2DeviceConsistent; swExactMode = r.swExactMode;
#endif #endif
inputScaleFactors = r.inputScaleFactors; inputScaleFactors = r.inputScaleFactors;
gnaFlags = r.gnaFlags; gnaFlags = r.gnaFlags;
@ -55,11 +55,14 @@ struct Config {
std::string dumpXNNPath; std::string dumpXNNPath;
std::string dumpXNNGeneration; std::string dumpXNNGeneration;
std::string gnaExecTarget;
std::string gnaCompileTarget;
#if GNA_LIB_VER == 1 #if GNA_LIB_VER == 1
intel_gna_proc_t gna_proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE); intel_gna_proc_t gna_proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE);
#else #else
Gna2AccelerationMode pluginGna2AccMode = Gna2AccelerationModeSoftware; Gna2AccelerationMode pluginGna2AccMode = Gna2AccelerationModeSoftware;
Gna2DeviceVersion pluginGna2DeviceConsistent = Gna2DeviceVersion1_0; bool swExactMode = true;
#endif #endif
std::vector<float> inputScaleFactors; std::vector<float> inputScaleFactors;

View File

@ -777,7 +777,39 @@ void RemovePermutationsNHWCToNCHWPass::run() {
void InsertIdentityLayerPass::run() { void InsertIdentityLayerPass::run() {
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front()); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
auto createIdentityLayer = [quantized, this](const TensorDesc& tensorDesc) {
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
CNNLayerPtr activationLayerWithQuant = quantized ?
InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
activationLayer;
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), tensorDesc);
getCreatorLayer(dataPtr) = activationLayerWithQuant;
activationLayerWithQuant->outData.push_back(dataPtr);
return activationLayerWithQuant;
};
for (auto & l : *pLayers) { for (auto & l : *pLayers) {
if (LayerInfo(l).isPooling()) {
// Identity should be inserted after 1D pooling if it's the last functional layer.
auto pooling = LayerInfo(l).as<PoolingLayer*>();
IE_ASSERT(pooling != nullptr);
if (is2D(pooling->_kernel)) continue;
auto hasNextFuncLayer = CNNNetHasNextLayerSkipCertain(l, 0, 0, [](CNNLayerPtr layer) {
return LayerInfo(layer).isNonFunctional();
});
if (hasNextFuncLayer) continue;
auto identityLayer = createIdentityLayer(l->outData[0]->getTensorDesc());
gnalog() << "Inserted "<< identityLayer->name << " after " << l->name << std::endl;
auto nextLayer = CNNNetCheckNextLayerSkipCertain(l, 0, 0, true, [](CNNLayerPtr layer) { return false; }).first;
CNNNetworkInsertLayer(l, nextLayer, identityLayer);
}
for (auto && prev : getCandidatesForIdentityInsertion(l, getPassManager())) { for (auto && prev : getCandidatesForIdentityInsertion(l, getPassManager())) {
// Do an upstream search until Functional layer is found // Do an upstream search until Functional layer is found
auto original_prev_layer = prev; auto original_prev_layer = prev;
@ -817,15 +849,6 @@ void InsertIdentityLayerPass::run() {
if (reconnected) if (reconnected)
continue; continue;
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
// actual insertion
auto activationName = std::string("identity_") + std::to_string(numOfIdentityLayers);
gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;
CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
// TODO: why index is 0 ? - better use direct indexing in getCandidateFunction // TODO: why index is 0 ? - better use direct indexing in getCandidateFunction
// detecting ins-data-idx // detecting ins-data-idx
size_t insDataIdx = std::numeric_limits<size_t>::max(); size_t insDataIdx = std::numeric_limits<size_t>::max();
@ -840,13 +863,10 @@ void InsertIdentityLayerPass::run() {
} }
auto inputData = true_layer->insData[insDataIdx].lock(); auto inputData = true_layer->insData[insDataIdx].lock();
auto identityLayer = createIdentityLayer(inputData->getTensorDesc());
gnalog() << "Inserted "<< identityLayer->name << " between: " << prev->name << " and " << true_layer->name << "\n" << std::flush;
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc());
auto activationLayerWithQuant = quantized ?
InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
activationLayer;
getCreatorLayer(dataPtr) = activationLayerWithQuant;
activationLayerWithQuant->outData.push_back(dataPtr);
// wether 1 identity or all outputs TODO possible grouping here, need to implement special grouped inserter // wether 1 identity or all outputs TODO possible grouping here, need to implement special grouped inserter
bool notAll = false; bool notAll = false;
for (auto && nextData : prev->outData) { for (auto && nextData : prev->outData) {
@ -860,14 +880,14 @@ void InsertIdentityLayerPass::run() {
} }
// copy offset - to be used while connecting outputs // copy offset - to be used while connecting outputs
if (prev->params.find("output_offset") != prev->params.end()) { if (prev->params.find("output_offset") != prev->params.end()) {
activationLayerWithQuant->params["output_offset"] = prev->params["output_offset"]; identityLayer->params["output_offset"] = prev->params["output_offset"];
} }
// copy offset - to be used while connecting outputs // copy offset - to be used while connecting outputs
if (prev->params.find("original_num_rows") != prev->params.end()) { if (prev->params.find("original_num_rows") != prev->params.end()) {
activationLayerWithQuant->params["original_num_rows"] = prev->params["original_num_rows"]; identityLayer->params["original_num_rows"] = prev->params["original_num_rows"];
} }
CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), activationLayerWithQuant); CNNNetworkInsertLayer(prev, notAll ? true_layer : CNNLayerPtr(nullptr), identityLayer);
} }
} }
} }
@ -1662,6 +1682,10 @@ void BreakFusingOfOutputLayersPass::run() {
#endif #endif
OutputsDataMap outputsMap = this->getPassManager()->getNetwork().getOutputsInfo(); OutputsDataMap outputsMap = this->getPassManager()->getNetwork().getOutputsInfo();
for (auto layer : *pLayers) { for (auto layer : *pLayers) {
/* Inserion of the second activation after pooling will break Conv - Pooling - Activation component
* since scaleshift layers will be inserted between the pooling and activations
*/
if (LayerInfo(layer).isPooling()) continue;
for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) { for (int output_idx = 0; output_idx < layer->outData.size(); output_idx++) {
auto& output = layer->outData[output_idx]; auto& output = layer->outData[output_idx];
auto& input_to = getInputTo(output); auto& input_to = getInputTo(output);

View File

@ -9,11 +9,10 @@
#include <limits> #include <limits>
#include <cstdint> #include <cstdint>
#include <algorithm> #include <algorithm>
#include "backend/gna_types.h"
#ifdef _NO_MKL_ #ifdef _NO_MKL_
#include <cmath> #include <cmath>
#include <backend/make_pwl.hpp> #include "backend/make_pwl.hpp"
#define SCOPY(num, in, inci, out, inco) for (int i_ = 0; i_ < *(num); i_++) *(out + i_ * *(inco)) = *(in + i_ * *(inci)); #define SCOPY(num, in, inci, out, inco) for (int i_ = 0; i_ < *(num); i_++) *(out + i_ * *(inco)) = *(in + i_ * *(inci));
#define SSCAL(num, scale, inout, inco) for (int i_ = 0; i_ < *(num); i_++) *(inout + i_ * *(inco)) = *(scale) * *(inout + i_ * *(inco)); #define SSCAL(num, scale, inout, inco) for (int i_ = 0; i_ < *(num); i_++) *(inout + i_ * *(inco)) = *(scale) * *(inout + i_ * *(inco));
@ -27,7 +26,6 @@
#include "pwl.h" #include "pwl.h"
#include "gna_plugin_log.hpp" #include "gna_plugin_log.hpp"
#include "backend/dnn_types.h"
#include "gna_slope_scale.h" #include "gna_slope_scale.h"
#include "round_float_define.hpp" #include "round_float_define.hpp"

View File

@ -164,7 +164,9 @@ endif()
target_link_libraries(${TARGET_NAME} PRIVATE pugixml openvino::itt ${CMAKE_DL_LIBS} Threads::Threads target_link_libraries(${TARGET_NAME} PRIVATE pugixml openvino::itt ${CMAKE_DL_LIBS} Threads::Threads
${NGRAPH_LIBRARIES} inference_engine_transformations) ${NGRAPH_LIBRARIES} inference_engine_transformations)
target_include_directories(${TARGET_NAME} INTERFACE ${PUBLIC_HEADERS_DIR} target_include_directories(${TARGET_NAME} INTERFACE
$<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}>
$<INSTALL_INTERFACE:${IE_CPACK_IE_DIR}/include>
PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES> PRIVATE $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:${TARGET_NAME}_legacy,INTERFACE_INCLUDE_DIRECTORIES>) $<TARGET_PROPERTY:${TARGET_NAME}_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
@ -213,31 +215,10 @@ set_target_properties(${TARGET_NAME}_s PROPERTIES EXCLUDE_FROM_ALL ON)
set_target_properties(${TARGET_NAME} ${TARGET_NAME}_obj ${TARGET_NAME}_s set_target_properties(${TARGET_NAME} ${TARGET_NAME}_obj ${TARGET_NAME}_s
PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
# InferenceEngineConfig.cmake for install tree # Export for build tree
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in"
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake" @ONLY)
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
COPYONLY)
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/ie_parallel.cmake"
"${CMAKE_BINARY_DIR}/share/ie_parallel.cmake"
COPYONLY)
# Export Inference Engine targets
export(TARGETS ${TARGET_NAME} NAMESPACE IE:: export(TARGETS ${TARGET_NAME} NAMESPACE IE::
APPEND FILE "${CMAKE_BINARY_DIR}/inference_engine_targets.cmake") APPEND FILE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake")
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-build.cmake.in"
"${CMAKE_BINARY_DIR}/InferenceEngineConfig.cmake"
COPYONLY)
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
"${CMAKE_BINARY_DIR}/InferenceEngineConfig-version.cmake"
COPYONLY)
# Export for developer package # Export for developer package
@ -246,6 +227,8 @@ ie_developer_export_targets(${TARGET_NAME} ${TARGET_NAME}_plugin_api)
# install TBB # install TBB
list(APPEND core_components ngraph) list(APPEND core_components ngraph)
list(APPEND PATH_VARS "IE_INCLUDE_DIR" "IE_NGRAPH_DIR"
"IE_PARALLEL_CMAKE")
if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCHES ${TEMP}) if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCHES ${TEMP})
ie_cpack_add_component(tbb REQUIRED) ie_cpack_add_component(tbb REQUIRED)
@ -258,16 +241,21 @@ if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCH
DESTINATION ${IE_CPACK_IE_DIR}/external/tbb DESTINATION ${IE_CPACK_IE_DIR}/external/tbb
COMPONENT tbb) COMPONENT tbb)
if(EXISTS "${TBB}/bin") if(EXISTS "${TBB}/bin")
install(DIRECTORY "${TBB}/bin" install(DIRECTORY "${TBB}/bin"
DESTINATION ${IE_CPACK_IE_DIR}/external/tbb DESTINATION ${IE_CPACK_IE_DIR}/external/tbb
COMPONENT tbb) COMPONENT tbb)
endif() endif()
install(FILES "${TBB}/LICENSE" install(FILES "${TBB}/LICENSE"
DESTINATION ${IE_CPACK_IE_DIR}/external/tbb DESTINATION ${IE_CPACK_IE_DIR}/external/tbb
COMPONENT tbb) COMPONENT tbb)
set(IE_TBB_DIR_INSTALL "external/tbb/cmake")
set(IE_TBB_DIR "${TBB_DIR}")
list(APPEND PATH_VARS "IE_TBB_DIR")
install(FILES "${TBB}/cmake/TBBConfig.cmake" install(FILES "${TBB}/cmake/TBBConfig.cmake"
"${TBB}/cmake/TBBConfigVersion.cmake" "${TBB}/cmake/TBBConfigVersion.cmake"
DESTINATION ${IE_CPACK_IE_DIR}/external/tbb/cmake DESTINATION ${IE_CPACK_IE_DIR}/${IE_TBB_DIR_INSTALL}
COMPONENT tbb) COMPONENT tbb)
endif() endif()
@ -277,15 +265,58 @@ ie_cpack_add_component(core REQUIRED DEPENDS ${core_components})
install(DIRECTORY "${IE_MAIN_SOURCE_DIR}/include" DESTINATION ${IE_CPACK_IE_DIR} install(DIRECTORY "${IE_MAIN_SOURCE_DIR}/include" DESTINATION ${IE_CPACK_IE_DIR}
COMPONENT core) COMPONENT core)
install(TARGETS ${TARGET_NAME}
install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core) LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
install(FILES "${OpenVINO_BINARY_DIR}/share/ie_parallel.cmake"
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig.cmake"
"${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
DESTINATION ${IE_CPACK_IE_DIR}/share
COMPONENT core)
install(FILES $<TARGET_FILE_DIR:${TARGET_NAME}>/plugins.xml install(FILES $<TARGET_FILE_DIR:${TARGET_NAME}>/plugins.xml
DESTINATION ${IE_CPACK_RUNTIME_PATH} DESTINATION ${IE_CPACK_RUNTIME_PATH}
COMPONENT core) COMPONENT core)
# Install cmake scripts
install(EXPORT InferenceEngineTargets
FILE InferenceEngineTargets.cmake
NAMESPACE IE::
DESTINATION ${IE_CPACK_IE_DIR}/share
COMPONENT core)
include(CMakePackageConfigHelpers)
set(IE_NGRAPH_DIR "${CMAKE_BINARY_DIR}/ngraph")
set(IE_INCLUDE_DIR "${PUBLIC_HEADERS_DIR}")
set(IE_PARALLEL_CMAKE "${InferenceEngine_SOURCE_DIR}/cmake/ie_parallel.cmake")
configure_package_config_file("${InferenceEngine_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in"
"${CMAKE_BINARY_DIR}/InferenceEngineConfig.cmake"
INSTALL_DESTINATION "${CMAKE_INSTALL_PREFIX}"
PATH_VARS ${PATH_VARS})
set(IE_INCLUDE_DIR "include")
set(IE_NGRAPH_DIR "../ngraph/cmake")
set(IE_TBB_DIR "${IE_TBB_DIR_INSTALL}")
set(IE_PARALLEL_CMAKE "share/ie_parallel.cmake")
configure_package_config_file("${InferenceEngine_SOURCE_DIR}/cmake/templates/InferenceEngineConfig.cmake.in"
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake"
INSTALL_DESTINATION share
PATH_VARS ${PATH_VARS})
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
"${CMAKE_BINARY_DIR}/InferenceEngineConfig-version.cmake"
COPYONLY)
# WA for CI
configure_file("${IE_MAIN_SOURCE_DIR}/cmake/templates/InferenceEngineConfig-version.cmake.in"
"${CMAKE_BINARY_DIR}/share/InferenceEngineConfig-version.cmake"
COPYONLY)
export(TARGETS ${TARGET_NAME} NAMESPACE IE::
APPEND FILE "${CMAKE_BINARY_DIR}/share/InferenceEngineTargets.cmake")
install(FILES "${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake"
"${CMAKE_BINARY_DIR}/InferenceEngineConfig-version.cmake"
"${InferenceEngine_SOURCE_DIR}/cmake/ie_parallel.cmake"
DESTINATION ${IE_CPACK_IE_DIR}/share
COMPONENT core)

View File

@ -24,18 +24,18 @@ using cpu_set_t = void;
* *
* @param mask The mask * @param mask The mask
*/ */
INFERENCE_ENGINE_API_CPP(void) ReleaseProcessMask(cpu_set_t* mask); void ReleaseProcessMask(cpu_set_t* mask);
/** /**
* @brief Deleter for process mask * @brief Deleter for process mask
* @ingroup ie_dev_api_threading * @ingroup ie_dev_api_threading
*/ */
struct ReleaseProcessMaskDeleter { struct ReleaseProcessMaskDeleter {
/** /**
* @brief A callable operator to release object * @brief A callable operator to release object
* *
* @param mask The mask to release * @param mask The mask to release
*/ */
void operator()(cpu_set_t* mask) const { void operator()(cpu_set_t* mask) const {
ReleaseProcessMask(mask); ReleaseProcessMask(mask);
} }
@ -52,7 +52,7 @@ using CpuSet = std::unique_ptr<cpu_set_t, ReleaseProcessMaskDeleter>;
* @ingroup ie_dev_api_threading * @ingroup ie_dev_api_threading
* @return A core affinity mask * @return A core affinity mask
*/ */
INFERENCE_ENGINE_API_CPP(std::tuple<CpuSet, int>) GetProcessMask(); std::tuple<CpuSet, int> GetProcessMask();
/** /**
* @brief Pins current thread to a set of cores determined by the mask * @brief Pins current thread to a set of cores determined by the mask
@ -64,18 +64,18 @@ INFERENCE_ENGINE_API_CPP(std::tuple<CpuSet, int>) GetProcessMask();
* @param[in] processMask The process mask * @param[in] processMask The process mask
* @return `True` in case of success, `false` otherwise * @return `True` in case of success, `false` otherwise
*/ */
INFERENCE_ENGINE_API_CPP(bool) PinThreadToVacantCore(int thrIdx, int hyperThreads, int ncores, const CpuSet& processMask); bool PinThreadToVacantCore(int thrIdx, int hyperThreads, int ncores, const CpuSet& processMask);
/** /**
* @brief Pins thread to a spare core in the round-robin scheme, while respecting the given process mask. * @brief Pins thread to a spare core in the round-robin scheme, while respecting the given process mask.
* The function can also handle the hyper-threading (by populating the physical cores first) * The function can also handle the hyper-threading (by populating the physical cores first)
* @ingroup ie_dev_api_threading * @ingroup ie_dev_api_threading
* *
* @param[in] ncores The ncores * @param[in] ncores The ncores
* @param[in] processMask The process mask * @param[in] processMask The process mask
* @return `True` in case of success, `false` otherwise * @return `True` in case of success, `false` otherwise
*/ */
INFERENCE_ENGINE_API_CPP(bool) PinCurrentThreadByMask(int ncores, const CpuSet& processMask); bool PinCurrentThreadByMask(int ncores, const CpuSet& processMask);
/** /**
* @brief Pins a current thread to a socket. * @brief Pins a current thread to a socket.
@ -84,5 +84,5 @@ INFERENCE_ENGINE_API_CPP(bool) PinCurrentThreadByMask(int ncores, const CpuSet&
* @param[in] socket The socket id * @param[in] socket The socket id
* @return `True` in case of success, `false` otherwise * @return `True` in case of success, `false` otherwise
*/ */
INFERENCE_ENGINE_API_CPP(bool) PinCurrentThreadToSocket(int socket); bool PinCurrentThreadToSocket(int socket);
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -34,8 +34,6 @@ ie_faster_build(${TARGET_NAME}_obj
PCH PRIVATE "src/precomp.hpp" PCH PRIVATE "src/precomp.hpp"
) )
set_ie_threading_interface_for(${TARGET_NAME}_obj)
target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API) target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)
target_include_directories(${TARGET_NAME}_obj PRIVATE target_include_directories(${TARGET_NAME}_obj PRIVATE
@ -63,8 +61,6 @@ add_library(${TARGET_NAME} SHARED
ie_add_vs_version_file(NAME ${TARGET_NAME} ie_add_vs_version_file(NAME ${TARGET_NAME}
FILEDESCRIPTION "Inference Engine Legacy library") FILEDESCRIPTION "Inference Engine Legacy library")
set_ie_threading_interface_for(${TARGET_NAME})
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets
PRIVATE pugixml openvino::itt PRIVATE pugixml openvino::itt
${NGRAPH_LIBRARIES} inference_engine_transformations) ${NGRAPH_LIBRARIES} inference_engine_transformations)

View File

@ -15,7 +15,6 @@
#include <threading/ie_cpu_streams_executor.hpp> #include <threading/ie_cpu_streams_executor.hpp>
#include <ie_system_conf.h> #include <ie_system_conf.h>
#include <threading/ie_thread_affinity.hpp>
#include <algorithm> #include <algorithm>
#include <unordered_set> #include <unordered_set>
#include <utility> #include <utility>

View File

@ -30,7 +30,7 @@ ie_add_vs_version_file(NAME ${TARGET_NAME}
target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES}
PRIVATE ${NGRAPH_REF_LIBRARIES} openvino::itt ngraph::builder pugixml) PRIVATE ${NGRAPH_REF_LIBRARIES} openvino::itt ngraph::builder pugixml)
target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR} target_include_directories(${TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}>
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src") PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
@ -49,7 +49,7 @@ ie_developer_export_targets(${TARGET_NAME})
# install # install
install(TARGETS ${TARGET_NAME} install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core) LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)

View File

@ -11,8 +11,6 @@ function(add_common_target TARGET_NAME STATIC_IE)
UNITY UNITY
) )
set_ie_threading_interface_for(${TARGET_NAME})
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# TODO: enable some day and fix all warnings # TODO: enable some day and fix all warnings
# target_compile_options(${TARGET_NAME} PRIVATE "-Wall") # target_compile_options(${TARGET_NAME} PRIVATE "-Wall")

View File

@ -11,7 +11,6 @@
#include <cpp/ie_cnn_network.h> #include <cpp/ie_cnn_network.h>
#include <precision_utils.h> #include <precision_utils.h>
#include <ie_parallel.hpp>
#include <vector> #include <vector>
#include <memory> #include <memory>

View File

@ -8,8 +8,6 @@
#include <unordered_map> #include <unordered_map>
#include <algorithm> #include <algorithm>
#include <ie_parallel.hpp>
#include <vpu/model/stage.hpp> #include <vpu/model/stage.hpp>
#include <vpu/utils/numeric.hpp> #include <vpu/utils/numeric.hpp>
#include <vpu/utils/profiling.hpp> #include <vpu/utils/profiling.hpp>

View File

@ -14,7 +14,6 @@
#include <vpu/model/data_contents/hw_const_data_content.hpp> #include <vpu/model/data_contents/hw_const_data_content.hpp>
#include <precision_utils.h> #include <precision_utils.h>
#include <ie_parallel.hpp>
#include <utility> #include <utility>
#include <memory> #include <memory>

View File

@ -6,8 +6,6 @@
#include <vpu/stages/stub_stage.hpp> #include <vpu/stages/stub_stage.hpp>
#include <vpu/model/data_contents/merge_fc_content.hpp> #include <vpu/model/data_contents/merge_fc_content.hpp>
#include <ie_parallel.hpp>
#include <memory> #include <memory>
#include <utility> #include <utility>
#include <vector> #include <vector>

View File

@ -7,7 +7,6 @@
#include <vpu/stages/stub_stage.hpp> #include <vpu/stages/stub_stage.hpp>
#include <vpu/model/data_contents/priorbox_contents.hpp> #include <vpu/model/data_contents/priorbox_contents.hpp>
#include <ie_parallel.hpp>
#include <precision_utils.h> #include <precision_utils.h>
#include <cmath> #include <cmath>

View File

@ -89,6 +89,7 @@ void PassImpl::run(const Model& model) {
} }
} // namespace } // namespace
Pass::Ptr PassManager::replaceWithReduceMean() { Pass::Ptr PassManager::replaceWithReduceMean() {
return std::make_shared<PassImpl>(_stageBuilder); return std::make_shared<PassImpl>(_stageBuilder);
} }

View File

@ -8,8 +8,6 @@
#include <vpu/utils/numeric.hpp> #include <vpu/utils/numeric.hpp>
#include <vpu/model/data_contents/deconvolution_contents.hpp> #include <vpu/model/data_contents/deconvolution_contents.hpp>
#include <ie_parallel.hpp>
#include <vector> #include <vector>
#include <string> #include <string>
#include <memory> #include <memory>

View File

@ -12,7 +12,6 @@
#include <vpu/compile_env.hpp> #include <vpu/compile_env.hpp>
#include <precision_utils.h> #include <precision_utils.h>
#include <ie_parallel.hpp>
#include <array> #include <array>
#include <algorithm> #include <algorithm>

View File

@ -7,7 +7,6 @@
#include <vpu/utils/profiling.hpp> #include <vpu/utils/profiling.hpp>
#include <vpu/middleend/sw/utility.hpp> #include <vpu/middleend/sw/utility.hpp>
#include <ie_parallel.hpp>
#include <precision_utils.h> #include <precision_utils.h>
namespace vpu { namespace vpu {

View File

@ -10,7 +10,6 @@
#include <vpu/model/data_contents/batch_norm_contents.hpp> #include <vpu/model/data_contents/batch_norm_contents.hpp>
#include <precision_utils.h> #include <precision_utils.h>
#include <ie_parallel.hpp>
#include <cmath> #include <cmath>
#include <vector> #include <vector>

View File

@ -9,8 +9,6 @@
#include <vpu/utils/profiling.hpp> #include <vpu/utils/profiling.hpp>
#include <vpu/model/data_contents/prelu_blob_content.hpp> #include <vpu/model/data_contents/prelu_blob_content.hpp>
#include <ie_parallel.hpp>
#include <vector> #include <vector>
#include <memory> #include <memory>

View File

@ -45,6 +45,8 @@ addIeTargetTest(
IE IE
) )
set_ie_threading_interface_for(${TARGET_NAME})
if(NGRAPH_ONNX_IMPORT_ENABLE) if(NGRAPH_ONNX_IMPORT_ENABLE)
target_compile_definitions(${TARGET_NAME} PRIVATE target_compile_definitions(${TARGET_NAME} PRIVATE
NGRAPH_ONNX_IMPORT_ENABLE NGRAPH_ONNX_IMPORT_ENABLE

View File

@ -192,6 +192,8 @@ public:
} }
void TearDown() override { void TearDown() override {
EXPECT_TRUE(Mock::VerifyAndClearExpectations(net.get()));
EXPECT_TRUE(Mock::VerifyAndClearExpectations(mockPlugin.get()));
CommonTestUtils::removeIRFiles(modelName, weightsName); CommonTestUtils::removeIRFiles(modelName, weightsName);
} }
@ -766,8 +768,6 @@ TEST_P(CachingTest, TestThrowOnExport) {
// TODO: temporary behavior is to no re-throw exception on import error (see 54335) // TODO: temporary behavior is to no re-throw exception on import error (see 54335)
// In future add separate 'no throw' test for 'blob_outdated' exception from plugin // In future add separate 'no throw' test for 'blob_outdated' exception from plugin
TEST_P(CachingTest, TestThrowOnImport) { TEST_P(CachingTest, TestThrowOnImport) {
ON_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).WillByDefault(Throw(1));
ON_CALL(*mockPlugin, ImportNetworkImpl(_, _)).WillByDefault(Throw(1));
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber());
@ -785,20 +785,25 @@ TEST_P(CachingTest, TestThrowOnImport) {
{ {
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); if (m_remoteContext) {
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(1).WillOnce(Throw(1));
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0);
} else {
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(1).WillOnce(Throw(1));
}
EXPECT_CALL(*net, ExportImpl(_)).Times(1); EXPECT_CALL(*net, ExportImpl(_)).Times(1);
testLoad([&](Core &ie) { testLoad([&](Core &ie) {
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}});
EXPECT_NO_THROW(m_testFunction(ie)); EXPECT_NO_THROW(m_testFunction(ie));
}); });
} }
{ // Step 3: same load, cache should be deleted due to unsuccessful import on step 2 { // Step 3: same load, cache is re-created on export on step 2 and shall be successfully imported now
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, LoadExeNetworkImpl(_, _)).Times(0);
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(0); EXPECT_CALL(*mockPlugin, ImportNetworkImpl(_, _)).Times(!m_remoteContext ? 1 : 0);
EXPECT_CALL(*net, ExportImpl(_)).Times(1); EXPECT_CALL(*net, ExportImpl(_)).Times(0);
testLoad([&](Core &ie) { testLoad([&](Core &ie) {
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}}); ie.SetConfig({{CONFIG_KEY(CACHE_DIR), m_cacheDir}});
EXPECT_NO_THROW(m_testFunction(ie)); EXPECT_NO_THROW(m_testFunction(ie));

View File

@ -26,6 +26,14 @@ typedef std::tuple<
std::vector<size_t> // Input shape std::vector<size_t> // Input shape
> removePermutationsPassParams; > removePermutationsPassParams;
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
std::string, // Target Device
std::map<std::string, std::string>, // Configuration
std::vector<size_t>, // Input shape
bool // with activation
> removePermutationsWithPoolPassParams;
namespace LayerTestsDefinitions { namespace LayerTestsDefinitions {
class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<removePermutationsPassParams>, class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<removePermutationsPassParams>,
@ -137,15 +145,16 @@ protected:
} }
}; };
class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<removePermutationsPassParams>, class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<removePermutationsWithPoolPassParams>,
public LayerTestsUtils::LayerTestsCommon { public LayerTestsUtils::LayerTestsCommon {
public: public:
static std::string getTestCaseName(testing::TestParamInfo<removePermutationsPassParams> obj) { static std::string getTestCaseName(testing::TestParamInfo<removePermutationsWithPoolPassParams> obj) {
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
std::string targetDevice; std::string targetDevice;
std::map<std::string, std::string> configuration; std::map<std::string, std::string> configuration;
std::vector<size_t> inputShape; std::vector<size_t> inputShape;
std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param; bool withActivation;
std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = obj.param;
std::ostringstream result; std::ostringstream result;
result << "netPRC=" << netPrecision.name() << "_"; result << "netPRC=" << netPrecision.name() << "_";
@ -154,6 +163,7 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
result << "_configItem=" << configItem.first << "_" << configItem.second; result << "_configItem=" << configItem.first << "_" << configItem.second;
} }
result << "_IS=" << CommonTestUtils::vec2str(inputShape); result << "_IS=" << CommonTestUtils::vec2str(inputShape);
result << "_withActivation=" << withActivation;
return result.str(); return result.str();
} }
@ -175,8 +185,6 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
// | // |
// Permute (order: [0, 3, 1, 2]) // Permute (order: [0, 3, 1, 2])
// | // |
// Relu
// |
// Convolution // Convolution
// | // |
// Pooling // Pooling
@ -188,7 +196,8 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
// Reshape // Reshape
InferenceEngine::Precision netPrecision; InferenceEngine::Precision netPrecision;
std::vector<size_t> inputShape; std::vector<size_t> inputShape;
std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam(); bool withActivation;
std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
size_t in_total_dims_size = std::accumulate(std::begin(inputShape), std::end(inputShape), 1, std::multiplies<double>()); size_t in_total_dims_size = std::accumulate(std::begin(inputShape), std::end(inputShape), 1, std::multiplies<double>());
@ -199,14 +208,12 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
auto permute1 = std::make_shared<ngraph::opset1::Transpose>(reshape1, auto permute1 = std::make_shared<ngraph::opset1::Transpose>(reshape1,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 })); ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
auto relu1 = std::make_shared<ngraph::opset3::Relu>(permute1);
size_t num_out_channels = 12; size_t num_out_channels = 12;
size_t kernal_size = 8; size_t kernal_size = 8;
auto kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1}); auto kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
std::vector<float> filter_weights = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size, std::vector<float> filter_weights = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
-0.2f, 0.2f); -0.2f, 0.2f);
auto conv1 = ngraph::builder::makeConvolution(relu1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 }, auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels, false, filter_weights); ngraph::op::PadType::VALID, num_out_channels, false, filter_weights);
auto pool_kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, 2} : std::vector<size_t>{2, 1}); auto pool_kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, 2} : std::vector<size_t>{2, 1});
auto pool = ngraph::builder::makePooling(conv1, pool_kernal_shape, {0, 0}, {0, 0}, pool_kernal_shape, ngraph::op::RoundingType::FLOOR, auto pool = ngraph::builder::makePooling(conv1, pool_kernal_shape, {0, 0}, {0, 0}, pool_kernal_shape, ngraph::op::RoundingType::FLOOR,
@ -214,9 +221,14 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
size_t out_width = ((inputShape[2] - kernal_shape[1]) + 1) / pool_kernal_shape[1]; size_t out_width = ((inputShape[2] - kernal_shape[1]) + 1) / pool_kernal_shape[1];
size_t out_height = ((inputShape[1] - kernal_shape[0]) + 1) / pool_kernal_shape[0]; size_t out_height = ((inputShape[1] - kernal_shape[0]) + 1) / pool_kernal_shape[0];
auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(relu2, auto pool_output = pool;
if (withActivation) {
auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
pool_output = relu2;
}
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(pool_output,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 })); ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels }; std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
@ -480,8 +492,9 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs), ::testing::ValuesIn(configs),
::testing::ValuesIn(inputShapes)), ::testing::ValuesIn(inputShapes),
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName); ::testing::ValuesIn(std::vector<bool>{false, true})), // with activation
RemovePermutationsWithPoolAndActTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithTwoConvTest, INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithTwoConvTest,
::testing::Combine( ::testing::Combine(
@ -489,7 +502,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs), ::testing::ValuesIn(configs),
::testing::ValuesIn(inputShapes)), ::testing::ValuesIn(inputShapes)),
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName); RemovePermutationsWithTwoConvTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithEltwiseTest, INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsWithEltwiseTest,
::testing::Combine( ::testing::Combine(
@ -497,7 +510,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs), ::testing::ValuesIn(configs),
::testing::ValuesIn(inputShapes)), ::testing::ValuesIn(inputShapes)),
RemovePermutationsNHWCToNCHWPassTest::getTestCaseName); RemovePermutationsWithEltwiseTest::getTestCaseName);
} // namespace LayerTestsDefinitions } // namespace LayerTestsDefinitions

View File

@ -16,6 +16,8 @@ const std::map<std::string, std::string> supportedConfigKeysWithDefaults = {
{GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1.000000"}, {GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1.000000"},
{GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), ""}, {GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), ""},
{GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION), ""}, {GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION), ""},
{GNA_CONFIG_KEY(EXEC_TARGET), ""},
{GNA_CONFIG_KEY(COMPILE_TARGET), ""},
{GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT}, {GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT},
{GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(NO)}, {GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(NO)},
{CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)}, {CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)},
@ -104,28 +106,28 @@ TEST_F(GNAPluginConfigTest, GnaConfigDeviceModeTest) {
EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_HARDWARE)); EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_HARDWARE));
#else #else
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardware); EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardware);
EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation); EXPECT_EQ(config.swExactMode, false);
#endif #endif
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW); SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW);
#if GNA_LIB_VER == 1 #if GNA_LIB_VER == 1
EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_SOFTWARE)); EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_SOFTWARE));
#else #else
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeSoftware); EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeSoftware);
EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation); EXPECT_EQ(config.swExactMode, false);
#endif #endif
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT); SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT);
#if GNA_LIB_VER == 1 #if GNA_LIB_VER == 1
EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE)); EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE));
#else #else
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeSoftware); EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeSoftware);
EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersion1_0); EXPECT_EQ(config.swExactMode, true);
#endif #endif
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_AUTO); SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_AUTO);
#if GNA_LIB_VER == 1 #if GNA_LIB_VER == 1
EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_AUTO)); EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_AUTO));
#else #else
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeAuto); EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeAuto);
EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation); EXPECT_EQ(config.swExactMode, false);
#endif #endif
ExpectThrow(GNA_CONFIG_KEY(DEVICE_MODE), ""); ExpectThrow(GNA_CONFIG_KEY(DEVICE_MODE), "");
ExpectThrow(GNA_CONFIG_KEY(DEVICE_MODE), "abc"); ExpectThrow(GNA_CONFIG_KEY(DEVICE_MODE), "abc");
@ -187,3 +189,25 @@ TEST_F(GNAPluginConfigTest, GnaConfigSingleThreadTest) {
config.gnaFlags.gna_openmp_multithreading, config.gnaFlags.gna_openmp_multithreading,
true); true);
} }
TEST_F(GNAPluginConfigTest, GnaConfigGnaExecTargetTest) {
SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_2_0");
EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_2_0");
SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_0");
EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_3_0");
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_5");
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "0");
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_1_5");
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET");
}
TEST_F(GNAPluginConfigTest, GnaConfigGnaCompileTargetTest) {
SetAndCompare(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_2_0");
EXPECT_EQ(config.gnaCompileTarget, "GNA_TARGET_2_0");
SetAndCompare(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_0");
EXPECT_EQ(config.gnaCompileTarget, "GNA_TARGET_3_0");
ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_5");
ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "0");
ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_1_5");
ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET");
}

View File

@ -48,6 +48,8 @@ target_include_directories(${TARGET_NAME} PRIVATE
target_link_libraries(${TARGET_NAME} PRIVATE ${LIBRARIES}) target_link_libraries(${TARGET_NAME} PRIVATE ${LIBRARIES})
set_ie_threading_interface_for(${TARGET_NAME})
add_dependencies(${TARGET_NAME} ${DEPENDENCIES}) add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME}) add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})

View File

@ -21,6 +21,8 @@ file(GLOB SHARED_TESTS_SRC
add_library(${TARGET_NAME} STATIC ${SHARED_TESTS_SRC}) add_library(${TARGET_NAME} STATIC ${SHARED_TESTS_SRC})
add_dependencies(${TARGET_NAME} inference_engine_preproc MultiDevicePlugin mock_engine) add_dependencies(${TARGET_NAME} inference_engine_preproc MultiDevicePlugin mock_engine)
set_ie_threading_interface_for(${TARGET_NAME})
ie_faster_build(${TARGET_NAME} ie_faster_build(${TARGET_NAME}
UNITY UNITY
PCH PRIVATE "precomp.hpp" PCH PRIVATE "precomp.hpp"

View File

@ -25,8 +25,6 @@
#include <ngraph_functions/builders.hpp> #include <ngraph_functions/builders.hpp>
#include <functional_test_utils/blob_utils.hpp> #include <functional_test_utils/blob_utils.hpp>
#include "ie_parallel.hpp"
using namespace ::testing; using namespace ::testing;
using namespace InferenceEngine; using namespace InferenceEngine;

View File

@ -49,6 +49,7 @@ protected:
} }
void TearDown() override { void TearDown() override {
PluginCache::get().reset();
} }
std::string ConvNet(const int batch, TBlob<uint8_t>::Ptr &weights) { std::string ConvNet(const int batch, TBlob<uint8_t>::Ptr &weights) {

View File

@ -139,6 +139,7 @@ protected:
} }
void TearDown() override { void TearDown() override {
PluginCache::get().reset();
} }
template <Precision::ePrecision PRC> template <Precision::ePrecision PRC>

View File

@ -83,7 +83,6 @@ source_group("include" FILES ${TEST_INCLUDE})
# create target # create target
add_executable(${TARGET_NAME} ${TEST_SRC} ${TEST_INCLUDE}) add_executable(${TARGET_NAME} ${TEST_SRC} ${TEST_INCLUDE})
set_ie_threading_interface_for(${TARGET_NAME})
target_include_directories(${TARGET_NAME} PRIVATE target_include_directories(${TARGET_NAME} PRIVATE
${IE_MAIN_SOURCE_DIR}/src/gna_plugin ${IE_MAIN_SOURCE_DIR}/src/gna_plugin

View File

@ -147,7 +147,7 @@ TEST_F(PWLAproximationTest, forReLUonRecursiveAlgoWithSegmentThresholdIsSuccess)
.propagate_forward() .propagate_forward()
.called_with() .called_with()
.pwl_quantization_activation(DnnActivationType::kActRelu) .pwl_quantization_activation(DnnActivationType::kActRelu)
.pwl_quantization_segments_threshold(2); .pwl_quantization_segments_threshold(4);
} }
TEST_F(PWLAproximationTest, forLeakyReLUonRecursiveAlgoWithSegmentThresholdIsSuccess) { TEST_F(PWLAproximationTest, forLeakyReLUonRecursiveAlgoWithSegmentThresholdIsSuccess) {
@ -157,7 +157,7 @@ TEST_F(PWLAproximationTest, forLeakyReLUonRecursiveAlgoWithSegmentThresholdIsSuc
.propagate_forward() .propagate_forward()
.called_with() .called_with()
.pwl_quantization_activation(DnnActivationType::kActLeakyRelu) .pwl_quantization_activation(DnnActivationType::kActLeakyRelu)
.pwl_quantization_segments_threshold(2); .pwl_quantization_segments_threshold(4);
} }
TEST_F(PWLAproximationTest, DISABLED_forIdentityOnRecursiveAlgoWithSegmentThresholdIsSuccess) { TEST_F(PWLAproximationTest, DISABLED_forIdentityOnRecursiveAlgoWithSegmentThresholdIsSuccess) {

View File

@ -102,8 +102,14 @@ class PWLMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
} }
switch (slopeChangedTimes) { switch (slopeChangedTimes) {
case 2 : return kActRelu; // also relu has y=0 segment while identity doenst have case 3 :
case 3 : return kActIdentity; if (comp.op.pwl.num_segments == 4) {
// ReLU has y=0 segment while identity doesn't have
// 2 segments are added: one at the begining and one at the end, due to saturation errata
return kActRelu;
} else {
return kActIdentity;
}
default: default:
// currently cannot determine between sigmoid or tanh etc // currently cannot determine between sigmoid or tanh etc
if (slopeChangedTimes > 3) { if (slopeChangedTimes > 3) {

View File

@ -36,8 +36,15 @@ void prepare_quantization::prepare_packed_quantize(program_impl& p) {
auto levels = quantize_node.get_primitive()->levels; auto levels = quantize_node.get_primitive()->levels;
auto &input_low = quantize_node.get_dependency(1).template as<data>(); program_node &input_low_node = quantize_node.get_dependency(1);
auto &input_high = quantize_node.get_dependency(2).template as<data>(); program_node &input_high_node = quantize_node.get_dependency(2);
if (!input_low_node.is_type<data>() || !input_high_node.is_type<data>()) {
return;
}
auto &input_low = input_low_node.as<data>();
auto &input_high = input_high_node.as<data>();
auto &mem_input_low = input_low.get_attached_memory(); auto &mem_input_low = input_low.get_attached_memory();
auto &mem_input_high = input_high.get_attached_memory(); auto &mem_input_high = input_high.get_attached_memory();
@ -99,10 +106,20 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) {
if (levels == 2 || levels > 256 || quantize_node.get_scale_shift_opt() || quantize_node.is_constant()) if (levels == 2 || levels > 256 || quantize_node.get_scale_shift_opt() || quantize_node.is_constant())
return; return;
auto &input_low = quantize_node.get_dependency(1).template as<data>(); program_node &input_low_node = quantize_node.get_dependency(1);
auto &input_high = quantize_node.get_dependency(2).template as<data>(); program_node &input_high_node = quantize_node.get_dependency(2);
auto &output_low = quantize_node.get_dependency(3).template as<data>(); program_node &output_low_node = quantize_node.get_dependency(3);
auto &output_high = quantize_node.get_dependency(4).template as<data>(); program_node &output_high_node = quantize_node.get_dependency(4);
if (!input_low_node.is_type<data>() || !input_high_node.is_type<data>() ||
!output_low_node.is_type<data>() || !output_high_node.is_type<data>()) {
return;
}
auto &input_low = input_low_node.as<data>();
auto &input_high = input_high_node.as<data>();
auto &output_low = output_low_node.as<data>();
auto &output_high = output_high_node.as<data>();
auto &mem_input_low = input_low.get_attached_memory(); auto &mem_input_low = input_low.get_attached_memory();
auto &mem_input_high = input_high.get_attached_memory(); auto &mem_input_high = input_high.get_attached_memory();

View File

@ -60,6 +60,7 @@ extensions/back/ShufflenetReLUReorder.py
extensions/back/SpecialNodesFinalization.py extensions/back/SpecialNodesFinalization.py
extensions/back/StridedSliceMasksNormalizer.py extensions/back/StridedSliceMasksNormalizer.py
extensions/back/TopKNormalizer.py extensions/back/TopKNormalizer.py
extensions/back/TransposeDFT.py
extensions/back/TransposeReduceFusing.py extensions/back/TransposeReduceFusing.py
extensions/back/UselessConcatRemoval.py extensions/back/UselessConcatRemoval.py
extensions/front/__init__.py extensions/front/__init__.py
@ -196,6 +197,7 @@ extensions/front/mxnet/elementwise_ext.py
extensions/front/mxnet/eltwise_scalar_replacers.py extensions/front/mxnet/eltwise_scalar_replacers.py
extensions/front/mxnet/exp_ext.py extensions/front/mxnet/exp_ext.py
extensions/front/mxnet/expand_dims_ext.py extensions/front/mxnet/expand_dims_ext.py
extensions/front/mxnet/fft_ext.py
extensions/front/mxnet/flatten_ext.py extensions/front/mxnet/flatten_ext.py
extensions/front/mxnet/fully_connected_ext.py extensions/front/mxnet/fully_connected_ext.py
extensions/front/mxnet/gather.py extensions/front/mxnet/gather.py
@ -208,6 +210,7 @@ extensions/front/mxnet/max_ext.py
extensions/front/mxnet/multibox_detection_ext.py extensions/front/mxnet/multibox_detection_ext.py
extensions/front/mxnet/mx_reshape_reverse.py extensions/front/mxnet/mx_reshape_reverse.py
extensions/front/mxnet/mx_reshape_to_reshape.py extensions/front/mxnet/mx_reshape_to_reshape.py
extensions/front/mxnet/MXFFTToDFT.py
extensions/front/mxnet/MXRepeatReplacer.py extensions/front/mxnet/MXRepeatReplacer.py
extensions/front/mxnet/null_ext.py extensions/front/mxnet/null_ext.py
extensions/front/mxnet/pad_ext.py extensions/front/mxnet/pad_ext.py
@ -383,10 +386,13 @@ extensions/front/tf/broadcast_ext.py
extensions/front/tf/bucketize.py extensions/front/tf/bucketize.py
extensions/front/tf/bucketize_ext.py extensions/front/tf/bucketize_ext.py
extensions/front/tf/Cast_ext.py extensions/front/tf/Cast_ext.py
extensions/front/tf/ComplexAbs.py
extensions/front/tf/ComplexAbsAfterComplex.py
extensions/front/tf/concat.py extensions/front/tf/concat.py
extensions/front/tf/concat_ext.py extensions/front/tf/concat_ext.py
extensions/front/tf/const_ext.py extensions/front/tf/const_ext.py
extensions/front/tf/conv_ext.py extensions/front/tf/conv_ext.py
extensions/front/tf/CorrectRollAxes.py
extensions/front/tf/crop_and_resize_ext.py extensions/front/tf/crop_and_resize_ext.py
extensions/front/tf/CropAndResizeReplacement.py extensions/front/tf/CropAndResizeReplacement.py
extensions/front/tf/CTCGreedyDecoder_ext.py extensions/front/tf/CTCGreedyDecoder_ext.py
@ -413,6 +419,7 @@ extensions/front/tf/faster_rcnn_support_api_v1.15.json
extensions/front/tf/faster_rcnn_support_api_v1.7.json extensions/front/tf/faster_rcnn_support_api_v1.7.json
extensions/front/tf/faster_rcnn_support_api_v2.0.json extensions/front/tf/faster_rcnn_support_api_v2.0.json
extensions/front/tf/faster_rcnn_support_api_v2.4.json extensions/front/tf/faster_rcnn_support_api_v2.4.json
extensions/front/tf/fft_ext.py
extensions/front/tf/fifo_queue_v2_ext.py extensions/front/tf/fifo_queue_v2_ext.py
extensions/front/tf/fifo_replacer.py extensions/front/tf/fifo_replacer.py
extensions/front/tf/fill_ext.py extensions/front/tf/fill_ext.py
@ -471,6 +478,7 @@ extensions/front/tf/rfcn_support_api_v1.10.json
extensions/front/tf/rfcn_support_api_v1.13.json extensions/front/tf/rfcn_support_api_v1.13.json
extensions/front/tf/rfcn_support_api_v1.14.json extensions/front/tf/rfcn_support_api_v1.14.json
extensions/front/tf/roll_ext.py extensions/front/tf/roll_ext.py
extensions/front/tf/RollRealImagPack.py
extensions/front/tf/select_ext.py extensions/front/tf/select_ext.py
extensions/front/tf/sign_ext.py extensions/front/tf/sign_ext.py
extensions/front/tf/SizeReplacer.py extensions/front/tf/SizeReplacer.py
@ -495,12 +503,14 @@ extensions/front/tf/ssd_toolbox_detection_output.json
extensions/front/tf/ssd_toolbox_multihead_detection_output.json extensions/front/tf/ssd_toolbox_multihead_detection_output.json
extensions/front/tf/ssd_v2_support.json extensions/front/tf/ssd_v2_support.json
extensions/front/tf/SSDToolboxDetectionOutput.py extensions/front/tf/SSDToolboxDetectionOutput.py
extensions/front/tf/SSliceComplex.py
extensions/front/tf/swap_deconv_inputs.py extensions/front/tf/swap_deconv_inputs.py
extensions/front/tf/swish_ext.py extensions/front/tf/swish_ext.py
extensions/front/tf/SwitchMergeOptimization.py extensions/front/tf/SwitchMergeOptimization.py
extensions/front/tf/TensorArrayExtractors.py extensions/front/tf/TensorArrayExtractors.py
extensions/front/tf/TensorArrayGatherV3.py extensions/front/tf/TensorArrayGatherV3.py
extensions/front/tf/tensorflow_custom_operations_config_update.py extensions/front/tf/tensorflow_custom_operations_config_update.py
extensions/front/tf/TFFFTToDFT.py
extensions/front/tf/TFResizeToInterpolate.py extensions/front/tf/TFResizeToInterpolate.py
extensions/front/tf/TFSliceToSlice.py extensions/front/tf/TFSliceToSlice.py
extensions/front/tf/tile_ext.py extensions/front/tf/tile_ext.py
@ -667,6 +677,7 @@ extensions/ops/depth_to_space.py
extensions/ops/dequantize_linear.py extensions/ops/dequantize_linear.py
extensions/ops/DetectionOutput.py extensions/ops/DetectionOutput.py
extensions/ops/detectionoutput_onnx.py extensions/ops/detectionoutput_onnx.py
extensions/ops/dft.py
extensions/ops/elementwise.py extensions/ops/elementwise.py
extensions/ops/embedding_bag.py extensions/ops/embedding_bag.py
extensions/ops/Enter.py extensions/ops/Enter.py
@ -695,6 +706,7 @@ extensions/ops/lstm_sequence.py
extensions/ops/MatMul.py extensions/ops/MatMul.py
extensions/ops/merge.py extensions/ops/merge.py
extensions/ops/mvn.py extensions/ops/mvn.py
extensions/ops/mxfft.py
extensions/ops/mxrepeat.py extensions/ops/mxrepeat.py
extensions/ops/mxreshape.py extensions/ops/mxreshape.py
extensions/ops/NextIteration.py extensions/ops/NextIteration.py
@ -760,6 +772,7 @@ extensions/ops/TensorArrayScatter.py
extensions/ops/TensorArraySize.py extensions/ops/TensorArraySize.py
extensions/ops/TensorArrayWrite.py extensions/ops/TensorArrayWrite.py
extensions/ops/TensorIterator_ops.py extensions/ops/TensorIterator_ops.py
extensions/ops/TFFFT.py
extensions/ops/TFResize.py extensions/ops/TFResize.py
extensions/ops/topk.py extensions/ops/topk.py
extensions/ops/topkrois_onnx.py extensions/ops/topkrois_onnx.py

View File

@ -1,13 +1,8 @@
# Copyright (C) 2018-2021 Intel Corporation # Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import numpy as np
from extensions.ops.transpose import Transpose
from mo.front.common.partial_infer.utils import int64_array
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, Port
from mo.back.replacement import BackReplacementPattern from mo.back.replacement import BackReplacementPattern
from mo.graph.graph import Graph
class LayoutChangeForGatherND(BackReplacementPattern): class LayoutChangeForGatherND(BackReplacementPattern):
@ -19,31 +14,9 @@ class LayoutChangeForGatherND(BackReplacementPattern):
force_shape_inference = True force_shape_inference = True
graph_condition = [lambda graph: graph.graph['fw'] == 'tf'] graph_condition = [lambda graph: graph.graph['fw'] == 'tf']
@staticmethod
def insert_transpose(graph: Graph, input_port: Port, before_input=True):
input_rank = len(input_port.data.get_shape())
if input_rank > 3:
if before_input:
axis_order = np.concatenate((int64_array([0]),
int64_array(list(range(2, input_rank))),
int64_array([1])))
source_node = input_port.get_source().node
transpose_name = source_node.soft_get('name', source_node.id) + '/TransposeToNHWC'
else:
axis_order = np.concatenate(
(int64_array([0]),
int64_array([input_rank - 1]),
int64_array(list(range(1, input_rank - 1)))))
transpose_name = input_port.node.soft_get('name', input_port.node.id) + '/TransposeToNCHW'
input_port.node['need_shape_inference'] = True
input_port.node['override_output_shape'] = True
transpose = create_op_with_const_inputs(graph, Transpose, {1: axis_order}, {'name': transpose_name})
input_port.get_connection().insert_node(transpose)
transpose['need_shape_inference'] = True
transpose['override_output_shape'] = True
def find_and_replace_pattern(self, graph: Graph): def find_and_replace_pattern(self, graph: Graph):
import extensions.middle.InsertLayoutPropagationTransposes as InsertTransposes
for gathernd in graph.get_op_nodes(type='GatherND'): for gathernd in graph.get_op_nodes(type='GatherND'):
self.insert_transpose(graph, gathernd.in_port(0), before_input=True) InsertTransposes.insert_transpose(graph, gathernd.in_port(0), before_input=True)
self.insert_transpose(graph, gathernd.in_port(1), before_input=True) InsertTransposes.insert_transpose(graph, gathernd.in_port(1), before_input=True)
self.insert_transpose(graph, gathernd.out_port(0), before_input=False) InsertTransposes.insert_transpose(graph, gathernd.out_port(0), before_input=False)

View File

@ -310,7 +310,13 @@ class ReverseChannelsPropagationUp(BackReplacementPattern):
reverse_channels_copy = reverse_channels.copy_node({'axis': np.array(axis)}) reverse_channels_copy = reverse_channels.copy_node({'axis': np.array(axis)})
src = port.get_connection().get_source() src = port.get_connection().get_source()
port.get_connection().set_source(reverse_channels_copy.out_port(0)) if src.node.soft_get('type') == 'Parameter':
# For Parameter nodes tensor debug attributes should not move to the last node
# of subgraph. It is needed for the proper mapping of input framework name.
# For this reason "source" mode is used to keep tensor debug attributes at Parameter node.
port.get_connection().set_source(reverse_channels_copy.out_port(0), attributes_save_mode="source")
else:
port.get_connection().set_source(reverse_channels_copy.out_port(0))
src.connect(reverse_channels_copy.in_port(0)) src.connect(reverse_channels_copy.in_port(0))
copies.append(reverse_channels_copy) copies.append(reverse_channels_copy)

View File

@ -0,0 +1,34 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from mo.back.replacement import BackReplacementPattern
from mo.graph.graph import Graph
class TransposeDFT(BackReplacementPattern):
"""
In TF models, operation (I)FFTxD has some input shape, [N_0, ..., N_{r - 1}].
After the transformation SSliceComplexRolledFFTPackBlockReplacement, we have an input shape [N_0, ..., N_{r - 1}, 2]
for operation DFT or IDFT.
If the input rank in the TF model was greater than 2, we have [N_0, 2, N_1, ..., N_{r - 1}] as the input shape of
(I)DFT after the layout conversion, if the option '--disable_nhwc_to_nchw' is not specified.
But, generally speaking, according to DFT and IDFT specifications, the input shape [N_0, 2, N_1, ..., N_{r - 1}]
is not correct input shape for DFT and IDFT. Hence, we need to insert Transpose operations before and after (I)DFT
in such cases.
This transformation inserts such Transpose nodes, when the source model was the TF model, (I)DFT node has the
attribute 'need_insert_transposes_for_dft', and this attribute is True.
"""
enabled = True
force_shape_inference = True
graph_condition = [lambda graph: graph.graph['fw'] == 'tf']
def find_and_replace_pattern(self, graph: Graph):
import extensions.middle.InsertLayoutPropagationTransposes as InsertTransposes
for dft in graph.get_op_nodes(need_insert_transposes_for_dft=True):
InsertTransposes.insert_transpose(graph, dft.in_port(0), before_input=True)
InsertTransposes.insert_transpose(graph, dft.out_port(0), before_input=False)

View File

@ -36,3 +36,4 @@ class ThresholdedReluDecomposition(FrontReplacementPattern):
mul.in_port(1).connect(float_greater.out_port(0)) mul.in_port(1).connect(float_greater.out_port(0))
rename_nodes([(node, name + '/TBR'), (mul, name)]) rename_nodes([(node, name + '/TBR'), (mul, name)])
graph.remove_node(node.id)

View File

@ -0,0 +1,140 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import numpy as np
from extensions.ops.dft import DFT, IDFT
from extensions.ops.elementwise import Add, Sub
from extensions.ops.rank import Rank
from extensions.ops.scatter import ScatterUpdate
from extensions.ops.split import Split
from mo.front.common.partial_infer.utils import int64_array
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, Node, rename_nodes
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.ops.broadcast import Broadcast
from mo.ops.concat import Concat
from mo.ops.pad import Pad
from mo.ops.reshape import Reshape
from mo.ops.squeeze import Squeeze
from mo.ops.unsqueeze import Unsqueeze
class MXFFTToDFT(FrontReplacementSubgraph):
"""
This transformation converts the operation MXFFT into OpenVINO DFT (if the attribute 'is_inverse' is False),
or into OpenVINO IDFT (otherwise).
According to https://mxnet.apache.org/versions/1.0.0/api/python/symbol/contrib.html#mxnet.symbol.contrib.fft,
MxNet operation FFT accept 2 input data shapes: [N, d] or [N_1, N_2, N_3, d], data can only be real numbers.
The output data has shape: [N, 2*d] or [N_1, N_2, N_3, 2*d]. The format is: [real0, imag0, real1, imag1, ...].
Next, MxNet operation IFFT accept 2 input data shapes: [N, d] or [N_1, N_2, N_3, d]. Data is in format:
[real0, imag0, real1, imag1, ...]. Last dimension must be an even number. The output data has shape: [N, d/2] or
[N_1, N_2, N_3, d/2]. It is only the real part of the result.
But OpenVINO DFT and IDFT operations uses complex input data represented as real tensors of the shape
[N_1, ..., N_r, 2]. Also, the result of OpenVINO DFT and IDFT operations is always complex but represented as
a real tensor of the shape [M_1, ..., M_r, 2]. If OpenVINO DFT or IDFT have no input signal_size, the output shape
and the input shape are the same.
Hence, to convert MxNet FFT to OpenVINO DFT, we need
1) to convert input data from the shape [N, d] or [N_1, N_2, N_3, d] to shape [N, d, 1] or [N_1, N_2, N_3, d, 1]
respectively;
2) to pad converted data using pads_begin = [0, 0, 0] and pads_end = [0, 0, 1] for MxNet FFT input shape [N, d], or
using pads_begin [0, 0, 0, 0, 0] and pads_end = [0, 0, 0, 0, 1] for MxNet FFT input shape [N_1, N_2, N_3, d],
with mode=constant;
3) to put padded data into DFT input 0, using (-1) in 'axes' input;
4) to reshape calculated DFT output to the shape [N, 2 * d] for for MxNet FFT input shape [N, d], or to the shape
[N_1, N_2, N_3, 2 * d]
Finally, to convert MxNet IFFT to OpenVINO IDFT, we need
1) to reshape input data from the shape [N, d] or [N_1, N_2, N_3, d] to shape [N, d // 2, 2] or
[N_1, N_2, N_3, d // 2, 2] respectively;
2) to put reshaped input data to the input 0 of IDFT, using (-1) in 'axes' input;
3) to get real parts using Split + Squeeze.
"""
enabled = True
def find_and_replace_pattern(self, graph: Graph):
for mx_fft in graph.get_op_nodes(op='MXFFT'):
if mx_fft.soft_get('is_inverse', False):
self.convert_ifft_to_dft(graph, mx_fft)
else:
self.convert_fft_to_dft(graph, mx_fft)
def convert_fft_to_dft(self, graph: Graph, mx_fft: Node):
mx_fft_name = mx_fft.soft_get('name', mx_fft.id)
unsqueeze_node = create_op_with_const_inputs(graph, Unsqueeze, {1: int64_array([-1])},
{'name': mx_fft_name + '/Unsqueeze'})
rank_node = Rank(graph, {'name': mx_fft_name + '/Rank'}).create_node()
mx_fft_connection = mx_fft.in_port(0).get_connection()
mx_fft_connection.set_destination(unsqueeze_node.in_port(0))
mx_fft_connection.get_source().connect(rank_node.in_port(0))
add_node = create_op_with_const_inputs(graph, Add, {1: int64_array(1)},
{'name': mx_fft_name + '/Add'}, rank_node)
broadcast_node1 = create_op_with_const_inputs(graph, Broadcast, {0: int64_array(0)},
{'name': mx_fft_name + '/Pad_broadcast'})
add_node.out_port(0).connect(broadcast_node1.in_port(1))
scatter_node = create_op_with_const_inputs(graph, ScatterUpdate,
{2: int64_array(1), 3: int64_array(0)},
{'name': mx_fft_name + '/ScatterUpdate'})
broadcast_node1.out_port(0).connect(scatter_node.in_port(0))
rank_node.out_port(0).connect(scatter_node.in_port(1))
pad_node = Pad(graph, {'name': mx_fft_name + '/Pad', 'mode': 'constant'}).create_node([unsqueeze_node,
broadcast_node1,
scatter_node])
dft_node = create_op_with_const_inputs(graph, DFT, {1: int64_array([-1])},
{'name': mx_fft_name + '/DFT', 'in_ports_count': 2},
pad_node)
sub_node = create_op_with_const_inputs(graph, Sub, {1: int64_array(1)}, {'name': mx_fft_name + '/Sub'})
rank_node.out_port(0).connect(sub_node.in_port(0))
broadcast_node2 = create_op_with_const_inputs(graph, Broadcast, {0: int64_array(0)},
{'name': mx_fft_name + '/Reshape_broadcast'})
sub_node.out_port(0).connect(broadcast_node2.in_port(1))
concat_node = create_op_with_const_inputs(graph, Concat, {1: int64_array([-1, 2])},
{'name': mx_fft_name + '/New_shape', 'in_ports_count': 2, 'axis': 0},
broadcast_node2)
reshape_node = Reshape(graph, {}).create_node([dft_node, concat_node])
mx_fft.out_port(0).get_connection().set_source(reshape_node.out_port(0))
rename_nodes([(mx_fft, mx_fft_name + '/to_be_removed'), (reshape_node, mx_fft_name)])
def convert_ifft_to_dft(self, graph: Graph, mx_fft: Node):
mx_fft_name = mx_fft.soft_get('name', mx_fft.id)
rank_node = Rank(graph, {'name': mx_fft_name + '/rank'}).create_node()
sub_node = create_op_with_const_inputs(graph, Sub, {1: int64_array(1)}, {'name': mx_fft_name + '/Sub'})
rank_node.out_port(0).connect(sub_node.in_port(0))
broadcast_node0 = create_op_with_const_inputs(graph, Broadcast, {0: int64_array(0)},
{'name': mx_fft_name + '/broadcast'})
sub_node.out_port(0).connect(broadcast_node0.in_port(1))
concat_node = create_op_with_const_inputs(graph, Concat, {1: int64_array([-1, 2])},
{'name': mx_fft_name + '/new_shape', 'in_ports_count': 2, 'axis': 0},
broadcast_node0)
reshape_node = Reshape(graph, {'name': mx_fft_name + '/reshape'}).create_node()
concat_node.out_port(0).connect(reshape_node.in_port(1))
mx_fft_connection = mx_fft.in_port(0).get_connection()
mx_fft_connection.set_destination(reshape_node.in_port(0))
mx_fft_connection.get_source().connect(rank_node.in_port(0))
dft_node = create_op_with_const_inputs(graph, IDFT, {1: int64_array([-1])},
{'name': mx_fft_name + '/idft', 'in_ports_count': 2},
reshape_node)
split_node = create_op_with_const_inputs(graph, Split, {1: int64_array(-1)},
{'name': mx_fft_name + '/split', 'num_splits': 2},
dft_node)
squeeze_node = create_op_with_const_inputs(graph, Squeeze, {1: int64_array([-1])}, {}, split_node)
mx_fft.out_port(0).get_connection().set_source(squeeze_node.out_port(0))
rename_nodes([(mx_fft, mx_fft_name + '/to_be_removed'), (squeeze_node, mx_fft_name)])

View File

@ -0,0 +1,25 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from extensions.ops.mxfft import MXFFT
from mo.front.extractor import FrontExtractorOp
class FFTFrontExtractor(FrontExtractorOp):
op = 'fft'
enabled = True
@classmethod
def extract(cls, node):
MXFFT.update_node_stat(node, {'is_inverse': False})
return cls.enabled
class IFFTFrontExtractor(FrontExtractorOp):
op = 'ifft'
enabled = True
@classmethod
def extract(cls, node):
MXFFT.update_node_stat(node, {'is_inverse': True})
return cls.enabled

View File

@ -0,0 +1,36 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from extensions.ops.elementwise import Pow
from extensions.ops.ReduceOps import ReduceSum
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, rename_nodes
from mo.middle.passes.convert_data_type import data_type_str_to_np
class ComplexAbs(FrontReplacementSubgraph):
enabled = True
def run_after(self):
from extensions.front.tf.ComplexAbsAfterComplex import ComplexAbsAfterComplex
return [ComplexAbsAfterComplex]
def find_and_replace_pattern(self, graph: Graph):
for complex_abs in graph.get_op_nodes(op='ComplexAbs'):
complex_abs_name = complex_abs.soft_get('name', complex_abs.id)
power_type = data_type_str_to_np(graph.graph['cmd_params'].data_type)
squared = create_op_with_const_inputs(graph, Pow, {1: power_type(2.0)},
{'name': complex_abs_name + '/squared_parts'})
complex_abs.in_port(0).get_connection().set_destination(squared.in_port(0))
sum = create_op_with_const_inputs(graph, ReduceSum, {1: int64_array(-1)},
{'name': complex_abs_name + '/squared_abs'},
squared)
sqrt = create_op_with_const_inputs(graph, Pow, {1: power_type(0.5)}, {}, sum)
complex_abs.out_port(0).get_connection().set_source(sqrt.out_port(0))
rename_nodes([(complex_abs, complex_abs_name + '/to_be_removed'), (sqrt, complex_abs_name)])

View File

@ -0,0 +1,69 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from extensions.ops.elementwise import Add, Pow
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.subgraph_matcher import SubgraphMatch
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, rename_nodes
from mo.middle.passes.convert_data_type import data_type_str_to_np
class ComplexAbsAfterComplex(FrontReplacementSubgraph):
"""
This transformation converts a sub-graph
SomeOp1 SomeOp2
| |
------------
|
Complex
|
ComplexAbs
into the sub-graph
SomeOp1 SomeOp2
| |
Constant[2]--Pow Pow--Constant[2]
| |
-------------
Add
|
Pow--Constant[0.5]
"""
enabled = True
def pattern(self):
return dict(
nodes=[
('complex', dict(op='Complex')),
('abs', dict(op='ComplexAbs')),
],
edges=[
('complex', 'abs', {'in': 0}),
])
def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
cmp = match['complex']
complex_abs = match['abs']
complex_abs_name = complex_abs.soft_get('name', complex_abs.id)
power_type = data_type_str_to_np(graph.graph['cmd_params'].data_type)
pow0 = create_op_with_const_inputs(graph, Pow, {1: power_type(2.0)},
{'name': complex_abs_name + '/real_part_squared'})
pow1 = create_op_with_const_inputs(graph, Pow, {1: power_type(2.0)},
{'name': complex_abs_name + '/imag_part_squared'})
cmp.in_port(0).get_connection().set_destination(pow0.in_port(0))
cmp.in_port(1).get_connection().set_destination(pow1.in_port(0))
add = Add(graph, {'name': complex_abs_name + '/squared_abs'}).create_node([pow0, pow1])
sqrt = create_op_with_const_inputs(graph, Pow, {1: power_type(0.5)}, {})
add.out_port(0).connect(sqrt.in_port(0))
complex_abs.out_port(0).get_connection().set_source(sqrt.out_port(0))
rename_nodes([(complex_abs, complex_abs_name + '/to_be_removed'), (sqrt, complex_abs_name)])

View File

@ -0,0 +1,27 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.tf.graph_utils import add_constant_to_negative_values
from mo.graph.graph import Graph
class CorrectRollAxes(FrontReplacementSubgraph):
"""
The transformation SSliceComplex removes 2 StridedSlice and Complex operation. If the Roll node is a consumer
of Complex node in the original TF model, then we have a real input tensor for Roll instead of a complex.
Negative axes values for the Roll operation should be updated to reflect the fact that the rank of input tensor was
increased by one (a new trailing dimension of size 2 containing real and imaginary part of complex number is added).
"""
enabled = True
def run_after(self):
from extensions.front.tf.SSliceComplex import SSliceComplex
return [SSliceComplex]
def find_and_replace_pattern(self, graph: Graph):
for roll in graph.get_op_nodes(op='Roll', input_rank_changed=True):
add_constant_to_negative_values(roll, 2, int64_array(-1))
del roll['input_rank_changed']

View File

@ -765,6 +765,7 @@ class ObjectDetectionAPIPreprocessor2Replacement(FrontReplacementFromConfigFileG
else: # case 1 else: # case 1
# change output of the end_node to be produced with the last preprocessing op # change output of the end_node to be produced with the last preprocessing op
end_node.out_port(0).get_connection().set_source(pre_processing_ops[-1][0].out_port(0)) end_node.out_port(0).get_connection().set_source(pre_processing_ops[-1][0].out_port(0))
start_node.in_port(0).disconnect()
else: # simply remove the nodes in between start_node and end_node (including them). Case 3 and 6 else: # simply remove the nodes in between start_node and end_node (including them). Case 3 and 6
end_node.out_port(0).get_connection().set_source(start_node.in_port(0).get_source()) end_node.out_port(0).get_connection().set_source(start_node.in_port(0).get_source())

View File

@ -0,0 +1,73 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.subgraph_matcher import SubgraphMatch
from mo.front.tf.graph_utils import add_constant_to_negative_values
from mo.graph.graph import Graph
class RollRealImagPack(FrontReplacementSubgraph):
"""
Some TF models contain Roll for complex data, as a part of the sub-graph
input shift axes
| | |
-------------------
Roll
|
-------------------
| |
Real Imag
| |
------- -------
| |
Pack
|
SomeOp
This sub-graph can be replaced with the sub-graph
input shift axes
| | |
-------------------
Roll
|
SomeOp
But after such replacement, we should correct axes of Roll, because input data are real now. Namely, if
there are negative axes for Roll, we need subtract 1 from such axes indices.
"""
enabled = True
def run_after(self):
from extensions.front.tf.SSliceComplex import SSliceComplex
return [SSliceComplex]
def run_before(self):
from extensions.front.Pack import Pack
return [Pack]
def pattern(self):
return dict(
nodes=[
('unroll', dict(op='Roll')),
('real', dict(op='Real')),
('imag', dict(op='Imag')),
('pack', dict(op='Pack')),
],
edges=[
('unroll', 'real', {'in': 0}),
('unroll', 'imag', {'in': 0}),
('real', 'pack', {'in': 0}),
('imag', 'pack', {'in': 1}),
])
def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
unroll = match['unroll']
add_constant_to_negative_values(unroll, 2, int64_array(-1))
pack = match['pack']
pack.out_port(0).get_connection().set_source(unroll.out_port(0))
graph.remove_nodes_from([match['real'].id, match['imag'].id])

View File

@ -0,0 +1,70 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import logging as log
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.subgraph_matcher import SubgraphMatch
from mo.graph.graph import Graph
class SSliceComplex(FrontReplacementSubgraph):
"""
Some TF models contain the sub-graph
SomeOp
|
--------------------------
| |
StridedSlice StridedSlice
| |
------------------------
Complex
|
| other inputs
| | ... |
-------------------
SomeOp1
Here SomeOp is some node with real output and with the shape [N_0, ..., N_{r - 1}, 2], and StridedSlice nodes
have output shapes [N_0, ..., N_{r - 1}].
But MO and Inference Engine do not support complex tensors. Hence, we need to replace this sub-graph with
SomeOp other inputs
| | ... |
-------------------
SomeOp1
After this transformation we need to mark SomeOp1 operation that its input rank has changed because
its inputs/attributes should probably be updated. Currently we have such a case for a Roll operation.
"""
enabled = True
def pattern(self):
return dict(
nodes=[
('strided_slice_real', dict(op='StridedSlice')),
('strided_slice_imag', dict(op='StridedSlice')),
('complex', dict(op='Complex')),
],
edges=[
('strided_slice_real', 'complex', {'in': 0}),
('strided_slice_imag', 'complex', {'in': 1}),
])
def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
strided_slice_real = match['strided_slice_real']
strided_slice_imag = match['strided_slice_imag']
real_input = strided_slice_real.in_port(0).get_source().node
imag_input = strided_slice_imag.in_port(0).get_source().node
if real_input.id != imag_input.id:
log.debug('The pattern does not correspond to operation for complex tensor. Different inputs.')
return
complex_node = match['complex']
for dst in complex_node.out_port(0).get_connection().get_destinations():
after_complex_node = dst.node
after_complex_node['input_rank_changed'] = True
complex_node.out_port(0).get_connection().set_source(strided_slice_real.in_port(0).get_source())

View File

@ -0,0 +1,38 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from extensions.ops.dft import DFT, IDFT
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, rename_nodes
class TFFFTToDFT(FrontReplacementSubgraph):
"""
This transformation converts the operation TFFFT into OpenVINO DFT (if the attribute 'is_inverse' is False),
or into OpenVINO IDFT (otherwise).
"""
enabled = True
def run_after(self):
from extensions.front.tf.RollRealImagPack import RollRealImagPack
return [RollRealImagPack]
def find_and_replace_pattern(self, graph: Graph):
for tf_fft in graph.get_op_nodes(op='TFFFT'):
tf_fft_name = tf_fft.soft_get('name', tf_fft.id)
num_of_dims = tf_fft.soft_get('num_of_dimensions', 1)
axes = int64_array(range(-num_of_dims, 0))
op = IDFT if tf_fft.soft_get('is_inverse', False) else DFT
dft_node = create_op_with_const_inputs(graph, op, {1: axes}, {'in_ports_count': 2},
tf_fft.in_port(0).get_source().node)
tf_fft.out_port(0).get_connection().set_source(dft_node.out_port(0))
rename_nodes([(tf_fft, tf_fft_name + '/to_be_removed'), (dft_node, tf_fft_name)])
if graph.graph['layout'] == 'NHWC':
dft_node['need_insert_transposes_for_dft'] = True

View File

@ -0,0 +1,71 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from extensions.ops.TFFFT import TFFFT
from mo.front.extractor import FrontExtractorOp
class FFT1DOpFrontExtractor(FrontExtractorOp):
op = 'FFT'
enabled = True
@classmethod
def extract(cls, node):
attrs = {'num_of_dimensions': 1, 'is_inverse': False}
TFFFT.update_node_stat(node, attrs)
return cls.enabled
class FFT2DOpFrontExtractor(FrontExtractorOp):
op = 'FFT2D'
enabled = True
@classmethod
def extract(cls, node):
attrs = {'num_of_dimensions': 2, 'is_inverse': False}
TFFFT.update_node_stat(node, attrs)
return cls.enabled
class FFT3DOpFrontExtractor(FrontExtractorOp):
op = 'FFT3D'
enabled = True
@classmethod
def extract(cls, node):
attrs = {'num_of_dimensions': 3, 'is_inverse': False}
TFFFT.update_node_stat(node, attrs)
return cls.enabled
class IFFT1DOpFrontExtractor(FrontExtractorOp):
op = 'IFFT'
enabled = True
@classmethod
def extract(cls, node):
attrs = {'num_of_dimensions': 1, 'is_inverse': True}
TFFFT.update_node_stat(node, attrs)
return cls.enabled
class IFFT2DOpFrontExtractor(FrontExtractorOp):
op = 'IFFT2D'
enabled = True
@classmethod
def extract(cls, node):
attrs = {'num_of_dimensions': 2, 'is_inverse': True}
TFFFT.update_node_stat(node, attrs)
return cls.enabled
class IFFT3DOpFrontExtractor(FrontExtractorOp):
op = 'IFFT3D'
enabled = True
@classmethod
def extract(cls, node):
attrs = {'num_of_dimensions': 3, 'is_inverse': True}
TFFFT.update_node_stat(node, attrs)
return cls.enabled

View File

@ -1,10 +1,12 @@
# Copyright (C) 2018-2021 Intel Corporation # Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import numpy as np
from extensions.middle.pass_separator import PostMiddleStart from extensions.middle.pass_separator import PostMiddleStart
from extensions.ops.transpose import Transpose from extensions.ops.transpose import Transpose
from mo.front.common.partial_infer.utils import int64_array
from mo.graph.graph import Graph, Node from mo.graph.graph import Graph, Node, Port
from mo.middle.replacement import MiddleReplacementPattern from mo.middle.replacement import MiddleReplacementPattern
from mo.ops.op import PermuteAttrs from mo.ops.op import PermuteAttrs
@ -159,3 +161,28 @@ def mark_as_correct_data_layout(node: Node):
for ind, port in node.out_ports().items(): for ind, port in node.out_ports().items():
mark_output_as_in_correct_layout(node, ind) mark_output_as_in_correct_layout(node, ind)
def insert_transpose(graph: Graph, input_port: Port, before_input=True):
from mo.front.tf.graph_utils import create_op_with_const_inputs
input_rank = len(input_port.data.get_shape())
if input_rank > 3:
if before_input:
axis_order = np.concatenate((int64_array([0]),
int64_array(list(range(2, input_rank))),
int64_array([1])))
source_node = input_port.get_source().node
transpose_name = source_node.soft_get('name', source_node.id) + '/TransposeToNHWC'
else:
axis_order = np.concatenate(
(int64_array([0]),
int64_array([input_rank - 1]),
int64_array(list(range(1, input_rank - 1)))))
transpose_name = input_port.node.soft_get('name', input_port.node.id) + '/TransposeToNCHW'
input_port.node['need_shape_inference'] = True
input_port.node['override_output_shape'] = True
transpose = create_op_with_const_inputs(graph, Transpose, {1: axis_order}, {'name': transpose_name})
input_port.get_connection().insert_node(transpose)
transpose['need_shape_inference'] = True
transpose['override_output_shape'] = True

View File

@ -0,0 +1,31 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from mo.graph.graph import Graph
from mo.ops.op import Op
class TFFFT(Op):
"""
This operation is intended to read TF operations FFT, FFT2D, FFT3D, IFFT, IFFT2D, IFFT3D.
The operation TFFFT has two attributes: an integer attribute num_of_dimensions and a boolean attribute is_inverse.
If an operation to read is FFT, FFT2D, or FFT3D, then the attribute 'is_inverse' is False, and True otherwise.
The attribute 'num_of_dimensions' is equal to number of transformed axes, i.e. 1 for FFT and IFFT, 2 for FFT2D and
IFFT2D, 3 for FFT3D and IFFT3D.
The transformation TFFFTToDFT converts the operation TFFFT into MO DFT (if the attribute 'is_inverse' is False),
or into MO IDFT (otherwise).
"""
op = 'TFFFT'
enabled = False
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
'op': self.op,
'out_ports_count': 1,
'in_ports_count': 1,
}
assert 'is_inverse' in attrs, 'Attribute is_inverse is not given for the operation TFFFT.'
assert 'num_of_dimensions' in attrs, 'Attribute num_of_dimensions is not given for the operation TFFFT.'
super().__init__(graph, mandatory_props, attrs)

View File

@ -0,0 +1,127 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from mo.front.common.partial_infer.utils import int64_array
from mo.graph.graph import Node, Graph
from mo.ops.op import Op
class FFTBase(Op):
enabled = False
op = None
version = 'opset7'
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
'out_ports_count': 1,
'in_ports_count': 3,
'version': self.version,
'infer': self.infer
}
super().__init__(graph, mandatory_props, attrs)
def infer(self, node: Node):
node_name = node.soft_get(node.name, node.id)
assert len([p for p in node.in_ports().values() if not p.disconnected()]) in [2, 3], \
'(I)DFT node {} must have 2 or 3 inputs'.format(node_name)
src_shape = node.in_port(0).data.get_shape()
assert src_shape is not None, 'The input data shape of (I)DFT node {} must not be None'.format(node_name)
assert src_shape[-1] == 2, \
'The last dimension of input shape of (I)DFT node {} should be equal to 2'.format(node_name)
input_rank = len(src_shape)
assert input_rank >= 2, 'The input rank of (I)DFT node {} should be greater or equal to 2'.format(node_name)
axes = FFTBase.get_axes(node)
assert input_rank >= len(axes) + 1, \
'The input rank must be greater than number of (I)DFT node {} axes'.format(node_name)
axes = FFTBase.canonicalize_axes(axes, input_rank)
assert (input_rank - 1) not in axes, '(I)DFT node {} axes cannot contain the last axis'.format(node_name)
assert len(set(axes)) == len(axes), '(I)DFT node {} axes must be unique.'.format(node_name)
output_shape = int64_array(src_shape)
if node.is_in_port_connected(2):
signal_size = FFTBase.get_signal_size(node)
signal_size = FFTBase.canonicalize_signal_size(signal_size, axes, src_shape)
output_shape[axes] = signal_size
node.out_port(0).data.set_shape(output_shape)
@staticmethod
def canonicalize_axes(axes, input_rank):
"""
FFT operation supports for negative axes to transform. More precisely, according to the FFT operation
specification, axes should be integers from -(r - 1) to (r - 2) inclusively, where r = rank(data).
A negative axis 'a' is interpreted as an axis 'r - 1 + a'. The reason is the following: real input
tensor of the shape [n_0, ..., n_{r - 1}, 2] is interpreted as a complex tensor with the shape
[n_0, ..., n_{r - 1}]. Hence, we need to 'canonicalize' axes using the formula 'r - 1 + a'.
:param axes: axes to canonicalize
:param input_rank: input tensor rank
:return: canonicalized axes
"""
result = axes.copy()
for i, axis in enumerate(axes):
if axis < 0:
result[i] = axis + input_rank - 1
return result
@staticmethod
def canonicalize_signal_size(signal_size, axes, input_shape):
result = signal_size.copy()
for i, axis in enumerate(axes):
size = signal_size[i]
if size == -1:
result[i] = input_shape[axis]
return result
@staticmethod
def get_axes(node: Node):
axes = node.in_port(1).get_source().data.get_value()
node_name = node.soft_get('name', node.id)
assert axes is not None, 'The input with axes is not constant for node {}'.format(node_name)
return int64_array(axes)
@staticmethod
def get_signal_size(node: Node):
src_shape = node.in_port(0).data.get_shape()
assert src_shape is not None
input_rank = len(src_shape)
if node.is_in_port_connected(2):
signal_size = node.in_port(2).get_source().data.get_value()
else:
axes = FFTBase.get_axes(node)
signal_size = [src_shape[: input_rank - 1][a] for a in axes]
node_name = node.soft_get('name', node.id)
assert signal_size is not None, 'The input with signal_size is not constant for node {}'.format(node_name)
return int64_array(signal_size)
class DFT(FFTBase):
op = 'DFT'
enabled = False
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
'type': self.op,
'op': self.op,
}
mandatory_props.update(attrs)
super().__init__(graph, mandatory_props)
class IDFT(FFTBase):
op = 'IDFT'
enabled = False
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
'type': self.op,
'op': self.op,
}
mandatory_props.update(attrs)
super().__init__(graph, mandatory_props)

View File

@ -0,0 +1,42 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from mo.front.common.partial_infer.utils import int64_array
from mo.graph.graph import Graph, Node
from mo.ops.op import Op
class MXFFT(Op):
"""
This operation is intended to read MxNet operations FFT and IFFT.
The operation MXFFT has one attribute: a boolean attribute is_inverse.
If an operation to read is FFT, then the attribute 'is_inverse' is False, and True otherwise.
The transformation MXFFTToDFT converts the operation MXFFT into MO DFT (if the attribute 'is_inverse'
is False), or into MO IDFT (otherwise).
"""
op = 'MXFFT'
enabled = False
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
'op': self.op,
'out_ports_count': 1,
'in_ports_count': 1,
'infer': self.infer
}
assert 'is_inverse' in attrs, 'Attribute is_inverse is not given for the operation MXFFT.'
super().__init__(graph, mandatory_props, attrs)
def infer(self, node: Node):
node_name = node.soft_get('name', node.id)
input_shape = node.in_port(0).data.get_shape()
assert input_shape is not None, 'Input shape of MXFFT node {} must not be None'.format(node_name)
is_inverse = node.soft_get('is_inverse', False)
output_shape = input_shape.copy()
if is_inverse:
output_shape[-1] = output_shape[-1] // 2
else:
output_shape[-1] = output_shape[-1] * 2
node.out_port(0).data.set_shape(int64_array(output_shape))

View File

@ -8,7 +8,8 @@ from mo.ops.op import Op
class Roll(Op): class Roll(Op):
""" Roll operation that shifts elements of a tensor along specified axes. """
Roll operation that shifts elements of a tensor along specified axes.
""" """
op = 'Roll' op = 'Roll'
enabled = False enabled = False
@ -26,7 +27,6 @@ class Roll(Op):
class AttributedRoll(Op): class AttributedRoll(Op):
""" Roll operation that shifts elements of a tensor along specified axes. """ Roll operation that shifts elements of a tensor along specified axes.
This operation uses the same semantics as Roll but with shift and axes specified as attributes. This operation uses the same semantics as Roll but with shift and axes specified as attributes.
Shift and axes are specified as attributes in MxNet. Shift and axes are specified as attributes in MxNet.
""" """

View File

@ -10,6 +10,7 @@ import numpy as np
from extensions.middle.InsertLayoutPropagationTransposes import mark_input_as_in_correct_layout, \ from extensions.middle.InsertLayoutPropagationTransposes import mark_input_as_in_correct_layout, \
mark_output_as_in_correct_layout mark_output_as_in_correct_layout
from extensions.ops.activation_ops import Sigmoid from extensions.ops.activation_ops import Sigmoid
from extensions.ops.elementwise import Add, Less, Mul
from mo.front.common.partial_infer.utils import int64_array from mo.front.common.partial_infer.utils import int64_array
from mo.graph.graph import Node, Graph from mo.graph.graph import Node, Graph
from mo.ops.concat import Concat from mo.ops.concat import Concat
@ -185,3 +186,31 @@ def add_activation_function_after_node(graph: Graph, node: Node, activation_func
else: else:
raise Error('Unknown post-processing activation function "{}".'.format(activation_function)) raise Error('Unknown post-processing activation function "{}".'.format(activation_function))
return activation_node return activation_node
def add_constant_to_negative_values(node: Node, port_idx: int, added_value: np.array):
"""
This function adds the given values to negative elements of value from the given input port.
:param node: node with corrected values in the input port port_idx
:param port_idx: input port index for negative values
:param added_value: the value to add
:return: None
"""
negative_values_source = node.in_port(port_idx).get_source()
negative_values_node = node.in_port(port_idx).get_source().node
negative_values_node_name = negative_values_node.soft_get('name', negative_values_node.id)
graph = node.graph
less_node = create_op_with_const_inputs(graph, Less,
{1: np.array(0, dtype=added_value.dtype)},
{'name': negative_values_node_name + '/Less'})
mul_node = create_op_with_const_inputs(graph, Mul, {1: added_value}, {'name': negative_values_node_name + '/Mul'})
node.in_port(port_idx).get_connection().set_destination(less_node.in_port(0))
less_node.out_port(0).connect(mul_node.in_port(0))
add_node = Add(graph, {}).create_node()
mul_node.out_port(0).connect(add_node.in_port(1))
negative_values_source.connect(add_node.in_port(0))
add_node.out_port(0).connect(node.in_port(port_idx))

View File

@ -107,9 +107,40 @@ def _fuse_mul(graph: Graph, node: Node, fuse_nodes: list, backward: bool = True)
w_mul = node.copy_node({'name': mul_name, 'in_ports_count': len(node.in_ports()), w_mul = node.copy_node({'name': mul_name, 'in_ports_count': len(node.in_ports()),
'out_ports_count': len(node.out_ports()), 'can_be_fused': False}) 'out_ports_count': len(node.out_ports()), 'can_be_fused': False})
w_mul.in_port(const_port.idx).connect(mul_const.out_port(0)) w_mul.in_port(const_port.idx).connect(mul_const.out_port(0))
w_const = weights_port.get_source()
weights_port.get_connection().set_source(w_mul.out_port(0)) r"""
w_const.connect(w_mul.in_port(tensor_port.idx)) In this transformation we remove Mul or Div node (node) that goes after fuse_node and
create new Mul node (w_mul), connect it with the corrected const value (mul_const) and
insert w_mul before the fuse_node. So the input data of fuse_node becomes different.
For this reason we need to use set_destination from previous operation to w_mul which
guaranties that data node will be reused on previous_op -> w_mul connection and its
attributes won't be copied to the data node of w_mul -> fuse_node connection.
BEFORE AFTER
previous_op mul_const
\ /
previous_op w_mul
| |
fuse_node const fuse_node
\ / |
node next_op
|
next_op
"""
weights_port.get_connection().set_destination(w_mul.in_port(tensor_port.idx))
w_mul.out_port(0).connect(weights_port)
# As fusing is applied to convolutions it is important to keep 'permutation' and 'input_permutation' attributes
# which were obtained from original model. These attributes are stored on the incoming edge to the operation
# node and during the reconnection they are moved to the new connection. But during reconnection in this
# transformation these attributes are moved to the previous node. So we need manually set them at the
# incoming edge to fuse_node.
in_edge = w_mul.in_edge(tensor_port.idx)
if 'permutation' in in_edge:
fuse_node.in_edge(weights_port.idx)['permutation'] = in_edge['permutation']
if 'input_permutation' in in_edge:
fuse_node.in_edge(weights_port.idx)['input_permutation'] = in_edge['input_permutation']
# If we fuse in backward direction we should multiply biases if they exists # If we fuse in backward direction we should multiply biases if they exists
if backward and len(fuse_node.in_ports()) == 3 and not fuse_node.in_port(2).disconnected() and \ if backward and len(fuse_node.in_ports()) == 3 and not fuse_node.in_port(2).disconnected() and \

View File

@ -133,12 +133,18 @@ def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref
if in_node.id not in checked_nodes_ref and in_node.id not in q_ref: if in_node.id not in checked_nodes_ref and in_node.id not in q_ref:
q_ref.append(in_node.id) q_ref.append(in_node.id)
out_nodes = node.out_nodes().values() if node.kind == 'op' else sorted_by_name(node.out_nodes()) if node.kind == 'op':
out_nodes = sorted_by_name([Node(graph, v) for v, _ in node.get_outputs()])
else:
out_nodes = sorted_by_name(node.out_nodes())
for out_node in out_nodes: for out_node in out_nodes:
if out_node.id not in checked_nodes and out_node.id not in q: if out_node.id not in checked_nodes and out_node.id not in q:
q.append(out_node.id) q.append(out_node.id)
out_nodes = node_ref.out_nodes().values() if node_ref.kind == 'op' else sorted_by_name(node_ref.out_nodes()) if node_ref.kind == 'op':
out_nodes = sorted_by_name([Node(graph_ref, v) for v, _ in node_ref.get_outputs()])
else:
out_nodes = sorted_by_name(node_ref.out_nodes())
for out_node in out_nodes: for out_node in out_nodes:
if out_node.id not in checked_nodes_ref and out_node.id not in q_ref: if out_node.id not in checked_nodes_ref and out_node.id not in q_ref:
q_ref.append(out_node.id) q_ref.append(out_node.id)

View File

@ -11,6 +11,7 @@ from extensions.middle.FakeSplitOutputs import AddFakeOutputsToSplit
from extensions.ops.Cast import Cast from extensions.ops.Cast import Cast
from extensions.ops.ReduceOps import ReduceOp from extensions.ops.ReduceOps import ReduceOp
from extensions.ops.activation_ops import Activation from extensions.ops.activation_ops import Activation
from extensions.ops.dft import FFTBase
from extensions.ops.elementwise import Elementwise, UnaryElementwise, LogicalElementwise, BiasAdd, Div, Mul, Pow, Sub from extensions.ops.elementwise import Elementwise, UnaryElementwise, LogicalElementwise, BiasAdd, Div, Mul, Pow, Sub
from extensions.ops.embedding_bag import EmbeddingBagBase from extensions.ops.embedding_bag import EmbeddingBagBase
from extensions.ops.loop import Loop from extensions.ops.loop import Loop
@ -60,7 +61,7 @@ def collect_ops(path: str):
import_by_path(os.path.join(path, 'mo', 'ops'), ['mo', 'ops']) import_by_path(os.path.join(path, 'mo', 'ops'), ['mo', 'ops'])
import_by_path(os.path.join(path, 'extensions', 'ops'), ['extensions', 'ops']) import_by_path(os.path.join(path, 'extensions', 'ops'), ['extensions', 'ops'])
update_registration(classes=[Op, Activation, Elementwise, UnaryElementwise, LogicalElementwise, update_registration(classes=[Op, Activation, Elementwise, UnaryElementwise, LogicalElementwise,
EmbeddingBagBase, ReduceOp, Scatter, ScatterNDBase], EmbeddingBagBase, ReduceOp, Scatter, ScatterNDBase, FFTBase],
enabled_transforms=[], disabled_transforms=[]) enabled_transforms=[], disabled_transforms=[])

View File

@ -0,0 +1,49 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
from extensions.back.ReverseInputChannels import ReverseChannelsPropagationUp
from mo.graph.graph import Node, Graph
from unit_tests.utils.graph import build_graph, result, connect, regular_op_with_shaped_data
nodes = {
**regular_op_with_shaped_data('placeholder1', [1, 3, 10, 10], {'type': 'Parameter'}),
**regular_op_with_shaped_data('placeholder2', [1, 1, 1, 1], {'type': 'Parameter'}),
**regular_op_with_shaped_data('mul', [1, 3, 10, 10], {'type': 'Multiply'}),
**regular_op_with_shaped_data('reverse_channels', [1, 3, 10, 10], {'type': 'ReverseChannels', 'axis': 1}),
**result('result'),
}
class ReverseInputChannelsTest(unittest.TestCase):
def check_graph_attrs(self, graph: Graph, parameter_node_names: list):
for node in graph.get_op_nodes():
if node.soft_get('name') in parameter_node_names:
self.assertTrue(node.soft_get('type') == 'Parameter')
out_node = node.out_node(0)
self.assertTrue(out_node['fw_tensor_debug_info'] == ['fw_name', 0])
else:
for idx in node.out_nodes():
out_node = node.out_node(idx)
self.assertFalse('fw_tensor_debug_info' in out_node)
def set_graph_attrs(self, graph: Graph, parameter_node_names: list):
for node in graph.get_op_nodes():
if node.soft_get('name') in parameter_node_names:
self.assertTrue(node.soft_get('type') == 'Parameter')
out_node = node.out_node(0)
out_node['fw_tensor_debug_info'] = ['fw_name', 0]
def test_lift_up_through_eltwise(self):
graph = build_graph(nodes, [*connect('placeholder1', '0:mul'), *connect('placeholder2', '1:mul'),
*connect('mul', 'reverse_channels'), *connect('reverse_channels', 'result')])
self.set_graph_attrs(graph, ['placeholder1', 'placeholder2'])
node = Node(graph, 'mul')
reverse_channels = Node(graph, 'reverse_channels')
ReverseChannelsPropagationUp.lift_up_through_eltwise(node, reverse_channels)
self.check_graph_attrs(graph, ['placeholder1', 'placeholder2'])

View File

@ -0,0 +1,84 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
from extensions.back.TransposeDFT import TransposeDFT
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect, \
regular_op_with_empty_data
dft_graph_node_attrs = {
**regular_op_with_shaped_data('placeholder', [8, 2, 40, 56], {'type': 'Parameter', 'op': 'Parameter'}),
**valued_const_with_data('axes', int64_array([-2, -1])),
**regular_op_with_shaped_data('dft', [8, 2, 40, 56], {'op': 'DFT', 'need_insert_transposes_for_dft': True}),
**regular_op_with_shaped_data('abs', [8, 2, 40, 56], {'type': 'Abs', 'op': 'Abs'}),
**result(),
}
dft_graph_edges = [
*connect('placeholder', '0:dft'),
*connect('axes', '1:dft'),
*connect('dft', 'abs'),
*connect('abs', 'output'),
]
transposed_dft_graph_node_attrs = {
**regular_op_with_shaped_data('placeholder', [8, 2, 40, 56], {'type': 'Parameter', 'op': 'Parameter'}),
**regular_op_with_empty_data('transpose_before',
{'type': 'Transpose', 'op': 'Transpose', 'need_shape_inference': True}),
**valued_const_with_data('transpose_before_axis_const', int64_array([0, 2, 3, 1])),
**regular_op_with_empty_data('transpose_after',
{'type': 'Transpose', 'op': 'Transpose', 'need_shape_inference': True}),
**valued_const_with_data('transpose_after_axis_const', int64_array([0, 3, 1, 2])),
**valued_const_with_data('dft_axes', int64_array([-2, -1])),
**regular_op_with_shaped_data('dft', [8, 2, 40, 56], {'op': 'DFT', 'need_insert_transposes_for_dft': True}),
**regular_op_with_shaped_data('abs', [8, 2, 40, 56], {'type': 'Abs', 'op': 'Abs'}),
**result(),
}
transposed_dft_graph_edges = [
*connect('placeholder', '0:transpose_before'),
*connect('transpose_before_axis_const', '1:transpose_before'),
*connect('transpose_before', '0:dft'),
*connect('dft_axes', '1:dft'),
*connect('dft', '0:transpose_after'),
*connect('transpose_after_axis_const', '1:transpose_after'),
*connect('transpose_after', 'abs'),
*connect('abs', 'output'),
]
nontransposed_dft_graph_node_attrs = {
**regular_op_with_shaped_data('placeholder', [8, 2, 40, 56], {'type': 'Parameter', 'op': 'Parameter'}),
**valued_const_with_data('axes', int64_array([-2, -1])),
**regular_op_with_shaped_data('dft', [8, 2, 40, 56], {'op': 'DFT'}),
**regular_op_with_shaped_data('abs', [8, 2, 40, 56], {'type': 'Abs', 'op': 'Abs'}),
**result(),
}
nontransposed_dft_graph_edges = [
*connect('placeholder', '0:dft'),
*connect('axes', '1:dft'),
*connect('dft', 'abs'),
*connect('abs', 'output'),
]
class TransposeDFTTest(unittest.TestCase):
def test_dft_transpose(self):
graph = build_graph(nodes_attrs=dft_graph_node_attrs, edges=dft_graph_edges)
ref_graph = build_graph(nodes_attrs=transposed_dft_graph_node_attrs, edges=transposed_dft_graph_edges)
graph.graph['fw'] = 'tf'
TransposeDFT().find_and_replace_pattern(graph)
(flag, resp) = compare_graphs(graph, ref_graph, 'output')
self.assertTrue(flag, resp)
def test_dft_nontranspose(self):
graph = build_graph(nodes_attrs=nontransposed_dft_graph_node_attrs, edges=nontransposed_dft_graph_edges)
ref_graph = build_graph(nodes_attrs=nontransposed_dft_graph_node_attrs, edges=nontransposed_dft_graph_edges)
TransposeDFT().find_and_replace_pattern(graph)
(flag, resp) = compare_graphs(graph, ref_graph, 'output')
self.assertTrue(flag, resp)

View File

@ -0,0 +1,191 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
from generator import generator, generate
from extensions.front.mxnet.MXFFTToDFT import MXFFTToDFT
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph
fft_graph_node_attrs = {
'placeholder': {'shape': int64_array([3, 100, 100]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'fft': {'kind': 'op', 'op': 'MXFFT', 'is_inverse': False},
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
fft_graph_edges = [
('placeholder', 'fft', {'in': 0}),
('fft', 'abs'),
('abs', 'output'),
]
ref_converted_fft_graph_node_attrs = {
'placeholder': {'shape': int64_array([3, 100, 100]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'rank': {'kind': 'op', 'op': 'Rank'},
'unsqueeze': {'type': 'Unsqueeze', 'kind': 'op', 'op': 'Unsqueeze'},
'unsqueeze_axis': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([1]), 'value': int64_array([-1])
},
'one': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(1)
},
'add': {'type': 'Add', 'kind': 'op', 'op': 'Add'},
'zero1': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
},
'broadcast1': {'type': 'Broadcast', 'kind': 'op', 'op': 'Broadcast'},
'one2': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(1)
},
'zero2': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
},
'scatter': {'type': 'ScatterUpdate', 'kind': 'op', 'op': 'ScatterUpdate'},
'pad': {'type': 'Pad', 'kind': 'op', 'op': 'Pad', 'mode': 'constant'},
'fft_axes': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([1]), 'value': int64_array([-1])
},
'fft': {'kind': 'op', 'op': 'DFT', 'type': 'DFT'},
'one3': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(1)
},
'sub': {'type': 'Subtract', 'kind': 'op', 'op': 'Sub'},
'zero3': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
},
'broadcast2': {'type': 'Broadcast', 'kind': 'op', 'op': 'Broadcast'},
'm1_2': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-1, 2])
},
'concat': {'type': 'Concat', 'kind': 'op', 'op': 'Concat', 'axis': 0},
'reshape': {'kind': 'op', 'op': 'Reshape', 'type': 'Reshape'},
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
ref_converted_fft_graph_edges = [
('placeholder', 'rank', {'in': 0, 'out': 0}),
('placeholder', 'unsqueeze', {'in': 0, 'out': 0}),
('unsqueeze_axis', 'unsqueeze', {'in': 1, 'out': 0}),
('rank', 'add', {'in': 0, 'out': 0}),
('one', 'add', {'in': 1, 'out': 0}),
('zero1', 'broadcast1', {'in': 0, 'out': 0}),
('add', 'broadcast1', {'in': 1, 'out': 0}),
('broadcast1', 'scatter', {'in': 0, 'out': 0}),
('rank', 'scatter', {'in': 1, 'out': 0}),
('one2', 'scatter', {'in': 2, 'out': 0}),
('zero2', 'scatter', {'in': 3, 'out': 0}),
('unsqueeze', 'pad', {'in': 0, 'out': 0}),
('broadcast1', 'pad', {'in': 1, 'out': 0}),
('scatter', 'pad', {'in': 2, 'out': 0}),
('pad', 'fft', {'in': 0, 'out': 0}),
('fft_axes', 'fft', {'in': 1, 'out': 0}),
('rank', 'sub', {'in': 0, 'out': 0}),
('one3', 'sub', {'in': 1, 'out': 0}),
('zero3', 'broadcast2', {'in': 0, 'out': 0}),
('sub', 'broadcast2', {'in': 1, 'out': 0}),
('broadcast2', 'concat', {'in': 0, 'out': 0}),
('m1_2', 'concat', {'in': 1, 'out': 0}),
('fft', 'reshape', {'in': 0, 'out': 0}),
('concat', 'reshape', {'in': 1, 'out': 0}),
('reshape', 'abs'),
('abs', 'output'),
]
ref_converted_ifft_graph_node_attrs = {
'placeholder': {'shape': int64_array([3, 100, 100]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'rank': {'kind': 'op', 'op': 'Rank'},
'subtracted_one': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(1)
},
'sub': {'type': 'Subtract', 'kind': 'op', 'op': 'Sub'},
'broadcast': {'type': 'Broadcast', 'kind': 'op', 'op': 'Broadcast'},
'broadcasted_value': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
},
'new_shape': {'type': 'Concat', 'kind': 'op', 'op': 'Concat', 'axis': 0},
'new_shape_const': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-1, 2])
},
'reshape': {'kind': 'op', 'op': 'Reshape', 'type': 'Reshape'},
'fft': {'kind': 'op', 'op': 'IDFT', 'type': 'IDFT'},
'fft_axes': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([1]), 'value': int64_array([-1])
},
'split': {'kind': 'op', 'op': 'Split', 'type': 'Split', 'num_splits': 2},
'split_axes': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(-1)
},
'squeeze': {'kind': 'op', 'op': 'Squeeze', 'type': 'Squeeze'},
'squeeze_axes': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([1]), 'value': int64_array([-1])
},
'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
ref_converted_ifft_graph_edges = [
('placeholder', 'rank', {'out': 0}),
('placeholder', 'reshape', {'out': 0}),
('rank', 'sub'),
('subtracted_one', 'sub'),
('broadcasted_value', 'broadcast'),
('sub', 'broadcast'),
('broadcast', 'new_shape'),
('new_shape_const', 'new_shape'),
('new_shape', 'reshape'),
('reshape', 'fft'),
('fft_axes', 'fft'),
('fft', 'split'),
('split_axes', 'split'),
('split', 'squeeze', {'out': 0}),
('squeeze_axes', 'squeeze'),
('squeeze', 'abs'),
('abs', 'output'),
]
@generator
class MXFFTToDFTTest(unittest.TestCase):
@generate(*[int64_array([3, 100, 100, 8]), int64_array([5, 60])])
def test_fft_replacement(self, input_shape):
graph = build_graph(nodes_attrs=fft_graph_node_attrs,
edges=fft_graph_edges,
update_attributes={
'placeholder': {'shape': input_shape}
})
graph.stage = 'front'
MXFFTToDFT().find_and_replace_pattern(graph)
ref_graph = build_graph(nodes_attrs=ref_converted_fft_graph_node_attrs,
edges=ref_converted_fft_graph_edges,
update_attributes={
'placeholder': {'shape': input_shape}
})
(flag, resp) = compare_graphs(graph, ref_graph, 'output')
self.assertTrue(flag, resp)
@generate(*[int64_array([3, 100, 100, 8]), int64_array([5, 60])])
def test_ifft_replacement(self, input_shape):
graph = build_graph(nodes_attrs=fft_graph_node_attrs,
edges=fft_graph_edges,
update_attributes={
'placeholder': {'shape': input_shape},
'fft': {'is_inverse': True}
})
graph.stage = 'front'
MXFFTToDFT().find_and_replace_pattern(graph)
ref_graph = build_graph(nodes_attrs=ref_converted_ifft_graph_node_attrs,
edges=ref_converted_ifft_graph_edges,
update_attributes={
'placeholder': {'shape': input_shape}
})
(flag, resp) = compare_graphs(graph, ref_graph, 'output')
self.assertTrue(flag, resp)

View File

@ -0,0 +1,74 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
import numpy as np
from extensions.front.tf.ComplexAbsAfterComplex import ComplexAbsAfterComplex
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph
graph_node_attrs = {
'placeholder_0': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'placeholder_1': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'complex': {'kind': 'op', 'op': 'Complex'},
'complex_abs': {'kind': 'op', 'op': 'ComplexAbs'},
'relu': {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'},
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
graph_edges = [
('placeholder_0', 'complex', {'in': 0}),
('placeholder_1', 'complex', {'in': 1}),
('complex', 'complex_abs', {'in': 0}),
('complex_abs', 'relu'),
('relu', 'output'),
]
ref_graph_node_attrs = {
'placeholder_0': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'placeholder_1': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'pow0_const': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(2.0)
},
'pow1_const': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(2.0)
},
'pow0': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
'pow1': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
'add': {'type': 'Add', 'kind': 'op', 'op': 'Add'},
'sqrt_const': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(0.5)
},
'sqrt': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
'relu': {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'},
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
ref_graph_edges = [
('placeholder_0', 'pow0', {'in': 0}),
('placeholder_1', 'pow1', {'in': 0}),
('pow0_const', 'pow0', {'in': 1}),
('pow1_const', 'pow1', {'in': 1}),
('pow0', 'add', {'in': 0}),
('pow1', 'add', {'in': 1}),
('add', 'sqrt', {'in': 0}),
('sqrt_const', 'sqrt', {'in': 1}),
('sqrt', 'relu'),
('relu', 'output'),
]
class ComplexAbsAfterComplexTest(unittest.TestCase):
def test_replacement(self):
graph = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
graph.stage = 'front'
ComplexAbsAfterComplex().find_and_replace_pattern(graph)
ref_graph = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
(flag, resp) = compare_graphs(graph, ref_graph, 'output', check_op_attrs=True)
self.assertTrue(flag, resp)

View File

@ -0,0 +1,66 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
import numpy as np
from extensions.front.tf.ComplexAbs import ComplexAbs
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph
graph_node_attrs = {
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'complex_abs': {'kind': 'op', 'op': 'ComplexAbs'},
'relu': {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'},
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
graph_edges = [
('placeholder', 'complex_abs'),
('complex_abs', 'relu'),
('relu', 'output'),
]
ref_graph_node_attrs = {
'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
'pow2_const': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(2.0)
},
'pow2': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
'sum_axis': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': int64_array(-1)
},
'sum': {'type': 'ReduceSum', 'kind': 'op', 'op': 'ReduceSum'},
'sqrt_const': {
'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': np.float32(0.5)
},
'sqrt': {'type': 'Power', 'kind': 'op', 'op': 'Pow'},
'relu': {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'},
'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
ref_graph_edges = [
('placeholder', 'pow2', {'in': 0}),
('pow2_const', 'pow2', {'in': 1}),
('sum_axis', 'sum', {'in': 1}),
('pow2', 'sum', {'in': 0}),
('sum', 'sqrt', {'in': 0}),
('sqrt_const', 'sqrt', {'in': 1}),
('sqrt', 'relu'),
('relu', 'output'),
]
class ComplexAbsTest(unittest.TestCase):
    """Checks that the ComplexAbs front transformation rewrites the graph
    into the expected Pow/ReduceSum/Pow reference subgraph."""

    def test_replacement(self):
        # Build the graph under test and mark it as being in the front phase.
        actual = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
        actual.stage = 'front'
        # Run the transformation being verified.
        ComplexAbs().find_and_replace_pattern(actual)
        # Compare against the reference, walking back from 'output'.
        expected = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
        equal, details = compare_graphs(actual, expected, 'output', check_op_attrs=True)
        self.assertTrue(equal, details)

View File

@@ -0,0 +1,89 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
from extensions.front.tf.CorrectRollAxes import CorrectRollAxes
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph
# Input fixture for CorrectRollAxesTest: Parameter -> Roll -> Abs -> Result,
# with Roll flagged 'input_rank_changed': True (the trigger for the pass).
graph_node_attrs = {
    'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'roll': {'kind': 'op', 'op': 'Roll', 'type': 'Roll', 'input_rank_changed': True},
    'roll_shift': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([50, 50])
    },
    'roll_axes': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
    },
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
graph_edges = [
    ('placeholder', 'roll', {'in': 0}),
    ('roll', 'abs'),
    ('abs', 'output'),
    ('roll_shift', 'roll', {'in': 1}),
    ('roll_axes', 'roll', {'in': 2}),
]
# Reference graph: the 'input_rank_changed' flag is gone and the axes input of
# Roll is routed through a correction subgraph.  From the edges below the
# corrected value is axes + (axes < 0) * (-1), fed into Roll's port 2.
ref_graph_node_attrs = {
    'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'roll': {'kind': 'op', 'op': 'Roll', 'type': 'Roll'},
    'roll_shift': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([50, 50])
    },
    'roll_axes': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
    },
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
    'add': {'type': 'Add', 'kind': 'op', 'op': 'Add'},
    'mul': {'type': 'Multiply', 'kind': 'op', 'op': 'Mul'},
    'less': {'type': 'Less', 'kind': 'op', 'op': 'Less'},
    'zero': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
    },
    'minus_one': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(-1)
    },
}
ref_graph_edges = [
    ('placeholder', 'roll', {'out': 0, 'in': 0}),
    ('roll', 'abs'),
    ('abs', 'output'),
    ('roll_shift', 'roll', {'in': 1}),
    ('mul', 'add', {'in': 1}),
    ('add', 'roll', {'in': 2}),
    ('zero', 'less', {'in': 1}),
    ('minus_one', 'mul', {'in': 1}),
    ('less', 'mul', {'in': 0}),
    ('roll_axes', 'less', {'out': 0, 'in': 0}),
    ('roll_axes', 'add', {'out': 0, 'in': 0}),
]
class CorrectRollAxesTest(unittest.TestCase):
    """Tests for the CorrectRollAxes front transformation."""

    def test_replacement(self):
        # Roll carries 'input_rank_changed': True, so the axes-correction
        # subgraph from the reference fixture must be inserted.
        actual = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
        actual.stage = 'front'
        CorrectRollAxes().find_and_replace_pattern(actual)
        expected = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
        equal, details = compare_graphs(actual, expected, 'output', check_op_attrs=True)
        self.assertTrue(equal, details)

    def test_nonreplacement(self):
        # With 'input_rank_changed' cleared, the pass must leave the graph alone.
        actual = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges,
                             update_attributes={'roll': {'input_rank_changed': False}})
        actual.stage = 'front'
        CorrectRollAxes().find_and_replace_pattern(actual)
        expected = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges,
                               update_attributes={'roll': {'input_rank_changed': False}})
        equal, details = compare_graphs(actual, expected, 'output', check_op_attrs=True)
        self.assertTrue(equal, details)

View File

@@ -0,0 +1,88 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
from extensions.front.tf.RollRealImagPack import RollRealImagPack
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph
# Input fixture for RollRealImagPackTest: a Roll whose single output feeds
# both Real and Imag, which are then re-packed by Pack before Abs -> Result.
graph_node_attrs = {
    'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'unroll': {'kind': 'op', 'op': 'Roll', 'type': 'Roll'},
    'real': {'kind': 'op', 'op': 'Real'},
    'imag': {'kind': 'op', 'op': 'Imag'},
    'pack': {'kind': 'op', 'op': 'Pack'},
    'unroll_shift': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([50, 50])
    },
    'unroll_axes': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
    },
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
graph_edges = [
    ('placeholder', 'unroll', {'in': 0}),
    ('unroll', 'real', {'out': 0, 'in': 0}),
    ('unroll', 'imag', {'out': 0, 'in': 0}),
    ('real', 'pack', {'in': 0}),
    ('imag', 'pack', {'in': 1}),
    ('pack', 'abs'),
    ('abs', 'output'),
    ('unroll_shift', 'unroll', {'in': 1}),
    ('unroll_axes', 'unroll', {'in': 2}),
]
# Reference: the Real/Imag/Pack trio is removed (Roll feeds Abs directly) and
# the axes input of Roll is rewritten as axes + (axes < 0) * (-1) -- visible
# from the less/mul/add edges below.
ref_graph_node_attrs = {
    'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'unroll': {'kind': 'op', 'op': 'Roll', 'type': 'Roll'},
    'unroll_shift': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([50, 50])
    },
    'unroll_axes': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
    },
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
    'add': {'type': 'Add', 'kind': 'op', 'op': 'Add'},
    'mul': {'type': 'Multiply', 'kind': 'op', 'op': 'Mul'},
    'less': {'type': 'Less', 'kind': 'op', 'op': 'Less'},
    'zero': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(0)
    },
    'minus_one': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([]), 'value': int64_array(-1)
    },
}
# NOTE(review): Roll's input port 2 appears twice below ('unroll_axes' and
# 'add' both target {'in': 2}); presumably compare_graphs/build_graph tolerates
# or resolves this -- confirm against those utilities.
ref_graph_edges = [
    ('placeholder', 'unroll', {'out': 0, 'in': 0}),
    ('unroll', 'abs'),
    ('abs', 'output'),
    ('unroll_shift', 'unroll', {'in': 1}),
    ('unroll_axes', 'unroll', {'in': 2}),
    ('mul', 'add', {'in': 1}),
    ('add', 'unroll', {'in': 2}),
    ('zero', 'less', {'in': 1}),
    ('minus_one', 'mul', {'in': 1}),
    ('less', 'mul', {'in': 0}),
    ('unroll_axes', 'less', {'out': 0, 'in': 0}),
    ('unroll_axes', 'add', {'out': 0, 'in': 0}),
]
class RollRealImagPackTest(unittest.TestCase):
    """Checks that the RollRealImagPack front transformation collapses the
    Roll -> Real/Imag -> Pack pattern as described by the reference fixture."""

    def test_replacement(self):
        # Construct the pattern graph in the front phase.
        actual = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
        actual.stage = 'front'
        # Apply the transformation and diff against the reference graph.
        RollRealImagPack().find_and_replace_pattern(actual)
        expected = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
        equal, details = compare_graphs(actual, expected, 'output', check_op_attrs=True)
        self.assertTrue(equal, details)

View File

@@ -0,0 +1,143 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
from extensions.front.tf.SSliceComplex import SSliceComplex
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph
# Input fixture for SSliceComplexTest: ONE placeholder feeds two StridedSlice
# nodes that cut out the last-axis channels 0 ([0,0]..[0,1]) and 1
# ([0,1]..[0,2]) with shrink_axis_mask [0, 1]; their outputs are combined by
# 'Complex', then Abs -> Result.
graph_node_attrs = {
    'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'strided_slice_real': {
        'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'begin_mask': int64_array([1]),
        'end_mask': int64_array([1]), 'ellipsis_mask': int64_array([1]), 'new_axis_mask': int64_array([0]),
        'shrink_axis_mask': int64_array([0, 1]),
    },
    'strided_slice_imag': {
        'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'begin_mask': int64_array([1]),
        'end_mask': int64_array([1]), 'ellipsis_mask': int64_array([1]), 'new_axis_mask': int64_array([0]),
        'shrink_axis_mask': int64_array([0, 1]),
    },
    'complex': {'kind': 'op', 'op': 'Complex'},
    'real_begin': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 0])
    },
    'imag_begin': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 1])
    },
    'real_end': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 1])
    },
    'imag_end': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 2])
    },
    'real_strides': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([1, 1])
    },
    'imag_strides': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([1, 1])
    },
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
# StridedSlice ports: 0 = data, 1 = begin, 2 = end, 3 = strides.
graph_edges = [
    ('placeholder', 'strided_slice_real', {'out': 0, 'in': 0}),
    ('placeholder', 'strided_slice_imag', {'out': 0, 'in': 0}),
    ('strided_slice_real', 'complex', {'in': 0}),
    ('strided_slice_imag', 'complex', {'in': 1}),
    ('complex', 'abs'),
    ('abs', 'output'),
    ('real_begin', 'strided_slice_real', {'in': 1}),
    ('imag_begin', 'strided_slice_imag', {'in': 1}),
    ('real_end', 'strided_slice_real', {'in': 2}),
    ('imag_end', 'strided_slice_imag', {'in': 2}),
    ('real_strides', 'strided_slice_real', {'in': 3}),
    ('imag_strides', 'strided_slice_imag', {'in': 3}),
]
# Expected result of the SSliceComplex transformation: the slice/Complex
# subgraph is removed entirely, leaving Parameter -> Abs -> Result.
ref_graph_node_attrs = {
    'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
ref_graph_edges = [
    ('placeholder', 'abs'),
    ('abs', 'output'),
]
# Negative fixture: identical to the input fixture above EXCEPT the two
# StridedSlice nodes read from DIFFERENT placeholders, so the pattern must
# NOT be replaced (the test expects this graph to come out unchanged).
non_transformed_graph_node_attrs = {
    'placeholder_0': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'placeholder_1': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'strided_slice_real': {
        'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'begin_mask': int64_array([1]),
        'end_mask': int64_array([1]), 'ellipsis_mask': int64_array([1]), 'new_axis_mask': int64_array([0]),
        'shrink_axis_mask': int64_array([0, 1]),
    },
    'strided_slice_imag': {
        'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'begin_mask': int64_array([1]),
        'end_mask': int64_array([1]), 'ellipsis_mask': int64_array([1]), 'new_axis_mask': int64_array([0]),
        'shrink_axis_mask': int64_array([0, 1]),
    },
    'complex': {'kind': 'op', 'op': 'Complex'},
    'real_begin': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 0])
    },
    'imag_begin': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 1])
    },
    'real_end': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 1])
    },
    'imag_end': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([0, 2])
    },
    'real_strides': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([1, 1])
    },
    'imag_strides': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([1, 1])
    },
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
non_transformed_graph_edges = [
    ('placeholder_0', 'strided_slice_real', {'out': 0, 'in': 0}),
    ('placeholder_1', 'strided_slice_imag', {'out': 0, 'in': 0}),
    ('strided_slice_real', 'complex', {'in': 0}),
    ('strided_slice_imag', 'complex', {'in': 1}),
    ('complex', 'abs'),
    ('abs', 'output'),
    ('real_begin', 'strided_slice_real', {'in': 1}),
    ('imag_begin', 'strided_slice_imag', {'in': 1}),
    ('real_end', 'strided_slice_real', {'in': 2}),
    ('imag_end', 'strided_slice_imag', {'in': 2}),
    ('real_strides', 'strided_slice_real', {'in': 3}),
    ('imag_strides', 'strided_slice_imag', {'in': 3}),
]
class SSliceComplexTest(unittest.TestCase):
    """Tests for the SSliceComplex front transformation."""

    def test_replacement(self):
        # Both slices read the same placeholder -> the pattern must collapse
        # to the plain Parameter -> Abs -> Result reference graph.
        actual = build_graph(nodes_attrs=graph_node_attrs, edges=graph_edges)
        actual.stage = 'front'
        SSliceComplex().find_and_replace_pattern(actual)
        expected = build_graph(nodes_attrs=ref_graph_node_attrs, edges=ref_graph_edges)
        equal, details = compare_graphs(actual, expected, 'output', check_op_attrs=True)
        self.assertTrue(equal, details)

    def test_nonreplacement(self):
        # Slices read different placeholders -> the graph must stay unchanged.
        actual = build_graph(nodes_attrs=non_transformed_graph_node_attrs, edges=non_transformed_graph_edges)
        actual.stage = 'front'
        expected = build_graph(nodes_attrs=non_transformed_graph_node_attrs, edges=non_transformed_graph_edges)
        SSliceComplex().find_and_replace_pattern(actual)
        equal, details = compare_graphs(actual, expected, 'output', check_op_attrs=True)
        self.assertTrue(equal, details)

View File

@@ -0,0 +1,72 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import unittest
from generator import generator, generate
from extensions.front.tf.TFFFTToDFT import TFFFTToDFT
from mo.front.common.partial_infer.utils import int64_array
from mo.utils.ir_engine.compare_graphs import compare_graphs
from unit_tests.utils.graph import build_graph
# Input fixture for TFFFTToDFTTest: Parameter -> TFFFT -> Abs -> Result.
# 'num_of_dimensions' and 'is_inverse' are overridden per test case.
dft_graph_node_attrs = {
    'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'fft': {'kind': 'op', 'op': 'TFFFT', 'num_of_dimensions': 2, 'is_inverse': False},
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
}
dft_graph_edges = [
    ('placeholder', 'fft', {'in': 0}),
    ('fft', 'abs'),
    ('abs', 'output'),
]
# Reference: TFFFT becomes a DFT (or IDFT; the op name and the axes constant
# are overridden per test case) with an explicit second input of axes.
ref_dft_graph_node_attrs = {
    'placeholder': {'shape': int64_array([3, 100, 100, 2]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
    'fft': {'kind': 'op', 'op': 'DFT'},
    'abs': {'type': 'Abs', 'kind': 'op', 'op': 'Abs'},
    'output': {'type': None, 'value': None, 'kind': 'op', 'op': 'Result'},
    'fft_axes': {
        'type': 'Const', 'kind': 'op', 'op': 'Const', 'shape': int64_array([2]), 'value': int64_array([-2, -1])
    },
}
ref_dft_graph_edges = [
    ('placeholder', 'fft', {'in': 0}),
    ('fft', 'abs'),
    ('abs', 'output'),
    ('fft_axes', 'fft', {'in': 1}),
]
@generator
class TFFFTToDFTTest(unittest.TestCase):
    """Parameterized tests of the TFFFTToDFT front transformation: a TFFFT of
    N dimensions must become a DFT/IDFT with the matching trailing axes."""

    @generate(*[(2, False, 'DFT', int64_array([-2, -1])),
                (2, True, 'IDFT', int64_array([-2, -1])),
                (1, False, 'DFT', int64_array([-1])),
                (1, True, 'IDFT', int64_array([-1])),
                (3, False, 'DFT', int64_array([-3, -2, -1])),
                (3, True, 'IDFT', int64_array([-3, -2, -1]))])
    def test_replacement(self, num_of_dimensions, is_inverse, dft_type, fft_axes):
        # Build the graph under test with the case-specific TFFFT attributes.
        actual = build_graph(
            nodes_attrs=dft_graph_node_attrs,
            edges=dft_graph_edges,
            update_attributes={
                'fft': {'num_of_dimensions': num_of_dimensions, 'is_inverse': is_inverse},
            })
        actual.stage = 'front'
        # The transformation reads these conversion parameters from the graph.
        setattr(actual.graph['cmd_params'], 'disable_nhwc_to_nchw', False)
        actual.graph['layout'] = 'NHWC'
        TFFFTToDFT().find_and_replace_pattern(actual)
        # Reference graph: a (I)DFT fed by an axes constant of matching rank.
        expected = build_graph(
            nodes_attrs=ref_dft_graph_node_attrs,
            edges=ref_dft_graph_edges,
            update_attributes={
                'fft': {'kind': 'op', 'op': dft_type},
                'fft_axes': {'value': fft_axes, 'shape': int64_array(fft_axes.shape)},
            })
        equal, details = compare_graphs(actual, expected, 'output', check_op_attrs=True)
        self.assertTrue(equal, details)

Some files were not shown because too many files have changed in this diff Show More