diff --git a/.ci/azure/linux_ngraph_onnx.yml b/.ci/azure/linux_ngraph_onnx.yml
index e11e72e102d..1e13710f2c2 100644
--- a/.ci/azure/linux_ngraph_onnx.yml
+++ b/.ci/azure/linux_ngraph_onnx.yml
@@ -17,6 +17,8 @@ jobs:
WORK_DIR: $(Pipeline.Workspace)/_w
MODELS_DIR: /mount/cinfsshare/onnxtestdata
TMP_DIR: /mnt/tmp
+ ONNX_MODEL_ZOO_SHA: "d58213534f2a4d1c4b19ba62b3bb5f544353256e"
+
steps:
- script: |
@@ -55,7 +57,7 @@ jobs:
- script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile .
displayName: 'Docker build'
- - script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o
+ - script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)"
displayName: 'Get models'
- script: |
@@ -77,6 +79,6 @@ jobs:
displayName: 'Create swap'
- script: |
- docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo:/root/.onnx/model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image
+ docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "tox && tox -e zoo_models"
displayName: 'Docker run'
diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml
index 503f9f38986..120492baef0 100644
--- a/docs/doxygen/ie_docs.xml
+++ b/docs/doxygen/ie_docs.xml
@@ -131,7 +131,7 @@ limitations under the License.
-
+
@@ -189,11 +189,13 @@ limitations under the License.
+
+
diff --git a/docs/ops/arithmetic/Ceiling_1.md b/docs/ops/arithmetic/Ceiling_1.md
index 588b5ff6842..4d4cfeb9450 100644
--- a/docs/ops/arithmetic/Ceiling_1.md
+++ b/docs/ops/arithmetic/Ceiling_1.md
@@ -2,31 +2,31 @@
**Versioned name**: *Ceiling-1*
-**Category**: Arithmetic unary operation
+**Category**: *Arithmetic unary operation*
**Short description**: *Ceiling* performs element-wise ceiling operation with given tensor.
-**Attributes**:
+**Detailed description**: For each element from the input tensor, calculates the corresponding
+element in the output tensor using the following formula:
- No attributes available.
+\f[
+a_{i} = ceiling(a_{i})
+\f]
+
+**Attributes**: *Ceiling* operation has no attributes.
**Inputs**
-* **1**: An tensor of type T. **Required.**
+* **1**: A tensor of type *T* and arbitrary shape. **Required.**
**Outputs**
-* **1**: The result of element-wise ceiling operation. A tensor of type T.
+* **1**: The result of element-wise ceiling operation. A tensor of type *T*.
**Types**
* *T*: any numeric type.
-*Ceiling* does the following with the input tensor *a*:
-
-\f[
-a_{i} = ceiling(a_{i})
-\f]
**Examples**
diff --git a/docs/ops/arithmetic/Negative_1.md b/docs/ops/arithmetic/Negative_1.md
index 2e17112e7bc..997342c2d05 100644
--- a/docs/ops/arithmetic/Negative_1.md
+++ b/docs/ops/arithmetic/Negative_1.md
@@ -2,35 +2,33 @@
**Versioned name**: *Negative-1*
-**Category**: Arithmetic unary operation
+**Category**: *Arithmetic unary operation*
-**Short description**: *Negative* performs element-wise negative operation with given tensor.
+**Short description**: *Negative* performs element-wise negative operation on a given input tensor.
-**Attributes**:
+**Detailed description**
- No attributes available.
-
-**Inputs**
-
-* **1**: An tensor of type T. **Required.**
-
-**Outputs**
-
-* **1**: The result of element-wise negative operation. A tensor of type T.
-
-**Types**
-
-* *T*: any numeric type.
-
-*Negative* does the following with the input tensor *a*:
+*Negative* performs element-wise negative operation on a given input tensor, based on the following mathematical formula:
\f[
a_{i} = -a_{i}
\f]
-**Examples**
+**Attributes**: *Negative* operation has no attributes.
-*Example 1*
+**Inputs**
+
+* **1**: A tensor of type *T* and arbitrary shape. **Required.**
+
+**Outputs**
+
+* **1**: The result of element-wise *Negative* operation applied to the input tensor. A tensor of type *T* and the same shape as input tensor.
+
+**Types**
+
+* *T*: any supported signed numeric type.
+
+**Example**
```xml
@@ -47,4 +45,4 @@ a_{i} = -a_{i}
-```
\ No newline at end of file
+```
diff --git a/docs/ops/convolution/DeformableConvolution_1.md b/docs/ops/convolution/DeformableConvolution_1.md
index 2cba8d84039..612d3c419d4 100644
--- a/docs/ops/convolution/DeformableConvolution_1.md
+++ b/docs/ops/convolution/DeformableConvolution_1.md
@@ -8,6 +8,26 @@
**Detailed description**: *Deformable Convolution* is similar to regular *Convolution* but its receptive field is deformed because of additional spatial offsets used during input sampling. More thorough explanation can be found in [Deformable Convolutions Demystified](https://towardsdatascience.com/deformable-convolutions-demystified-2a77498699e8) and [Deformable Convolutional Networks](https://arxiv.org/abs/1703.06211).
+Output is calculated using the following formula:
+
+ \f[
+
+ y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k})
+
+ \f]
+
+Where
+* K is the number of sampling locations, e.g. for a 3x3 kernel and dilation = 1, K = 9
+
+* \f$x(p)\f$ and \f$y(p)\f$ denote the features at location p from the input feature maps x and output feature maps y
+
+* \f$w_{k}\f$ is the weight for the k-th location.
+
+* \f$p_{k}\f$ is the pre-specified offset for the k-th location, e.g. K = 9 and
+\f$p_{k} \in \{(-1, -1),(-1, 0), . . . ,(1, 1)\}\f$
+
+* \f${\Delta}p_{k}\f$ is the learnable offset for the k-th location.
+
**Attributes**:
* *strides*
diff --git a/docs/ops/convolution/DeformableConvolution_8.md b/docs/ops/convolution/DeformableConvolution_8.md
new file mode 100644
index 00000000000..cf59584a5f4
--- /dev/null
+++ b/docs/ops/convolution/DeformableConvolution_8.md
@@ -0,0 +1,168 @@
+## DeformableConvolution {#openvino_docs_ops_convolution_DeformableConvolution_8}
+
+**Versioned name**: *DeformableConvolution-8*
+
+**Category**: Convolution
+
+**Short description**: Computes 2D deformable convolution of input and kernel tensors.
+
+**Detailed description**: *Deformable Convolution* is similar to regular *Convolution* but its receptive field is deformed because of additional spatial offsets used during input sampling. More thorough explanation can be found in [Deformable Convolutions Demystified](https://towardsdatascience.com/deformable-convolutions-demystified-2a77498699e8), [Deformable Convolutional Networks](https://arxiv.org/abs/1703.06211).
+
+Modification of DeformableConvolution using modulating scalars is also supported. Please refer to [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf).
+
+Output is calculated using the following formula:
+
+ \f[
+
+ y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k}) * {\Delta}m_{k}
+
+ \f]
+Where
+* K is the number of sampling locations, e.g. for a 3x3 kernel and dilation = 1, K = 9
+
+* \f$x(p)\f$ and \f$y(p)\f$ denote the features at location p from the input feature maps x and output feature maps y
+
+* \f$w_{k}\f$ is the weight for the k-th location.
+
+* \f$p_{k}\f$ is the pre-specified offset for the k-th location, e.g. K = 9 and
+\f$p_{k} \in \{(-1, -1),(-1, 0), . . . ,(1, 1)\}\f$
+
+* \f${\Delta}p_{k}\f$ is the learnable offset for the k-th location.
+
+* \f${\Delta}m_{k}\f$ is the modulation scalar from 0 to 1 for the k-th location.
+
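+A minimal, single-channel C++ sketch of the sampling formula above is shown below for one output position. The `bilinear_sample` helper, the data layout and all names are assumptions made only for this illustration; they do not describe a particular plugin implementation.
+
+```cpp
+#include <cmath>
+#include <vector>
+
+// x: H x W feature map stored row-major; positions outside the map contribute 0.
+static float bilinear_sample(const std::vector<float>& x, int H, int W, float py, float px) {
+    const int y0 = static_cast<int>(std::floor(py));
+    const int x0 = static_cast<int>(std::floor(px));
+    float v = 0.f;
+    for (int dy = 0; dy <= 1; ++dy)
+        for (int dx = 0; dx <= 1; ++dx) {
+            const int yy = y0 + dy, xx = x0 + dx;
+            if (yy < 0 || yy >= H || xx < 0 || xx >= W) continue;
+            v += (1.f - std::fabs(py - yy)) * (1.f - std::fabs(px - xx)) * x[yy * W + xx];
+        }
+    return v;
+}
+
+// One output element y(p) for a 3x3 kernel: w holds the 9 weights, delta[k] = {dy, dx}
+// holds the learnable offsets and m[k] the modulation scalars (use 1.0 for no modulation).
+float deformable_conv_at(const std::vector<float>& x, int H, int W,
+                         const float w[9], const float delta[9][2], const float m[9],
+                         int oy, int ox) {
+    float y = 0.f;
+    int k = 0;
+    for (int ky = -1; ky <= 1; ++ky)
+        for (int kx = -1; kx <= 1; ++kx, ++k) {
+            const float py = oy + ky + delta[k][0];               // p + p_k + delta_p_k (rows)
+            const float px = ox + kx + delta[k][1];               // p + p_k + delta_p_k (cols)
+            y += w[k] * bilinear_sample(x, H, W, py, px) * m[k];  // weighted by delta_m_k
+        }
+    return y;
+}
+```
+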
+**Attributes**:
+
+* *strides*
+
+ * **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the `(y,x)` axes. For example, *strides* equal `2,1` means sliding the filter 2 pixels at a time over the height dimension and 1 pixel over the width dimension.
+ * **Range of values**: integer values starting from `0`
+ * **Type**: `int[]`
+ * **Default value**: None
+ * **Required**: *yes*
+
+* *pads_begin*
+
+ * **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal `1,2` means adding 1 pixel to the top of the input and 2 to the left of the input.
+ * **Range of values**: integer values starting from `0`
+ * **Type**: `int[]`
+ * **Default value**: None
+ * **Required**: *yes*
+ * **Note**: the attribute is ignored when *auto_pad* attribute is specified.
+
+* *pads_end*
+
+ * **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal `1,2` means adding 1 pixel to the bottom of the input and 2 to the right of the input.
+ * **Range of values**: integer values starting from `0`
+ * **Type**: `int[]`
+ * **Default value**: None
+ * **Required**: *yes*
+ * **Note**: the attribute is ignored when *auto_pad* attribute is specified.
+
+* *dilations*
+
+ * **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. For example, *dilation* equal `1,1` means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal `2,2` means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1.
+ * **Range of values**: integer value starting from `0`
+ * **Type**: `int[]`
+ * **Default value**: None
+ * **Required**: *yes*
+
+* *auto_pad*
+
+ * **Description**: *auto_pad* specifies how the padding is calculated. Possible values:
+ * *explicit* - use explicit padding values from *pads_begin* and *pads_end*.
+ * *same_upper* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the end.
+ * *same_lower* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the beginning.
+ * *valid* - do not use padding.
+ * **Type**: `string`
+ * **Default value**: explicit
+ * **Required**: *no*
+ * **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified.
+
+
+* *group*
+
+ * **Description**: *group* is the number of groups which *output* and *input* should be split into. For example, *group* equal to 1 means that all filters are applied to the whole input (usual convolution), *group* equal to 2 means that both *input* and *output* channels are separated into two groups and the *i-th output* group is connected to the *i-th input* group channel. *group* equal to a number of output feature maps implies depth-wise separable convolution.
+ * **Range of values**: integer value starting from `1`
+ * **Type**: `int`
+ * **Default value**: `1`
+ * **Required**: *no*
+
+* *deformable_group*
+
+ * **Description**: *deformable_group* is the number of groups into which the *offsets* input and the *output* should be split along the channel axis. The deformable convolution is applied to the i-th output group using the i-th part of the offsets input.
+ * **Range of values**: integer value starting from `1`
+ * **Type**: `int`
+ * **Default value**: `1`
+ * **Required**: *no*
+
+* *bilinear_interpolation_padding*
+
+ * **Description**: *bilinear_interpolation_padding* is the number of pixels outside of the feature map boundary to apply bilinear interpolation.
+ * **Range of values**: non-negative integer value
+ * **Type**: `int`
+ * **Default value**: `0`
+ * **Required**: *no*
+
+**Inputs**:
+
+* **1**: Input tensor of type *T* and rank 4. Layout is `NCYX` (number of batches, number of channels, spatial axes Y and X). **Required.**
+
+* **2**: Offsets tensor of type *T* and rank 4. Layout is `NCYX` (number of batches, *deformable_group* \* kernel_Y \* kernel_X \* 2, spatial axes Y and X). **Required.**
+
+* **3**: Kernel tensor of type *T* and rank 4. Layout is `OIYX` (number of output channels, number of input channels, spatial axes Y and X). **Required.**
+
+* **4**: ModulationScalars tensor of type *T2* and rank 4, the values are within [0, 1]. Layout is `NCYX` (number of batches, *deformable_group* \* kernel_Y \* kernel_X, spatial axes Y and X). If the input is not provided, the values are assumed to be equal to 1. **Optional.**
+
+
+**Outputs**:
+
+* **1**: Output tensor of type *T* and rank 4. Layout is `NOYX` (number of batches, number of kernel output channels, spatial axes Y and X).
+
+**Types**:
+
+* *T*: Any numeric type.
+* *T2*: Any supported floating-point type.
+
+**Example**
+
+2D DeformableConvolution (deformable_group=1)
+```xml
+<layer type="DeformableConvolution">
+    <data strides="1,1" pads_begin="0,0" pads_end="0,0" dilations="1,1" auto_pad="explicit" group="1" deformable_group="1"/>
+    <input>
+        <port id="0">
+            <dim>1</dim>
+            <dim>4</dim>
+            <dim>224</dim>
+            <dim>224</dim>
+        </port>
+        <port id="1">
+            <dim>1</dim>
+            <dim>50</dim>
+            <dim>220</dim>
+            <dim>220</dim>
+        </port>
+        <port id="2">
+            <dim>64</dim>
+            <dim>4</dim>
+            <dim>5</dim>
+            <dim>5</dim>
+        </port>
+        <port id="3">
+            <dim>1</dim>
+            <dim>25</dim>
+            <dim>220</dim>
+            <dim>220</dim>
+        </port>
+    </input>
+    <output>
+        <port id="4">
+            <dim>1</dim>
+            <dim>64</dim>
+            <dim>220</dim>
+            <dim>220</dim>
+        </port>
+    </output>
+</layer>
+```
diff --git a/docs/ops/opset8.md b/docs/ops/opset8.md
index 8f43927b5ec..fc68d6f32e0 100644
--- a/docs/ops/opset8.md
+++ b/docs/ops/opset8.md
@@ -40,7 +40,7 @@ declared in `namespace opset8`.
* [Cos](arithmetic/Cos_1.md)
* [Cosh](arithmetic/Cosh_1.md)
* [CumSum](arithmetic/CumSum_3.md)
-* [DeformableConvolution](convolution/DeformableConvolution_1.md)
+* [DeformableConvolution](convolution/DeformableConvolution_8.md)
* [DeformablePSROIPooling](detection/DeformablePSROIPooling_1.md)
* [DepthToSpace](movement/DepthToSpace_1.md)
* [DetectionOutput](detection/DetectionOutput_1.md)
diff --git a/docs/ops/sort/MatrixNMS_8.md b/docs/ops/sort/MatrixNMS_8.md
new file mode 100644
index 00000000000..d3a42230722
--- /dev/null
+++ b/docs/ops/sort/MatrixNMS_8.md
@@ -0,0 +1,168 @@
+## MatrixNonMaxSuppression {#openvino_docs_ops_sort_MatrixNms_8}
+
+**Versioned name**: *MatrixNonMaxSuppression-8*
+
+**Category**: *Sorting and maximization*
+
+**Short description**: *MatrixNonMaxSuppression* performs matrix non-maximum suppression (NMS) of the boxes with predicted scores.
+
+**Detailed description**: The operation performs the following:
+
+1. Selects candidate bounding boxes with scores higher than `score_threshold`.
+2. For each class, selects at most `nms_top_k` candidate boxes.
+3. Decays scores of the candidate boxes according to the Matrix NMS algorithm [Wang et al](https://arxiv.org/abs/2003.10152.pdf). This algorithm is applied independently to each class and each batch element. Boxes of `background_class` are skipped and thus eliminated during the process.
+4. Selects boxes with the decayed scores higher than `post_threshold`, and selects at most `keep_top_k` scoring candidate boxes per batch element.
+
+The Matrix NMS algorithm is described below:
+1. Sort the candidate boxes by score in descending order, and compute the `n*n` pairwise IOU (IntersectionOverUnion) matrix `X` for the top `n` boxes, where `n` is the number of candidate boxes.
+2. Set the lower triangle and diagonal of `X` to 0, keeping only the upper triangular part of `X`.
+3. Take the column-wise max of `X` to compute a vector `K` of maximum IOU for each candidate box.
+4. Repeat the values of `K` along axis 1 to obtain a matrix `X_cmax`.
+5. Compute the decay factor: `decay_factor = exp((X_cmax**2 - X**2) * gaussian_sigma)` if `decay_function` is `gaussian`, else `decay_factor = (1 - X) / (1 - X_cmax)`.
+6. Take the column-wise min of `decay_factor`, and multiply it element-wise with the scores to decay them.
+
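+The following is a minimal, illustrative C++ sketch of the decay computation in steps 1-6, applied to one class of one batch element. The `Box` type, the `iou()` helper and all names are assumptions made for this illustration only; it is not the plugin implementation.
+
+```cpp
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <vector>
+
+struct Box { float xmin, ymin, xmax, ymax; };
+
+static float iou(const Box& a, const Box& b) {
+    const float iw = std::max(0.f, std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin));
+    const float ih = std::max(0.f, std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin));
+    const float inter = iw * ih;
+    const float uni = (a.xmax - a.xmin) * (a.ymax - a.ymin) +
+                      (b.xmax - b.xmin) * (b.ymax - b.ymin) - inter;
+    return uni > 0.f ? inter / uni : 0.f;
+}
+
+// boxes and scores must already be sorted by score in descending order (step 1).
+void matrix_nms_decay(const std::vector<Box>& boxes, std::vector<float>& scores,
+                      bool gaussian, float gaussian_sigma) {
+    const std::size_t n = boxes.size();
+    std::vector<std::vector<float>> X(n, std::vector<float>(n, 0.f));
+    for (std::size_t i = 0; i < n; ++i)           // step 2: keep only the upper triangle
+        for (std::size_t j = i + 1; j < n; ++j)
+            X[i][j] = iou(boxes[i], boxes[j]);
+
+    std::vector<float> K(n, 0.f);                 // step 3: column-wise max
+    for (std::size_t j = 0; j < n; ++j)
+        for (std::size_t i = 0; i < n; ++i)
+            K[j] = std::max(K[j], X[i][j]);
+
+    for (std::size_t j = 0; j < n; ++j) {         // steps 4-6, with X_cmax[i][j] == K[i]
+        float decay = 1.f;
+        for (std::size_t i = 0; i < j; ++i) {     // only higher-scored boxes can decay box j
+            const float d = gaussian
+                ? std::exp((K[i] * K[i] - X[i][j] * X[i][j]) * gaussian_sigma)
+                : (1.f - X[i][j]) / std::max(1.f - K[i], 1e-6f);  // guard against division by zero
+            decay = std::min(decay, d);
+        }
+        scores[j] *= decay;                       // decayed score for box j
+    }
+}
+```
+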
+**Attributes**:
+
+* *sort_result*
+
+ * **Description**: *sort_result* specifies the order of output elements.
+ * **Range of values**: `class`, `score`, `none`
+ * *class* - sort selected boxes by class id (ascending).
+ * *score* - sort selected boxes by score (descending).
+ * *none* - do not guarantee the order.
+ * **Type**: `string`
+ * **Default value**: `none`
+ * **Required**: *No*
+
+* *sort_result_across_batch*
+
+ * **Description**: *sort_result_across_batch* is a flag that specifies whether it is necessary to sort selected boxes across batches or not.
+ * **Range of values**: true or false
+ * *true* - sort selected boxes across batches.
+ * *false* - do not sort selected boxes across batches (boxes are sorted per batch element).
+ * **Type**: boolean
+ * **Default value**: false
+ * **Required**: *No*
+
+* *output_type*
+
+ * **Description**: the tensor type of outputs `selected_indices` and `valid_outputs`.
+ * **Range of values**: `i64` or `i32`
+ * **Type**: `string`
+ * **Default value**: `i64`
+ * **Required**: *No*
+
+* *score_threshold*
+
+ * **Description**: minimum score to consider a box for processing.
+ * **Range of values**: a floating-point number
+ * **Type**: `float`
+ * **Default value**: `0`
+ * **Required**: *No*
+
+* *nms_top_k*
+
+ * **Description**: maximum number of boxes to be selected per class.
+ * **Range of values**: an integer
+ * **Type**: `int`
+ * **Default value**: `-1` meaning to keep all boxes
+ * **Required**: *No*
+
+* *keep_top_k*
+
+ * **Description**: maximum number of boxes to be selected per batch element.
+ * **Range of values**: an integer
+ * **Type**: `int`
+ * **Default value**: `-1` meaning to keep all boxes
+ * **Required**: *No*
+
+* *background_class*
+
+ * **Description**: the background class id.
+ * **Range of values**: an integer
+ * **Type**: `int`
+ * **Default value**: `-1` meaning to keep all classes
+ * **Required**: *No*
+
+* *decay_function*
+
+ * **Description**: decay function used to decay scores.
+ * **Range of values**: `gaussian`, `linear`
+ * **Type**: `string`
+ * **Default value**: `linear`
+ * **Required**: *No*
+
+* *gaussian_sigma*
+
+ * **Description**: gaussian_sigma parameter for gaussian decay_function.
+ * **Range of values**: a floating-point number
+ * **Type**: `float`
+ * **Default value**: `2.0`
+ * **Required**: *No*
+
+* *post_threshold*
+
+ * **Description**: threshold to filter out boxes with low confidence score after decaying.
+ * **Range of values**: a floating-point number
+ * **Type**: `float`
+ * **Default value**: `0`
+ * **Required**: *No*
+
+**Inputs**:
+
+* **1**: `boxes` - tensor of type *T* and shape `[num_batches, num_boxes, 4]` with box coordinates. The box coordinates are laid out as `[xmin, ymin, xmax, ymax]`. **Required.**
+
+* **2**: `scores` - tensor of type *T* and shape `[num_batches, num_classes, num_boxes]` with box scores. **Required.**
+
+**Outputs**:
+
+* **1**: `selected_outputs` - tensor of type *T_THRESHOLDS* and shape `[number of selected boxes, 6]` containing the selected boxes with score and class as tuples `[class_id, box_score, xmin, ymin, xmax, ymax]`.
+
+* **2**: `selected_indices` - tensor of type *T_IND* and shape `[number of selected boxes, 1]` containing the indices of the selected boxes in the flattened input `boxes`. The indices are absolute across batches, so the valid values are in the range `[0, num_batches * num_boxes - 1]`.
+
+* **3**: `selected_num` - 1D tensor of type *T_IND* and shape `[num_batches]` representing the number of selected boxes for each batch element.
+
+When there is no box selected, `selected_num` is filled with `0`. `selected_outputs` is an empty tensor of shape `[0, 6]`, and `selected_indices` is an empty tensor of shape `[0, 1]`.
+
+**Types**
+
+* *T*: floating point type.
+
+* *T_MAX_BOXES*: integer type.
+
+* *T_THRESHOLDS*: floating point type.
+
+* *T_IND*: `int64` or `int32`.
+
+**Example**
+
+```xml
+<layer type="MatrixNms">
+    <input>
+        <port id="0">
+            <dim>3</dim>
+            <dim>100</dim>
+            <dim>4</dim>
+        </port>
+        <port id="1">
+            <dim>3</dim>
+            <dim>5</dim>
+            <dim>100</dim>
+        </port>
+    </input>
+    <output>
+        <port id="2">
+            <dim>-1</dim>
+            <dim>6</dim>
+        </port>
+        <port id="3">
+            <dim>-1</dim>
+            <dim>1</dim>
+        </port>
+        <port id="4">
+            <dim>3</dim>
+        </port>
+    </output>
+</layer>
+```
diff --git a/docs/ops/sort/MulticlassNMS_8.md b/docs/ops/sort/MulticlassNMS_8.md
new file mode 100644
index 00000000000..16997a81397
--- /dev/null
+++ b/docs/ops/sort/MulticlassNMS_8.md
@@ -0,0 +1,161 @@
+## MulticlassNonMaxSuppression {#openvino_docs_ops_sort_MulticlassNonMaxSuppression_8}
+
+**Versioned name**: *MulticlassNonMaxSuppression-8*
+
+**Category**: *Sorting and maximization*
+
+**Short description**: *MulticlassNonMaxSuppression* performs multi-class non-maximum suppression of the boxes with predicted scores.
+
+**Detailed description**: *MulticlassNonMaxSuppression* is a multi-phase operation. It implements non-maximum suppression algorithm as described below:
+
+1. Let `B = [b_0,...,b_n]` be the list of initial detection boxes, and `S = [s_0,...,s_n]` be the list of corresponding scores.
+2. Let `D = []` be an initial collection of resulting boxes. Let `adaptive_threshold = iou_threshold`.
+3. If `B` is empty, go to step 9.
+4. Take the box with the highest score. Suppose that it is the box `b` with the score `s`.
+5. Delete `b` from `B`.
+6. If the score `s` is greater than or equal to `score_threshold`, add `b` to `D`, else go to step 9.
+7. If `nms_eta < 1` and `adaptive_threshold > 0.5`, update `adaptive_threshold *= nms_eta`.
+8. For each input box `b_i` from `B` and the corresponding score `s_i`, set `s_i = 0` when `iou(b, b_i) > adaptive_threshold`, and go to step 3.
+9. Return `D`, a collection of the corresponding scores `S`, and the number of elements in `D`.
+
+This algorithm is applied independently to each class of each batch element. The operation feeds at most `nms_top_k` highest-scoring candidate boxes to this algorithm.
+The total number of output boxes per batch element must not exceed `keep_top_k`.
+Boxes of `background_class` are skipped and thus eliminated.
+
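+A minimal, illustrative C++ sketch of this per-class loop is shown below. The `Box` type, the `iou()` helper and all names are assumptions made for this illustration only; it is not the plugin implementation.
+
+```cpp
+#include <algorithm>
+#include <cstddef>
+#include <vector>
+
+struct Box { float xmin, ymin, xmax, ymax; };
+
+static float iou(const Box& a, const Box& b) {
+    const float iw = std::max(0.f, std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin));
+    const float ih = std::max(0.f, std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin));
+    const float inter = iw * ih;
+    const float uni = (a.xmax - a.xmin) * (a.ymax - a.ymin) +
+                      (b.xmax - b.xmin) * (b.ymax - b.ymin) - inter;
+    return uni > 0.f ? inter / uni : 0.f;
+}
+
+// Returns the indices of the boxes kept for one class of one batch element (steps 1-9).
+std::vector<std::size_t> nms_one_class(const std::vector<Box>& boxes,
+                                       std::vector<float> scores,  // local copy, entries get zeroed
+                                       float iou_threshold, float score_threshold, float nms_eta) {
+    std::vector<std::size_t> remaining(boxes.size());               // B
+    for (std::size_t i = 0; i < remaining.size(); ++i) remaining[i] = i;
+    std::vector<std::size_t> selected;                              // D
+    float adaptive_threshold = iou_threshold;                       // step 2
+    while (!remaining.empty()) {                                    // step 3
+        auto best = std::max_element(remaining.begin(), remaining.end(),
+            [&](std::size_t lhs, std::size_t rhs) { return scores[lhs] < scores[rhs]; });  // step 4
+        const std::size_t b = *best;
+        remaining.erase(best);                                      // step 5
+        if (scores[b] < score_threshold) break;                     // step 6
+        selected.push_back(b);
+        if (nms_eta < 1.f && adaptive_threshold > 0.5f)
+            adaptive_threshold *= nms_eta;                          // step 7
+        for (std::size_t i : remaining)                             // step 8
+            if (iou(boxes[b], boxes[i]) > adaptive_threshold)
+                scores[i] = 0.f;
+    }
+    return selected;                                                // step 9
+}
+```
+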
+**Attributes**:
+
+* *sort_result*
+
+ * **Description**: *sort_result* specifies the order of output elements.
+ * **Range of values**: `class`, `score`, `none`
+ * *class* - sort selected boxes by class id (ascending).
+ * *score* - sort selected boxes by score (descending).
+ * *none* - do not guarantee the order.
+ * **Type**: `string`
+ * **Default value**: `none`
+ * **Required**: *No*
+
+* *sort_result_across_batch*
+
+ * **Description**: *sort_result_across_batch* is a flag that specifies whether it is necessary to sort selected boxes across batches or not.
+ * **Range of values**: true or false
+ * *true* - sort selected boxes across batches.
+ * *false* - do not sort selected boxes across batches (boxes are sorted per batch element).
+ * **Type**: boolean
+ * **Default value**: false
+ * **Required**: *No*
+
+* *output_type*
+
+ * **Description**: the tensor type of outputs `selected_indices` and `valid_outputs`.
+ * **Range of values**: `i64` or `i32`
+ * **Type**: `string`
+ * **Default value**: `i64`
+ * **Required**: *No*
+
+* *iou_threshold*
+
+ * **Description**: intersection over union threshold.
+ * **Range of values**: a floating-point number
+ * **Type**: `float`
+ * **Default value**: `0`
+ * **Required**: *No*
+
+* *score_threshold*
+
+ * **Description**: minimum score to consider a box for processing.
+ * **Range of values**: a floating-point number
+ * **Type**: `float`
+ * **Default value**: `0`
+ * **Required**: *No*
+
+* *nms_top_k*
+
+ * **Description**: maximum number of boxes to be selected per class.
+ * **Range of values**: an integer
+ * **Type**: `int`
+ * **Default value**: `-1` meaning to keep all boxes
+ * **Required**: *No*
+
+* *keep_top_k*
+
+ * **Description**: maximum number of boxes to be selected per batch element.
+ * **Range of values**: an integer
+ * **Type**: `int`
+ * **Default value**: `-1` meaning to keep all boxes
+ * **Required**: *No*
+
+* *background_class*
+
+ * **Description**: the background class id.
+ * **Range of values**: an integer
+ * **Type**: `int`
+ * **Default value**: `-1` meaning to keep all classes.
+ * **Required**: *No*
+
+* *nms_eta*
+
+ * **Description**: eta parameter for adaptive NMS.
+ * **Range of values**: a floating-point number in the closed range `[0, 1.0]`.
+ * **Type**: `float`
+ * **Default value**: `1.0`
+ * **Required**: *No*
+
+**Inputs**:
+
+* **1**: `boxes` - tensor of type *T* and shape `[num_batches, num_boxes, 4]` with box coordinates. The box coordinates are laid out as `[xmin, ymin, xmax, ymax]`. **Required.**
+
+* **2**: `scores` - tensor of type *T* and shape `[num_batches, num_classes, num_boxes]` with box scores. **Required.**
+
+**Outputs**:
+
+* **1**: `selected_outputs` - tensor of type *T_THRESHOLDS* and shape `[number of selected boxes, 6]` containing the selected boxes with score and class as tuples `[class_id, box_score, xmin, ymin, xmax, ymax]`.
+
+* **2**: `selected_indices` - tensor of type *T_IND* and shape `[number of selected boxes, 1]` containing the indices of the selected boxes in the flattened `boxes` input. The indices are absolute across batches, so the valid values are in the range `[0, num_batches * num_boxes - 1]`.
+
+* **3**: `selected_num` - 1D tensor of type *T_IND* and shape `[num_batches]` representing the number of selected boxes for each batch element.
+
+When there is no box selected, `selected_num` is filled with `0`. `selected_outputs` is an empty tensor of shape `[0, 6]`, and `selected_indices` is an empty tensor of shape `[0, 1]`.
+
+**Types**
+
+* *T*: floating point type.
+
+* *T_MAX_BOXES*: integer type.
+
+* *T_THRESHOLDS*: floating point type.
+
+* *T_IND*: `int64` or `int32`.
+
+**Example**
+
+```xml
+<layer type="MulticlassNms">
+    <input>
+        <port id="0">
+            <dim>3</dim>
+            <dim>100</dim>
+            <dim>4</dim>
+        </port>
+        <port id="1">
+            <dim>3</dim>
+            <dim>5</dim>
+            <dim>100</dim>
+        </port>
+    </input>
+    <output>
+        <port id="2">
+            <dim>-1</dim>
+            <dim>6</dim>
+        </port>
+        <port id="3">
+            <dim>-1</dim>
+            <dim>1</dim>
+        </port>
+        <port id="4">
+            <dim>3</dim>
+        </port>
+    </output>
+</layer>
+```
diff --git a/docs/template_plugin/tests/functional/skip_tests_config.cpp b/docs/template_plugin/tests/functional/skip_tests_config.cpp
index 252ed7c9a71..8d100118a9d 100644
--- a/docs/template_plugin/tests/functional/skip_tests_config.cpp
+++ b/docs/template_plugin/tests/functional/skip_tests_config.cpp
@@ -10,10 +10,10 @@
std::vector<std::string> disabledTestPatterns() {
return {
".*ExclusiveAsyncRequests.*",
- ".*reusableCPUStreamsExecutor.*",
+ ".*ReusableCPUStreamsExecutor.*",
R"(.*SplitLayerTest.*numSplits\=30.*)",
// CVS-51758
".*PreprocessConversionTest.*oLT=NHWC.*",
".*PreprocessDynamicallyInSetBlobTest.*oPRC=0.*oLT=1.*",
};
-}
\ No newline at end of file
+}
diff --git a/inference-engine/include/cpp/ie_executable_network.hpp b/inference-engine/include/cpp/ie_executable_network.hpp
index eb1824f9da0..81d5b10e7dd 100644
--- a/inference-engine/include/cpp/ie_executable_network.hpp
+++ b/inference-engine/include/cpp/ie_executable_network.hpp
@@ -32,9 +32,6 @@ class IExecutableNetworkInternal;
class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
details::SharedObjectLoader _so;
std::shared_ptr _impl;
- IE_SUPPRESS_DEPRECATED_START
- std::shared_ptr actual;
- IE_SUPPRESS_DEPRECATED_END
/**
* @brief Constructs ExecutableNetwork from the initialized std::shared_ptr
@@ -51,18 +48,6 @@ public:
*/
ExecutableNetwork() = default;
- IE_SUPPRESS_DEPRECATED_START
- /**
- * @deprecated This ctor will be removed in 2022.1
- * @brief Constructs ExecutableNetwork from the initialized std::shared_ptr
- * @param exec Initialized shared pointer
- * @param splg Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin object is destroyed.
- */
- INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
- explicit ExecutableNetwork(std::shared_ptr exec,
- std::shared_ptr splg = {});
- IE_SUPPRESS_DEPRECATED_END
-
/**
* @brief Gets the Executable network output Data node information.
*
diff --git a/inference-engine/include/cpp/ie_infer_request.hpp b/inference-engine/include/cpp/ie_infer_request.hpp
index fd71bf18bc2..c5d52ec6fc0 100644
--- a/inference-engine/include/cpp/ie_infer_request.hpp
+++ b/inference-engine/include/cpp/ie_infer_request.hpp
@@ -35,10 +35,6 @@ class ICompletionCallbackWrapper;
class INFERENCE_ENGINE_API_CLASS(InferRequest) {
details::SharedObjectLoader _so;
std::shared_ptr _impl;
- IE_SUPPRESS_DEPRECATED_START
- IInferRequest::Ptr actual;
- std::shared_ptr callback;
- IE_SUPPRESS_DEPRECATED_END
/**
* @brief Constructs InferRequest from the initialized std::shared_ptr
@@ -71,18 +67,6 @@ public:
*/
InferRequest() = default;
- IE_SUPPRESS_DEPRECATED_START
- /**
- * @deprecated This ctor will be removed in 2022.1
- * @brief Constructs InferRequest from the initialized std::shared_ptr
- * @param request Initialized shared pointer
- * @param splg Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is destroyed.
- */
- INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
- explicit InferRequest(IInferRequest::Ptr request,
- std::shared_ptr splg = {});
- IE_SUPPRESS_DEPRECATED_END
-
/**
* @brief Sets input/output data to infer
*
diff --git a/inference-engine/include/cpp/ie_memory_state.hpp b/inference-engine/include/cpp/ie_memory_state.hpp
index 0c055cec40c..8d54f79f06c 100644
--- a/inference-engine/include/cpp/ie_memory_state.hpp
+++ b/inference-engine/include/cpp/ie_memory_state.hpp
@@ -3,7 +3,7 @@
//
/**
- * @brief A header file that provides wrapper classes for IVariableState
+ * @brief A header file that provides VariableState
*
* @file ie_memory_state.hpp
*/
@@ -16,21 +16,17 @@
#include "ie_api.h"
#include "ie_blob.h"
#include "details/ie_so_loader.h"
-#include "ie_imemory_state.hpp"
namespace InferenceEngine {
class IVariableStateInternal;
/**
- * @brief C++ exception based error reporting wrapper of API class IVariableState
+ * @brief VariableState class
*/
class INFERENCE_ENGINE_API_CLASS(VariableState) {
details::SharedObjectLoader _so;
std::shared_ptr _impl;
- IE_SUPPRESS_DEPRECATED_START
- std::shared_ptr actual;
- IE_SUPPRESS_DEPRECATED_END
/**
* @brief Constructs VariableState from the initialized std::shared_ptr
@@ -48,55 +44,27 @@ public:
*/
VariableState() = default;
- IE_SUPPRESS_DEPRECATED_START
/**
- * @deprecated This ctor will be removed in 2022.1
- * @brief constructs VariableState from the initialized std::shared_ptr
- * @param pState Initialized shared pointer
- * @param plg Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed.
- */
- INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
- explicit VariableState(std::shared_ptr pState,
- std::shared_ptr plg = {});
- IE_SUPPRESS_DEPRECATED_END
-
- /**
- * @copybrief IVariableState::Reset
- *
- * Wraps IVariableState::Reset
+ * @brief Resets the internal variable state of the relevant infer request
+ * to the value specified as default for the corresponding ReadValue node
*/
void Reset();
/**
- * @copybrief IVariableState::GetName
- *
- * Wraps IVariableState::GetName
+ * @brief Gets the name of the current variable state. The name is the `variable_id`
+ * of the corresponding `ReadValue` node.
* @return A string representing a state name
*/
std::string GetName() const;
/**
- * @copybrief IVariableState::GetState
- *
- * Wraps IVariableState::GetState
+ * @brief Returns the value of the variable state.
* @return A blob representing a state
*/
Blob::CPtr GetState() const;
/**
- * @copybrief IVariableState::GetLastState
- * @deprecated Use IVariableState::SetState instead
- *
- * Wraps IVariableState::GetLastState
- * @return A blob representing a last state
- */
- INFERENCE_ENGINE_DEPRECATED("Use VariableState::GetState function instead")
- Blob::CPtr GetLastState() const;
-
- /**
- * @copybrief IVariableState::SetState
- *
- * Wraps IVariableState::SetState
+ * @brief Sets the new state for the next inference.
* @param state The current state to set
*/
void SetState(Blob::Ptr state);
diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/include/gna/gna_config.hpp
index 958227696a1..3433ab58887 100644
--- a/inference-engine/include/gna/gna_config.hpp
+++ b/inference-engine/include/gna/gna_config.hpp
@@ -65,6 +65,7 @@ DECLARE_GNA_CONFIG_KEY(DEVICE_MODE);
DECLARE_GNA_CONFIG_VALUE(AUTO);
DECLARE_GNA_CONFIG_VALUE(HW);
+DECLARE_GNA_CONFIG_VALUE(HW_WITH_SW_FBACK);
DECLARE_GNA_CONFIG_VALUE(SW);
DECLARE_GNA_CONFIG_VALUE(SW_EXACT);
DECLARE_GNA_CONFIG_VALUE(SW_FP32);
diff --git a/inference-engine/include/ie_iexecutable_network.hpp b/inference-engine/include/ie_iexecutable_network.hpp
index caef9bb95b9..bb0a6f71c4a 100644
--- a/inference-engine/include/ie_iexecutable_network.hpp
+++ b/inference-engine/include/ie_iexecutable_network.hpp
@@ -18,7 +18,6 @@
#include "ie_common.h"
#include "ie_icnn_network.hpp"
#include "ie_iinfer_request.hpp"
-#include "ie_imemory_state.hpp"
#include "ie_input_info.hpp"
#include "ie_parameter.hpp"
#include "ie_remote_context.hpp"
@@ -113,22 +112,6 @@ public:
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::ExecutableNetwork::GetExecGraphInfo instead")
virtual StatusCode GetExecGraphInfo(ICNNNetwork::Ptr& graphPtr, ResponseDesc* resp) noexcept = 0;
- /**
- * @deprecated Use InferRequest::QueryState instead
- * @brief Gets state control interface for given executable network.
- *
- * State control essential for recurrent networks
- *
- * @param pState reference to a pointer that receives internal states
- * @param idx requested index for receiving memory state
- * @param resp Optional: pointer to an already allocated object to contain information in case of failure
- * @return Status code of the operation: InferenceEngine::OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for
- * given index
- */
- INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
- virtual StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept = 0;
- IE_SUPPRESS_DEPRECATED_END
-
/**
* @brief Sets configuration for current executable network
*
diff --git a/inference-engine/include/ie_iinfer_request.hpp b/inference-engine/include/ie_iinfer_request.hpp
index 7d762d96a11..4fd200c0252 100644
--- a/inference-engine/include/ie_iinfer_request.hpp
+++ b/inference-engine/include/ie_iinfer_request.hpp
@@ -17,7 +17,6 @@
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_preprocess.hpp"
-#include "ie_imemory_state.hpp"
namespace InferenceEngine {
@@ -195,21 +194,6 @@ public:
*/
virtual InferenceEngine::StatusCode SetBatch(int batch_size, ResponseDesc* resp) noexcept = 0;
- IE_SUPPRESS_DEPRECATED_START
- /**
- * @brief Gets state control interface for given infer request.
- *
- * State control essential for recurrent networks
- *
- * @param pState reference to a pointer that receives internal states
- * @param idx requested index for receiving memory state
- * @param resp Optional: pointer to an already allocated object to contain information in case of failure
- * @return Status code of the operation: InferenceEngine::OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for
- * given index
- */
- virtual StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept = 0;
- IE_SUPPRESS_DEPRECATED_END
-
protected:
~IInferRequest() = default;
};
diff --git a/inference-engine/include/ie_imemory_state.hpp b/inference-engine/include/ie_imemory_state.hpp
deleted file mode 100644
index 7f3ef99cbd1..00000000000
--- a/inference-engine/include/ie_imemory_state.hpp
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-/**
- * @brief a header file for IVariableState interface
- *
- * @file ie_imemory_state.hpp
- */
-
-#pragma once
-
-#include
-
-#include "ie_blob.h"
-#include "ie_common.h"
-
-namespace InferenceEngine {
-
-/**
- * @deprecated Use InferenceEngine::VariableState C++ wrapper instead
- * @interface IVariableState
- * @brief Manages data for reset operations
- */
-class INFERENCE_ENGINE_DEPRECATED("InferenceEngine::") IVariableState {
-public:
- IE_SUPPRESS_DEPRECATED_START
- /**
- * @brief A shared pointer to the IVariableState interface
- */
- using Ptr = std::shared_ptr;
- IE_SUPPRESS_DEPRECATED_END
-
- /**
- * @brief Gets name of current variable state, if length of array is not enough name is truncated by len, null
- * terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used.
- *
- * @param name preallocated buffer for receiving name
- * @param len Length of the buffer
- * @param resp Optional: pointer to an already allocated object to contain information in case of failure
- * @return Status code of the operation: InferenceEngine::OK (0) for success
- */
- virtual StatusCode GetName(char* name, size_t len, ResponseDesc* resp) const noexcept = 0;
-
- /**
- * @brief Reset internal variable state for relevant infer request, to a value specified as default for according ReadValue node
- *
- * @param resp Optional: pointer to an already allocated object to contain information in case of failure
- * @return Status code of the operation: InferenceEngine::OK (0) for success*
- */
- virtual StatusCode Reset(ResponseDesc* resp) noexcept = 0;
-
- /**
- * @brief Sets the new state for the next inference.
- *
- * This method can fail if Blob size does not match the internal state size or precision
- *
- * @param newState The data to use as new state
- * @param resp Optional: pointer to an already allocated object to contain information in case of failure
- * @return Status code of the operation: InferenceEngine::OK (0) for success
- */
- virtual StatusCode SetState(Blob::Ptr newState, ResponseDesc* resp) noexcept = 0;
-
- /**
- * @brief Returns the value of the variable state.
- *
- * @param state A reference to a blob containing a variable state
- * @param resp Optional: pointer to an already allocated object to contain information in case of failure
- * @return Status code of the operation: InferenceEngine::OK (0) for success
- */
- INFERENCE_ENGINE_DEPRECATED("Use GetState function instead")
- virtual StatusCode GetLastState(Blob::CPtr& state, ResponseDesc* resp) const noexcept {
- return GetState(state, resp);
- }
-
- /**
- * @brief Returns the value of the variable state.
- *
- * @param state A reference to a blob containing a variable state
- * @param resp Optional: pointer to an already allocated object to contain information in case of failure
- * @return Status code of the operation: InferenceEngine::OK (0) for success
- */
- virtual StatusCode GetState(Blob::CPtr& state, ResponseDesc* resp) const noexcept = 0;
-};
-
-IE_SUPPRESS_DEPRECATED_START
-
-/**
- * @brief For compatibility reasons.
- */
-using IMemoryState = IVariableState;
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace InferenceEngine
\ No newline at end of file
diff --git a/inference-engine/include/ie_parameter.hpp b/inference-engine/include/ie_parameter.hpp
index 1343f89db32..4aa6760d474 100644
--- a/inference-engine/include/ie_parameter.hpp
+++ b/inference-engine/include/ie_parameter.hpp
@@ -49,26 +49,6 @@ public:
std::swap(ptr, parameter.ptr);
}
- /**
- * @deprecated Use ngraph::Variant directly
- * @brief Creates parameter from variant.
- * This method creates empty parameter if variant doesn't contain Parameter
- *
- * @param var ngraph variant
- */
- INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
- Parameter(const std::shared_ptr& var);
-
- /**
- * @deprecated Use ngraph::Variant directly
- * @brief Creates parameter from variant.
- * This method creates empty parameter if variant doesn't contain Parameter
- *
- * @param var ngraph variant
- */
- INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
- Parameter(std::shared_ptr& var);
-
/**
* @brief Copy constructor
*
@@ -86,7 +66,8 @@ public:
* @param parameter object
*/
template ::type, Parameter>::value>::type>
+ typename = typename std::enable_if::type, Parameter>::value &&
+ !std::is_abstract::type>::value>::type>
Parameter(T&& parameter) { // NOLINT
static_assert(!std::is_same::type, Parameter>::value, "To prevent recursion");
ptr = new RealData::type>(std::forward(parameter));
@@ -203,28 +184,6 @@ public:
return dyn_cast::type>(ptr);
}
- /**
- * @deprecated Use ngraph::Variant directly
- * @brief Converts parameter to shared pointer on ngraph::Variant
- *
- * @return shared pointer on ngraph::Variant
- */
- INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
- std::shared_ptr asVariant() const;
-
- /**
- * @deprecated Use ngraph::Variant directly
- * @brief Casts to shared pointer on ngraph::Variant
- *
- * @return shared pointer on ngraph::Variant
- */
- INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
- operator std::shared_ptr() const {
- IE_SUPPRESS_DEPRECATED_START
- return asVariant();
- IE_SUPPRESS_DEPRECATED_END
- }
-
/**
* Dynamic cast to specified type
* @tparam T type
@@ -254,6 +213,21 @@ public:
return !(*this == rhs);
}
+ /**
+ * @brief Prints underlying object to the given output stream.
+ * Uses operator<< if it is defined, leaves stream unchanged otherwise.
+ * In case of empty parameter or nullptr stream immediately returns.
+ *
+ * @param object Object to be printed to the given output stream.
+ * @param stream Output stream object will be printed to.
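+ *
+ * Illustrative usage (assuming the held type defines `operator<<`):
+ * @code
+ * Parameter p = 42;
+ * std::ostringstream os;
+ * PrintTo(p, &os);   // os.str() == "42"
+ * @endcode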
+ */
+ friend void PrintTo(const Parameter& object, std::ostream* stream) {
+ if (object.empty() || !stream) {
+ return;
+ }
+ object.ptr->print(*stream);
+ }
+
private:
template
struct CheckOperatorEqual {
@@ -273,6 +247,24 @@ private:
template
struct HasOperatorEqual : CheckOperatorEqual::type {};
+ template
+ struct CheckOutputStreamOperator {
+ template
+ static auto test(W*) -> decltype(std::declval() << std::declval(), std::true_type()) {
+ return {};
+ }
+
+ template
+ static auto test(...) -> std::false_type {
+ return {};
+ }
+
+ using type = typename std::is_same(nullptr))>::type;
+ };
+
+ template
+ struct HasOutputStreamOperator : CheckOutputStreamOperator::type {};
+
struct Any {
#ifdef __ANDROID__
virtual ~Any();
@@ -282,6 +274,7 @@ private:
virtual bool is(const std::type_info&) const = 0;
virtual Any* copy() const = 0;
virtual bool operator==(const Any& rhs) const = 0;
+ virtual void print(std::ostream&) const = 0;
};
template
@@ -318,6 +311,20 @@ private:
bool operator==(const Any& rhs) const override {
return rhs.is(typeid(T)) && equal(*this, rhs);
}
+
+ template
+ typename std::enable_if::value, void>::type
+ print(std::ostream& stream, const U& object) const {}
+
+ template
+ typename std::enable_if::value, void>::type
+ print(std::ostream& stream, const U& object) const {
+ stream << object;
+ }
+
+ void print(std::ostream& stream) const override {
+ print(stream, get());
+ }
};
template
diff --git a/inference-engine/include/ie_unicode.hpp b/inference-engine/include/ie_unicode.hpp
deleted file mode 100644
index dc943d6f558..00000000000
--- a/inference-engine/include/ie_unicode.hpp
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-/**
- * @brief This is a header file with common inference engine definitions
- *
- * @file ie_unicode.hpp
- */
-#pragma once
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#ifdef UNICODE
-typedef wchar_t tchar;
-typedef std::wstring file_name_t;
-#else
-typedef char tchar;
-typedef std::string file_name_t;
-#endif
-
-namespace InferenceEngine {
-
-/**
- * @deprecated Use OS-native conversion utilities
- * @brief Conversion from possibly-wide character string to a single-byte chain.
- * @param str A possibly-wide character string
- * @return A single-byte character string
- */
-INFERENCE_ENGINE_DEPRECATED("Use OS-native conversion utilities")
-inline std::string fileNameToString(const file_name_t& str) {
-#ifdef UNICODE
- size_t maxlen = (str.length() + 1) * sizeof(wchar_t) / sizeof(char);
- std::vector mbstr(maxlen);
- mbstr[0] = 0;
- std::wcstombs(&mbstr[0], str.c_str(), maxlen);
- std::string res = std::string(&mbstr[0]);
- return res;
-#else
- return str;
-#endif
-}
-
-/**
- * @deprecated Use OS-native conversion utilities
- * @brief Conversion from single-byte character string to a possibly-wide one
- * @param str A single-byte character string
- * @return A possibly-wide character string
- */
-INFERENCE_ENGINE_DEPRECATED("Use OS-native conversion utilities")
-inline file_name_t stringToFileName(const std::string& str) {
-#ifdef UNICODE
- size_t maxlen = str.length() + 1;
- std::vector wcstr(maxlen);
- wcstr[0] = 0;
- std::mbstowcs(&wcstr[0], str.c_str(), maxlen);
- file_name_t res = file_name_t(&wcstr[0]);
- return res;
-#else
- return str;
-#endif
-}
-
-} // namespace InferenceEngine
diff --git a/inference-engine/samples/speech_sample/main.cpp b/inference-engine/samples/speech_sample/main.cpp
index 2b9131774ad..57db61a8e9e 100644
--- a/inference-engine/samples/speech_sample/main.cpp
+++ b/inference-engine/samples/speech_sample/main.cpp
@@ -236,7 +236,8 @@ float getGnaFrequencyMHz() {
const uint8_t cannon_lake_model = 102;
const uint8_t gemini_lake_model = 122;
const uint8_t ice_lake_model = 126;
- const uint8_t next_model = 140;
+ const uint8_t tgl_model = 140;
+ const uint8_t next_model = 151;
native_cpuid(&eax, &ebx, &ecx, &edx);
family = (eax >> 8) & 0xF;
@@ -254,6 +255,7 @@ float getGnaFrequencyMHz() {
switch (model) {
case cannon_lake_model:
case ice_lake_model:
+ case tgl_model:
case next_model:
return 400;
case gemini_lake_model:
@@ -287,13 +289,14 @@ void printReferenceCompareResults(score_error_t const& totalError, size_t frames
/**
* @brief Print a report on the performance counts
* @param utterancePerfMap reference to a map to store performance counters
- * @param callsNum frame index
+ * @param numberOfFrames number of frames
* @param stream output stream
* @param fullDeviceName full device name string
+ * @param numberOfFramesOnHw number of frames delivered to GNA HW
* @return none.
*/
-void printPerformanceCounters(std::map const& utterancePerfMap, size_t callsNum, std::ostream& stream,
- std::string fullDeviceName) {
+void printPerformanceCounters(std::map const& utterancePerfMap, size_t numberOfFrames,
+ std::ostream& stream, std::string fullDeviceName, const uint64_t numberOfFramesOnHw) {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
stream << std::endl << "Performance counts:" << std::endl;
stream << std::setw(10) << std::right << ""
@@ -305,29 +308,29 @@ void printPerformanceCounters(std::map(it.second.realTime_uSec);
- float call_units = current_units / callsNum;
- // if GNA HW counters
- // get frequency of GNA module
- float freq = getGnaFrequencyMHz();
- current_units /= freq * 1000;
- call_units /= freq;
+ float current_units_us = static_cast<float>(it.second.realTime_uSec) / freq;
+ float call_units_us = current_units_us / numberOfFrames;
if (FLAGS_d.find("GNA") != std::string::npos) {
stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
} else {
stream << std::setw(30) << std::left << counter_name;
}
- stream << std::setw(16) << std::right << current_units;
- stream << std::setw(21) << std::right << call_units;
+ stream << std::setw(16) << std::right << current_units_us / 1000;
+ stream << std::setw(21) << std::right << call_units_us;
stream << std::endl;
}
stream << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << fullDeviceName << std::endl;
std::cout << std::endl;
+ stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
+ stream << "/" << numberOfFrames;
+ stream << std::endl;
#endif
}
@@ -346,16 +349,20 @@ void getPerformanceCounters(InferenceEngine::InferRequest& request, std::map const& perfCounters,
- std::map& totalPerfCounters) {
+ std::map& totalPerfCounters, uint64_t& totalRunsOnHw) {
+ auto runOnHw = false;
for (const auto& pair : perfCounters) {
totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec;
+ runOnHw |= pair.second.realTime_uSec > 0; // if realTime is above zero, that means that a primitive was executed on the device
}
+ totalRunsOnHw += runOnHw;
}
/**
@@ -443,6 +450,7 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
"GPU",
"GNA_AUTO",
"GNA_HW",
+ "GNA_HW_WITH_SW_FBACK",
"GNA_SW_EXACT",
"GNA_SW",
"GNA_SW_FP32",
@@ -829,6 +837,7 @@ int main(int argc, char* argv[]) {
/** Work with each utterance **/
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
std::map utterancePerfMap;
+ uint64_t totalNumberOfRunsOnHw = 0;
std::string uttName;
uint32_t numFrames(0), n(0);
std::vector numFrameElementsInput;
@@ -984,7 +993,7 @@ int main(int argc, char* argv[]) {
// retrieve new counters
getPerformanceCounters(inferRequest.inferRequest, callPerfMap);
// summarize retrieved counters with all previous
- sumPerformanceCounters(callPerfMap, utterancePerfMap);
+ sumPerformanceCounters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw);
}
}
// -----------------------------------------------------------------------------------------------------
@@ -1092,7 +1101,7 @@ int main(int argc, char* argv[]) {
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast(numFrames) << " ms" << std::endl;
if (FLAGS_pc) {
// print performance results
- printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d));
+ printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d), totalNumberOfRunsOnHw);
}
if (!FLAGS_r.empty()) {
// print statistical score error
diff --git a/inference-engine/samples/speech_sample/speech_sample.hpp b/inference-engine/samples/speech_sample/speech_sample.hpp
index cafe4db5c61..66d3b24a4c5 100644
--- a/inference-engine/samples/speech_sample/speech_sample.hpp
+++ b/inference-engine/samples/speech_sample/speech_sample.hpp
@@ -21,10 +21,9 @@ static const char model_message[] = "Required. Path to an .xml file with a train
/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, "
- "GNA_SW_FP32, "
+ "GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, "
"GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
- " as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown "
- "below. "
+ " as a secondary (e.g. HETERO:GNA,CPU) are supported. "
"The sample will look for a suitable plugin for device specified.";
/// @brief message for execution target
diff --git a/inference-engine/src/gna_plugin/CMakeLists.txt b/inference-engine/src/gna_plugin/CMakeLists.txt
index f3ce2858570..36b9d6d5cc0 100644
--- a/inference-engine/src/gna_plugin/CMakeLists.txt
+++ b/inference-engine/src/gna_plugin/CMakeLists.txt
@@ -29,12 +29,15 @@ endif()
#
# Shared plugin library
-#
+#
ie_add_plugin(NAME ${TARGET_NAME}
DEVICE_NAME "GNA"
SOURCES ${SOURCES} ${HEADERS})
+# Enable support of CC for the plugin
+ie_mark_target_as_cc(${TARGET_NAME})
+
# saving rpath to GNA shared library be used by CI
log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
@@ -67,7 +70,8 @@ target_compile_definitions(${TARGET_NAME}_test_static
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s inference_engine_transformations libGNA::API)
target_include_directories(${TARGET_NAME}_test_static PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
- $)
+ $
+ PRIVATE $)
set_target_properties(${TARGET_NAME}_test_static PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_test_static)
set_target_properties(${TARGET_NAME} ${TARGET_NAME}_test_static
@@ -76,6 +80,6 @@ set_target_properties(${TARGET_NAME} ${TARGET_NAME}_test_static
# install
file(GLOB_RECURSE source_list "${libGNA_LIBRARIES_BASE_PATH}/*${CMAKE_SHARED_LIBRARY_SUFFIX}*")
-install(FILES ${source_list}
+install(FILES ${source_list}
DESTINATION ${IE_CPACK_IE_DIR}/external/gna/lib
COMPONENT gna)
diff --git a/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp
index 1f3f125a029..b57813858ac 100644
--- a/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp
@@ -15,6 +15,7 @@
#include "layer_quantizer.hpp"
#include "scale_factor_calc.hpp"
#include "weights_converter.hpp"
+#include "gna_itt.hpp"
namespace GNAPluginNS {
@@ -40,6 +41,7 @@ class ModelQuantizer {
template
InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork &model, const PreQuantisationCb &cb, std::vector scaleFactor) const {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ModelQuantizer::quantize");
auto visitor = [&](InferenceEngine::CNNLayerPtr lp) {
auto newLayer = InferenceEngine::injectData(lp);
transformLayer(newLayer, WeightsConverter());
diff --git a/inference-engine/src/gna_plugin/gna_device.cpp b/inference-engine/src/gna_plugin/gna_device.cpp
index cbfc47f57aa..85a246ea34f 100644
--- a/inference-engine/src/gna_plugin/gna_device.cpp
+++ b/inference-engine/src/gna_plugin/gna_device.cpp
@@ -96,14 +96,12 @@ void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t l
const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
checkGna2Status(status, "Gna2RequestConfigEnableActiveList");
}
-void GNADeviceHelper::propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
- wait(propagate(requestConfigId, gna2AccelerationMode));
-}
uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
std::unique_lock lockGnaCalls{ acrossPluginsSync };
uint32_t reqId{};
- if (gna2AccelerationMode == Gna2AccelerationModeHardware &&
+ if ((gna2AccelerationMode == Gna2AccelerationModeHardware ||
+ gna2AccelerationMode == Gna2AccelerationModeHardwareWithSoftwareFallback) &&
detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) {
gnawarn() << "GNA Device not detected, consider using other mode of acceleration";
}
@@ -541,6 +539,8 @@ void GNADeviceHelper::updateGnaPerfCounters() {
#if GNA_LIB_VER == 2
instrumentationTotal[0] = instrumentationResults[0];
instrumentationTotal[1] = instrumentationResults[1];
+ instrumentationResults[0] = 0;
+ instrumentationResults[1] = 0;
#else
nGNAPerfResultsTotal.hw.stall = nGNAPerfResults.hw.stall;
nGNAPerfResultsTotal.hw.total = nGNAPerfResults.hw.total;
diff --git a/inference-engine/src/gna_plugin/gna_device.hpp b/inference-engine/src/gna_plugin/gna_device.hpp
index e032e5532da..cae32c70b1d 100644
--- a/inference-engine/src/gna_plugin/gna_device.hpp
+++ b/inference-engine/src/gna_plugin/gna_device.hpp
@@ -117,18 +117,12 @@ public:
uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);
#if GNA_LIB_VER == 1
- void propagateSync(const intel_nnet_type_t *pNeuralNetwork,
- const uint32_t *pActiveIndices,
- uint32_t nActiveIndices,
- intel_gna_proc_t nGNAProcType);
-
uint32_t propagate(const intel_nnet_type_t *pNeuralNetwork,
const uint32_t *pActiveIndices,
uint32_t nActiveIndices,
intel_gna_proc_t nGNAProcType);
#else
void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices);
- void propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
uint32_t createModel(Gna2Model& gnaModel) const;
void releaseModel(const uint32_t model_id);
diff --git a/inference-engine/src/gna_plugin/gna_itt.hpp b/inference-engine/src/gna_plugin/gna_itt.hpp
new file mode 100644
index 00000000000..3fa02119733
--- /dev/null
+++ b/inference-engine/src/gna_plugin/gna_itt.hpp
@@ -0,0 +1,21 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief Defines openvino domains for tracing
+ * @file gna_itt.hpp
+ */
+
+#pragma once
+
+#include <openvino/itt.hpp>
+
+namespace GNAPluginNS {
+namespace itt {
+namespace domains {
+ OV_ITT_DOMAIN(GNAPlugin);
+ OV_ITT_DOMAIN(GNA_LT);
+}
+}
+}
diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
index f49d543def1..cb227304649 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -37,7 +37,7 @@
#include
#include "gna_graph_patterns.hpp"
#include "gna_tensor_tools.hpp"
-#include
+#include "gna_itt.hpp"
#include
#include
@@ -391,6 +391,7 @@ GNAPlugin::GNAPlugin(const std::map& configMap) {
}
void GNAPlugin::Init() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "Init");
dnn = std::make_shared(backend::AMIntelDNN());
inputsDesc = std::make_shared(GNAPluginNS::InputDesc());
gnaFlags = std::make_shared(GNAPluginNS::GNAFlags());
@@ -401,6 +402,7 @@ void GNAPlugin::Init() {
}
void GNAPlugin::InitGNADevice() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice");
#if GNA_LIB_VER == 1
gnadevice = std::make_shared(gnaFlags->gna_lib_async_threads_num,
gnaFlags->gna_openmp_multithreading,
@@ -419,6 +421,7 @@ void GNAPlugin::InitGNADevice() {
}
void GNAPlugin::UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork & network) {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateGnaQuantModeFromNetwork");
// fp32 emulation mode dont need any modifications to configuration
if (config.gnaFlags.sw_fp32) return;
@@ -454,6 +457,7 @@ void GNAPlugin::UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork & netw
}
void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & network) {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateInputScaleFromNetwork");
// fp32 emulation mode dont need any modifications to configuration
if (config.gnaFlags.sw_fp32) return;
@@ -561,6 +565,7 @@ bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer)
}
void GNAPlugin::FillInputsAndOutputsTranspositionInfo(const InferenceEngine::CNNNetwork& net) {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FillInputsAndOutputsTranspositionInfo");
auto printTranspositionInfo = [](const std::vector &transpositionInfo) {
for (const auto &transpositionInfoPart : transpositionInfo) {
gnalog() << "transpose=" << transpositionInfoPart.transpose << " rows_num=" << transpositionInfoPart.num_transpose_rows
@@ -663,6 +668,7 @@ void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer,
#endif
void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
+ OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "LoadNetwork");
std::shared_ptr convertedNetwork;
if (_network.getFunction()) {
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp
index 2dcb05d6ab8..766e7d2d52c 100644
--- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp
@@ -23,6 +23,7 @@ static const caseless_unordered_map supported_values = {
{GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
};
static const std::vector supported_values_on_gna2 = {
+ GNAConfigParams::GNA_HW_WITH_SW_FBACK,
GNAConfigParams::GNA_GEN,
GNAConfigParams::GNA_GEN_EXACT,
GNAConfigParams::GNA_SSE,
@@ -34,18 +35,19 @@ static const std::vector supported_values_on_gna2 = {
};
#else
static const caseless_unordered_map > supported_values = {
- {GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
- {GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
- {GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
- {GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
- {GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
- {GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
- {GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
- {GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
- {GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
- {GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
- {GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
- {GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
+ {GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
+ {GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
+ {GNAConfigParams::GNA_HW_WITH_SW_FBACK, {Gna2AccelerationModeHardwareWithSoftwareFallback, false}},
+ {GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
+ {GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
+ {GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
+ {GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
+ {GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
+ {GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
+ {GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
+ {GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
+ {GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
+ {GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
};
#endif
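With GNA_HW_WITH_SW_FBACK now accepted on both GNA library versions, the mode is selected through the usual device-mode configuration key. A minimal caller-side sketch, assuming the public gna/gna_config.hpp header and the GNA_CONFIG_KEY(DEVICE_MODE) key; the model path is a placeholder:

```cpp
#include <map>
#include <string>

#include <ie_core.hpp>
#include <gna/gna_config.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder model

    // Ask for hardware execution with software fallback (key/value spelling assumed).
    const std::map<std::string, std::string> config = {
        {GNA_CONFIG_KEY(DEVICE_MODE), InferenceEngine::GNAConfigParams::GNA_HW_WITH_SW_FBACK}};

    auto exeNetwork = core.LoadNetwork(network, "GNA", config);
    return 0;
}
```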
diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
index ef333e7e46f..5355a7b28f1 100644
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -41,6 +41,7 @@
#include "gna_graph_patterns.hpp"
#include "gna_data_types.hpp"
#include "gna_tensor_tools.hpp"
+#include "gna_itt.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
@@ -112,6 +113,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
*/
static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx,
std::shared_ptr passmanager, std::string copyLayerType) {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayer");
auto quantized = InferenceEngine::getInjectedData(prevLayer);
std::string copyName = copyLayerType + std::string("_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
gnalog() << "Inserted " << copyName << " between: " << prevLayer->name << " and " << nextLayer->name << std::endl;
@@ -257,6 +259,7 @@ static std::vector getCandidatesForIdentityInsertion(const CNNLayer
}
void InsertDiagonalLayerPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertDiagonalLayerPass");
bool lowPrecision = getPassManager()->isLowPrecision();
for (auto & l : *pLayers) {
@@ -304,6 +307,7 @@ void InsertDiagonalLayerPass::run() {
}
void HandleMultipleActivationsForTheLayerPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "HandleMultipleActivationsForTheLayerPass");
// found layer followed by multiple activations
for (auto & l : *pLayers) {
CNNLayerSet activations;
@@ -333,6 +337,7 @@ void HandleMultipleActivationsForTheLayerPass::run() {
}
void ForbidActivationFusingPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ForbidActivationFusingPass");
for (auto& l : *pLayers) {
if (LayerInfo(l).isActivation()) {
auto prevLayer = CNNNetPrevLayer(l);
@@ -370,6 +375,7 @@ namespace {
} // namespace
void ReorderMaxPoolPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderMaxPoolPass");
// detecting following pattern
// conv->activation->maxpooling
// changing it to conv->maxpooling->activation
@@ -398,6 +404,7 @@ void ReorderMaxPoolPass::run() {
}
void SubstituteSoftSignPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstituteSoftSignPass");
// detecting the following pattern
// irv7 model: irv10 model:
// a layer a layer
@@ -501,6 +508,7 @@ void SubstituteSoftSignPass::run() {
}
}
void SubstitutePReluPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstitutePReluPass");
auto getScale = [](CNNLayer* layer) {
auto powerCandidate = LayerInfo(layer);
if (!powerCandidate.isPower()) return 0.0f;
@@ -606,6 +614,7 @@ void SubstitutePReluPass::run() {
}
void ReversePermutationsPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReversePermutationsPass");
std::function)> prevLayerSkipCertain
= [&prevLayerSkipCertain](CNNLayerPtr layer, std::function shouldSkip) -> CNNLayerPtr {
if (CNNNetHasPrevLayer(layer.get())) {
@@ -698,6 +707,7 @@ void ReversePermutationsPass::run() {
}
void RemovePermutationsNHWCToNCHWPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemovePermutationsNHWCToNCHWPass");
std::set permutations_to_remove;
std::list> nhwc_layout_patterns;
for (auto& l : *pLayers) {
@@ -781,6 +791,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {
}
void InsertIdentityLayerPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertIdentityLayerPass");
auto quantized = InferenceEngine::getInjectedData(pLayers->front());
auto createIdentityLayer = [quantized, this](const TensorDesc& tensorDesc) {
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
@@ -898,6 +909,7 @@ void InsertIdentityLayerPass::run() {
}
void InsertCopyLayerPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayerPass");
// Copy layer insertion happens in few cases:
// Crop output goes to concat layer -> copy layer insertion
// Splitted part of input goes to concat layer -> copy layer insertion
@@ -1020,6 +1032,7 @@ void InsertCopyLayerPass::run() {
}
void FlattenTrivialConcatPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FlattenTrivialConcatPass");
// change all trivial concatenations (concatenation where output buffer is a buffer made by appending input buffers)
// by reshaping its inputs to 1 x total_input_size and its output to 1 x total_concat_size and changing the axis to 1
// for example if 4D concat has unaligned inputs then ConcatAlignFilters need to be used if sizes before
@@ -1103,6 +1116,7 @@ void FlattenTrivialConcatPass::run() {
}
void InsertConcatAligningFilterPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass");
auto quantized = InferenceEngine::getInjectedData(pLayers->front());
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) {
@@ -1221,6 +1235,7 @@ void InsertConcatAligningFilterPass::run() {
}
void ReorderConcatInputsPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderConcatInputsPass");
auto quantized = InferenceEngine::getInjectedData(pLayers->front());
// aligning specific not required in fp32 mode
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
@@ -1318,6 +1333,7 @@ void ReorderConcatInputsPass::run() {
}
void InsertSplitAligningFilterPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertSplitAligningFilterPass");
// currently split layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this is not necessary but is useful for testing
const int bytesPerSplitElement = 2;
auto quantized = InferenceEngine::getInjectedData(pLayers->front());
@@ -1437,6 +1453,7 @@ static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {
}
void EltwiseSplitOverChannelsPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "EltwiseSplitOverChannelsPass");
if (getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo == Policy::GNAAffineDiagonal::UNLIMIT) {
return;
}
@@ -1552,6 +1569,7 @@ void EltwiseSplitOverChannelsPass::run() {
}
void SubstituteScaleShiftBroadCastPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstituteScaleShiftBroadCastPass");
std::map reshaped_data;
auto quantized = InferenceEngine::getInjectedData(pLayers->front());
@@ -1633,6 +1651,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
}
void BroadcastConstPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "BroadcastConstPass");
for (auto constLayer : *pLayers) {
if (!LayerInfo(constLayer).isConst()) {
continue;
@@ -1685,6 +1704,7 @@ void BroadcastConstPass::run() {
}
void InsertIdentityToLSTMCellPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertIdentityToLSTMCellPass");
for (auto layer : *pLayers) {
if (layer->type == "LSTMCell") {
// This fixes the cases when both functional and non-functional outputs are mixed (or no outputs are used)
@@ -1722,6 +1742,7 @@ void InsertIdentityToLSTMCellPass::run() {
}
void BreakFusingOfOutputLayersPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "BreakFusingOfOutputLayersPass");
#if GNA_LIB_VER == 1
return;
#endif
@@ -1765,6 +1786,7 @@ void BreakFusingOfOutputLayersPass::run() {
}
void UnrollLSTMCellPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UnrollLSTMCellPass");
InferenceEngine::NetPass::UnrollRNN_if(getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
if (rnn.clip != 0.0f)
return true;
@@ -1781,6 +1803,7 @@ void UnrollLSTMCellPass::run() {
}
void UnrollTIPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UnrollTIPass");
auto sts = InferenceEngine::NetPass::UnrollTI(getPassManager()->getNetwork());
if (!sts) {
THROW_GNA_EXCEPTION << "TensorIterator layer cannot be unrolled!";
@@ -1788,6 +1811,7 @@ void UnrollTIPass::run() {
}
void RemoveConstPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemoveConstPass");
auto network = getPassManager()->getNetwork();
IE_SUPPRESS_DEPRECATED_START
auto & icnnnet = static_cast(network);
@@ -1801,6 +1825,7 @@ void RemoveConstPass::run() {
}
void RemoveSingleInputConcatPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemoveSingleInputConcatPass");
for (auto &l : *pLayers) {
if (l->type == "Concat") {
auto concat = dynamic_cast(l.get());
@@ -1828,6 +1853,7 @@ void RemoveSingleInputConcatPass::run() {
}
void FuseMultipleIdentitiesPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FuseMultipleIdentitiesPass");
for (auto &l : *pLayers) {
if (l->insData.empty()) continue;
@@ -1909,6 +1935,7 @@ void FuseMultipleIdentitiesPass::run() {
}
void FuseFQIntoWeightsPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FuseFQIntoWeightsPass");
auto isNonFunctional = [](CNNLayerPtr ptr) {
return LayerInfo(ptr).isNonFunctional();
};
@@ -2067,6 +2094,7 @@ void FuseFQIntoWeightsPass::run() {
}
void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "MoveFakeQuantizeLayerIntoQuantParamsPass");
auto quantized = InferenceEngine::getInjectedData(pLayers->front());
if (!quantized) {
return;
@@ -2268,6 +2296,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
}
void TransposeWeightsFromNCHWToNHWCPass::run() {
+ OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "TransposeWeightsFromNCHWToNHWCPass");
if (!MustBeConvertedFromNCHWToNHWC(*pLayers)) return;
auto printTranspositionInfo = [](const std::vector &transpositionInfo) {
diff --git a/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp
index da7e6279624..e49d95ac2f2 100644
--- a/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp
+++ b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp
@@ -1,6 +1,7 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
+#include
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
@@ -107,6 +108,7 @@ static bool Convert(std::shared_ptr matmul_node,
}
ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() {
+ MATCHER_SCOPE(ConvertMatmulToPointWiseConvolution);
auto const_input = ngraph::pattern::wrap_type();
auto const_fq = ngraph::pattern::wrap_type({const_input,
ngraph::pattern::wrap_type(),
@@ -121,11 +123,12 @@ ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() {
return Convert(pattern_map.at(matmul).get_node_shared_ptr(), nullptr, nullptr, nullptr);
};
- auto m = std::make_shared(matmul, "ConvertMatmulToPointWiseConvolution");
+ auto m = std::make_shared(matmul, matcher_name);
this->register_matcher(m, callback);
}
ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseConvolution() {
+ MATCHER_SCOPE(ConvertMatmulWithBiasToPointWiseConvolution);
auto const_input = ngraph::pattern::wrap_type();
auto const_fq = ngraph::pattern::wrap_type({const_input,
ngraph::pattern::wrap_type(),
@@ -143,11 +146,12 @@ ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseCon
pattern_map.at(bias).get_node_shared_ptr(), nullptr);
};
- auto m = std::make_shared(add, "ConvertMatmulWithBiasToPointWiseConvolution");
+ auto m = std::make_shared(add, matcher_name);
this->register_matcher(m, callback);
}
ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolution() {
+ MATCHER_SCOPE(ConvertMatmulWithFqToPointWiseConvolution);
auto const_input = ngraph::pattern::wrap_type();
auto const_fq = ngraph::pattern::wrap_type({const_input,
ngraph::pattern::wrap_type(),
@@ -175,6 +179,6 @@ ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolu
pattern_map.at(out_fq).get_node_shared_ptr());
};
- auto m = std::make_shared(out_fq, "ConvertMatmulWithFqToPointWiseConvolution");
+ auto m = std::make_shared(out_fq, matcher_name);
this->register_matcher(m, callback);
}
\ No newline at end of file
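This file and the remaining transformation files in this patch follow the same pattern: MATCHER_SCOPE(Name) introduces a local matcher_name that replaces the previously duplicated string literal, which is what lets the conditional-compilation machinery identify (and, when disabled, skip) individual matchers. A condensed sketch of that registration pattern, with the exact macro expansion treated as an assumption rather than something this diff shows:

```cpp
// Condensed sketch of the registration pattern used by the GNA matcher passes above.
ExampleTransformation::ExampleTransformation() {    // hypothetical pass
    MATCHER_SCOPE(ExampleTransformation);            // defines `matcher_name`; assumed to no-op the pass when compiled out

    auto root = ngraph::pattern::any_input();        // pattern root (the real passes match concrete op types)

    ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher& m) {
        // ... inspect m.get_pattern_value_map() and rewrite the graph ...
        return true;
    };

    // The matcher name now comes from MATCHER_SCOPE instead of a repeated string literal.
    auto m = std::make_shared<ngraph::pattern::Matcher>(root, matcher_name);
    this->register_matcher(m, callback);
}
```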
diff --git a/inference-engine/src/gna_plugin/transformations/insert_transpose_after_convolution_or_pooling.cpp b/inference-engine/src/gna_plugin/transformations/insert_transpose_after_convolution_or_pooling.cpp
index 6bfef2587ae..4954529762d 100644
--- a/inference-engine/src/gna_plugin/transformations/insert_transpose_after_convolution_or_pooling.cpp
+++ b/inference-engine/src/gna_plugin/transformations/insert_transpose_after_convolution_or_pooling.cpp
@@ -1,6 +1,7 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
+#include
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
@@ -16,6 +17,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(InsertTransposeAfterConvOrPool, "InsertTransposeAfterConvOrPool", 0);
bool InsertTransposeAfterConvOrPool::run_on_function(std::shared_ptr f) {
+ RUN_ON_FUNCTION_SCOPE(InsertTransposeAfterConvOrPool);
bool is_graph_modfied = false;
for (auto& node : f->get_ordered_ops()) {
if (std::dynamic_pointer_cast(node) == nullptr &&
diff --git a/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.cpp b/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.cpp
index 4de8966d351..3e5c579af8f 100644
--- a/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.cpp
+++ b/inference-engine/src/gna_plugin/transformations/insert_transpose_before_matmul.cpp
@@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
+#include
+
#include "transformations/insert_transpose_before_matmul.hpp"
#include
@@ -13,6 +15,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(InsertTransposeBeforeMatmul, "InsertTransposeBeforeMatmul", 0);
InsertTransposeBeforeMatmul::InsertTransposeBeforeMatmul() {
+ MATCHER_SCOPE(InsertTransposeBeforeMatmul);
auto reshape = ngraph::pattern::wrap_type({ngraph::pattern::any_input(),
ngraph::pattern::any_input()},
ngraph::pattern::rank_equals(2));
@@ -59,6 +62,6 @@ InsertTransposeBeforeMatmul::InsertTransposeBeforeMatmul() {
return true;
};
- auto m = std::make_shared(root, "InsertTransposeBeforeMatmul");
+ auto m = std::make_shared(root, matcher_name);
this->register_matcher(m, callback);
}
diff --git a/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp
index 1a7d6da2a33..e1cfdefa311 100644
--- a/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp
+++ b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp
@@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
+#include
+
#include "transformations/remove_extra_reshapes.hpp"
#include
@@ -12,6 +14,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);
RemoveExtraReshapes::RemoveExtraReshapes() {
+ MATCHER_SCOPE(RemoveExtraReshapes);
const auto reshape = ngraph::pattern::wrap_type();
const auto pooling = ngraph::pattern::wrap_type({reshape});
@@ -26,6 +29,6 @@ RemoveExtraReshapes::RemoveExtraReshapes() {
return true;
};
- auto m = std::make_shared(pooling, "RemoveExtraReshapes");
+ auto m = std::make_shared(pooling, matcher_name);
this->register_matcher(m, callback);
}
diff --git a/inference-engine/src/gna_plugin/transformations/reorder_activation_and_pooling.cpp b/inference-engine/src/gna_plugin/transformations/reorder_activation_and_pooling.cpp
index 69bab295ba7..7e67d900e38 100644
--- a/inference-engine/src/gna_plugin/transformations/reorder_activation_and_pooling.cpp
+++ b/inference-engine/src/gna_plugin/transformations/reorder_activation_and_pooling.cpp
@@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
+#include
+
#include "transformations/reorder_activation_and_pooling.hpp"
#include
@@ -15,6 +17,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(ReorderActivationAndPooling, "ReorderActivationAndPooling", 0);
ReorderActivationAndPooling::ReorderActivationAndPooling() {
+ MATCHER_SCOPE(ReorderActivationAndPooling);
auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto add = ngraph::pattern::wrap_type({conv, ngraph::pattern::any_input()});
@@ -63,6 +66,6 @@ ReorderActivationAndPooling::ReorderActivationAndPooling() {
return true;
};
- auto m = std::make_shared(pool, "ReorderActivationAndPooling");
+ auto m = std::make_shared(pool, matcher_name);
this->register_matcher(m, callback);
}
diff --git a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp
index a9d79c831ab..2e750308e5f 100644
--- a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp
+++ b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp
@@ -1,6 +1,7 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
+#include
#include "transformations/split_convolution_with_large_buffer_size.hpp"
@@ -77,6 +78,7 @@ static bool Convert(std::shared_ptr conv,
}
SplitConvolution::SplitConvolution() {
+ MATCHER_SCOPE(SplitConvolution);
auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
@@ -85,11 +87,12 @@ SplitConvolution::SplitConvolution() {
return Convert(pattern_map.at(conv).get_node_shared_ptr(), nullptr, nullptr, nullptr);
};
- auto m = std::make_shared(conv, "SplitConvolution");
+ auto m = std::make_shared(conv, matcher_name);
this->register_matcher(m, callback);
}
SplitConvolutionWithBias::SplitConvolutionWithBias() {
+ MATCHER_SCOPE(SplitConvolutionWithBias);
auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto bias = ngraph::pattern::wrap_type();
@@ -101,11 +104,12 @@ SplitConvolutionWithBias::SplitConvolutionWithBias() {
pattern_map.at(bias).get_node_shared_ptr(), nullptr);
};
- auto m = std::make_shared(add, "SplitConvolutionWithBias");
+ auto m = std::make_shared(add, matcher_name);
this->register_matcher(m, callback);
}
SplitConvolutionWithFq::SplitConvolutionWithFq() {
+ MATCHER_SCOPE(SplitConvolutionWithFq);
auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto bias = ngraph::pattern::wrap_type();
@@ -126,6 +130,6 @@ SplitConvolutionWithFq::SplitConvolutionWithFq() {
return Convert(pattern_map.at(conv).get_node_shared_ptr(), add_node, bias_node, pattern_map.at(out_fq).get_node_shared_ptr());
};
- auto m = std::make_shared(out_fq, "SplitConvolutionWithFq");
+ auto m = std::make_shared(out_fq, matcher_name);
this->register_matcher(m, callback);
}
\ No newline at end of file
diff --git a/inference-engine/src/gna_plugin/transformations/swap_input_matmul_gna.cpp b/inference-engine/src/gna_plugin/transformations/swap_input_matmul_gna.cpp
index 9a725c33cf7..d177b83ba40 100644
--- a/inference-engine/src/gna_plugin/transformations/swap_input_matmul_gna.cpp
+++ b/inference-engine/src/gna_plugin/transformations/swap_input_matmul_gna.cpp
@@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
+#include
+
#include
#include
@@ -19,6 +21,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(SwapInputMatMul, "SwapInputMatMul", 0);
SwapInputMatMul::SwapInputMatMul() {
+ MATCHER_SCOPE(SwapInputMatMul);
auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(
ngraph::pattern::has_static_shape()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape())},
ngraph::pattern::has_static_shape());
@@ -95,6 +98,6 @@ SwapInputMatMul::SwapInputMatMul() {
return true;
};
- auto m = std::make_shared(matmul, "SwapInputMatMul");
+ auto m = std::make_shared(matmul, matcher_name);
this->register_matcher(m, callback);
}
\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp b/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp
index 6de781d11e6..a4afee5a28b 100644
--- a/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp
+++ b/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp
@@ -25,47 +25,15 @@ ExecutableNetwork::ExecutableNetwork(const details::SharedObjectLoader& so,
IE_SUPPRESS_DEPRECATED_START
-ExecutableNetwork::ExecutableNetwork(IExecutableNetwork::Ptr exec,
- std::shared_ptr splg)
- : _so(), _impl(), actual(exec) {
- if (splg) {
- _so = *splg;
- }
-
- // plg can be null, but not the actual
- if (actual == nullptr)
- IE_THROW(NotAllocated) << "ExecutableNetwork was not initialized.";
-}
-
ConstOutputsDataMap ExecutableNetwork::GetOutputsInfo() const {
- if (actual) {
- ConstOutputsDataMap data;
- CALL_STATUS_FNC(GetOutputsInfo, data);
- return data;
- }
-
EXEC_NET_CALL_STATEMENT(return _impl->GetOutputsInfo());
}
ConstInputsDataMap ExecutableNetwork::GetInputsInfo() const {
- if (actual) {
- ConstInputsDataMap info;
- CALL_STATUS_FNC(GetInputsInfo, info);
- return info;
- }
-
EXEC_NET_CALL_STATEMENT(return _impl->GetInputsInfo());
}
void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) {
- if (actual) {
- if (newActual == nullptr) {
- THROW_IE_EXCEPTION << "ExecutableNetwork wrapper used for reset was not initialized.";
- }
- this->actual.swap(newActual);
- return;
- }
-
if (_impl == nullptr) IE_THROW() << "ExecutableNetwork was not initialized.";
if (newActual == nullptr) IE_THROW() << "ExecutableNetwork wrapper used for reset was not initialized.";
auto newBase = std::dynamic_pointer_cast(newActual);
@@ -76,36 +44,10 @@ void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) {
}
ExecutableNetwork::operator IExecutableNetwork::Ptr() {
- if (actual) {
- return actual;
- }
-
return std::make_shared(_impl);
}
std::vector ExecutableNetwork::QueryState() {
- if (actual) {
- if (actual == nullptr) THROW_IE_EXCEPTION << "ExecutableNetwork was not initialized.";
- IVariableState::Ptr pState = nullptr;
- auto res = OK;
- std::vector controller;
- for (size_t idx = 0; res == OK; ++idx) {
- ResponseDesc resp;
- IE_SUPPRESS_DEPRECATED_START
- res = actual->QueryState(pState, idx, &resp);
- IE_SUPPRESS_DEPRECATED_END
- if (res != OK && res != OUT_OF_BOUNDS) {
- THROW_IE_EXCEPTION << resp.msg;
- }
- if (res != OUT_OF_BOUNDS) {
- controller.push_back(VariableState(pState,
- std::make_shared(_so)));
- }
- }
-
- return controller;
- }
-
std::vector controller;
EXEC_NET_CALL_STATEMENT(
for (auto&& state : _impl->QueryState()) {
@@ -115,13 +57,6 @@ std::vector ExecutableNetwork::QueryState() {
}
InferRequest ExecutableNetwork::CreateInferRequest() {
- if (actual) {
- IInferRequest::Ptr req;
- CALL_STATUS_FNC(CreateInferRequest, req);
- if (req.get() == nullptr) THROW_IE_EXCEPTION << "Internal error: pointer to infer request is null";
- return InferRequest(req, std::make_shared(_so));
- }
-
EXEC_NET_CALL_STATEMENT(return {_so, _impl->CreateInferRequest()});
}
@@ -130,72 +65,38 @@ InferRequest::Ptr ExecutableNetwork::CreateInferRequestPtr() {
}
void ExecutableNetwork::Export(const std::string& modelFileName) {
- if (actual) {
- CALL_STATUS_FNC(Export, modelFileName);
- return;
- }
EXEC_NET_CALL_STATEMENT(_impl->Export(modelFileName));
}
void ExecutableNetwork::Export(std::ostream& networkModel) {
- if (actual) {
- CALL_STATUS_FNC(Export, networkModel);
- return;
- }
EXEC_NET_CALL_STATEMENT(_impl->Export(networkModel));
}
CNNNetwork ExecutableNetwork::GetExecGraphInfo() {
- if (actual) {
- IE_SUPPRESS_DEPRECATED_START
- ICNNNetwork::Ptr ptr = nullptr;
- CALL_STATUS_FNC(GetExecGraphInfo, ptr);
- return CNNNetwork(ptr);
- IE_SUPPRESS_DEPRECATED_END
- }
EXEC_NET_CALL_STATEMENT(return _impl->GetExecGraphInfo());
}
void ExecutableNetwork::SetConfig(const std::map& config) {
- if (actual) {
- CALL_STATUS_FNC(SetConfig, config);
- return;
- }
EXEC_NET_CALL_STATEMENT(_impl->SetConfig(config));
}
Parameter ExecutableNetwork::GetConfig(const std::string& name) const {
- if (actual) {
- Parameter configValue;
- CALL_STATUS_FNC(GetConfig, name, configValue);
- return configValue;
- }
EXEC_NET_CALL_STATEMENT(return _impl->GetConfig(name));
}
Parameter ExecutableNetwork::GetMetric(const std::string& name) const {
- if (actual) {
- Parameter metricValue;
- CALL_STATUS_FNC(GetMetric, name, metricValue);
- return metricValue;
- }
EXEC_NET_CALL_STATEMENT(return _impl->GetMetric(name));
}
RemoteContext::Ptr ExecutableNetwork::GetContext() const {
- if (actual) {
- RemoteContext::Ptr pContext;
- CALL_STATUS_FNC(GetContext, pContext);
- return pContext;
- }
EXEC_NET_CALL_STATEMENT(return _impl->GetContext());
}
bool ExecutableNetwork::operator!() const noexcept {
- return !_impl || !actual;
+ return !_impl;
}
ExecutableNetwork::operator bool() const noexcept {
- return !!_impl || !!actual;
+ return !!_impl;
}
} // namespace InferenceEngine
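After the deprecated `actual` branch is gone, every ExecutableNetwork method collapses to the same forwarding shape: validate _impl, call into it, and let EXEC_NET_CALL_STATEMENT translate exceptions. A simplified free-function sketch of that shape (EXEC_NET_CALL_STATEMENT itself is assumed to provide the null check and exception translation, consistent with its use above):

```cpp
// Illustrative forwarding shape; the real methods wrap the call in EXEC_NET_CALL_STATEMENT.
InferenceEngine::Parameter GetConfigSketch(
        const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& impl,
        const std::string& name) {
    if (impl == nullptr)
        IE_THROW() << "ExecutableNetwork was not initialized.";
    // Implementation exceptions would be converted to IE exceptions by the macro.
    return impl->GetConfig(name);
}
```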
diff --git a/inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp b/inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp
index 2f813c0b783..c87b1fc7098 100644
--- a/inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp
+++ b/inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp
@@ -18,7 +18,6 @@
#include
#include
#include "cpp/exception2status.hpp"
-#include "ie_variable_state_base.hpp"
#include "ie_infer_async_request_base.hpp"
namespace InferenceEngine {
@@ -64,29 +63,10 @@ public:
TO_STATUS(_impl->Export(networkModel));
}
- IE_SUPPRESS_DEPRECATED_START
StatusCode GetExecGraphInfo(ICNNNetwork::Ptr& graphPtr, ResponseDesc* resp) noexcept override {
- // should be refactored together with ExecutableNetwork interface
TO_STATUS(graphPtr = _impl->GetExecGraphInfo());
}
- INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
- StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept override {
- try {
- auto v = _impl->QueryState();
- if (idx >= v.size()) {
- return OUT_OF_BOUNDS;
- }
- pState = std::make_shared(v[idx]);
- return OK;
- } catch (const std::exception& ex) {
- return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
- } catch (...) {
- return InferenceEngine::DescriptionBuffer(UNEXPECTED);
- }
- }
- IE_SUPPRESS_DEPRECATED_END
-
StatusCode SetConfig(const std::map& config, ResponseDesc* resp) noexcept override {
TO_STATUS(_impl->SetConfig(config));
}
diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp b/inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp
index 1253947eeaf..6ede78f720e 100644
--- a/inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp
+++ b/inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp
@@ -10,10 +10,10 @@
#include "cpp/exception2status.hpp"
#include "cpp_interfaces/plugin_itt.hpp"
-#include "ie_variable_state_base.hpp"
#include
#include "ie_iinfer_request.hpp"
#include "ie_preprocess.hpp"
+
namespace InferenceEngine {
#define CATCH_IE_EXCEPTION_TO_STATUS_NO_RESP(StatusCode, ExceptionType) catch (const ExceptionType& ex) { \
@@ -169,23 +169,6 @@ public:
StatusCode SetBatch(int batch_size, ResponseDesc* resp) noexcept override {
TO_STATUS(_impl->SetBatch(batch_size));
}
-
- IE_SUPPRESS_DEPRECATED_START
- StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept override {
- try {
- auto v = _impl->QueryState();
- if (idx >= v.size()) {
- return OUT_OF_BOUNDS;
- }
- pState = std::make_shared(v[idx]);
- return OK;
- } catch (const std::exception& ex) {
- return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
- } catch (...) {
- return InferenceEngine::DescriptionBuffer(UNEXPECTED);
- }
- }
- IE_SUPPRESS_DEPRECATED_END
};
IE_SUPPRESS_DEPRECATED_END
diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp b/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp
index 97fba9af7f9..9e68666b7a3 100644
--- a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp
+++ b/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp
@@ -23,44 +23,17 @@ namespace InferenceEngine {
InferRequest::InferRequest(const details::SharedObjectLoader& so,
const IInferRequestInternal::Ptr& impl)
- : _so(so), _impl(impl), actual() {
+ : _so(so), _impl(impl) {
IE_ASSERT(_impl != nullptr);
}
IE_SUPPRESS_DEPRECATED_START
-InferRequest::InferRequest(IInferRequest::Ptr request,
- std::shared_ptr splg)
- : _so(), _impl(), actual(request) {
- if (splg) {
- _so = *splg;
- }
-
- // plg can be null, but not the actual
- if (actual == nullptr)
- IE_THROW(NotAllocated) << "InferRequest was not initialized.";
-}
-
void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
- if (actual) {
- CALL_STATUS_FNC(SetBlob, name.c_str(), data);
- return;
- }
INFER_REQ_CALL_STATEMENT(_impl->SetBlob(name, data);)
}
Blob::Ptr InferRequest::GetBlob(const std::string& name) {
- if (actual) {
- Blob::Ptr data;
- CALL_STATUS_FNC(GetBlob, name.c_str(), data);
- std::string error = "Internal error: blob with name `" + name + "` is not allocated!";
- auto blobPtr = data.get();
- const bool remoteBlobPassed = blobPtr->is();
- if (blobPtr == nullptr) IE_THROW() << error;
- if (!remoteBlobPassed && blobPtr->buffer() == nullptr) IE_THROW() << error;
- return data;
- }
-
Blob::Ptr blobPtr;
INFER_REQ_CALL_STATEMENT(blobPtr = _impl->GetBlob(name);)
std::string error = "Internal error: blob with name `" + name + "` is not allocated!";
@@ -71,60 +44,26 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
}
void InferRequest::SetBlob(const std::string &name, const Blob::Ptr &data, const PreProcessInfo& info) {
- if (actual) {
- CALL_STATUS_FNC(SetBlob, name.c_str(), data, info);
- return;
- }
-
INFER_REQ_CALL_STATEMENT(_impl->SetBlob(name, data, info);)
}
const PreProcessInfo& InferRequest::GetPreProcess(const std::string& name) const {
- if (actual) {
- const PreProcessInfo* info = nullptr;
- CALL_STATUS_FNC(GetPreProcess, name.c_str(), &info);
- return *info;
- }
-
INFER_REQ_CALL_STATEMENT(return _impl->GetPreProcess(name);)
}
void InferRequest::Infer() {
- if (actual) {
- CALL_STATUS_FNC_NO_ARGS(Infer);
- return;
- }
-
INFER_REQ_CALL_STATEMENT(_impl->Infer();)
}
void InferRequest::Cancel() {
- if (actual) {
- CALL_STATUS_FNC_NO_ARGS(Cancel);
- return;
- }
-
INFER_REQ_CALL_STATEMENT(_impl->Cancel();)
}
std::map InferRequest::GetPerformanceCounts() const {
- if (actual) {
- std::map perfMap;
- CALL_STATUS_FNC(GetPerformanceCounts, perfMap);
- return perfMap;
- }
-
INFER_REQ_CALL_STATEMENT(return _impl->GetPerformanceCounts();)
}
void InferRequest::SetInput(const BlobMap& inputs) {
- if (actual) {
- for (auto&& input : inputs) {
- CALL_STATUS_FNC(SetBlob, input.first.c_str(), input.second);
- }
- return;
- }
-
INFER_REQ_CALL_STATEMENT(
for (auto&& input : inputs) {
_impl->SetBlob(input.first, input.second);
@@ -133,13 +72,6 @@ void InferRequest::SetInput(const BlobMap& inputs) {
}
void InferRequest::SetOutput(const BlobMap& results) {
- if (actual) {
- for (auto&& result : results) {
- CALL_STATUS_FNC(SetBlob, result.first.c_str(), result.second);
- }
- return;
- }
-
INFER_REQ_CALL_STATEMENT(
for (auto&& result : results) {
_impl->SetBlob(result.first, result.second);
@@ -148,106 +80,19 @@ void InferRequest::SetOutput(const BlobMap& results) {
}
void InferRequest::SetBatch(const int batch) {
- if (actual) {
- CALL_STATUS_FNC(SetBatch, batch);
- return;
- }
-
INFER_REQ_CALL_STATEMENT(_impl->SetBatch(batch);)
}
void InferRequest::StartAsync() {
- if (actual) {
- CALL_STATUS_FNC_NO_ARGS(StartAsync);
- return;
- }
-
INFER_REQ_CALL_STATEMENT(_impl->StartAsync();)
}
StatusCode InferRequest::Wait(int64_t millis_timeout) {
- if (actual) {
- ResponseDesc resp;
- if (actual == nullptr) IE_THROW() << "InferRequest was not initialized.";
- auto res = actual->Wait(millis_timeout, &resp);
- if (res != OK && res != RESULT_NOT_READY &&
- res != INFER_NOT_STARTED && res != INFER_CANCELLED) {
- IE_EXCEPTION_SWITCH(res, ExceptionType,
- InferenceEngine::details::ThrowNow{}
- <<= std::stringstream{} << IE_LOCATION << resp.msg)
- }
- return res;
- }
-
INFER_REQ_CALL_STATEMENT(return _impl->Wait(millis_timeout);)
}
-namespace details {
-
-class ICompletionCallbackWrapper {
-public:
- virtual ~ICompletionCallbackWrapper() = default;
-
- virtual void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept = 0;
-};
-
-template
-class CompletionCallbackWrapper : public ICompletionCallbackWrapper {
- T lambda;
-
-public:
- explicit CompletionCallbackWrapper(const T& lambda): lambda(lambda) {}
-
- void call(InferenceEngine::IInferRequest::Ptr /*request*/, InferenceEngine::StatusCode /*code*/) const
- noexcept override {
- lambda();
- }
-};
-
-template <>
-class CompletionCallbackWrapper : public ICompletionCallbackWrapper {
- IInferRequest::CompletionCallback callBack;
-
-public:
- explicit CompletionCallbackWrapper(const IInferRequest::CompletionCallback& callBack): callBack(callBack) {}
-
- void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept override {
- callBack(request, code);
- }
-};
-
-template <>
-class CompletionCallbackWrapper> : public ICompletionCallbackWrapper {
- std::function lambda;
-
-public:
- explicit CompletionCallbackWrapper(const std::function& lambda)
- : lambda(lambda) {}
-
- void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept override {
- lambda(InferRequest(request), code);
- }
-};
-
-void callWrapper(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) {
- details::ICompletionCallbackWrapper* pWrapper = nullptr;
- ResponseDesc dsc;
- request->GetUserData(reinterpret_cast(&pWrapper), &dsc);
- pWrapper->call(request, code);
-}
-
-} // namespace details
-
void InferRequest::SetCompletionCallbackImpl(std::function callbackToSet) {
- if (actual) {
- using T = std::function;
- callback.reset(new details::CompletionCallbackWrapper(callbackToSet));
- CALL_STATUS_FNC(SetUserData, callback.get());
- actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
- return;
- }
-
INFER_REQ_CALL_STATEMENT(
_impl->SetCallback([callbackToSet] (std::exception_ptr) {
callbackToSet();
@@ -274,14 +119,6 @@ void InferRequest::SetCompletionCallbackImpl(std::function callbackToSet
void InferRequest::SetCompletionCallbackImpl(std::function callbackToSet) {
- if (actual) {
- using T = std::function;
- callback.reset(new details::CompletionCallbackWrapper(callbackToSet));
- CALL_STATUS_FNC(SetUserData, callback.get());
- actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
- return;
- }
-
INFER_REQ_CALL_STATEMENT(
auto weakThis = InferRequest{_so, std::shared_ptr{_impl.get(), [](IInferRequestInternal*){}}};
_impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) {
@@ -303,14 +140,6 @@ void InferRequest::SetCompletionCallbackImpl(std::function(callbackToSet));
- CALL_STATUS_FNC(SetUserData, callback.get());
- actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
- return;
- }
-
INFER_REQ_CALL_STATEMENT(
IInferRequest::Ptr weakThis = InferRequest{_so, std::shared_ptr{_impl.get(), [](IInferRequestInternal*){}}};
_impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) {
@@ -332,38 +161,12 @@ void InferRequest::SetCompletionCallbackImpl(IInferRequest::CompletionCallback c
}
InferRequest::operator IInferRequest::Ptr () {
- if (actual) {
- return actual;
- }
-
INFER_REQ_CALL_STATEMENT(
return std::make_shared(_impl);
)
}
std::vector InferRequest::QueryState() {
- if (actual) {
- IE_SUPPRESS_DEPRECATED_START
- if (actual == nullptr) IE_THROW() << "ExecutableNetwork was not initialized.";
- IVariableState::Ptr pState = nullptr;
- auto res = OK;
- std::vector controller;
- for (size_t idx = 0; res == OK; ++idx) {
- ResponseDesc resp;
- res = actual->QueryState(pState, idx, &resp);
- if (res != OK && res != OUT_OF_BOUNDS) {
- IE_THROW() << resp.msg;
- }
- if (res != OUT_OF_BOUNDS) {
- controller.push_back(VariableState(pState,
- std::make_shared(_so)));
- }
- }
- IE_SUPPRESS_DEPRECATED_END
-
- return controller;
- }
-
std::vector controller;
INFER_REQ_CALL_STATEMENT(
for (auto&& state : _impl->QueryState()) {
@@ -374,11 +177,11 @@ std::vector InferRequest::QueryState() {
}
bool InferRequest::operator!() const noexcept {
- return !_impl || !actual;
+ return !_impl;
}
InferRequest::operator bool() const noexcept {
- return (!!_impl) || (!!actual);
+ return (!!_impl);
}
bool InferRequest::operator!=(const InferRequest& r) const noexcept {
@@ -386,7 +189,7 @@ bool InferRequest::operator!=(const InferRequest& r) const noexcept {
}
bool InferRequest::operator==(const InferRequest& r) const noexcept {
- return r._impl == _impl && r.actual == actual;
+ return r._impl == _impl;
}
} // namespace InferenceEngine
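With the callback wrapper classes removed, completion callbacks are forwarded directly to _impl->SetCallback, which reports failures through a std::exception_ptr instead of a status code. From the caller's perspective the public API is unchanged; a short usage sketch, assuming network and request creation succeeded:

```cpp
InferenceEngine::InferRequest request = exeNetwork.CreateInferRequest();

// The no-argument callback overload kept by this change; errors surface on Wait().
request.SetCompletionCallback([]() {
    // runs when the asynchronous inference finishes
});

request.StartAsync();
request.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
```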
diff --git a/inference-engine/src/inference_engine/cpp/ie_variable_state.cpp b/inference-engine/src/inference_engine/cpp/ie_variable_state.cpp
index 46f99d3fc6c..63f7305e8b2 100644
--- a/inference-engine/src/inference_engine/cpp/ie_variable_state.cpp
+++ b/inference-engine/src/inference_engine/cpp/ie_variable_state.cpp
@@ -4,7 +4,6 @@
#include "details/ie_so_loader.h"
#include "cpp/ie_memory_state.hpp"
-#include "ie_imemory_state.hpp"
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
#include "exception2status.hpp"
@@ -24,57 +23,19 @@ VariableState::VariableState(const details::SharedObjectLoader& so,
IE_SUPPRESS_DEPRECATED_START
-VariableState::VariableState(std::shared_ptr state,
- std::shared_ptr splg)
- : _so(), _impl(), actual(state) {
- if (splg) {
- _so = *splg;
- }
-
- // plg can be null, but not the actual
- if (actual == nullptr)
- IE_THROW(NotAllocated) << "VariableState was not initialized.";
-}
-
-Blob::CPtr VariableState::GetLastState() const {
- return GetState();
-}
-
void VariableState::Reset() {
- if (actual) {
- CALL_STATUS_FNC_NO_ARGS(Reset);
- return;
- }
-
VARIABLE_CALL_STATEMENT(_impl->Reset());
}
std::string VariableState::GetName() const {
- if (actual) {
- char name[256];
- CALL_STATUS_FNC(GetName, name, sizeof(name));
- return name;
- }
-
VARIABLE_CALL_STATEMENT(return _impl->GetName());
}
Blob::CPtr VariableState::GetState() const {
- if (actual) {
- Blob::CPtr stateBlob;
- CALL_STATUS_FNC(GetState, stateBlob);
- return stateBlob;
- }
-
VARIABLE_CALL_STATEMENT(return _impl->GetState());
}
void VariableState::SetState(Blob::Ptr state) {
- if (actual) {
- CALL_STATUS_FNC(SetState, state);
- return;
- }
-
VARIABLE_CALL_STATEMENT(_impl->SetState(state));
}
diff --git a/inference-engine/src/inference_engine/cpp/ie_variable_state_base.hpp b/inference-engine/src/inference_engine/cpp/ie_variable_state_base.hpp
deleted file mode 100644
index 2481ca67852..00000000000
--- a/inference-engine/src/inference_engine/cpp/ie_variable_state_base.hpp
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include
-
-#include "cpp/exception2status.hpp"
-#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
-#include "ie_imemory_state.hpp"
-
-namespace InferenceEngine {
-
-IE_SUPPRESS_DEPRECATED_START
-
-/**
- * @brief Default implementation for IVariableState
- * @ingroup ie_dev_api_variable_state_api
- */
-class VariableStateBase : public IVariableState {
- std::shared_ptr impl;
-
-public:
- /**
- * @brief Constructor with actual underlying implementation.
- * @param impl Underlying implementation of type IVariableStateInternal
- */
- explicit VariableStateBase(std::shared_ptr impl): impl(impl) {
- if (impl == nullptr) {
- IE_THROW() << "VariableStateBase implementation is not defined";
- }
- }
-
- StatusCode GetName(char* name, size_t len, ResponseDesc* resp) const noexcept override {
- for (size_t i = 0; i != len; i++) {
- name[i] = 0;
- }
- DescriptionBuffer buf(name, len);
- TO_STATUS(buf << impl->GetName());
- return OK;
- }
-
- StatusCode Reset(ResponseDesc* resp) noexcept override {
- TO_STATUS(impl->Reset());
- }
-
- StatusCode SetState(Blob::Ptr newState, ResponseDesc* resp) noexcept override {
- TO_STATUS(impl->SetState(newState));
- }
-
- StatusCode GetState(Blob::CPtr& state, ResponseDesc* resp) const noexcept override {
- TO_STATUS(state = impl->GetState());
- }
-};
-
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_ivariable_state_internal.cpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_ivariable_state_internal.cpp
index 0171292d36b..a499e816ee0 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_ivariable_state_internal.cpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_ivariable_state_internal.cpp
@@ -23,7 +23,4 @@ Blob::CPtr IVariableStateInternal::GetState() const {
return state;
}
-Blob::CPtr IVariableStateInternal::GetLastState() const {
- return GetState();
-}
} // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_parameter.cpp b/inference-engine/src/inference_engine/ie_parameter.cpp
deleted file mode 100644
index 61fbf54c37d..00000000000
--- a/inference-engine/src/inference_engine/ie_parameter.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include
-#include
-
-#include
-
-namespace ngraph {
-
-template class INFERENCE_ENGINE_API_CLASS(VariantImpl);
-
-template <>
-class INFERENCE_ENGINE_API_CLASS(VariantWrapper) : public VariantImpl {
-public:
- static constexpr VariantTypeInfo type_info {"Variant::InferenceEngine::Parameter", 0};
- const VariantTypeInfo& get_type_info() const override {
- return type_info;
- }
- VariantWrapper(const value_type& value): VariantImpl(value) {} // NOLINT
-};
-
-} // namespace ngraph
-
-constexpr ngraph::VariantTypeInfo ngraph::VariantWrapper::type_info;
-
-InferenceEngine::Parameter::Parameter(const std::shared_ptr& var) {
- if (auto paramWrapper = std::dynamic_pointer_cast>(var)) {
- auto param = paramWrapper->get();
- if (!param.empty()) ptr = param.ptr->copy();
- }
-}
-
-InferenceEngine::Parameter::Parameter(std::shared_ptr& var) {
- if (auto paramWrapper = std::dynamic_pointer_cast>(var)) {
- auto param = paramWrapper->get();
- if (!param.empty()) ptr = param.ptr->copy();
- }
-}
-
-
-std::shared_ptr InferenceEngine::Parameter::asVariant() const {
- return std::make_shared>(*this);
-}
diff --git a/inference-engine/src/low_precision_transformations/src/split.cpp b/inference-engine/src/low_precision_transformations/src/split.cpp
index 486111dd737..919c6b5e87b 100644
--- a/inference-engine/src/low_precision_transformations/src/split.cpp
+++ b/inference-engine/src/low_precision_transformations/src/split.cpp
@@ -111,13 +111,13 @@ void SplitTransformation::updateOutputs(
updateOutput(context, lastNodes[0], originalNode);
} else {
const std::string originalName = originalNode->get_friendly_name();
- for (auto& lastNode : lastNodes) {
+ for (size_t outIdx = 0; outIdx < lastNodes.size(); ++outIdx) {
for (size_t i = 0; i < outputSize; ++i) {
std::shared_ptr result = context.function->get_output_op(i);
std::shared_ptr outputNode = result->get_input_node_shared_ptr(0);
- if (outputNode.get() == lastNode.get()) {
+ if (outputNode.get() == lastNodes[outIdx].get()) {
originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix);
- lastNode->set_friendly_name(originalName + "." + std::to_string(i));
+ lastNodes[outIdx]->set_friendly_name(originalName + "." + std::to_string(outIdx));
break;
}
}
diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
index ce0ae3473d9..babcc95303c 100644
--- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -157,10 +157,15 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
}
const size_t outChannelsShapeIndex = is_type(layer) ? 1ul : 0ul;
- if ( // Check if all dimensions of scale except the output channels are all ones
+ if (
+ // expected, it's ok: return true
+ (shape_size(constOutputShape) != 1ul) &&
+ // not expected, something wrong: return false
+ ((constOutputShape.size() <= outChannelsShapeIndex) ||
+ // Check if all dimensions of scale except the output channels are all ones
(shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
- (fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex]))) {
+ (fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex])))) {
return false;
}
} else {
diff --git a/inference-engine/src/mkldnn_plugin/utils/README.md b/inference-engine/src/mkldnn_plugin/utils/README.md
index d3b98f1cb48..bd3f1329a5e 100644
--- a/inference-engine/src/mkldnn_plugin/utils/README.md
+++ b/inference-engine/src/mkldnn_plugin/utils/README.md
@@ -6,7 +6,7 @@ Use the following cmake option to enable debug capabilities:
## Blob dumping
Blob dumping is controlled by environment variables (filters).
-The variables define conditions of the node which input, output and internal blobs
+The variables define conditions that select the nodes whose input and output blobs
should be dumped.
> **NOTE**: Nothing is dumped by default
@@ -15,11 +15,13 @@ should be dumped for.
Environment variables can be set per execution, for example:
```sh
- OV_CPU_BLOB_DUMP_DIR=dump_dir binary ...
+ OV_CPU_BLOB_DUMP_DIR=dump_dir OV_CPU_BLOB_DUMP_FORMAT=TEXT OV_CPU_BLOB_DUMP_NODE_PORTS=OUT binary ...
```
or for shell session (bash example):
```sh
export OV_CPU_BLOB_DUMP_DIR=dump_dir
+ export OV_CPU_BLOB_DUMP_FORMAT=TEXT
+ export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
binary ...
```
### Specify dump directory
@@ -35,8 +37,22 @@ Options are:
* BIN (default)
* TEXT
+### Filter input / output blobs
+To dump only input / output blobs:
+```sh
+ OV_CPU_BLOB_DUMP_NODE_PORTS='' binary ...
+```
+Example:
+```sh
+ OV_CPU_BLOB_DUMP_NODE_PORTS=OUT binary ...
+```
+Options are:
+* IN
+* OUT
+* ALL
+
### Filter by execution ID
-To dump blobs only for node with specified execution IDs:
+To dump blobs only for nodes with specified execution IDs:
```sh
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='' binary ...
```
@@ -46,19 +62,19 @@ Example:
```
### Filter by type
-To dump blobs only for node with specified type:
+To dump blobs only for nodes with specified types:
```sh
- OV_CPU_BLOB_DUMP_NODE_TYPE= binary ...
+ OV_CPU_BLOB_DUMP_NODE_TYPE= binary ...
```
Example:
```sh
- OV_CPU_BLOB_DUMP_NODE_TYPE=Convolution binary ...
+ OV_CPU_BLOB_DUMP_NODE_TYPE='Convolution Reorder' binary ...
```
> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for the list of types
### Filter by name
-To dump blobs only for node with name matching specified regex:
+To dump blobs only for nodes with name matching specified regex:
```sh
OV_CPU_BLOB_DUMP_NODE_NAME= binary ...
```
@@ -68,9 +84,17 @@ Example:
```
### Dump all the blobs
+```sh
+ OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
+```
+ or
```sh
OV_CPU_BLOB_DUMP_NODE_NAME=".+" binary ...
```
+ or
+```sh
+ OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
+```
## Graph serialization
The functionality allows serializing the execution graph using an environment variable:
diff --git a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h
index be6e7a830c2..c2784f8a467 100644
--- a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h
+++ b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h
@@ -20,6 +20,7 @@ public:
readParam(blobDumpDir, "OV_CPU_BLOB_DUMP_DIR");
readParam(blobDumpFormat, "OV_CPU_BLOB_DUMP_FORMAT");
readParam(blobDumpNodeExecId, "OV_CPU_BLOB_DUMP_NODE_EXEC_ID");
+ readParam(blobDumpNodePorts, "OV_CPU_BLOB_DUMP_NODE_PORTS");
readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
@@ -28,6 +29,7 @@ public:
std::string blobDumpDir;
std::string blobDumpFormat;
std::string blobDumpNodeExecId;
+ std::string blobDumpNodePorts;
std::string blobDumpNodeType;
std::string blobDumpNodeName;
std::string execGraphPath;
diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp
index 9f3af44a66a..1cfbae1ab5f 100644
--- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp
+++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp
@@ -20,7 +20,7 @@ using namespace InferenceEngine;
namespace MKLDNNPlugin {
NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
- : dumpFormat(DUMP_FORMAT::BIN)
+ : dumpFormat(FORMAT::BIN)
, dumpDirName("mkldnn_dump")
, count(_count) {
if (!config.blobDumpDir.empty())
@@ -32,6 +32,9 @@ NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
if (!config.blobDumpNodeExecId.empty())
dumpFilters[FILTER::BY_EXEC_ID] = config.blobDumpNodeExecId;
+ if (!config.blobDumpNodePorts.empty())
+ dumpFilters[FILTER::BY_PORTS] = config.blobDumpNodePorts;
+
if (!config.blobDumpNodeType.empty())
dumpFilters[FILTER::BY_TYPE] = config.blobDumpNodeType;
@@ -40,7 +43,7 @@ NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
}
void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
- if (!shouldBeDumped(node))
+ if (!shouldBeDumped(node, "IN"))
return;
auto exec_order = std::to_string(node->getExecIndex());
@@ -60,7 +63,7 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
file_name = file_name.substr(file_name.size() - 240);
auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name;
- std::cout << "Dump before: " << dump_file << std::endl;
+ std::cout << "Dump inputs: " << dump_file << std::endl;
TensorDesc desc = prEdge->getDesc();
if (desc.getPrecision() == Precision::BIN)
@@ -77,7 +80,7 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
}
void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const {
- if (!shouldBeDumped(node))
+ if (!shouldBeDumped(node, "OUT"))
return;
auto exec_order = std::to_string(node->getExecIndex());
@@ -96,7 +99,7 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const {
file_name = file_name.substr(file_name.size() - 240);
auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name;
- std::cout << "Dump after: " << dump_file << std::endl;
+ std::cout << "Dump outputs: " << dump_file << std::endl;
TensorDesc desc = childEdge->getDesc();
if (desc.getPrecision() == Precision::BIN)
@@ -130,56 +133,77 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const {
void NodeDumper::dump(const BlobDumper& bd, const std::string& file) const {
switch (dumpFormat) {
- case DUMP_FORMAT::BIN: {
+ case FORMAT::BIN: {
bd.dump(file);
break;
}
- case DUMP_FORMAT::TEXT: {
+ case FORMAT::TEXT: {
bd.dumpAsTxt(file);
break;
}
default:
- IE_THROW() << "Unknown dump format";
+ IE_THROW() << "NodeDumper: Unknown dump format";
}
}
-bool NodeDumper::shouldBeDumped(const MKLDNNNodePtr& node) const {
+bool NodeDumper::shouldBeDumped(const MKLDNNNodePtr& node, const std::string& portsKind) const {
if (dumpFilters.empty())
return false;
- if (dumpFilters.count(FILTER::BY_EXEC_ID)) { // filter by exec id env set
+ if (dumpFilters.count(FILTER::BY_PORTS)) { // filter by ports configured
+ if (dumpFilters.at(FILTER::BY_PORTS) != "ALL" &&
+ portsKind != dumpFilters.at(FILTER::BY_PORTS))
+ return false;
+ }
+
+ if (dumpFilters.count(FILTER::BY_EXEC_ID)) { // filter by exec id configured
std::stringstream ss(dumpFilters.at(FILTER::BY_EXEC_ID));
int id;
bool matched = false;
+
while (ss >> id) {
- if (node->getExecIndex() == id) // exec id matches
+ if (node->getExecIndex() == id) { // exec id matches
matched = true;
+ break;
+ }
}
if (!matched)
return false;
}
- if (dumpFilters.count(FILTER::BY_TYPE)) { // filter by type env set
- if (NameFromType(node->getType()) != dumpFilters.at(FILTER::BY_TYPE)) // type does not match
+ if (dumpFilters.count(FILTER::BY_TYPE)) { // filter by type configured
+ std::stringstream ss(dumpFilters.at(FILTER::BY_TYPE));
+ std::string type;
+ bool matched = false;
+
+ while (ss >> type) {
+ if (NameFromType(node->getType()) == type) { // type matches
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
return false;
}
- if (dumpFilters.count(FILTER::BY_NAME)) { // filter by name env set
- if (!std::regex_match(node->getName(), std::regex(dumpFilters.at(FILTER::BY_NAME)))) // name does not match
+ if (dumpFilters.count(FILTER::BY_NAME)) { // filter by name configured
+ if (dumpFilters.at(FILTER::BY_NAME) != "*" && // "*" is a single-character shorthand for matching all names
+ !std::regex_match(node->getName(), std::regex(dumpFilters.at(FILTER::BY_NAME)))) // name does not match
return false;
}
return true;
}
-NodeDumper::DUMP_FORMAT NodeDumper::parseDumpFormat(const std::string& format) const {
+NodeDumper::FORMAT NodeDumper::parseDumpFormat(const std::string& format) const {
if (format == "BIN")
- return DUMP_FORMAT::BIN;
+ return FORMAT::BIN;
else if (format == "TEXT")
- return DUMP_FORMAT::TEXT;
+ return FORMAT::TEXT;
else
- IE_THROW() << "Unknown dump format";
+ IE_THROW() << "NodeDumper: Unknown dump format";
}
void NodeDumper::formatNodeName(std::string& name) const {
diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.h b/inference-engine/src/mkldnn_plugin/utils/node_dumper.h
index 0580bee4731..bac237c8883 100644
--- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.h
+++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.h
@@ -31,28 +31,29 @@ public:
private:
void dumpInternalBlobs(const MKLDNNNodePtr& node) const;
void dump(const BlobDumper& bd, const std::string& file) const;
- bool shouldBeDumped(const MKLDNNNodePtr &node) const;
+ bool shouldBeDumped(const MKLDNNNodePtr &node, const std::string& portsKind) const;
- enum class DUMP_FORMAT {
+ enum class FORMAT {
BIN,
TEXT,
};
- DUMP_FORMAT parseDumpFormat(const std::string& format) const;
+ FORMAT parseDumpFormat(const std::string& format) const;
void formatNodeName(std::string& name) const;
- DUMP_FORMAT dumpFormat;
+ FORMAT dumpFormat;
std::string dumpDirName;
int count;
enum FILTER {
+ BY_PORTS,
BY_EXEC_ID,
BY_TYPE,
BY_NAME,
- COUNT,
};
- std::unordered_map<FILTER, std::string> dumpFilters;
+ // std::hash is necessary for Ubuntu-16.04 (gcc-5.4 and a defect in the C++11 standard)
+ std::unordered_map<FILTER, std::string, std::hash<int>> dumpFilters;
};
} // namespace MKLDNNPlugin
#endif // CPU_DEBUG_CAPS
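
For context on the explicit hasher above: older standard libraries (such as the one shipped with gcc-5.4 in C++11 mode) do not provide std::hash for enumeration keys, so std::unordered_map needs one supplied explicitly. A minimal sketch of the workaround, with illustrative names:

#include <string>
#include <unordered_map>

// Unscoped enum, as in node_dumper.h; it converts implicitly to int,
// so std::hash<int> can serve as the map's hasher.
enum FILTER { BY_PORTS, BY_EXEC_ID, BY_TYPE, BY_NAME };

int main() {
    // Without the explicit std::hash<int>, gcc-5.4's libstdc++ rejects the enum key
    // because std::hash<FILTER> is not defined there.
    std::unordered_map<FILTER, std::string, std::hash<int>> filters;
    filters[BY_NAME] = "*";
    return filters.count(BY_NAME) == 1 ? 0 : 1;
}
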
diff --git a/inference-engine/src/offline_transformations/include/mask_attribute.hpp b/inference-engine/src/offline_transformations/include/mask_attribute.hpp
index 48c5b4ee9f0..282f81b054e 100644
--- a/inference-engine/src/offline_transformations/include/mask_attribute.hpp
+++ b/inference-engine/src/offline_transformations/include/mask_attribute.hpp
@@ -96,7 +96,8 @@ public:
auto mask_2_iter = mask->rbegin();
while (mask_1_iter != rend() &&
- mask_2_iter != mask->rend()) {
+ mask_2_iter != mask->rend() &&
+ result_iter != result_mask->rend()) {
// Merge mask dimension values for both masks
// Example: (MaskValue[1,2,3,4], MaskValue[2,3]) -> MaskValue[2,3]
for (const auto & value : *mask_1_iter) {
@@ -119,7 +120,8 @@ public:
auto mask_2_iter = mask->rbegin();
while (mask_1_iter != rend() &&
- mask_2_iter != mask->rend()) {
+ mask_2_iter != mask->rend() &&
+ result_iter != result_mask->rend()) {
// Union mask dimension values for both masks
// Example: (MaskValue[1,2,3,4], MaskValue[2, 5]) -> MaskValue[1, 2, 3, 4, 5]
for (const auto & value : *mask_1_iter) {
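
The extra result_iter bound above keeps the merge and union loops from stepping past the end of the result mask when the two masks have different ranks. A simplified standalone illustration of the pattern (the Mask alias and intersect() here are illustrative, not the library types):

#include <algorithm>
#include <iostream>
#include <set>
#include <vector>

using Mask = std::vector<std::set<int>>;

// Dimension-wise intersection, aligned from the back; all three reverse iterators
// are checked in the loop condition, mirroring the patched code.
Mask intersect(const Mask& a, const Mask& b) {
    Mask result(std::min(a.size(), b.size()));
    auto ia = a.rbegin();
    auto ib = b.rbegin();
    auto ir = result.rbegin();
    while (ia != a.rend() && ib != b.rend() && ir != result.rend()) {
        for (int v : *ia)
            if (ib->count(v))
                ir->insert(v);
        ++ia; ++ib; ++ir;
    }
    return result;
}

int main() {
    Mask a = {{7}, {1, 2, 3, 4}};  // ranks differ: a has 2 dimensions, b has 1
    Mask b = {{2, 3}};
    Mask merged = intersect(a, b);
    for (int v : merged.back())
        std::cout << v << ' ';     // prints: 2 3
}
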
diff --git a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp
index 424b6ae9583..271b200f31b 100644
--- a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp
+++ b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp
@@ -246,6 +246,9 @@ public:
// To allow pruning on weights (allow reshape input Group (0) dim changing) replace Reshape Shape constant
// [G, 1, 1, X, Y, Z] by [-1, 1, 1, X, Y, Z].
auto old_shape_const = std::dynamic_pointer_cast<opset6::Constant>(m_shape.get_node_shared_ptr());
+ if (!old_shape_const) {
+ return false;
+ }
auto shape_value = old_shape_const.get()->cast_vector<int64_t>();
shape_value[0] = -1;
auto new_const = opset6::Constant::create(old_shape_const->get_element_type(),
@@ -462,6 +465,9 @@ public:
const auto & pattern_map = m.get_pattern_value_map();
const auto & m_output = pattern_map.at(concat);
auto concat_ptr = std::dynamic_pointer_cast<opset6::Concat>(m_output.get_node_shared_ptr());
+ if (!concat_ptr) {
+ return false;
+ }
auto axis = concat_ptr->get_concatenation_axis();
auto inputs = concat_ptr->inputs();
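
Both hunks above add the same defensive pattern: a matcher callback must check that the dynamic_pointer_cast actually succeeded before dereferencing, and reject the match otherwise. A minimal sketch of the pattern with illustrative types:

#include <memory>

struct Node { virtual ~Node() = default; };
struct Constant : Node {};

// Returns false (match rejected) when the node is not of the expected type,
// mirroring the `if (!old_shape_const) return false;` guards in the patch.
bool handle_match(const std::shared_ptr<Node>& matched) {
    auto as_constant = std::dynamic_pointer_cast<Constant>(matched);
    if (!as_constant) {
        return false;  // not a Constant: bail out instead of dereferencing a null pointer
    }
    // ... safe to use as_constant from here on ...
    return true;
}

int main() {
    return handle_match(std::make_shared<Node>()) ? 1 : 0;  // plain Node is rejected, returns 0
}
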
diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_ivariable_state_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_ivariable_state_internal.hpp
index f92fd556f00..d34af53631a 100644
--- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_ivariable_state_internal.hpp
+++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_ivariable_state_internal.hpp
@@ -50,14 +50,6 @@ public:
*/
virtual Blob::CPtr GetState() const;
- /**
- * @deprecated Use IVariableStateInternal::GetState method instead
- * @brief Returns the value of the variable state.
- * @return The value of the variable state
- */
- INFERENCE_ENGINE_DEPRECATED("Use IVariableStateInternal::GetState method instead")
- virtual Blob::CPtr GetLastState() const;
-
protected:
/**
* @brief A default dtor
diff --git a/inference-engine/src/plugin_api/debug.h b/inference-engine/src/plugin_api/debug.h
index 838c5b02941..d52c7b0b942 100644
--- a/inference-engine/src/plugin_api/debug.h
+++ b/inference-engine/src/plugin_api/debug.h
@@ -25,6 +25,9 @@
#include "ie_algorithm.hpp"
+namespace InferenceEngine {
+namespace details {
+
/**
* @brief Serializes a `std::vector` to a `std::ostream`
* @ingroup ie_dev_api_error_debug
@@ -32,7 +35,6 @@
* @param vec A vector to serialize
* @return A reference to a `std::stream`
*/
-namespace std {
template <typename T>
inline std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
if (vec.empty()) return std::operator<<(out, "[]");
@@ -42,10 +44,7 @@ inline std::ostream& operator<<(std::ostream& out, const std::vector& vec) {
}
return out << "]";
}
-} // namespace std
-namespace InferenceEngine {
-namespace details {
/**
* @brief trim from start (in place)
* @ingroup ie_dev_api_error_debug
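
The relocation above matters because adding new overloads to namespace std is not permitted by the standard; a stream operator for std::vector belongs in the library's own namespace and is then pulled in explicitly where needed. A minimal sketch with an illustrative namespace name:

#include <cstddef>
#include <iostream>
#include <vector>

namespace mylib {
namespace details {

// operator<< for std::vector lives in the library namespace, not in namespace std.
template <typename T>
inline std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
    if (vec.empty())
        return out << "[]";
    out << "[" << vec[0];
    for (std::size_t i = 1; i < vec.size(); ++i)
        out << ", " << vec[i];
    return out << "]";
}

}  // namespace details
}  // namespace mylib

int main() {
    using mylib::details::operator<<;  // made visible explicitly; ADL alone would not find it
    std::vector<int> v{1, 2, 3};
    std::cout << v << std::endl;       // prints [1, 2, 3]
}
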
diff --git a/inference-engine/src/vpu/common/include/vpu/configuration.hpp b/inference-engine/src/vpu/common/include/vpu/configuration.hpp
new file mode 100644
index 00000000000..4ed6f77b91f
--- /dev/null
+++ b/inference-engine/src/vpu/common/include/vpu/configuration.hpp
@@ -0,0 +1,99 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include