Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-06-21 09:31:00 +09:00
commit 4a5811623d
232 changed files with 4088 additions and 1725 deletions

View File

@ -17,6 +17,8 @@ jobs:
WORK_DIR: $(Pipeline.Workspace)/_w
MODELS_DIR: /mount/cinfsshare/onnxtestdata
TMP_DIR: /mnt/tmp
ONNX_MODEL_ZOO_SHA: "d58213534f2a4d1c4b19ba62b3bb5f544353256e"
steps:
- script: |
@ -55,7 +57,7 @@ jobs:
- script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile .
displayName: 'Docker build'
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)"
displayName: 'Get models'
- script: |
@ -77,6 +79,6 @@ jobs:
displayName: 'Create swap'
- script: |
docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo:/root/.onnx/model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image
docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "tox && tox -e zoo_models"
displayName: 'Docker run'

View File

@ -131,7 +131,7 @@ limitations under the License.
<tab type="user" title="Cosh-1" url="@ref openvino_docs_ops_arithmetic_Cosh_1"/>
<tab type="user" title="CTCLoss-4" url="@ref openvino_docs_ops_sequence_CTCLoss_4"/>
<tab type="user" title="CumSum" url="@ref openvino_docs_ops_arithmetic_CumSum_3"/>
<tab type="user" title="DeformableConvolution-1" url="@ref openvino_docs_ops_convolution_DeformableConvolution_1"/>
<tab type="user" title="DeformableConvolution-8" url="@ref openvino_docs_ops_convolution_DeformableConvolution_8"/>
<tab type="user" title="DeformablePSROIPooling-1" url="@ref openvino_docs_ops_detection_DeformablePSROIPooling_1"/>
<tab type="user" title="DepthToSpace-1" url="@ref openvino_docs_ops_movement_DepthToSpace_1"/>
<tab type="user" title="DetectionOutput-1" url="@ref openvino_docs_ops_detection_DetectionOutput_1"/>
@ -189,11 +189,13 @@ limitations under the License.
<tab type="user" title="MVN-1" url="@ref openvino_docs_ops_normalization_MVN_1"/>
<tab type="user" title="MVN-6" url="@ref openvino_docs_ops_normalization_MVN_6"/>
<tab type="user" title="MatMul-1" url="@ref openvino_docs_ops_matrix_MatMul_1"/>
<tab type="user" title="MatrixNonMaxSuppression-8" url="@ref openvino_docs_ops_sort_MatrixNonMaxSuppression_8"/>
<tab type="user" title="MaxPool-1" url="@ref openvino_docs_ops_pooling_MaxPool_1"/>
<tab type="user" title="Maximum-1" url="@ref openvino_docs_ops_arithmetic_Maximum_1"/>
<tab type="user" title="Minimum-1" url="@ref openvino_docs_ops_arithmetic_Minimum_1"/>
<tab type="user" title="Mish-4" url="@ref openvino_docs_ops_activation_Mish_4"/>
<tab type="user" title="Mod-1" url="@ref openvino_docs_ops_arithmetic_Mod_1"/>
<tab type="user" title="MulticlassNonMaxSuppression-8" url="@ref openvino_docs_ops_sort_MulticlassNonMaxSuppression_8"/>
<tab type="user" title="Multiply-1" url="@ref openvino_docs_ops_arithmetic_Multiply_1"/>
<tab type="user" title="Negative-1" url="@ref openvino_docs_ops_arithmetic_Negative_1"/>
<tab type="user" title="NonMaxSuppression-1" url="@ref openvino_docs_ops_sort_NonMaxSuppression_1"/>

View File

@ -6,27 +6,27 @@
**Short description**: *Ceiling* performs element-wise ceiling operation with given tensor.
**Attributes**:
**Detailed description**: For each element from the input tensor, *Ceiling* calculates the corresponding
element in the output tensor using the following formula:
No attributes available.
\f[
a_{i} = ceiling(a_{i})
\f]
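For illustration only, a quick NumPy check of the formula above (this snippet is not part of the operation specification):
```python
import numpy as np

a = np.array([-1.7, 0.2, 3.0])
print(np.ceil(a))  # [-1.  1.  3.]
```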
**Attributes**: *Ceiling* operation has no attributes.
**Inputs**
* **1**: An tensor of type T. **Required.**
* **1**: A tensor of type *T* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of element-wise ceiling operation. A tensor of type T.
* **1**: The result of element-wise ceiling operation. A tensor of type *T*.
**Types**
* *T*: any numeric type.
*Ceiling* does the following with the input tensor *a*:
\f[
a_{i} = ceiling(a_{i})
\f]
**Examples**

View File

@ -4,33 +4,31 @@
**Category**: Arithmetic unary operation
**Short description**: *Negative* performs element-wise negative operation with given tensor.
**Short description**: *Negative* performs element-wise negative operation on a given input tensor.
**Attributes**:
**Detailed description**
No attributes available.
**Inputs**
* **1**: An tensor of type T. **Required.**
**Outputs**
* **1**: The result of element-wise negative operation. A tensor of type T.
**Types**
* *T*: any numeric type.
*Negative* does the following with the input tensor *a*:
*Negative* performs element-wise negative operation on a given input tensor, based on the following mathematical formula:
\f[
a_{i} = -a_{i}
\f]
**Examples**
**Attributes**: *Negative* operation has no attributes.
*Example 1*
**Inputs**
* **1**: A tensor of type *T* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of element-wise *Negative* operation applied to the input tensor. A tensor of type *T* and the same shape as the input tensor.
**Types**
* *T*: any supported signed numeric type.
**Example**
```xml
<layer ... type="Negative">

View File

@ -8,6 +8,26 @@
**Detailed description**: *Deformable Convolution* is similar to regular *Convolution* but its receptive field is deformed because of additional spatial offsets used during input sampling. More thorough explanation can be found in [Deformable Convolutions Demystified](https://towardsdatascience.com/deformable-convolutions-demystified-2a77498699e8) and [Deformable Convolutional Networks](https://arxiv.org/abs/1703.06211).
Output is calculated using the following formula:
\f[
y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k})
\f]
Where
* K is the number of sampling locations; for example, for a 3x3 kernel with dilation = 1, K = 9
* \f$x(p)\f$ and \f$y(p)\f$ denote the features at location p in the input feature map x and the output feature map y, respectively
* \f$w_{k}\f$ is the weight for the k-th location.
* \f$p_{k}\f$ is the pre-specified offset for the k-th location; for example, for K = 9,
\f$p_{k} \in \{(-1, -1), (-1, 0), \ldots, (1, 1)\}\f$
* \f${\Delta}p_{k}\f$ is the learnable offset for the k-th location.
**Attributes**:
* *strides*

View File

@ -0,0 +1,168 @@
## DeformableConvolution<a name="DeformableConvolution"></a> {#openvino_docs_ops_convolution_DeformableConvolution_8}
**Versioned name**: *DeformableConvolution-8*
**Category**: Convolution
**Short description**: Computes 2D deformable convolution of input and kernel tensors.
**Detailed description**: *Deformable Convolution* is similar to regular *Convolution* but its receptive field is deformed because of additional spatial offsets used during input sampling. More thorough explanation can be found in [Deformable Convolutions Demystified](https://towardsdatascience.com/deformable-convolutions-demystified-2a77498699e8), [Deformable Convolutional Networks](https://arxiv.org/abs/1703.06211).
A modification of *DeformableConvolution* using modulating scalars is also supported. For details, refer to [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf).
Output is calculated using the following formula:
\f[
y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k}) * {\Delta}m_{k}
\f]
Where
* K is the number of sampling locations; for example, for a 3x3 kernel with dilation = 1, K = 9
* \f$x(p)\f$ and \f$y(p)\f$ denote the features at location p in the input feature map x and the output feature map y, respectively
* \f$w_{k}\f$ is the weight for the k-th location.
* \f$p_{k}\f$ is the pre-specified offset for the k-th location; for example, for K = 9,
\f$p_{k} \in \{(-1, -1), (-1, 0), \ldots, (1, 1)\}\f$
* \f${\Delta}p_{k}\f$ is the learnable offset for the k-th location.
* \f${\Delta}m_{k}\f$ is the modulation scalar in the range [0, 1] for the k-th location.
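For clarity, a naive NumPy sketch of the formula above for a single batch element, a single output channel, `deformable_group` = 1, unit strides and dilations, and no padding. The names `x` (input planes `[C, H, W]`), `w` (kernel `[C, kH, kW]`), `offsets` (the `2 * kH * kW` values of \f${\Delta}p_{k}\f$ at this output location) and `modulation` (the `kH * kW` values of \f${\Delta}m_{k}\f$) are illustrative assumptions, not part of the specification:
```python
import numpy as np

def bilinear(plane, y, x):
    """Bilinearly sample plane[H, W] at a real-valued (y, x); zero outside the map."""
    H, W = plane.shape
    if y <= -1 or y >= H or x <= -1 or x >= W:
        return 0.0
    y0, x0 = int(np.floor(y)), int(np.floor(x))
    val = 0.0
    for dy in (0, 1):
        for dx in (0, 1):
            yy, xx = y0 + dy, x0 + dx
            if 0 <= yy < H and 0 <= xx < W:
                val += (1 - abs(y - yy)) * (1 - abs(x - xx)) * plane[yy, xx]
    return val

def deformable_conv_point(x, w, offsets, modulation, py, px):
    """y(p) = sum_k w_k * x(p + p_k + dp_k) * dm_k for one output location p = (py, px)."""
    C, kH, kW = w.shape
    out = 0.0
    for c in range(C):
        for k, (ky, kx) in enumerate((i, j) for i in range(kH) for j in range(kW)):
            dy, dx = offsets[2 * k], offsets[2 * k + 1]   # learnable offset dp_k
            sy, sx = py + ky + dy, px + kx + dx           # sampling location p + p_k + dp_k
            out += w[c, ky, kx] * bilinear(x[c], sy, sx) * modulation[k]
    return out
```
Setting all `modulation` values to `1` reduces this sketch to the *DeformableConvolution-1* behavior.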
**Attributes**:
* *strides*
* **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the `(y,x)` axes. For example, *strides* equal `2,1` means sliding the filter 2 pixels at a time over the height dimension and 1 pixel over the width dimension.
* **Range of values**: integer values starting from `0`
* **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
* *pads_begin*
* **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal `1,2` means adding 1 pixel to the top of the input and 2 to the left of the input.
* **Range of values**: integer values starting from `0`
* **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
* *pads_end*
* **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal `1,2` means adding 1 pixel to the bottom of the input and 2 to the right of the input.
* **Range of values**: integer values starting from `0`
* **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
* *dilations*
* **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. For example, *dilation* equal `1,1` means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal `2,2` means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1.
* **Range of values**: integer value starting from `0`
* **Type**: `int[]`
* **Default value**: None
* **Required**: *yes*
* *auto_pad*
* **Description**: *auto_pad* specifies how the padding is calculated. Possible values:
* *explicit* - use explicit padding values from *pads_begin* and *pads_end*.
* *same_upper* - the input is padded to match the output size. In case of an odd padding value, the extra padding is added at the end.
* *same_lower* - the input is padded to match the output size. In case of an odd padding value, the extra padding is added at the beginning.
* *valid* - do not use padding.
* **Type**: `string`
* **Default value**: explicit
* **Required**: *no*
* **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified.
* *group*
* **Description**: *group* is the number of groups which *output* and *input* should be split into. For example, *group* equal to 1 means that all filters are applied to the whole input (usual convolution), *group* equal to 2 means that both *input* and *output* channels are separated into two groups and the *i-th output* group is connected to the *i-th input* group channel. *group* equal to a number of output feature maps implies depth-wise separable convolution.
* **Range of values**: integer value starting from `1`
* **Type**: `int`
* **Default value**: `1`
* **Required**: *no*
* *deformable_group*
* **Description**: *deformable_group* is the number of groups into which the *offsets* input and the *output* should be split along the channel axis. The deformable convolution is applied to the i-th part of the output using the i-th part of the *offsets* input.
* **Range of values**: integer value starting from `1`
* **Type**: `int`
* **Default value**: `1`
* **Required**: *no*
* *bilinear_interpolation_padding*
* **Description**: *bilinear_interpolation_padding* is the number of pixels outside of the feature map boundary to apply bilinear interpolation.
* **Range of values**: non-negative integer value
* **Type**: `int`
* **Default value**: `0`
* **Required**: *no*
**Inputs**:
* **1**: Input tensor of type *T* and rank 4. Layout is `NCYX` (number of batches, number of channels, spatial axes Y and X). **Required.**
* **2**: Offsets tensor of type *T* and rank 4. Layout is `NCYX` (number of batches, *deformable_group* \* kernel_Y \* kernel_X \* 2, spatial axes Y and X). **Required.**
* **3**: Kernel tensor of type *T* and rank 4. Layout is `OIYX` (number of output channels, number of input channels, spatial axes Y and X). **Required.**
* **4**: ModulationScalars tensor of type *T2* and rank 4, the values are within [0, 1]. Layout is `NCYX` (number of batches, *deformable_group* \* kernel_Y \* kernel_X, spatial axes Y and X). If the input is not provided, the values are assumed to be equal to 1. **Optional.**
**Outputs**:
* **1**: Output tensor of type *T* and rank 4. Layout is `NOYX` (number of batches, number of kernel output channels, spatial axes Y and X).
**Types**:
* *T*: Any numeric type.
* *T2*: Any supported floating-point type.
**Example**
2D DeformableConvolution (deformable_group=1)
```xml
<layer type="DeformableConvolution" ...>
<data dilations="1,1" pads_begin="0,0" pads_end="0,0" strides="1,1" auto_pad="explicit" group="1" deformable_group="1"/>
<input>
<port id="0">
<dim>1</dim>
<dim>4</dim>
<dim>224</dim>
<dim>224</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>50</dim>
<dim>220</dim>
<dim>220</dim>
</port>
<port id="2">
<dim>64</dim>
<dim>4</dim>
<dim>5</dim>
<dim>5</dim>
</port>
<port id="3">
<dim>1</dim>
<dim>25</dim>
<dim>220</dim>
<dim>220</dim>
</port>
</input>
<output>
<port id="4" precision="FP32">
<dim>1</dim>
<dim>64</dim>
<dim>220</dim>
<dim>220</dim>
</port>
</output>
</layer>
```

View File

@ -40,7 +40,7 @@ declared in `namespace opset8`.
* [Cos](arithmetic/Cos_1.md)
* [Cosh](arithmetic/Cosh_1.md)
* [CumSum](arithmetic/CumSum_3.md)
* [DeformableConvolution](convolution/DeformableConvolution_1.md)
* [DeformableConvolution](convolution/DeformableConvolution_8.md)
* [DeformablePSROIPooling](detection/DeformablePSROIPooling_1.md)
* [DepthToSpace](movement/DepthToSpace_1.md)
* [DetectionOutput](detection/DetectionOutput_1.md)

View File

@ -0,0 +1,168 @@
## MatrixNonMaxSuppression<a name="MatrixNonMaxSuppression"></a> {#openvino_docs_ops_sort_MatrixNms_8}
**Versioned name**: *MatrixNonMaxSuppression-8*
**Category**: *Sorting and maximization*
**Short description**: *MatrixNonMaxSuppression* performs matrix non-maximum suppression (NMS) of the boxes with predicted scores.
**Detailed description**: The operation performs the following:
1. Selects candidate bounding boxes with scores higher than `score_threshold`.
2. For each class, selects at most `nms_top_k` candidate boxes.
3. Decays scores of the candidate boxes according to the Matrix NMS algorithm [Wang et al](https://arxiv.org/abs/2003.10152.pdf). This algorithm is applied independently to each class and each batch element. Boxes of `background_class` are skipped and thus eliminated during the process.
4. Selects boxes with the decayed scores higher than `post_threshold`, and selects at most `keep_top_k` scoring candidate boxes per batch element.
The Matrix NMS algorithm is described below:
1. Sort the candidate boxes by score in descending order and compute the `n*n` pairwise IOU (Intersection over Union) matrix `X` for the top `n` boxes, where `n` is the number of candidate boxes.
2. Set the lower triangle and the diagonal of `X` to 0, which yields an upper triangular matrix `X`.
3. Take the column-wise max of `X` to compute a vector `K` of maximum IOU for each candidate box.
4. Repeat the values of `K` along axis 1 to obtain a matrix `X_cmax`.
5. Compute the decay factor: `decay_factor = exp((X_cmax**2 - X**2) * gaussian_sigma)` if `decay_function` is `gaussian`, else `decay_factor = (1 - X) / (1 - X_cmax)`.
6. Take the column-wise min of `decay_factor` and multiply it element-wise with the scores to decay them.
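A rough NumPy sketch of the decay steps above for the candidate boxes of one class, assuming they are already filtered by `score_threshold` and truncated to `nms_top_k`; the helper and variable names are illustrative and not part of the specification:
```python
import numpy as np

def pairwise_iou(b):
    """b: (n, 4) array of boxes as [xmin, ymin, xmax, ymax]; returns the (n, n) IOU matrix."""
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = np.maximum(b[:, None, :2], b[None, :, :2])   # pairwise intersection top-left corners
    rb = np.minimum(b[:, None, 2:], b[None, :, 2:])   # pairwise intersection bottom-right corners
    wh = np.clip(rb - lt, 0, None)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area[:, None] + area[None, :] - inter)

def matrix_nms_decay(boxes, scores, decay_function="linear", gaussian_sigma=2.0):
    """Decay the scores of one class's candidate boxes following steps 1-6 above (sketch)."""
    order = np.argsort(-scores)                      # step 1: sort by score, descending
    boxes, scores = boxes[order], scores[order]
    X = np.triu(pairwise_iou(boxes), k=1)            # step 2: zero the lower triangle and diagonal
    K = X.max(axis=0)                                # step 3: column-wise max IOU per box
    X_cmax = np.tile(K[:, None], (1, len(K)))        # step 4: repeat K along axis 1
    if decay_function == "gaussian":                 # step 5: decay factor
        decay = np.exp((X_cmax ** 2 - X ** 2) * gaussian_sigma)
    else:
        decay = (1 - X) / (1 - X_cmax)
    return boxes, scores * decay.min(axis=0)         # step 6: column-wise min, decay the scores
```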
**Attributes**:
* *sort_result*
* **Description**: *sort_result* specifies the order of output elements.
* **Range of values**: `class`, `score`, `none`
* *class* - sort selected boxes by class id (ascending).
* *score* - sort selected boxes by score (descending).
* *none* - do not guarantee the order.
* **Type**: `string`
* **Default value**: `none`
* **Required**: *No*
* *sort_result_across_batch*
* **Description**: *sort_result_across_batch* is a flag that specifies whether selected boxes should be sorted across batches.
* **Range of values**: true or false
* *true* - sort selected boxes across batches.
* *false* - do not sort selected boxes across batches (boxes are sorted per batch element).
* **Type**: boolean
* **Default value**: false
* **Required**: *No*
* *output_type*
* **Description**: the tensor type of the `selected_indices` and `selected_num` outputs.
* **Range of values**: `i64` or `i32`
* **Type**: `string`
* **Default value**: `i64`
* **Required**: *No*
* *score_threshold*
* **Description**: the minimum score to consider a box for processing.
* **Range of values**: a floating-point number
* **Type**: `float`
* **Default value**: `0`
* **Required**: *No*
* *nms_top_k*
* **Description**: maximum number of boxes to be selected per class.
* **Range of values**: an integer
* **Type**: `int`
* **Default value**: `-1` meaning to keep all boxes
* **Required**: *No*
* *keep_top_k*
* **Description**: maximum number of boxes to be selected per batch element.
* **Range of values**: an integer
* **Type**: `int`
* **Default value**: `-1` meaning to keep all boxes
* **Required**: *No*
* *background_class*
* **Description**: the background class id.
* **Range of values**: an integer
* **Type**: `int`
* **Default value**: `-1` meaning to keep all classes
* **Required**: *No*
* *decay_function*
* **Description**: decay function used to decay scores.
* **Range of values**: `gaussian`, `linear`
* **Type**: `string`
* **Default value**: `linear`
* **Required**: *No*
* *gaussian_sigma*
* **Description**: gaussian_sigma parameter for gaussian decay_function.
* **Range of values**: a floating-point number
* **Type**: `float`
* **Default value**: `2.0`
* **Required**: *No*
* *post_threshold*
* **Description**: threshold to filter out boxes with low confidence score after decaying.
* **Range of values**: a floating-point number
* **Type**: `float`
* **Default value**: `0`
* **Required**: *No*
**Inputs**:
* **1**: `boxes` - tensor of type *T* and shape `[num_batches, num_boxes, 4]` with box coordinates. The box coordinates are laid out as `[xmin, ymin, xmax, ymax]`. **Required.**
* **2**: `scores` - tensor of type *T* and shape `[num_batches, num_classes, num_boxes]` with box scores. **Required.**
**Outputs**:
* **1**: `selected_outputs` - tensor of type *T_THRESHOLDS* and shape `[number of selected boxes, 6]` containing the selected boxes with score and class as tuples `[class_id, box_score, xmin, ymin, xmax, ymax]`.
* **2**: `selected_indices` - tensor of type *T_IND* and shape `[number of selected boxes, 1]` containing the indices of the selected boxes in the flattened input `boxes`. The indices are absolute across batches, so the valid values are in the range `[0, num_batches * num_boxes - 1]`.
* **3**: `selected_num` - 1D tensor of type *T_IND* and shape `[num_batches]` representing the number of selected boxes for each batch element.
When no box is selected, `selected_num` is filled with `0`, `selected_outputs` is an empty tensor of shape `[0, 6]`, and `selected_indices` is an empty tensor of shape `[0, 1]`.
**Types**
* *T*: floating point type.
* *T_MAX_BOXES*: integer type.
* *T_THRESHOLDS*: floating point type.
* *T_IND*: `int64` or `int32`.
**Example**
```xml
<layer ... type="MatrixNonMaxSuppression" ... >
<data decay_function="gaussian" sort_result="score" output_type="i64"/>
<input>
<port id="0">
<dim>3</dim>
<dim>100</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>3</dim>
<dim>5</dim>
<dim>100</dim>
</port>
</input>
<output>
<port id="5" precision="FP32">
<dim>-1</dim> <!-- "-1" means an undefined dimension calculated during the model inference -->
<dim>6</dim>
</port>
<port id="6" precision="I64">
<dim>-1</dim>
<dim>1</dim>
</port>
<port id="7" precision="I64">
<dim>3</dim>
</port>
</output>
</layer>
```

View File

@ -0,0 +1,161 @@
## MulticlassNonMaxSuppression<a name="MulticlassNonMaxSuppression"></a> {#openvino_docs_ops_sort_MulticlassNonMaxSuppression_8}
**Versioned name**: *MulticlassNonMaxSuppression-8*
**Category**: *Sorting and maximization*
**Short description**: *MulticlassNonMaxSuppression* performs multi-class non-maximum suppression of the boxes with predicted scores.
**Detailed description**: *MulticlassNonMaxSuppression* is a multi-phase operation. It implements the non-maximum suppression algorithm described below:
1. Let `B = [b_0,...,b_n]` be the list of initial detection boxes and `S = [s_0,...,s_n]` be the list of corresponding scores.
2. Let `D = []` be an initial collection of resulting boxes. Let `adaptive_threshold = iou_threshold`.
3. If `B` is empty, go to step 9.
4. Take the box with the highest score. Suppose it is the box `b` with the score `s`.
5. Delete `b` from `B`.
6. If the score `s` is greater than or equal to `score_threshold`, add `b` to `D`, else go to step 9.
7. If `nms_eta < 1` and `adaptive_threshold > 0.5`, update `adaptive_threshold *= nms_eta`.
8. For each input box `b_i` from `B` and the corresponding score `s_i`, set `s_i = 0` when `iou(b, b_i) > adaptive_threshold`, and go to step 3.
9. Return `D`, a collection of the corresponding scores `S`, and the number of elements in `D`.
This algorithm is applied independently to each class of each batch element. The operation feeds at most the `nms_top_k` highest-scoring candidate boxes to this algorithm.
The total number of output boxes of each batch element must not exceed `keep_top_k`.
Boxes of `background_class` are skipped and thus eliminated.
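A rough Python sketch of steps 1-9 above for a single class of a single batch element; the `iou` helper and variable names are illustrative and not part of the specification:
```python
import numpy as np

def iou(a, b):
    """IOU of two boxes given as [xmin, ymin, xmax, ymax]."""
    lt, rb = np.maximum(a[:2], b[:2]), np.minimum(a[2:], b[2:])
    wh = np.clip(rb - lt, 0, None)
    inter = wh[0] * wh[1]
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

def adaptive_nms(boxes, scores, iou_threshold, score_threshold, nms_eta=1.0):
    """Steps 1-9 above for one class of one batch element (sketch)."""
    B = list(range(len(boxes)))                       # step 1: indices of remaining boxes
    D = []                                            # step 2: selected box indices
    adaptive_threshold = iou_threshold
    scores = scores.astype(float)                     # work on a copy of the scores
    while B:                                          # step 3
        b = max(B, key=lambda i: scores[i])           # step 4: highest-scoring box
        B.remove(b)                                   # step 5
        if scores[b] < score_threshold:               # step 6
            break
        D.append(b)
        if nms_eta < 1 and adaptive_threshold > 0.5:  # step 7
            adaptive_threshold *= nms_eta
        for i in B:                                   # step 8: suppress overlapping boxes
            if iou(boxes[b], boxes[i]) > adaptive_threshold:
                scores[i] = 0
    return D, scores[D], len(D)                       # step 9
```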
**Attributes**:
* *sort_result*
* **Description**: *sort_result* specifies the order of output elements.
* **Range of values**: `class`, `score`, `none`
* *class* - sort selected boxes by class id (ascending).
* *score* - sort selected boxes by score (descending).
* *none* - do not guarantee the order.
* **Type**: `string`
* **Default value**: `none`
* **Required**: *No*
* *sort_result_across_batch*
* **Description**: *sort_result_across_batch* is a flag that specifies whether selected boxes should be sorted across batches.
* **Range of values**: true or false
* *true* - sort selected boxes across batches.
* *false* - do not sort selected boxes across batches (boxes are sorted per batch element).
* **Type**: boolean
* **Default value**: false
* **Required**: *No*
* *output_type*
* **Description**: the tensor type of the `selected_indices` and `selected_num` outputs.
* **Range of values**: `i64` or `i32`
* **Type**: `string`
* **Default value**: `i64`
* **Required**: *No*
* *iou_threshold*
* **Description**: intersection over union threshold.
* **Range of values**: a floating-point number
* **Type**: `float`
* **Default value**: `0`
* **Required**: *No*
* *score_threshold*
* **Description**: the minimum score to consider a box for processing.
* **Range of values**: a floating-point number
* **Type**: `float`
* **Default value**: `0`
* **Required**: *No*
* *nms_top_k*
* **Description**: maximum number of boxes to be selected per class.
* **Range of values**: an integer
* **Type**: `int`
* **Default value**: `-1` meaning to keep all boxes
* **Required**: *No*
* *keep_top_k*
* **Description**: maximum number of boxes to be selected per batch element.
* **Range of values**: an integer
* **Type**: `int`
* **Default value**: `-1` meaning to keep all boxes
* **Required**: *No*
* *background_class*
* **Description**: the background class id.
* **Range of values**: an integer
* **Type**: `int`
* **Default value**: `-1` meaning to keep all classes.
* **Required**: *No*
* *nms_eta*
* **Description**: eta parameter for adaptive NMS.
* **Range of values**: a floating-point number in the closed range `[0, 1.0]`.
* **Type**: `float`
* **Default value**: `1.0`
* **Required**: *No*
**Inputs**:
* **1**: `boxes` - tensor of type *T* and shape `[num_batches, num_boxes, 4]` with box coordinates. The box coordinates are laid out as `[xmin, ymin, xmax, ymax]`. **Required.**
* **2**: `scores` - tensor of type *T* and shape `[num_batches, num_classes, num_boxes]` with box scores. **Required.**
**Outputs**:
* **1**: `selected_outputs` - tensor of type *T_THRESHOLDS* and shape `[number of selected boxes, 6]` containing the selected boxes with score and class as tuples `[class_id, box_score, xmin, ymin, xmax, ymax]`.
* **2**: `selected_indices` - tensor of type *T_IND* and shape `[number of selected boxes, 1]` containing the indices of the selected boxes in the flattened `boxes` input. The indices are absolute across batches, so the valid values are in the range `[0, num_batches * num_boxes - 1]`.
* **3**: `selected_num` - 1D tensor of type *T_IND* and shape `[num_batches]` representing the number of selected boxes for each batch element.
When no box is selected, `selected_num` is filled with `0`, `selected_outputs` is an empty tensor of shape `[0, 6]`, and `selected_indices` is an empty tensor of shape `[0, 1]`.
**Types**
* *T*: floating point type.
* *T_MAX_BOXES*: integer type.
* *T_THRESHOLDS*: floating point type.
* *T_IND*: `int64` or `int32`.
**Example**
```xml
<layer ... type="MulticlassNonMaxSuppression" ... >
<data sort_result="score" output_type="i64"/>
<input>
<port id="0">
<dim>3</dim>
<dim>100</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>3</dim>
<dim>5</dim>
<dim>100</dim>
</port>
</input>
<output>
<port id="5" precision="FP32">
<dim>-1</dim> <!-- "-1" means an undefined dimension calculated during the model inference -->
<dim>6</dim>
</port>
<port id="6" precision="I64">
<dim>-1</dim>
<dim>1</dim>
</port>
<port id="7" precision="I64">
<dim>3</dim>
</port>
</output>
</layer>
```

View File

@ -10,7 +10,7 @@
std::vector<std::string> disabledTestPatterns() {
return {
".*ExclusiveAsyncRequests.*",
".*reusableCPUStreamsExecutor.*",
".*ReusableCPUStreamsExecutor.*",
R"(.*SplitLayerTest.*numSplits\=30.*)",
// CVS-51758
".*PreprocessConversionTest.*oLT=NHWC.*",

View File

@ -32,9 +32,6 @@ class IExecutableNetworkInternal;
class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
details::SharedObjectLoader _so;
std::shared_ptr<IExecutableNetworkInternal> _impl;
IE_SUPPRESS_DEPRECATED_START
std::shared_ptr<IExecutableNetwork> actual;
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Constructs ExecutableNetwork from the initialized std::shared_ptr
@ -51,18 +48,6 @@ public:
*/
ExecutableNetwork() = default;
IE_SUPPRESS_DEPRECATED_START
/**
* @deprecated This ctor will be removed in 2022.1
* @brief Constructs ExecutableNetwork from the initialized std::shared_ptr
* @param exec Initialized shared pointer
* @param splg Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin object is destroyed.
*/
INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
explicit ExecutableNetwork(std::shared_ptr<IExecutableNetwork> exec,
std::shared_ptr<details::SharedObjectLoader> splg = {});
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Gets the Executable network output Data node information.
*

View File

@ -35,10 +35,6 @@ class ICompletionCallbackWrapper;
class INFERENCE_ENGINE_API_CLASS(InferRequest) {
details::SharedObjectLoader _so;
std::shared_ptr<IInferRequestInternal> _impl;
IE_SUPPRESS_DEPRECATED_START
IInferRequest::Ptr actual;
std::shared_ptr<details::ICompletionCallbackWrapper> callback;
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Constructs InferRequest from the initialized std::shared_ptr
@ -71,18 +67,6 @@ public:
*/
InferRequest() = default;
IE_SUPPRESS_DEPRECATED_START
/**
* @deprecated This ctor will be removed in 2022.1
* @brief Constructs InferRequest from the initialized std::shared_ptr
* @param request Initialized shared pointer
* @param splg Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is destroyed.
*/
INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
explicit InferRequest(IInferRequest::Ptr request,
std::shared_ptr<details::SharedObjectLoader> splg = {});
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Sets input/output data to infer
*

View File

@ -3,7 +3,7 @@
//
/**
* @brief A header file that provides wrapper classes for IVariableState
* @brief A header file that provides VariableState
*
* @file ie_memory_state.hpp
*/
@ -16,21 +16,17 @@
#include "ie_api.h"
#include "ie_blob.h"
#include "details/ie_so_loader.h"
#include "ie_imemory_state.hpp"
namespace InferenceEngine {
class IVariableStateInternal;
/**
* @brief C++ exception based error reporting wrapper of API class IVariableState
* @brief VariableState class
*/
class INFERENCE_ENGINE_API_CLASS(VariableState) {
details::SharedObjectLoader _so;
std::shared_ptr<IVariableStateInternal> _impl;
IE_SUPPRESS_DEPRECATED_START
std::shared_ptr<IVariableState> actual;
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Constructs VariableState from the initialized std::shared_ptr
@ -48,55 +44,27 @@ public:
*/
VariableState() = default;
IE_SUPPRESS_DEPRECATED_START
/**
* @deprecated This ctor will be removed in 2022.1
* @brief constructs VariableState from the initialized std::shared_ptr
* @param pState Initialized shared pointer
* @param plg Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed.
*/
INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
explicit VariableState(std::shared_ptr<IVariableState> pState,
std::shared_ptr<details::SharedObjectLoader> plg = {});
IE_SUPPRESS_DEPRECATED_END
/**
* @copybrief IVariableState::Reset
*
* Wraps IVariableState::Reset
* @brief Resets the internal variable state of the relevant infer request
* to the value specified as default for the corresponding ReadValue node
*/
void Reset();
/**
* @copybrief IVariableState::GetName
*
* Wraps IVariableState::GetName
* @brief Gets the name of the current variable state. The variable state name is the `variable_id`
* of the corresponding `ReadValue` node.
* @return A string representing a state name
*/
std::string GetName() const;
/**
* @copybrief IVariableState::GetState
*
* Wraps IVariableState::GetState
* @brief Returns the value of the variable state.
* @return A blob representing a state
*/
Blob::CPtr GetState() const;
/**
* @copybrief IVariableState::GetLastState
* @deprecated Use IVariableState::SetState instead
*
* Wraps IVariableState::GetLastState
* @return A blob representing a last state
*/
INFERENCE_ENGINE_DEPRECATED("Use VariableState::GetState function instead")
Blob::CPtr GetLastState() const;
/**
* @copybrief IVariableState::SetState
*
* Wraps IVariableState::SetState
* @brief Sets the new state for the next inference.
* @param state The current state to set
*/
void SetState(Blob::Ptr state);

View File

@ -65,6 +65,7 @@ DECLARE_GNA_CONFIG_KEY(DEVICE_MODE);
DECLARE_GNA_CONFIG_VALUE(AUTO);
DECLARE_GNA_CONFIG_VALUE(HW);
DECLARE_GNA_CONFIG_VALUE(HW_WITH_SW_FBACK);
DECLARE_GNA_CONFIG_VALUE(SW);
DECLARE_GNA_CONFIG_VALUE(SW_EXACT);
DECLARE_GNA_CONFIG_VALUE(SW_FP32);

View File

@ -18,7 +18,6 @@
#include "ie_common.h"
#include "ie_icnn_network.hpp"
#include "ie_iinfer_request.hpp"
#include "ie_imemory_state.hpp"
#include "ie_input_info.hpp"
#include "ie_parameter.hpp"
#include "ie_remote_context.hpp"
@ -113,22 +112,6 @@ public:
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::ExecutableNetwork::GetExecGraphInfo instead")
virtual StatusCode GetExecGraphInfo(ICNNNetwork::Ptr& graphPtr, ResponseDesc* resp) noexcept = 0;
/**
* @deprecated Use InferRequest::QueryState instead
* @brief Gets state control interface for given executable network.
*
* State control essential for recurrent networks
*
* @param pState reference to a pointer that receives internal states
* @param idx requested index for receiving memory state
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: InferenceEngine::OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for
* given index
*/
INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
virtual StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept = 0;
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Sets configuration for current executable network
*

View File

@ -17,7 +17,6 @@
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_preprocess.hpp"
#include "ie_imemory_state.hpp"
namespace InferenceEngine {
@ -195,21 +194,6 @@ public:
*/
virtual InferenceEngine::StatusCode SetBatch(int batch_size, ResponseDesc* resp) noexcept = 0;
IE_SUPPRESS_DEPRECATED_START
/**
* @brief Gets state control interface for given infer request.
*
* State control essential for recurrent networks
*
* @param pState reference to a pointer that receives internal states
* @param idx requested index for receiving memory state
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: InferenceEngine::OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for
* given index
*/
virtual StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept = 0;
IE_SUPPRESS_DEPRECATED_END
protected:
~IInferRequest() = default;
};

View File

@ -1,95 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief a header file for IVariableState interface
*
* @file ie_imemory_state.hpp
*/
#pragma once
#include <memory>
#include "ie_blob.h"
#include "ie_common.h"
namespace InferenceEngine {
/**
* @deprecated Use InferenceEngine::VariableState C++ wrapper instead
* @interface IVariableState
* @brief Manages data for reset operations
*/
class INFERENCE_ENGINE_DEPRECATED("InferenceEngine::") IVariableState {
public:
IE_SUPPRESS_DEPRECATED_START
/**
* @brief A shared pointer to the IVariableState interface
*/
using Ptr = std::shared_ptr<IVariableState>;
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Gets name of current variable state, if length of array is not enough name is truncated by len, null
* terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used.
*
* @param name preallocated buffer for receiving name
* @param len Length of the buffer
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode GetName(char* name, size_t len, ResponseDesc* resp) const noexcept = 0;
/**
* @brief Reset internal variable state for relevant infer request, to a value specified as default for according ReadValue node
*
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: InferenceEngine::OK (0) for success*
*/
virtual StatusCode Reset(ResponseDesc* resp) noexcept = 0;
/**
* @brief Sets the new state for the next inference.
*
* This method can fail if Blob size does not match the internal state size or precision
*
* @param newState The data to use as new state
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode SetState(Blob::Ptr newState, ResponseDesc* resp) noexcept = 0;
/**
* @brief Returns the value of the variable state.
*
* @param state A reference to a blob containing a variable state
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
INFERENCE_ENGINE_DEPRECATED("Use GetState function instead")
virtual StatusCode GetLastState(Blob::CPtr& state, ResponseDesc* resp) const noexcept {
return GetState(state, resp);
}
/**
* @brief Returns the value of the variable state.
*
* @param state A reference to a blob containing a variable state
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
* @return Status code of the operation: InferenceEngine::OK (0) for success
*/
virtual StatusCode GetState(Blob::CPtr& state, ResponseDesc* resp) const noexcept = 0;
};
IE_SUPPRESS_DEPRECATED_START
/**
* @brief For compatibility reasons.
*/
using IMemoryState = IVariableState;
IE_SUPPRESS_DEPRECATED_END
} // namespace InferenceEngine

View File

@ -49,26 +49,6 @@ public:
std::swap(ptr, parameter.ptr);
}
/**
* @deprecated Use ngraph::Variant directly
* @brief Creates parameter from variant.
* This method creates empty parameter if variant doesn't contain Parameter
*
* @param var ngraph variant
*/
INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
Parameter(const std::shared_ptr<ngraph::Variant>& var);
/**
* @deprecated Use ngraph::Variant directly
* @brief Creates parameter from variant.
* This method creates empty parameter if variant doesn't contain Parameter
*
* @param var ngraph variant
*/
INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
Parameter(std::shared_ptr<ngraph::Variant>& var);
/**
* @brief Copy constructor
*
@ -86,7 +66,8 @@ public:
* @param parameter object
*/
template <class T,
typename = typename std::enable_if<!std::is_same<typename std::decay<T>::type, Parameter>::value>::type>
typename = typename std::enable_if<!std::is_same<typename std::decay<T>::type, Parameter>::value &&
!std::is_abstract<typename std::decay<T>::type>::value>::type>
Parameter(T&& parameter) { // NOLINT
static_assert(!std::is_same<typename std::decay<T>::type, Parameter>::value, "To prevent recursion");
ptr = new RealData<typename std::decay<T>::type>(std::forward<T>(parameter));
@ -203,28 +184,6 @@ public:
return dyn_cast<typename std::remove_cv<T>::type>(ptr);
}
/**
* @deprecated Use ngraph::Variant directly
* @brief Converts parameter to shared pointer on ngraph::Variant
*
* @return shared pointer on ngraph::Variant
*/
INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
std::shared_ptr<ngraph::Variant> asVariant() const;
/**
* @deprecated Use ngraph::Variant directly
* @brief Casts to shared pointer on ngraph::Variant
*
* @return shared pointer on ngraph::Variant
*/
INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
operator std::shared_ptr<ngraph::Variant>() const {
IE_SUPPRESS_DEPRECATED_START
return asVariant();
IE_SUPPRESS_DEPRECATED_END
}
/**
* Dynamic cast to specified type
* @tparam T type
@ -254,6 +213,21 @@ public:
return !(*this == rhs);
}
/**
* @brief Prints underlying object to the given output stream.
* Uses operator<< if it is defined, leaves stream unchanged otherwise.
* In case of empty parameter or nullptr stream immediately returns.
*
* @param object Object to be printed to the given output stream.
* @param stream Output stream object will be printed to.
*/
friend void PrintTo(const Parameter& object, std::ostream* stream) {
if (object.empty() || !stream) {
return;
}
object.ptr->print(*stream);
}
private:
template <class T, class EqualTo>
struct CheckOperatorEqual {
@ -273,6 +247,24 @@ private:
template <class T, class EqualTo = T>
struct HasOperatorEqual : CheckOperatorEqual<T, EqualTo>::type {};
template <class T, class U>
struct CheckOutputStreamOperator {
template <class V, class W>
static auto test(W*) -> decltype(std::declval<V&>() << std::declval<W>(), std::true_type()) {
return {};
}
template <typename, typename>
static auto test(...) -> std::false_type {
return {};
}
using type = typename std::is_same<std::true_type, decltype(test<T, U>(nullptr))>::type;
};
template <class T>
struct HasOutputStreamOperator : CheckOutputStreamOperator<std::ostream, T>::type {};
struct Any {
#ifdef __ANDROID__
virtual ~Any();
@ -282,6 +274,7 @@ private:
virtual bool is(const std::type_info&) const = 0;
virtual Any* copy() const = 0;
virtual bool operator==(const Any& rhs) const = 0;
virtual void print(std::ostream&) const = 0;
};
template <class T>
@ -318,6 +311,20 @@ private:
bool operator==(const Any& rhs) const override {
return rhs.is(typeid(T)) && equal<T>(*this, rhs);
}
template <class U>
typename std::enable_if<!HasOutputStreamOperator<U>::value, void>::type
print(std::ostream& stream, const U& object) const {}
template <class U>
typename std::enable_if<HasOutputStreamOperator<U>::value, void>::type
print(std::ostream& stream, const U& object) const {
stream << object;
}
void print(std::ostream& stream) const override {
print<T>(stream, get());
}
};
template <typename T>

View File

@ -1,69 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief This is a header file with common inference engine definitions
*
* @file ie_unicode.hpp
*/
#pragma once
#include <algorithm>
#include <cstdlib>
#include <memory>
#include <ostream>
#include <string>
#include <vector>
#ifdef UNICODE
typedef wchar_t tchar;
typedef std::wstring file_name_t;
#else
typedef char tchar;
typedef std::string file_name_t;
#endif
namespace InferenceEngine {
/**
* @deprecated Use OS-native conversion utilities
* @brief Conversion from possibly-wide character string to a single-byte chain.
* @param str A possibly-wide character string
* @return A single-byte character string
*/
INFERENCE_ENGINE_DEPRECATED("Use OS-native conversion utilities")
inline std::string fileNameToString(const file_name_t& str) {
#ifdef UNICODE
size_t maxlen = (str.length() + 1) * sizeof(wchar_t) / sizeof(char);
std::vector<char> mbstr(maxlen);
mbstr[0] = 0;
std::wcstombs(&mbstr[0], str.c_str(), maxlen);
std::string res = std::string(&mbstr[0]);
return res;
#else
return str;
#endif
}
/**
* @deprecated Use OS-native conversion utilities
* @brief Conversion from single-byte character string to a possibly-wide one
* @param str A single-byte character string
* @return A possibly-wide character string
*/
INFERENCE_ENGINE_DEPRECATED("Use OS-native conversion utilities")
inline file_name_t stringToFileName(const std::string& str) {
#ifdef UNICODE
size_t maxlen = str.length() + 1;
std::vector<wchar_t> wcstr(maxlen);
wcstr[0] = 0;
std::mbstowcs(&wcstr[0], str.c_str(), maxlen);
file_name_t res = file_name_t(&wcstr[0]);
return res;
#else
return str;
#endif
}
} // namespace InferenceEngine

View File

@ -236,7 +236,8 @@ float getGnaFrequencyMHz() {
const uint8_t cannon_lake_model = 102;
const uint8_t gemini_lake_model = 122;
const uint8_t ice_lake_model = 126;
const uint8_t next_model = 140;
const uint8_t tgl_model = 140;
const uint8_t next_model = 151;
native_cpuid(&eax, &ebx, &ecx, &edx);
family = (eax >> 8) & 0xF;
@ -254,6 +255,7 @@ float getGnaFrequencyMHz() {
switch (model) {
case cannon_lake_model:
case ice_lake_model:
case tgl_model:
case next_model:
return 400;
case gemini_lake_model:
@ -287,13 +289,14 @@ void printReferenceCompareResults(score_error_t const& totalError, size_t frames
/**
* @brief Print a report on the performance counts
* @param utterancePerfMap reference to a map to store performance counters
* @param callsNum frame index
* @param numberOfFrames number of frames
* @param stream output stream
* @param fullDeviceName full device name string
* @param numberOfFramesOnHw number of frames delivered to GNA HW
* @return none.
*/
void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t callsNum, std::ostream& stream,
std::string fullDeviceName) {
void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t numberOfFrames,
std::ostream& stream, std::string fullDeviceName, const uint64_t numberOfFramesOnHw) {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
stream << std::endl << "Performance counts:" << std::endl;
stream << std::setw(10) << std::right << ""
@ -305,29 +308,29 @@ void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEn
stream << std::setw(46) << "(ms)";
stream << std::setw(24) << "(us per call)";
stream << std::endl;
// if GNA HW counters
// get frequency of GNA module
float freq = getGnaFrequencyMHz();
for (const auto& it : utterancePerfMap) {
std::string const& counter_name = it.first;
float current_units = static_cast<float>(it.second.realTime_uSec);
float call_units = current_units / callsNum;
// if GNA HW counters
// get frequency of GNA module
float freq = getGnaFrequencyMHz();
current_units /= freq * 1000;
call_units /= freq;
float current_units_us = static_cast<float>(it.second.realTime_uSec) / freq;
float call_units_us = current_units_us / numberOfFrames;
if (FLAGS_d.find("GNA") != std::string::npos) {
stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
} else {
stream << std::setw(30) << std::left << counter_name;
}
stream << std::setw(16) << std::right << current_units;
stream << std::setw(21) << std::right << call_units;
stream << std::setw(16) << std::right << current_units_us / 1000;
stream << std::setw(21) << std::right << call_units_us;
stream << std::endl;
}
stream << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << fullDeviceName << std::endl;
std::cout << std::endl;
stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
stream << "/" << numberOfFrames;
stream << std::endl;
#endif
}
@ -346,16 +349,20 @@ void getPerformanceCounters(InferenceEngine::InferRequest& request, std::map<std
}
/**
* @brief Summarize performance counts
* @brief Summarize performance counts and total number of frames executed on the GNA HW device
* @param perfCounters reference to a map to get performance counters
* @param totalPerfCounters reference to a map to save total performance counters
* @param totalRunsOnHw reference to a total number of frames computed on GNA HW
* @return none.
*/
void sumPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& perfCounters,
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters) {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters, uint64_t& totalRunsOnHw) {
auto runOnHw = false;
for (const auto& pair : perfCounters) {
totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec;
runOnHw |= pair.second.realTime_uSec > 0; // if realTime is above zero, that means that a primitive was executed on the device
}
totalRunsOnHw += runOnHw;
}
/**
@ -443,6 +450,7 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
"GPU",
"GNA_AUTO",
"GNA_HW",
"GNA_HW_WITH_SW_FBACK",
"GNA_SW_EXACT",
"GNA_SW",
"GNA_SW_FP32",
@ -829,6 +837,7 @@ int main(int argc, char* argv[]) {
/** Work with each utterance **/
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> utterancePerfMap;
uint64_t totalNumberOfRunsOnHw = 0;
std::string uttName;
uint32_t numFrames(0), n(0);
std::vector<uint32_t> numFrameElementsInput;
@ -984,7 +993,7 @@ int main(int argc, char* argv[]) {
// retrieve new counters
getPerformanceCounters(inferRequest.inferRequest, callPerfMap);
// summarize retrieved counters with all previous
sumPerformanceCounters(callPerfMap, utterancePerfMap);
sumPerformanceCounters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw);
}
}
// -----------------------------------------------------------------------------------------------------
@ -1092,7 +1101,7 @@ int main(int argc, char* argv[]) {
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms" << std::endl;
if (FLAGS_pc) {
// print performance results
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d));
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d), totalNumberOfRunsOnHw);
}
if (!FLAGS_r.empty()) {
// print statistical score error

View File

@ -21,10 +21,9 @@ static const char model_message[] = "Required. Path to an .xml file with a train
/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, "
"GNA_SW_FP32, "
"GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, "
"GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
" as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown "
"below. "
" as a secondary (e.g. HETERO:GNA,CPU) are supported. "
"The sample will look for a suitable plugin for device specified.";
/// @brief message for execution target

View File

@ -35,6 +35,9 @@ ie_add_plugin(NAME ${TARGET_NAME}
DEVICE_NAME "GNA"
SOURCES ${SOURCES} ${HEADERS})
# Enable support of CC for the plugin
ie_mark_target_as_cc(${TARGET_NAME})
# saving rpath to GNA shared library to be used by CI
log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
@ -67,7 +70,8 @@ target_compile_definitions(${TARGET_NAME}_test_static
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s inference_engine_transformations libGNA::API)
target_include_directories(${TARGET_NAME}_test_static PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
$<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
$<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>
PRIVATE $<TARGET_PROPERTY:openvino::conditional_compilation,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(${TARGET_NAME}_test_static PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_test_static)
set_target_properties(${TARGET_NAME} ${TARGET_NAME}_test_static

View File

@ -15,6 +15,7 @@
#include "layer_quantizer.hpp"
#include "scale_factor_calc.hpp"
#include "weights_converter.hpp"
#include "gna_itt.hpp"
namespace GNAPluginNS {
@ -40,6 +41,7 @@ class ModelQuantizer {
template <class PreQuantisationCb>
InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork &model, const PreQuantisationCb &cb, std::vector<float> scaleFactor) const {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ModelQuantizer::quantize");
auto visitor = [&](InferenceEngine::CNNLayerPtr lp) {
auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
transformLayer(newLayer, WeightsConverter());

View File

@ -96,14 +96,12 @@ void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t l
const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
checkGna2Status(status, "Gna2RequestConfigEnableActiveList");
}
void GNADeviceHelper::propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
wait(propagate(requestConfigId, gna2AccelerationMode));
}
uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqId{};
if (gna2AccelerationMode == Gna2AccelerationModeHardware &&
if ((gna2AccelerationMode == Gna2AccelerationModeHardware ||
gna2AccelerationMode == Gna2AccelerationModeHardwareWithSoftwareFallback) &&
detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) {
gnawarn() << "GNA Device not detected, consider using other mode of acceleration";
}
@ -541,6 +539,8 @@ void GNADeviceHelper::updateGnaPerfCounters() {
#if GNA_LIB_VER == 2
instrumentationTotal[0] = instrumentationResults[0];
instrumentationTotal[1] = instrumentationResults[1];
instrumentationResults[0] = 0;
instrumentationResults[1] = 0;
#else
nGNAPerfResultsTotal.hw.stall = nGNAPerfResults.hw.stall;
nGNAPerfResultsTotal.hw.total = nGNAPerfResults.hw.total;

View File

@ -117,18 +117,12 @@ public:
uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);
#if GNA_LIB_VER == 1
void propagateSync(const intel_nnet_type_t *pNeuralNetwork,
const uint32_t *pActiveIndices,
uint32_t nActiveIndices,
intel_gna_proc_t nGNAProcType);
uint32_t propagate(const intel_nnet_type_t *pNeuralNetwork,
const uint32_t *pActiveIndices,
uint32_t nActiveIndices,
intel_gna_proc_t nGNAProcType);
#else
void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices);
void propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
uint32_t createModel(Gna2Model& gnaModel) const;
void releaseModel(const uint32_t model_id);

View File

@ -0,0 +1,21 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief Defines openvino domains for tracing
* @file gna_itt.hpp
*/
#pragma once
#include <openvino/itt.hpp>
namespace GNAPluginNS {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(GNAPlugin);
OV_ITT_DOMAIN(GNA_LT);
}
}
}

View File

@ -37,7 +37,7 @@
#include <layers/gna_fake_quantize_layer.hpp>
#include "gna_graph_patterns.hpp"
#include "gna_tensor_tools.hpp"
#include <debug.h>
#include "gna_itt.hpp"
#include <ngraph/pass/manager.hpp>
#include <legacy/convert_function_to_cnn_network.hpp>
@ -391,6 +391,7 @@ GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
}
void GNAPlugin::Init() {
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "Init");
dnn = std::make_shared<backend::AMIntelDNN>(backend::AMIntelDNN());
inputsDesc = std::make_shared<GNAPluginNS::InputDesc>(GNAPluginNS::InputDesc());
gnaFlags = std::make_shared<GNAPluginNS::GNAFlags>(GNAPluginNS::GNAFlags());
@ -401,6 +402,7 @@ void GNAPlugin::Init() {
}
void GNAPlugin::InitGNADevice() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice");
#if GNA_LIB_VER == 1
gnadevice = std::make_shared<GNADeviceHelper>(gnaFlags->gna_lib_async_threads_num,
gnaFlags->gna_openmp_multithreading,
@ -419,6 +421,7 @@ void GNAPlugin::InitGNADevice() {
}
void GNAPlugin::UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork & network) {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateGnaQuantModeFromNetwork");
// fp32 emulation mode dont need any modifications to configuration
if (config.gnaFlags.sw_fp32) return;
@ -454,6 +457,7 @@ void GNAPlugin::UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork & netw
}
void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & network) {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateInputScaleFromNetwork");
// fp32 emulation mode dont need any modifications to configuration
if (config.gnaFlags.sw_fp32) return;
@ -561,6 +565,7 @@ bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer)
}
void GNAPlugin::FillInputsAndOutputsTranspositionInfo(const InferenceEngine::CNNNetwork& net) {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FillInputsAndOutputsTranspositionInfo");
auto printTranspositionInfo = [](const std::vector<TranspositionInfo> &transpositionInfo) {
for (const auto &transpositionInfoPart : transpositionInfo) {
gnalog() << "transpose=" << transpositionInfoPart.transpose << " rows_num=" << transpositionInfoPart.num_transpose_rows
@ -663,6 +668,7 @@ void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer,
#endif
void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "LoadNetwork");
std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork;
if (_network.getFunction()) {
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);

View File

@ -23,6 +23,7 @@ static const caseless_unordered_map<std::string, uint32_t> supported_values = {
{GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
};
static const std::vector<std::string> supported_values_on_gna2 = {
GNAConfigParams::GNA_HW_WITH_SW_FBACK,
GNAConfigParams::GNA_GEN,
GNAConfigParams::GNA_GEN_EXACT,
GNAConfigParams::GNA_SSE,
@ -34,18 +35,19 @@ static const std::vector<std::string> supported_values_on_gna2 = {
};
#else
static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, bool>> supported_values = {
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
{GNAConfigParams::GNA_HW_WITH_SW_FBACK, {Gna2AccelerationModeHardwareWithSoftwareFallback, false}},
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
};
#endif

View File

@ -41,6 +41,7 @@
#include "gna_graph_patterns.hpp"
#include "gna_data_types.hpp"
#include "gna_tensor_tools.hpp"
#include "gna_itt.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
@ -112,6 +113,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
*/
static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx,
std::shared_ptr<IPassManager> passmanager, std::string copyLayerType) {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayer");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(prevLayer);
std::string copyName = copyLayerType + std::string("_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
gnalog() << "Inserted " << copyName << " between: " << prevLayer->name << " and " << nextLayer->name << std::endl;
@ -257,6 +259,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
}
void InsertDiagonalLayerPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertDiagonalLayerPass");
bool lowPrecision = getPassManager()->isLowPrecision();
for (auto & l : *pLayers) {
@ -304,6 +307,7 @@ void InsertDiagonalLayerPass::run() {
}
void HandleMultipleActivationsForTheLayerPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "HandleMultipleActivationsForTheLayerPass");
// found layer followed by multiple activations
for (auto & l : *pLayers) {
CNNLayerSet activations;
@ -333,6 +337,7 @@ void HandleMultipleActivationsForTheLayerPass::run() {
}
void ForbidActivationFusingPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ForbidActivationFusingPass");
for (auto& l : *pLayers) {
if (LayerInfo(l).isActivation()) {
auto prevLayer = CNNNetPrevLayer(l);
@ -370,6 +375,7 @@ namespace {
} // namespace
void ReorderMaxPoolPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderMaxPoolPass");
// detecting following pattern
// conv->activation->maxpooling
// changing it to conv->maxpooling->activation
@ -398,6 +404,7 @@ void ReorderMaxPoolPass::run() {
}
void SubstituteSoftSignPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstituteSoftSignPass");
// detecting following pattern
// irv7 model: irv10 model:
// a layer a layer
@ -501,6 +508,7 @@ void SubstituteSoftSignPass::run() {
}
}
void SubstitutePReluPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstitutePReluPass");
auto getScale = [](CNNLayer* layer) {
auto powerCandidate = LayerInfo(layer);
if (!powerCandidate.isPower()) return 0.0f;
@ -606,6 +614,7 @@ void SubstitutePReluPass::run() {
}
void ReversePermutationsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReversePermutationsPass");
std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
= [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
if (CNNNetHasPrevLayer(layer.get())) {
@ -698,6 +707,7 @@ void ReversePermutationsPass::run() {
}
void RemovePermutationsNHWCToNCHWPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemovePermutationsNHWCToNCHWPass");
std::set<CNNLayerPtr> permutations_to_remove;
std::list<std::pair<CNNLayerPtr, CNNLayerPtr>> nhwc_layout_patterns;
for (auto& l : *pLayers) {
@ -781,6 +791,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {
}
void InsertIdentityLayerPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertIdentityLayerPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
auto createIdentityLayer = [quantized, this](const TensorDesc& tensorDesc) {
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
@ -898,6 +909,7 @@ void InsertIdentityLayerPass::run() {
}
void InsertCopyLayerPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayerPass");
// Copy layer insertion happens in a few cases:
// Crop output goes to concat layer -> copy layer insertion
// Split part of input goes to concat layer -> copy layer insertion
@ -1020,6 +1032,7 @@ void InsertCopyLayerPass::run() {
}
void FlattenTrivialConcatPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FlattenTrivialConcatPass");
// change all trivial concatenations (concatenation where output buffer is a buffer made by appending input buffers)
// by reshaping its inputs to 1 x total_input_size and its output to 1 x total_concat_size and changing the axis to 1
// for example if a 4D concat has unaligned inputs then ConcatAlignFilters need to be used if sizes before
@ -1103,6 +1116,7 @@ void FlattenTrivialConcatPass::run() {
}
void InsertConcatAligningFilterPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) {
@ -1221,6 +1235,7 @@ void InsertConcatAligningFilterPass::run() {
}
void ReorderConcatInputsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderConcatInputsPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
// specific alignment is not required in fp32 mode
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
@ -1318,6 +1333,7 @@ void ReorderConcatInputsPass::run() {
}
void InsertSplitAligningFilterPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertSplitAligningFilterPass");
// currently split layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this is not necessary but is useful for testing
const int bytesPerSplitElement = 2;
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
@ -1437,6 +1453,7 @@ static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {
}
void EltwiseSplitOverChannelsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "EltwiseSplitOverChannelsPass");
if (getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo == Policy::GNAAffineDiagonal::UNLIMIT) {
return;
}
@ -1552,6 +1569,7 @@ void EltwiseSplitOverChannelsPass::run() {
}
void SubstituteScaleShiftBroadCastPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstituteScaleShiftBroadCastPass");
std::map<std::string, InferenceEngine::SizeVector> reshaped_data;
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
@ -1633,6 +1651,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
}
void BroadcastConstPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "BroadcastConstPass");
for (auto constLayer : *pLayers) {
if (!LayerInfo(constLayer).isConst()) {
continue;
@ -1685,6 +1704,7 @@ void BroadcastConstPass::run() {
}
void InsertIdentityToLSTMCellPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertIdentityToLSTMCellPass");
for (auto layer : *pLayers) {
if (layer->type == "LSTMCell") {
// This fixes the cases when both functional and non-functional outputs are mixed (or no outputs are used)
@ -1722,6 +1742,7 @@ void InsertIdentityToLSTMCellPass::run() {
}
void BreakFusingOfOutputLayersPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "BreakFusingOfOutputLayersPass");
#if GNA_LIB_VER == 1
return;
#endif
@ -1765,6 +1786,7 @@ void BreakFusingOfOutputLayersPass::run() {
}
void UnrollLSTMCellPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UnrollLSTMCellPass");
InferenceEngine::NetPass::UnrollRNN_if(getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
if (rnn.clip != 0.0f)
return true;
@ -1781,6 +1803,7 @@ void UnrollLSTMCellPass::run() {
}
void UnrollTIPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UnrollTIPass");
auto sts = InferenceEngine::NetPass::UnrollTI(getPassManager()->getNetwork());
if (!sts) {
THROW_GNA_EXCEPTION << "TensorIterator layer cannot be unrolled!";
@ -1788,6 +1811,7 @@ void UnrollTIPass::run() {
}
void RemoveConstPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemoveConstPass");
auto network = getPassManager()->getNetwork();
IE_SUPPRESS_DEPRECATED_START
auto & icnnnet = static_cast<ICNNNetwork &>(network);
@ -1801,6 +1825,7 @@ void RemoveConstPass::run() {
}
void RemoveSingleInputConcatPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemoveSingleInputConcatPass");
for (auto &l : *pLayers) {
if (l->type == "Concat") {
auto concat = dynamic_cast<ConcatLayer*>(l.get());
@ -1828,6 +1853,7 @@ void RemoveSingleInputConcatPass::run() {
}
void FuseMultipleIdentitiesPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FuseMultipleIdentitiesPass");
for (auto &l : *pLayers) {
if (l->insData.empty()) continue;
@ -1909,6 +1935,7 @@ void FuseMultipleIdentitiesPass::run() {
}
void FuseFQIntoWeightsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FuseFQIntoWeightsPass");
auto isNonFunctional = [](CNNLayerPtr ptr) {
return LayerInfo(ptr).isNonFunctional();
};
@ -2067,6 +2094,7 @@ void FuseFQIntoWeightsPass::run() {
}
void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "MoveFakeQuantizeLayerIntoQuantParamsPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
if (!quantized) {
return;
@ -2268,6 +2296,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
}
void TransposeWeightsFromNCHWToNHWCPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "TransposeWeightsFromNCHWToNHWCPass");
if (!MustBeConvertedFromNCHWToNHWC(*pLayers)) return;
auto printTranspositionInfo = [](const std::vector<TranspositionInfo> &transpositionInfo) {

View File

@ -1,6 +1,7 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
@ -107,6 +108,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> matmul_node,
}
ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() {
MATCHER_SCOPE(ConvertMatmulToPointWiseConvolution);
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({const_input,
ngraph::pattern::wrap_type<ngraph::opset7::Constant>(),
@ -121,11 +123,12 @@ ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() {
return Convert(pattern_map.at(matmul).get_node_shared_ptr(), nullptr, nullptr, nullptr);
};
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, "ConvertMatmulToPointWiseConvolution");
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, matcher_name);
this->register_matcher(m, callback);
}
ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseConvolution() {
MATCHER_SCOPE(ConvertMatmulWithBiasToPointWiseConvolution);
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({const_input,
ngraph::pattern::wrap_type<ngraph::opset7::Constant>(),
@ -143,11 +146,12 @@ ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseCon
pattern_map.at(bias).get_node_shared_ptr(), nullptr);
};
auto m = std::make_shared<ngraph::pattern::Matcher>(add, "ConvertMatmulWithBiasToPointWiseConvolution");
auto m = std::make_shared<ngraph::pattern::Matcher>(add, matcher_name);
this->register_matcher(m, callback);
}
ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolution() {
MATCHER_SCOPE(ConvertMatmulWithFqToPointWiseConvolution);
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({const_input,
ngraph::pattern::wrap_type<ngraph::opset7::Constant>(),
@ -175,6 +179,6 @@ ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolu
pattern_map.at(out_fq).get_node_shared_ptr());
};
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, "ConvertMatmulWithFqToPointWiseConvolution");
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, matcher_name);
this->register_matcher(m, callback);
}
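All three constructors above receive the same change: the matcher's printable name now comes from the `matcher_name` variable introduced by `MATCHER_SCOPE`, which also allows individual transformations to be conditionally compiled. A hedged sketch of the resulting registration pattern (the pass name `MyMatcherPass` and the matched pattern are illustrative; the class is assumed to derive from `ngraph::pass::MatcherPass`):
```cpp
#include <memory>
#include <openvino/cc/ngraph/itt.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

MyMatcherPass::MyMatcherPass() {
    MATCHER_SCOPE(MyMatcherPass);  // defines `matcher_name`; may compile the body out entirely

    auto matmul = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>();

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        // ... inspect m.get_pattern_value_map() and rewrite the graph ...
        return true;
    };

    // The matcher is now named after matcher_name instead of a hard-coded string literal.
    auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, matcher_name);
    this->register_matcher(m, callback);
}
```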

View File

@ -1,6 +1,7 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
@ -16,6 +17,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(InsertTransposeAfterConvOrPool, "InsertTransposeAfterConvOrPool", 0);
bool InsertTransposeAfterConvOrPool::run_on_function(std::shared_ptr<ngraph::Function> f) {
RUN_ON_FUNCTION_SCOPE(InsertTransposeAfterConvOrPool);
bool is_graph_modfied = false;
for (auto& node : f->get_ordered_ops()) {
if (std::dynamic_pointer_cast<ngraph::opset7::Convolution>(node) == nullptr &&

View File

@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/insert_transpose_before_matmul.hpp"
#include <ngraph/opsets/opset7.hpp>
@ -13,6 +15,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(InsertTransposeBeforeMatmul, "InsertTransposeBeforeMatmul", 0);
InsertTransposeBeforeMatmul::InsertTransposeBeforeMatmul() {
MATCHER_SCOPE(InsertTransposeBeforeMatmul);
auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()},
ngraph::pattern::rank_equals(2));
@ -59,6 +62,6 @@ InsertTransposeBeforeMatmul::InsertTransposeBeforeMatmul() {
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(root, "InsertTransposeBeforeMatmul");
auto m = std::make_shared<ngraph::pattern::Matcher>(root, matcher_name);
this->register_matcher(m, callback);
}

View File

@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/remove_extra_reshapes.hpp"
#include <ngraph/opsets/opset7.hpp>
@ -12,6 +14,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);
RemoveExtraReshapes::RemoveExtraReshapes() {
MATCHER_SCOPE(RemoveExtraReshapes);
const auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>();
const auto pooling = ngraph::pattern::wrap_type<ngraph::opset7::MaxPool>({reshape});
@ -26,6 +29,6 @@ RemoveExtraReshapes::RemoveExtraReshapes() {
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(pooling, "RemoveExtraReshapes");
auto m = std::make_shared<ngraph::pattern::Matcher>(pooling, matcher_name);
this->register_matcher(m, callback);
}

View File

@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/reorder_activation_and_pooling.hpp"
#include <ngraph/opsets/opset7.hpp>
@ -15,6 +17,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(ReorderActivationAndPooling, "ReorderActivationAndPooling", 0);
ReorderActivationAndPooling::ReorderActivationAndPooling() {
MATCHER_SCOPE(ReorderActivationAndPooling);
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, ngraph::pattern::any_input()});
@ -63,6 +66,6 @@ ReorderActivationAndPooling::ReorderActivationAndPooling() {
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "ReorderActivationAndPooling");
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, matcher_name);
this->register_matcher(m, callback);
}

View File

@ -1,6 +1,7 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include "transformations/split_convolution_with_large_buffer_size.hpp"
@ -77,6 +78,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
}
SplitConvolution::SplitConvolution() {
MATCHER_SCOPE(SplitConvolution);
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
@ -85,11 +87,12 @@ SplitConvolution::SplitConvolution() {
return Convert(pattern_map.at(conv).get_node_shared_ptr(), nullptr, nullptr, nullptr);
};
auto m = std::make_shared<ngraph::pattern::Matcher>(conv, "SplitConvolution");
auto m = std::make_shared<ngraph::pattern::Matcher>(conv, matcher_name);
this->register_matcher(m, callback);
}
SplitConvolutionWithBias::SplitConvolutionWithBias() {
MATCHER_SCOPE(SplitConvolutionWithBias);
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
@ -101,11 +104,12 @@ SplitConvolutionWithBias::SplitConvolutionWithBias() {
pattern_map.at(bias).get_node_shared_ptr(), nullptr);
};
auto m = std::make_shared<ngraph::pattern::Matcher>(add, "SplitConvolutionWithBias");
auto m = std::make_shared<ngraph::pattern::Matcher>(add, matcher_name);
this->register_matcher(m, callback);
}
SplitConvolutionWithFq::SplitConvolutionWithFq() {
MATCHER_SCOPE(SplitConvolutionWithFq);
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
@ -126,6 +130,6 @@ SplitConvolutionWithFq::SplitConvolutionWithFq() {
return Convert(pattern_map.at(conv).get_node_shared_ptr(), add_node, bias_node, pattern_map.at(out_fq).get_node_shared_ptr());
};
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, "SplitConvolutionWithFq");
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, matcher_name);
this->register_matcher(m, callback);
}

View File

@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/cc/ngraph/itt.hpp>
#include <memory>
#include <vector>
@ -19,6 +21,7 @@ using namespace GNAPluginNS;
NGRAPH_RTTI_DEFINITION(SwapInputMatMul, "SwapInputMatMul", 0);
SwapInputMatMul::SwapInputMatMul() {
MATCHER_SCOPE(SwapInputMatMul);
auto matmul = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({ngraph::pattern::any_input(
ngraph::pattern::has_static_shape()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape())},
ngraph::pattern::has_static_shape());
@ -95,6 +98,6 @@ SwapInputMatMul::SwapInputMatMul() {
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, "SwapInputMatMul");
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, matcher_name);
this->register_matcher(m, callback);
}

View File

@ -25,47 +25,15 @@ ExecutableNetwork::ExecutableNetwork(const details::SharedObjectLoader& so,
IE_SUPPRESS_DEPRECATED_START
ExecutableNetwork::ExecutableNetwork(IExecutableNetwork::Ptr exec,
std::shared_ptr<details::SharedObjectLoader> splg)
: _so(), _impl(), actual(exec) {
if (splg) {
_so = *splg;
}
// plg can be null, but not the actual
if (actual == nullptr)
IE_THROW(NotAllocated) << "ExecutableNetwork was not initialized.";
}
ConstOutputsDataMap ExecutableNetwork::GetOutputsInfo() const {
if (actual) {
ConstOutputsDataMap data;
CALL_STATUS_FNC(GetOutputsInfo, data);
return data;
}
EXEC_NET_CALL_STATEMENT(return _impl->GetOutputsInfo());
}
ConstInputsDataMap ExecutableNetwork::GetInputsInfo() const {
if (actual) {
ConstInputsDataMap info;
CALL_STATUS_FNC(GetInputsInfo, info);
return info;
}
EXEC_NET_CALL_STATEMENT(return _impl->GetInputsInfo());
}
void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) {
if (actual) {
if (newActual == nullptr) {
THROW_IE_EXCEPTION << "ExecutableNetwork wrapper used for reset was not initialized.";
}
this->actual.swap(newActual);
return;
}
if (_impl == nullptr) IE_THROW() << "ExecutableNetwork was not initialized.";
if (newActual == nullptr) IE_THROW() << "ExecutableNetwork wrapper used for reset was not initialized.";
auto newBase = std::dynamic_pointer_cast<ExecutableNetworkBase>(newActual);
@ -76,36 +44,10 @@ void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) {
}
ExecutableNetwork::operator IExecutableNetwork::Ptr() {
if (actual) {
return actual;
}
return std::make_shared<ExecutableNetworkBase>(_impl);
}
std::vector<VariableState> ExecutableNetwork::QueryState() {
if (actual) {
if (actual == nullptr) THROW_IE_EXCEPTION << "ExecutableNetwork was not initialized.";
IVariableState::Ptr pState = nullptr;
auto res = OK;
std::vector<VariableState> controller;
for (size_t idx = 0; res == OK; ++idx) {
ResponseDesc resp;
IE_SUPPRESS_DEPRECATED_START
res = actual->QueryState(pState, idx, &resp);
IE_SUPPRESS_DEPRECATED_END
if (res != OK && res != OUT_OF_BOUNDS) {
THROW_IE_EXCEPTION << resp.msg;
}
if (res != OUT_OF_BOUNDS) {
controller.push_back(VariableState(pState,
std::make_shared<details::SharedObjectLoader>(_so)));
}
}
return controller;
}
std::vector<VariableState> controller;
EXEC_NET_CALL_STATEMENT(
for (auto&& state : _impl->QueryState()) {
@ -115,13 +57,6 @@ std::vector<VariableState> ExecutableNetwork::QueryState() {
}
InferRequest ExecutableNetwork::CreateInferRequest() {
if (actual) {
IInferRequest::Ptr req;
CALL_STATUS_FNC(CreateInferRequest, req);
if (req.get() == nullptr) THROW_IE_EXCEPTION << "Internal error: pointer to infer request is null";
return InferRequest(req, std::make_shared<details::SharedObjectLoader>(_so));
}
EXEC_NET_CALL_STATEMENT(return {_so, _impl->CreateInferRequest()});
}
@ -130,72 +65,38 @@ InferRequest::Ptr ExecutableNetwork::CreateInferRequestPtr() {
}
void ExecutableNetwork::Export(const std::string& modelFileName) {
if (actual) {
CALL_STATUS_FNC(Export, modelFileName);
return;
}
EXEC_NET_CALL_STATEMENT(_impl->Export(modelFileName));
}
void ExecutableNetwork::Export(std::ostream& networkModel) {
if (actual) {
CALL_STATUS_FNC(Export, networkModel);
return;
}
EXEC_NET_CALL_STATEMENT(_impl->Export(networkModel));
}
CNNNetwork ExecutableNetwork::GetExecGraphInfo() {
if (actual) {
IE_SUPPRESS_DEPRECATED_START
ICNNNetwork::Ptr ptr = nullptr;
CALL_STATUS_FNC(GetExecGraphInfo, ptr);
return CNNNetwork(ptr);
IE_SUPPRESS_DEPRECATED_END
}
EXEC_NET_CALL_STATEMENT(return _impl->GetExecGraphInfo());
}
void ExecutableNetwork::SetConfig(const std::map<std::string, Parameter>& config) {
if (actual) {
CALL_STATUS_FNC(SetConfig, config);
return;
}
EXEC_NET_CALL_STATEMENT(_impl->SetConfig(config));
}
Parameter ExecutableNetwork::GetConfig(const std::string& name) const {
if (actual) {
Parameter configValue;
CALL_STATUS_FNC(GetConfig, name, configValue);
return configValue;
}
EXEC_NET_CALL_STATEMENT(return _impl->GetConfig(name));
}
Parameter ExecutableNetwork::GetMetric(const std::string& name) const {
if (actual) {
Parameter metricValue;
CALL_STATUS_FNC(GetMetric, name, metricValue);
return metricValue;
}
EXEC_NET_CALL_STATEMENT(return _impl->GetMetric(name));
}
RemoteContext::Ptr ExecutableNetwork::GetContext() const {
if (actual) {
RemoteContext::Ptr pContext;
CALL_STATUS_FNC(GetContext, pContext);
return pContext;
}
EXEC_NET_CALL_STATEMENT(return _impl->GetContext());
}
bool ExecutableNetwork::operator!() const noexcept {
return !_impl || !actual;
return !_impl;
}
ExecutableNetwork::operator bool() const noexcept {
return !!_impl || !!actual;
return !!_impl;
}
} // namespace InferenceEngine

View File

@ -18,7 +18,6 @@
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
#include "cpp/exception2status.hpp"
#include "ie_variable_state_base.hpp"
#include "ie_infer_async_request_base.hpp"
namespace InferenceEngine {
@ -64,29 +63,10 @@ public:
TO_STATUS(_impl->Export(networkModel));
}
IE_SUPPRESS_DEPRECATED_START
StatusCode GetExecGraphInfo(ICNNNetwork::Ptr& graphPtr, ResponseDesc* resp) noexcept override {
// should be refactored together with ExecutableNetwork interface
TO_STATUS(graphPtr = _impl->GetExecGraphInfo());
}
INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept override {
try {
auto v = _impl->QueryState();
if (idx >= v.size()) {
return OUT_OF_BOUNDS;
}
pState = std::make_shared<VariableStateBase>(v[idx]);
return OK;
} catch (const std::exception& ex) {
return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
} catch (...) {
return InferenceEngine::DescriptionBuffer(UNEXPECTED);
}
}
IE_SUPPRESS_DEPRECATED_END
StatusCode SetConfig(const std::map<std::string, Parameter>& config, ResponseDesc* resp) noexcept override {
TO_STATUS(_impl->SetConfig(config));
}

View File

@ -10,10 +10,10 @@
#include "cpp/exception2status.hpp"
#include "cpp_interfaces/plugin_itt.hpp"
#include "ie_variable_state_base.hpp"
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include "ie_iinfer_request.hpp"
#include "ie_preprocess.hpp"
namespace InferenceEngine {
#define CATCH_IE_EXCEPTION_TO_STATUS_NO_RESP(StatusCode, ExceptionType) catch (const ExceptionType& ex) { \
@ -169,23 +169,6 @@ public:
StatusCode SetBatch(int batch_size, ResponseDesc* resp) noexcept override {
TO_STATUS(_impl->SetBatch(batch_size));
}
IE_SUPPRESS_DEPRECATED_START
StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept override {
try {
auto v = _impl->QueryState();
if (idx >= v.size()) {
return OUT_OF_BOUNDS;
}
pState = std::make_shared<VariableStateBase>(v[idx]);
return OK;
} catch (const std::exception& ex) {
return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
} catch (...) {
return InferenceEngine::DescriptionBuffer(UNEXPECTED);
}
}
IE_SUPPRESS_DEPRECATED_END
};
IE_SUPPRESS_DEPRECATED_END

View File

@ -23,44 +23,17 @@ namespace InferenceEngine {
InferRequest::InferRequest(const details::SharedObjectLoader& so,
const IInferRequestInternal::Ptr& impl)
: _so(so), _impl(impl), actual() {
: _so(so), _impl(impl) {
IE_ASSERT(_impl != nullptr);
}
IE_SUPPRESS_DEPRECATED_START
InferRequest::InferRequest(IInferRequest::Ptr request,
std::shared_ptr<details::SharedObjectLoader> splg)
: _so(), _impl(), actual(request) {
if (splg) {
_so = *splg;
}
// plg can be null, but not the actual
if (actual == nullptr)
IE_THROW(NotAllocated) << "InferRequest was not initialized.";
}
void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
if (actual) {
CALL_STATUS_FNC(SetBlob, name.c_str(), data);
return;
}
INFER_REQ_CALL_STATEMENT(_impl->SetBlob(name, data);)
}
Blob::Ptr InferRequest::GetBlob(const std::string& name) {
if (actual) {
Blob::Ptr data;
CALL_STATUS_FNC(GetBlob, name.c_str(), data);
std::string error = "Internal error: blob with name `" + name + "` is not allocated!";
auto blobPtr = data.get();
const bool remoteBlobPassed = blobPtr->is<RemoteBlob>();
if (blobPtr == nullptr) IE_THROW() << error;
if (!remoteBlobPassed && blobPtr->buffer() == nullptr) IE_THROW() << error;
return data;
}
Blob::Ptr blobPtr;
INFER_REQ_CALL_STATEMENT(blobPtr = _impl->GetBlob(name);)
std::string error = "Internal error: blob with name `" + name + "` is not allocated!";
@ -71,60 +44,26 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
}
void InferRequest::SetBlob(const std::string &name, const Blob::Ptr &data, const PreProcessInfo& info) {
if (actual) {
CALL_STATUS_FNC(SetBlob, name.c_str(), data, info);
return;
}
INFER_REQ_CALL_STATEMENT(_impl->SetBlob(name, data, info);)
}
const PreProcessInfo& InferRequest::GetPreProcess(const std::string& name) const {
if (actual) {
const PreProcessInfo* info = nullptr;
CALL_STATUS_FNC(GetPreProcess, name.c_str(), &info);
return *info;
}
INFER_REQ_CALL_STATEMENT(return _impl->GetPreProcess(name);)
}
void InferRequest::Infer() {
if (actual) {
CALL_STATUS_FNC_NO_ARGS(Infer);
return;
}
INFER_REQ_CALL_STATEMENT(_impl->Infer();)
}
void InferRequest::Cancel() {
if (actual) {
CALL_STATUS_FNC_NO_ARGS(Cancel);
return;
}
INFER_REQ_CALL_STATEMENT(_impl->Cancel();)
}
std::map<std::string, InferenceEngineProfileInfo> InferRequest::GetPerformanceCounts() const {
if (actual) {
std::map<std::string, InferenceEngineProfileInfo> perfMap;
CALL_STATUS_FNC(GetPerformanceCounts, perfMap);
return perfMap;
}
INFER_REQ_CALL_STATEMENT(return _impl->GetPerformanceCounts();)
}
void InferRequest::SetInput(const BlobMap& inputs) {
if (actual) {
for (auto&& input : inputs) {
CALL_STATUS_FNC(SetBlob, input.first.c_str(), input.second);
}
return;
}
INFER_REQ_CALL_STATEMENT(
for (auto&& input : inputs) {
_impl->SetBlob(input.first, input.second);
@ -133,13 +72,6 @@ void InferRequest::SetInput(const BlobMap& inputs) {
}
void InferRequest::SetOutput(const BlobMap& results) {
if (actual) {
for (auto&& result : results) {
CALL_STATUS_FNC(SetBlob, result.first.c_str(), result.second);
}
return;
}
INFER_REQ_CALL_STATEMENT(
for (auto&& result : results) {
_impl->SetBlob(result.first, result.second);
@ -148,106 +80,19 @@ void InferRequest::SetOutput(const BlobMap& results) {
}
void InferRequest::SetBatch(const int batch) {
if (actual) {
CALL_STATUS_FNC(SetBatch, batch);
return;
}
INFER_REQ_CALL_STATEMENT(_impl->SetBatch(batch);)
}
void InferRequest::StartAsync() {
if (actual) {
CALL_STATUS_FNC_NO_ARGS(StartAsync);
return;
}
INFER_REQ_CALL_STATEMENT(_impl->StartAsync();)
}
StatusCode InferRequest::Wait(int64_t millis_timeout) {
if (actual) {
ResponseDesc resp;
if (actual == nullptr) IE_THROW() << "InferRequest was not initialized.";
auto res = actual->Wait(millis_timeout, &resp);
if (res != OK && res != RESULT_NOT_READY &&
res != INFER_NOT_STARTED && res != INFER_CANCELLED) {
IE_EXCEPTION_SWITCH(res, ExceptionType,
InferenceEngine::details::ThrowNow<ExceptionType>{}
<<= std::stringstream{} << IE_LOCATION << resp.msg)
}
return res;
}
INFER_REQ_CALL_STATEMENT(return _impl->Wait(millis_timeout);)
}
namespace details {
class ICompletionCallbackWrapper {
public:
virtual ~ICompletionCallbackWrapper() = default;
virtual void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept = 0;
};
template <class T>
class CompletionCallbackWrapper : public ICompletionCallbackWrapper {
T lambda;
public:
explicit CompletionCallbackWrapper(const T& lambda): lambda(lambda) {}
void call(InferenceEngine::IInferRequest::Ptr /*request*/, InferenceEngine::StatusCode /*code*/) const
noexcept override {
lambda();
}
};
template <>
class CompletionCallbackWrapper<IInferRequest::CompletionCallback> : public ICompletionCallbackWrapper {
IInferRequest::CompletionCallback callBack;
public:
explicit CompletionCallbackWrapper(const IInferRequest::CompletionCallback& callBack): callBack(callBack) {}
void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept override {
callBack(request, code);
}
};
template <>
class CompletionCallbackWrapper<std::function<void(InferRequest, StatusCode)>> : public ICompletionCallbackWrapper {
std::function<void(InferRequest, StatusCode)> lambda;
public:
explicit CompletionCallbackWrapper(const std::function<void(InferRequest, InferenceEngine::StatusCode)>& lambda)
: lambda(lambda) {}
void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept override {
lambda(InferRequest(request), code);
}
};
void callWrapper(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) {
details::ICompletionCallbackWrapper* pWrapper = nullptr;
ResponseDesc dsc;
request->GetUserData(reinterpret_cast<void**>(&pWrapper), &dsc);
pWrapper->call(request, code);
}
} // namespace details
void InferRequest::SetCompletionCallbackImpl(std::function<void()> callbackToSet) {
if (actual) {
using T = std::function<void()>;
callback.reset(new details::CompletionCallbackWrapper<T>(callbackToSet));
CALL_STATUS_FNC(SetUserData, callback.get());
actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
return;
}
INFER_REQ_CALL_STATEMENT(
_impl->SetCallback([callbackToSet] (std::exception_ptr) {
callbackToSet();
@ -274,14 +119,6 @@ void InferRequest::SetCompletionCallbackImpl(std::function<void()> callbackToSet
void InferRequest::SetCompletionCallbackImpl(std::function<void(InferRequest, StatusCode)> callbackToSet) {
if (actual) {
using T = std::function<void(InferRequest, StatusCode)>;
callback.reset(new details::CompletionCallbackWrapper<T>(callbackToSet));
CALL_STATUS_FNC(SetUserData, callback.get());
actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
return;
}
INFER_REQ_CALL_STATEMENT(
auto weakThis = InferRequest{_so, std::shared_ptr<IInferRequestInternal>{_impl.get(), [](IInferRequestInternal*){}}};
_impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) {
@ -303,14 +140,6 @@ void InferRequest::SetCompletionCallbackImpl(std::function<void(InferRequest, St
}
void InferRequest::SetCompletionCallbackImpl(IInferRequest::CompletionCallback callbackToSet) {
if (actual) {
using T = IInferRequest::CompletionCallback;
callback.reset(new details::CompletionCallbackWrapper<T>(callbackToSet));
CALL_STATUS_FNC(SetUserData, callback.get());
actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
return;
}
INFER_REQ_CALL_STATEMENT(
IInferRequest::Ptr weakThis = InferRequest{_so, std::shared_ptr<IInferRequestInternal>{_impl.get(), [](IInferRequestInternal*){}}};
_impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) {
@ -332,38 +161,12 @@ void InferRequest::SetCompletionCallbackImpl(IInferRequest::CompletionCallback c
}
InferRequest::operator IInferRequest::Ptr () {
if (actual) {
return actual;
}
INFER_REQ_CALL_STATEMENT(
return std::make_shared<InferRequestBase>(_impl);
)
}
std::vector<VariableState> InferRequest::QueryState() {
if (actual) {
IE_SUPPRESS_DEPRECATED_START
if (actual == nullptr) IE_THROW() << "ExecutableNetwork was not initialized.";
IVariableState::Ptr pState = nullptr;
auto res = OK;
std::vector<VariableState> controller;
for (size_t idx = 0; res == OK; ++idx) {
ResponseDesc resp;
res = actual->QueryState(pState, idx, &resp);
if (res != OK && res != OUT_OF_BOUNDS) {
IE_THROW() << resp.msg;
}
if (res != OUT_OF_BOUNDS) {
controller.push_back(VariableState(pState,
std::make_shared<details::SharedObjectLoader>(_so)));
}
}
IE_SUPPRESS_DEPRECATED_END
return controller;
}
std::vector<VariableState> controller;
INFER_REQ_CALL_STATEMENT(
for (auto&& state : _impl->QueryState()) {
@ -374,11 +177,11 @@ std::vector<VariableState> InferRequest::QueryState() {
}
bool InferRequest::operator!() const noexcept {
return !_impl || !actual;
return !_impl;
}
InferRequest::operator bool() const noexcept {
return (!!_impl) || (!!actual);
return (!!_impl);
}
bool InferRequest::operator!=(const InferRequest& r) const noexcept {
@ -386,7 +189,7 @@ bool InferRequest::operator!=(const InferRequest& r) const noexcept {
}
bool InferRequest::operator==(const InferRequest& r) const noexcept {
return r._impl == _impl && r.actual == actual;
return r._impl == _impl;
}
} // namespace InferenceEngine

View File

@ -4,7 +4,6 @@
#include "details/ie_so_loader.h"
#include "cpp/ie_memory_state.hpp"
#include "ie_imemory_state.hpp"
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
#include "exception2status.hpp"
@ -24,57 +23,19 @@ VariableState::VariableState(const details::SharedObjectLoader& so,
IE_SUPPRESS_DEPRECATED_START
VariableState::VariableState(std::shared_ptr<IVariableState> state,
std::shared_ptr<details::SharedObjectLoader> splg)
: _so(), _impl(), actual(state) {
if (splg) {
_so = *splg;
}
// plg can be null, but not the actual
if (actual == nullptr)
IE_THROW(NotAllocated) << "VariableState was not initialized.";
}
Blob::CPtr VariableState::GetLastState() const {
return GetState();
}
void VariableState::Reset() {
if (actual) {
CALL_STATUS_FNC_NO_ARGS(Reset);
return;
}
VARIABLE_CALL_STATEMENT(_impl->Reset());
}
std::string VariableState::GetName() const {
if (actual) {
char name[256];
CALL_STATUS_FNC(GetName, name, sizeof(name));
return name;
}
VARIABLE_CALL_STATEMENT(return _impl->GetName());
}
Blob::CPtr VariableState::GetState() const {
if (actual) {
Blob::CPtr stateBlob;
CALL_STATUS_FNC(GetState, stateBlob);
return stateBlob;
}
VARIABLE_CALL_STATEMENT(return _impl->GetState());
}
void VariableState::SetState(Blob::Ptr state) {
if (actual) {
CALL_STATUS_FNC(SetState, state);
return;
}
VARIABLE_CALL_STATEMENT(_impl->SetState(state));
}

View File

@ -1,59 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include "cpp/exception2status.hpp"
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
#include "ie_imemory_state.hpp"
namespace InferenceEngine {
IE_SUPPRESS_DEPRECATED_START
/**
* @brief Default implementation for IVariableState
* @ingroup ie_dev_api_variable_state_api
*/
class VariableStateBase : public IVariableState {
std::shared_ptr<IVariableStateInternal> impl;
public:
/**
* @brief Constructor with actual underlying implementation.
* @param impl Underlying implementation of type IVariableStateInternal
*/
explicit VariableStateBase(std::shared_ptr<IVariableStateInternal> impl): impl(impl) {
if (impl == nullptr) {
IE_THROW() << "VariableStateBase implementation is not defined";
}
}
StatusCode GetName(char* name, size_t len, ResponseDesc* resp) const noexcept override {
for (size_t i = 0; i != len; i++) {
name[i] = 0;
}
DescriptionBuffer buf(name, len);
TO_STATUS(buf << impl->GetName());
return OK;
}
StatusCode Reset(ResponseDesc* resp) noexcept override {
TO_STATUS(impl->Reset());
}
StatusCode SetState(Blob::Ptr newState, ResponseDesc* resp) noexcept override {
TO_STATUS(impl->SetState(newState));
}
StatusCode GetState(Blob::CPtr& state, ResponseDesc* resp) const noexcept override {
TO_STATUS(state = impl->GetState());
}
};
IE_SUPPRESS_DEPRECATED_END
} // namespace InferenceEngine

View File

@ -23,7 +23,4 @@ Blob::CPtr IVariableStateInternal::GetState() const {
return state;
}
Blob::CPtr IVariableStateInternal::GetLastState() const {
return GetState();
}
} // namespace InferenceEngine

View File

@ -1,45 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <ie_parameter.hpp>
#include <memory>
#include <ngraph/variant.hpp>
namespace ngraph {
template class INFERENCE_ENGINE_API_CLASS(VariantImpl<InferenceEngine::Parameter>);
template <>
class INFERENCE_ENGINE_API_CLASS(VariantWrapper<InferenceEngine::Parameter>) : public VariantImpl<InferenceEngine::Parameter> {
public:
static constexpr VariantTypeInfo type_info {"Variant::InferenceEngine::Parameter", 0};
const VariantTypeInfo& get_type_info() const override {
return type_info;
}
VariantWrapper(const value_type& value): VariantImpl<value_type>(value) {} // NOLINT
};
} // namespace ngraph
constexpr ngraph::VariantTypeInfo ngraph::VariantWrapper<InferenceEngine::Parameter>::type_info;
InferenceEngine::Parameter::Parameter(const std::shared_ptr<ngraph::Variant>& var) {
if (auto paramWrapper = std::dynamic_pointer_cast<ngraph::VariantWrapper<InferenceEngine::Parameter>>(var)) {
auto param = paramWrapper->get();
if (!param.empty()) ptr = param.ptr->copy();
}
}
InferenceEngine::Parameter::Parameter(std::shared_ptr<ngraph::Variant>& var) {
if (auto paramWrapper = std::dynamic_pointer_cast<ngraph::VariantWrapper<InferenceEngine::Parameter>>(var)) {
auto param = paramWrapper->get();
if (!param.empty()) ptr = param.ptr->copy();
}
}
std::shared_ptr<ngraph::Variant> InferenceEngine::Parameter::asVariant() const {
return std::make_shared<ngraph::VariantWrapper<InferenceEngine::Parameter>>(*this);
}

View File

@ -111,13 +111,13 @@ void SplitTransformation::updateOutputs(
updateOutput(context, lastNodes[0], originalNode);
} else {
const std::string originalName = originalNode->get_friendly_name();
for (auto& lastNode : lastNodes) {
for (size_t outIdx = 0; outIdx < lastNodes.size(); ++outIdx) {
for (size_t i = 0; i < outputSize; ++i) {
std::shared_ptr<ngraph::Node> result = context.function->get_output_op(i);
std::shared_ptr<ngraph::Node> outputNode = result->get_input_node_shared_ptr(0);
if (outputNode.get() == lastNode.get()) {
if (outputNode.get() == lastNodes[outIdx].get()) {
originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix);
lastNode->set_friendly_name(originalName + "." + std::to_string(i));
lastNodes[outIdx]->set_friendly_name(originalName + "." + std::to_string(outIdx));
break;
}
}

View File

@ -157,10 +157,15 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
}
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
if ( // Check if all dimensions of scale except the output channels are all ones
if (
// expected, it's ok: return true
(shape_size(constOutputShape) != 1ul) &&
// not expected, something wrong: return false
((constOutputShape.size() <= outChannelsShapeIndex) ||
// Check if all dimensions of scale except the output channels are all ones
(shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex]))) {
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex])))) {
return false;
}
} else {

View File

@ -6,7 +6,7 @@ Use the following cmake option to enable debug capabilities:
## Blob dumping
Blob dumping is controlled by environment variables (filters).
The variables define conditions of the node which input, output and internal blobs
The variables define conditions selecting the nodes whose input and output blobs
should be dumped.
> **NOTE**: Nothing is dumped by default
@ -15,11 +15,13 @@ should be dumped for.
Environment variables can be set per execution, for example:
```sh
OV_CPU_BLOB_DUMP_DIR=dump_dir binary ...
OV_CPU_BLOB_DUMP_DIR=dump_dir OV_CPU_BLOB_DUMP_FORMAT=TEXT OV_CPU_BLOB_DUMP_NODE_PORTS=OUT binary ...
```
or for shell session (bash example):
```sh
export OV_CPU_BLOB_DUMP_DIR=dump_dir
export OV_CPU_BLOB_DUMP_FORMAT=TEXT
export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
binary ...
```
### Specify dump directory
@ -35,8 +37,22 @@ Options are:
* BIN (default)
* TEXT
### Filter input / output blobs
To dump only input / output blobs:
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ...
```
Example:
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS=OUT binary ...
```
Options are:
* IN
* OUT
* ALL
### Filter by execution ID
To dump blobs only for node with specified execution IDs:
To dump blobs only for nodes with specified execution IDs:
```sh
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ...
```
@ -46,19 +62,19 @@ Example:
```
### Filter by type
To dump blobs only for node with specified type:
To dump blobs only for nodes with specified types:
```sh
OV_CPU_BLOB_DUMP_NODE_TYPE=<type> binary ...
OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ...
```
Example:
```sh
OV_CPU_BLOB_DUMP_NODE_TYPE=Convolution binary ...
OV_CPU_BLOB_DUMP_NODE_TYPE='Convolution Reorder' binary ...
```
> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for the list of types
### Filter by name
To dump blobs only for node with name matching specified regex:
To dump blobs only for nodes with name matching specified regex:
```sh
OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ...
```
@ -68,9 +84,17 @@ Example:
```
### Dump all the blobs
```sh
OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
```
or
```sh
OV_CPU_BLOB_DUMP_NODE_NAME=".+" binary ...
```
or
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
```
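The filters documented above can also be combined in one invocation; for instance (a hypothetical run, assuming the graph contains Convolution nodes):
```sh
OV_CPU_BLOB_DUMP_DIR=dump_dir \
OV_CPU_BLOB_DUMP_FORMAT=TEXT \
OV_CPU_BLOB_DUMP_NODE_PORTS=OUT \
OV_CPU_BLOB_DUMP_NODE_TYPE=Convolution binary ...
```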
## Graph serialization
The functionality allows serializing the execution graph using an environment variable:

View File

@ -20,6 +20,7 @@ public:
readParam(blobDumpDir, "OV_CPU_BLOB_DUMP_DIR");
readParam(blobDumpFormat, "OV_CPU_BLOB_DUMP_FORMAT");
readParam(blobDumpNodeExecId, "OV_CPU_BLOB_DUMP_NODE_EXEC_ID");
readParam(blobDumpNodePorts, "OV_CPU_BLOB_DUMP_NODE_PORTS");
readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
@ -28,6 +29,7 @@ public:
std::string blobDumpDir;
std::string blobDumpFormat;
std::string blobDumpNodeExecId;
std::string blobDumpNodePorts;
std::string blobDumpNodeType;
std::string blobDumpNodeName;
std::string execGraphPath;

View File

@ -20,7 +20,7 @@ using namespace InferenceEngine;
namespace MKLDNNPlugin {
NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
: dumpFormat(DUMP_FORMAT::BIN)
: dumpFormat(FORMAT::BIN)
, dumpDirName("mkldnn_dump")
, count(_count) {
if (!config.blobDumpDir.empty())
@ -32,6 +32,9 @@ NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
if (!config.blobDumpNodeExecId.empty())
dumpFilters[FILTER::BY_EXEC_ID] = config.blobDumpNodeExecId;
if (!config.blobDumpNodePorts.empty())
dumpFilters[FILTER::BY_PORTS] = config.blobDumpNodePorts;
if (!config.blobDumpNodeType.empty())
dumpFilters[FILTER::BY_TYPE] = config.blobDumpNodeType;
@ -40,7 +43,7 @@ NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
}
void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
if (!shouldBeDumped(node))
if (!shouldBeDumped(node, "IN"))
return;
auto exec_order = std::to_string(node->getExecIndex());
@ -60,7 +63,7 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
file_name = file_name.substr(file_name.size() - 240);
auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name;
std::cout << "Dump before: " << dump_file << std::endl;
std::cout << "Dump inputs: " << dump_file << std::endl;
TensorDesc desc = prEdge->getDesc();
if (desc.getPrecision() == Precision::BIN)
@ -77,7 +80,7 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
}
void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const {
if (!shouldBeDumped(node))
if (!shouldBeDumped(node, "OUT"))
return;
auto exec_order = std::to_string(node->getExecIndex());
@ -96,7 +99,7 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const {
file_name = file_name.substr(file_name.size() - 240);
auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name;
std::cout << "Dump after: " << dump_file << std::endl;
std::cout << "Dump outputs: " << dump_file << std::endl;
TensorDesc desc = childEdge->getDesc();
if (desc.getPrecision() == Precision::BIN)
@ -130,56 +133,77 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const {
void NodeDumper::dump(const BlobDumper& bd, const std::string& file) const {
switch (dumpFormat) {
case DUMP_FORMAT::BIN: {
case FORMAT::BIN: {
bd.dump(file);
break;
}
case DUMP_FORMAT::TEXT: {
case FORMAT::TEXT: {
bd.dumpAsTxt(file);
break;
}
default:
IE_THROW() << "Unknown dump format";
IE_THROW() << "NodeDumper: Unknown dump format";
}
}
bool NodeDumper::shouldBeDumped(const MKLDNNNodePtr& node) const {
bool NodeDumper::shouldBeDumped(const MKLDNNNodePtr& node, const std::string& portsKind) const {
if (dumpFilters.empty())
return false;
if (dumpFilters.count(FILTER::BY_EXEC_ID)) { // filter by exec id env set
if (dumpFilters.count(FILTER::BY_PORTS)) { // filter by ports configured
if (dumpFilters.at(FILTER::BY_PORTS) != "ALL" &&
portsKind != dumpFilters.at(FILTER::BY_PORTS))
return false;
}
if (dumpFilters.count(FILTER::BY_EXEC_ID)) { // filter by exec id configured
std::stringstream ss(dumpFilters.at(FILTER::BY_EXEC_ID));
int id;
bool matched = false;
while (ss >> id) {
if (node->getExecIndex() == id) // exec id matches
if (node->getExecIndex() == id) { // exec id matches
matched = true;
break;
}
}
if (!matched)
return false;
}
if (dumpFilters.count(FILTER::BY_TYPE)) { // filter by type env set
if (NameFromType(node->getType()) != dumpFilters.at(FILTER::BY_TYPE)) // type does not match
if (dumpFilters.count(FILTER::BY_TYPE)) { // filter by type configured
std::stringstream ss(dumpFilters.at(FILTER::BY_TYPE));
std::string type;
bool matched = false;
while (ss >> type) {
if (NameFromType(node->getType()) == type) { // type matches
matched = true;
break;
}
}
if (!matched)
return false;
}
if (dumpFilters.count(FILTER::BY_NAME)) { // filter by name env set
if (!std::regex_match(node->getName(), std::regex(dumpFilters.at(FILTER::BY_NAME)))) // name does not match
if (dumpFilters.count(FILTER::BY_NAME)) { // filter by name configured
if (dumpFilters.at(FILTER::BY_NAME) != "*" && // to have 'single char' option for matching all the names
!std::regex_match(node->getName(), std::regex(dumpFilters.at(FILTER::BY_NAME)))) // name does not match
return false;
}
return true;
}
NodeDumper::DUMP_FORMAT NodeDumper::parseDumpFormat(const std::string& format) const {
NodeDumper::FORMAT NodeDumper::parseDumpFormat(const std::string& format) const {
if (format == "BIN")
return DUMP_FORMAT::BIN;
return FORMAT::BIN;
else if (format == "TEXT")
return DUMP_FORMAT::TEXT;
return FORMAT::TEXT;
else
IE_THROW() << "Unknown dump format";
IE_THROW() << "NodeDumper: Unknown dump format";
}
void NodeDumper::formatNodeName(std::string& name) const {

View File

@ -31,28 +31,29 @@ public:
private:
void dumpInternalBlobs(const MKLDNNNodePtr& node) const;
void dump(const BlobDumper& bd, const std::string& file) const;
bool shouldBeDumped(const MKLDNNNodePtr &node) const;
bool shouldBeDumped(const MKLDNNNodePtr &node, const std::string& portsKind) const;
enum class DUMP_FORMAT {
enum class FORMAT {
BIN,
TEXT,
};
DUMP_FORMAT parseDumpFormat(const std::string& format) const;
FORMAT parseDumpFormat(const std::string& format) const;
void formatNodeName(std::string& name) const;
DUMP_FORMAT dumpFormat;
FORMAT dumpFormat;
std::string dumpDirName;
int count;
enum FILTER {
BY_PORTS,
BY_EXEC_ID,
BY_TYPE,
BY_NAME,
COUNT,
};
std::unordered_map<FILTER, std::string> dumpFilters;
// std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and a defect in the C++11 standard)
std::unordered_map<FILTER, std::string, std::hash<int>> dumpFilters;
};
} // namespace MKLDNNPlugin
#endif // CPU_DEBUG_CAPS

View File

@ -96,7 +96,8 @@ public:
auto mask_2_iter = mask->rbegin();
while (mask_1_iter != rend() &&
mask_2_iter != mask->rend()) {
mask_2_iter != mask->rend() &&
result_iter != result_mask->rend()) {
// Merge mask dimension values for both masks
// Example: (MaskValue[1,2,3,4], MaskValue[2,3]) -> MaskValue[2,3]
for (const auto & value : *mask_1_iter) {
@ -119,7 +120,8 @@ public:
auto mask_2_iter = mask->rbegin();
while (mask_1_iter != rend() &&
mask_2_iter != mask->rend()) {
mask_2_iter != mask->rend() &&
result_iter != result_mask->rend()) {
// Union mask dimension values for both masks
// Example: (MaskValue[1,2,3,4], MaskValue[2, 5]) -> MaskValue[1, 2, 3, 4, 5]
for (const auto & value : *mask_1_iter) {

View File

@ -246,6 +246,9 @@ public:
// To allow pruning on weights (allow reshape input Group (0) dim changing) replace Reshape Shape constant
// [G, 1, 1, X, Y, Z] by [-1, 1, 1, X, Y, Z].
auto old_shape_const = std::dynamic_pointer_cast<opset6::Constant>(m_shape.get_node_shared_ptr());
if (!old_shape_const) {
return false;
}
auto shape_value = old_shape_const.get()->cast_vector<int64_t>();
shape_value[0] = -1;
auto new_const = opset6::Constant::create(old_shape_const->get_element_type(),
@ -462,6 +465,9 @@ public:
const auto & pattern_map = m.get_pattern_value_map();
const auto & m_output = pattern_map.at(concat);
auto concat_ptr = std::dynamic_pointer_cast<opset6::Concat>(m_output.get_node_shared_ptr());
if (!concat_ptr) {
return false;
}
auto axis = concat_ptr->get_concatenation_axis();
auto inputs = concat_ptr->inputs();

View File

@ -50,14 +50,6 @@ public:
*/
virtual Blob::CPtr GetState() const;
/**
* @deprecated Use IVariableStateInternal::GetState method instead
* @brief Returns the value of the variable state.
* @return The value of the variable state
*/
INFERENCE_ENGINE_DEPRECATED("Use IVariableStateInternal::GetState method instead")
virtual Blob::CPtr GetLastState() const;
protected:
/**
* @brief A default dtor

View File

@ -25,6 +25,9 @@
#include "ie_algorithm.hpp"
namespace InferenceEngine {
namespace details {
/**
* @brief Serializes a `std::vector` to a `std::ostream`
* @ingroup ie_dev_api_error_debug
@ -32,7 +35,6 @@
* @param vec A vector to serialize
* @return A reference to a `std::stream`
*/
namespace std {
template <typename T>
inline std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
if (vec.empty()) return std::operator<<(out, "[]");
@ -42,10 +44,7 @@ inline std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
}
return out << "]";
}
} // namespace std
namespace InferenceEngine {
namespace details {
/**
* @brief trim from start (in place)
* @ingroup ie_dev_api_error_debug

View File

@ -0,0 +1,99 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <vector>
#include <map>
#include "caseless.hpp"
#include "vpu/utils/optional.hpp"
namespace vpu {
struct CompilationConfig {
int numSHAVEs = -1;
int numCMXSlices = -1;
int numExecutors = -1;
int tilingCMXLimitKB = -1;
bool hwOptimization = true;
bool hwExtraSplit = false;
std::string irWithVpuScalesDir;
std::string customLayers;
bool detectBatch = true;
Optional<bool> injectSwOps;
Optional<bool> packDataInCmx;
bool mergeHwPoolToConv = true;
bool hwDilation = false;
bool forceDeprecatedCnnConversion = false;
bool enableEarlyEltwiseReLUFusion = true;
std::map<std::string, std::vector<int>> ioStrides;
//
// Debug options
//
InferenceEngine::details::caseless_set<std::string> hwWhiteList;
InferenceEngine::details::caseless_set<std::string> hwBlackList;
bool hwDisabled(const std::string& layerName) const {
if (!hwWhiteList.empty()) {
return hwWhiteList.count(layerName) == 0;
}
if (!hwBlackList.empty()) {
return hwBlackList.count(layerName) != 0;
}
return false;
}
InferenceEngine::details::caseless_set<std::string> noneLayers;
bool skipAllLayers() const {
if (noneLayers.size() == 1) {
const auto& val = *noneLayers.begin();
return val == "*";
}
return false;
}
bool skipLayerType(const std::string& layerType) const {
return noneLayers.count(layerType) != 0;
}
bool ignoreUnknownLayers = false;
std::string dumpInternalGraphFileName;
std::string dumpInternalGraphDirectory;
bool dumpAllPasses;
bool disableReorder = false; // TODO: rename to enableReorder and switch logic.
bool disableConvertStages = false;
bool enablePermuteMerging = true;
bool enableReplWithSCRelu = false;
bool enableReplaceWithReduceMean = true;
bool enableTensorIteratorUnrolling = false;
bool forcePureTensorIterator = false;
bool enableMemoryTypesAnnotation = false;
bool enableWeightsAnalysis = true;
bool checkPreprocessingInsideModel = true;
bool enableCustomReshapeParam = false;
//
// Deprecated options
//
float inputScale = 1.0f;
float inputBias = 0.0f;
};
} // namespace vpu
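The `hwDisabled` helper above gives the whitelist precedence over the blacklist: when `hwWhiteList` is non-empty only listed layers stay on HW, otherwise `hwBlackList` disables the listed ones. A small hedged usage sketch (the include path is assumed; layer names are illustrative):
```cpp
#include <iostream>
#include <vpu/configuration/compilation_config.hpp>  // assumed location of CompilationConfig

int main() {
    vpu::CompilationConfig cfg;

    cfg.hwBlackList.insert("conv1");
    std::cout << cfg.hwDisabled("conv1") << "\n";  // 1: explicitly blacklisted
    std::cout << cfg.hwDisabled("conv2") << "\n";  // 0: not blacklisted

    cfg.hwWhiteList.insert("conv3");
    // A non-empty whitelist takes precedence: anything not whitelisted is disabled.
    std::cout << cfg.hwDisabled("conv1") << "\n";  // 1
    std::cout << cfg.hwDisabled("conv3") << "\n";  // 0 (caseless_set: "Conv3" would also match)
    return 0;
}
```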

View File

@ -0,0 +1,18 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include "ie_parameter.hpp"
template<class OptionConcept>
struct AsParsedParameterEnabler {
static InferenceEngine::Parameter asParameter(const std::string& value) { return {OptionConcept::parse(value)}; }
};
struct AsParameterEnabler {
static InferenceEngine::Parameter asParameter(const std::string& value);
};

View File

@ -0,0 +1,34 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include "vpu/configuration/as_parameter_enabler.hpp"
namespace vpu {
namespace details {
enum class Access;
enum class Category;
} // namespace details
class PluginConfiguration;
struct CopyOptimizationOption : public AsParsedParameterEnabler<CopyOptimizationOption> {
using value_type = bool;
static std::string key();
static void validate(const std::string&);
static void validate(const PluginConfiguration&);
static std::string defaultValue();
static value_type parse(const std::string&);
static details::Access access();
static details::Category category();
};
} // namespace vpu

View File

@ -0,0 +1,36 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include "vpu/configuration/as_parameter_enabler.hpp"
namespace vpu {
enum class LogLevel;
namespace details {
enum class Access;
enum class Category;
} // namespace details
class PluginConfiguration;
struct LogLevelOption : public AsParameterEnabler {
using value_type = LogLevel;
static std::string key();
static void validate(const std::string&);
static void validate(const PluginConfiguration&);
static std::string defaultValue();
static value_type parse(const std::string&);
static details::Access access();
static details::Category category();
};
} // namespace vpu

View File

@ -0,0 +1,142 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <memory>
#include <vpu/parsed_config.hpp>
#include "ie_parameter.hpp"
#include "vpu/utils/logger.hpp"
namespace vpu {
class PluginConfiguration;
struct ConfigurationOptionConcept {
virtual std::string key() const = 0;
virtual void validate(const std::string&) const = 0;
virtual void validate(const PluginConfiguration&) const = 0;
virtual InferenceEngine::Parameter asParameter(const std::string&) const = 0;
};
namespace details {
template<class Option>
struct ConfigurationOptionModel : public ConfigurationOptionConcept {
std::string key() const override { return Option::key(); }
void validate(const std::string& value) const override { return Option::validate(value); }
void validate(const PluginConfiguration& options) const override { Option::validate(options); }
InferenceEngine::Parameter asParameter(const std::string& value) const override { return Option::asParameter(value); }
};
enum class Deprecation {
Off,
On
};
enum class Access {
Private,
Public
};
enum class Category {
CompileTime,
RunTime
};
class ConfigurationEntry {
public:
template<class Option>
ConfigurationEntry(Option, details::Deprecation deprecation)
: m_access(Option::access())
, m_deprecation(deprecation)
, m_category(Option::category())
, m_value(std::make_shared<ConfigurationOptionModel<Option>>())
{}
ConfigurationOptionConcept& get();
const ConfigurationOptionConcept& get() const;
std::string key() const;
bool isPrivate() const;
bool isDeprecated() const;
Category getCategory() const;
private:
Access m_access = Access::Public;
Deprecation m_deprecation = Deprecation::Off;
Category m_category = Category::CompileTime;
std::shared_ptr<ConfigurationOptionConcept> m_value;
};
} // namespace details
// TODO: remove virtual inheritance once all options are migrated
// it's needed to pass updated compilation config to graph transformer
class PluginConfiguration : public virtual ParsedConfig {
public:
PluginConfiguration();
void from(const std::map<std::string, std::string>& config);
void fromAtRuntime(const std::map<std::string, std::string>& config);
std::unordered_set<std::string> getPublicKeys() const;
bool supports(const std::string& key) const;
template<class Option>
void registerOption() {
const auto& key = Option::key();
concepts.emplace(key, details::ConfigurationEntry(Option{}, details::Deprecation::Off));
if (values.count(key) == 0) {
// option could be registered more than once if there are deprecated versions of it
values.emplace(key, Option::defaultValue());
}
}
template<class Option>
void registerDeprecatedOption(const std::string& deprecatedKey) {
const auto& key = Option::key();
concepts.emplace(deprecatedKey, details::ConfigurationEntry(Option{}, details::Deprecation::On));
if (values.count(key) == 0) {
// option could be registered more than once if there are deprecated versions of it
values.emplace(key, Option::defaultValue());
}
}
template<class Option>
typename Option::value_type get() const {
const auto& key = Option::key();
validate(key);
return Option::parse(values.at(key));
}
void set(const std::string& key, const std::string& value);
const std::string& operator[](const std::string& key) const;
InferenceEngine::Parameter asParameter(const std::string& key) const;
virtual void validate() const;
private:
std::unordered_map<std::string, details::ConfigurationEntry> concepts;
std::unordered_map<std::string, std::string> values;
Logger::Ptr logger;
enum class Mode {
Default,
RunTime
};
void create(const std::map<std::string, std::string>& config, Mode mode = Mode::Default);
void validate(const std::string& key) const;
};
} // namespace vpu
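Taken together, this header describes the round trip an option value makes: registerOption<Option>() stores a type-erased ConfigurationEntry under Option::key() along with the option's default value, from()/set() validate and keep the raw strings, and get<Option>() re-parses the stored string into Option::value_type. A hedged usage sketch, assuming the headers introduced in this diff are on the include path; the registration call site shown here is illustrative, the real one is outside this section.
// --- Illustrative sketch (not part of the upstream diff) ---------------------
#include <map>
#include <string>
#include "vpu/configuration/plugin_configuration.hpp"
#include "vpu/configuration/options/copy_optimization.hpp"
#include "vpu/configuration/options/log_level.hpp"
void example() {
    vpu::PluginConfiguration configuration;
    configuration.registerOption<vpu::CopyOptimizationOption>();
    configuration.registerOption<vpu::LogLevelOption>();
    // Raw strings arrive from the user-facing config map and are validated on the way in.
    configuration.from({{vpu::CopyOptimizationOption::key(), "NO"}});
    // get<Option>() re-parses the stored string into the option's value_type (bool here).
    const bool copyOptimization = configuration.get<vpu::CopyOptimizationOption>();
    (void)copyOptimization;
}
// -----------------------------------------------------------------------------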

View File

@ -0,0 +1,15 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <unordered_map>
namespace vpu {
const std::unordered_map<std::string, bool>& string2switch();
const std::unordered_map<bool, std::string>& switch2string();
} // namespace vpu

View File

@ -10,11 +10,11 @@
#include <string>
#include <vpu/myriad_config.hpp>
#include <vpu/configuration.hpp>
#include <vpu/private_plugin_config.hpp>
#include <vpu/parsed_config_base.hpp>
#include <vpu/graph_transformer.hpp>
#include <vpu/utils/perf_report.hpp>
#include <vpu/utils/logger.hpp>
#include <vpu/utils/enums.hpp>
@ -23,6 +23,12 @@ namespace vpu {
class ParsedConfig : public ParsedConfigBase {
public:
ParsedConfig() = default;
ParsedConfig(const ParsedConfig&) = default;
ParsedConfig& operator=(const ParsedConfig&) = default;
ParsedConfig(ParsedConfig&&) = delete;
ParsedConfig& operator=(ParsedConfig&&) = delete;
const std::string& compilerLogFilePath() const {
return _compilerLogFilePath;
}
@ -31,6 +37,10 @@ public:
return _compileConfig;
}
CompilationConfig& compileConfig() {
return _compileConfig;
}
bool printReceiveTensorTime() const {
return _printReceiveTensorTime;
}

View File

@ -25,10 +25,6 @@ VPU_DECLARE_ENUM(ConfigMode,
class ParsedConfigBase {
public:
LogLevel logLevel() const {
return _logLevel;
}
bool exclusiveAsyncRequests() const {
return _exclusiveAsyncRequests;
}
@ -37,11 +33,9 @@ public:
ParsedConfigBase();
virtual ~ParsedConfigBase();
void update(
const std::map<std::string, std::string>& config,
ConfigMode mode = ConfigMode::Any);
protected:
void update(const std::map<std::string, std::string>& config, ConfigMode mode = ConfigMode::Any);
virtual const std::unordered_set<std::string>& getCompileOptions() const;
virtual const std::unordered_set<std::string>& getRunTimeOptions() const;
virtual const std::unordered_set<std::string>& getDeprecatedOptions() const;
@ -130,7 +124,6 @@ protected:
Logger::Ptr _log;
private:
LogLevel _logLevel = LogLevel::None;
bool _exclusiveAsyncRequests = false;
};

View File

@ -0,0 +1,40 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <algorithm>
#include "error.hpp"
namespace vpu {
template<class Key, class Value, template<class...> class Map>
inline std::vector<Key> getKeys(const Map<Key, Value>& map) {
auto keys = std::vector<Key>{};
keys.reserve(map.size());
std::transform(map.cbegin(), map.cend(), std::back_inserter(keys), [](const std::pair<Key, Value>& entry) { return entry.first; });
return keys;
}
template<class Key, class Value, template<class...> class Map>
inline std::vector<Value> getValues(const Map<Key, Value>& map) {
auto values = std::vector<Value>{};
values.reserve(map.size());
std::transform(map.cbegin(), map.cend(), std::back_inserter(values), [](const std::pair<Key, Value>& entry) { return entry.second; });
return values;
}
template<class Key, class Value, template<class...> class Map>
inline Map<Value, Key> inverse(const Map<Key, Value>& map) {
auto inverted = Map<Value, Key>{};
for (const auto& entry : map) {
const auto& insertion = inverted.emplace(entry.second, entry.first);
VPU_THROW_UNLESS(insertion.second, "Could not invert map {} due to duplicated value \"{}\"", map, entry.second);
}
return inverted;
}
} // namespace vpu
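containers.hpp adds three small map helpers: getKeys() and getValues() copy a map's keys or values into vectors, and inverse() flips a map, failing through VPU_THROW_UNLESS when two keys share a value. A hedged usage sketch, assuming the header above is available:
// --- Illustrative sketch (not part of the upstream diff) ---------------------
#include <map>
#include <string>
#include "vpu/utils/containers.hpp"
void example() {
    const std::map<std::string, int> sizes = {{"small", 1}, {"large", 3}};
    const auto names  = vpu::getKeys(sizes);    // {"large", "small"} in map order
    const auto counts = vpu::getValues(sizes);  // {3, 1} in map order
    // {{1, "small"}, {3, "large"}}; a duplicated value would throw.
    const auto byCount = vpu::inverse(sizes);
    (void)names; (void)counts; (void)byCount;
}
// -----------------------------------------------------------------------------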

View File

@ -29,6 +29,11 @@ public:
using VPUException::VPUException;
};
class UnsupportedConfigurationOptionException : public VPUException {
public:
using VPUException::VPUException;
};
template <class Exception, typename... Args>
void throwFormat(const char* fileName, int lineNumber, const char* messageFormat, Args&&... args) {
IE_THROW(GeneralError) << '\n' << fileName << ':' << lineNumber << ' '
@ -47,13 +52,20 @@ void throwFormat(const char* fileName, int lineNumber, const char* messageFormat
} \
} while (false)
#define VPU_THROW_UNSUPPORTED_UNLESS(condition, ...) \
#define VPU_THROW_UNSUPPORTED_LAYER_UNLESS(condition, ...) \
do { \
if (!(condition)) { \
::vpu::details::throwFormat<::vpu::details::UnsupportedLayerException>(__FILE__, __LINE__, __VA_ARGS__); \
} \
} while (false)
#define VPU_THROW_UNSUPPORTED_OPTION_UNLESS(condition, ...) \
do { \
if (!(condition)) { \
::vpu::details::throwFormat<::vpu::details::UnsupportedConfigurationOptionException>(__FILE__, __LINE__, __VA_ARGS__); \
} \
} while (false)
#ifdef NDEBUG
# define VPU_INTERNAL_CHECK(condition, ...) \
do { \

View File

@ -0,0 +1,21 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "vpu/utils/enums.hpp"
namespace vpu {
VPU_DECLARE_ENUM(LogLevel,
None,
Fatal, /* used for very severe error events that will most probably cause the application to terminate */
Error, /* reporting events which are not expected during normal execution, containing probable reason */
Warning, /* indicating events which are not usual and might lead to errors later */
Info, /* short enough messages about ongoing activity in the process */
Debug, /* more fine-grained messages with references to particular data and explanations */
Trace /* involved and detailed information about execution, helps to trace the execution flow, produces huge output */
)
} // namespace vpu

View File

@ -13,6 +13,7 @@
#include <vpu/utils/enums.hpp>
#include <vpu/utils/auto_scope.hpp>
#include <vpu/utils/io.hpp>
#include <vpu/utils/log_level.hpp>
namespace vpu {
@ -39,20 +40,6 @@ OutputStream::Ptr fileOutput(const std::string& fileName);
OutputStream::Ptr defaultOutput(const std::string& fileName = std::string());
//
// Logger
//
VPU_DECLARE_ENUM(LogLevel,
None,
Fatal, /* used for very severe error events that will most probably cause the application to terminate */
Error, /* reporting events which are not expected during normal execution, containing probable reason */
Warning, /* indicating events which are not usual and might lead to errors later */
Info, /* short enough messages about ongoing activity in the process */
Debug, /* more fine-grained messages with references to particular data and explanations */
Trace /* involved and detailed information about execution, helps to trace the execution flow, produces huge output */
)
class Logger final {
public:
using Ptr = std::shared_ptr<Logger>;

View File

@ -0,0 +1,10 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vpu/configuration/as_parameter_enabler.hpp>
InferenceEngine::Parameter AsParameterEnabler::asParameter(const std::string& value) {
return {value};
}

View File

@ -0,0 +1,45 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "vpu/private_plugin_config.hpp"
#include "vpu/utils/containers.hpp"
#include "vpu/configuration/options/copy_optimization.hpp"
#include "vpu/configuration/switch_converters.hpp"
#include "vpu/configuration/plugin_configuration.hpp"
namespace vpu {
void CopyOptimizationOption::validate(const std::string& value) {
const auto& converters = string2switch();
VPU_THROW_UNLESS(converters.count(value) != 0, R"(unexpected copy optimization option value "{}", only {} are supported)", value, getKeys(converters));
}
void CopyOptimizationOption::validate(const PluginConfiguration& configuration) {
validate(configuration[key()]);
}
std::string CopyOptimizationOption::key() {
return InferenceEngine::MYRIAD_COPY_OPTIMIZATION;
}
details::Access CopyOptimizationOption::access() {
return details::Access::Private;
}
details::Category CopyOptimizationOption::category() {
return details::Category::CompileTime;
}
std::string CopyOptimizationOption::defaultValue() {
return InferenceEngine::PluginConfigParams::YES;
}
CopyOptimizationOption::value_type CopyOptimizationOption::parse(const std::string& value) {
const auto& converters = string2switch();
VPU_THROW_UNSUPPORTED_OPTION_UNLESS(converters.count(value) != 0, R"(unexpected copy optimization option value "{}", only {} are supported)",
value, getKeys(converters));
return converters.at(value);
}
} // namespace vpu
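Every migrated option implements the same static interface: key(), two validate() overloads (single value and whole configuration), defaultValue(), parse(), plus access() and category() metadata. A hedged sketch of exercising that interface directly; the literal "YES" is the same string CONFIG_VALUE(YES) expands to.
// --- Illustrative sketch (not part of the upstream diff) ---------------------
#include <string>
#include "vpu/configuration/options/copy_optimization.hpp"
void example() {
    // The key resolves to the private MYRIAD_COPY_OPTIMIZATION string.
    const std::string key = vpu::CopyOptimizationOption::key();
    // validate() throws for anything outside the YES/NO switch table.
    vpu::CopyOptimizationOption::validate("YES");
    // parse() maps the accepted strings onto the option's bool value_type.
    const bool enabled = vpu::CopyOptimizationOption::parse("YES");
    (void)key; (void)enabled;
}
// -----------------------------------------------------------------------------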

View File

@ -0,0 +1,64 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "vpu/configuration/options/log_level.hpp"
#include "vpu/utils/log_level.hpp"
#include "vpu/utils/containers.hpp"
#include "vpu/configuration/plugin_configuration.hpp"
#include "ie_plugin_config.hpp"
#include <unordered_map>
namespace vpu {
namespace {
const std::unordered_map<std::string, LogLevel>& string2level() {
static const std::unordered_map<std::string, LogLevel> converters = {
{CONFIG_VALUE(LOG_NONE), LogLevel::None},
{CONFIG_VALUE(LOG_ERROR), LogLevel::Error},
{CONFIG_VALUE(LOG_WARNING), LogLevel::Warning},
{CONFIG_VALUE(LOG_INFO), LogLevel::Info},
{CONFIG_VALUE(LOG_DEBUG), LogLevel::Debug},
{CONFIG_VALUE(LOG_TRACE), LogLevel::Trace},
};
return converters;
}
} // namespace
void LogLevelOption::validate(const std::string& value) {
const auto& converters = string2level();
VPU_THROW_UNLESS(converters.count(value) != 0, R"(unexpected log level option value "{}", only {} are supported)", value, getKeys(converters));
}
void LogLevelOption::validate(const PluginConfiguration& configuration) {
validate(configuration[key()]);
}
std::string LogLevelOption::key() {
return InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL;
}
details::Access LogLevelOption::access() {
return details::Access::Public;
}
details::Category LogLevelOption::category() {
return details::Category::CompileTime;
}
std::string LogLevelOption::defaultValue() {
return InferenceEngine::PluginConfigParams::LOG_NONE;
}
LogLevelOption::value_type LogLevelOption::parse(const std::string& value) {
const auto& converters = string2level();
VPU_THROW_UNSUPPORTED_OPTION_UNLESS(converters.count(value) != 0, R"(unexpected log level option value "{}", only {} are supported)",
value, getKeys(converters));
return converters.at(value);
}
} // namespace vpu

View File

@ -0,0 +1,114 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "vpu/utils/error.hpp"
#include "vpu/configuration/plugin_configuration.hpp"
#include "ie_plugin_config.hpp"
namespace vpu {
namespace details {
ConfigurationOptionConcept& ConfigurationEntry::get() {
return *m_value;
}
const ConfigurationOptionConcept& ConfigurationEntry::get() const {
return *m_value;
}
bool ConfigurationEntry::isPrivate() const {
return m_access == Access::Private;
}
bool ConfigurationEntry::isDeprecated() const {
return m_deprecation == Deprecation::On;
}
Category ConfigurationEntry::getCategory() const {
return m_category;
}
std::string ConfigurationEntry::key() const {
return m_value->key();
}
} // namespace details
PluginConfiguration::PluginConfiguration() : logger(std::make_shared<Logger>("Configuration", LogLevel::Warning, consoleOutput())) {}
std::unordered_set<std::string> PluginConfiguration::getPublicKeys() const {
auto publicKeys = std::unordered_set<std::string>{};
for (const auto& entry : concepts) {
const auto& key = entry.first;
const auto& option = entry.second;
if (option.isPrivate()) {
continue;
}
publicKeys.insert(key);
}
return publicKeys;
}
bool PluginConfiguration::supports(const std::string& key) const {
return concepts.count(key) != 0;
}
void PluginConfiguration::from(const std::map<std::string, std::string>& config) {
create(config);
}
void PluginConfiguration::fromAtRuntime(const std::map<std::string, std::string>& config) {
create(config, Mode::RunTime);
}
void PluginConfiguration::validate() const {
for (const auto& option : concepts) {
option.second.get().validate(*this);
}
}
void PluginConfiguration::create(const std::map<std::string, std::string>& config, Mode mode) {
for (const auto& entry : config) {
const auto& key = entry.first;
validate(key);
const auto& optionConcept = concepts.at(key);
if (mode == Mode::RunTime && optionConcept.getCategory() == details::Category::CompileTime) {
logger->warning("Configuration option \"{}\" is used after network is loaded. Its value is going to be ignored.", key);
continue;
}
const auto& value = entry.second;
set(key, value);
}
}
InferenceEngine::Parameter PluginConfiguration::asParameter(const std::string& key) const {
const auto& value = operator[](key);
return concepts.at(key).get().asParameter(value);
}
void PluginConfiguration::validate(const std::string& key) const {
VPU_THROW_UNSUPPORTED_OPTION_UNLESS(supports(key), "Encountered an unsupported key {}, supported keys are {}", key, getPublicKeys());
if (concepts.at(key).isDeprecated()) {
logger->warning("Encountered deprecated option {} usage, consider replacing it with {} option", key, concepts.at(key).key());
}
}
const std::string& PluginConfiguration::operator[](const std::string& key) const {
validate(key);
return values.at(concepts.at(key).key());
}
void PluginConfiguration::set(const std::string& key, const std::string& value) {
validate(key);
const auto& optionConcept = concepts.at(key).get();
optionConcept.validate(value);
values[optionConcept.key()] = value;
}
} // namespace vpu
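The implementation also shows how deprecated aliases are meant to coexist with canonical keys: validate() only warns when a deprecated key is used, and set()/operator[] always resolve to the canonical Option::key() before touching the values map, so old and new spellings share one slot. A hedged sketch; the VPU_COPY_OPTIMIZATION alias below is purely hypothetical and exists only to illustrate registerDeprecatedOption().
// --- Illustrative sketch (not part of the upstream diff) ---------------------
#include "vpu/configuration/plugin_configuration.hpp"
#include "vpu/configuration/options/copy_optimization.hpp"
// Hypothetical deprecated alias, for illustration only.
static const char DEPRECATED_COPY_OPTIMIZATION_KEY[] = "VPU_COPY_OPTIMIZATION";
void example() {
    vpu::PluginConfiguration configuration;
    configuration.registerOption<vpu::CopyOptimizationOption>();
    configuration.registerDeprecatedOption<vpu::CopyOptimizationOption>(DEPRECATED_COPY_OPTIMIZATION_KEY);
    // Setting through the alias logs a deprecation warning and stores the value
    // under the canonical key, so the typed getter still sees it.
    configuration.set(DEPRECATED_COPY_OPTIMIZATION_KEY, "NO");
    const bool enabled = configuration.get<vpu::CopyOptimizationOption>();
    (void)enabled;
}
// -----------------------------------------------------------------------------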

View File

@ -0,0 +1,25 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "vpu/utils/containers.hpp"
#include "vpu/configuration/switch_converters.hpp"
#include "ie_plugin_config.hpp"
namespace vpu {
const std::unordered_map<std::string, bool>& string2switch() {
static const std::unordered_map<std::string, bool> converters = {
{CONFIG_VALUE(NO), false},
{CONFIG_VALUE(YES), true}
};
return converters;
}
const std::unordered_map<bool, std::string>& switch2string() {
static const auto converters = inverse(string2switch());
return converters;
}
} // namespace vpu
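string2switch() maps the Inference Engine YES/NO literals onto bool, and switch2string() is built once by running inverse() over that table. A small hedged usage sketch:
// --- Illustrative sketch (not part of the upstream diff) ---------------------
#include "vpu/configuration/switch_converters.hpp"
void example() {
    const bool enabled = vpu::string2switch().at("YES");   // "YES" -> true
    const auto& text   = vpu::switch2string().at(enabled); // true  -> "YES"
    (void)text;
}
// -----------------------------------------------------------------------------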

View File

@ -169,7 +169,6 @@ void ParsedConfig::parse(const std::map<std::string, std::string>& config) {
setOption(_compileConfig.dumpAllPasses, switches, config, ie::MYRIAD_DUMP_ALL_PASSES);
setOption(_compileConfig.detectBatch, switches, config, ie::MYRIAD_DETECT_NETWORK_BATCH);
setOption(_compileConfig.copyOptimization, switches, config, ie::MYRIAD_COPY_OPTIMIZATION);
setOption(_compileConfig.packDataInCmx, switches, config, ie::MYRIAD_PACK_DATA_IN_CMX);
setOption(_compileConfig.ignoreUnknownLayers, switches, config, ie::MYRIAD_IGNORE_UNKNOWN_LAYERS);
setOption(_compileConfig.hwOptimization, switches, config, ie::MYRIAD_ENABLE_HW_ACCELERATION);

View File

@ -59,13 +59,7 @@ void ParsedConfigBase::update(
}
const std::unordered_set<std::string>& ParsedConfigBase::getCompileOptions() const {
IE_SUPPRESS_DEPRECATED_START
static const std::unordered_set<std::string> options = {
CONFIG_KEY(LOG_LEVEL),
VPU_CONFIG_KEY(LOG_LEVEL)
};
IE_SUPPRESS_DEPRECATED_END
static const std::unordered_set<std::string> options;
return options;
}
@ -73,8 +67,6 @@ const std::unordered_set<std::string>& ParsedConfigBase::getRunTimeOptions() con
IE_SUPPRESS_DEPRECATED_START
static const std::unordered_set<std::string> options = {
CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
CONFIG_KEY(LOG_LEVEL),
VPU_CONFIG_KEY(LOG_LEVEL)
};
IE_SUPPRESS_DEPRECATED_END
@ -82,37 +74,12 @@ IE_SUPPRESS_DEPRECATED_END
}
const std::unordered_set<std::string>& ParsedConfigBase::getDeprecatedOptions() const {
IE_SUPPRESS_DEPRECATED_START
static const std::unordered_set<std::string> options = {
VPU_CONFIG_KEY(LOG_LEVEL)
};
IE_SUPPRESS_DEPRECATED_END
static const std::unordered_set<std::string> options;
return options;
}
void ParsedConfigBase::parse(const std::map<std::string, std::string>& config) {
static const std::unordered_map<std::string, LogLevel> logLevels = {
{ CONFIG_VALUE(LOG_NONE), LogLevel::None },
{ CONFIG_VALUE(LOG_ERROR), LogLevel::Error },
{ CONFIG_VALUE(LOG_WARNING), LogLevel::Warning },
{ CONFIG_VALUE(LOG_INFO), LogLevel::Info },
{ CONFIG_VALUE(LOG_DEBUG), LogLevel::Debug },
{ CONFIG_VALUE(LOG_TRACE), LogLevel::Trace }
};
setOption(_logLevel, logLevels, config, CONFIG_KEY(LOG_LEVEL));
setOption(_exclusiveAsyncRequests, switches, config, CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS));
IE_SUPPRESS_DEPRECATED_START
setOption(_logLevel, logLevels, config, VPU_CONFIG_KEY(LOG_LEVEL));
IE_SUPPRESS_DEPRECATED_END
#ifndef NDEBUG
if (const auto envVar = std::getenv("IE_VPU_LOG_LEVEL")) {
_logLevel = logLevels.at(envVar);
}
#endif
}
std::unordered_set<std::string> ParsedConfigBase::merge(

View File

@ -48,8 +48,13 @@ function(add_graph_transformer_target TARGET_NAME STATIC_IE)
target_link_libraries(${TARGET_NAME} PUBLIC pugixml vpu_common_lib)
endif()
target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES}
PRIVATE openvino::itt)
target_link_libraries(${TARGET_NAME}
PUBLIC
${NGRAPH_LIBRARIES}
PRIVATE
openvino::itt
mvnc # TODO: remove once all options are migrated
)
if(WIN32)
target_compile_definitions(${TARGET_NAME} PRIVATE NOMINMAX)

View File

@ -8,28 +8,29 @@
#include <vpu/model/model.hpp>
#include <vpu/utils/logger.hpp>
#include <vpu/utils/profiling.hpp>
#include <mvnc.h>
namespace vpu {
struct DeviceResources {
static int numShaves(const Platform& platform);
static int numSlices(const Platform& platform);
static int numShaves(const ncDevicePlatform_t& platform);
static int numSlices(const ncDevicePlatform_t& platform);
static int numStreams();
};
struct DefaultAllocation {
static int numStreams(const Platform& platform, const CompilationConfig& configuration);
static int numSlices(const Platform& platform, int numStreams);
static int numShaves(const Platform& platform, int numStreams, int numSlices);
static int numStreams(const ncDevicePlatform_t& platform, const PluginConfiguration& configuration);
static int numSlices(const ncDevicePlatform_t& platform, int numStreams);
static int numShaves(const ncDevicePlatform_t& platform, int numStreams, int numSlices);
static int tilingCMXLimit(int numSlices);
};
struct CompileEnv final {
public:
Platform platform;
ncDevicePlatform_t platform;
Resources resources;
CompilationConfig config;
PluginConfiguration config;
Logger::Ptr log;
@ -49,14 +50,14 @@ public:
static const CompileEnv* getOrNull();
static void init(
Platform platform,
const CompilationConfig& config,
const Logger::Ptr& log);
static void updateConfig(const CompilationConfig& config);
ncDevicePlatform_t platform,
const PluginConfiguration& config,
const Logger::Ptr& log);
static void updateConfig(const PluginConfiguration& config);
static void free();
private:
explicit CompileEnv(Platform platform);
explicit CompileEnv(ncDevicePlatform_t platform);
};
} // namespace vpu

View File

@ -21,108 +21,14 @@
#include <vpu/utils/perf_report.hpp>
#include <vpu/utils/logger.hpp>
#include <vpu/utils/optional.hpp>
#include <vpu/configuration/plugin_configuration.hpp>
#include "mvnc.h"
namespace vpu {
namespace ie = InferenceEngine;
//
// CompilationConfig
//
VPU_DECLARE_ENUM(Platform,
MYRIAD_2 = 2450,
MYRIAD_X = 2480,
)
struct CompilationConfig final {
//
// Compilation options
//
int numSHAVEs = -1;
int numCMXSlices = -1;
int numExecutors = -1;
int tilingCMXLimitKB = -1;
bool hwOptimization = true;
bool hwExtraSplit = false;
std::string irWithVpuScalesDir;
std::string customLayers;
bool detectBatch = true;
Optional<bool> copyOptimization;
Optional<bool> injectSwOps;
Optional<bool> packDataInCmx;
bool mergeHwPoolToConv = true;
bool hwDilation = false;
bool forceDeprecatedCnnConversion = false;
bool enableEarlyEltwiseReLUFusion = true;
std::map<std::string, std::vector<int>> ioStrides;
//
// Debug options
//
ie::details::caseless_set<std::string> hwWhiteList;
ie::details::caseless_set<std::string> hwBlackList;
bool hwDisabled(const std::string& layerName) const {
if (!hwWhiteList.empty()) {
return hwWhiteList.count(layerName) == 0;
}
if (!hwBlackList.empty()) {
return hwBlackList.count(layerName) != 0;
}
return false;
}
ie::details::caseless_set<std::string> noneLayers;
bool skipAllLayers() const {
if (noneLayers.size() == 1) {
const auto& val = *noneLayers.begin();
return val == "*";
}
return false;
}
bool skipLayerType(const std::string& layerType) const {
return noneLayers.count(layerType) != 0;
}
bool ignoreUnknownLayers = false;
std::string dumpInternalGraphFileName;
std::string dumpInternalGraphDirectory;
bool dumpAllPasses;
bool disableReorder = false; // TODO: rename to enableReorder and switch logic.
bool disableConvertStages = false;
bool enablePermuteMerging = true;
bool enableReplWithSCRelu = false;
bool enableReplaceWithReduceMean = true;
bool enableTensorIteratorUnrolling = false;
bool forcePureTensorIterator = false;
bool enableMemoryTypesAnnotation = false;
bool enableWeightsAnalysis = true;
bool checkPreprocessingInsideModel = true;
bool enableCustomReshapeParam = false;
//
// Deprecated options
//
float inputScale = 1.0f;
float inputBias = 0.0f;
};
//
// DataInfo
//
@ -165,17 +71,17 @@ struct CompiledGraph final {
// compileNetwork
//
CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, Platform platform, const CompilationConfig& config, const Logger::Ptr& log,
const ie::ICore* core);
CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, ncDevicePlatform_t platform, const PluginConfiguration& config, const Logger::Ptr& log,
const ie::ICore* core);
CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const CompilationConfig& subConfig, const ie::ICore* core);
CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const PluginConfiguration& subConfig, const ie::ICore* core);
//
// getSupportedLayers
//
std::set<std::string> getSupportedLayers(const ie::CNNNetwork& network, Platform platform, const CompilationConfig& config, const Logger::Ptr& log,
const ie::ICore* core);
std::set<std::string> getSupportedLayers(const ie::CNNNetwork& network, ncDevicePlatform_t platform, const PluginConfiguration& config, const Logger::Ptr& log,
const ie::ICore* core);
//
// Blob version and checks

View File

@ -12,8 +12,8 @@ namespace vpu {
CompiledGraph::Ptr compileModel(
const Model& model,
Platform platform,
const CompilationConfig& config,
ncDevicePlatform_t platform,
const PluginConfiguration& config,
const Logger::Ptr& log);
} // namespace vpu

View File

@ -85,12 +85,12 @@ void BackEnd::dumpModel(
std::string fileName;
if (!env.config.dumpInternalGraphFileName.empty()) {
fileName = fileNameNoExt(env.config.dumpInternalGraphFileName);
} else if (!env.config.dumpInternalGraphDirectory.empty()) {
if (!env.config.compileConfig().dumpInternalGraphFileName.empty()) {
fileName = fileNameNoExt(env.config.compileConfig().dumpInternalGraphFileName);
} else if (!env.config.compileConfig().dumpInternalGraphDirectory.empty()) {
fileName = formatString(
"%s/vpu_graph_%f%f%i_%s",
env.config.dumpInternalGraphDirectory,
env.config.compileConfig().dumpInternalGraphDirectory,
std::setw(2), std::setfill('0'),
model->attrs().get<int>("index"),
replaceBadCharacters(model->name()));
@ -99,7 +99,7 @@ void BackEnd::dumpModel(
}
if (!postfix.empty()) {
if (!env.config.dumpAllPasses) {
if (!env.config.compileConfig().dumpAllPasses) {
return;
}

View File

@ -29,7 +29,7 @@ void FrontEnd::detectNetworkBatch(
using PrecisionsMap = std::map<std::string, ie::Precision>;
const auto& env = CompileEnv::get();
if (!env.config.detectBatch) {
if (!env.config.compileConfig().detectBatch) {
// skip batch extraction step and go as is
return;
}

View File

@ -436,7 +436,7 @@ void FrontEnd::processTrivialCases(const Model& model) {
void FrontEnd::defaultOnUnsupportedLayerCallback(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs,
const std::string& extraMessage) {
const auto& env = CompileEnv::get();
VPU_THROW_UNSUPPORTED_UNLESS(env.config.ignoreUnknownLayers, "Failed to compile layer \"%v\": %v", layer->name, extraMessage);
VPU_THROW_UNSUPPORTED_LAYER_UNLESS(env.config.compileConfig().ignoreUnknownLayers, "Failed to compile layer \"%v\": %v", layer->name, extraMessage);
_stageBuilder->addNoneStage(model, layer->name, layer, inputs, outputs);
}
@ -466,15 +466,15 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,
// Parse custom layers
//
if (!env.config.customLayers.empty()) {
env.log->trace("Parse custom layers : %s", env.config.customLayers);
if (!env.config.compileConfig().customLayers.empty()) {
env.log->trace("Parse custom layers : %s", env.config.compileConfig().customLayers);
VPU_LOGGER_SECTION(env.log);
if (env.platform != Platform::MYRIAD_X) {
if (env.platform != ncDevicePlatform_t::NC_MYRIAD_X) {
VPU_THROW_FORMAT("Custom layers are not supported for %v platforms", env.platform);
}
_customLayers = CustomLayer::loadFromFile(env.config.customLayers);
_customLayers = CustomLayer::loadFromFile(env.config.compileConfig().customLayers);
}
//
@ -494,7 +494,7 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,
env.log->trace("Update IE Network");
VPU_LOGGER_SECTION(env.log);
if (network.getFunction() && env.config.forceDeprecatedCnnConversion) {
if (network.getFunction() && env.config.compileConfig().forceDeprecatedCnnConversion) {
network = convertNetwork(network);
}
@ -545,7 +545,7 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,
processTrivialCases(model);
if (!CompileEnv::get().config.disableConvertStages) {
if (!CompileEnv::get().config.compileConfig().disableConvertStages) {
addDataTypeConvertStages(model);
}
@ -567,7 +567,7 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,
getInputAndOutputData(model, layer, inputs, outputs);
if (env.config.skipAllLayers() || env.config.skipLayerType(layer->type)) {
if (env.config.compileConfig().skipAllLayers() || env.config.compileConfig().skipLayerType(layer->type)) {
_stageBuilder->addNoneStage(model, layer->name, layer, inputs, outputs);
supportedLayer(layer);
continue;

View File

@ -22,7 +22,7 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
env.log->trace("Add Data type conversion stages");
VPU_LOGGER_SECTION(env.log);
const bool hasScaleBias = env.config.inputScale != 1.0f || env.config.inputBias != 0.0f;
const bool hasScaleBias = env.config.compileConfig().inputScale != 1.0f || env.config.compileConfig().inputBias != 0.0f;
for (const auto& input : model->datas()) {
if (input->usage() != DataUsage::Input) {
@ -38,11 +38,11 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
env.log->trace("Apply deprecated scale/bias parameters");
std::ostringstream postfix;
if (env.config.inputScale != 1.0f) {
postfix << "@SCALE=" << InferenceEngine::CNNLayer::ie_serialize_float(env.config.inputScale);
if (env.config.compileConfig().inputScale != 1.0f) {
postfix << "@SCALE=" << InferenceEngine::CNNLayer::ie_serialize_float(env.config.compileConfig().inputScale);
}
if (env.config.inputBias != 0.0f) {
postfix << "@BIAS=" << InferenceEngine::CNNLayer::ie_serialize_float(env.config.inputBias);
if (env.config.compileConfig().inputBias != 0.0f) {
postfix << "@BIAS=" << InferenceEngine::CNNLayer::ie_serialize_float(env.config.compileConfig().inputBias);
}
const auto scaledInput = model->duplicateData(
@ -55,9 +55,9 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
model,
scaledInput->name(),
nullptr,
env.config.inputScale,
env.config.compileConfig().inputScale,
1.0f,
env.config.inputBias,
env.config.compileConfig().inputBias,
input,
scaledInput);
}
@ -89,8 +89,8 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
inputFP16->name(),
input,
inputFP16,
env.config.inputScale,
env.config.inputBias);
env.config.compileConfig().inputScale,
env.config.compileConfig().inputBias);
break;
}

View File

@ -25,8 +25,8 @@ void FrontEnd::parseInputAndOutputData(const Model& model) {
VPU_LOGGER_SECTION(env.log);
const auto parseIOStrides = [&env](const std::string& name, const Data& data) {
const auto& match = env.config.ioStrides.find(name);
if (match == env.config.ioStrides.end()) {
const auto& match = env.config.compileConfig().ioStrides.find(name);
if (match == env.config.compileConfig().ioStrides.end()) {
return;
}

View File

@ -21,7 +21,7 @@ void FrontEnd::unrollLoops(ie::CNNNetwork& network) {
env.log->trace("Unroll TensorIterator loops");
VPU_LOGGER_SECTION(env.log);
if (!env.config.irWithVpuScalesDir.empty()) {
if (!env.config.compileConfig().irWithVpuScalesDir.empty()) {
// TODO: Scale dumps do not work with IRs that contain Tensor Iterator layers, because we cannot serialize them. #-23429
for (auto iterator = ie::details::CNNNetworkIterator(network); iterator != ie::details::CNNNetworkIterator(); ++iterator) {
const auto& layer = *iterator;
@ -30,11 +30,11 @@ void FrontEnd::unrollLoops(ie::CNNNetwork& network) {
}
}
if (env.config.forcePureTensorIterator) {
if (env.config.compileConfig().forcePureTensorIterator) {
return;
}
if (env.config.enableTensorIteratorUnrolling) {
if (env.config.compileConfig().enableTensorIteratorUnrolling) {
ie::NetPass::UnrollTI(network);
} else {
// Try to convert network to a RNN sequence due to performance reasons

View File

@ -42,6 +42,7 @@
#include <vpu/utils/auto_scope.hpp>
#include <vpu/utils/dot_io.hpp>
#include <vpu/utils/file_system.hpp>
#include <mvnc.h>
namespace vpu {
@ -55,7 +56,7 @@ thread_local CompileEnv* g_compileEnv = nullptr;
} // namespace
CompileEnv::CompileEnv(Platform platform) : platform(platform) {}
CompileEnv::CompileEnv(ncDevicePlatform_t platform) : platform(platform) {}
const CompileEnv& CompileEnv::get() {
IE_ASSERT(g_compileEnv != nullptr);
@ -70,7 +71,7 @@ const CompileEnv* CompileEnv::getOrNull() {
return g_compileEnv;
}
void CompileEnv::init(Platform platform, const CompilationConfig& config, const Logger::Ptr& log) {
void CompileEnv::init(ncDevicePlatform_t platform, const PluginConfiguration& config, const Logger::Ptr& log) {
g_compileEnv = new CompileEnv(platform);
g_compileEnv->config = config;
g_compileEnv->log = log;
@ -79,31 +80,37 @@ void CompileEnv::init(Platform platform, const CompilationConfig& config, const
g_compileEnv->profile.setLogger(log);
#endif
if (platform == Platform::MYRIAD_2) {
g_compileEnv->config.hwOptimization = false;
if (platform == ncDevicePlatform_t::NC_MYRIAD_2) {
g_compileEnv->config.compileConfig().hwOptimization = false;
}
VPU_THROW_UNLESS(g_compileEnv->config.numSHAVEs <= g_compileEnv->config.numCMXSlices,
VPU_THROW_UNLESS(g_compileEnv->config.compileConfig().numSHAVEs <= g_compileEnv->config.compileConfig().numCMXSlices,
R"(Value of configuration option ("{}") must be not greater than value of configuration option ("{}"), but {} > {} are provided)",
ie::MYRIAD_NUMBER_OF_SHAVES, ie::MYRIAD_NUMBER_OF_CMX_SLICES, config.numSHAVEs, config.numCMXSlices);
ie::MYRIAD_NUMBER_OF_SHAVES, ie::MYRIAD_NUMBER_OF_CMX_SLICES, config.compileConfig().numSHAVEs, config.compileConfig().numCMXSlices);
const auto numExecutors = config.numExecutors != -1 ? config.numExecutors : DefaultAllocation::numStreams(platform, config);
const auto numExecutors = config.compileConfig().numExecutors != -1 ? config.compileConfig().numExecutors : DefaultAllocation::numStreams(platform, config);
VPU_THROW_UNLESS(numExecutors >= 1 && numExecutors <= DeviceResources::numStreams(),
R"(Value of configuration option ("{}") must be in the range [{}, {}], actual is "{}")",
ie::MYRIAD_THROUGHPUT_STREAMS, 1, DeviceResources::numStreams(), numExecutors);
const auto numSlices = config.numCMXSlices != -1 ? config.numCMXSlices : DefaultAllocation::numSlices(platform, numExecutors);
const auto numSlices = config.compileConfig().numCMXSlices != -1
? config.compileConfig().numCMXSlices
: DefaultAllocation::numSlices(platform, numExecutors);
VPU_THROW_UNLESS(numSlices >= 1 && numSlices <= DeviceResources::numSlices(platform),
R"(Value of configuration option ("{}") must be in the range [{}, {}], actual is "{}")",
ie::MYRIAD_NUMBER_OF_CMX_SLICES, 1, DeviceResources::numSlices(platform), numSlices);
int defaultCmxLimit = DefaultAllocation::tilingCMXLimit(numSlices);
const auto tilingCMXLimit = config.tilingCMXLimitKB != -1 ? std::min(config.tilingCMXLimitKB * 1024, defaultCmxLimit) : defaultCmxLimit;
const auto tilingCMXLimit = config.compileConfig().tilingCMXLimitKB != -1
? std::min(config.compileConfig().tilingCMXLimitKB * 1024, defaultCmxLimit)
: defaultCmxLimit;
VPU_THROW_UNLESS(tilingCMXLimit >= 0,
R"(Value of configuration option ("{}") must be greater than {}, actual is "{}")",
ie::MYRIAD_TILING_CMX_LIMIT_KB, 0, tilingCMXLimit);
const auto numShaves = config.numSHAVEs != -1 ? config.numSHAVEs : DefaultAllocation::numShaves(platform, numExecutors, numSlices);
const auto numShaves = config.compileConfig().numSHAVEs != -1
? config.compileConfig().numSHAVEs
: DefaultAllocation::numShaves(platform, numExecutors, numSlices);
VPU_THROW_UNLESS(numShaves >= 1 && numShaves <= DeviceResources::numShaves(platform),
R"(Value of configuration option ("{}") must be in the range [{}, {}], actual is "{}")",
ie::MYRIAD_NUMBER_OF_SHAVES, 1, DeviceResources::numShaves(platform), numShaves);
@ -123,7 +130,7 @@ void CompileEnv::init(Platform platform, const CompilationConfig& config, const
g_compileEnv->initialized = true;
}
void CompileEnv::updateConfig(const CompilationConfig& config) {
void CompileEnv::updateConfig(const PluginConfiguration& config) {
IE_ASSERT(g_compileEnv != nullptr);
IE_ASSERT(g_compileEnv->initialized);
@ -165,9 +172,9 @@ CompiledGraph::Ptr compileImpl(const ie::CNNNetwork& network, const ie::ICore* c
middleEnd->run(model);
if (!env.config.irWithVpuScalesDir.empty()) {
network.serialize(env.config.irWithVpuScalesDir + "/" + network.getName() + "_scales.xml",
env.config.irWithVpuScalesDir + "/" + network.getName() + "_scales.bin");
if (!env.config.compileConfig().irWithVpuScalesDir.empty()) {
network.serialize(env.config.compileConfig().irWithVpuScalesDir + "/" + network.getName() + "_scales.xml",
env.config.compileConfig().irWithVpuScalesDir + "/" + network.getName() + "_scales.bin");
}
return backEnd->build(model, frontEnd->origLayers());
@ -191,8 +198,8 @@ CompiledGraph::Ptr compileImpl(const Model& model) {
} // namespace
CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, Platform platform, const CompilationConfig& config, const Logger::Ptr& log,
const ie::ICore* core) {
CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, ncDevicePlatform_t platform, const PluginConfiguration& config, const Logger::Ptr& log,
const ie::ICore* core) {
CompileEnv::init(platform, config, log);
AutoScope autoDeinit([] {
CompileEnv::free();
@ -205,8 +212,8 @@ CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, Platform platfo
CompiledGraph::Ptr compileModel(
const Model& model,
Platform platform,
const CompilationConfig& config,
ncDevicePlatform_t platform,
const PluginConfiguration& config,
const Logger::Ptr& log) {
CompileEnv::init(platform, config, log);
AutoScope autoDeinit([] {
@ -218,7 +225,7 @@ CompiledGraph::Ptr compileModel(
return compileImpl(model);
}
CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const CompilationConfig& subConfig, const ie::ICore* core) {
CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const PluginConfiguration& subConfig, const ie::ICore* core) {
VPU_PROFILE(compileSubNetwork);
const auto& env = CompileEnv::get();
@ -238,11 +245,11 @@ CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const Compil
//
std::set<std::string> getSupportedLayers(
const ie::CNNNetwork& network,
Platform platform,
const CompilationConfig& config,
const Logger::Ptr& log,
const ie::ICore* core) {
const ie::CNNNetwork& network,
ncDevicePlatform_t platform,
const PluginConfiguration& config,
const Logger::Ptr& log,
const ie::ICore* core) {
CompileEnv::init(platform, config, log);
AutoScope autoDeinit([] {
CompileEnv::free();
@ -255,28 +262,28 @@ std::set<std::string> getSupportedLayers(
return frontEnd->checkSupportedLayers(network);
}
int DeviceResources::numShaves(const Platform& platform) {
return platform == Platform::MYRIAD_2 ? 12 : 16;
int DeviceResources::numShaves(const ncDevicePlatform_t& platform) {
return platform == ncDevicePlatform_t::NC_MYRIAD_2 ? 12 : 16;
}
int DeviceResources::numSlices(const Platform& platform) {
return platform == Platform::MYRIAD_2 ? 12 : 19;
int DeviceResources::numSlices(const ncDevicePlatform_t& platform) {
return platform == ncDevicePlatform_t::NC_MYRIAD_2 ? 12 : 19;
}
int DeviceResources::numStreams() {
return 3;
}
int DefaultAllocation::numStreams(const Platform& platform, const CompilationConfig& configuration) {
return platform == Platform::MYRIAD_X && configuration.hwOptimization ? 2 : 1;
int DefaultAllocation::numStreams(const ncDevicePlatform_t& platform, const PluginConfiguration& configuration) {
return platform == ncDevicePlatform_t::NC_MYRIAD_X && configuration.compileConfig().hwOptimization ? 2 : 1;
}
int DefaultAllocation::numSlices(const Platform& platform, int numStreams) {
int DefaultAllocation::numSlices(const ncDevicePlatform_t& platform, int numStreams) {
const auto capabilities = DeviceResources::numSlices(platform);
return capabilities / numStreams;
}
int DefaultAllocation::numShaves(const Platform& platform, int numStreams, int numSlices) {
int DefaultAllocation::numShaves(const ncDevicePlatform_t& platform, int numStreams, int numSlices) {
const auto numAvailableShaves = DeviceResources::numShaves(platform);
if (numStreams == 1) {
return numAvailableShaves;

View File

@ -10,6 +10,7 @@
#include <string>
#include <vpu/compile_env.hpp>
#include <vpu/configuration/options/copy_optimization.hpp>
namespace vpu {
@ -93,7 +94,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(convertShapeNotation);
ADD_DUMP_PASS("convertShapeNotation");
if (!env.config.disableReorder && !env.config.hwOptimization) {
if (!env.config.compileConfig().disableReorder && !env.config.compileConfig().hwOptimization) {
ADD_PASS(reorderInputsToChannelMinor);
ADD_DUMP_PASS("reorderInputsToChannelMinor");
}
@ -125,7 +126,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// To overcome fp16 limitations
//
if (env.config.hwOptimization && env.config.enableWeightsAnalysis) {
if (env.config.compileConfig().hwOptimization && env.config.compileConfig().enableWeightsAnalysis) {
ADD_PASS(analyzeWeightableLayers);
ADD_DUMP_PASS("analyzeWeightableLayers");
}
@ -150,7 +151,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// Model HW-specific optimizations
//
if (env.config.hwOptimization) {
if (env.config.compileConfig().hwOptimization) {
ADD_PASS(replaceFCbyConv);
ADD_DUMP_PASS("replaceFCbyConv");
@ -161,7 +162,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(replaceDeconvByConv);
ADD_DUMP_PASS("replaceDeconvByConv");
if (env.config.hwDilation) {
if (env.config.compileConfig().hwDilation) {
ADD_PASS(reshapeDilationConv);
ADD_DUMP_PASS("reshapeDilationConv");
}
@ -173,7 +174,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// Pass should be located before "adjustDataBatch" because "adjustDataBatch" specifies "origConvOutput" attribute
// for convolution in order to provide that information to "hwConvTiling" pass.
// Otherwise, "hwConvTiling" will see incorrect values in "origConvOutput" attribute.
if (env.config.enableCustomReshapeParam) {
if (env.config.compileConfig().enableCustomReshapeParam) {
ADD_PASS(reshapeBeforeConvTiling);
ADD_DUMP_PASS("reshapeBeforeConvTiling");
}
@ -197,7 +198,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(hwPadding);
ADD_DUMP_PASS("hwPadding");
if (env.config.hwOptimization) {
if (env.config.compileConfig().hwOptimization) {
ADD_PASS(splitLargeKernelConv);
ADD_DUMP_PASS("splitLargeKernelConv");
}
@ -209,7 +210,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(adjustDataBatch);
ADD_DUMP_PASS("adjustDataBatch");
if (env.config.enableReplWithSCRelu) {
if (env.config.compileConfig().enableReplWithSCRelu) {
ADD_PASS(replaceWithSCReLU);
ADD_DUMP_PASS("replaceWithSCReLU");
}
@ -218,13 +219,13 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// HW stages tiling
//
if (env.config.hwOptimization) {
if (env.config.compileConfig().hwOptimization) {
ADD_PASS(hwConvTiling);
ADD_PASS(hwPoolTiling);
ADD_PASS(hwFullyConnectedTiling);
ADD_DUMP_PASS("hwTiling");
if (env.config.hwExtraSplit) {
if (env.config.compileConfig().hwExtraSplit) {
ADD_PASS(hwExtraSplit);
ADD_DUMP_PASS("hwExtraSplit");
}
@ -242,7 +243,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
//
// this stage should be executed after "hwPoolTiling"
// and before "swPoolAdaptation"
if (env.config.enableReplaceWithReduceMean) {
if (env.config.compileConfig().enableReplaceWithReduceMean) {
ADD_PASS(replaceWithReduceMean);
ADD_DUMP_PASS("replaceWithReduceMean");
}
@ -261,7 +262,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(mergeReLUAndBias);
ADD_DUMP_PASS("mergeReLUAndBias");
if (env.config.enableEarlyEltwiseReLUFusion) {
if (env.config.compileConfig().enableEarlyEltwiseReLUFusion) {
ADD_PASS(mergeEltwiseAndReLUDynamic);
ADD_DUMP_PASS("mergeEltwiseAndReLUDynamic");
}
@ -279,7 +280,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// TODO: mergePermute support for reorder stage too.
// TODO: pass that will swap Permute and per-element operations.
if (env.config.enablePermuteMerging) {
if (env.config.compileConfig().enablePermuteMerging) {
ADD_PASS(mergePermuteStages);
ADD_DUMP_PASS("mergePermuteStages");
}
@ -326,7 +327,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// Model common optimizations
//
if (env.config.copyOptimization.getOrDefault(true)) {
if (env.config.get<CopyOptimizationOption>()) {
ADD_PASS(eliminateCopyStages);
ADD_DUMP_PASS("eliminateCopyStages");
}
@ -334,7 +335,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
//
// HW/SW injection
if (env.config.hwOptimization && env.config.injectSwOps.getOrDefault(true)) {
if (env.config.compileConfig().hwOptimization && env.config.compileConfig().injectSwOps.getOrDefault(true)) {
ADD_PASS(injectSw);
ADD_DUMP_PASS("injectSw");
}
@ -350,7 +351,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// HW stages finalization
//
if (env.config.hwOptimization) {
if (env.config.compileConfig().hwOptimization) {
ADD_PASS(finalizeHwOps);
ADD_DUMP_PASS("hwFinalization");
}
@ -361,7 +362,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(markFastStages);
ADD_DUMP_PASS("markFastStages");
if (env.config.enableMemoryTypesAnnotation) {
if (env.config.compileConfig().enableMemoryTypesAnnotation) {
ADD_PASS(annotateMemoryTypes);
ADD_DUMP_PASS("annotateMemoryTypes");
}

View File

@ -48,7 +48,7 @@ void PassImpl::run(const Model& model) {
allocNonIntermediateData(model);
adjustModelForMemReqs(model);
copyHwMisalignedInput(model);
if (env.config.packDataInCmx.getOrDefault(true)) {
if (env.config.compileConfig().packDataInCmx.getOrDefault(true)) {
packDataInCmx(model);
}
}
@ -147,7 +147,7 @@ void PassImpl::collectMemReqs(const Model& model) {
}
void PassImpl::resetStageOrder(const Model& model) {
if (!CompileEnv::get().config.hwOptimization)
if (!CompileEnv::get().config.compileConfig().hwOptimization)
return;
static const std::string s_expectCMXOutput {"expectCMXOutput"};

View File

@ -14,6 +14,7 @@
#include <vpu/middleend/allocator/allocator.hpp>
#include <vpu/compile_env.hpp>
#include <vpu/configuration/options/copy_optimization.hpp>
namespace vpu {
@ -78,7 +79,7 @@ void PassImpl::run(const Model& model) {
std::queue<Stage> copyToRemove;
if (!env.config.copyOptimization.hasValue()) {
if (!env.config.get<CopyOptimizationOption>()) {
int nCopyStages = 0;
for (const auto& stage : model->getStages()) {
if (stage->type() == StageType::Copy) {

View File

@ -68,7 +68,7 @@ void PassImpl::run(const Model& model) {
// Collect HW and SW candidates
//
if (!env.config.injectSwOps.hasValue() &&
if (!env.config.compileConfig().injectSwOps.hasValue() &&
model->numStages() > nMaxStagesForInjectSw) {
env.log->warning(
"Pass [injectSw] SKIPPED : number of stages (%d) is larger than threshold %d",

View File

@ -30,7 +30,7 @@ private:
};
void PassImpl::run(const Model& model) {
const bool enableEarlyEltwiseReLUFusion = CompileEnv::get().config.enableEarlyEltwiseReLUFusion;
const bool enableEarlyEltwiseReLUFusion = CompileEnv::get().config.compileConfig().enableEarlyEltwiseReLUFusion;
if (enableEarlyEltwiseReLUFusion) {
if (m_mode == MergeMode::DYNAMIC_NETWORK) {
VPU_PROFILE(mergeEltwiseAndReLUDynamic);

View File

@ -170,7 +170,7 @@ void PassImpl::run(const Model& model) {
// Try to merge next Pooling layer
//
if (env.config.mergeHwPoolToConv) {
if (env.config.compileConfig().mergeHwPoolToConv) {
if (stage->type() == StageType::StubConv) {
if (auto nextPoolStage = getNextPoolStage(stage, output)) {
output = nextPoolStage->output(0);

View File

@ -148,7 +148,7 @@ void PassImpl::run(const Model& model) {
auto output = stage->output(0);
const auto& env = CompileEnv::get();
if (env.config.hwDisabled(stage->origLayer()->name)) {
if (env.config.compileConfig().hwDisabled(stage->origLayer()->name)) {
continue;
}

View File

@ -88,7 +88,7 @@ bool isScalable(const Stage& stage) {
bool checkGrowingOutput(const Model& model) {
const auto& env = CompileEnv::get();
if (!env.config.checkPreprocessingInsideModel) {
if (!env.config.compileConfig().checkPreprocessingInsideModel) {
return false;
}
@ -258,7 +258,7 @@ void PassImpl::run(const Model& model) {
scale = static_cast<float>(1ULL << static_cast<std::uint32_t>(shift));
}
if (!env.config.irWithVpuScalesDir.empty()) {
if (!env.config.compileConfig().irWithVpuScalesDir.empty()) {
stage->origLayer()->params["vpu_scale"] = toString(scale);
}
}

View File

@ -199,7 +199,7 @@ StageSHAVEsRequirements StageNode::getSHAVEsRequirements() const {
// return max for Myriad2
const auto& compileEnv = CompileEnv::get();
if (compileEnv.platform == Platform::MYRIAD_2) {
if (compileEnv.platform == ncDevicePlatform_t::NC_MYRIAD_2) {
return StageSHAVEsRequirements::NeedMax;
}

View File

@ -24,7 +24,7 @@ void FrontEnd::parseActivation(const Model& model, const ie::CNNLayerPtr& layer,
const auto type = layer->GetParamAsString("type");
const auto activationParserIt = activationParsers.find(type);
VPU_THROW_UNSUPPORTED_UNLESS(activationParserIt != activationParsers.end(),
VPU_THROW_UNSUPPORTED_LAYER_UNLESS(activationParserIt != activationParsers.end(),
"Failed to compile layer \"%v\"(type = %v) ", layer->name, type);
activationParserIt->second(model, layer, inputs, outputs);

View File

@ -163,9 +163,9 @@ void parseConv2D(const Model & model,
kernelStrideY,
dilationX,
dilationY,
env.config.hwOptimization,
env.config.hwDilation,
env.config.hwDisabled(layer->name));
env.config.compileConfig().hwOptimization,
env.config.compileConfig().hwDilation,
env.config.compileConfig().hwDisabled(layer->name));
//
// Create const datas
@ -476,9 +476,9 @@ void parseConvND(const Model & model,
strides[1],
dilations[0],
dilations[1],
env.config.hwOptimization,
env.config.hwDilation,
env.config.hwDisabled(layer->name));
env.config.compileConfig().hwOptimization,
env.config.compileConfig().hwDilation,
env.config.compileConfig().hwDisabled(layer->name));
int try_hw = tryHW ? 1 : 0;

View File

@ -37,13 +37,13 @@ void FrontEnd::parseFullyConnected(const Model& model, const ie::CNNLayerPtr& _l
// Check if HW is applicable
//
auto tryHW = env.config.hwOptimization;
auto tryHW = env.config.compileConfig().hwOptimization;
if (output->desc().dim(Dim::W, 1) != 1 || output->desc().dim(Dim::H, 1) != 1) {
tryHW = false;
}
if (env.config.hwDisabled(layer->name)) {
if (env.config.compileConfig().hwDisabled(layer->name)) {
tryHW = false;
}

View File

@ -162,7 +162,7 @@ void FrontEnd::parseMTCNN(const Model& model, const ie::CNNLayerPtr& layer, cons
IE_ASSERT(inputs.size() == 1);
IE_ASSERT(outputs.size() == 1);
if (!env.config.hwOptimization) {
if (!env.config.compileConfig().hwOptimization) {
VPU_THROW_EXCEPTION << "MTCNN layer supports Myriad X with NCE only";
}

View File

@ -124,7 +124,7 @@ Stage StageBuilder::addReorderStage(
const Data& output) {
const auto* env = CompileEnv::getOrNull();
VPU_THROW_UNLESS(
env == nullptr || !env->config.disableReorder,
env == nullptr || !env->config.compileConfig().disableReorder,
"Tried to add Reorder Stage %v, while DISABLE_REORDER option was set",
name);

View File

@ -221,8 +221,8 @@ void parsePool2D(const Model & model,
//
const auto& env = CompileEnv::get();
bool hwOptimization = env.config.hwOptimization;
bool hwDisabled = env.config.hwDisabled(layer->name);
bool hwOptimization = env.config.compileConfig().hwOptimization;
bool hwDisabled = env.config.compileConfig().hwDisabled(layer->name);
int inputWidth = input->desc().dim(Dim::W);
int inputHeight = input->desc().dim(Dim::H);
@ -480,8 +480,8 @@ void parsePoolND(const Model & model,
//
const auto& env = CompileEnv::get();
bool hwOptimization = env.config.hwOptimization;
bool hwDisabled = env.config.hwDisabled(layer->name);
bool hwOptimization = env.config.compileConfig().hwOptimization;
bool hwDisabled = env.config.compileConfig().hwDisabled(layer->name);
bool tryHW = canTryHW(poolLayer->_type,
input_shape[0],

Some files were not shown because too many files have changed in this diff.