Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
4a5811623d
@ -17,6 +17,8 @@ jobs:
|
||||
WORK_DIR: $(Pipeline.Workspace)/_w
|
||||
MODELS_DIR: /mount/cinfsshare/onnxtestdata
|
||||
TMP_DIR: /mnt/tmp
|
||||
ONNX_MODEL_ZOO_SHA: "d58213534f2a4d1c4b19ba62b3bb5f544353256e"
|
||||
|
||||
|
||||
steps:
|
||||
- script: |
|
||||
@ -55,7 +57,7 @@ jobs:
|
||||
- script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile .
|
||||
displayName: 'Docker build'
|
||||
|
||||
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o
|
||||
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)"
|
||||
displayName: 'Get models'
|
||||
|
||||
- script: |
|
||||
@ -77,6 +79,6 @@ jobs:
|
||||
displayName: 'Create swap'
|
||||
|
||||
- script: |
|
||||
docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo:/root/.onnx/model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image
|
||||
docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "tox && tox -e zoo_models"
|
||||
displayName: 'Docker run'
|
||||
|
||||
|
@ -131,7 +131,7 @@ limitations under the License.
|
||||
<tab type="user" title="Cosh-1" url="@ref openvino_docs_ops_arithmetic_Cosh_1"/>
|
||||
<tab type="user" title="CTCLoss-4" url="@ref openvino_docs_ops_sequence_CTCLoss_4"/>
|
||||
<tab type="user" title="CumSum" url="@ref openvino_docs_ops_arithmetic_CumSum_3"/>
|
||||
<tab type="user" title="DeformableConvolution-1" url="@ref openvino_docs_ops_convolution_DeformableConvolution_1"/>
|
||||
<tab type="user" title="DeformableConvolution-8" url="@ref openvino_docs_ops_convolution_DeformableConvolution_8"/>
|
||||
<tab type="user" title="DeformablePSROIPooling-1" url="@ref openvino_docs_ops_detection_DeformablePSROIPooling_1"/>
|
||||
<tab type="user" title="DepthToSpace-1" url="@ref openvino_docs_ops_movement_DepthToSpace_1"/>
|
||||
<tab type="user" title="DetectionOutput-1" url="@ref openvino_docs_ops_detection_DetectionOutput_1"/>
|
||||
@ -189,11 +189,13 @@ limitations under the License.
|
||||
<tab type="user" title="MVN-1" url="@ref openvino_docs_ops_normalization_MVN_1"/>
|
||||
<tab type="user" title="MVN-6" url="@ref openvino_docs_ops_normalization_MVN_6"/>
|
||||
<tab type="user" title="MatMul-1" url="@ref openvino_docs_ops_matrix_MatMul_1"/>
|
||||
<tab type="user" title="MatrixNonMaxSuppression-8" url="@ref openvino_docs_ops_sort_MatrixNonMaxSuppression_8"/>
|
||||
<tab type="user" title="MaxPool-1" url="@ref openvino_docs_ops_pooling_MaxPool_1"/>
|
||||
<tab type="user" title="Maximum-1" url="@ref openvino_docs_ops_arithmetic_Maximum_1"/>
|
||||
<tab type="user" title="Minimum-1" url="@ref openvino_docs_ops_arithmetic_Minimum_1"/>
|
||||
<tab type="user" title="Mish-4" url="@ref openvino_docs_ops_activation_Mish_4"/>
|
||||
<tab type="user" title="Mod-1" url="@ref openvino_docs_ops_arithmetic_Mod_1"/>
|
||||
<tab type="user" title="MulticlassNonMaxSuppression-8" url="@ref openvino_docs_ops_sort_MulticlassNonMaxSuppression_8"/>
|
||||
<tab type="user" title="Multiply-1" url="@ref openvino_docs_ops_arithmetic_Multiply_1"/>
|
||||
<tab type="user" title="Negative-1" url="@ref openvino_docs_ops_arithmetic_Negative_1"/>
|
||||
<tab type="user" title="NonMaxSuppression-1" url="@ref openvino_docs_ops_sort_NonMaxSuppression_1"/>
|
||||
|
@ -2,31 +2,31 @@
|
||||
|
||||
**Versioned name**: *Ceiling-1*
|
||||
|
||||
**Category**: Arithmetic unary operation
|
||||
**Category**: Arithmetic unary operation
|
||||
|
||||
**Short description**: *Ceiling* performs element-wise ceiling operation with given tensor.
|
||||
|
||||
**Attributes**:
|
||||
**Detailed description**: For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
No attributes available.
|
||||
\f[
|
||||
a_{i} = ceiling(a_{i})
|
||||
\f]
|
||||
|
||||
**Attributes**: *Ceiling* operation has no attributes.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: An tensor of type T. **Required.**
|
||||
* **1**: A tensor of type *T* and arbitrary shape. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise ceiling operation. A tensor of type T.
|
||||
* **1**: The result of element-wise ceiling operation. A tensor of type *T*.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any numeric type.
|
||||
|
||||
*Ceiling* does the following with the input tensor *a*:
|
||||
|
||||
\f[
|
||||
a_{i} = ceiling(a_{i})
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
||||
|
@ -2,35 +2,33 @@
|
||||
|
||||
**Versioned name**: *Negative-1*
|
||||
|
||||
**Category**: Arithmetic unary operation
|
||||
**Category**: Arithmetic unary operation
|
||||
|
||||
**Short description**: *Negative* performs element-wise negative operation with given tensor.
|
||||
**Short description**: *Negative* performs element-wise negative operation on a given input tensor.
|
||||
|
||||
**Attributes**:
|
||||
**Detailed description**
|
||||
|
||||
No attributes available.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: An tensor of type T. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise negative operation. A tensor of type T.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any numeric type.
|
||||
|
||||
*Negative* does the following with the input tensor *a*:
|
||||
*Negative* performs element-wise negative operation on a given input tensor, based on the following mathematical formula:
|
||||
|
||||
\f[
|
||||
a_{i} = -a_{i}
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
**Attributes**: *Negative* operation has no attributes.
|
||||
|
||||
*Example 1*
|
||||
**Inputs**
|
||||
|
||||
* **1**: A tensor of type *T* and arbitrary shape. **Required.**
|
||||
|
||||
**Outputs**
|
||||
|
||||
* **1**: The result of element-wise *Negative* operation applied to the input tensor. A tensor of type *T* and the same shape as input tensor.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any supported signed numeric type.
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<layer ... type="Negative">
|
||||
@ -47,4 +45,4 @@ a_{i} = -a_{i}
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
```
|
||||
|
@ -8,6 +8,26 @@
|
||||
|
||||
**Detailed description**: *Deformable Convolution* is similar to regular *Convolution* but its receptive field is deformed because of additional spatial offsets used during input sampling. More thorough explanation can be found in [Deformable Convolutions Demystified](https://towardsdatascience.com/deformable-convolutions-demystified-2a77498699e8) and [Deformable Convolutional Networks](https://arxiv.org/abs/1703.06211).
|
||||
|
||||
Output is calculated using the following formula:
|
||||
|
||||
\f[
|
||||
|
||||
y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k})
|
||||
|
||||
\f]
|
||||
|
||||
Where
|
||||
* K is a number of sampling locations, e.g. for kernel 3x3 and dilation = 1, K = 9
|
||||
|
||||
* \f$x(p)\f$ and \f$y(p)\f$ denote the features at location p from the input feature maps x and output feature maps y
|
||||
|
||||
* \f$w_{k}\f$ is the weight for k-th location.
|
||||
|
||||
* \f$p_{k}\f$ is pre-specified offset for the k-th location, e.g. K = 9 and
|
||||
\f$p_{k} \in \{(-1, -1),(-1, 0), . . . ,(1, 1)\}\f$
|
||||
|
||||
* \f${\Delta}p_{k}\f$ is the learnable offset for the k-th location.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
* *strides*
|
||||
|
168
docs/ops/convolution/DeformableConvolution_8.md
Normal file
168
docs/ops/convolution/DeformableConvolution_8.md
Normal file
@ -0,0 +1,168 @@
|
||||
## DeformableConvolution<a name="DeformableConvolution"></a> {#openvino_docs_ops_convolution_DeformableConvolution_8}
|
||||
|
||||
**Versioned name**: *DeformableConvolution-8*
|
||||
|
||||
**Category**: Convolution
|
||||
|
||||
**Short description**: Computes 2D deformable convolution of input and kernel tensors.
|
||||
|
||||
**Detailed description**: *Deformable Convolution* is similar to regular *Convolution* but its receptive field is deformed because of additional spatial offsets used during input sampling. More thorough explanation can be found in [Deformable Convolutions Demystified](https://towardsdatascience.com/deformable-convolutions-demystified-2a77498699e8), [Deformable Convolutional Networks](https://arxiv.org/abs/1703.06211).
|
||||
|
||||
Modification of DeformableConvolution using modulating scalars is also supported. Please refer to [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/pdf/1811.11168.pdf).
|
||||
|
||||
Output is calculated using the following formula:
|
||||
|
||||
\f[
|
||||
|
||||
y(p) = \sum_{k = 1}^{K}w_{k}x(p + p_{k} + {\Delta}p_{k}) * {\Delta}m_{k}
|
||||
|
||||
\f]
|
||||
Where
|
||||
* K is a number of sampling locations, e.g. for kernel 3x3 and dilation = 1, K = 9
|
||||
|
||||
* \f$x(p)\f$ and \f$y(p)\f$ denote the features at location p from the input feature maps x and output feature maps y
|
||||
|
||||
* \f$w_{k}\f$ is the weight for k-th location.
|
||||
|
||||
* \f$p_{k}\f$ is pre-specified offset for the k-th location, e.g. K = 9 and
|
||||
\f$p_{k} \in \{(-1, -1),(-1, 0), . . . ,(1, 1)\}\f$
|
||||
|
||||
* \f${\Delta}p_{k}\f$ is the learnable offset for the k-th location.
|
||||
|
||||
* \f${\Delta}m_{k}\f$ is the modulation scalar from 0 to 1 for the k-th location.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
* *strides*
|
||||
|
||||
* **Description**: *strides* is a distance (in pixels) to slide the filter on the feature map over the `(y,x)` axes. For example, *strides* equal `2,1` means sliding the filter 2 pixel at a time over height dimension and 1 over width dimension.
|
||||
* **Range of values**: integer values starting from `0`
|
||||
* **Type**: `int[]`
|
||||
* **Default value**: None
|
||||
* **Required**: *yes*
|
||||
|
||||
* *pads_begin*
|
||||
|
||||
* **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal `1,2` means adding 1 pixel to the top of the input and 2 to the left of the input.
|
||||
* **Range of values**: integer values starting from `0`
|
||||
* **Type**: `int[]`
|
||||
* **Default value**: None
|
||||
* **Required**: *yes*
|
||||
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
|
||||
|
||||
* *pads_end*
|
||||
|
||||
* **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal `1,2` means adding 1 pixel to the bottom of the input and 2 to the right of the input.
|
||||
* **Range of values**: integer values starting from `0`
|
||||
* **Type**: `int[]`
|
||||
* **Default value**: None
|
||||
* **Required**: *yes*
|
||||
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
|
||||
|
||||
* *dilations*
|
||||
|
||||
* **Description**: *dilations* denotes the distance in width and height between elements (weights) in the filter. For example, *dilation* equal `1,1` means that all the elements in the filter are neighbors, so it is the same as for the usual convolution. *dilation* equal `2,2` means that all the elements in the filter are matched not to adjacent elements in the input matrix, but to those that are adjacent with distance 1.
|
||||
* **Range of values**: integer value starting from `0`
|
||||
* **Type**: `int[]`
|
||||
* **Default value**: None
|
||||
* **Required**: *yes*
|
||||
|
||||
* *auto_pad*
|
||||
|
||||
* **Description**: *auto_pad* how the padding is calculated. Possible values:
|
||||
* *explicit* - use explicit padding values from *pads_begin* and *pads_end*.
|
||||
* *same_upper* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the end.
|
||||
* *same_lower* - the input is padded to match the output size. In case of odd padding value an extra padding is added at the beginning.
|
||||
* *valid* - do not use padding.
|
||||
* **Type**: `string`
|
||||
* **Default value**: explicit
|
||||
* **Required**: *no*
|
||||
* **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified.
|
||||
|
||||
|
||||
* *group*
|
||||
|
||||
* **Description**: *group* is the number of groups which *output* and *input* should be split into. For example, *group* equal to 1 means that all filters are applied to the whole input (usual convolution), *group* equal to 2 means that both *input* and *output* channels are separated into two groups and the *i-th output* group is connected to the *i-th input* group channel. *group* equal to a number of output feature maps implies depth-wise separable convolution.
|
||||
* **Range of values**: integer value starting from `1`
|
||||
* **Type**: `int`
|
||||
* **Default value**: `1`
|
||||
* **Required**: *no*
|
||||
|
||||
* *deformable_group*
|
||||
|
||||
* **Description**: *deformable_group* is the number of groups in which *offsets* input and *output* should be split into along the channel axis. Apply the deformable convolution using the i-th part of the offsets part on the i-th out.
|
||||
* **Range of values**: integer value starting from `1`
|
||||
* **Type**: `int`
|
||||
* **Default value**: `1`
|
||||
* **Required**: *no*
|
||||
|
||||
* *bilinear_interpolation_padding*
|
||||
|
||||
* **Description**: *bilinear_interpolation_padding* is the number of pixels outside of the feature map boundary to apply bilinear interpolation.
|
||||
* **Range of values**: non-negative integer value
|
||||
* **Type**: `int`
|
||||
* **Default value**: `0`
|
||||
* **Required**: *no*
|
||||
|
||||
**Inputs**:
|
||||
|
||||
* **1**: Input tensor of type *T* and rank 4. Layout is `NCYX` (number of batches, number of channels, spatial axes Y and X). **Required.**
|
||||
|
||||
* **2**: Offsets tensor of type *T* and rank 4. Layout is `NCYX` (number of batches, *deformable_group* \* kernel_Y \* kernel_X \* 2, spatial axes Y and X). **Required.**
|
||||
|
||||
* **3**: Kernel tensor of type *T* and rank 4. Layout is `OIYX` (number of output channels, number of input channels, spatial axes Y and X). **Required.**
|
||||
|
||||
* **4**: ModulationScalars tensor of type *T2* and rank 4, the values are within [0, 1]. Layout is `NCYX` (number of batches, *deformable_group* \* kernel_Y \* kernel_X, spatial axes Y and X). If the input is not provided, the values are assumed to be equal to 1. **Optional.**
|
||||
|
||||
|
||||
**Outputs**:
|
||||
|
||||
* **1**: Output tensor of type *T* and rank 4. Layout is `NOYX` (number of batches, number of kernel output channels, spatial axes Y and X).
|
||||
|
||||
**Types**:
|
||||
|
||||
* *T*: Any numeric type.
|
||||
* *T2*: Any supported floating point.
|
||||
|
||||
**Example**
|
||||
|
||||
2D DeformableConvolution (deformable_group=1)
|
||||
```xml
|
||||
<layer type="DeformableConvolution" ...>
|
||||
<data dilations="1,1" pads_begin="0,0" pads_end="0,0" strides="1,1" auto_pad="explicit" group="1" deformable_group="1"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>4</dim>
|
||||
<dim>224</dim>
|
||||
<dim>224</dim>
|
||||
</port>
|
||||
<port id="1">
|
||||
<dim>1</dim>
|
||||
<dim>50</dim>
|
||||
<dim>220</dim>
|
||||
<dim>220</dim>
|
||||
</port>
|
||||
<port id="2">
|
||||
<dim>64</dim>
|
||||
<dim>4</dim>
|
||||
<dim>5</dim>
|
||||
<dim>5</dim>
|
||||
</port>
|
||||
<port id="3">
|
||||
<dim>1</dim>
|
||||
<dim>25</dim>
|
||||
<dim>220</dim>
|
||||
<dim>220</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="4" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>64</dim>
|
||||
<dim>220</dim>
|
||||
<dim>220</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
@ -40,7 +40,7 @@ declared in `namespace opset8`.
|
||||
* [Cos](arithmetic/Cos_1.md)
|
||||
* [Cosh](arithmetic/Cosh_1.md)
|
||||
* [CumSum](arithmetic/CumSum_3.md)
|
||||
* [DeformableConvolution](convolution/DeformableConvolution_1.md)
|
||||
* [DeformableConvolution](convolution/DeformableConvolution_8.md)
|
||||
* [DeformablePSROIPooling](detection/DeformablePSROIPooling_1.md)
|
||||
* [DepthToSpace](movement/DepthToSpace_1.md)
|
||||
* [DetectionOutput](detection/DetectionOutput_1.md)
|
||||
|
168
docs/ops/sort/MatrixNMS_8.md
Normal file
168
docs/ops/sort/MatrixNMS_8.md
Normal file
@ -0,0 +1,168 @@
|
||||
## MatrixNonMaxSuppression<a name="MatrixNonMaxSuppression"></a> {#openvino_docs_ops_sort_MatrixNms_8}
|
||||
|
||||
**Versioned name**: *MatrixNonMaxSuppression-8*
|
||||
|
||||
**Category**: *Sorting and maximization*
|
||||
|
||||
**Short description**: *MatrixNonMaxSuppression* performs matrix non-maximum suppression (NMS) of the boxes with predicted scores.
|
||||
|
||||
**Detailed description**: The operation performs the following:
|
||||
|
||||
1. Selects candidate bounding boxes with scores higher than `score_threshold`.
|
||||
2. For each class, selects at most `nms_top_k` candidate boxes.
|
||||
3. Decays scores of the candidate boxes according to the Matrix NMS algorithm [Wang et al](https://arxiv.org/abs/2003.10152.pdf). This algorithm is applied independently to each class and each batch element. Boxes of `background_class` are skipped and thus eliminated during the process.
|
||||
4. Selects boxes with the decayed scores higher than `post_threshold`, and selects at most `keep_top_k` scoring candidate boxes per batch element.
|
||||
|
||||
The Matrix NMS algorithm is described below:
|
||||
1. Sort descending the candidate boxes by score, and compute `n*n` pairwise IOU (IntersectionOverUnion) matrix `X` for the top `n` boxes. Suppose `n` is the number of candidate boxes.
|
||||
2. Set the lower triangle and diagonal of `X` to 0. Therefore get the upper triangular matrix `X`.
|
||||
3. Take the column-wise max of `X` to compute a vector `K` of maximum IOU for each candidate box.
|
||||
4. Repeat element value of `K` along axis 1. Suppose this gets a matrix `X_cmax`.
|
||||
5. Compute the decay factor: `decay_factor = exp((X_cmax**2 - X**2) * gaussian_sigma)` if `decay_function` is `guassian`, else `decay_factor = (1 - X) / (1 - X_cmax)`.
|
||||
6. Take the column-wise min of `decay_factor`, and element-wise multiply with scores to decay them.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
* *sort_result*
|
||||
|
||||
* **Description**: *sort_result* specifies the order of output elements.
|
||||
* **Range of values**: `class`, `score`, `none`
|
||||
* *class* - sort selected boxes by class id (ascending).
|
||||
* *score* - sort selected boxes by score (descending).
|
||||
* *none* - do not guarantee the order.
|
||||
* **Type**: `string`
|
||||
* **Default value**: `none`
|
||||
* **Required**: *No*
|
||||
|
||||
* *sort_result_across_batch*
|
||||
|
||||
* **Description**: *sort_result_across_batch* is a flag that specifies whenever it is necessary to sort selected boxes across batches or not.
|
||||
* **Range of values**: true or false
|
||||
* *true* - sort selected boxes across batches.
|
||||
* *false* - do not sort selected boxes across batches (boxes are sorted per batch element).
|
||||
* **Type**: boolean
|
||||
* **Default value**: false
|
||||
* **Required**: *No*
|
||||
|
||||
* *output_type*
|
||||
|
||||
* **Description**: the tensor type of outputs `selected_indices` and `valid_outputs`.
|
||||
* **Range of values**: `i64` or `i32`
|
||||
* **Type**: `string`
|
||||
* **Default value**: `i64`
|
||||
* **Required**: *No*
|
||||
|
||||
* *score_threshold*
|
||||
|
||||
* **Description**: minimum score to consider box for the processing.
|
||||
* **Range of values**: a floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: `0`
|
||||
* **Required**: *No*
|
||||
|
||||
* *nms_top_k*
|
||||
|
||||
* **Description**: maximum number of boxes to be selected per class.
|
||||
* **Range of values**: an integer
|
||||
* **Type**: `int`
|
||||
* **Default value**: `-1` meaning to keep all boxes
|
||||
* **Required**: *No*
|
||||
|
||||
* *keep_top_k*
|
||||
|
||||
* **Description**: maximum number of boxes to be selected per batch element.
|
||||
* **Range of values**: an integer
|
||||
* **Type**: `int`
|
||||
* **Default value**: `-1` meaning to keep all boxes
|
||||
* **Required**: *No*
|
||||
|
||||
* *background_class*
|
||||
|
||||
* **Description**: the background class id.
|
||||
* **Range of values**: an integer
|
||||
* **Type**: `int`
|
||||
* **Default value**: `-1` meaning to keep all classes
|
||||
* **Required**: *No*
|
||||
|
||||
* *decay_function*
|
||||
|
||||
* **Description**: decay function used to decay scores.
|
||||
* **Range of values**: `gaussian`, `linear`
|
||||
* **Type**: `string`
|
||||
* **Default value**: `linear`
|
||||
* **Required**: *No*
|
||||
|
||||
* *gaussian_sigma*
|
||||
|
||||
* **Description**: gaussian_sigma parameter for gaussian decay_function.
|
||||
* **Range of values**: a floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: `2.0`
|
||||
* **Required**: *No*
|
||||
|
||||
* *post_threshold*
|
||||
|
||||
* **Description**: threshold to filter out boxes with low confidence score after decaying.
|
||||
* **Range of values**: a floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: `0`
|
||||
* **Required**: *No*
|
||||
|
||||
**Inputs**:
|
||||
|
||||
* **1**: `boxes` - tensor of type *T* and shape `[num_batches, num_boxes, 4]` with box coordinates. The box cooridnates are layout as `[xmin, ymin, xmax, ymax]`. **Required.**
|
||||
|
||||
* **2**: `scores` - tensor of type *T* and shape `[num_batches, num_classes, num_boxes]` with box scores. **Required.**
|
||||
|
||||
**Outputs**:
|
||||
|
||||
* **1**: `selected_outputs` - tensor of type *T_THRESHOLDS* and shape `[number of selected boxes, 6]` containing the selected boxes with score and class as tuples `[class_id, box_score, xmin, ymin, xmax, ymax]`.
|
||||
|
||||
* **2**: `selected_indices` - tensor of type *T_IND* and shape `[number of selected boxes, 1]` the selected indices in the flattened input `boxes`, which are absolute values cross batches. Therefore possible valid values are in the range `[0, num_batches * num_boxes - 1]`.
|
||||
|
||||
* **3**: `selected_num` - 1D tensor of type *T_IND* and shape `[num_batches]` representing the number of selected boxes for each batch element.
|
||||
|
||||
When there is no box selected, `selected_num` is filled with `0`. `selected_outputs` is an empty tensor of shape `[0, 6]`, and `selected_indices` is an empty tensor of shape `[0, 1]`.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: floating point type.
|
||||
|
||||
* *T_MAX_BOXES*: integer type.
|
||||
|
||||
* *T_THRESHOLDS*: floating point type.
|
||||
|
||||
* *T_IND*: `int64` or `int32`.
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<layer ... type="MatrixNonMaxSuppression" ... >
|
||||
<data decay_function="guassian" sort_result="score" output_type="i64"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>3</dim>
|
||||
<dim>100</dim>
|
||||
<dim>4</dim>
|
||||
</port>
|
||||
<port id="1">
|
||||
<dim>3</dim>
|
||||
<dim>5</dim>
|
||||
<dim>100</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5" precision="FP32">
|
||||
<dim>-1</dim> <!-- "-1" means a undefined dimension calculated during the model inference -->
|
||||
<dim>6</dim>
|
||||
</port>
|
||||
<port id="6" precision="I64">
|
||||
<dim>-1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="7" precision="I64">
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
161
docs/ops/sort/MulticlassNMS_8.md
Normal file
161
docs/ops/sort/MulticlassNMS_8.md
Normal file
@ -0,0 +1,161 @@
|
||||
## MulticlassNonMaxSuppression<a name="MulticlassNonMaxSuppression"></a> {#openvino_docs_ops_sort_MulticlassNonMaxSuppression_8}
|
||||
|
||||
**Versioned name**: *MulticlassNonMaxSuppression-8*
|
||||
|
||||
**Category**: *Sorting and maximization*
|
||||
|
||||
**Short description**: *MulticlassNonMaxSuppression* performs multi-class non-maximum suppression of the boxes with predicted scores.
|
||||
|
||||
**Detailed description**: *MulticlassNonMaxSuppression* is a multi-phase operation. It implements non-maximum suppression algorithm as described below:
|
||||
|
||||
1. Let `B = [b_0,...,b_n]` be the list of initial detection boxes, `S = [s_0,...,s_N]` be the list of corresponding scores.
|
||||
2. Let `D = []` be an initial collection of resulting boxes. Let `adaptive_threshold = iou_threshold`.
|
||||
3. If `B` is empty, go to step 9.
|
||||
4. Take the box with highest score. Suppose that it is the box `b` with the score `s`.
|
||||
5. Delete `b` from `B`.
|
||||
6. If the score `s` is greater than or equal to `score_threshold`, add `b` to `D`, else go to step 9.
|
||||
7. If `nms_eta < 1` and `adaptive_threshold > 0.5`, update `adaptive_threshold *= nms_eta`.
|
||||
8. For each input box `b_i` from `B` and the corresponding score `s_i`, set `s_i = 0` when `iou(b, b_i) > adaptive_threshold`, and go to step 3.
|
||||
9. Return `D`, a collection of the corresponding scores `S`, and the number of elements in `D`.
|
||||
|
||||
This algorithm is applied independently to each class of each batch element. The operation feeds at most `nms_top_k` scoring candidate boxes to this algorithm.
|
||||
The total number of output boxes of each batch element must not exceed `keep_top_k`.
|
||||
Boxes of `background_class` are skipped and thus eliminated.
|
||||
|
||||
**Attributes**:
|
||||
|
||||
* *sort_result*
|
||||
|
||||
* **Description**: *sort_result* specifies the order of output elements.
|
||||
* **Range of values**: `class`, `score`, `none`
|
||||
* *class* - sort selected boxes by class id (ascending).
|
||||
* *score* - sort selected boxes by score (descending).
|
||||
* *none* - do not guarantee the order.
|
||||
* **Type**: `string`
|
||||
* **Default value**: `none`
|
||||
* **Required**: *No*
|
||||
|
||||
* *sort_result_across_batch*
|
||||
|
||||
* **Description**: *sort_result_across_batch* is a flag that specifies whenever it is necessary to sort selected boxes across batches or not.
|
||||
* **Range of values**: true or false
|
||||
* *true* - sort selected boxes across batches.
|
||||
* *false* - do not sort selected boxes across batches (boxes are sorted per batch element).
|
||||
* **Type**: boolean
|
||||
* **Default value**: false
|
||||
* **Required**: *No*
|
||||
|
||||
* *output_type*
|
||||
|
||||
* **Description**: the tensor type of outputs `selected_indices` and `valid_outputs`.
|
||||
* **Range of values**: `i64` or `i32`
|
||||
* **Type**: `string`
|
||||
* **Default value**: `i64`
|
||||
* **Required**: *No*
|
||||
|
||||
* *iou_threshold*
|
||||
|
||||
* **Description**: intersection over union threshold.
|
||||
* **Range of values**: a floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: `0`
|
||||
* **Required**: *No*
|
||||
|
||||
* *score_threshold*
|
||||
|
||||
* **Description**: minimum score to consider box for the processing.
|
||||
* **Range of values**: a floating-point number
|
||||
* **Type**: `float`
|
||||
* **Default value**: `0`
|
||||
* **Required**: *No*
|
||||
|
||||
* *nms_top_k*
|
||||
|
||||
* **Description**: maximum number of boxes to be selected per class.
|
||||
* **Range of values**: an integer
|
||||
* **Type**: `int`
|
||||
* **Default value**: `-1` meaning to keep all boxes
|
||||
* **Required**: *No*
|
||||
|
||||
* *keep_top_k*
|
||||
|
||||
* **Description**: maximum number of boxes to be selected per batch element.
|
||||
* **Range of values**: an integer
|
||||
* **Type**: `int`
|
||||
* **Default value**: `-1` meaning to keep all boxes
|
||||
* **Required**: *No*
|
||||
|
||||
* *background_class*
|
||||
|
||||
* **Description**: the background class id.
|
||||
* **Range of values**: an integer
|
||||
* **Type**: `int`
|
||||
* **Default value**: `-1` meaning to keep all classes.
|
||||
* **Required**: *No*
|
||||
|
||||
* *nms_eta*
|
||||
|
||||
* **Description**: eta parameter for adaptive NMS.
|
||||
* **Range of values**: a floating-point number in close range `[0, 1.0]`.
|
||||
* **Type**: `float`
|
||||
* **Default value**: `1.0`
|
||||
* **Required**: *No*
|
||||
|
||||
**Inputs**:
|
||||
|
||||
* **1**: `boxes` - tensor of type *T* and shape `[num_batches, num_boxes, 4]` with box coordinates. The box coordinates are layout as `[xmin, ymin, xmax, ymax]`. **Required.**
|
||||
|
||||
* **2**: `scores` - tensor of type *T* and shape `[num_batches, num_classes, num_boxes]` with box scores. **Required.**
|
||||
|
||||
**Outputs**:
|
||||
|
||||
* **1**: `selected_outputs` - tensor of type *T_THRESHOLDS* and shape `[number of selected boxes, 6]` containing the selected boxes with score and class as tuples `[class_id, box_score, xmin, ymin, xmax, ymax]`.
|
||||
|
||||
* **2**: `selected_indices` - tensor of type *T_IND* and shape `[number of selected boxes, 1]` the selected indices in the flattened `boxes`, which are absolute values cross batches. Therefore possible valid values are in the range `[0, num_batches * num_boxes - 1]`.
|
||||
|
||||
* **3**: `selected_num` - 1D tensor of type *T_IND* and shape `[num_batches]` representing the number of selected boxes for each batch element.
|
||||
|
||||
When there is no box selected, `selected_num` is filled with `0`. `selected_outputs` is an empty tensor of shape `[0, 6]`, and `selected_indices` is an empty tensor of shape `[0, 1]`.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: floating point type.
|
||||
|
||||
* *T_MAX_BOXES*: integer type.
|
||||
|
||||
* *T_THRESHOLDS*: floating point type.
|
||||
|
||||
* *T_IND*: `int64` or `int32`.
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<layer ... type="MulticlassNonMaxSuppression" ... >
|
||||
<data sort_result="score" output_type="i64"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>3</dim>
|
||||
<dim>100</dim>
|
||||
<dim>4</dim>
|
||||
</port>
|
||||
<port id="1">
|
||||
<dim>3</dim>
|
||||
<dim>5</dim>
|
||||
<dim>100</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5" precision="FP32">
|
||||
<dim>-1</dim> <!-- "-1" means a undefined dimension calculated during the model inference -->
|
||||
<dim>6</dim>
|
||||
</port>
|
||||
<port id="6" precision="I64">
|
||||
<dim>-1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="7" precision="I64">
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
@ -10,10 +10,10 @@
|
||||
std::vector<std::string> disabledTestPatterns() {
|
||||
return {
|
||||
".*ExclusiveAsyncRequests.*",
|
||||
".*reusableCPUStreamsExecutor.*",
|
||||
".*ReusableCPUStreamsExecutor.*",
|
||||
R"(.*SplitLayerTest.*numSplits\=30.*)",
|
||||
// CVS-51758
|
||||
".*PreprocessConversionTest.*oLT=NHWC.*",
|
||||
".*PreprocessDynamicallyInSetBlobTest.*oPRC=0.*oLT=1.*",
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -32,9 +32,6 @@ class IExecutableNetworkInternal;
|
||||
class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IExecutableNetworkInternal> _impl;
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
std::shared_ptr<IExecutableNetwork> actual;
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
* @brief Constructs ExecutableNetwork from the initialized std::shared_ptr
|
||||
@ -51,18 +48,6 @@ public:
|
||||
*/
|
||||
ExecutableNetwork() = default;
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
/**
|
||||
* @deprecated This ctor will be removed in 2022.1
|
||||
* @brief Constructs ExecutableNetwork from the initialized std::shared_ptr
|
||||
* @param exec Initialized shared pointer
|
||||
* @param splg Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin object is destroyed.
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
|
||||
explicit ExecutableNetwork(std::shared_ptr<IExecutableNetwork> exec,
|
||||
std::shared_ptr<details::SharedObjectLoader> splg = {});
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
* @brief Gets the Executable network output Data node information.
|
||||
*
|
||||
|
@ -35,10 +35,6 @@ class ICompletionCallbackWrapper;
|
||||
class INFERENCE_ENGINE_API_CLASS(InferRequest) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IInferRequestInternal> _impl;
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
IInferRequest::Ptr actual;
|
||||
std::shared_ptr<details::ICompletionCallbackWrapper> callback;
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
* @brief Constructs InferRequest from the initialized std::shared_ptr
|
||||
@ -71,18 +67,6 @@ public:
|
||||
*/
|
||||
InferRequest() = default;
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
/**
|
||||
* @deprecated This ctor will be removed in 2022.1
|
||||
* @brief Constructs InferRequest from the initialized std::shared_ptr
|
||||
* @param request Initialized shared pointer
|
||||
* @param splg Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is destroyed.
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
|
||||
explicit InferRequest(IInferRequest::Ptr request,
|
||||
std::shared_ptr<details::SharedObjectLoader> splg = {});
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
* @brief Sets input/output data to infer
|
||||
*
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief A header file that provides wrapper classes for IVariableState
|
||||
* @brief A header file that provides VariableState
|
||||
*
|
||||
* @file ie_memory_state.hpp
|
||||
*/
|
||||
@ -16,21 +16,17 @@
|
||||
#include "ie_api.h"
|
||||
#include "ie_blob.h"
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "ie_imemory_state.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
class IVariableStateInternal;
|
||||
|
||||
/**
|
||||
* @brief C++ exception based error reporting wrapper of API class IVariableState
|
||||
* @brief VariableState class
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(VariableState) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IVariableStateInternal> _impl;
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
std::shared_ptr<IVariableState> actual;
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
* @brief Constructs VariableState from the initialized std::shared_ptr
|
||||
@ -48,55 +44,27 @@ public:
|
||||
*/
|
||||
VariableState() = default;
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
/**
|
||||
* @deprecated This ctor will be removed in 2022.1
|
||||
* @brief constructs VariableState from the initialized std::shared_ptr
|
||||
* @param pState Initialized shared pointer
|
||||
* @param plg Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed.
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("This ctor will be removed in 2022.1")
|
||||
explicit VariableState(std::shared_ptr<IVariableState> pState,
|
||||
std::shared_ptr<details::SharedObjectLoader> plg = {});
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
* @copybrief IVariableState::Reset
|
||||
*
|
||||
* Wraps IVariableState::Reset
|
||||
* @brief Reset internal variable state for relevant infer request,
|
||||
* to a value specified as default for according ReadValue node
|
||||
*/
|
||||
void Reset();
|
||||
|
||||
/**
|
||||
* @copybrief IVariableState::GetName
|
||||
*
|
||||
* Wraps IVariableState::GetName
|
||||
* @brief Gets name of current variable state, if length of array is not enough name is truncated by len, null
|
||||
* terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used.
|
||||
* @return A string representing a state name
|
||||
*/
|
||||
std::string GetName() const;
|
||||
|
||||
/**
|
||||
* @copybrief IVariableState::GetState
|
||||
*
|
||||
* Wraps IVariableState::GetState
|
||||
* @brief Returns the value of the variable state.
|
||||
* @return A blob representing a state
|
||||
*/
|
||||
Blob::CPtr GetState() const;
|
||||
|
||||
/**
|
||||
* @copybrief IVariableState::GetLastState
|
||||
* @deprecated Use IVariableState::SetState instead
|
||||
*
|
||||
* Wraps IVariableState::GetLastState
|
||||
* @return A blob representing a last state
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use VariableState::GetState function instead")
|
||||
Blob::CPtr GetLastState() const;
|
||||
|
||||
/**
|
||||
* @copybrief IVariableState::SetState
|
||||
*
|
||||
* Wraps IVariableState::SetState
|
||||
* @brief Sets the new state for the next inference.
|
||||
* @param state The current state to set
|
||||
*/
|
||||
void SetState(Blob::Ptr state);
|
||||
|
@ -65,6 +65,7 @@ DECLARE_GNA_CONFIG_KEY(DEVICE_MODE);
|
||||
|
||||
DECLARE_GNA_CONFIG_VALUE(AUTO);
|
||||
DECLARE_GNA_CONFIG_VALUE(HW);
|
||||
DECLARE_GNA_CONFIG_VALUE(HW_WITH_SW_FBACK);
|
||||
DECLARE_GNA_CONFIG_VALUE(SW);
|
||||
DECLARE_GNA_CONFIG_VALUE(SW_EXACT);
|
||||
DECLARE_GNA_CONFIG_VALUE(SW_FP32);
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "ie_common.h"
|
||||
#include "ie_icnn_network.hpp"
|
||||
#include "ie_iinfer_request.hpp"
|
||||
#include "ie_imemory_state.hpp"
|
||||
#include "ie_input_info.hpp"
|
||||
#include "ie_parameter.hpp"
|
||||
#include "ie_remote_context.hpp"
|
||||
@ -113,22 +112,6 @@ public:
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::ExecutableNetwork::GetExecGraphInfo instead")
|
||||
virtual StatusCode GetExecGraphInfo(ICNNNetwork::Ptr& graphPtr, ResponseDesc* resp) noexcept = 0;
|
||||
|
||||
/**
|
||||
* @deprecated Use InferRequest::QueryState instead
|
||||
* @brief Gets state control interface for given executable network.
|
||||
*
|
||||
* State control essential for recurrent networks
|
||||
*
|
||||
* @param pState reference to a pointer that receives internal states
|
||||
* @param idx requested index for receiving memory state
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: InferenceEngine::OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for
|
||||
* given index
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
|
||||
virtual StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept = 0;
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
* @brief Sets configuration for current executable network
|
||||
*
|
||||
|
@ -17,7 +17,6 @@
|
||||
#include "ie_blob.h"
|
||||
#include "ie_common.h"
|
||||
#include "ie_preprocess.hpp"
|
||||
#include "ie_imemory_state.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -195,21 +194,6 @@ public:
|
||||
*/
|
||||
virtual InferenceEngine::StatusCode SetBatch(int batch_size, ResponseDesc* resp) noexcept = 0;
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
/**
|
||||
* @brief Gets state control interface for given infer request.
|
||||
*
|
||||
* State control essential for recurrent networks
|
||||
*
|
||||
* @param pState reference to a pointer that receives internal states
|
||||
* @param idx requested index for receiving memory state
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: InferenceEngine::OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for
|
||||
* given index
|
||||
*/
|
||||
virtual StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept = 0;
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
protected:
|
||||
~IInferRequest() = default;
|
||||
};
|
||||
|
@ -1,95 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief a header file for IVariableState interface
|
||||
*
|
||||
* @file ie_imemory_state.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "ie_blob.h"
|
||||
#include "ie_common.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::VariableState C++ wrapper instead
|
||||
* @interface IVariableState
|
||||
* @brief Manages data for reset operations
|
||||
*/
|
||||
class INFERENCE_ENGINE_DEPRECATED("InferenceEngine::") IVariableState {
|
||||
public:
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
/**
|
||||
* @brief A shared pointer to the IVariableState interface
|
||||
*/
|
||||
using Ptr = std::shared_ptr<IVariableState>;
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
* @brief Gets name of current variable state, if length of array is not enough name is truncated by len, null
|
||||
* terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used.
|
||||
*
|
||||
* @param name preallocated buffer for receiving name
|
||||
* @param len Length of the buffer
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: InferenceEngine::OK (0) for success
|
||||
*/
|
||||
virtual StatusCode GetName(char* name, size_t len, ResponseDesc* resp) const noexcept = 0;
|
||||
|
||||
/**
|
||||
* @brief Reset internal variable state for relevant infer request, to a value specified as default for according ReadValue node
|
||||
*
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: InferenceEngine::OK (0) for success*
|
||||
*/
|
||||
virtual StatusCode Reset(ResponseDesc* resp) noexcept = 0;
|
||||
|
||||
/**
|
||||
* @brief Sets the new state for the next inference.
|
||||
*
|
||||
* This method can fail if Blob size does not match the internal state size or precision
|
||||
*
|
||||
* @param newState The data to use as new state
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: InferenceEngine::OK (0) for success
|
||||
*/
|
||||
virtual StatusCode SetState(Blob::Ptr newState, ResponseDesc* resp) noexcept = 0;
|
||||
|
||||
/**
|
||||
* @brief Returns the value of the variable state.
|
||||
*
|
||||
* @param state A reference to a blob containing a variable state
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: InferenceEngine::OK (0) for success
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use GetState function instead")
|
||||
virtual StatusCode GetLastState(Blob::CPtr& state, ResponseDesc* resp) const noexcept {
|
||||
return GetState(state, resp);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns the value of the variable state.
|
||||
*
|
||||
* @param state A reference to a blob containing a variable state
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: InferenceEngine::OK (0) for success
|
||||
*/
|
||||
virtual StatusCode GetState(Blob::CPtr& state, ResponseDesc* resp) const noexcept = 0;
|
||||
};
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
/**
|
||||
* @brief For compatibility reasons.
|
||||
*/
|
||||
using IMemoryState = IVariableState;
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
} // namespace InferenceEngine
|
@ -49,26 +49,6 @@ public:
|
||||
std::swap(ptr, parameter.ptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use ngraph::Variant directly
|
||||
* @brief Creates parameter from variant.
|
||||
* This method creates empty parameter if variant doesn't contain Parameter
|
||||
*
|
||||
* @param var ngraph variant
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
|
||||
Parameter(const std::shared_ptr<ngraph::Variant>& var);
|
||||
|
||||
/**
|
||||
* @deprecated Use ngraph::Variant directly
|
||||
* @brief Creates parameter from variant.
|
||||
* This method creates empty parameter if variant doesn't contain Parameter
|
||||
*
|
||||
* @param var ngraph variant
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
|
||||
Parameter(std::shared_ptr<ngraph::Variant>& var);
|
||||
|
||||
/**
|
||||
* @brief Copy constructor
|
||||
*
|
||||
@ -86,7 +66,8 @@ public:
|
||||
* @param parameter object
|
||||
*/
|
||||
template <class T,
|
||||
typename = typename std::enable_if<!std::is_same<typename std::decay<T>::type, Parameter>::value>::type>
|
||||
typename = typename std::enable_if<!std::is_same<typename std::decay<T>::type, Parameter>::value &&
|
||||
!std::is_abstract<typename std::decay<T>::type>::value>::type>
|
||||
Parameter(T&& parameter) { // NOLINT
|
||||
static_assert(!std::is_same<typename std::decay<T>::type, Parameter>::value, "To prevent recursion");
|
||||
ptr = new RealData<typename std::decay<T>::type>(std::forward<T>(parameter));
|
||||
@ -203,28 +184,6 @@ public:
|
||||
return dyn_cast<typename std::remove_cv<T>::type>(ptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use ngraph::Variant directly
|
||||
* @brief Converts parameter to shared pointer on ngraph::Variant
|
||||
*
|
||||
* @return shared pointer on ngraph::Variant
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
|
||||
std::shared_ptr<ngraph::Variant> asVariant() const;
|
||||
|
||||
/**
|
||||
* @deprecated Use ngraph::Variant directly
|
||||
* @brief Casts to shared pointer on ngraph::Variant
|
||||
*
|
||||
* @return shared pointer on ngraph::Variant
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use ngraph::Variant directly")
|
||||
operator std::shared_ptr<ngraph::Variant>() const {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
return asVariant();
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
|
||||
/**
|
||||
* Dynamic cast to specified type
|
||||
* @tparam T type
|
||||
@ -254,6 +213,21 @@ public:
|
||||
return !(*this == rhs);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Prints underlying object to the given output stream.
|
||||
* Uses operator<< if it is defined, leaves stream unchanged otherwise.
|
||||
* In case of empty parameter or nullptr stream immediately returns.
|
||||
*
|
||||
* @param object Object to be printed to the given output stream.
|
||||
* @param stream Output stream object will be printed to.
|
||||
*/
|
||||
friend void PrintTo(const Parameter& object, std::ostream* stream) {
|
||||
if (object.empty() || !stream) {
|
||||
return;
|
||||
}
|
||||
object.ptr->print(*stream);
|
||||
}
|
||||
|
||||
private:
|
||||
template <class T, class EqualTo>
|
||||
struct CheckOperatorEqual {
|
||||
@ -273,6 +247,24 @@ private:
|
||||
template <class T, class EqualTo = T>
|
||||
struct HasOperatorEqual : CheckOperatorEqual<T, EqualTo>::type {};
|
||||
|
||||
template <class T, class U>
|
||||
struct CheckOutputStreamOperator {
|
||||
template <class V, class W>
|
||||
static auto test(W*) -> decltype(std::declval<V&>() << std::declval<W>(), std::true_type()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename, typename>
|
||||
static auto test(...) -> std::false_type {
|
||||
return {};
|
||||
}
|
||||
|
||||
using type = typename std::is_same<std::true_type, decltype(test<T, U>(nullptr))>::type;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct HasOutputStreamOperator : CheckOutputStreamOperator<std::ostream, T>::type {};
|
||||
|
||||
struct Any {
|
||||
#ifdef __ANDROID__
|
||||
virtual ~Any();
|
||||
@ -282,6 +274,7 @@ private:
|
||||
virtual bool is(const std::type_info&) const = 0;
|
||||
virtual Any* copy() const = 0;
|
||||
virtual bool operator==(const Any& rhs) const = 0;
|
||||
virtual void print(std::ostream&) const = 0;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
@ -318,6 +311,20 @@ private:
|
||||
bool operator==(const Any& rhs) const override {
|
||||
return rhs.is(typeid(T)) && equal<T>(*this, rhs);
|
||||
}
|
||||
|
||||
template <class U>
|
||||
typename std::enable_if<!HasOutputStreamOperator<U>::value, void>::type
|
||||
print(std::ostream& stream, const U& object) const {}
|
||||
|
||||
template <class U>
|
||||
typename std::enable_if<HasOutputStreamOperator<U>::value, void>::type
|
||||
print(std::ostream& stream, const U& object) const {
|
||||
stream << object;
|
||||
}
|
||||
|
||||
void print(std::ostream& stream) const override {
|
||||
print<T>(stream, get());
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
|
@ -1,69 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief This is a header file with common inference engine definitions
|
||||
*
|
||||
* @file ie_unicode.hpp
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef UNICODE
|
||||
typedef wchar_t tchar;
|
||||
typedef std::wstring file_name_t;
|
||||
#else
|
||||
typedef char tchar;
|
||||
typedef std::string file_name_t;
|
||||
#endif
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
/**
|
||||
* @deprecated Use OS-native conversion utilities
|
||||
* @brief Conversion from possibly-wide character string to a single-byte chain.
|
||||
* @param str A possibly-wide character string
|
||||
* @return A single-byte character string
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use OS-native conversion utilities")
|
||||
inline std::string fileNameToString(const file_name_t& str) {
|
||||
#ifdef UNICODE
|
||||
size_t maxlen = (str.length() + 1) * sizeof(wchar_t) / sizeof(char);
|
||||
std::vector<char> mbstr(maxlen);
|
||||
mbstr[0] = 0;
|
||||
std::wcstombs(&mbstr[0], str.c_str(), maxlen);
|
||||
std::string res = std::string(&mbstr[0]);
|
||||
return res;
|
||||
#else
|
||||
return str;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use OS-native conversion utilities
|
||||
* @brief Conversion from single-byte character string to a possibly-wide one
|
||||
* @param str A single-byte character string
|
||||
* @return A possibly-wide character string
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use OS-native conversion utilities")
|
||||
inline file_name_t stringToFileName(const std::string& str) {
|
||||
#ifdef UNICODE
|
||||
size_t maxlen = str.length() + 1;
|
||||
std::vector<wchar_t> wcstr(maxlen);
|
||||
wcstr[0] = 0;
|
||||
std::mbstowcs(&wcstr[0], str.c_str(), maxlen);
|
||||
file_name_t res = file_name_t(&wcstr[0]);
|
||||
return res;
|
||||
#else
|
||||
return str;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace InferenceEngine
|
@ -236,7 +236,8 @@ float getGnaFrequencyMHz() {
|
||||
const uint8_t cannon_lake_model = 102;
|
||||
const uint8_t gemini_lake_model = 122;
|
||||
const uint8_t ice_lake_model = 126;
|
||||
const uint8_t next_model = 140;
|
||||
const uint8_t tgl_model = 140;
|
||||
const uint8_t next_model = 151;
|
||||
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
family = (eax >> 8) & 0xF;
|
||||
@ -254,6 +255,7 @@ float getGnaFrequencyMHz() {
|
||||
switch (model) {
|
||||
case cannon_lake_model:
|
||||
case ice_lake_model:
|
||||
case tgl_model:
|
||||
case next_model:
|
||||
return 400;
|
||||
case gemini_lake_model:
|
||||
@ -287,13 +289,14 @@ void printReferenceCompareResults(score_error_t const& totalError, size_t frames
|
||||
/**
|
||||
* @brief Print a report on the performance counts
|
||||
* @param utterancePerfMap reference to a map to store performance counters
|
||||
* @param callsNum frame index
|
||||
* @param numberOfFrames number of frames
|
||||
* @param stream output stream
|
||||
* @param fullDeviceName full device name string
|
||||
* @param numberOfFramesOnHw number of frames delivered to GNA HW
|
||||
* @return none.
|
||||
*/
|
||||
void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t callsNum, std::ostream& stream,
|
||||
std::string fullDeviceName) {
|
||||
void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t numberOfFrames,
|
||||
std::ostream& stream, std::string fullDeviceName, const uint64_t numberOfFramesOnHw) {
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
|
||||
stream << std::endl << "Performance counts:" << std::endl;
|
||||
stream << std::setw(10) << std::right << ""
|
||||
@ -305,29 +308,29 @@ void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEn
|
||||
stream << std::setw(46) << "(ms)";
|
||||
stream << std::setw(24) << "(us per call)";
|
||||
stream << std::endl;
|
||||
|
||||
// if GNA HW counters
|
||||
// get frequency of GNA module
|
||||
float freq = getGnaFrequencyMHz();
|
||||
for (const auto& it : utterancePerfMap) {
|
||||
std::string const& counter_name = it.first;
|
||||
float current_units = static_cast<float>(it.second.realTime_uSec);
|
||||
float call_units = current_units / callsNum;
|
||||
// if GNA HW counters
|
||||
// get frequency of GNA module
|
||||
float freq = getGnaFrequencyMHz();
|
||||
current_units /= freq * 1000;
|
||||
call_units /= freq;
|
||||
float current_units_us = static_cast<float>(it.second.realTime_uSec) / freq;
|
||||
float call_units_us = current_units_us / numberOfFrames;
|
||||
if (FLAGS_d.find("GNA") != std::string::npos) {
|
||||
stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
|
||||
} else {
|
||||
stream << std::setw(30) << std::left << counter_name;
|
||||
}
|
||||
stream << std::setw(16) << std::right << current_units;
|
||||
stream << std::setw(21) << std::right << call_units;
|
||||
stream << std::setw(16) << std::right << current_units_us / 1000;
|
||||
stream << std::setw(21) << std::right << call_units_us;
|
||||
stream << std::endl;
|
||||
}
|
||||
stream << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Full device name: " << fullDeviceName << std::endl;
|
||||
std::cout << std::endl;
|
||||
stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
|
||||
stream << "/" << numberOfFrames;
|
||||
stream << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -346,16 +349,20 @@ void getPerformanceCounters(InferenceEngine::InferRequest& request, std::map<std
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Summarize performance counts
|
||||
* @brief Summarize performance counts and total number of frames executed on the GNA HW device
|
||||
* @param perfCounters reference to a map to get performance counters
|
||||
* @param totalPerfCounters reference to a map to save total performance counters
|
||||
* @param totalRunsOnHw reference to a total number of frames computed on GNA HW
|
||||
* @return none.
|
||||
*/
|
||||
void sumPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& perfCounters,
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters) {
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters, uint64_t& totalRunsOnHw) {
|
||||
auto runOnHw = false;
|
||||
for (const auto& pair : perfCounters) {
|
||||
totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec;
|
||||
runOnHw |= pair.second.realTime_uSec > 0; // if realTime is above zero, that means that a primitive was executed on the device
|
||||
}
|
||||
totalRunsOnHw += runOnHw;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -443,6 +450,7 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
|
||||
"GPU",
|
||||
"GNA_AUTO",
|
||||
"GNA_HW",
|
||||
"GNA_HW_WITH_SW_FBACK",
|
||||
"GNA_SW_EXACT",
|
||||
"GNA_SW",
|
||||
"GNA_SW_FP32",
|
||||
@ -829,6 +837,7 @@ int main(int argc, char* argv[]) {
|
||||
/** Work with each utterance **/
|
||||
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> utterancePerfMap;
|
||||
uint64_t totalNumberOfRunsOnHw = 0;
|
||||
std::string uttName;
|
||||
uint32_t numFrames(0), n(0);
|
||||
std::vector<uint32_t> numFrameElementsInput;
|
||||
@ -984,7 +993,7 @@ int main(int argc, char* argv[]) {
|
||||
// retrieve new counters
|
||||
getPerformanceCounters(inferRequest.inferRequest, callPerfMap);
|
||||
// summarize retrieved counters with all previous
|
||||
sumPerformanceCounters(callPerfMap, utterancePerfMap);
|
||||
sumPerformanceCounters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw);
|
||||
}
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
@ -1092,7 +1101,7 @@ int main(int argc, char* argv[]) {
|
||||
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms" << std::endl;
|
||||
if (FLAGS_pc) {
|
||||
// print performance results
|
||||
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d));
|
||||
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d), totalNumberOfRunsOnHw);
|
||||
}
|
||||
if (!FLAGS_r.empty()) {
|
||||
// print statistical score error
|
||||
|
@ -21,10 +21,9 @@ static const char model_message[] = "Required. Path to an .xml file with a train
|
||||
|
||||
/// @brief message for assigning cnn calculation to device
|
||||
static const char target_device_message[] = "Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, "
|
||||
"GNA_SW_FP32, "
|
||||
"GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, "
|
||||
"GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
|
||||
" as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown "
|
||||
"below. "
|
||||
" as a secondary (e.g. HETERO:GNA,CPU) are supported. "
|
||||
"The sample will look for a suitable plugin for device specified.";
|
||||
|
||||
/// @brief message for execution target
|
||||
|
@ -29,12 +29,15 @@ endif()
|
||||
|
||||
#
|
||||
# Shared plugin library
|
||||
#
|
||||
#
|
||||
|
||||
ie_add_plugin(NAME ${TARGET_NAME}
|
||||
DEVICE_NAME "GNA"
|
||||
SOURCES ${SOURCES} ${HEADERS})
|
||||
|
||||
# Enable support of CC for the plugin
|
||||
ie_mark_target_as_cc(${TARGET_NAME})
|
||||
|
||||
# saving rpath to GNA shared library to be used by CI
|
||||
log_rpath_from_dir(GNA ${libGNA_LIBRARIES_BASE_PATH})
|
||||
|
||||
@ -67,7 +70,8 @@ target_compile_definitions(${TARGET_NAME}_test_static
|
||||
|
||||
target_link_libraries(${TARGET_NAME}_test_static PUBLIC inference_engine_preproc_s inference_engine_transformations libGNA::API)
|
||||
target_include_directories(${TARGET_NAME}_test_static PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
$<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
$<TARGET_PROPERTY:inference_engine_legacy,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
PRIVATE $<TARGET_PROPERTY:openvino::conditional_compilation,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
set_target_properties(${TARGET_NAME}_test_static PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_test_static)
|
||||
|
||||
set_target_properties(${TARGET_NAME} ${TARGET_NAME}_test_static
|
||||
@ -76,6 +80,6 @@ set_target_properties(${TARGET_NAME} ${TARGET_NAME}_test_static
|
||||
# install
|
||||
|
||||
file(GLOB_RECURSE source_list "${libGNA_LIBRARIES_BASE_PATH}/*${CMAKE_SHARED_LIBRARY_SUFFIX}*")
|
||||
install(FILES ${source_list}
|
||||
install(FILES ${source_list}
|
||||
DESTINATION ${IE_CPACK_IE_DIR}/external/gna/lib
|
||||
COMPONENT gna)
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "layer_quantizer.hpp"
|
||||
#include "scale_factor_calc.hpp"
|
||||
#include "weights_converter.hpp"
|
||||
#include "gna_itt.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
|
||||
@ -40,6 +41,7 @@ class ModelQuantizer {
|
||||
|
||||
template <class PreQuantisationCb>
|
||||
InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork &model, const PreQuantisationCb &cb, std::vector<float> scaleFactor) const {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ModelQuantizer::quantize");
|
||||
auto visitor = [&](InferenceEngine::CNNLayerPtr lp) {
|
||||
auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
|
||||
transformLayer(newLayer, WeightsConverter());
|
||||
|
@ -96,14 +96,12 @@ void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t l
|
||||
const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
|
||||
checkGna2Status(status, "Gna2RequestConfigEnableActiveList");
|
||||
}
|
||||
void GNADeviceHelper::propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
|
||||
wait(propagate(requestConfigId, gna2AccelerationMode));
|
||||
}
|
||||
|
||||
uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
uint32_t reqId{};
|
||||
if (gna2AccelerationMode == Gna2AccelerationModeHardware &&
|
||||
if ((gna2AccelerationMode == Gna2AccelerationModeHardware ||
|
||||
gna2AccelerationMode == Gna2AccelerationModeHardwareWithSoftwareFallback) &&
|
||||
detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) {
|
||||
gnawarn() << "GNA Device not detected, consider using other mode of acceleration";
|
||||
}
|
||||
@ -541,6 +539,8 @@ void GNADeviceHelper::updateGnaPerfCounters() {
|
||||
#if GNA_LIB_VER == 2
|
||||
instrumentationTotal[0] = instrumentationResults[0];
|
||||
instrumentationTotal[1] = instrumentationResults[1];
|
||||
instrumentationResults[0] = 0;
|
||||
instrumentationResults[1] = 0;
|
||||
#else
|
||||
nGNAPerfResultsTotal.hw.stall = nGNAPerfResults.hw.stall;
|
||||
nGNAPerfResultsTotal.hw.total = nGNAPerfResults.hw.total;
|
||||
|
@ -117,18 +117,12 @@ public:
|
||||
uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);
|
||||
|
||||
#if GNA_LIB_VER == 1
|
||||
void propagateSync(const intel_nnet_type_t *pNeuralNetwork,
|
||||
const uint32_t *pActiveIndices,
|
||||
uint32_t nActiveIndices,
|
||||
intel_gna_proc_t nGNAProcType);
|
||||
|
||||
uint32_t propagate(const intel_nnet_type_t *pNeuralNetwork,
|
||||
const uint32_t *pActiveIndices,
|
||||
uint32_t nActiveIndices,
|
||||
intel_gna_proc_t nGNAProcType);
|
||||
#else
|
||||
void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices);
|
||||
void propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
|
||||
uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
|
||||
uint32_t createModel(Gna2Model& gnaModel) const;
|
||||
void releaseModel(const uint32_t model_id);
|
||||
|
inference-engine/src/gna_plugin/gna_itt.hpp (new file)
@ -0,0 +1,21 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Defines openvino domains for tracing
|
||||
* @file gna_itt.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/itt.hpp>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(GNAPlugin);
|
||||
OV_ITT_DOMAIN(GNA_LT);
|
||||
}
|
||||
}
|
||||
}
|
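The new gna_itt.hpp header only declares the tracing domains; they are consumed through OV_ITT_SCOPED_TASK in the passes instrumented below. A hedged sketch of the intended usage pattern (the pass name here is hypothetical):

#include <openvino/itt.hpp>
#include "gna_itt.hpp"

namespace GNAPluginNS {
// Hypothetical pass used only to illustrate the instrumentation pattern added in this change.
void RunSomePass() {
    OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RunSomePass");
    // ... pass body; the scoped task ends automatically when this scope closes ...
}
}  // namespace GNAPluginNS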
@ -37,7 +37,7 @@
|
||||
#include <layers/gna_fake_quantize_layer.hpp>
|
||||
#include "gna_graph_patterns.hpp"
|
||||
#include "gna_tensor_tools.hpp"
|
||||
#include <debug.h>
|
||||
#include "gna_itt.hpp"
|
||||
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <legacy/convert_function_to_cnn_network.hpp>
|
||||
@ -391,6 +391,7 @@ GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
|
||||
}
|
||||
|
||||
void GNAPlugin::Init() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "Init");
|
||||
dnn = std::make_shared<backend::AMIntelDNN>(backend::AMIntelDNN());
|
||||
inputsDesc = std::make_shared<GNAPluginNS::InputDesc>(GNAPluginNS::InputDesc());
|
||||
gnaFlags = std::make_shared<GNAPluginNS::GNAFlags>(GNAPluginNS::GNAFlags());
|
||||
@ -401,6 +402,7 @@ void GNAPlugin::Init() {
|
||||
}
|
||||
|
||||
void GNAPlugin::InitGNADevice() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice");
|
||||
#if GNA_LIB_VER == 1
|
||||
gnadevice = std::make_shared<GNADeviceHelper>(gnaFlags->gna_lib_async_threads_num,
|
||||
gnaFlags->gna_openmp_multithreading,
|
||||
@ -419,6 +421,7 @@ void GNAPlugin::InitGNADevice() {
|
||||
}
|
||||
|
||||
void GNAPlugin::UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork & network) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateGnaQuantModeFromNetwork");
|
||||
// fp32 emulation mode doesn't need any modifications to configuration
|
||||
if (config.gnaFlags.sw_fp32) return;
|
||||
|
||||
@ -454,6 +457,7 @@ void GNAPlugin::UpdateGnaQuantModeFromNetwork(InferenceEngine::CNNNetwork & netw
|
||||
}
|
||||
|
||||
void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & network) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UpdateInputScaleFromNetwork");
|
||||
// fp32 emulation mode doesn't need any modifications to configuration
|
||||
if (config.gnaFlags.sw_fp32) return;
|
||||
|
||||
@ -561,6 +565,7 @@ bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer)
|
||||
}
|
||||
|
||||
void GNAPlugin::FillInputsAndOutputsTranspositionInfo(const InferenceEngine::CNNNetwork& net) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FillInputsAndOutputsTranspositionInfo");
|
||||
auto printTranspositionInfo = [](const std::vector<TranspositionInfo> &transpositionInfo) {
|
||||
for (const auto &transpositionInfoPart : transpositionInfo) {
|
||||
gnalog() << "transpose=" << transpositionInfoPart.transpose << " rows_num=" << transpositionInfoPart.num_transpose_rows
|
||||
@ -663,6 +668,7 @@ void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer,
|
||||
#endif
|
||||
|
||||
void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "LoadNetwork");
|
||||
std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork;
|
||||
if (_network.getFunction()) {
|
||||
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
|
||||
|
@ -23,6 +23,7 @@ static const caseless_unordered_map<std::string, uint32_t> supported_values = {
|
||||
{GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
|
||||
};
|
||||
static const std::vector<std::string> supported_values_on_gna2 = {
|
||||
GNAConfigParams::GNA_HW_WITH_SW_FBACK,
|
||||
GNAConfigParams::GNA_GEN,
|
||||
GNAConfigParams::GNA_GEN_EXACT,
|
||||
GNAConfigParams::GNA_SSE,
|
||||
@ -34,18 +35,19 @@ static const std::vector<std::string> supported_values_on_gna2 = {
|
||||
};
|
||||
#else
|
||||
static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, bool>> supported_values = {
|
||||
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
|
||||
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
|
||||
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
|
||||
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
|
||||
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
|
||||
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
|
||||
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
|
||||
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
|
||||
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
|
||||
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
|
||||
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
|
||||
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
|
||||
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
|
||||
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
|
||||
{GNAConfigParams::GNA_HW_WITH_SW_FBACK, {Gna2AccelerationModeHardwareWithSoftwareFallback, false}},
|
||||
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
|
||||
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
|
||||
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
|
||||
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
|
||||
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
|
||||
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
|
||||
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
|
||||
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
|
||||
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
|
||||
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
|
||||
};
|
||||
#endif
|
||||
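Each entry in the table above maps a GNA device-mode string to a {Gna2 acceleration mode, exact-mode flag} pair. A simplified, self-contained sketch of that lookup, where std::map and the Mode enum stand in for the plugin's caseless map and the real Gna2 enums:

#include <iostream>
#include <map>
#include <string>
#include <utility>

enum class Mode { Auto, Hardware, HardwareWithSoftwareFallback, Software };  // illustrative only

int main() {
    const std::map<std::string, std::pair<Mode, bool>> modes = {
        {"GNA_HW",               {Mode::Hardware, false}},
        {"GNA_HW_WITH_SW_FBACK", {Mode::HardwareWithSoftwareFallback, false}},
        {"GNA_SW_EXACT",         {Mode::Software, true}},
    };
    auto it = modes.find("GNA_HW_WITH_SW_FBACK");
    if (it != modes.end())
        std::cout << "exact bit: " << std::boolalpha << it->second.second << "\n";
}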
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "gna_graph_patterns.hpp"
|
||||
#include "gna_data_types.hpp"
|
||||
#include "gna_tensor_tools.hpp"
|
||||
#include "gna_itt.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
@ -112,6 +113,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
|
||||
*/
|
||||
static CNNLayerPtr InsertCopyLayer(CNNLayerPtr prevLayer, CNNLayerPtr nextLayer, int beforeIdx,
|
||||
std::shared_ptr<IPassManager> passmanager, std::string copyLayerType) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayer");
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(prevLayer);
|
||||
std::string copyName = copyLayerType + std::string("_") + std::to_string(passmanager->getIntVar(copyLayersCounter)++);
|
||||
gnalog() << "Inserted " << copyName << " between: " << prevLayer->name << " and " << nextLayer->name << std::endl;
|
||||
@ -257,6 +259,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
|
||||
}
|
||||
|
||||
void InsertDiagonalLayerPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertDiagonalLayerPass");
|
||||
bool lowPrecision = getPassManager()->isLowPrecision();
|
||||
|
||||
for (auto & l : *pLayers) {
|
||||
@ -304,6 +307,7 @@ void InsertDiagonalLayerPass::run() {
|
||||
}
|
||||
|
||||
void HandleMultipleActivationsForTheLayerPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "HandleMultipleActivationsForTheLayerPass");
|
||||
// found layer followed by multiple activations
|
||||
for (auto & l : *pLayers) {
|
||||
CNNLayerSet activations;
|
||||
@ -333,6 +337,7 @@ void HandleMultipleActivationsForTheLayerPass::run() {
|
||||
}
|
||||
|
||||
void ForbidActivationFusingPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ForbidActivationFusingPass");
|
||||
for (auto& l : *pLayers) {
|
||||
if (LayerInfo(l).isActivation()) {
|
||||
auto prevLayer = CNNNetPrevLayer(l);
|
||||
@ -370,6 +375,7 @@ namespace {
|
||||
} // namespace
|
||||
|
||||
void ReorderMaxPoolPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderMaxPoolPass");
|
||||
// detecting following pattern
|
||||
// conv->activation->maxpooling
|
||||
// changing it to conv->maxpooling->activation
|
||||
@ -398,6 +404,7 @@ void ReorderMaxPoolPass::run() {
|
||||
}
|
||||
|
||||
void SubstituteSoftSignPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstituteSoftSignPass");
|
||||
//detecting following pattern
|
||||
// irv7 model: irv10 model:
|
||||
// a layer a layer
|
||||
@ -501,6 +508,7 @@ void SubstituteSoftSignPass::run() {
|
||||
}
|
||||
}
|
||||
void SubstitutePReluPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstitutePReluPass");
|
||||
auto getScale = [](CNNLayer* layer) {
|
||||
auto powerCandidate = LayerInfo(layer);
|
||||
if (!powerCandidate.isPower()) return 0.0f;
|
||||
@ -606,6 +614,7 @@ void SubstitutePReluPass::run() {
|
||||
}
|
||||
|
||||
void ReversePermutationsPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReversePermutationsPass");
|
||||
std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
|
||||
= [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
|
||||
if (CNNNetHasPrevLayer(layer.get())) {
|
||||
@ -698,6 +707,7 @@ void ReversePermutationsPass::run() {
|
||||
}
|
||||
|
||||
void RemovePermutationsNHWCToNCHWPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemovePermutationsNHWCToNCHWPass");
|
||||
std::set<CNNLayerPtr> permutations_to_remove;
|
||||
std::list<std::pair<CNNLayerPtr, CNNLayerPtr>> nhwc_layout_patterns;
|
||||
for (auto& l : *pLayers) {
|
||||
@ -781,6 +791,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {
|
||||
}
|
||||
|
||||
void InsertIdentityLayerPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertIdentityLayerPass");
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
|
||||
auto createIdentityLayer = [quantized, this](const TensorDesc& tensorDesc) {
|
||||
int numOfIdentityLayers = this->getPassManager()->getIntVar(identityLayersCounterName)++;
|
||||
@ -898,6 +909,7 @@ void InsertIdentityLayerPass::run() {
|
||||
}
|
||||
|
||||
void InsertCopyLayerPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertCopyLayerPass");
|
||||
// Copy layer insertion happens in few cases:
|
||||
// Crop output goes to concat layer -> copy layer insertion
|
||||
// Split part of input goes to concat layer -> copy layer insertion
|
||||
@ -1020,6 +1032,7 @@ void InsertCopyLayerPass::run() {
|
||||
}
|
||||
|
||||
void FlattenTrivialConcatPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FlattenTrivialConcatPass");
|
||||
// change all trivial concatenations (concatenation where output buffer is a buffer made by appending input buffers)
|
||||
// by reshaping its inputs to 1 x total_input_size and its output to 1 x total_concat_size and changing the axis to 1
// for example if a 4D concat has unaligned inputs then ConcatAlignFilters need to be used if sizes before
|
||||
@ -1103,6 +1116,7 @@ void FlattenTrivialConcatPass::run() {
|
||||
}
|
||||
|
||||
void InsertConcatAligningFilterPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass");
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
|
||||
|
||||
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) {
|
||||
@ -1221,6 +1235,7 @@ void InsertConcatAligningFilterPass::run() {
|
||||
}
|
||||
|
||||
void ReorderConcatInputsPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderConcatInputsPass");
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
|
||||
// alignment specifics are not required in fp32 mode
|
||||
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
|
||||
@ -1318,6 +1333,7 @@ void ReorderConcatInputsPass::run() {
|
||||
}
|
||||
|
||||
void InsertSplitAligningFilterPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertSplitAligningFilterPass");
|
||||
// currently split layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this is not necessary but is useful for testing
|
||||
const int bytesPerSplitElement = 2;
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
|
||||
@ -1437,6 +1453,7 @@ static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {
|
||||
}
|
||||
|
||||
void EltwiseSplitOverChannelsPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "EltwiseSplitOverChannelsPass");
|
||||
if (getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo == Policy::GNAAffineDiagonal::UNLIMIT) {
|
||||
return;
|
||||
}
|
||||
@ -1552,6 +1569,7 @@ void EltwiseSplitOverChannelsPass::run() {
|
||||
}
|
||||
|
||||
void SubstituteScaleShiftBroadCastPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "SubstituteScaleShiftBroadCastPass");
|
||||
std::map<std::string, InferenceEngine::SizeVector> reshaped_data;
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
|
||||
|
||||
@ -1633,6 +1651,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
}
|
||||
|
||||
void BroadcastConstPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "BroadcastConstPass");
|
||||
for (auto constLayer : *pLayers) {
|
||||
if (!LayerInfo(constLayer).isConst()) {
|
||||
continue;
|
||||
@ -1685,6 +1704,7 @@ void BroadcastConstPass::run() {
|
||||
}
|
||||
|
||||
void InsertIdentityToLSTMCellPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertIdentityToLSTMCellPass");
|
||||
for (auto layer : *pLayers) {
|
||||
if (layer->type == "LSTMCell") {
|
||||
// This fixes the cases when both functional and non-functional outputs are mixed (or no outputs are used)
|
||||
@ -1722,6 +1742,7 @@ void InsertIdentityToLSTMCellPass::run() {
|
||||
}
|
||||
|
||||
void BreakFusingOfOutputLayersPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "BreakFusingOfOutputLayersPass");
|
||||
#if GNA_LIB_VER == 1
|
||||
return;
|
||||
#endif
|
||||
@ -1765,6 +1786,7 @@ void BreakFusingOfOutputLayersPass::run() {
|
||||
}
|
||||
|
||||
void UnrollLSTMCellPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UnrollLSTMCellPass");
|
||||
InferenceEngine::NetPass::UnrollRNN_if(getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
|
||||
if (rnn.clip != 0.0f)
|
||||
return true;
|
||||
@ -1781,6 +1803,7 @@ void UnrollLSTMCellPass::run() {
|
||||
}
|
||||
|
||||
void UnrollTIPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "UnrollTIPass");
|
||||
auto sts = InferenceEngine::NetPass::UnrollTI(getPassManager()->getNetwork());
|
||||
if (!sts) {
|
||||
THROW_GNA_EXCEPTION << "TensorIterator layer cannot be unrolled!";
|
||||
@ -1788,6 +1811,7 @@ void UnrollTIPass::run() {
|
||||
}
|
||||
|
||||
void RemoveConstPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemoveConstPass");
|
||||
auto network = getPassManager()->getNetwork();
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
auto & icnnnet = static_cast<ICNNNetwork &>(network);
|
||||
@ -1801,6 +1825,7 @@ void RemoveConstPass::run() {
|
||||
}
|
||||
|
||||
void RemoveSingleInputConcatPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemoveSingleInputConcatPass");
|
||||
for (auto &l : *pLayers) {
|
||||
if (l->type == "Concat") {
|
||||
auto concat = dynamic_cast<ConcatLayer*>(l.get());
|
||||
@ -1828,6 +1853,7 @@ void RemoveSingleInputConcatPass::run() {
|
||||
}
|
||||
|
||||
void FuseMultipleIdentitiesPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FuseMultipleIdentitiesPass");
|
||||
for (auto &l : *pLayers) {
|
||||
if (l->insData.empty()) continue;
|
||||
|
||||
@ -1909,6 +1935,7 @@ void FuseMultipleIdentitiesPass::run() {
|
||||
}
|
||||
|
||||
void FuseFQIntoWeightsPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FuseFQIntoWeightsPass");
|
||||
auto isNonFunctional = [](CNNLayerPtr ptr) {
|
||||
return LayerInfo(ptr).isNonFunctional();
|
||||
};
|
||||
@ -2067,6 +2094,7 @@ void FuseFQIntoWeightsPass::run() {
|
||||
}
|
||||
|
||||
void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "MoveFakeQuantizeLayerIntoQuantParamsPass");
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
|
||||
if (!quantized) {
|
||||
return;
|
||||
@ -2268,6 +2296,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
|
||||
}
|
||||
|
||||
void TransposeWeightsFromNCHWToNHWCPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "TransposeWeightsFromNCHWToNHWCPass");
|
||||
if (!MustBeConvertedFromNCHWToNHWC(*pLayers)) return;
|
||||
|
||||
auto printTranspositionInfo = [](const std::vector<TranspositionInfo> &transpositionInfo) {
|
||||
|
@ -1,6 +1,7 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
|
||||
|
||||
@ -107,6 +108,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> matmul_node,
|
||||
}
|
||||
|
||||
ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() {
|
||||
MATCHER_SCOPE(ConvertMatmulToPointWiseConvolution);
|
||||
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
|
||||
auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({const_input,
|
||||
ngraph::pattern::wrap_type<ngraph::opset7::Constant>(),
|
||||
@ -121,11 +123,12 @@ ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() {
|
||||
return Convert(pattern_map.at(matmul).get_node_shared_ptr(), nullptr, nullptr, nullptr);
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, "ConvertMatmulToPointWiseConvolution");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
||||
ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseConvolution() {
|
||||
MATCHER_SCOPE(ConvertMatmulWithBiasToPointWiseConvolution);
|
||||
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
|
||||
auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({const_input,
|
||||
ngraph::pattern::wrap_type<ngraph::opset7::Constant>(),
|
||||
@ -143,11 +146,12 @@ ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseCon
|
||||
pattern_map.at(bias).get_node_shared_ptr(), nullptr);
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(add, "ConvertMatmulWithBiasToPointWiseConvolution");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(add, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
||||
ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolution() {
|
||||
MATCHER_SCOPE(ConvertMatmulWithFqToPointWiseConvolution);
|
||||
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
|
||||
auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({const_input,
|
||||
ngraph::pattern::wrap_type<ngraph::opset7::Constant>(),
|
||||
@ -175,6 +179,6 @@ ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolu
|
||||
pattern_map.at(out_fq).get_node_shared_ptr());
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, "ConvertMatmulWithFqToPointWiseConvolution");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
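The repeated change in this file swaps hard-coded matcher names for the matcher_name identifier introduced by MATCHER_SCOPE, keeping the registered matcher consistent with its conditional-compilation region. A simplified stand-in macro to illustrate the shape of the pattern (not the real macro from openvino/cc/ngraph/itt.hpp):

#include <iostream>
#include <string>

// Simplified stand-in; the real MATCHER_SCOPE also ties the name to conditional compilation.
#define MATCHER_SCOPE_SKETCH(tag) const std::string matcher_name{#tag}

static void RegisterMatcherSketch() {
    MATCHER_SCOPE_SKETCH(ConvertMatmulToPointWiseConvolution);
    // With the real macro, matcher_name feeds ngraph::pattern::Matcher instead of a string literal.
    std::cout << "registering matcher: " << matcher_name << "\n";
}

int main() { RegisterMatcherSketch(); }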
@ -1,6 +1,7 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
|
||||
|
||||
@ -16,6 +17,7 @@ using namespace GNAPluginNS;
|
||||
NGRAPH_RTTI_DEFINITION(InsertTransposeAfterConvOrPool, "InsertTransposeAfterConvOrPool", 0);
|
||||
|
||||
bool InsertTransposeAfterConvOrPool::run_on_function(std::shared_ptr<ngraph::Function> f) {
|
||||
RUN_ON_FUNCTION_SCOPE(InsertTransposeAfterConvOrPool);
|
||||
bool is_graph_modfied = false;
|
||||
for (auto& node : f->get_ordered_ops()) {
|
||||
if (std::dynamic_pointer_cast<ngraph::opset7::Convolution>(node) == nullptr &&
|
||||
|
@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include "transformations/insert_transpose_before_matmul.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
@ -13,6 +15,7 @@ using namespace GNAPluginNS;
|
||||
NGRAPH_RTTI_DEFINITION(InsertTransposeBeforeMatmul, "InsertTransposeBeforeMatmul", 0);
|
||||
|
||||
InsertTransposeBeforeMatmul::InsertTransposeBeforeMatmul() {
|
||||
MATCHER_SCOPE(InsertTransposeBeforeMatmul);
|
||||
auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>({ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()},
|
||||
ngraph::pattern::rank_equals(2));
|
||||
@ -59,6 +62,6 @@ InsertTransposeBeforeMatmul::InsertTransposeBeforeMatmul() {
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(root, "InsertTransposeBeforeMatmul");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(root, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include "transformations/remove_extra_reshapes.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
@ -12,6 +14,7 @@ using namespace GNAPluginNS;
|
||||
NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);
|
||||
|
||||
RemoveExtraReshapes::RemoveExtraReshapes() {
|
||||
MATCHER_SCOPE(RemoveExtraReshapes);
|
||||
const auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>();
|
||||
const auto pooling = ngraph::pattern::wrap_type<ngraph::opset7::MaxPool>({reshape});
|
||||
|
||||
@ -26,6 +29,6 @@ RemoveExtraReshapes::RemoveExtraReshapes() {
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(pooling, "RemoveExtraReshapes");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(pooling, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include "transformations/reorder_activation_and_pooling.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
@ -15,6 +17,7 @@ using namespace GNAPluginNS;
|
||||
NGRAPH_RTTI_DEFINITION(ReorderActivationAndPooling, "ReorderActivationAndPooling", 0);
|
||||
|
||||
ReorderActivationAndPooling::ReorderActivationAndPooling() {
|
||||
MATCHER_SCOPE(ReorderActivationAndPooling);
|
||||
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, ngraph::pattern::any_input()});
|
||||
@ -63,6 +66,6 @@ ReorderActivationAndPooling::ReorderActivationAndPooling() {
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, "ReorderActivationAndPooling");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(pool, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include "transformations/split_convolution_with_large_buffer_size.hpp"
|
||||
|
||||
@ -77,6 +78,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
|
||||
}
|
||||
|
||||
SplitConvolution::SplitConvolution() {
|
||||
MATCHER_SCOPE(SplitConvolution);
|
||||
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
|
||||
@ -85,11 +87,12 @@ SplitConvolution::SplitConvolution() {
|
||||
return Convert(pattern_map.at(conv).get_node_shared_ptr(), nullptr, nullptr, nullptr);
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(conv, "SplitConvolution");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(conv, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
||||
SplitConvolutionWithBias::SplitConvolutionWithBias() {
|
||||
MATCHER_SCOPE(SplitConvolutionWithBias);
|
||||
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
|
||||
@ -101,11 +104,12 @@ SplitConvolutionWithBias::SplitConvolutionWithBias() {
|
||||
pattern_map.at(bias).get_node_shared_ptr(), nullptr);
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(add, "SplitConvolutionWithBias");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(add, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
|
||||
SplitConvolutionWithFq::SplitConvolutionWithFq() {
|
||||
MATCHER_SCOPE(SplitConvolutionWithFq);
|
||||
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
|
||||
@ -126,6 +130,6 @@ SplitConvolutionWithFq::SplitConvolutionWithFq() {
|
||||
return Convert(pattern_map.at(conv).get_node_shared_ptr(), add_node, bias_node, pattern_map.at(out_fq).get_node_shared_ptr());
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, "SplitConvolutionWithFq");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
@ -2,6 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <openvino/cc/ngraph/itt.hpp>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
@ -19,6 +21,7 @@ using namespace GNAPluginNS;
|
||||
NGRAPH_RTTI_DEFINITION(SwapInputMatMul, "SwapInputMatMul", 0);
|
||||
|
||||
SwapInputMatMul::SwapInputMatMul() {
|
||||
MATCHER_SCOPE(SwapInputMatMul);
|
||||
auto matmul = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({ngraph::pattern::any_input(
|
||||
ngraph::pattern::has_static_shape()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape())},
|
||||
ngraph::pattern::has_static_shape());
|
||||
@ -95,6 +98,6 @@ SwapInputMatMul::SwapInputMatMul() {
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, "SwapInputMatMul");
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(matmul, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
@ -25,47 +25,15 @@ ExecutableNetwork::ExecutableNetwork(const details::SharedObjectLoader& so,
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
ExecutableNetwork::ExecutableNetwork(IExecutableNetwork::Ptr exec,
|
||||
std::shared_ptr<details::SharedObjectLoader> splg)
|
||||
: _so(), _impl(), actual(exec) {
|
||||
if (splg) {
|
||||
_so = *splg;
|
||||
}
|
||||
|
||||
// plg can be null, but not the actual
|
||||
if (actual == nullptr)
|
||||
IE_THROW(NotAllocated) << "ExecutableNetwork was not initialized.";
|
||||
}
|
||||
|
||||
ConstOutputsDataMap ExecutableNetwork::GetOutputsInfo() const {
|
||||
if (actual) {
|
||||
ConstOutputsDataMap data;
|
||||
CALL_STATUS_FNC(GetOutputsInfo, data);
|
||||
return data;
|
||||
}
|
||||
|
||||
EXEC_NET_CALL_STATEMENT(return _impl->GetOutputsInfo());
|
||||
}
|
||||
|
||||
ConstInputsDataMap ExecutableNetwork::GetInputsInfo() const {
|
||||
if (actual) {
|
||||
ConstInputsDataMap info;
|
||||
CALL_STATUS_FNC(GetInputsInfo, info);
|
||||
return info;
|
||||
}
|
||||
|
||||
EXEC_NET_CALL_STATEMENT(return _impl->GetInputsInfo());
|
||||
}
|
||||
|
||||
void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) {
|
||||
if (actual) {
|
||||
if (newActual == nullptr) {
|
||||
THROW_IE_EXCEPTION << "ExecutableNetwork wrapper used for reset was not initialized.";
|
||||
}
|
||||
this->actual.swap(newActual);
|
||||
return;
|
||||
}
|
||||
|
||||
if (_impl == nullptr) IE_THROW() << "ExecutableNetwork was not initialized.";
|
||||
if (newActual == nullptr) IE_THROW() << "ExecutableNetwork wrapper used for reset was not initialized.";
|
||||
auto newBase = std::dynamic_pointer_cast<ExecutableNetworkBase>(newActual);
|
||||
@ -76,36 +44,10 @@ void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) {
|
||||
}
|
||||
|
||||
ExecutableNetwork::operator IExecutableNetwork::Ptr() {
|
||||
if (actual) {
|
||||
return actual;
|
||||
}
|
||||
|
||||
return std::make_shared<ExecutableNetworkBase>(_impl);
|
||||
}
|
||||
|
||||
std::vector<VariableState> ExecutableNetwork::QueryState() {
|
||||
if (actual) {
|
||||
if (actual == nullptr) THROW_IE_EXCEPTION << "ExecutableNetwork was not initialized.";
|
||||
IVariableState::Ptr pState = nullptr;
|
||||
auto res = OK;
|
||||
std::vector<VariableState> controller;
|
||||
for (size_t idx = 0; res == OK; ++idx) {
|
||||
ResponseDesc resp;
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
res = actual->QueryState(pState, idx, &resp);
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
if (res != OK && res != OUT_OF_BOUNDS) {
|
||||
THROW_IE_EXCEPTION << resp.msg;
|
||||
}
|
||||
if (res != OUT_OF_BOUNDS) {
|
||||
controller.push_back(VariableState(pState,
|
||||
std::make_shared<details::SharedObjectLoader>(_so)));
|
||||
}
|
||||
}
|
||||
|
||||
return controller;
|
||||
}
|
||||
|
||||
std::vector<VariableState> controller;
|
||||
EXEC_NET_CALL_STATEMENT(
|
||||
for (auto&& state : _impl->QueryState()) {
|
||||
@ -115,13 +57,6 @@ std::vector<VariableState> ExecutableNetwork::QueryState() {
|
||||
}
|
||||
|
||||
InferRequest ExecutableNetwork::CreateInferRequest() {
|
||||
if (actual) {
|
||||
IInferRequest::Ptr req;
|
||||
CALL_STATUS_FNC(CreateInferRequest, req);
|
||||
if (req.get() == nullptr) THROW_IE_EXCEPTION << "Internal error: pointer to infer request is null";
|
||||
return InferRequest(req, std::make_shared<details::SharedObjectLoader>(_so));
|
||||
}
|
||||
|
||||
EXEC_NET_CALL_STATEMENT(return {_so, _impl->CreateInferRequest()});
|
||||
}
|
||||
|
||||
@ -130,72 +65,38 @@ InferRequest::Ptr ExecutableNetwork::CreateInferRequestPtr() {
|
||||
}
|
||||
|
||||
void ExecutableNetwork::Export(const std::string& modelFileName) {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC(Export, modelFileName);
|
||||
return;
|
||||
}
|
||||
EXEC_NET_CALL_STATEMENT(_impl->Export(modelFileName));
|
||||
}
|
||||
|
||||
void ExecutableNetwork::Export(std::ostream& networkModel) {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC(Export, networkModel);
|
||||
return;
|
||||
}
|
||||
EXEC_NET_CALL_STATEMENT(_impl->Export(networkModel));
|
||||
}
|
||||
|
||||
CNNNetwork ExecutableNetwork::GetExecGraphInfo() {
|
||||
if (actual) {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
ICNNNetwork::Ptr ptr = nullptr;
|
||||
CALL_STATUS_FNC(GetExecGraphInfo, ptr);
|
||||
return CNNNetwork(ptr);
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
EXEC_NET_CALL_STATEMENT(return _impl->GetExecGraphInfo());
|
||||
}
|
||||
|
||||
void ExecutableNetwork::SetConfig(const std::map<std::string, Parameter>& config) {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC(SetConfig, config);
|
||||
return;
|
||||
}
|
||||
EXEC_NET_CALL_STATEMENT(_impl->SetConfig(config));
|
||||
}
|
||||
|
||||
Parameter ExecutableNetwork::GetConfig(const std::string& name) const {
|
||||
if (actual) {
|
||||
Parameter configValue;
|
||||
CALL_STATUS_FNC(GetConfig, name, configValue);
|
||||
return configValue;
|
||||
}
|
||||
EXEC_NET_CALL_STATEMENT(return _impl->GetConfig(name));
|
||||
}
|
||||
|
||||
Parameter ExecutableNetwork::GetMetric(const std::string& name) const {
|
||||
if (actual) {
|
||||
Parameter metricValue;
|
||||
CALL_STATUS_FNC(GetMetric, name, metricValue);
|
||||
return metricValue;
|
||||
}
|
||||
EXEC_NET_CALL_STATEMENT(return _impl->GetMetric(name));
|
||||
}
|
||||
|
||||
RemoteContext::Ptr ExecutableNetwork::GetContext() const {
|
||||
if (actual) {
|
||||
RemoteContext::Ptr pContext;
|
||||
CALL_STATUS_FNC(GetContext, pContext);
|
||||
return pContext;
|
||||
}
|
||||
EXEC_NET_CALL_STATEMENT(return _impl->GetContext());
|
||||
}
|
||||
|
||||
bool ExecutableNetwork::operator!() const noexcept {
|
||||
return !_impl || !actual;
|
||||
return !_impl;
|
||||
}
|
||||
|
||||
ExecutableNetwork::operator bool() const noexcept {
|
||||
return !!_impl || !!actual;
|
||||
return !!_impl;
|
||||
}
|
||||
} // namespace InferenceEngine
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
|
||||
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
|
||||
#include "cpp/exception2status.hpp"
|
||||
#include "ie_variable_state_base.hpp"
|
||||
#include "ie_infer_async_request_base.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
@ -64,29 +63,10 @@ public:
|
||||
TO_STATUS(_impl->Export(networkModel));
|
||||
}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
StatusCode GetExecGraphInfo(ICNNNetwork::Ptr& graphPtr, ResponseDesc* resp) noexcept override {
|
||||
// should be refactored together with ExecutableNetwork interface
|
||||
TO_STATUS(graphPtr = _impl->GetExecGraphInfo());
|
||||
}
|
||||
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
|
||||
StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept override {
|
||||
try {
|
||||
auto v = _impl->QueryState();
|
||||
if (idx >= v.size()) {
|
||||
return OUT_OF_BOUNDS;
|
||||
}
|
||||
pState = std::make_shared<VariableStateBase>(v[idx]);
|
||||
return OK;
|
||||
} catch (const std::exception& ex) {
|
||||
return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
|
||||
} catch (...) {
|
||||
return InferenceEngine::DescriptionBuffer(UNEXPECTED);
|
||||
}
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
StatusCode SetConfig(const std::map<std::string, Parameter>& config, ResponseDesc* resp) noexcept override {
|
||||
TO_STATUS(_impl->SetConfig(config));
|
||||
}
|
||||
|
@ -10,10 +10,10 @@
|
||||
|
||||
#include "cpp/exception2status.hpp"
|
||||
#include "cpp_interfaces/plugin_itt.hpp"
|
||||
#include "ie_variable_state_base.hpp"
|
||||
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
|
||||
#include "ie_iinfer_request.hpp"
|
||||
#include "ie_preprocess.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
#define CATCH_IE_EXCEPTION_TO_STATUS_NO_RESP(StatusCode, ExceptionType) catch (const ExceptionType& ex) { \
|
||||
@ -169,23 +169,6 @@ public:
|
||||
StatusCode SetBatch(int batch_size, ResponseDesc* resp) noexcept override {
|
||||
TO_STATUS(_impl->SetBatch(batch_size));
|
||||
}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
StatusCode QueryState(IVariableState::Ptr& pState, size_t idx, ResponseDesc* resp) noexcept override {
|
||||
try {
|
||||
auto v = _impl->QueryState();
|
||||
if (idx >= v.size()) {
|
||||
return OUT_OF_BOUNDS;
|
||||
}
|
||||
pState = std::make_shared<VariableStateBase>(v[idx]);
|
||||
return OK;
|
||||
} catch (const std::exception& ex) {
|
||||
return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
|
||||
} catch (...) {
|
||||
return InferenceEngine::DescriptionBuffer(UNEXPECTED);
|
||||
}
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
};
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
@ -23,44 +23,17 @@ namespace InferenceEngine {
|
||||
|
||||
InferRequest::InferRequest(const details::SharedObjectLoader& so,
|
||||
const IInferRequestInternal::Ptr& impl)
|
||||
: _so(so), _impl(impl), actual() {
|
||||
: _so(so), _impl(impl) {
|
||||
IE_ASSERT(_impl != nullptr);
|
||||
}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
InferRequest::InferRequest(IInferRequest::Ptr request,
|
||||
std::shared_ptr<details::SharedObjectLoader> splg)
|
||||
: _so(), _impl(), actual(request) {
|
||||
if (splg) {
|
||||
_so = *splg;
|
||||
}
|
||||
|
||||
// plg can be null, but not the actual
|
||||
if (actual == nullptr)
|
||||
IE_THROW(NotAllocated) << "InferRequest was not initialized.";
|
||||
}
|
||||
|
||||
void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC(SetBlob, name.c_str(), data);
|
||||
return;
|
||||
}
|
||||
INFER_REQ_CALL_STATEMENT(_impl->SetBlob(name, data);)
|
||||
}
|
||||
|
||||
Blob::Ptr InferRequest::GetBlob(const std::string& name) {
|
||||
if (actual) {
|
||||
Blob::Ptr data;
|
||||
CALL_STATUS_FNC(GetBlob, name.c_str(), data);
|
||||
std::string error = "Internal error: blob with name `" + name + "` is not allocated!";
|
||||
auto blobPtr = data.get();
|
||||
const bool remoteBlobPassed = blobPtr->is<RemoteBlob>();
|
||||
if (blobPtr == nullptr) IE_THROW() << error;
|
||||
if (!remoteBlobPassed && blobPtr->buffer() == nullptr) IE_THROW() << error;
|
||||
return data;
|
||||
}
|
||||
|
||||
Blob::Ptr blobPtr;
|
||||
INFER_REQ_CALL_STATEMENT(blobPtr = _impl->GetBlob(name);)
|
||||
std::string error = "Internal error: blob with name `" + name + "` is not allocated!";
|
||||
@ -71,60 +44,26 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
|
||||
}
|
||||
|
||||
void InferRequest::SetBlob(const std::string &name, const Blob::Ptr &data, const PreProcessInfo& info) {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC(SetBlob, name.c_str(), data, info);
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(_impl->SetBlob(name, data, info);)
|
||||
}
|
||||
|
||||
const PreProcessInfo& InferRequest::GetPreProcess(const std::string& name) const {
|
||||
if (actual) {
|
||||
const PreProcessInfo* info = nullptr;
|
||||
CALL_STATUS_FNC(GetPreProcess, name.c_str(), &info);
|
||||
return *info;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(return _impl->GetPreProcess(name);)
|
||||
}
|
||||
|
||||
void InferRequest::Infer() {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC_NO_ARGS(Infer);
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(_impl->Infer();)
|
||||
}
|
||||
|
||||
void InferRequest::Cancel() {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC_NO_ARGS(Cancel);
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(_impl->Cancel();)
|
||||
}
|
||||
|
||||
std::map<std::string, InferenceEngineProfileInfo> InferRequest::GetPerformanceCounts() const {
|
||||
if (actual) {
|
||||
std::map<std::string, InferenceEngineProfileInfo> perfMap;
|
||||
CALL_STATUS_FNC(GetPerformanceCounts, perfMap);
|
||||
return perfMap;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(return _impl->GetPerformanceCounts();)
|
||||
}
|
||||
|
||||
void InferRequest::SetInput(const BlobMap& inputs) {
|
||||
if (actual) {
|
||||
for (auto&& input : inputs) {
|
||||
CALL_STATUS_FNC(SetBlob, input.first.c_str(), input.second);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
for (auto&& input : inputs) {
|
||||
_impl->SetBlob(input.first, input.second);
|
||||
@ -133,13 +72,6 @@ void InferRequest::SetInput(const BlobMap& inputs) {
|
||||
}
|
||||
|
||||
void InferRequest::SetOutput(const BlobMap& results) {
|
||||
if (actual) {
|
||||
for (auto&& result : results) {
|
||||
CALL_STATUS_FNC(SetBlob, result.first.c_str(), result.second);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
for (auto&& result : results) {
|
||||
_impl->SetBlob(result.first, result.second);
|
||||
@ -148,106 +80,19 @@ void InferRequest::SetOutput(const BlobMap& results) {
|
||||
}
|
||||
|
||||
void InferRequest::SetBatch(const int batch) {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC(SetBatch, batch);
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(_impl->SetBatch(batch);)
|
||||
}
|
||||
|
||||
void InferRequest::StartAsync() {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC_NO_ARGS(StartAsync);
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(_impl->StartAsync();)
|
||||
}
|
||||
|
||||
|
||||
StatusCode InferRequest::Wait(int64_t millis_timeout) {
|
||||
if (actual) {
|
||||
ResponseDesc resp;
|
||||
if (actual == nullptr) IE_THROW() << "InferRequest was not initialized.";
|
||||
auto res = actual->Wait(millis_timeout, &resp);
|
||||
if (res != OK && res != RESULT_NOT_READY &&
|
||||
res != INFER_NOT_STARTED && res != INFER_CANCELLED) {
|
||||
IE_EXCEPTION_SWITCH(res, ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{}
|
||||
<<= std::stringstream{} << IE_LOCATION << resp.msg)
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(return _impl->Wait(millis_timeout);)
|
||||
}
|
||||
|
||||
namespace details {
|
||||
|
||||
class ICompletionCallbackWrapper {
|
||||
public:
|
||||
virtual ~ICompletionCallbackWrapper() = default;
|
||||
|
||||
virtual void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept = 0;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class CompletionCallbackWrapper : public ICompletionCallbackWrapper {
|
||||
T lambda;
|
||||
|
||||
public:
|
||||
explicit CompletionCallbackWrapper(const T& lambda): lambda(lambda) {}
|
||||
|
||||
void call(InferenceEngine::IInferRequest::Ptr /*request*/, InferenceEngine::StatusCode /*code*/) const
|
||||
noexcept override {
|
||||
lambda();
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class CompletionCallbackWrapper<IInferRequest::CompletionCallback> : public ICompletionCallbackWrapper {
|
||||
IInferRequest::CompletionCallback callBack;
|
||||
|
||||
public:
|
||||
explicit CompletionCallbackWrapper(const IInferRequest::CompletionCallback& callBack): callBack(callBack) {}
|
||||
|
||||
void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept override {
|
||||
callBack(request, code);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class CompletionCallbackWrapper<std::function<void(InferRequest, StatusCode)>> : public ICompletionCallbackWrapper {
|
||||
std::function<void(InferRequest, StatusCode)> lambda;
|
||||
|
||||
public:
|
||||
explicit CompletionCallbackWrapper(const std::function<void(InferRequest, InferenceEngine::StatusCode)>& lambda)
|
||||
: lambda(lambda) {}
|
||||
|
||||
void call(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) const noexcept override {
|
||||
lambda(InferRequest(request), code);
|
||||
}
|
||||
};
|
||||
|
||||
void callWrapper(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) {
|
||||
details::ICompletionCallbackWrapper* pWrapper = nullptr;
|
||||
ResponseDesc dsc;
|
||||
request->GetUserData(reinterpret_cast<void**>(&pWrapper), &dsc);
|
||||
pWrapper->call(request, code);
|
||||
}
|
||||
|
||||
} // namespace details
|
||||
|
||||
void InferRequest::SetCompletionCallbackImpl(std::function<void()> callbackToSet) {
|
||||
if (actual) {
|
||||
using T = std::function<void()>;
|
||||
callback.reset(new details::CompletionCallbackWrapper<T>(callbackToSet));
|
||||
CALL_STATUS_FNC(SetUserData, callback.get());
|
||||
actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
_impl->SetCallback([callbackToSet] (std::exception_ptr) {
|
||||
callbackToSet();
|
||||
@ -274,14 +119,6 @@ void InferRequest::SetCompletionCallbackImpl(std::function<void()> callbackToSet
|
||||
|
||||
|
||||
void InferRequest::SetCompletionCallbackImpl(std::function<void(InferRequest, StatusCode)> callbackToSet) {
|
||||
if (actual) {
|
||||
using T = std::function<void(InferRequest, StatusCode)>;
|
||||
callback.reset(new details::CompletionCallbackWrapper<T>(callbackToSet));
|
||||
CALL_STATUS_FNC(SetUserData, callback.get());
|
||||
actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
auto weakThis = InferRequest{_so, std::shared_ptr<IInferRequestInternal>{_impl.get(), [](IInferRequestInternal*){}}};
|
||||
_impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) {
|
||||
@ -303,14 +140,6 @@ void InferRequest::SetCompletionCallbackImpl(std::function<void(InferRequest, St
|
||||
}
|
||||
|
||||
void InferRequest::SetCompletionCallbackImpl(IInferRequest::CompletionCallback callbackToSet) {
|
||||
if (actual) {
|
||||
using T = IInferRequest::CompletionCallback;
|
||||
callback.reset(new details::CompletionCallbackWrapper<T>(callbackToSet));
|
||||
CALL_STATUS_FNC(SetUserData, callback.get());
|
||||
actual->SetCompletionCallback(InferenceEngine::details::callWrapper);
|
||||
return;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
IInferRequest::Ptr weakThis = InferRequest{_so, std::shared_ptr<IInferRequestInternal>{_impl.get(), [](IInferRequestInternal*){}}};
|
||||
_impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) {
|
||||
@ -332,38 +161,12 @@ void InferRequest::SetCompletionCallbackImpl(IInferRequest::CompletionCallback c
|
||||
}
|
||||
|
||||
InferRequest::operator IInferRequest::Ptr () {
|
||||
if (actual) {
|
||||
return actual;
|
||||
}
|
||||
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
return std::make_shared<InferRequestBase>(_impl);
|
||||
)
|
||||
}
|
||||
|
||||
std::vector<VariableState> InferRequest::QueryState() {
|
||||
if (actual) {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
if (actual == nullptr) IE_THROW() << "ExecutableNetwork was not initialized.";
|
||||
IVariableState::Ptr pState = nullptr;
|
||||
auto res = OK;
|
||||
std::vector<VariableState> controller;
|
||||
for (size_t idx = 0; res == OK; ++idx) {
|
||||
ResponseDesc resp;
|
||||
res = actual->QueryState(pState, idx, &resp);
|
||||
if (res != OK && res != OUT_OF_BOUNDS) {
|
||||
IE_THROW() << resp.msg;
|
||||
}
|
||||
if (res != OUT_OF_BOUNDS) {
|
||||
controller.push_back(VariableState(pState,
|
||||
std::make_shared<details::SharedObjectLoader>(_so)));
|
||||
}
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
return controller;
|
||||
}
|
||||
|
||||
std::vector<VariableState> controller;
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
for (auto&& state : _impl->QueryState()) {
|
||||
@ -374,11 +177,11 @@ std::vector<VariableState> InferRequest::QueryState() {
|
||||
}
|
||||
|
||||
bool InferRequest::operator!() const noexcept {
|
||||
return !_impl || !actual;
|
||||
return !_impl;
|
||||
}
|
||||
|
||||
InferRequest::operator bool() const noexcept {
|
||||
return (!!_impl) || (!!actual);
|
||||
return (!!_impl);
|
||||
}
|
||||
|
||||
bool InferRequest::operator!=(const InferRequest& r) const noexcept {
|
||||
@ -386,7 +189,7 @@ bool InferRequest::operator!=(const InferRequest& r) const noexcept {
|
||||
}
|
||||
|
||||
bool InferRequest::operator==(const InferRequest& r) const noexcept {
|
||||
return r._impl == _impl && r.actual == actual;
|
||||
return r._impl == _impl;
|
||||
}
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "cpp/ie_memory_state.hpp"
|
||||
#include "ie_imemory_state.hpp"
|
||||
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
|
||||
#include "exception2status.hpp"
|
||||
|
||||
@ -24,57 +23,19 @@ VariableState::VariableState(const details::SharedObjectLoader& so,
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
VariableState::VariableState(std::shared_ptr<IVariableState> state,
|
||||
std::shared_ptr<details::SharedObjectLoader> splg)
|
||||
: _so(), _impl(), actual(state) {
|
||||
if (splg) {
|
||||
_so = *splg;
|
||||
}
|
||||
|
||||
// plg can be null, but not the actual
|
||||
if (actual == nullptr)
|
||||
IE_THROW(NotAllocated) << "VariableState was not initialized.";
|
||||
}
|
||||
|
||||
Blob::CPtr VariableState::GetLastState() const {
|
||||
return GetState();
|
||||
}
|
||||
|
||||
void VariableState::Reset() {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC_NO_ARGS(Reset);
|
||||
return;
|
||||
}
|
||||
|
||||
VARIABLE_CALL_STATEMENT(_impl->Reset());
|
||||
}
|
||||
|
||||
std::string VariableState::GetName() const {
|
||||
if (actual) {
|
||||
char name[256];
|
||||
CALL_STATUS_FNC(GetName, name, sizeof(name));
|
||||
return name;
|
||||
}
|
||||
|
||||
VARIABLE_CALL_STATEMENT(return _impl->GetName());
|
||||
}
|
||||
|
||||
Blob::CPtr VariableState::GetState() const {
|
||||
if (actual) {
|
||||
Blob::CPtr stateBlob;
|
||||
CALL_STATUS_FNC(GetState, stateBlob);
|
||||
return stateBlob;
|
||||
}
|
||||
|
||||
VARIABLE_CALL_STATEMENT(return _impl->GetState());
|
||||
}
|
||||
|
||||
void VariableState::SetState(Blob::Ptr state) {
|
||||
if (actual) {
|
||||
CALL_STATUS_FNC(SetState, state);
|
||||
return;
|
||||
}
|
||||
|
||||
VARIABLE_CALL_STATEMENT(_impl->SetState(state));
|
||||
}
|
||||
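After this cleanup the state API is reached only through the _impl path; a hedged end-to-end usage sketch of the surviving calls (the model path and device name are placeholders, and error handling is omitted):

#include <inference_engine.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core ie;
    // "model.xml" stands for any stateful network; "GNA" is just one possible device.
    auto execNet = ie.LoadNetwork(ie.ReadNetwork("model.xml"), "GNA");
    auto request = execNet.CreateInferRequest();
    request.Infer();
    for (auto&& state : request.QueryState()) {  // memory states are queried per request
        std::cout << state.GetName() << "\n";
        state.Reset();                           // drop accumulated state between sequences
    }
}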
|
||||
|
@ -1,59 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "cpp/exception2status.hpp"
|
||||
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
|
||||
#include "ie_imemory_state.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
/**
|
||||
* @brief Default implementation for IVariableState
|
||||
* @ingroup ie_dev_api_variable_state_api
|
||||
*/
|
||||
class VariableStateBase : public IVariableState {
|
||||
std::shared_ptr<IVariableStateInternal> impl;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Constructor with actual underlying implementation.
|
||||
* @param impl Underlying implementation of type IVariableStateInternal
|
||||
*/
|
||||
explicit VariableStateBase(std::shared_ptr<IVariableStateInternal> impl): impl(impl) {
|
||||
if (impl == nullptr) {
|
||||
IE_THROW() << "VariableStateBase implementation is not defined";
|
||||
}
|
||||
}
|
||||
|
||||
StatusCode GetName(char* name, size_t len, ResponseDesc* resp) const noexcept override {
|
||||
for (size_t i = 0; i != len; i++) {
|
||||
name[i] = 0;
|
||||
}
|
||||
DescriptionBuffer buf(name, len);
|
||||
TO_STATUS(buf << impl->GetName());
|
||||
return OK;
|
||||
}
|
||||
|
||||
StatusCode Reset(ResponseDesc* resp) noexcept override {
|
||||
TO_STATUS(impl->Reset());
|
||||
}
|
||||
|
||||
StatusCode SetState(Blob::Ptr newState, ResponseDesc* resp) noexcept override {
|
||||
TO_STATUS(impl->SetState(newState));
|
||||
}
|
||||
|
||||
StatusCode GetState(Blob::CPtr& state, ResponseDesc* resp) const noexcept override {
|
||||
TO_STATUS(state = impl->GetState());
|
||||
}
|
||||
};
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
} // namespace InferenceEngine
|
@ -23,7 +23,4 @@ Blob::CPtr IVariableStateInternal::GetState() const {
|
||||
return state;
|
||||
}
|
||||
|
||||
Blob::CPtr IVariableStateInternal::GetLastState() const {
|
||||
return GetState();
|
||||
}
|
||||
} // namespace InferenceEngine
|
||||
|
@ -1,45 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <ie_parameter.hpp>
|
||||
#include <memory>
|
||||
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
|
||||
template class INFERENCE_ENGINE_API_CLASS(VariantImpl<InferenceEngine::Parameter>);
|
||||
|
||||
template <>
|
||||
class INFERENCE_ENGINE_API_CLASS(VariantWrapper<InferenceEngine::Parameter>) : public VariantImpl<InferenceEngine::Parameter> {
|
||||
public:
|
||||
static constexpr VariantTypeInfo type_info {"Variant::InferenceEngine::Parameter", 0};
|
||||
const VariantTypeInfo& get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
VariantWrapper(const value_type& value): VariantImpl<value_type>(value) {} // NOLINT
|
||||
};
|
||||
|
||||
} // namespace ngraph
|
||||
|
||||
constexpr ngraph::VariantTypeInfo ngraph::VariantWrapper<InferenceEngine::Parameter>::type_info;
|
||||
|
||||
InferenceEngine::Parameter::Parameter(const std::shared_ptr<ngraph::Variant>& var) {
|
||||
if (auto paramWrapper = std::dynamic_pointer_cast<ngraph::VariantWrapper<InferenceEngine::Parameter>>(var)) {
|
||||
auto param = paramWrapper->get();
|
||||
if (!param.empty()) ptr = param.ptr->copy();
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::Parameter::Parameter(std::shared_ptr<ngraph::Variant>& var) {
|
||||
if (auto paramWrapper = std::dynamic_pointer_cast<ngraph::VariantWrapper<InferenceEngine::Parameter>>(var)) {
|
||||
auto param = paramWrapper->get();
|
||||
if (!param.empty()) ptr = param.ptr->copy();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<ngraph::Variant> InferenceEngine::Parameter::asVariant() const {
|
||||
return std::make_shared<ngraph::VariantWrapper<InferenceEngine::Parameter>>(*this);
|
||||
}
|
@ -111,13 +111,13 @@ void SplitTransformation::updateOutputs(
|
||||
updateOutput(context, lastNodes[0], originalNode);
|
||||
} else {
|
||||
const std::string originalName = originalNode->get_friendly_name();
|
||||
for (auto& lastNode : lastNodes) {
|
||||
for (size_t outIdx = 0; outIdx < lastNodes.size(); ++outIdx) {
|
||||
for (size_t i = 0; i < outputSize; ++i) {
|
||||
std::shared_ptr<ngraph::Node> result = context.function->get_output_op(i);
|
||||
std::shared_ptr<ngraph::Node> outputNode = result->get_input_node_shared_ptr(0);
|
||||
if (outputNode.get() == lastNode.get()) {
|
||||
if (outputNode.get() == lastNodes[outIdx].get()) {
|
||||
originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix);
|
||||
lastNode->set_friendly_name(originalName + "." + std::to_string(i));
|
||||
lastNodes[outIdx]->set_friendly_name(originalName + "." + std::to_string(outIdx));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -157,10 +157,15 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
||||
}
|
||||
|
||||
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
|
||||
if ( // Check if all dimensions of scale except the output channels are all ones
|
||||
if (
|
||||
// expected, it's ok: return true
|
||||
(shape_size(constOutputShape) != 1ul) &&
|
||||
// not expected, something wrong: return false
|
||||
((constOutputShape.size() <= outChannelsShapeIndex) ||
|
||||
// Check if all dimensions of scale except the output channels are all ones
|
||||
(shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
|
||||
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
|
||||
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex]))) {
|
||||
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex])))) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
|
@ -6,7 +6,7 @@ Use the following cmake option to enable debug capabilities:
## Blob dumping
Blob dumping is controlled by environment variables (filters).

The variables define conditions of the node which input, output and internal blobs
The variables define conditions of the node which input and output blobs
should be dumped for.

> **NOTE**: Nothing is dumped by default
@ -15,11 +15,13 @@ should be dumped for.

Environment variables can be set per execution, for example:
```sh
OV_CPU_BLOB_DUMP_DIR=dump_dir binary ...
OV_CPU_BLOB_DUMP_DIR=dump_dir OV_CPU_BLOB_DUMP_FORMAT=TEXT OV_CPU_BLOB_DUMP_NODE_PORTS=OUT binary ...
```
or for shell session (bash example):
```sh
export OV_CPU_BLOB_DUMP_DIR=dump_dir
export OV_CPU_BLOB_DUMP_FORMAT=TEXT
export OV_CPU_BLOB_DUMP_NODE_PORTS=OUT
binary ...
```
### Specify dump directory
@ -35,8 +37,22 @@ Options are:
* BIN (default)
* TEXT

### Filter input / output blobs
To dump only input / output blobs:
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS='<ports_kind>' binary ...
```
Example:
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS=OUT binary ...
```
Options are:
* IN
* OUT
* ALL

### Filter by execution ID
To dump blobs only for node with specified execution IDs:
To dump blobs only for nodes with specified execution IDs:
```sh
OV_CPU_BLOB_DUMP_NODE_EXEC_ID='<space_separated_list_of_ids>' binary ...
```
@ -46,19 +62,19 @@ Example:
```

### Filter by type
To dump blobs only for node with specified type:
To dump blobs only for nodes with specified types:
```sh
OV_CPU_BLOB_DUMP_NODE_TYPE=<type> binary ...
OV_CPU_BLOB_DUMP_NODE_TYPE=<space_separated_list_of_types> binary ...
```
Example:
```sh
OV_CPU_BLOB_DUMP_NODE_TYPE=Convolution binary ...
OV_CPU_BLOB_DUMP_NODE_TYPE='Convolution Reorder' binary ...
```

> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types

### Filter by name
To dump blobs only for node with name matching specified regex:
To dump blobs only for nodes with name matching specified regex:
```sh
OV_CPU_BLOB_DUMP_NODE_NAME=<regex> binary ...
```
@ -68,9 +84,17 @@ Example:
```

### Dump all the blobs
```sh
OV_CPU_BLOB_DUMP_NODE_NAME="*" binary ...
```
or
```sh
OV_CPU_BLOB_DUMP_NODE_NAME=".+" binary ...
```
or
```sh
OV_CPU_BLOB_DUMP_NODE_PORTS=ALL binary ...
```

## Graph serialization
The functionality allows to serialize execution graph using environment variable:

@ -20,6 +20,7 @@ public:
|
||||
readParam(blobDumpDir, "OV_CPU_BLOB_DUMP_DIR");
|
||||
readParam(blobDumpFormat, "OV_CPU_BLOB_DUMP_FORMAT");
|
||||
readParam(blobDumpNodeExecId, "OV_CPU_BLOB_DUMP_NODE_EXEC_ID");
|
||||
readParam(blobDumpNodePorts, "OV_CPU_BLOB_DUMP_NODE_PORTS");
|
||||
readParam(blobDumpNodeType, "OV_CPU_BLOB_DUMP_NODE_TYPE");
|
||||
readParam(blobDumpNodeName, "OV_CPU_BLOB_DUMP_NODE_NAME");
|
||||
readParam(execGraphPath, "OV_CPU_EXEC_GRAPH_PATH");
|
||||
@ -28,6 +29,7 @@ public:
|
||||
std::string blobDumpDir;
|
||||
std::string blobDumpFormat;
|
||||
std::string blobDumpNodeExecId;
|
||||
std::string blobDumpNodePorts;
|
||||
std::string blobDumpNodeType;
|
||||
std::string blobDumpNodeName;
|
||||
std::string execGraphPath;
|
||||
|
@ -20,7 +20,7 @@ using namespace InferenceEngine;
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
|
||||
: dumpFormat(DUMP_FORMAT::BIN)
|
||||
: dumpFormat(FORMAT::BIN)
|
||||
, dumpDirName("mkldnn_dump")
|
||||
, count(_count) {
|
||||
if (!config.blobDumpDir.empty())
|
||||
@ -32,6 +32,9 @@ NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
|
||||
if (!config.blobDumpNodeExecId.empty())
|
||||
dumpFilters[FILTER::BY_EXEC_ID] = config.blobDumpNodeExecId;
|
||||
|
||||
if (!config.blobDumpNodePorts.empty())
|
||||
dumpFilters[FILTER::BY_PORTS] = config.blobDumpNodePorts;
|
||||
|
||||
if (!config.blobDumpNodeType.empty())
|
||||
dumpFilters[FILTER::BY_TYPE] = config.blobDumpNodeType;
|
||||
|
||||
@ -40,7 +43,7 @@ NodeDumper::NodeDumper(const DebugCaps::Config& config, const int _count)
|
||||
}
|
||||
|
||||
void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
|
||||
if (!shouldBeDumped(node))
|
||||
if (!shouldBeDumped(node, "IN"))
|
||||
return;
|
||||
|
||||
auto exec_order = std::to_string(node->getExecIndex());
|
||||
@ -60,7 +63,7 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
|
||||
file_name = file_name.substr(file_name.size() - 240);
|
||||
|
||||
auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name;
|
||||
std::cout << "Dump before: " << dump_file << std::endl;
|
||||
std::cout << "Dump inputs: " << dump_file << std::endl;
|
||||
|
||||
TensorDesc desc = prEdge->getDesc();
|
||||
if (desc.getPrecision() == Precision::BIN)
|
||||
@ -77,7 +80,7 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const {
|
||||
}
|
||||
|
||||
void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const {
|
||||
if (!shouldBeDumped(node))
|
||||
if (!shouldBeDumped(node, "OUT"))
|
||||
return;
|
||||
|
||||
auto exec_order = std::to_string(node->getExecIndex());
|
||||
@ -96,7 +99,7 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const {
|
||||
file_name = file_name.substr(file_name.size() - 240);
|
||||
|
||||
auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name;
|
||||
std::cout << "Dump after: " << dump_file << std::endl;
|
||||
std::cout << "Dump outputs: " << dump_file << std::endl;
|
||||
|
||||
TensorDesc desc = childEdge->getDesc();
|
||||
if (desc.getPrecision() == Precision::BIN)
|
||||
@ -130,56 +133,77 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const {
|
||||
|
||||
void NodeDumper::dump(const BlobDumper& bd, const std::string& file) const {
|
||||
switch (dumpFormat) {
|
||||
case DUMP_FORMAT::BIN: {
|
||||
case FORMAT::BIN: {
|
||||
bd.dump(file);
|
||||
break;
|
||||
}
|
||||
case DUMP_FORMAT::TEXT: {
|
||||
case FORMAT::TEXT: {
|
||||
bd.dumpAsTxt(file);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
IE_THROW() << "Unknown dump format";
|
||||
IE_THROW() << "NodeDumper: Unknown dump format";
|
||||
}
|
||||
}
|
||||
|
||||
bool NodeDumper::shouldBeDumped(const MKLDNNNodePtr& node) const {
|
||||
bool NodeDumper::shouldBeDumped(const MKLDNNNodePtr& node, const std::string& portsKind) const {
|
||||
if (dumpFilters.empty())
|
||||
return false;
|
||||
|
||||
if (dumpFilters.count(FILTER::BY_EXEC_ID)) { // filter by exec id env set
|
||||
if (dumpFilters.count(FILTER::BY_PORTS)) { // filter by ports configured
|
||||
if (dumpFilters.at(FILTER::BY_PORTS) != "ALL" &&
|
||||
portsKind != dumpFilters.at(FILTER::BY_PORTS))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dumpFilters.count(FILTER::BY_EXEC_ID)) { // filter by exec id configured
|
||||
std::stringstream ss(dumpFilters.at(FILTER::BY_EXEC_ID));
|
||||
int id;
|
||||
bool matched = false;
|
||||
|
||||
while (ss >> id) {
|
||||
if (node->getExecIndex() == id) // exec id matches
|
||||
if (node->getExecIndex() == id) {// exec id matches
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!matched)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dumpFilters.count(FILTER::BY_TYPE)) { // filter by type env set
|
||||
if (NameFromType(node->getType()) != dumpFilters.at(FILTER::BY_TYPE)) // type does not match
|
||||
if (dumpFilters.count(FILTER::BY_TYPE)) { // filter by type configured
|
||||
std::stringstream ss(dumpFilters.at(FILTER::BY_TYPE));
|
||||
std::string type;
|
||||
bool matched = false;
|
||||
|
||||
while (ss >> type) {
|
||||
if (NameFromType(node->getType()) == type) { // type matches
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!matched)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dumpFilters.count(FILTER::BY_NAME)) { // filter by name env set
|
||||
if (!std::regex_match(node->getName(), std::regex(dumpFilters.at(FILTER::BY_NAME)))) // name does not match
|
||||
if (dumpFilters.count(FILTER::BY_NAME)) { // filter by name configured
|
||||
if (dumpFilters.at(FILTER::BY_NAME) != "*" && // to have 'single char' option for matching all the names
|
||||
!std::regex_match(node->getName(), std::regex(dumpFilters.at(FILTER::BY_NAME)))) // name does not match
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
NodeDumper::DUMP_FORMAT NodeDumper::parseDumpFormat(const std::string& format) const {
|
||||
NodeDumper::FORMAT NodeDumper::parseDumpFormat(const std::string& format) const {
|
||||
if (format == "BIN")
|
||||
return DUMP_FORMAT::BIN;
|
||||
return FORMAT::BIN;
|
||||
else if (format == "TEXT")
|
||||
return DUMP_FORMAT::TEXT;
|
||||
return FORMAT::TEXT;
|
||||
else
|
||||
IE_THROW() << "Unknown dump format";
|
||||
IE_THROW() << "NodeDumper: Unknown dump format";
|
||||
}
|
||||
|
||||
void NodeDumper::formatNodeName(std::string& name) const {
|
||||
|
@ -31,28 +31,29 @@ public:
|
||||
private:
|
||||
void dumpInternalBlobs(const MKLDNNNodePtr& node) const;
|
||||
void dump(const BlobDumper& bd, const std::string& file) const;
|
||||
bool shouldBeDumped(const MKLDNNNodePtr &node) const;
|
||||
bool shouldBeDumped(const MKLDNNNodePtr &node, const std::string& portsKind) const;
|
||||
|
||||
enum class DUMP_FORMAT {
|
||||
enum class FORMAT {
|
||||
BIN,
|
||||
TEXT,
|
||||
};
|
||||
|
||||
DUMP_FORMAT parseDumpFormat(const std::string& format) const;
|
||||
FORMAT parseDumpFormat(const std::string& format) const;
|
||||
void formatNodeName(std::string& name) const;
|
||||
|
||||
DUMP_FORMAT dumpFormat;
|
||||
FORMAT dumpFormat;
|
||||
std::string dumpDirName;
|
||||
int count;
|
||||
|
||||
enum FILTER {
|
||||
BY_PORTS,
|
||||
BY_EXEC_ID,
|
||||
BY_TYPE,
|
||||
BY_NAME,
|
||||
COUNT,
|
||||
};
|
||||
|
||||
std::unordered_map<FILTER, std::string> dumpFilters;
|
||||
// std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and a defect in the C++11 standard)
|
||||
std::unordered_map<FILTER, std::string, std::hash<int>> dumpFilters;
|
||||
};
|
||||
} // namespace MKLDNNPlugin
|
||||
#endif // CPU_DEBUG_CAPS
|
||||
|
@ -96,7 +96,8 @@ public:
|
||||
auto mask_2_iter = mask->rbegin();
|
||||
|
||||
while (mask_1_iter != rend() &&
|
||||
mask_2_iter != mask->rend()) {
|
||||
mask_2_iter != mask->rend() &&
|
||||
result_iter != result_mask->rend()) {
|
||||
// Merge mask dimension values for both masks
|
||||
// Example: (MaskValue[1,2,3,4], MaskValue[2,3]) -> MaskValue[2,3]
|
||||
for (const auto & value : *mask_1_iter) {
|
||||
@ -119,7 +120,8 @@ public:
|
||||
auto mask_2_iter = mask->rbegin();
|
||||
|
||||
while (mask_1_iter != rend() &&
|
||||
mask_2_iter != mask->rend()) {
|
||||
mask_2_iter != mask->rend() &&
|
||||
result_iter != result_mask->rend()) {
|
||||
// Union mask dimension values for both masks
|
||||
// Example: (MaskValue[1,2,3,4], MaskValue[2, 5]) -> MaskValue[1, 2, 3, 4, 5]
|
||||
for (const auto & value : *mask_1_iter) {
|
||||
|
@ -246,6 +246,9 @@ public:
|
||||
// To allow pruning on weights (allow reshape input Group (0) dim changing) replace Reshape Shape constant
|
||||
// [G, 1, 1, X, Y, Z] by [-1, 1, 1, X, Y, Z].
|
||||
auto old_shape_const = std::dynamic_pointer_cast<opset6::Constant>(m_shape.get_node_shared_ptr());
|
||||
if (!old_shape_const) {
|
||||
return false;
|
||||
}
|
||||
auto shape_value = old_shape_const.get()->cast_vector<int64_t>();
|
||||
shape_value[0] = -1;
|
||||
auto new_const = opset6::Constant::create(old_shape_const->get_element_type(),
|
||||
@ -462,6 +465,9 @@ public:
|
||||
const auto & pattern_map = m.get_pattern_value_map();
|
||||
const auto & m_output = pattern_map.at(concat);
|
||||
auto concat_ptr = std::dynamic_pointer_cast<opset6::Concat>(m_output.get_node_shared_ptr());
|
||||
if (!concat_ptr) {
|
||||
return false;
|
||||
}
|
||||
auto axis = concat_ptr->get_concatenation_axis();
|
||||
|
||||
auto inputs = concat_ptr->inputs();
|
||||
|
@ -50,14 +50,6 @@ public:
|
||||
*/
|
||||
virtual Blob::CPtr GetState() const;
|
||||
|
||||
/**
|
||||
* @deprecated Use IVariableStateInternal::GetState method instead
|
||||
* @brief Returns the value of the variable state.
|
||||
* @return The value of the variable state
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use IVariableStateInternal::GetState method instead")
|
||||
virtual Blob::CPtr GetLastState() const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @brief A default dtor
|
||||
|
@ -25,6 +25,9 @@
|
||||
|
||||
#include "ie_algorithm.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace details {
|
||||
|
||||
/**
|
||||
* @brief Serializes a `std::vector` to a `std::ostream`
|
||||
* @ingroup ie_dev_api_error_debug
|
||||
@ -32,7 +35,6 @@
|
||||
* @param vec A vector to serialize
|
||||
* @return A reference to a `std::stream`
|
||||
*/
|
||||
namespace std {
|
||||
template <typename T>
|
||||
inline std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
|
||||
if (vec.empty()) return std::operator<<(out, "[]");
|
||||
@ -42,10 +44,7 @@ inline std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
|
||||
}
|
||||
return out << "]";
|
||||
}
|
||||
} // namespace std
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace details {
|
||||
/**
|
||||
* @brief trim from start (in place)
|
||||
* @ingroup ie_dev_api_error_debug
|
||||
|
@ -0,0 +1,99 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "caseless.hpp"
|
||||
|
||||
#include "vpu/utils/optional.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
struct CompilationConfig {
|
||||
int numSHAVEs = -1;
|
||||
int numCMXSlices = -1;
|
||||
int numExecutors = -1;
|
||||
int tilingCMXLimitKB = -1;
|
||||
|
||||
bool hwOptimization = true;
|
||||
bool hwExtraSplit = false;
|
||||
|
||||
std::string irWithVpuScalesDir;
|
||||
|
||||
std::string customLayers;
|
||||
|
||||
bool detectBatch = true;
|
||||
|
||||
Optional<bool> injectSwOps;
|
||||
Optional<bool> packDataInCmx;
|
||||
bool mergeHwPoolToConv = true;
|
||||
bool hwDilation = false;
|
||||
bool forceDeprecatedCnnConversion = false;
|
||||
bool enableEarlyEltwiseReLUFusion = true;
|
||||
|
||||
std::map<std::string, std::vector<int>> ioStrides;
|
||||
|
||||
//
|
||||
// Debug options
|
||||
//
|
||||
|
||||
InferenceEngine::details::caseless_set<std::string> hwWhiteList;
|
||||
InferenceEngine::details::caseless_set<std::string> hwBlackList;
|
||||
|
||||
bool hwDisabled(const std::string& layerName) const {
|
||||
if (!hwWhiteList.empty()) {
|
||||
return hwWhiteList.count(layerName) == 0;
|
||||
}
|
||||
|
||||
if (!hwBlackList.empty()) {
|
||||
return hwBlackList.count(layerName) != 0;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
InferenceEngine::details::caseless_set<std::string> noneLayers;
|
||||
|
||||
bool skipAllLayers() const {
|
||||
if (noneLayers.size() == 1) {
|
||||
const auto& val = *noneLayers.begin();
|
||||
return val == "*";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool skipLayerType(const std::string& layerType) const {
|
||||
return noneLayers.count(layerType) != 0;
|
||||
}
|
||||
bool ignoreUnknownLayers = false;
|
||||
|
||||
std::string dumpInternalGraphFileName;
|
||||
std::string dumpInternalGraphDirectory;
|
||||
bool dumpAllPasses;
|
||||
|
||||
bool disableReorder = false; // TODO: rename to enableReorder and switch logic.
|
||||
bool disableConvertStages = false;
|
||||
bool enablePermuteMerging = true;
|
||||
bool enableReplWithSCRelu = false;
|
||||
bool enableReplaceWithReduceMean = true;
|
||||
bool enableTensorIteratorUnrolling = false;
|
||||
bool forcePureTensorIterator = false;
|
||||
bool enableMemoryTypesAnnotation = false;
|
||||
bool enableWeightsAnalysis = true;
|
||||
bool checkPreprocessingInsideModel = true;
|
||||
bool enableCustomReshapeParam = false;
|
||||
|
||||
//
|
||||
// Deprecated options
|
||||
//
|
||||
|
||||
float inputScale = 1.0f;
|
||||
float inputBias = 0.0f;
|
||||
};
|
||||
|
||||
} // namespace vpu
|
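A small illustration of how the white/black list helpers in `CompilationConfig` behave; the header name below is an assumption (the diff does not show the new file's path), and the layer names are hypothetical:

```cpp
#include <cassert>

#include "compilation_config.hpp" // assumed name of the new header declaring vpu::CompilationConfig

void hwListExample() {
    vpu::CompilationConfig config;

    // With a non-empty white list only listed layers stay HW-enabled; the set is caseless.
    config.hwWhiteList.insert("conv1");
    assert(!config.hwDisabled("Conv1"));
    assert(config.hwDisabled("pool1"));

    // skipAllLayers() triggers only on the single wildcard entry "*".
    config.noneLayers.insert("*");
    assert(config.skipAllLayers());
}
```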
@ -0,0 +1,18 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ie_parameter.hpp"
|
||||
|
||||
template<class OptionConcept>
|
||||
struct AsParsedParameterEnabler {
|
||||
static InferenceEngine::Parameter asParameter(const std::string& value) { return {OptionConcept::parse(value)}; }
|
||||
};
|
||||
|
||||
struct AsParameterEnabler {
|
||||
static InferenceEngine::Parameter asParameter(const std::string& value);
|
||||
};
|
@ -0,0 +1,34 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "vpu/configuration/as_parameter_enabler.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
namespace details {
|
||||
|
||||
enum class Access;
|
||||
enum class Category;
|
||||
|
||||
} // namespace details
|
||||
|
||||
class PluginConfiguration;
|
||||
|
||||
struct CopyOptimizationOption : public AsParsedParameterEnabler<CopyOptimizationOption> {
|
||||
using value_type = bool;
|
||||
|
||||
static std::string key();
|
||||
static void validate(const std::string&);
|
||||
static void validate(const PluginConfiguration&);
|
||||
static std::string defaultValue();
|
||||
static value_type parse(const std::string&);
|
||||
static details::Access access();
|
||||
static details::Category category();
|
||||
};
|
||||
|
||||
} // namespace vpu
|
@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "vpu/configuration/as_parameter_enabler.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
enum class LogLevel;
|
||||
|
||||
namespace details {
|
||||
|
||||
enum class Access;
|
||||
enum class Category;
|
||||
|
||||
} // namespace details
|
||||
|
||||
class PluginConfiguration;
|
||||
|
||||
struct LogLevelOption : public AsParameterEnabler {
|
||||
using value_type = LogLevel;
|
||||
|
||||
static std::string key();
|
||||
static void validate(const std::string&);
|
||||
static void validate(const PluginConfiguration&);
|
||||
static std::string defaultValue();
|
||||
static value_type parse(const std::string&);
|
||||
static details::Access access();
|
||||
static details::Category category();
|
||||
};
|
||||
|
||||
} // namespace vpu
|
@ -0,0 +1,142 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <memory>
|
||||
#include <vpu/parsed_config.hpp>
|
||||
|
||||
#include "ie_parameter.hpp"
|
||||
|
||||
#include "vpu/utils/logger.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
class PluginConfiguration;
|
||||
|
||||
struct ConfigurationOptionConcept {
|
||||
virtual std::string key() const = 0;
|
||||
virtual void validate(const std::string&) const = 0;
|
||||
virtual void validate(const PluginConfiguration&) const = 0;
|
||||
virtual InferenceEngine::Parameter asParameter(const std::string&) const = 0;
|
||||
};
|
||||
|
||||
namespace details {
|
||||
|
||||
template<class Option>
|
||||
struct ConfigurationOptionModel : public ConfigurationOptionConcept {
|
||||
std::string key() const override { return Option::key(); }
|
||||
void validate(const std::string& value) const override { return Option::validate(value); }
|
||||
void validate(const PluginConfiguration& options) const override { Option::validate(options); }
|
||||
InferenceEngine::Parameter asParameter(const std::string& value) const override { return Option::asParameter(value); }
|
||||
};
|
||||
|
||||
enum class Deprecation {
|
||||
Off,
|
||||
On
|
||||
};
|
||||
|
||||
enum class Access {
|
||||
Private,
|
||||
Public
|
||||
};
|
||||
|
||||
enum class Category {
|
||||
CompileTime,
|
||||
RunTime
|
||||
};
|
||||
|
||||
class ConfigurationEntry {
|
||||
public:
|
||||
template<class Option>
|
||||
ConfigurationEntry(Option, details::Deprecation deprecation)
|
||||
: m_access(Option::access())
|
||||
, m_deprecation(deprecation)
|
||||
, m_category(Option::category())
|
||||
, m_value(std::make_shared<ConfigurationOptionModel<Option>>())
|
||||
{}
|
||||
|
||||
ConfigurationOptionConcept& get();
|
||||
const ConfigurationOptionConcept& get() const;
|
||||
|
||||
std::string key() const;
|
||||
bool isPrivate() const;
|
||||
bool isDeprecated() const;
|
||||
Category getCategory() const;
|
||||
|
||||
private:
|
||||
Access m_access = Access::Public;
|
||||
Deprecation m_deprecation = Deprecation::Off;
|
||||
Category m_category = Category::CompileTime;
|
||||
std::shared_ptr<ConfigurationOptionConcept> m_value;
|
||||
};
|
||||
|
||||
} // namespace details
|
||||
|
||||
// TODO: remove virtual inheritance once all options are migrated
|
||||
// it's needed to pass updated compilation config to graph transformer
|
||||
class PluginConfiguration : public virtual ParsedConfig {
|
||||
public:
|
||||
PluginConfiguration();
|
||||
|
||||
void from(const std::map<std::string, std::string>& config);
|
||||
void fromAtRuntime(const std::map<std::string, std::string>& config);
|
||||
std::unordered_set<std::string> getPublicKeys() const;
|
||||
bool supports(const std::string& key) const;
|
||||
|
||||
template<class Option>
|
||||
void registerOption() {
|
||||
const auto& key = Option::key();
|
||||
concepts.emplace(key, details::ConfigurationEntry(Option{}, details::Deprecation::Off));
|
||||
if (values.count(key) == 0) {
|
||||
// option could be registered more than once if there are deprecated versions of it
|
||||
values.emplace(key, Option::defaultValue());
|
||||
}
|
||||
}
|
||||
|
||||
template<class Option>
|
||||
void registerDeprecatedOption(const std::string& deprecatedKey) {
|
||||
const auto& key = Option::key();
|
||||
concepts.emplace(deprecatedKey, details::ConfigurationEntry(Option{}, details::Deprecation::On));
|
||||
if (values.count(key) == 0) {
|
||||
// option could be registered more than once if there are deprecated versions of it
|
||||
values.emplace(key, Option::defaultValue());
|
||||
}
|
||||
}
|
||||
|
||||
template<class Option>
|
||||
typename Option::value_type get() const {
|
||||
const auto& key = Option::key();
|
||||
validate(key);
|
||||
return Option::parse(values.at(key));
|
||||
}
|
||||
|
||||
void set(const std::string& key, const std::string& value);
|
||||
|
||||
const std::string& operator[](const std::string& key) const;
|
||||
|
||||
InferenceEngine::Parameter asParameter(const std::string& key) const;
|
||||
|
||||
virtual void validate() const;
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, details::ConfigurationEntry> concepts;
|
||||
std::unordered_map<std::string, std::string> values;
|
||||
|
||||
Logger::Ptr logger;
|
||||
|
||||
enum class Mode {
|
||||
Default,
|
||||
RunTime
|
||||
};
|
||||
void create(const std::map<std::string, std::string>& config, Mode mode = Mode::Default);
|
||||
|
||||
void validate(const std::string& key) const;
|
||||
};
|
||||
|
||||
} // namespace vpu
|
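To make the new option plumbing concrete, a minimal sketch of how a plugin might register and read options through `PluginConfiguration`; it only uses the option types introduced in this change and omits error handling:

```cpp
#include <map>
#include <string>

#include "vpu/configuration/plugin_configuration.hpp"
#include "vpu/configuration/options/log_level.hpp"
#include "vpu/configuration/options/copy_optimization.hpp"
#include "vpu/utils/log_level.hpp"

void configureExample(vpu::PluginConfiguration& configuration,
                      const std::map<std::string, std::string>& userConfig) {
    // Each option contributes its key, default value and validators.
    configuration.registerOption<vpu::LogLevelOption>();
    configuration.registerOption<vpu::CopyOptimizationOption>();

    // Unknown keys are rejected in validate(key); compile-time options passed
    // at run time are ignored with a warning (see fromAtRuntime()).
    configuration.from(userConfig);

    // Typed access goes through Option::parse() on the stored string value.
    const vpu::LogLevel level = configuration.get<vpu::LogLevelOption>();
    (void)level;
}
```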
@ -0,0 +1,15 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace vpu {
|
||||
|
||||
const std::unordered_map<std::string, bool>& string2switch();
|
||||
const std::unordered_map<bool, std::string>& switch2string();
|
||||
|
||||
} // namespace vpu
|
@ -10,11 +10,11 @@
|
||||
#include <string>
|
||||
|
||||
#include <vpu/myriad_config.hpp>
|
||||
#include <vpu/configuration.hpp>
|
||||
#include <vpu/private_plugin_config.hpp>
|
||||
|
||||
#include <vpu/parsed_config_base.hpp>
|
||||
|
||||
#include <vpu/graph_transformer.hpp>
|
||||
#include <vpu/utils/perf_report.hpp>
|
||||
#include <vpu/utils/logger.hpp>
|
||||
#include <vpu/utils/enums.hpp>
|
||||
@ -23,6 +23,12 @@ namespace vpu {
|
||||
|
||||
class ParsedConfig : public ParsedConfigBase {
|
||||
public:
|
||||
ParsedConfig() = default;
|
||||
ParsedConfig(const ParsedConfig&) = default;
|
||||
ParsedConfig& operator=(const ParsedConfig&) = default;
|
||||
ParsedConfig(ParsedConfig&&) = delete;
|
||||
ParsedConfig& operator=(ParsedConfig&&) = delete;
|
||||
|
||||
const std::string& compilerLogFilePath() const {
|
||||
return _compilerLogFilePath;
|
||||
}
|
||||
@ -31,6 +37,10 @@ public:
|
||||
return _compileConfig;
|
||||
}
|
||||
|
||||
CompilationConfig& compileConfig() {
|
||||
return _compileConfig;
|
||||
}
|
||||
|
||||
bool printReceiveTensorTime() const {
|
||||
return _printReceiveTensorTime;
|
||||
}
|
@ -25,10 +25,6 @@ VPU_DECLARE_ENUM(ConfigMode,
|
||||
|
||||
class ParsedConfigBase {
|
||||
public:
|
||||
LogLevel logLevel() const {
|
||||
return _logLevel;
|
||||
}
|
||||
|
||||
bool exclusiveAsyncRequests() const {
|
||||
return _exclusiveAsyncRequests;
|
||||
}
|
||||
@ -37,11 +33,9 @@ public:
|
||||
ParsedConfigBase();
|
||||
virtual ~ParsedConfigBase();
|
||||
|
||||
void update(
|
||||
const std::map<std::string, std::string>& config,
|
||||
ConfigMode mode = ConfigMode::Any);
|
||||
|
||||
protected:
|
||||
void update(const std::map<std::string, std::string>& config, ConfigMode mode = ConfigMode::Any);
|
||||
|
||||
virtual const std::unordered_set<std::string>& getCompileOptions() const;
|
||||
virtual const std::unordered_set<std::string>& getRunTimeOptions() const;
|
||||
virtual const std::unordered_set<std::string>& getDeprecatedOptions() const;
|
||||
@ -130,7 +124,6 @@ protected:
|
||||
Logger::Ptr _log;
|
||||
|
||||
private:
|
||||
LogLevel _logLevel = LogLevel::None;
|
||||
bool _exclusiveAsyncRequests = false;
|
||||
};
|
||||
|
||||
|
@ -0,0 +1,40 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "error.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
template<class Key, class Value, template<class...> class Map>
|
||||
inline std::vector<Key> getKeys(const Map<Key, Value>& map) {
|
||||
auto keys = std::vector<Key>{};
|
||||
keys.reserve(map.size());
|
||||
std::transform(map.cbegin(), map.cend(), std::back_inserter(keys), [](const std::pair<Key, Value>& entry) { return entry.first; });
|
||||
return keys;
|
||||
}
|
||||
|
||||
template<class Key, class Value, template<class...> class Map>
|
||||
inline std::vector<Value> getValues(const Map<Key, Value>& map) {
|
||||
auto values = std::vector<Value>{};
|
||||
values.reserve(map.size());
|
||||
std::transform(map.cbegin(), map.cend(), std::back_inserter(values), [](const std::pair<Key, Value>& entry) { return entry.second; });
|
||||
return values;
|
||||
}
|
||||
|
||||
template<class Key, class Value, template<class...> class Map>
|
||||
inline Map<Value, Key> inverse(const Map<Key, Value>& map) {
|
||||
auto inverted = Map<Value, Key>{};
|
||||
for (const auto& entry : map) {
|
||||
const auto& insertion = inverted.emplace(entry.second, entry.first);
|
||||
VPU_THROW_UNLESS(insertion.second, "Could not invert map {} due to duplicated value \"{}\"", map, entry.second);
|
||||
}
|
||||
return inverted;
|
||||
}
|
||||
|
||||
} // namespace vpu
|
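A brief usage sketch for the map helpers above; a plain `std::map` is chosen purely for illustration:

```cpp
#include <map>
#include <string>
#include <vector>

#include "vpu/utils/containers.hpp"

void containerHelpersExample() {
    const std::map<std::string, int> lookup = {{"one", 1}, {"two", 2}};

    // getKeys()/getValues() copy out keys and values in the map's iteration order.
    const std::vector<std::string> keys = vpu::getKeys(lookup);   // {"one", "two"}
    const std::vector<int> values = vpu::getValues(lookup);       // {1, 2}

    // inverse() swaps keys and values and throws (VPU_THROW_UNLESS) on duplicated values.
    const std::map<int, std::string> inverted = vpu::inverse(lookup);
    (void)keys; (void)values; (void)inverted;
}
```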
@ -29,6 +29,11 @@ public:
|
||||
using VPUException::VPUException;
|
||||
};
|
||||
|
||||
class UnsupportedConfigurationOptionException : public VPUException {
|
||||
public:
|
||||
using VPUException::VPUException;
|
||||
};
|
||||
|
||||
template <class Exception, typename... Args>
|
||||
void throwFormat(const char* fileName, int lineNumber, const char* messageFormat, Args&&... args) {
|
||||
IE_THROW(GeneralError) << '\n' << fileName << ':' << lineNumber << ' '
|
||||
@ -47,13 +52,20 @@ void throwFormat(const char* fileName, int lineNumber, const char* messageFormat
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
#define VPU_THROW_UNSUPPORTED_UNLESS(condition, ...) \
|
||||
#define VPU_THROW_UNSUPPORTED_LAYER_UNLESS(condition, ...) \
|
||||
do { \
|
||||
if (!(condition)) { \
|
||||
::vpu::details::throwFormat<::vpu::details::UnsupportedLayerException>(__FILE__, __LINE__, __VA_ARGS__); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
#define VPU_THROW_UNSUPPORTED_OPTION_UNLESS(condition, ...) \
|
||||
do { \
|
||||
if (!(condition)) { \
|
||||
::vpu::details::throwFormat<::vpu::details::UnsupportedConfigurationOptionException>(__FILE__, __LINE__, __VA_ARGS__); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
#ifdef NDEBUG
|
||||
# define VPU_INTERNAL_CHECK(condition, ...) \
|
||||
do { \
|
||||
|
@ -0,0 +1,21 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "vpu/utils/enums.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
VPU_DECLARE_ENUM(LogLevel,
|
||||
None,
|
||||
Fatal, /* used for very severe error events that will most probably cause the application to terminate */
|
||||
Error, /* reporting events which are not expected during normal execution, containing probable reason */
|
||||
Warning, /* indicating events which are not usual and might lead to errors later */
|
||||
Info, /* short enough messages about ongoing activity in the process */
|
||||
Debug, /* more fine-grained messages with references to particular data and explanations */
|
||||
Trace /* involved and detailed information about execution, helps to trace the execution flow, produces huge output */
|
||||
)
|
||||
|
||||
} // namespace vpu
|
@ -13,6 +13,7 @@
|
||||
#include <vpu/utils/enums.hpp>
|
||||
#include <vpu/utils/auto_scope.hpp>
|
||||
#include <vpu/utils/io.hpp>
|
||||
#include <vpu/utils/log_level.hpp>
|
||||
|
||||
namespace vpu {
|
||||
|
||||
@ -39,20 +40,6 @@ OutputStream::Ptr fileOutput(const std::string& fileName);
|
||||
|
||||
OutputStream::Ptr defaultOutput(const std::string& fileName = std::string());
|
||||
|
||||
//
|
||||
// Logger
|
||||
//
|
||||
|
||||
VPU_DECLARE_ENUM(LogLevel,
|
||||
None,
|
||||
Fatal, /* used for very severe error events that will most probably cause the application to terminate */
|
||||
Error, /* reporting events which are not expected during normal execution, containing probable reason */
|
||||
Warning, /* indicating events which are not usual and might lead to errors later */
|
||||
Info, /* short enough messages about ongoing activity in the process */
|
||||
Debug, /* more fine-grained messages with references to particular data and explanations */
|
||||
Trace /* involved and detailed information about execution, helps to trace the execution flow, produces huge output */
|
||||
)
|
||||
|
||||
class Logger final {
|
||||
public:
|
||||
using Ptr = std::shared_ptr<Logger>;
|
||||
|
@ -0,0 +1,10 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vpu/configuration/as_parameter_enabler.hpp>
|
||||
|
||||
InferenceEngine::Parameter AsParameterEnabler::asParameter(const std::string& value) {
|
||||
return {value};
|
||||
}
|
||||
|
@ -0,0 +1,45 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "vpu/private_plugin_config.hpp"
|
||||
#include "vpu/utils/containers.hpp"
|
||||
#include "vpu/configuration/options/copy_optimization.hpp"
|
||||
#include "vpu/configuration/switch_converters.hpp"
|
||||
#include "vpu/configuration/plugin_configuration.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
void CopyOptimizationOption::validate(const std::string& value) {
|
||||
const auto& converters = string2switch();
|
||||
VPU_THROW_UNLESS(converters.count(value) != 0, R"(unexpected copy optimization option value "{}", only {} are supported)", value, getKeys(converters));
|
||||
}
|
||||
|
||||
void CopyOptimizationOption::validate(const PluginConfiguration& configuration) {
|
||||
validate(configuration[key()]);
|
||||
}
|
||||
|
||||
std::string CopyOptimizationOption::key() {
|
||||
return InferenceEngine::MYRIAD_COPY_OPTIMIZATION;
|
||||
}
|
||||
|
||||
details::Access CopyOptimizationOption::access() {
|
||||
return details::Access::Private;
|
||||
}
|
||||
|
||||
details::Category CopyOptimizationOption::category() {
|
||||
return details::Category::CompileTime;
|
||||
}
|
||||
|
||||
std::string CopyOptimizationOption::defaultValue() {
|
||||
return InferenceEngine::PluginConfigParams::YES;
|
||||
}
|
||||
|
||||
CopyOptimizationOption::value_type CopyOptimizationOption::parse(const std::string& value) {
|
||||
const auto& converters = string2switch();
|
||||
VPU_THROW_UNSUPPORTED_OPTION_UNLESS(converters.count(value) != 0, R"(unexpected copy optimization option value "{}", only {} are supported)",
|
||||
value, getKeys(converters));
|
||||
return converters.at(value);
|
||||
}
|
||||
|
||||
} // namespace vpu
|
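For clarity, a short sketch of how the option above converts between strings and typed values, assuming the usual "YES"/"NO" values of the plugin config macros:

```cpp
#include <cassert>

#include "vpu/configuration/options/copy_optimization.hpp"

void copyOptimizationExample() {
    // parse() maps the plugin config switches through string2switch().
    assert(vpu::CopyOptimizationOption::parse("YES") == true);
    assert(vpu::CopyOptimizationOption::parse("NO") == false);

    // asParameter() is inherited from AsParsedParameterEnabler, so the returned
    // InferenceEngine::Parameter wraps the parsed bool rather than the raw string.
    const InferenceEngine::Parameter parameter = vpu::CopyOptimizationOption::asParameter("YES");
    (void)parameter;
}
```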
@ -0,0 +1,64 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "vpu/configuration/options/log_level.hpp"
|
||||
#include "vpu/utils/log_level.hpp"
|
||||
#include "vpu/utils/containers.hpp"
|
||||
#include "vpu/configuration/plugin_configuration.hpp"
|
||||
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace vpu {
|
||||
|
||||
namespace {
|
||||
|
||||
const std::unordered_map<std::string, LogLevel>& string2level() {
|
||||
static const std::unordered_map<std::string, LogLevel> converters = {
|
||||
{CONFIG_VALUE(LOG_NONE), LogLevel::None},
|
||||
{CONFIG_VALUE(LOG_ERROR), LogLevel::Error},
|
||||
{CONFIG_VALUE(LOG_WARNING), LogLevel::Warning},
|
||||
{CONFIG_VALUE(LOG_INFO), LogLevel::Info},
|
||||
{CONFIG_VALUE(LOG_DEBUG), LogLevel::Debug},
|
||||
{CONFIG_VALUE(LOG_TRACE), LogLevel::Trace},
|
||||
};
|
||||
return converters;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void LogLevelOption::validate(const std::string& value) {
|
||||
const auto& converters = string2level();
|
||||
VPU_THROW_UNLESS(converters.count(value) != 0, R"(unexpected log level option value "{}", only {} are supported)", value, getKeys(converters));
|
||||
}
|
||||
|
||||
void LogLevelOption::validate(const PluginConfiguration& configuration) {
|
||||
validate(configuration[key()]);
|
||||
}
|
||||
|
||||
std::string LogLevelOption::key() {
|
||||
return InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL;
|
||||
}
|
||||
|
||||
details::Access LogLevelOption::access() {
|
||||
return details::Access::Public;
|
||||
}
|
||||
|
||||
details::Category LogLevelOption::category() {
|
||||
return details::Category::CompileTime;
|
||||
}
|
||||
|
||||
std::string LogLevelOption::defaultValue() {
|
||||
return InferenceEngine::PluginConfigParams::LOG_NONE;
|
||||
}
|
||||
|
||||
LogLevelOption::value_type LogLevelOption::parse(const std::string& value) {
|
||||
const auto& converters = string2level();
|
||||
VPU_THROW_UNSUPPORTED_OPTION_UNLESS(converters.count(value) != 0, R"(unexpected log level option value "{}", only {} are supported)",
|
||||
value, getKeys(converters));
|
||||
return converters.at(value);
|
||||
}
|
||||
|
||||
} // namespace vpu
|
@ -0,0 +1,114 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "vpu/utils/error.hpp"
|
||||
#include "vpu/configuration/plugin_configuration.hpp"
|
||||
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
namespace details {
|
||||
|
||||
ConfigurationOptionConcept& ConfigurationEntry::get() {
|
||||
return *m_value;
|
||||
}
|
||||
|
||||
const ConfigurationOptionConcept& ConfigurationEntry::get() const {
|
||||
return *m_value;
|
||||
}
|
||||
|
||||
bool ConfigurationEntry::isPrivate() const {
|
||||
return m_access == Access::Private;
|
||||
}
|
||||
|
||||
bool ConfigurationEntry::isDeprecated() const {
|
||||
return m_deprecation == Deprecation::On;
|
||||
}
|
||||
|
||||
Category ConfigurationEntry::getCategory() const {
|
||||
return m_category;
|
||||
}
|
||||
|
||||
std::string ConfigurationEntry::key() const {
|
||||
return m_value->key();
|
||||
}
|
||||
|
||||
} // namespace details
|
||||
|
||||
PluginConfiguration::PluginConfiguration() : logger(std::make_shared<Logger>("Configuration", LogLevel::Warning, consoleOutput())) {}
|
||||
|
||||
|
||||
std::unordered_set<std::string> PluginConfiguration::getPublicKeys() const {
|
||||
auto publicKeys = std::unordered_set<std::string>{};
|
||||
for (const auto& entry : concepts) {
|
||||
const auto& key = entry.first;
|
||||
const auto& option = entry.second;
|
||||
if (option.isPrivate()) {
|
||||
continue;
|
||||
}
|
||||
publicKeys.insert(key);
|
||||
}
|
||||
return publicKeys;
|
||||
}
|
||||
|
||||
bool PluginConfiguration::supports(const std::string& key) const {
|
||||
return concepts.count(key) != 0;
|
||||
}
|
||||
|
||||
void PluginConfiguration::from(const std::map<std::string, std::string>& config) {
|
||||
create(config);
|
||||
}
|
||||
|
||||
void PluginConfiguration::fromAtRuntime(const std::map<std::string, std::string>& config) {
|
||||
create(config, Mode::RunTime);
|
||||
}
|
||||
|
||||
void PluginConfiguration::validate() const {
|
||||
for (const auto& option : concepts) {
|
||||
option.second.get().validate(*this);
|
||||
}
|
||||
}
|
||||
|
||||
void PluginConfiguration::create(const std::map<std::string, std::string>& config, Mode mode) {
|
||||
for (const auto& entry : config) {
|
||||
const auto& key = entry.first;
|
||||
validate(key);
|
||||
|
||||
const auto& optionConcept = concepts.at(key);
|
||||
if (mode == Mode::RunTime && optionConcept.getCategory() == details::Category::CompileTime) {
|
||||
logger->warning("Configuration option \"{}\" is used after network is loaded. Its value is going to be ignored.", key);
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& value = entry.second;
|
||||
set(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::Parameter PluginConfiguration::asParameter(const std::string& key) const {
|
||||
const auto& value = operator[](key);
|
||||
return concepts.at(key).get().asParameter(value);
|
||||
}
|
||||
|
||||
void PluginConfiguration::validate(const std::string& key) const {
|
||||
VPU_THROW_UNSUPPORTED_OPTION_UNLESS(supports(key), "Encountered an unsupported key {}, supported keys are {}", key, getPublicKeys());
|
||||
if (concepts.at(key).isDeprecated()) {
|
||||
logger->warning("Encountered deprecated option {} usage, consider replacing it with {} option", key, concepts.at(key).key());
|
||||
}
|
||||
}
|
||||
|
||||
const std::string& PluginConfiguration::operator[](const std::string& key) const {
|
||||
validate(key);
|
||||
return values.at(concepts.at(key).key());
|
||||
}
|
||||
|
||||
void PluginConfiguration::set(const std::string& key, const std::string& value) {
|
||||
validate(key);
|
||||
const auto& optionConcept = concepts.at(key).get();
|
||||
optionConcept.validate(value);
|
||||
values[optionConcept.key()] = value;
|
||||
}
|
||||
|
||||
} // namespace vpu
|
@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "vpu/utils/containers.hpp"
|
||||
#include "vpu/configuration/switch_converters.hpp"
|
||||
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
const std::unordered_map<std::string, bool>& string2switch() {
|
||||
static const std::unordered_map<std::string, bool> converters = {
|
||||
{CONFIG_VALUE(NO), false},
|
||||
{CONFIG_VALUE(YES), true}
|
||||
};
|
||||
return converters;
|
||||
}
|
||||
|
||||
const std::unordered_map<bool, std::string>& switch2string() {
|
||||
static const auto converters = inverse(string2switch());
|
||||
return converters;
|
||||
}
|
||||
|
||||
} // namespace vpu
|
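A one-liner sketch of the round trip provided by the converters above:

```cpp
#include <cassert>
#include <string>

#include "vpu/configuration/switch_converters.hpp"

void switchConvertersExample() {
    // switch2string() is built by inverting string2switch(), so the two stay in sync.
    const bool enabled = vpu::string2switch().at("YES");        // true
    const std::string text = vpu::switch2string().at(enabled);  // "YES"
    assert(text == "YES");
}
```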
@ -169,7 +169,6 @@ void ParsedConfig::parse(const std::map<std::string, std::string>& config) {
|
||||
setOption(_compileConfig.dumpAllPasses, switches, config, ie::MYRIAD_DUMP_ALL_PASSES);
|
||||
|
||||
setOption(_compileConfig.detectBatch, switches, config, ie::MYRIAD_DETECT_NETWORK_BATCH);
|
||||
setOption(_compileConfig.copyOptimization, switches, config, ie::MYRIAD_COPY_OPTIMIZATION);
|
||||
setOption(_compileConfig.packDataInCmx, switches, config, ie::MYRIAD_PACK_DATA_IN_CMX);
|
||||
setOption(_compileConfig.ignoreUnknownLayers, switches, config, ie::MYRIAD_IGNORE_UNKNOWN_LAYERS);
|
||||
setOption(_compileConfig.hwOptimization, switches, config, ie::MYRIAD_ENABLE_HW_ACCELERATION);
|
@ -59,13 +59,7 @@ void ParsedConfigBase::update(
|
||||
}
|
||||
|
||||
const std::unordered_set<std::string>& ParsedConfigBase::getCompileOptions() const {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
static const std::unordered_set<std::string> options = {
|
||||
CONFIG_KEY(LOG_LEVEL),
|
||||
VPU_CONFIG_KEY(LOG_LEVEL)
|
||||
};
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
static const std::unordered_set<std::string> options;
|
||||
return options;
|
||||
}
|
||||
|
||||
@ -73,8 +67,6 @@ const std::unordered_set<std::string>& ParsedConfigBase::getRunTimeOptions() con
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
static const std::unordered_set<std::string> options = {
|
||||
CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
|
||||
CONFIG_KEY(LOG_LEVEL),
|
||||
VPU_CONFIG_KEY(LOG_LEVEL)
|
||||
};
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
@ -82,37 +74,12 @@ IE_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
|
||||
const std::unordered_set<std::string>& ParsedConfigBase::getDeprecatedOptions() const {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
static const std::unordered_set<std::string> options = {
|
||||
VPU_CONFIG_KEY(LOG_LEVEL)
|
||||
};
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
static const std::unordered_set<std::string> options;
|
||||
return options;
|
||||
}
|
||||
|
||||
void ParsedConfigBase::parse(const std::map<std::string, std::string>& config) {
|
||||
static const std::unordered_map<std::string, LogLevel> logLevels = {
|
||||
{ CONFIG_VALUE(LOG_NONE), LogLevel::None },
|
||||
{ CONFIG_VALUE(LOG_ERROR), LogLevel::Error },
|
||||
{ CONFIG_VALUE(LOG_WARNING), LogLevel::Warning },
|
||||
{ CONFIG_VALUE(LOG_INFO), LogLevel::Info },
|
||||
{ CONFIG_VALUE(LOG_DEBUG), LogLevel::Debug },
|
||||
{ CONFIG_VALUE(LOG_TRACE), LogLevel::Trace }
|
||||
};
|
||||
|
||||
setOption(_logLevel, logLevels, config, CONFIG_KEY(LOG_LEVEL));
|
||||
setOption(_exclusiveAsyncRequests, switches, config, CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS));
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
setOption(_logLevel, logLevels, config, VPU_CONFIG_KEY(LOG_LEVEL));
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (const auto envVar = std::getenv("IE_VPU_LOG_LEVEL")) {
|
||||
_logLevel = logLevels.at(envVar);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
std::unordered_set<std::string> ParsedConfigBase::merge(
|
||||
|
@ -48,8 +48,13 @@ function(add_graph_transformer_target TARGET_NAME STATIC_IE)
|
||||
target_link_libraries(${TARGET_NAME} PUBLIC pugixml vpu_common_lib)
|
||||
endif()
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES}
|
||||
PRIVATE openvino::itt)
|
||||
target_link_libraries(${TARGET_NAME}
|
||||
PUBLIC
|
||||
${NGRAPH_LIBRARIES}
|
||||
PRIVATE
|
||||
openvino::itt
|
||||
mvnc # TODO: remove once all options are migrated
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
target_compile_definitions(${TARGET_NAME} PRIVATE NOMINMAX)
|
||||
|
@ -8,28 +8,29 @@
|
||||
#include <vpu/model/model.hpp>
|
||||
#include <vpu/utils/logger.hpp>
|
||||
#include <vpu/utils/profiling.hpp>
|
||||
#include <mvnc.h>
|
||||
|
||||
namespace vpu {
|
||||
|
||||
struct DeviceResources {
|
||||
static int numShaves(const Platform& platform);
|
||||
static int numSlices(const Platform& platform);
|
||||
static int numShaves(const ncDevicePlatform_t& platform);
|
||||
static int numSlices(const ncDevicePlatform_t& platform);
|
||||
static int numStreams();
|
||||
};
|
||||
|
||||
struct DefaultAllocation {
|
||||
static int numStreams(const Platform& platform, const CompilationConfig& configuration);
|
||||
static int numSlices(const Platform& platform, int numStreams);
|
||||
static int numShaves(const Platform& platform, int numStreams, int numSlices);
|
||||
static int numStreams(const ncDevicePlatform_t& platform, const PluginConfiguration& configuration);
|
||||
static int numSlices(const ncDevicePlatform_t& platform, int numStreams);
|
||||
static int numShaves(const ncDevicePlatform_t& platform, int numStreams, int numSlices);
|
||||
static int tilingCMXLimit(int numSlices);
|
||||
};
|
||||
|
||||
struct CompileEnv final {
|
||||
public:
|
||||
Platform platform;
|
||||
ncDevicePlatform_t platform;
|
||||
Resources resources;
|
||||
|
||||
CompilationConfig config;
|
||||
PluginConfiguration config;
|
||||
|
||||
Logger::Ptr log;
|
||||
|
||||
@ -49,14 +50,14 @@ public:
|
||||
static const CompileEnv* getOrNull();
|
||||
|
||||
static void init(
|
||||
Platform platform,
|
||||
const CompilationConfig& config,
|
||||
const Logger::Ptr& log);
|
||||
static void updateConfig(const CompilationConfig& config);
|
||||
ncDevicePlatform_t platform,
|
||||
const PluginConfiguration& config,
|
||||
const Logger::Ptr& log);
|
||||
static void updateConfig(const PluginConfiguration& config);
|
||||
static void free();
|
||||
|
||||
private:
|
||||
explicit CompileEnv(Platform platform);
|
||||
explicit CompileEnv(ncDevicePlatform_t platform);
|
||||
};
|
||||
|
||||
} // namespace vpu
|
||||
|
@ -21,108 +21,14 @@
|
||||
#include <vpu/utils/perf_report.hpp>
|
||||
#include <vpu/utils/logger.hpp>
|
||||
#include <vpu/utils/optional.hpp>
|
||||
#include <vpu/configuration/plugin_configuration.hpp>
|
||||
|
||||
#include "mvnc.h"
|
||||
|
||||
namespace vpu {
|
||||
|
||||
namespace ie = InferenceEngine;
|
||||
|
||||
//
|
||||
// CompilationConfig
|
||||
//
|
||||
|
||||
VPU_DECLARE_ENUM(Platform,
|
||||
MYRIAD_2 = 2450,
|
||||
MYRIAD_X = 2480,
|
||||
)
|
||||
|
||||
struct CompilationConfig final {
|
||||
//
|
||||
// Compilation options
|
||||
//
|
||||
|
||||
int numSHAVEs = -1;
|
||||
int numCMXSlices = -1;
|
||||
int numExecutors = -1;
|
||||
int tilingCMXLimitKB = -1;
|
||||
|
||||
bool hwOptimization = true;
|
||||
bool hwExtraSplit = false;
|
||||
|
||||
std::string irWithVpuScalesDir;
|
||||
|
||||
std::string customLayers;
|
||||
|
||||
bool detectBatch = true;
|
||||
|
||||
Optional<bool> copyOptimization;
|
||||
Optional<bool> injectSwOps;
|
||||
Optional<bool> packDataInCmx;
|
||||
bool mergeHwPoolToConv = true;
|
||||
bool hwDilation = false;
|
||||
bool forceDeprecatedCnnConversion = false;
|
||||
bool enableEarlyEltwiseReLUFusion = true;
|
||||
|
||||
std::map<std::string, std::vector<int>> ioStrides;
|
||||
|
||||
//
|
||||
// Debug options
|
||||
//
|
||||
|
||||
ie::details::caseless_set<std::string> hwWhiteList;
|
||||
ie::details::caseless_set<std::string> hwBlackList;
|
||||
|
||||
bool hwDisabled(const std::string& layerName) const {
|
||||
if (!hwWhiteList.empty()) {
|
||||
return hwWhiteList.count(layerName) == 0;
|
||||
}
|
||||
|
||||
if (!hwBlackList.empty()) {
|
||||
return hwBlackList.count(layerName) != 0;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
ie::details::caseless_set<std::string> noneLayers;
|
||||
|
||||
bool skipAllLayers() const {
|
||||
if (noneLayers.size() == 1) {
|
||||
const auto& val = *noneLayers.begin();
|
||||
return val == "*";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool skipLayerType(const std::string& layerType) const {
|
||||
return noneLayers.count(layerType) != 0;
|
||||
}
|
||||
bool ignoreUnknownLayers = false;
|
||||
|
||||
std::string dumpInternalGraphFileName;
|
||||
std::string dumpInternalGraphDirectory;
|
||||
bool dumpAllPasses;
|
||||
|
||||
bool disableReorder = false; // TODO: rename to enableReorder and switch logic.
|
||||
bool disableConvertStages = false;
|
||||
bool enablePermuteMerging = true;
|
||||
bool enableReplWithSCRelu = false;
|
||||
bool enableReplaceWithReduceMean = true;
|
||||
bool enableTensorIteratorUnrolling = false;
|
||||
bool forcePureTensorIterator = false;
|
||||
bool enableMemoryTypesAnnotation = false;
|
||||
bool enableWeightsAnalysis = true;
|
||||
bool checkPreprocessingInsideModel = true;
|
||||
bool enableCustomReshapeParam = false;
|
||||
|
||||
//
|
||||
// Deprecated options
|
||||
//
|
||||
|
||||
float inputScale = 1.0f;
|
||||
float inputBias = 0.0f;
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// DataInfo
|
||||
//
|
||||
@ -165,17 +71,17 @@ struct CompiledGraph final {
|
||||
// compileNetwork
|
||||
//
|
||||
|
||||
CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, Platform platform, const CompilationConfig& config, const Logger::Ptr& log,
|
||||
const ie::ICore* core);
|
||||
CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, ncDevicePlatform_t platform, const PluginConfiguration& config, const Logger::Ptr& log,
|
||||
const ie::ICore* core);
|
||||
|
||||
CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const CompilationConfig& subConfig, const ie::ICore* core);
|
||||
CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const PluginConfiguration& subConfig, const ie::ICore* core);
|
||||
|
||||
//
|
||||
// getSupportedLayers
|
||||
//
|
||||
|
||||
std::set<std::string> getSupportedLayers(const ie::CNNNetwork& network, Platform platform, const CompilationConfig& config, const Logger::Ptr& log,
|
||||
const ie::ICore* core);
|
||||
std::set<std::string> getSupportedLayers(const ie::CNNNetwork& network, ncDevicePlatform_t platform, const PluginConfiguration& config, const Logger::Ptr& log,
|
||||
const ie::ICore* core);
|
||||
|
||||
//
|
||||
// Blob version and checks
|
||||
|
@ -12,8 +12,8 @@ namespace vpu {
|
||||
|
||||
CompiledGraph::Ptr compileModel(
|
||||
const Model& model,
|
||||
Platform platform,
|
||||
const CompilationConfig& config,
|
||||
ncDevicePlatform_t platform,
|
||||
const PluginConfiguration& config,
|
||||
const Logger::Ptr& log);
|
||||
|
||||
} // namespace vpu
|
||||
|
@ -85,12 +85,12 @@ void BackEnd::dumpModel(

std::string fileName;

if (!env.config.dumpInternalGraphFileName.empty()) {
fileName = fileNameNoExt(env.config.dumpInternalGraphFileName);
} else if (!env.config.dumpInternalGraphDirectory.empty()) {
if (!env.config.compileConfig().dumpInternalGraphFileName.empty()) {
fileName = fileNameNoExt(env.config.compileConfig().dumpInternalGraphFileName);
} else if (!env.config.compileConfig().dumpInternalGraphDirectory.empty()) {
fileName = formatString(
"%s/vpu_graph_%f%f%i_%s",
env.config.dumpInternalGraphDirectory,
env.config.compileConfig().dumpInternalGraphDirectory,
std::setw(2), std::setfill('0'),
model->attrs().get<int>("index"),
replaceBadCharacters(model->name()));

@ -99,7 +99,7 @@ void BackEnd::dumpModel(
}

if (!postfix.empty()) {
if (!env.config.dumpAllPasses) {
if (!env.config.compileConfig().dumpAllPasses) {
return;
}

@ -29,7 +29,7 @@ void FrontEnd::detectNetworkBatch(
using PrecisionsMap = std::map<std::string, ie::Precision>;
const auto& env = CompileEnv::get();

if (!env.config.detectBatch) {
if (!env.config.compileConfig().detectBatch) {
// skip batch extraction step and go as is
return;
}

@ -436,7 +436,7 @@ void FrontEnd::processTrivialCases(const Model& model) {
void FrontEnd::defaultOnUnsupportedLayerCallback(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs,
const std::string& extraMessage) {
const auto& env = CompileEnv::get();
VPU_THROW_UNSUPPORTED_UNLESS(env.config.ignoreUnknownLayers, "Failed to compile layer \"%v\": %v", layer->name, extraMessage);
VPU_THROW_UNSUPPORTED_LAYER_UNLESS(env.config.compileConfig().ignoreUnknownLayers, "Failed to compile layer \"%v\": %v", layer->name, extraMessage);
_stageBuilder->addNoneStage(model, layer->name, layer, inputs, outputs);
}

@ -466,15 +466,15 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,
// Parse custom layers
//

if (!env.config.customLayers.empty()) {
env.log->trace("Parse custom layers : %s", env.config.customLayers);
if (!env.config.compileConfig().customLayers.empty()) {
env.log->trace("Parse custom layers : %s", env.config.compileConfig().customLayers);
VPU_LOGGER_SECTION(env.log);

if (env.platform != Platform::MYRIAD_X) {
if (env.platform != ncDevicePlatform_t::NC_MYRIAD_X) {
VPU_THROW_FORMAT("Custom layers are not supported for %v platforms", env.platform);
}

_customLayers = CustomLayer::loadFromFile(env.config.customLayers);
_customLayers = CustomLayer::loadFromFile(env.config.compileConfig().customLayers);
}

//

@ -494,7 +494,7 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,
env.log->trace("Update IE Network");
VPU_LOGGER_SECTION(env.log);

if (network.getFunction() && env.config.forceDeprecatedCnnConversion) {
if (network.getFunction() && env.config.compileConfig().forceDeprecatedCnnConversion) {
network = convertNetwork(network);
}

@ -545,7 +545,7 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,

processTrivialCases(model);

if (!CompileEnv::get().config.disableConvertStages) {
if (!CompileEnv::get().config.compileConfig().disableConvertStages) {
addDataTypeConvertStages(model);
}

@ -567,7 +567,7 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,

getInputAndOutputData(model, layer, inputs, outputs);

if (env.config.skipAllLayers() || env.config.skipLayerType(layer->type)) {
if (env.config.compileConfig().skipAllLayers() || env.config.compileConfig().skipLayerType(layer->type)) {
_stageBuilder->addNoneStage(model, layer->name, layer, inputs, outputs);
supportedLayer(layer);
continue;

@ -22,7 +22,7 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
env.log->trace("Add Data type conversion stages");
VPU_LOGGER_SECTION(env.log);

const bool hasScaleBias = env.config.inputScale != 1.0f || env.config.inputBias != 0.0f;
const bool hasScaleBias = env.config.compileConfig().inputScale != 1.0f || env.config.compileConfig().inputBias != 0.0f;

for (const auto& input : model->datas()) {
if (input->usage() != DataUsage::Input) {

@ -38,11 +38,11 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
env.log->trace("Apply deprecated scale/bias parameters");

std::ostringstream postfix;
if (env.config.inputScale != 1.0f) {
postfix << "@SCALE=" << InferenceEngine::CNNLayer::ie_serialize_float(env.config.inputScale);
if (env.config.compileConfig().inputScale != 1.0f) {
postfix << "@SCALE=" << InferenceEngine::CNNLayer::ie_serialize_float(env.config.compileConfig().inputScale);
}
if (env.config.inputBias != 0.0f) {
postfix << "@BIAS=" << InferenceEngine::CNNLayer::ie_serialize_float(env.config.inputBias);
if (env.config.compileConfig().inputBias != 0.0f) {
postfix << "@BIAS=" << InferenceEngine::CNNLayer::ie_serialize_float(env.config.compileConfig().inputBias);
}

const auto scaledInput = model->duplicateData(

@ -55,9 +55,9 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
model,
scaledInput->name(),
nullptr,
env.config.inputScale,
env.config.compileConfig().inputScale,
1.0f,
env.config.inputBias,
env.config.compileConfig().inputBias,
input,
scaledInput);
}

@ -89,8 +89,8 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
inputFP16->name(),
input,
inputFP16,
env.config.inputScale,
env.config.inputBias);
env.config.compileConfig().inputScale,
env.config.compileConfig().inputBias);

break;
}

@ -25,8 +25,8 @@ void FrontEnd::parseInputAndOutputData(const Model& model) {
VPU_LOGGER_SECTION(env.log);

const auto parseIOStrides = [&env](const std::string& name, const Data& data) {
const auto& match = env.config.ioStrides.find(name);
if (match == env.config.ioStrides.end()) {
const auto& match = env.config.compileConfig().ioStrides.find(name);
if (match == env.config.compileConfig().ioStrides.end()) {
return;
}

@ -21,7 +21,7 @@ void FrontEnd::unrollLoops(ie::CNNNetwork& network) {
env.log->trace("Unroll TensorIterator loops");
VPU_LOGGER_SECTION(env.log);

if (!env.config.irWithVpuScalesDir.empty()) {
if (!env.config.compileConfig().irWithVpuScalesDir.empty()) {
// TODO: Scale dumps does not work with IR, which contain Tensor Iterator layers, because we cannot serialize them. #-23429
for (auto iterator = ie::details::CNNNetworkIterator(network); iterator != ie::details::CNNNetworkIterator(); ++iterator) {
const auto& layer = *iterator;

@ -30,11 +30,11 @@ void FrontEnd::unrollLoops(ie::CNNNetwork& network) {
}
}

if (env.config.forcePureTensorIterator) {
if (env.config.compileConfig().forcePureTensorIterator) {
return;
}

if (env.config.enableTensorIteratorUnrolling) {
if (env.config.compileConfig().enableTensorIteratorUnrolling) {
ie::NetPass::UnrollTI(network);
} else {
// Try to convert network to a RNN sequence due to performance reasons
@ -42,6 +42,7 @@
#include <vpu/utils/auto_scope.hpp>
#include <vpu/utils/dot_io.hpp>
#include <vpu/utils/file_system.hpp>
#include <mvnc.h>

namespace vpu {

@ -55,7 +56,7 @@ thread_local CompileEnv* g_compileEnv = nullptr;

} // namespace

CompileEnv::CompileEnv(Platform platform) : platform(platform) {}
CompileEnv::CompileEnv(ncDevicePlatform_t platform) : platform(platform) {}

const CompileEnv& CompileEnv::get() {
IE_ASSERT(g_compileEnv != nullptr);

@ -70,7 +71,7 @@ const CompileEnv* CompileEnv::getOrNull() {
return g_compileEnv;
}

void CompileEnv::init(Platform platform, const CompilationConfig& config, const Logger::Ptr& log) {
void CompileEnv::init(ncDevicePlatform_t platform, const PluginConfiguration& config, const Logger::Ptr& log) {
g_compileEnv = new CompileEnv(platform);
g_compileEnv->config = config;
g_compileEnv->log = log;

@ -79,31 +80,37 @@ void CompileEnv::init(Platform platform, const CompilationConfig& config, const
g_compileEnv->profile.setLogger(log);
#endif

if (platform == Platform::MYRIAD_2) {
g_compileEnv->config.hwOptimization = false;
if (platform == ncDevicePlatform_t::NC_MYRIAD_2) {
g_compileEnv->config.compileConfig().hwOptimization = false;
}

VPU_THROW_UNLESS(g_compileEnv->config.numSHAVEs <= g_compileEnv->config.numCMXSlices,
VPU_THROW_UNLESS(g_compileEnv->config.compileConfig().numSHAVEs <= g_compileEnv->config.compileConfig().numCMXSlices,
R"(Value of configuration option ("{}") must be not greater than value of configuration option ("{}"), but {} > {} are provided)",
ie::MYRIAD_NUMBER_OF_SHAVES, ie::MYRIAD_NUMBER_OF_CMX_SLICES, config.numSHAVEs, config.numCMXSlices);
ie::MYRIAD_NUMBER_OF_SHAVES, ie::MYRIAD_NUMBER_OF_CMX_SLICES, config.compileConfig().numSHAVEs, config.compileConfig().numCMXSlices);

const auto numExecutors = config.numExecutors != -1 ? config.numExecutors : DefaultAllocation::numStreams(platform, config);
const auto numExecutors = config.compileConfig().numExecutors != -1 ? config.compileConfig().numExecutors : DefaultAllocation::numStreams(platform, config);
VPU_THROW_UNLESS(numExecutors >= 1 && numExecutors <= DeviceResources::numStreams(),
R"(Value of configuration option ("{}") must be in the range [{}, {}], actual is "{}")",
ie::MYRIAD_THROUGHPUT_STREAMS, 1, DeviceResources::numStreams(), numExecutors);

const auto numSlices = config.numCMXSlices != -1 ? config.numCMXSlices : DefaultAllocation::numSlices(platform, numExecutors);
const auto numSlices = config.compileConfig().numCMXSlices != -1
? config.compileConfig().numCMXSlices
: DefaultAllocation::numSlices(platform, numExecutors);
VPU_THROW_UNLESS(numSlices >= 1 && numSlices <= DeviceResources::numSlices(platform),
R"(Value of configuration option ("{}") must be in the range [{}, {}], actual is "{}")",
ie::MYRIAD_NUMBER_OF_CMX_SLICES, 1, DeviceResources::numSlices(platform), numSlices);

int defaultCmxLimit = DefaultAllocation::tilingCMXLimit(numSlices);
const auto tilingCMXLimit = config.tilingCMXLimitKB != -1 ? std::min(config.tilingCMXLimitKB * 1024, defaultCmxLimit) : defaultCmxLimit;
const auto tilingCMXLimit = config.compileConfig().tilingCMXLimitKB != -1
? std::min(config.compileConfig().tilingCMXLimitKB * 1024, defaultCmxLimit)
: defaultCmxLimit;
VPU_THROW_UNLESS(tilingCMXLimit >= 0,
R"(Value of configuration option ("{}") must be greater than {}, actual is "{}")",
ie::MYRIAD_TILING_CMX_LIMIT_KB, 0, tilingCMXLimit);

const auto numShaves = config.numSHAVEs != -1 ? config.numSHAVEs : DefaultAllocation::numShaves(platform, numExecutors, numSlices);
const auto numShaves = config.compileConfig().numSHAVEs != -1
? config.compileConfig().numSHAVEs
: DefaultAllocation::numShaves(platform, numExecutors, numSlices);
VPU_THROW_UNLESS(numShaves >= 1 && numShaves <= DeviceResources::numShaves(platform),
R"(Value of configuration option ("{}") must be in the range [{}, {}], actual is "{}")",
ie::MYRIAD_NUMBER_OF_SHAVES, 1, DeviceResources::numShaves(platform), numShaves);

@ -123,7 +130,7 @@ void CompileEnv::init(Platform platform, const CompilationConfig& config, const
g_compileEnv->initialized = true;
}

void CompileEnv::updateConfig(const CompilationConfig& config) {
void CompileEnv::updateConfig(const PluginConfiguration& config) {
IE_ASSERT(g_compileEnv != nullptr);
IE_ASSERT(g_compileEnv->initialized);

@ -165,9 +172,9 @@ CompiledGraph::Ptr compileImpl(const ie::CNNNetwork& network, const ie::ICore* c

middleEnd->run(model);

if (!env.config.irWithVpuScalesDir.empty()) {
network.serialize(env.config.irWithVpuScalesDir + "/" + network.getName() + "_scales.xml",
env.config.irWithVpuScalesDir + "/" + network.getName() + "_scales.bin");
if (!env.config.compileConfig().irWithVpuScalesDir.empty()) {
network.serialize(env.config.compileConfig().irWithVpuScalesDir + "/" + network.getName() + "_scales.xml",
env.config.compileConfig().irWithVpuScalesDir + "/" + network.getName() + "_scales.bin");
}

return backEnd->build(model, frontEnd->origLayers());

@ -191,8 +198,8 @@ CompiledGraph::Ptr compileImpl(const Model& model) {

} // namespace

CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, Platform platform, const CompilationConfig& config, const Logger::Ptr& log,
const ie::ICore* core) {
CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, ncDevicePlatform_t platform, const PluginConfiguration& config, const Logger::Ptr& log,
const ie::ICore* core) {
CompileEnv::init(platform, config, log);
AutoScope autoDeinit([] {
CompileEnv::free();

@ -205,8 +212,8 @@ CompiledGraph::Ptr compileNetwork(const ie::CNNNetwork& network, Platform platfo

CompiledGraph::Ptr compileModel(
const Model& model,
Platform platform,
const CompilationConfig& config,
ncDevicePlatform_t platform,
const PluginConfiguration& config,
const Logger::Ptr& log) {
CompileEnv::init(platform, config, log);
AutoScope autoDeinit([] {

@ -218,7 +225,7 @@ CompiledGraph::Ptr compileModel(
return compileImpl(model);
}

CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const CompilationConfig& subConfig, const ie::ICore* core) {
CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const PluginConfiguration& subConfig, const ie::ICore* core) {
VPU_PROFILE(compileSubNetwork);

const auto& env = CompileEnv::get();

@ -238,11 +245,11 @@ CompiledGraph::Ptr compileSubNetwork(const ie::CNNNetwork& network, const Compil
//

std::set<std::string> getSupportedLayers(
const ie::CNNNetwork& network,
Platform platform,
const CompilationConfig& config,
const Logger::Ptr& log,
const ie::ICore* core) {
const ie::CNNNetwork& network,
ncDevicePlatform_t platform,
const PluginConfiguration& config,
const Logger::Ptr& log,
const ie::ICore* core) {
CompileEnv::init(platform, config, log);
AutoScope autoDeinit([] {
CompileEnv::free();

@ -255,28 +262,28 @@ std::set<std::string> getSupportedLayers(
return frontEnd->checkSupportedLayers(network);
}

int DeviceResources::numShaves(const Platform& platform) {
return platform == Platform::MYRIAD_2 ? 12 : 16;
int DeviceResources::numShaves(const ncDevicePlatform_t& platform) {
return platform == ncDevicePlatform_t::NC_MYRIAD_2 ? 12 : 16;
}

int DeviceResources::numSlices(const Platform& platform) {
return platform == Platform::MYRIAD_2 ? 12 : 19;
int DeviceResources::numSlices(const ncDevicePlatform_t& platform) {
return platform == ncDevicePlatform_t::NC_MYRIAD_2 ? 12 : 19;
}

int DeviceResources::numStreams() {
return 3;
}

int DefaultAllocation::numStreams(const Platform& platform, const CompilationConfig& configuration) {
return platform == Platform::MYRIAD_X && configuration.hwOptimization ? 2 : 1;
int DefaultAllocation::numStreams(const ncDevicePlatform_t& platform, const PluginConfiguration& configuration) {
return platform == ncDevicePlatform_t::NC_MYRIAD_X && configuration.compileConfig().hwOptimization ? 2 : 1;
}

int DefaultAllocation::numSlices(const Platform& platform, int numStreams) {
int DefaultAllocation::numSlices(const ncDevicePlatform_t& platform, int numStreams) {
const auto capabilities = DeviceResources::numSlices(platform);
return capabilities / numStreams;
}

int DefaultAllocation::numShaves(const Platform& platform, int numStreams, int numSlices) {
int DefaultAllocation::numShaves(const ncDevicePlatform_t& platform, int numStreams, int numSlices) {
const auto numAvailableShaves = DeviceResources::numShaves(platform);
if (numStreams == 1) {
return numAvailableShaves;
@ -10,6 +10,7 @@
#include <string>

#include <vpu/compile_env.hpp>
#include <vpu/configuration/options/copy_optimization.hpp>

namespace vpu {

@ -93,7 +94,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(convertShapeNotation);
ADD_DUMP_PASS("convertShapeNotation");

if (!env.config.disableReorder && !env.config.hwOptimization) {
if (!env.config.compileConfig().disableReorder && !env.config.compileConfig().hwOptimization) {
ADD_PASS(reorderInputsToChannelMinor);
ADD_DUMP_PASS("reorderInputsToChannelMinor");
}

@ -125,7 +126,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// To overcome fp16 limitations
//

if (env.config.hwOptimization && env.config.enableWeightsAnalysis) {
if (env.config.compileConfig().hwOptimization && env.config.compileConfig().enableWeightsAnalysis) {
ADD_PASS(analyzeWeightableLayers);
ADD_DUMP_PASS("analyzeWeightableLayers");
}

@ -150,7 +151,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// Model HW-specific optimizations
//

if (env.config.hwOptimization) {
if (env.config.compileConfig().hwOptimization) {
ADD_PASS(replaceFCbyConv);
ADD_DUMP_PASS("replaceFCbyConv");

@ -161,7 +162,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(replaceDeconvByConv);
ADD_DUMP_PASS("replaceDeconvByConv");

if (env.config.hwDilation) {
if (env.config.compileConfig().hwDilation) {
ADD_PASS(reshapeDilationConv);
ADD_DUMP_PASS("reshapeDilationConv");
}

@ -173,7 +174,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// Pass should be located before "adjustDataBatch" because "adjustDataBatch" specifies "origConvOutput" attribute
// for convolution in order to provide that information to "hwConvTiling" pass.
// Otherwise, "hwConvTiling" will see incorrect values in "origConvOutput" attribute.
if (env.config.enableCustomReshapeParam) {
if (env.config.compileConfig().enableCustomReshapeParam) {
ADD_PASS(reshapeBeforeConvTiling);
ADD_DUMP_PASS("reshapeBeforeConvTiling");
}

@ -197,7 +198,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(hwPadding);
ADD_DUMP_PASS("hwPadding");

if (env.config.hwOptimization) {
if (env.config.compileConfig().hwOptimization) {
ADD_PASS(splitLargeKernelConv);
ADD_DUMP_PASS("splitLargeKernelConv");
}

@ -209,7 +210,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(adjustDataBatch);
ADD_DUMP_PASS("adjustDataBatch");

if (env.config.enableReplWithSCRelu) {
if (env.config.compileConfig().enableReplWithSCRelu) {
ADD_PASS(replaceWithSCReLU);
ADD_DUMP_PASS("replaceWithSCReLU");
}

@ -218,13 +219,13 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// HW stages tiling
//

if (env.config.hwOptimization) {
if (env.config.compileConfig().hwOptimization) {
ADD_PASS(hwConvTiling);
ADD_PASS(hwPoolTiling);
ADD_PASS(hwFullyConnectedTiling);
ADD_DUMP_PASS("hwTiling");

if (env.config.hwExtraSplit) {
if (env.config.compileConfig().hwExtraSplit) {
ADD_PASS(hwExtraSplit);
ADD_DUMP_PASS("hwExtraSplit");
}

@ -242,7 +243,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
//
// this stage should be executed after "hwPoolTiling"
// and before "swPoolAdaptation"
if (env.config.enableReplaceWithReduceMean) {
if (env.config.compileConfig().enableReplaceWithReduceMean) {
ADD_PASS(replaceWithReduceMean);
ADD_DUMP_PASS("replaceWithReduceMean");
}

@ -261,7 +262,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(mergeReLUAndBias);
ADD_DUMP_PASS("mergeReLUAndBias");

if (env.config.enableEarlyEltwiseReLUFusion) {
if (env.config.compileConfig().enableEarlyEltwiseReLUFusion) {
ADD_PASS(mergeEltwiseAndReLUDynamic);
ADD_DUMP_PASS("mergeEltwiseAndReLUDynamic");
}

@ -279,7 +280,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {

// TODO: mergePermute support for reorder stage too.
// TODO: pass that will swap Permute and per-element operations.
if (env.config.enablePermuteMerging) {
if (env.config.compileConfig().enablePermuteMerging) {
ADD_PASS(mergePermuteStages);
ADD_DUMP_PASS("mergePermuteStages");
}

@ -326,7 +327,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// Model common optimizations
//

if (env.config.copyOptimization.getOrDefault(true)) {
if (env.config.get<CopyOptimizationOption>()) {
ADD_PASS(eliminateCopyStages);
ADD_DUMP_PASS("eliminateCopyStages");
}

@ -334,7 +335,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
//
// HW/SW injection

if (env.config.hwOptimization && env.config.injectSwOps.getOrDefault(true)) {
if (env.config.compileConfig().hwOptimization && env.config.compileConfig().injectSwOps.getOrDefault(true)) {
ADD_PASS(injectSw);
ADD_DUMP_PASS("injectSw");
}

@ -350,7 +351,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
// HW stages finalization
//

if (env.config.hwOptimization) {
if (env.config.compileConfig().hwOptimization) {
ADD_PASS(finalizeHwOps);
ADD_DUMP_PASS("hwFinalization");
}

@ -361,7 +362,7 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(markFastStages);
ADD_DUMP_PASS("markFastStages");

if (env.config.enableMemoryTypesAnnotation) {
if (env.config.compileConfig().enableMemoryTypesAnnotation) {
ADD_PASS(annotateMemoryTypes);
ADD_DUMP_PASS("annotateMemoryTypes");
}
@ -48,7 +48,7 @@ void PassImpl::run(const Model& model) {
allocNonIntermediateData(model);
adjustModelForMemReqs(model);
copyHwMisalignedInput(model);
if (env.config.packDataInCmx.getOrDefault(true)) {
if (env.config.compileConfig().packDataInCmx.getOrDefault(true)) {
packDataInCmx(model);
}
}

@ -147,7 +147,7 @@ void PassImpl::collectMemReqs(const Model& model) {
}

void PassImpl::resetStageOrder(const Model& model) {
if (!CompileEnv::get().config.hwOptimization)
if (!CompileEnv::get().config.compileConfig().hwOptimization)
return;

static const std::string s_expectCMXOutput {"expectCMXOutput"};

@ -14,6 +14,7 @@

#include <vpu/middleend/allocator/allocator.hpp>
#include <vpu/compile_env.hpp>
#include <vpu/configuration/options/copy_optimization.hpp>

namespace vpu {

@ -78,7 +79,7 @@ void PassImpl::run(const Model& model) {

std::queue<Stage> copyToRemove;

if (!env.config.copyOptimization.hasValue()) {
if (!env.config.get<CopyOptimizationOption>()) {
int nCopyStages = 0;
for (const auto& stage : model->getStages()) {
if (stage->type() == StageType::Copy) {

@ -68,7 +68,7 @@ void PassImpl::run(const Model& model) {
// Collect HW and SW candidates
//

if (!env.config.injectSwOps.hasValue() &&
if (!env.config.compileConfig().injectSwOps.hasValue() &&
model->numStages() > nMaxStagesForInjectSw) {
env.log->warning(
"Pass [injectSw] SKIPPED : number of stages (%d) is larger than threshold %d",

@ -30,7 +30,7 @@ private:
};

void PassImpl::run(const Model& model) {
const bool enableEarlyEltwiseReLUFusion = CompileEnv::get().config.enableEarlyEltwiseReLUFusion;
const bool enableEarlyEltwiseReLUFusion = CompileEnv::get().config.compileConfig().enableEarlyEltwiseReLUFusion;
if (enableEarlyEltwiseReLUFusion) {
if (m_mode == MergeMode::DYNAMIC_NETWORK) {
VPU_PROFILE(mergeEltwiseAndReLUDynamic);

@ -170,7 +170,7 @@ void PassImpl::run(const Model& model) {
// Try to merge next Pooling layer
//

if (env.config.mergeHwPoolToConv) {
if (env.config.compileConfig().mergeHwPoolToConv) {
if (stage->type() == StageType::StubConv) {
if (auto nextPoolStage = getNextPoolStage(stage, output)) {
output = nextPoolStage->output(0);

@ -148,7 +148,7 @@ void PassImpl::run(const Model& model) {
auto output = stage->output(0);
const auto& env = CompileEnv::get();

if (env.config.hwDisabled(stage->origLayer()->name)) {
if (env.config.compileConfig().hwDisabled(stage->origLayer()->name)) {
continue;
}

@ -88,7 +88,7 @@ bool isScalable(const Stage& stage) {

bool checkGrowingOutput(const Model& model) {
const auto& env = CompileEnv::get();
if (!env.config.checkPreprocessingInsideModel) {
if (!env.config.compileConfig().checkPreprocessingInsideModel) {
return false;
}

@ -258,7 +258,7 @@ void PassImpl::run(const Model& model) {
scale = static_cast<float>(1ULL << static_cast<std::uint32_t>(shift));
}

if (!env.config.irWithVpuScalesDir.empty()) {
if (!env.config.compileConfig().irWithVpuScalesDir.empty()) {
stage->origLayer()->params["vpu_scale"] = toString(scale);
}
}

@ -199,7 +199,7 @@ StageSHAVEsRequirements StageNode::getSHAVEsRequirements() const {

// return max for Myriad2
const auto& compileEnv = CompileEnv::get();
if (compileEnv.platform == Platform::MYRIAD_2) {
if (compileEnv.platform == ncDevicePlatform_t::NC_MYRIAD_2) {
return StageSHAVEsRequirements::NeedMax;
}

@ -24,7 +24,7 @@ void FrontEnd::parseActivation(const Model& model, const ie::CNNLayerPtr& layer,
const auto type = layer->GetParamAsString("type");

const auto activationParserIt = activationParsers.find(type);
VPU_THROW_UNSUPPORTED_UNLESS(activationParserIt != activationParsers.end(),
VPU_THROW_UNSUPPORTED_LAYER_UNLESS(activationParserIt != activationParsers.end(),
"Failed to compile layer \"%v\"(type = %v) ", layer->name, type);

activationParserIt->second(model, layer, inputs, outputs);

@ -163,9 +163,9 @@ void parseConv2D(const Model & model,
kernelStrideY,
dilationX,
dilationY,
env.config.hwOptimization,
env.config.hwDilation,
env.config.hwDisabled(layer->name));
env.config.compileConfig().hwOptimization,
env.config.compileConfig().hwDilation,
env.config.compileConfig().hwDisabled(layer->name));

//
// Create const datas

@ -476,9 +476,9 @@ void parseConvND(const Model & model,
strides[1],
dilations[0],
dilations[1],
env.config.hwOptimization,
env.config.hwDilation,
env.config.hwDisabled(layer->name));
env.config.compileConfig().hwOptimization,
env.config.compileConfig().hwDilation,
env.config.compileConfig().hwDisabled(layer->name));

int try_hw = tryHW ? 1 : 0;

@ -37,13 +37,13 @@ void FrontEnd::parseFullyConnected(const Model& model, const ie::CNNLayerPtr& _l
// Check if HW is applicable
//

auto tryHW = env.config.hwOptimization;
auto tryHW = env.config.compileConfig().hwOptimization;

if (output->desc().dim(Dim::W, 1) != 1 || output->desc().dim(Dim::H, 1) != 1) {
tryHW = false;
}

if (env.config.hwDisabled(layer->name)) {
if (env.config.compileConfig().hwDisabled(layer->name)) {
tryHW = false;
}

@ -162,7 +162,7 @@ void FrontEnd::parseMTCNN(const Model& model, const ie::CNNLayerPtr& layer, cons
IE_ASSERT(inputs.size() == 1);
IE_ASSERT(outputs.size() == 1);

if (!env.config.hwOptimization) {
if (!env.config.compileConfig().hwOptimization) {
VPU_THROW_EXCEPTION << "MTCNN layer supports Myriad X with NCE only";
}

@ -124,7 +124,7 @@ Stage StageBuilder::addReorderStage(
const Data& output) {
const auto* env = CompileEnv::getOrNull();
VPU_THROW_UNLESS(
env == nullptr || !env->config.disableReorder,
env == nullptr || !env->config.compileConfig().disableReorder,
"Tried to add Reorder Stage %v, while DISABLE_REORDER option was set",
name);

@ -221,8 +221,8 @@ void parsePool2D(const Model & model,
//

const auto& env = CompileEnv::get();
bool hwOptimization = env.config.hwOptimization;
bool hwDisabled = env.config.hwDisabled(layer->name);
bool hwOptimization = env.config.compileConfig().hwOptimization;
bool hwDisabled = env.config.compileConfig().hwDisabled(layer->name);

int inputWidth = input->desc().dim(Dim::W);
int inputHeight = input->desc().dim(Dim::H);

@ -480,8 +480,8 @@ void parsePoolND(const Model & model,
//

const auto& env = CompileEnv::get();
bool hwOptimization = env.config.hwOptimization;
bool hwDisabled = env.config.hwDisabled(layer->name);
bool hwOptimization = env.config.compileConfig().hwOptimization;
bool hwDisabled = env.config.compileConfig().hwDisabled(layer->name);

bool tryHW = canTryHW(poolLayer->_type,
input_shape[0],