Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Steve Yoo 2021-06-17 08:28:23 +09:00
commit 6ce8d8ce66
888 changed files with 36386 additions and 20767 deletions

View File

@ -155,10 +155,9 @@ def getConfigurationsMap() {
CONFIGURATION_WORKFLOW = { configuration ->
node("OpenVINO") {
String workdir = "${HOME}/workspace/${BUILD_NUMBER}_${env.CHANGE_ID}_${configuration.name}"
try {
PROJECT_NAME = "openvino"
String workdir = "${HOME}/workspace/${BUILD_NUMBER}_${env.CHANGE_ID}_${configuration.name}"
stage("Clone repository") {
prepare_repository(workdir)
}
@ -185,10 +184,10 @@ CONFIGURATION_WORKFLOW = { configuration ->
}
finally {
stage("Cleanup") {
deleteDir()
String docker_container_name = get_docker_container_name(configuration)
sh """
docker rm -f ${docker_container_name}
rm -rf ${workdir}
"""
}
}

View File

@ -63,41 +63,3 @@ jobs:
python3 -m xmlrunner discover -p *_test.py --output=../mo-ut-logs
working-directory: model-optimizer
build_wheel:
name: Build Python wheel
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- name: Install dependencies
run: |
python3 -m pip install --upgrade pip
python3 -m pip install wheel setuptools
python3 -m pip install tensorflow==2.3.0
- name: Build
run: |
python3 setup.py sdist bdist_wheel
working-directory: model-optimizer
- name: Test package content
run: |
echo "src = open('openvino_mo.egg-info/SOURCES.txt', 'rt').read().split()" | tee -a test_wheel.py
echo "ref = open('automation/package_BOM.txt', 'rt').read().split()" | tee -a test_wheel.py
echo "for name in ref:" | tee -a test_wheel.py
echo " if name.endswith('.py'):" | tee -a test_wheel.py
echo " assert name in src or './' + name in src, name + ' file missed'" | tee -a test_wheel.py
python3 test_wheel.py
working-directory: model-optimizer
- name: Test conversion
run: |
wget -q http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz
tar -xf mobilenet_v1_1.0_224.tgz
python3 -m pip install model-optimizer/dist/*.whl
python3 -m mo --input_model mobilenet_v1_1.0_224_frozen.pb --input_shape "[1,224,224,3]"
- uses: actions/upload-artifact@v2
with:
name: mo_wheel
path: "model-optimizer/dist/*.whl"

View File

@ -169,10 +169,11 @@ ie_shellcheck_process(DIRECTORY "${OpenVINO_MAIN_SOURCE_DIR}"
"${IE_MAIN_SOURCE_DIR}/thirdparty"
"${IE_MAIN_SOURCE_DIR}/temp"
# TODO fix and enable back:
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies"
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/demo"
"${OpenVINO_MAIN_SOURCE_DIR}/ngraph"
"${IE_MAIN_SOURCE_DIR}/scripts")
"${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/scripts/dependencies.sh"
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies/install_NEO_OCL_driver.sh"
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies/install_openvino_dependencies.sh"
"${OpenVINO_MAIN_SOURCE_DIR}/ngraph/python/tests/test_onnx/model_zoo_preprocess.sh"
)
#
# cpack

View File

@ -11,18 +11,27 @@
* *element_type*
* **Description**: the type of element of output tensor
* **Range of values**: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16
* **Type**: string
* **Range of values**: u1, u4, u8, u16, u32, u64, i4, i8, i16, i32, i64, f16, f32, boolean, bf16
* **Type**: `string`
* **Default value**: None
* **Required**: *Yes*
* **Required**: *yes*
* *shape*
* **Description**: the shape of the output tensor
* **Range of values**: list of non-negative integers, empty list is allowed that means 0D or scalar tensor
* **Type**: int[]
* **Range of values**: list of non-negative integers, empty list is allowed, which means 0D or scalar tensor
* **Type**: `int[]`
* **Default value**: None
* **Required**: *Yes*
* **Required**: *yes*
**Outputs**
* **1**: Output tensor of type *T* and shape equal to *shape* attribute.
**Types**
* *T*: any type from *element type* values.
**Example**

View File

@ -8,9 +8,7 @@
**Detailed description**:
The *ExtractImagePatches* operation is similar to the TensorFlow* operation [ExtractImagePatches](https://www.tensorflow.org/api_docs/python/tf/image/extract_patches).
This op extracts patches of shape `sizes` which are `strides` apart in the input image. The output elements are taken from the input at intervals given by the `rate` argument, as in dilated convolutions.
The *ExtractImagePatches* operation extracts patches of shape `sizes` which are `strides` apart in the input image. The output elements are taken from the input at intervals given by the `rate` argument, as in dilated convolutions.
The result is a 4D tensor containing image patches with size `size[0] * size[1] * depth` vectorized in the "depth" dimension.
@ -92,20 +90,23 @@ The "auto_pad" attribute has no effect on the size of each patch, it determines
Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We use the symbol `x` to mark output patches.
1. `sizes="3,3", strides="5,5", rates="1,1", auto_pad="valid"`
  x   x   x    4   5   x   x   x   9 10
  x   x   x  14 15   x   x   x 19 20
  x   x   x  24 25   x   x   x 29 30
31 32 33 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48 49 50
  x   x   x  54 55   x   x   x 59 60
  x   x   x  64 65   x   x   x 69 70
  x   x   x  74 75   x   x   x 79 80
81 82 83 84 85 86 87 88 89 90
91 92 93 94 95 96 97 98 99 100
\f[
\begin{bmatrix}
x & x & x & 4 & 5 & x & x & x & 9 & 10 \\
x & x & x & 14 & 15 & x & x & x & 19 & 20 \\
x & x & x & 24 & 25 & x & x & x & 29 & 30 \\
31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 \\
41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 \\
x & x & x & 54 & 55 & x & x & x & 59 & 60 \\
x & x & x & 64 & 65 & x & x & x & 69 & 70 \\
x & x & x & 74 & 75 & x & x & x & 79 & 80 \\
81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
91 & 92 & 93 & 94 & 95 & 96 & 79 & 98 & 99 & 100
\end{bmatrix}
\f]
output:
```
[[[[ 1 6]
[51 56]]
@ -132,24 +133,27 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
[[23 28]
[73 78]]]]
```
output shape: `[1, 9, 2, 2]`
2. `sizes="4,4", strides="8,8", rates="1,1", auto_pad="valid"`
  x   x   x   x    5   6   7   8   9 10
  x   x   x   x  15 16 17 18 19 20
  x   x   x   x  25 26 27 28 29 30
  x   x   x   x  35 36 37 38 39 40
41 42 43 44 45 46 47 48 49 50
51 52 53 54 55 56 57 58 59 60
61 62 63 64 65 66 67 68 69 70
71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 87 88 89 90
91 92 93 94 95 96 97 98 99 100
\f[
\begin{bmatrix}
x & x & x & x & 5 & 6 & 7 & 8 & 9 & 10 \\
x & x & x & x & 15 & 16 & 17 & 18 & 19 & 20 \\
x & x & x & x & 25 & 26 & 27 & 28 & 29 & 30 \\
x & x & x & x & 35 & 36 & 37 & 38 & 39 & 40 \\
41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 \\
51 & 52 & 53 & 54 & 55 & 56 & 57 & 58 & 59 & 60 \\
61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 \\
71 & 72 & 73 & 74 & 75 & 76 & 77 & 78 & 79 & 80 \\
81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
91 & 92 & 93 & 94 & 95 & 96 & 79 & 98 & 99 & 100
\end{bmatrix}
\f]
output:
```
[[[[ 1]]
[[ 2]]
@ -181,27 +185,29 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
[[33]]
[[34]]]]
```
output shape: `[1, 16, 1, 1]`
3. `sizes="4,4", strides="9,9", rates="1,1", auto_pad="same_upper"`
  x   x   x   x    0   0   0   0   0   x   x   x   x
  x   x   x   x    4   5   6   7   8   x   x   x   x
  x   x   x   x  14 15 16 17 18   x   x   x   x
  x   x   x   x  24 25 26 27 28   x   x   x   x
  0 31 32 33 34 35 36 37 38 39 40   0   0
  0 41 42 43 44 45 46 47 48 49 50   0   0
  0 51 52 53 54 55 56 57 58 59 60   0   0
  0 61 62 63 64 65 66 67 68 69 70   0   0
  0 71 72 73 74 75 76 77 78 79 80   0   0
  x   x   x   x  84 85 86 87 88   x   x   x   x
  x   x   x   x  94 95 96 97 98   x   x   x   x
  x   x   x   x    0   0   0   0   0   x   x   x   x
  x   x   x   x    0   0   0   0   0   x   x   x   x
\f[
\begin{bmatrix}
x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x\\
x & x & x & x & 4 & 5 & 6 & 7 & 8 & x & x & x & x\\
x & x & x & x & 14 & 15 & 16 & 17 & 18 & x & x & x & x\\
x & x & x & x & 24 & 25 & 26 & 27 & 28 & x & x & x & x\\
0 & 31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 & 0 & 0\\
0 & 41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 & 0 & 0\\
0 & 51 & 52 & 53 & 54 & 55 & 56 & 57 & 58 & 59 & 60 & 0 & 0\\
0 & 61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 & 0 & 0\\
0 & 71 & 72 & 73 & 74 & 75 & 76 & 77 & 78 & 79 & 80 & 0 & 0\\
x & x & x & x & 84 & 85 & 86 & 87 & 88 & x & x & x & x\\
x & x & x & x & 94 & 95 & 96 & 79 & 98 & x & x & x & x\\
x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x\\
x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x
\end{bmatrix}
\f]
output:
```
[[[[ 0 0]
[ 0 89]]
@ -249,25 +255,28 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
[[ 23 0]
[ 0 0]]]]
```
output shape: `[1, 16, 2, 2]`
4. `sizes="3,3", strides="5,5", rates="2,2", auto_pad="valid"`
This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches:
  x   2   x   4   x   y   7   y   9   y
11 12 13 14 15 16 17 18 19 20
  x  22   x 24   x   y 27   y 29   y
31 32 33 34 35 36 37 38 39 40
  x  42   x 44   x   y 47   y 49   y
  z  52   z 54   z   k 57   k 59   k
61 62 63 64 65 66 67 68 69 70
  z  72   z 74   z   k 77   k 79   k
81 82 83 84 85 86 87 88 89 90
  z  92   z 94   z   k 97   k 99   k
\f[
\begin{bmatrix}
x & 2 & x & 4 & x & y & 7 & y & 9 & y \\
11 & 12 & 13 & 14 & 15 & 16 & 17 & 18 & 19 & 20 \\
x & 22 & x & 24 & x & y & 27 & y & 29 & y \\
31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 \\
x & 42 & x & 44 & x & y & 47 & y & 49 & y \\
z & 52 & z & 54 & z & k & 57 & k & 59 & k \\
61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 \\
z & 72 & z & 74 & z & k & 77 & k & 79 & k \\
81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
z & 92 & z & 94 & z & k & 79 & k & 99 & k
\end{bmatrix}
\f]
output:
```
[[[[ 1 6]
[ 51 56]]
@ -294,26 +303,30 @@ This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches:
[[ 45 50]
[ 95 100]]]]
```
output_shape: `[1, 9, 2, 2]`
5. `sizes="2,2", strides="3,3", rates="1,1", auto_pad="valid"`
Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature map with coordinate 0 contains numbers in a range `[1, 25]` and feature map with coordinate 1 contains numbers in a range `[26, 50]`
  x   x   3   x   x
  6   7   8   x   x
11 12 13 14 15
  x   x  18   x   x
  x   x  23   x   x
  x   x  28   x   x
  x   x  33   x   x
36 37 38 39 40
  x   x  43   x   x
  x   x  48   x   x
\f[
\begin{bmatrix}
x & x & 3 & x & x\\
x & x & 8 & x & x\\
11 & 12 & 13 & 14 & 15\\
x & x & 18 & x & x\\
x & x & 23 & x & x
\end{bmatrix}\\
\begin{bmatrix}
x & x & 28 & x & x\\
x & x & 33 & x & x\\
36 & 37 & 38 & 39 & 40\\
x & x & 43 & x & x\\
x & x & 48 & x & x
\end{bmatrix}
\f]
output:
```
[[[[ 1 4]
[16 19]]
@ -337,5 +350,5 @@ Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature ma
[[32 35]
[47 50]]]]
```
output shape: `[1, 8, 2, 2]`

View File

@ -8,12 +8,37 @@
**Short description**: *ShuffleChannels* permutes data in the channel dimension of the input tensor.
**Detailed description**:
Input tensor of `data_shape` is always interpreted as 4D tensor with the following shape:
dim 0: data_shape[0] * data_shape[1] * ... * data_shape[axis-1]
(or 1 if axis == 0)
dim 1: group
dim 2: data_shape[axis] / group
dim 3: data_shape[axis+1] * data_shape[axis+2] * ... * data_shape[data_shape.size()-1]
(or 1 if axis points to last dimension)
Trailing and leading to `axis` dimensions are flattened and reshaped back to the original shape after channels shuffling.
The operation is equivalent to the following transformation of the input tensor `x` of shape `[N, C, H, W]` and `axis = 1`:
\f[
x' = reshape(x, [N, group, C / group, H * W])\\
x'' = transpose(x', [0, 2, 1, 3])\\
y = reshape(x'', [N, C, H, W])\\
\f]
where `group` is the layer attribute described below.
**Attributes**:
* *axis*
* **Description**: *axis* specifies the index of a channel dimension.
* **Range of values**: an integer number in the range [-4, 3]
* **Range of values**: an integer number in the range `[-rank(data_shape), rank(data_shape) - 1]`
* **Type**: `int`
* **Default value**: 1
* **Required**: *No*
@ -21,30 +46,22 @@
* *group*
* **Description**: *group* specifies the number of groups to split the channel dimension into. This number must evenly divide the channel dimension size.
* **Range of values**: a positive integer
* **Range of values**: a positive integer in the range `[1, data_shape[axis]]`
* **Type**: `int`
* **Default value**: 1
* **Required**: *No*
**Inputs**:
* **1**: 4D input tensor of any supported data type. Required.
* **1**: `data` input tensor of type *T* and rank greater or equal to 1. **Required.**
**Outputs**:
* **1**: 4D input tensor with shape and element type as for the input tensor.
* **1**: Output tensor with element type *T* and same shape as the input tensor.
**Mathematical Formulation**
**Types**
The operation is the equivalent with the following transformation of the input tensor *x* of shape *[N, C, H, W]*:
```
x' = reshape(x, [N, group, C / group, H * W])
x'' = transpose(x', [0, 2, 1, 3])
y = reshape(x'', [N, C, H, W])
```
where `group` is the layer parameter described above and the `axis = 1`.
* *T*: any supported numeric type.
**Example**

View File

@ -8,20 +8,20 @@
**Detailed description**:
The *SpaceToBatch* operation is similar to the TensorFlow* operation [SpaceToBatchND](https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd)
The operation is equivalent to the following transformation of the input tensor `data` of shape `[batch, D_1, D_2 ... D_{N - 1}]` and `block_shape`, `pads_begin`, `pads_end` of shapes `[N]` to *Y* output tensor.
Zero-pad the start and end of dimensions [D_0, ..., D_{N - 1}] of the input according to `pads_begin` and `pads_end`:
note: P_0 for batch dimension is expected to be 0 (no-padding).
x = [batch + P_0, D_1 + P_1, D_2 + P_2, ..., D_{N - 1} + P_{N - 1}], where P_i = pads_begin[i] + pads_end[i]
Zero-pad the start and end of dimensions \f$[D_0, \dots, D_{N - 1}]\f$ of the input according to `pads_begin` and `pads_end`:
note: B_0 for batch is ignored.
x' = reshape(x, [batch, (D_1 + P_1) / B_1, B_1, (D_2 + P_2) / B_2, B_2, ..., (D_{N - 1} + P_{N - 1}) / B_{N - 1}, B_{N - 1}]), where B_i = block_shape[i]
\f[x = [batch + P_0, D_1 + P_1, D_2 + P_2, \dots, D_{N - 1} + P_{N - 1}]\f]
\f[x' = reshape(x, [batch, \frac{D_1 + P_1}{B_1}, B_1, \frac{D_2 + P_2}{B_2}, B_2, \dots, \frac{D_{N - 1} + P_{N - 1}}{B_{N - 1}}, B_{N - 1}])\f]
\f[x'' = transpose(x', [2, 4, \dots, (N - 1) + (N - 1), 0, 1, 3, \dots, N + (N - 1)])\f]
\f[y = reshape(x'', [batch \times B_1 \times \dots \times B_{N - 1}, \frac{D_1 + P_1}{B_1}, \frac{D_2 + P_2}{B_2}, \dots, \frac{D_{N - 1} + P_{N - 1}}{B_{N - 1}}]\f]
x'' = transpose(x', [2, 4, ..., (N - 1) + (N - 1), 0, 1, 3, ..., N + (N - 1)])
y = reshape(x'', [batch * B_1 * ... * B_{N - 1}, (D_1 + P_1) / B_1, (D_2 + P_2) / B_2, ... , (D_{N - 1} + P_{N - 1}) / B_{N - 1}])
where
- \f$P_i\f$ = pads_begin[i] + pads_end[i]
- \f$B_i\f$ = block_shape[i]
- \f$P_0\f$ for batch dimension is expected to be 0 (no-padding)
- \f$B_0\f$ for batch is ignored
**Attributes**
@ -36,7 +36,7 @@ The operation is equivalent to the following transformation of the input tensor
**Outputs**
* **1**: N-D tensor with shape `[batch * block_shape[0] * block_shape[1] * ... * block_shape[N - 1], (pads_begin[1] + D_1 + pads_end[1]) / block_shape[1], (pads_begin[2] + D_2 + pads_end[2]) / block_shape[2], ..., (pads_begin[N - 1] + D_{N - 1} + pads_end[N - 1]) / block_shape[N - 1]` of the same type as `data` input.
* **1**: N-D tensor with shape `[batch * block_shape[0] * block_shape[1] * ... * block_shape[N - 1], (D_1 + pads_begin[1] + pads_end[1]) / block_shape[1], (D_2 + pads_begin[2] + pads_end[2]) / block_shape[2], ..., (D_{N -1} + pads_begin[N - 1] + pads_end[N - 1]) / block_shape[N - 1]` of the same type as `data` input.
**Types**

View File

@ -5,13 +5,12 @@
**Category**: Data movement operation
**Short description**: *StridedSlice* extracts a strided slice of a tensor.
It is similar to generalized array indexing in Python\*.
**Attributes**
* *begin_mask*
* **Description**: *begin_mask* is a bit mask. *begin_mask[i]* equal to 1 means that the corresponding dimension of the `begin` input is ignored and the 'real' beginning of the tensor is used along corresponding dimension.
* **Description**: *begin_mask* is a bit mask. *begin_mask[i]* equal to `1` means that the corresponding dimension of the `begin` input is ignored and the 'real' beginning of the tensor is used along corresponding dimension.
* **Range of values**: a list of `0`s and `1`s
* **Type**: `int[]`
* **Default value**: None
@ -19,7 +18,7 @@
* *end_mask*
* **Description**: *end_mask* is a bit mask. If *end_mask[i]* is 1, the corresponding dimension of the `end` input is ignored and the real 'end' of the tensor is used along corresponding dimension.
* **Description**: *end_mask* is a bit mask. If *end_mask[i]* is `1`, the corresponding dimension of the `end` input is ignored and the real 'end' of the tensor is used along corresponding dimension.
* **Range of values**: a list of `0`s and `1`s
* **Type**: `int[]`
* **Default value**: None
@ -27,7 +26,7 @@
* *new_axis_mask*
* **Description**: *new_axis_mask* is a bit mask. If *new_axis_mask[i]* is 1, a length 1 dimension is inserted on the `i`-th position of input tensor.
* **Description**: *new_axis_mask* is a bit mask. If *new_axis_mask[i]* is `1`, a length 1 dimension is inserted on the `i`-th position of input tensor.
* **Range of values**: a list of `0`s and `1`s
* **Type**: `int[]`
* **Default value**: `[0]`
@ -35,7 +34,7 @@
* *shrink_axis_mask*
* **Description**: *shrink_axis_mask* is a bit mask. If *shrink_axis_mask[i]* is 1, the dimension on the `i`-th position is deleted.
* **Description**: *shrink_axis_mask* is a bit mask. If *shrink_axis_mask[i]* is `1`, the dimension on the `i`-th position is deleted.
* **Range of values**: a list of `0`s and `1`s
* **Type**: `int[]`
* **Default value**: `[0]`
@ -51,21 +50,83 @@
**Inputs**:
* **1**: Multidimensional input tensor to be sliced. Required.
* **1**: `data` - input tensor to be sliced of type `T` and arbitrary shape. **Required.**
* **2**: `begin` input - 1D input tensor with begin indexes for input tensor slicing. Required.
Out-of-bounds values are silently clamped. If `begin_mask[i]` is 1, the value of `begin[i]` is ignored
and the range of the appropriate dimension starts from 0.
Negative values mean indexing starts from the end. For example, if `foo=[1,2,3]`, `begin[0]=-1` means `begin[0]=3`.
* **2**: `begin` - 1D tensor of type `T_IND` with begin indexes for input tensor slicing. **Required.**
Out-of-bounds values are silently clamped. If `begin_mask[i]` is `1`, the value of `begin[i]` is ignored and the range of the appropriate dimension starts from `0`. Negative values mean indexing starts from the end. For example, if `data=[1,2,3]`, `begin[0]=-1` means `begin[0]=3`.
* **3**: `end` input - 1D input tensor with end indexes for input tensor slicing. Required.
Out-of-bounds values will be silently clamped. If `end_mask[i]` is 1, the value of `end[i]` is ignored
and the full range of the appropriate dimension is used instead.
Negative values mean indexing starts from the end. For example, if `foo=[1,2,3]`, `end[0]=-1` means `end[0]=3`.
* **3**: `end` - 1D tensor of type `T_IND` with end indexes for input tensor slicing. **Required.**
Out-of-bounds values will be silently clamped. If `end_mask[i]` is `1`, the value of `end[i]` is ignored and the full range of the appropriate dimension is used instead. Negative values mean indexing starts from the end. For example, if `data=[1,2,3]`, `end[0]=-1` means `end[0]=3`.
* **4**: `stride` input - 1D input tensor with strides. Optional.
* **4**: `stride` - 1D tensor of type `T_IND` with strides. **Optional.**
**Types**
* *T*: any supported type.
* *T_IND*: any supported integer type.
**Example**
Example of `begin_mask` & `end_mask` usage.
```xml
<layer ... type="StridedSlice" ...>
<data begin_mask="0,1,1" ellipsis_mask="0,0,0" end_mask="1,1,0" new_axis_mask="0,0,0" shrink_axis_mask="0,0,0"/>
<input>
<port id="0">
<dim>2</dim>
<dim>3</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>2</dim> <!-- begin: [1, 0, 0] -->
</port>
<port id="2">
<dim>2</dim> <!-- end: [0, 0, 2] -->
</port>
<port id="3">
<dim>2</dim> <!-- stride: [1, 1, 1] -->
</port>
</input>
<output>
<port id="4">
<dim>1</dim>
<dim>3</dim>
<dim>2</dim>
</port>
</output>
</layer>
```
Example of `new_axis_mask` usage.
```xml
<layer ... type="StridedSlice" ...>
<data begin_mask="0,1,1" ellipsis_mask="0,0,0" end_mask="0,1,1" new_axis_mask="1,0,0" shrink_axis_mask="0,0,0"/>
<input>
<port id="0">
<dim>2</dim>
<dim>3</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>2</dim>
</port>
<port id="2">
<dim>2</dim>
</port>
<port id="3">
<dim>2</dim>
</port>
</input>
<output>
<port id="4">
<dim>1</dim>
<dim>2</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
```
Example of `shrink_axis_mask` usage.
```xml
<layer ... type="StridedSlice" ...>
<data begin_mask="1,0,1,1,1" ellipsis_mask="0,0,0,0,0" end_mask="1,0,1,1,1" new_axis_mask="0,0,0,0,0" shrink_axis_mask="0,1,0,0,0"/>

View File

@ -2,9 +2,10 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/reshape.hpp"
#include <vector>
#include "single_layer_tests/reshape.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
@ -14,31 +15,45 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
};
INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckDynBatch, ReshapeLayerTest,
INSTANTIATE_TEST_CASE_P(
smoke_ReshapeCheckDynBatch, ReshapeLayerTestRevise,
::testing::Combine(
::testing::Values(true),
::testing::ValuesIn(netPrecisions),
::testing::Values(true), ::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<int64_t>({30, 30, 30, 30})),
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
::testing::Values(std::map<std::string, std::string>({}))),
ReshapeLayerTest::getTestCaseName);
ReshapeLayerTestRevise::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTest,
INSTANTIATE_TEST_CASE_P(
smoke_ReshapeCheck, ReshapeLayerTestRevise,
::testing::Combine(
::testing::Values(true),
::testing::ValuesIn(netPrecisions),
::testing::Values(true), ::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
::testing::Values(std::vector<size_t>({10, 0, 100})),
::testing::Values(std::vector<int64_t>({10, 0, 100})),
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
::testing::Values(std::map<std::string, std::string>({}))),
ReshapeLayerTest::getTestCaseName);
ReshapeLayerTestRevise::getTestCaseName);
INSTANTIATE_TEST_CASE_P(
smoke_ReshapeCheckNegative, ReshapeLayerTestRevise,
::testing::Combine(
::testing::Values(true), ::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
::testing::Values(std::vector<int64_t>({10, -1, 100})),
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
::testing::Values(std::map<std::string, std::string>({}))),
ReshapeLayerTestRevise::getTestCaseName);
} // namespace

View File

@ -68,6 +68,10 @@ if(ENABLE_WHEEL)
add_subdirectory(wheel)
endif()
if (NGRAPH_PYTHON_BUILD_ENABLE)
add_dependencies(ie_api _pyngraph)
endif()
# install
ie_cpack_add_component(${PYTHON_VERSION})

View File

@ -55,7 +55,7 @@ fi
if [[ "${APPS_TO_RUN}" -ge 4 ]] ; then
# For more then 4 multidevice testing
for (( VAR = 4; VAR <= ${APPS_TO_RUN}; ++VAR )); do
for (( VAR = 4; VAR <= APPS_TO_RUN; ++VAR )); do
./${APP_NAME} --gtest_filter=*VPURegTest*YOLO*myriad* &
pids+=" $!"
done

View File

@ -33,7 +33,7 @@ target_include_directories(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
${CLDNN__OCL_ICD_INCDIRS}
${CLDNN_TOP_FOLDER})
${CLDNN_TOP_FOLDER}/api)
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})

View File

@ -5,7 +5,7 @@
#pragma once
#include <ie_layouts.h>
#include <api/layout.hpp>
#include <cldnn/runtime/layout.hpp>
#include "ngraph/type/element_type.hpp"

View File

@ -9,7 +9,7 @@
#include "cldnn_custom_layer.h"
#include <api/network.hpp>
#include <cldnn/graph/network.hpp>
namespace CLDNNPlugin {

View File

@ -10,7 +10,7 @@
#include <map>
#include <ie_common.h>
#include "pugixml.hpp"
#include "api/tensor.hpp"
#include "cldnn/runtime/tensor.hpp"
namespace CLDNNPlugin {

View File

@ -81,6 +81,8 @@
#include "cldnn_itt.h"
#include "gpu/gpu_config.hpp"
#include "cldnn/runtime/device_query.hpp"
#ifdef __linux__
# include <dlfcn.h>
#endif
@ -117,13 +119,13 @@ struct clDNNEngine::impl {
};
cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::string> &config) const {
auto device_info = device_map.begin()->second.get_info();
auto device_info = device_map.begin()->second->get_info();
if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) {
auto val = config.at(PluginConfigParams::KEY_DEVICE_ID);
if (device_map.find(val) == device_map.end()) {
IE_THROW() << "Invalid device ID: " << val;
}
device_info = device_map.at(val).get_info();
device_info = device_map.at(val)->get_info();
}
return device_info;
@ -445,7 +447,8 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
RegisterPrimitives();
// try loading clDNN engine and get info from it
{
cldnn::device_query device_query;
// Set OCL runtime which should be always available
cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl);
device_map = device_query.get_available_devices();
}
// locate global custom kernel config
@ -851,8 +854,8 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
};
static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
auto freqGHz = info.core_frequency / 1000.f;
auto numEUs = info.cores_count;
auto freqGHz = info.gpu_frequency / 1000.f;
auto numEUs = info.execution_units_count;
auto opsPerComputeBlock = 0;
auto computeBlockIPC = 1.0f;
switch (dt) {
@ -894,8 +897,8 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
auto iter = device_map.find(device_id);
auto device_info = iter != device_map.end() ?
iter->second.get_info() :
device_map.begin()->second.get_info();
iter->second->get_info() :
device_map.begin()->second->get_info();
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics;
@ -931,7 +934,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32);
IE_SET_METRIC_RETURN(DEVICE_GOPS, gops);
} else if (name == GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)) {
IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.cores_count);
IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.execution_units_count);
} else if (name == GPU_METRIC_KEY(UARCH_VERSION)) {
std::stringstream s;
if (device_info.gfx_ver.major == 0 && device_info.gfx_ver.minor == 0 && device_info.gfx_ver.revision == 0) {

View File

@ -7,7 +7,7 @@
#include <map>
#include <string>
#include <memory>
#include <api/engine.hpp>
#include <cldnn/runtime/engine.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
#include "cldnn_remote_context.h"
@ -22,7 +22,7 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin,
std::shared_ptr<impl> _impl;
// key: device_id, value: cldnn device
std::map<std::string, cldnn::device> device_map;
std::map<std::string, cldnn::device::ptr> device_map;
std::mutex engine_mutex;
mutable CLDNNRemoteCLContext::Ptr m_defaultContext;

View File

@ -2,13 +2,13 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <list>
#include <set>
#include <unordered_set>
#include "ie_metric_helpers.hpp"
#include <api/cldnn.hpp>
#include <api/data.hpp>
#include <chrono>
#include <cmath>
#include <algorithm>
#include "ie_metric_helpers.hpp"
#include <chrono>
#include <cmath>
#include <algorithm>
@ -27,7 +27,6 @@
#include "threading/ie_cpu_streams_executor.hpp"
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;

View File

@ -2,22 +2,28 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <cldnn/graph/network.hpp>
#include <cldnn/runtime/profiling.hpp>
#include "cldnn_graph.h"
#include "simple_math.h"
#include <cldnn/cldnn_config.hpp>
#include "cldnn_infer_request.h"
#include <description_buffer.hpp>
#include <threading/ie_executor_manager.hpp>
#include <exec_graph_info.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/variant.hpp>
#include <list>
#include <set>
#include <unordered_set>
#include <sstream>
#include <api/cldnn.hpp>
#include <api/network.hpp>
#include <api/profiling.hpp>
#include <api/custom_gpu_primitive.hpp>
#include <chrono>
#include <cmath>
#include <algorithm>
#include "cldnn_graph.h"
#include "simple_math.h"
#include <description_buffer.hpp>
#include "cldnn_infer_request.h"
#include <threading/ie_executor_manager.hpp>
#include <fstream>
#include <utility>
#include <sys/types.h>
@ -71,12 +77,10 @@ void CLDNNGraph::Build() {
for (int b = m_bv_sz - 1; b >= 0; b--) {
auto network = BuildNetwork(m_program->GetCompiledProgram(b));
m_networks.insert(m_networks.begin(), network);
GetEngine()->release_pending_memory(network->get_id());
}
} else {
auto network = BuildNetwork(m_program->GetCompiledProgram());
m_networks.emplace_back(network);
GetEngine()->release_pending_memory(network->get_id());
}
UpdateImplementationsMap();
@ -499,7 +503,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
}
};
std::map<cldnn::primitive_id, cldnn::event> executedPrimitives = GetNetwork()->get_executed_primitives();
std::map<cldnn::primitive_id, cldnn::event::ptr> executedPrimitives = GetNetwork()->get_executed_primitives();
auto allPrimitives = GetNetwork()->get_all_primitives();
// Get profiling info for all layers
@ -521,7 +525,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
auto event = execIter->second;
executedPrimitives.erase(execIter);
cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()};
cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event->get_profiling_info()};
collectTimings(cldnnInfo, perfCount);
perfCount.num++;
@ -534,7 +538,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
pcIter = perfMap.find(executedID.first);
auto& perfCount = pcIter->second.second;
cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second.get_profiling_info()};
cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second->get_profiling_info()};
collectTimings(cldnnInfo, perfCount);
perfCount.num++;
@ -675,7 +679,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
executedPrimitives.find(primId) != executedPrimitives.end()) {
auto event = executedPrimitives.at(primId);
cldnn::instrumentation::profiling_info cldnnInfo{primId, event.get_profiling_info()};
cldnn::instrumentation::profiling_info cldnnInfo{primId, event->get_profiling_info()};
// Collect timings
long long cpuTime = 0;

View File

@ -17,8 +17,8 @@
#include "ie_blob.h"
#include "cpp/ie_cnn_network.h"
#include <api/network.hpp>
#include <api/topology.hpp>
#include <cldnn/graph/network.hpp>
#include <cldnn/graph/topology.hpp>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "cldnn_custom_layer.h"
@ -43,7 +43,7 @@ public:
const Config& getConfig() const { return m_config; }
InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; }
std::shared_ptr<const cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
std::shared_ptr<cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
size_t GetNetworksCount() const { return m_networks.size(); }

View File

@ -19,7 +19,7 @@ using namespace InferenceEngine;
namespace CLDNNPlugin {
const char CLDNNInferRequest::fp32_suffix[] = "_fp32";
const char fp32_suffix[] = "_fp32";
const char str_not_allocated[] = "Input data was not allocated.";
const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing";
const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format";
@ -110,7 +110,7 @@ Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* m
}
}
void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& inputMem) {
void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach");
auto impl = getContextImpl(m_graph->GetContext());
impl->acquire_lock();
@ -127,150 +127,57 @@ void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& in
// Allocates a device memory buffer matching `layout` on the graph's engine and
// registers it as the input memory for primitive `name` via input_attach().
// NOTE(review): the stale pre-refactor line using the removed
// `cldnn::memory::allocate(...)` value API was diff residue that redeclared
// `input_mem`; only the `cldnn::memory::ptr` engine-API allocation is kept.
void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc");
    cldnn::memory::ptr input_mem = m_graph->GetEngine()->allocate_memory(layout);
    input_attach(name, input_mem);
}
void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory,
Blob::Ptr bptr,
buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData");
size_t n = (bi == nullptr) ? bptr->size() : bi->buf_size;
template<typename T>
void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi, cldnn::stream& stream) {
size_t n = (bi == nullptr) ? dst->size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
auto layout = outputMemory.get_layout();
auto layout = src->get_layout();
auto size = layout.size;
auto l_padd = layout.data_padding.lower_size();
auto u_padd = layout.data_padding.upper_size();
auto h_padding = u_padd.spatial[0] + l_padd.spatial[0];
auto v_padding_l = (h_padding + size.spatial[0]) * u_padd.spatial[1];
auto v_padding_u = (h_padding + size.spatial[0]) * l_padd.spatial[1];
auto locked = bptr->buffer();
switch (bptr->getTensorDesc().getPrecision()) {
case Precision::FP32: {
auto out_f = locked.as<float*>();
if (out_f == nullptr) {
auto locked_dst = dst->buffer();
auto dst_ptr = locked_dst.as<T*>();
if (dst_ptr == nullptr) {
IE_THROW() << "Invalid output blob";
}
auto resPtr = outputMemory.pointer<float>();
float *resVec = out_f + offset;
cldnn::mem_lock<T> src_lock{ src, stream };
T* src_ptr = src_lock.data();
dst_ptr += offset;
if (h_padding || v_padding_l || v_padding_u) {
size_t i = 0;
if (layout.data_padding) {
for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) {
i += v_padding_l;
for (size_t w = 0; w < size.spatial[3]; w++) {
for (size_t z = 0; z < size.spatial[2]; z++) {
for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0];
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
*resVec++ = resPtr[i];
for (size_t x = 0; x < size.spatial[0]; x++) {
*dst_ptr++ = src_ptr[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))];
}
}
}
i += u_padd.spatial[0];
}
i += v_padding_u;
}
}
} else {
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
dst_ptr[i] = src_ptr[i];
}
}
}
break;
case Precision::FP16: {
auto out_f = locked.as<uint16_t*>();
if (out_f == nullptr) {
IE_THROW() << "Invalid output blob";
}
auto resPtr = outputMemory.pointer<uint16_t>();
uint16_t* resVec = out_f + offset;
if (h_padding || v_padding_l || v_padding_u) {
size_t i = 0;
for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) {
i += v_padding_l;
for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0];
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
*resVec++ = resPtr[i];
}
i += u_padd.spatial[0];
}
i += v_padding_u;
}
}
} else {
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
}
}
}
break;
case Precision::I32: {
auto out_f = locked.as<int32_t*>();
if (out_f == nullptr) {
IE_THROW() << "Invalid output blob";
}
auto resPtr = outputMemory.pointer<int32_t>();
int32_t* resVec = out_f + offset;
if (h_padding || v_padding_l || v_padding_u) {
size_t i = 0;
for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) {
i += v_padding_l;
for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0];
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
*resVec++ = resPtr[i];
}
i += u_padd.spatial[0];
}
i += v_padding_u;
}
}
} else {
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
}
}
}
break;
case Precision::I64: {
auto out_f = locked.as<int64_t*>();
if (out_f == nullptr) {
IE_THROW() << "Invalid output blob";
}
auto resPtr = outputMemory.pointer<int64_t>();
int64_t* resVec = out_f + offset;
if (h_padding || v_padding_l || v_padding_u) {
size_t i = 0;
for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) {
i += v_padding_l;
for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0];
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
*resVec++ = resPtr[i];
}
i += u_padd.spatial[0];
}
i += v_padding_u;
}
}
} else {
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
}
}
}
break;
default:
IE_THROW() << "The plugin does not support output " << bptr->getTensorDesc().getPrecision() << " precision";
// Dispatches the typed device-to-host output copy according to the
// destination blob's precision; unsupported precisions raise NotImplemented.
void CLDNNInferRequest::copyOutputData(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData");
    auto& stream = m_graph->GetNetwork()->get_stream();
    const auto precision = dst->getTensorDesc().getPrecision();
    if (precision == Precision::FP32) {
        copyResultToOutputBlob<float>(src, dst, bi, stream);
    } else if (precision == Precision::FP16) {
        copyResultToOutputBlob<uint16_t>(src, dst, bi, stream);
    } else if (precision == Precision::I32) {
        copyResultToOutputBlob<int32_t>(src, dst, bi, stream);
    } else if (precision == Precision::I64) {
        copyResultToOutputBlob<int64_t>(src, dst, bi, stream);
    } else {
        IE_THROW(NotImplemented) << "The plugin does not support output " << precision << " precision";
    }
}
@ -279,7 +186,7 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
const cldnn::layout& inputLayout,
const Blob &inputBlob, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData");
size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
cldnn::primitive_id internalName = "parameter:" + inputName;
@ -287,37 +194,37 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
switch (inputBlob.getTensorDesc().getPrecision()) {
case Precision::FP32: {
float* blob_ptr = const_cast<float*>(locked.as<const float*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break;
}
case Precision::I32: {
int32_t* blob_ptr = const_cast<int32_t*>(locked.as<const int32_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break;
}
case Precision::I64: {
int64_t* blob_ptr = const_cast<int64_t*>(locked.as<const int64_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break;
}
case Precision::FP16: {
uint16_t* blob_ptr = const_cast<uint16_t*>(locked.as<const uint16_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break;
}
case Precision::I8: {
int8_t* blob_ptr = const_cast<int8_t*>(locked.as<const int8_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break;
}
case Precision::U8: {
uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break;
}
case Precision::BOOL: {
uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break;
}
default:
@ -601,6 +508,7 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data)
void CLDNNInferRequest::AllocateInputs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs");
auto inputLayouts = m_graph->GetInputLayouts();
auto& stream = m_graph->GetNetwork()->get_stream();
// allocate inputs
for (auto& ni : _networkInputs) {
std::string name = ni.first;
@ -623,25 +531,24 @@ void CLDNNInferRequest::AllocateInputs() {
input_alloc(UVName, inputLayouts.at(UVName));
size_t height = desc.getDims()[2], width = desc.getDims()[3];
cldnn::pointer<uint8_t> input_mem_ptr_Y = inputsMemory.at(YName).pointer<uint8_t>();
cldnn::mem_lock<uint8_t> input_mem_ptr_Y{inputsMemory.at(YName), stream};
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data());
cldnn::pointer<uint8_t> input_mem_ptr_UV = inputsMemory.at(UVName).pointer<uint8_t>();
cldnn::mem_lock<uint8_t> input_mem_ptr_UV{ inputsMemory.at(UVName), stream };
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data());
blobs.push_back(make_shared_blob<NV12Blob>(blobY, blobUV));
}
_inputs[name] = desc.getDims()[0] == 1 ? blobs[0] : make_shared_blob<BatchedBlob>(blobs);
} else {
if (inputLayouts.find(name) == inputLayouts.end()) {
IE_THROW() << "Input layout for " << name << " is not found";
}
cldnn::layout layout = inputLayouts.at(name);
input_alloc(name, layout);
cldnn::pointer<uint8_t> mem_ptr = inputsMemory.at(name).pointer<uint8_t>();
cldnn::mem_lock<uint8_t> mem_ptr{inputsMemory.at(name), stream};
_inputs[name] = createInputBlob(desc, mem_ptr.data());
if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
@ -685,8 +592,8 @@ void CLDNNInferRequest::AllocateOutputs() {
bool can_reuse_internal_mem = !m_useStreams;
for (auto& no : _networkOutputs) {
std::string outputID = m_graph->MapOutputName(no.first);
cldnn::memory output_mem = m_graph->GetNetwork()->get_output_memory(outputID);
cldnn::pointer<uint8_t> output_mem_ptr = output_mem.pointer<uint8_t>();
cldnn::memory::ptr output_mem = m_graph->GetNetwork()->get_output_memory(outputID);
cldnn::mem_lock<uint8_t> output_mem_ptr{output_mem, m_graph->GetNetwork()->get_stream()};
if (output_mem_ptr.data() == nullptr) {
IE_THROW() << "Empty output memory for primitive " << outputID;
}
@ -824,6 +731,7 @@ CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap
void CLDNNInferRequest::execAndParse() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse");
auto networkOutputs = m_graph->GetNetwork()->execute();
auto& stream = m_graph->GetNetwork()->get_stream();
// Collect outputs as requested by the model
for (auto& no : _networkOutputs) {
@ -835,12 +743,12 @@ void CLDNNInferRequest::execAndParse() {
// mapping remote blobs not needed -
// let the user take care of them explicitly
if (!bptr->is<gpu::ClBlob>()) {
auto out_ptr = outputMemory.pointer<uint8_t>();
cldnn::mem_lock<uint8_t> out_ptr{outputMemory, stream};
auto blob_ptr = bptr->buffer().as<uint8_t*>();
// If Async API is used, copy of output blobs is not needed, unless SetBlob function was called.
// But in the case when old API is used we have to copy data to memory provided by user.
if (blob_ptr != &out_ptr[0]) {
if (blob_ptr != out_ptr.data()) {
copyOutputData(outputMemory, bptr);
}
}
@ -965,19 +873,20 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
IE_THROW() << "Input name mismatch.";
}
auto inputLayout = m_graph->GetInputLayouts().at(inputName);
auto is_same_buffer = [](const Blob& blob, const cldnn::memory& memory) -> bool {
auto is_same_buffer = [&](const Blob& blob, cldnn::memory::ptr memory) -> bool {
const std::string str_not_allocated("Input data was not allocated.");
cldnn::pointer<const uint8_t> ptr = memory.pointer<const uint8_t>();
cldnn::mem_lock<uint8_t> ptr{memory, m_graph->GetNetwork()->get_stream()};
const uint8_t* blob_ptr = blob.cbuffer().as<const uint8_t*>();
const uint8_t* mem_ptr = ptr.data();
if (blob_ptr == nullptr || mem_ptr == nullptr) {
IE_THROW() << str_not_allocated;
}
return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size());
return (blob_ptr == mem_ptr) && (blob.byteSize() == memory->size());
};
cldnn::primitive_id internalName = "parameter:" + inputName;
const cldnn::memory& memory = inputsMemory.at(inputName);
cldnn::memory::ptr memory = inputsMemory.at(inputName);
auto& stream = m_graph->GetNetwork()->get_stream();
auto _nw_ptr = m_graph->GetNetwork();
auto prec = inputBlob.getTensorDesc().getPrecision();
@ -986,8 +895,8 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
_nw_ptr->set_input_data(internalName, memory);
} else if (prec == Precision::I16 || prec == Precision::U16) {
// clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision
const cldnn::memory& fp32_mem = inputsMemory.at(inputName+fp32_suffix);
cldnn::pointer<float> ptr = fp32_mem.pointer<float>();
cldnn::memory::ptr fp32_mem = inputsMemory.at(inputName+fp32_suffix);
cldnn::mem_lock<float> ptr {fp32_mem, stream};
if (prec == Precision::I16) {
copyToFloat<int16_t>(ptr.data(), &inputBlob);
} else {
@ -1031,4 +940,4 @@ void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, co
}
}
}; // namespace CLDNNPlugin
} // namespace CLDNNPlugin

View File

@ -46,7 +46,7 @@ public:
void EnableStreams() { m_useStreams = true; }
protected:
std::map<std::string, cldnn::memory> inputsMemory;
std::map<std::string, cldnn::memory::ptr> inputsMemory;
std::map<std::string, cldnn::primitive_id> outputsMap;
bool m_useProfiling;
@ -60,12 +60,12 @@ protected:
InferenceEngine::Blob::Ptr createInputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
void copyOutputData(const cldnn::memory& outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
void copyOutputData(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
void copyInputData(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob,
buf_info* bi = nullptr);
void input_attach(cldnn::primitive_id name, cldnn::memory& inputMem);
void input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem);
void input_alloc(cldnn::primitive_id name, const cldnn::layout& layout);
void AllocateInputs();
void AllocateOutputs();
@ -76,9 +76,6 @@ protected:
void PrepareInput(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
private:
static const char fp32_suffix[];
};
}; // namespace CLDNNPlugin

View File

@ -92,7 +92,7 @@ bool Program::CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops,
return true;
}
Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config, bool createTopologyOnly)
Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config, bool createTopologyOnly)
: m_config(config)
, m_engine(engine)
, m_curBatch(-1)
@ -128,11 +128,9 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cld
ChangeInputBatch(1U << static_cast<unsigned>(b));
m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly));
m_engine->release_pending_memory(0);
}
} else {
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly));
m_engine->release_pending_memory(0);
}
}

View File

@ -15,8 +15,8 @@
#include "cldnn_config.h"
#include <api/engine.hpp>
#include <api/topology.hpp>
#include <cldnn/runtime/engine.hpp>
#include <cldnn/graph/topology.hpp>
// Forward declarations for cldnn part
namespace cldnn {
@ -69,8 +69,8 @@ public:
class Program {
public:
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config, bool createTopologyOnly = false);
Program(std::shared_ptr<const cldnn::engine> engine, const Config& config) : m_config(config), m_engine(engine),
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config, bool createTopologyOnly = false);
Program(std::shared_ptr<cldnn::engine> engine, const Config& config) : m_config(config), m_engine(engine),
m_curBatch(-1), queryMode(false), m_max_batch(1) {}
Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false), m_max_batch(1) {}
@ -100,8 +100,8 @@ public:
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; }
InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; }
const cldnn::engine& GetEngine() const { return *m_engine; }
std::shared_ptr<const cldnn::engine> GetEnginePtr() const { return m_engine; }
cldnn::engine& GetEngine() const { return *m_engine; }
std::shared_ptr<cldnn::engine> GetEnginePtr() const { return m_engine; }
const Config& GetConfig() const { return m_config; }
int GetMaxBatchSizeForSingleProgram();
@ -150,7 +150,7 @@ public:
private:
static factories_map_t factories_map;
std::vector<std::shared_ptr<cldnn::program>> m_programs;
std::shared_ptr<const cldnn::engine> m_engine;
std::shared_ptr<cldnn::engine> m_engine;
Config m_config;
std::shared_ptr<cldnn::topology> m_topology;

View File

@ -6,21 +6,23 @@
#include "cldnn_remote_context.h"
#include "cldnn_itt.h"
#include "cldnn/runtime/device_query.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;
namespace CLDNNPlugin {
static const char unsupported_str[] = "Unsupported shared object type ";
CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator;
// Wraps a (possibly externally shared) GPU memory object behind the RemoteBlob
// interface. Only bookkeeping happens here; the actual cldnn memory object is
// created lazily by allocate()/allocate_if_needed().
// NOTE(review): the stale pre-refactor initializer list (without m_stream) was
// diff residue conflicting with the new stream-aware constructor; removed.
CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
    cldnn::stream& stream,
    const cldnn::layout& layout,
    cldnn::shared_handle mem,
    cldnn::shared_surface surf,
    uint32_t plane,
    BlobType mem_type) :
    m_context(context), m_stream(stream), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane),
    _handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
}
@ -67,7 +69,6 @@ ParamMap CLDNNRemoteBlobImpl::getParams() const {
}
// Releases the underlying cldnn memory object, if any.
// Returns true when no memory is held afterwards (always the case here).
// The previous `if (m_memObject != nullptr)` guard was redundant:
// smart-pointer reset() is already a no-op on an empty pointer.
bool CLDNNRemoteBlobImpl::deallocate() noexcept {
    m_memObject.reset();
    return m_memObject == nullptr;
}
@ -86,32 +87,7 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() {
_impl->acquire_lock();
if (m_memObject == nullptr) {
auto eng = _impl->GetEngine();
switch (m_mem_type) {
case BlobType::BT_BUF_INTERNAL:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::allocate(*eng, m_layout)));
break;
case BlobType::BT_BUF_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem)));
break;
#ifdef _WIN32
case BlobType::BT_SURF_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane)));
break;
case BlobType::BT_DX_BUF_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_dx_buffer(*eng, m_layout, m_mem)));
break;
#else
case BlobType::BT_SURF_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_surf, m_plane)));
break;
#endif
case BlobType::BT_IMG_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_image(*eng, m_layout, m_mem)));
break;
default:
IE_THROW() << unsupported_str << m_mem_type;
}
allocate();
}
_impl->release_lock();
@ -120,32 +96,38 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() {
// Creates the cldnn memory object backing this blob according to m_mem_type:
// a fresh internal buffer, or a wrapper around a user-shared OpenCL buffer,
// image, surface, or (Windows) DX buffer. Must be called at most once
// (asserted); unknown types leave the blob without memory.
// NOTE(review): the stale pre-refactor lines (const engine, unique_ptr around
// the removed cldnn::memory::share_* value API) were diff residue interleaved
// with the new engine-API calls; resolved to the new API only.
void CLDNNRemoteBlobImpl::allocate() noexcept {
    assert(m_memObject == nullptr);

    std::shared_ptr<cldnn::engine> eng = getContextImpl(m_context.lock())->GetEngine();

    switch (m_mem_type) {
    case BlobType::BT_BUF_INTERNAL: {
        m_memObject = eng->allocate_memory(m_layout);
        break;
    }
    case BlobType::BT_BUF_SHARED: {
        m_memObject = eng->share_buffer(m_layout, m_mem);
        break;
    }
#ifdef _WIN32
    case BlobType::BT_SURF_SHARED: {
        m_memObject = eng->share_surface(m_layout, m_mem, m_plane);
        break;
    }
    case BlobType::BT_DX_BUF_SHARED: {
        m_memObject = eng->share_dx_buffer(m_layout, m_mem);
        break;
    }
#else
    case BlobType::BT_SURF_SHARED: {
        m_memObject = eng->share_surface(m_layout, m_surf, m_plane);
        break;
    }
#endif
    case BlobType::BT_IMG_SHARED: {
        m_memObject = eng->share_image(m_layout, m_mem);
        break;
    }
    default:
        m_memObject.reset();
    }
}
@ -165,7 +147,7 @@ std::shared_ptr<RemoteContext> CLDNNRemoteBlobImpl::getContext() const noexcept
}
void CLDNNRemoteBlobImpl::lock() const {
lockedHolder = std::unique_ptr<cldnn::pointer<uint8_t>>(new cldnn::pointer<uint8_t>(m_memObject->pointer<uint8_t>()));
lockedHolder = std::unique_ptr<cldnn::mem_lock<uint8_t>>(new cldnn::mem_lock<uint8_t>(m_memObject, m_stream));
auto ptr = lockedHolder->data();
_handle = reinterpret_cast<void*>(ptr);
m_allocator.regLockedBlob(_handle, this);
@ -244,7 +226,11 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
}
}
cldnn::device_query device_query(_context_id, _va_device);
// TODO: Parameterize this based on plugin config and compilation options
auto engine_type = cldnn::engine_types::ocl;
auto runtime_type = cldnn::runtime_types::ocl;
// Use actual runtime and engine types
cldnn::device_query device_query(engine_type, runtime_type, _context_id, _va_device);
auto device_map = device_query.get_available_devices();
auto iter = device_map.find(m_config.device_id);
@ -252,28 +238,25 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
{
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl::Create");
m_engine = std::make_shared<cldnn::engine>(dev,
cldnn::engine_configuration((m_config.useProfiling ||
bool enable_profiling = (m_config.useProfiling ||
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)),
false,
m_config.dumpCustomKernels,
std::string(),
std::string(),
true,
std::string(),
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache));
cldnn::queue_types queue_type = cldnn::queue_types::out_of_order;
bool use_unified_shared_memory = true;
m_engine = cldnn::engine::create(engine_type, runtime_type, dev, cldnn::engine_configuration(enable_profiling,
queue_type,
m_config.sources_dumps_dir,
m_config.queuePriority,
m_config.queueThrottle,
m_config.memory_pool_on,
m_config.throughput_streams,
use_unified_shared_memory,
m_config.kernels_cache_dir,
m_config.n_threads));
}
}
ParamMap CLDNNExecutionContextImpl::getParams() const {
ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_context() } };
ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } };
switch (m_type) {
case OCL:

View File

@ -4,15 +4,11 @@
#pragma once
#include <string>
#include <map>
#include <memory>
#include <atomic>
#include <cldnn/runtime/memory.hpp>
#include <cldnn/runtime/engine.hpp>
#include <ie_parameter.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include "cldnn_config.h"
#include <api/memory.hpp>
#include <api/engine.hpp>
#include "cldnn_common_utils.h"
#ifndef NOMINMAX
@ -25,6 +21,11 @@
# include <gpu/gpu_context_api_va.hpp>
#endif
#include <string>
#include <map>
#include <memory>
#include <atomic>
namespace CLDNNPlugin {
class CLDNNRemoteAllocator;
@ -41,6 +42,7 @@ public:
};
explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout,
cldnn::shared_handle mem,
cldnn::shared_surface surf,
@ -63,11 +65,12 @@ public:
bool is_allocated() const noexcept;
bool is_locked() const noexcept;
void allocate_if_needed();
cldnn::memory& getMemory() { return *m_memObject; }
cldnn::memory::ptr getMemory() { return m_memObject; }
protected:
static CLDNNRemoteAllocator m_allocator;
std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;
cldnn::stream& m_stream;
// constructor stuff
cldnn::shared_handle m_mem;
@ -77,9 +80,9 @@ protected:
cldnn::layout m_layout;
BlobType m_mem_type;
std::unique_ptr<cldnn::memory> m_memObject;
cldnn::memory::ptr m_memObject;
mutable std::unique_ptr<cldnn::pointer<uint8_t>> lockedHolder;
mutable std::unique_ptr<cldnn::mem_lock<uint8_t>> lockedHolder;
mutable void* _handle;
mutable std::shared_ptr<InferenceEngine::IAllocator> _allocator;
@ -93,13 +96,14 @@ public:
using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>;
explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const InferenceEngine::TensorDesc& desc,
const cldnn::layout& layout,
cldnn::shared_handle mem,
cldnn::shared_surface surf,
uint32_t plane,
CLDNNRemoteBlobImpl::BlobType mem_type)
: _impl(context, layout, mem, surf, plane, mem_type)
: _impl(context, stream, layout, mem, surf, plane, mem_type)
, TpublicAPI(desc) {}
void allocate() noexcept override { _impl.allocate(); }
@ -231,6 +235,7 @@ public:
}
protected:
// TODO: refactor to unique_ptr
std::shared_ptr<cldnn::engine> m_engine;
InferenceEngine::gpu_handle_param m_va_display;
Config m_config;
@ -267,6 +272,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
using namespace InferenceEngine;
using InferenceEngine::gpu::details::param_map_obj_getter;
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
auto& stream = _impl.GetEngine()->get_program_stream();
uint32_t plane = param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
#ifdef _WIN32
cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
@ -290,11 +296,11 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
#ifdef _WIN32
ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this,
ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this, stream,
tensorDesc, layout, mem, 0, plane,
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
#else
ret = std::make_shared<CLDNNRemoteVASurface>(smart_this,
ret = std::make_shared<CLDNNRemoteVASurface>(smart_this, stream,
tensorDesc, layout, nullptr, surf, plane,
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
#endif
@ -311,6 +317,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
_impl.acquire_lock();
auto& stream = _impl.GetEngine()->get_program_stream();
// try to locate previously shared object
auto itr = shared_obj_reg.find(mem);
@ -327,15 +334,15 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
switch (blob_type) {
case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
#ifdef _WIN32
case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
#endif
default:
@ -354,7 +361,9 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
CldnnTensorFromIEDims(tensorDesc.getDims()));
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
auto& stream = _impl.GetEngine()->get_program_stream();
return std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
stream,
tensorDesc,
layout,
nullptr, 0, 0,

View File

@ -8,7 +8,7 @@
#include "ngraph/op/batch_to_space.hpp"
#include "ngraph/op/constant.hpp"
#include "api/batch_to_space.hpp"
#include "cldnn/primitives/batch_to_space.hpp"
namespace CLDNNPlugin {

View File

@ -8,9 +8,9 @@
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "api/broadcast.hpp"
#include "api/reorder.hpp"
#include "api/reshape.hpp"
#include "cldnn/primitives/broadcast.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/reshape.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/concat.hpp"
#include "api/concatenation.hpp"
#include "cldnn/primitives/concatenation.hpp"
namespace CLDNNPlugin {

View File

@ -17,7 +17,7 @@
#include "ngraph/op/variadic_split.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "api/data.hpp"
#include "cldnn/primitives/data.hpp"
namespace CLDNNPlugin {
@ -169,9 +169,10 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
if (bufIter != p.blobMemCache.end()) {
constPrimID = bufIter->second;
} else {
auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false);
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor
auto buf = tmpPointer.data();
cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false);
auto& stream = p.GetEngine().get_program_stream();
cldnn::mem_lock<char> lock{mem, stream};
auto buf = lock.data();
auto bufSize = constLayout.bytes_count();
// Do actual weights reorder and change O and I channels order

View File

@ -8,7 +8,7 @@
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convert_like.hpp"
#include "api/reorder.hpp"
#include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin {

View File

@ -13,11 +13,11 @@
#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "api/convolution.hpp"
#include "api/deconvolution.hpp"
#include "api/binary_convolution.hpp"
#include "api/permute.hpp"
#include "api/reorder.hpp"
#include "cldnn/primitives/convolution.hpp"
#include "cldnn/primitives/deconvolution.hpp"
#include "cldnn/primitives/binary_convolution.hpp"
#include "cldnn/primitives/permute.hpp"
#include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin {

View File

@ -8,9 +8,9 @@
#include "ngraph/op/ctc_greedy_decoder.hpp"
#include "ngraph/op/ctc_greedy_decoder_seq_len.hpp"
#include "api/ctc_greedy_decoder.hpp"
#include "api/reorder.hpp"
#include "api/mutable_data.hpp"
#include "cldnn/primitives/ctc_greedy_decoder.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/mutable_data.hpp"
#include "transformations/utils/utils.hpp"
@ -58,7 +58,7 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
std::size_t num_output = op->get_output_size();
std::vector<cldnn::memory> shared_memory;
std::vector<cldnn::memory::ptr> shared_memory;
if (num_output == 2) {
auto mutable_precision = op->get_output_element_type(1);
if (mutable_precision == ngraph::element::i64) {
@ -70,7 +70,7 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayout));
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout));
cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]);

View File

@ -8,7 +8,7 @@
#include "ngraph/op/cum_sum.hpp"
#include "ngraph/op/constant.hpp"
#include "api/cum_sum.hpp"
#include "cldnn/primitives/cum_sum.hpp"
namespace CLDNNPlugin {

View File

@ -9,8 +9,8 @@
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/node.hpp"
#include "api/custom_gpu_primitive.hpp"
#include "api/reorder.hpp"
#include "cldnn/primitives/custom_gpu_primitive.hpp"
#include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/depth_to_space.hpp"
#include "api/depth_to_space.hpp"
#include "cldnn/primitives/depth_to_space.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/detection_output.hpp"
#include "api/detection_output.hpp"
#include "cldnn/primitives/detection_output.hpp"
namespace CLDNNPlugin {

View File

@ -25,10 +25,10 @@
#include "ngraph/op/power.hpp"
#include "ngraph/op/floor_mod.hpp"
#include "api/activation.hpp"
#include "api/eltwise.hpp"
#include "api/reorder.hpp"
#include "api/reshape.hpp"
#include "cldnn/primitives/activation.hpp"
#include "cldnn/primitives/eltwise.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/reshape.hpp"
namespace CLDNNPlugin {

View File

@ -9,8 +9,8 @@
#include "ngraph/op/embeddingbag_offsets_sum.hpp"
#include "ngraph/op/embeddingbag_packedsum.hpp"
#include "api/embedding_bag.hpp"
#include "api/reorder.hpp"
#include "cldnn/primitives/embedding_bag.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "transformations/utils/utils.hpp"

View File

@ -7,7 +7,7 @@
#include "ngraph/op/extractimagepatches.hpp"
#include "api/extract_image_patches.hpp"
#include "cldnn/primitives/extract_image_patches.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/fake_quantize.hpp"
#include "api/quantize.hpp"
#include "cldnn/primitives/quantize.hpp"
namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include "ngraph/op/gather_tree.hpp"
#include "api/gather_tree.hpp"
#include "api/reorder.hpp"
#include "cldnn/primitives/gather_tree.hpp"
#include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include "ngraph/op/gather.hpp"
#include "api/gather.hpp"
#include "api/reorder.hpp"
#include "cldnn/primitives/gather.hpp"
#include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/gather_nd.hpp"
#include "ngraph/op/constant.hpp"
#include "api/gather_nd.hpp"
#include "cldnn/primitives/gather_nd.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/grn.hpp"
#include "api/grn.hpp"
#include "cldnn/primitives/grn.hpp"
namespace CLDNNPlugin {

View File

@ -9,7 +9,7 @@
#include "ngraph/op/interpolate.hpp"
#include "ngraph/op/constant.hpp"
#include "api/resample.hpp"
#include "cldnn/primitives/resample.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/constant.hpp"
#include "api/lrn.hpp"
#include "cldnn/primitives/lrn.hpp"
namespace CLDNNPlugin {

View File

@ -9,11 +9,11 @@
#include "ngraph/op/constant.hpp"
#include "ngraph/op/fake_quantize.hpp"
#include "api/gemm.hpp"
#include "api/fully_connected.hpp"
#include "api/reshape.hpp"
#include "api/reorder.hpp"
#include "api/permute.hpp"
#include "cldnn/primitives/gemm.hpp"
#include "cldnn/primitives/fully_connected.hpp"
#include "cldnn/primitives/reshape.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/permute.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,8 @@
#include "ngraph/op/mvn.hpp"
#include "ngraph/op/constant.hpp"
#include "api/mvn.hpp"
#include "cldnn/primitives/mvn.hpp"
#include <algorithm>
namespace CLDNNPlugin {

View File

@ -9,9 +9,9 @@
#include <ngraph/opsets/opset3.hpp>
#include <ngraph_ops/nms_ie_internal.hpp>
#include "api/reorder.hpp"
#include "api/mutable_data.hpp"
#include "api/non_max_suppression.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/mutable_data.hpp"
#include "cldnn/primitives/non_max_suppression.hpp"
namespace CLDNNPlugin {
@ -62,7 +62,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
std::size_t num_output = op->get_output_size();
std::vector<cldnn::memory> shared_memory;
std::vector<cldnn::memory::ptr> shared_memory;
switch (num_output) {
case 3: {
auto mutable_precision_second = op->get_output_element_type(2);
@ -74,7 +74,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
DefaultFormatForDims(op->get_output_shape(2).size()),
CldnnTensorFromIEDims(op->get_output_shape(2)));
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutSecond));
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));
cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back());
@ -91,7 +91,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
cldnn::format::bfyx,
cldnn::tensor(outputIndices, 3, 1, 1));
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutFirst));
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));
cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back());

View File

@ -8,8 +8,8 @@
#include "ngraph/op/normalize_l2.hpp"
#include "ngraph/op/constant.hpp"
#include "api/normalize.hpp"
#include "api/data.hpp"
#include "cldnn/primitives/normalize.hpp"
#include "cldnn/primitives/data.hpp"
namespace CLDNNPlugin {
@ -35,8 +35,8 @@ void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::Norma
// We create fake scale constant and fill it with ones to keep the same behavior as current primitive
auto scale = std::make_shared<ngraph::op::v0::Constant>(op->get_output_element_type(0), ngraph::Shape{1}, std::vector<float>{1.0});
cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1});
auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false);
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor
auto mem = p.GetEngine().allocate_memory(constLayout, false);
cldnn::mem_lock<int8_t> tmpPointer{mem, p.GetEngine().get_program_stream()};
auto buf = tmpPointer.data();
auto bufSize = scale->get_output_tensor(0).size();

View File

@ -8,7 +8,7 @@
#include "ngraph/op/one_hot.hpp"
#include "api/one_hot.hpp"
#include "cldnn/primitives/one_hot.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/pad.hpp"
#include "api/border.hpp"
#include "cldnn/primitives/border.hpp"
namespace CLDNNPlugin {

View File

@ -7,10 +7,10 @@
#include "ngraph/op/parameter.hpp"
#include "api/input_layout.hpp"
#include "api/reorder.hpp"
#include "api/data.hpp"
#include "api/concatenation.hpp"
#include "cldnn/primitives/input_layout.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/data.hpp"
#include "cldnn/primitives/concatenation.hpp"
using namespace InferenceEngine;
@ -158,8 +158,8 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
if (bufIter != p.blobMemCache.end()) {
meanBlobID = bufIter->second;
} else {
auto mem = cldnn::memory::allocate(p.GetEngine(), meanBlobLayout, 0, false);
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor
auto mem = p.GetEngine().allocate_memory(meanBlobLayout, false);
cldnn::mem_lock<int8_t> tmpPointer{ mem, p.GetEngine().get_program_stream() };
auto buf = tmpPointer.data();
auto bufSize = meanBlobLayout.bytes_count();

View File

@ -8,7 +8,7 @@
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "api/pooling.hpp"
#include "cldnn/primitives/pooling.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/prior_box.hpp"
#include "ngraph/op/prior_box_clustered.hpp"
#include "api/prior_box.hpp"
#include "cldnn/primitives/prior_box.hpp"
namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include "ngraph/op/proposal.hpp"
#include "api/proposal.hpp"
#include "api/mutable_data.hpp"
#include "cldnn/primitives/proposal.hpp"
#include "cldnn/primitives/mutable_data.hpp"
namespace CLDNNPlugin {
@ -62,7 +62,7 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout);
auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory);

View File

@ -16,9 +16,9 @@
#include "ngraph/op/max.hpp"
#include "ngraph/op/constant.hpp"
#include "api/reduce.hpp"
#include "api/reorder.hpp"
#include "api/reshape.hpp"
#include "cldnn/primitives/reduce.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/reshape.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/region_yolo.hpp"
#include "api/region_yolo.hpp"
#include "cldnn/primitives/region_yolo.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/reorg_yolo.hpp"
#include "api/reorg_yolo.hpp"
#include "cldnn/primitives/reorg_yolo.hpp"
namespace CLDNNPlugin {

View File

@ -9,8 +9,8 @@
#include "ngraph/op/squeeze.hpp"
#include "ngraph/op/unsqueeze.hpp"
#include "api/reshape.hpp"
#include "api/reorder.hpp"
#include "cldnn/primitives/reshape.hpp"
#include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/result.hpp"
#include "api/reorder.hpp"
#include "cldnn/primitives/reorder.hpp"
using namespace InferenceEngine;

View File

@ -7,7 +7,7 @@
#include "ngraph/op/reverse_sequence.hpp"
#include "api/reverse_sequence.hpp"
#include "cldnn/primitives/reverse_sequence.hpp"
namespace CLDNNPlugin {

View File

@ -8,12 +8,12 @@
#include "ngraph/op/lstm_cell.hpp"
#include "ngraph/op/lstm_sequence.hpp"
#include "api/reshape.hpp"
#include "api/reorder.hpp"
#include "api/fully_connected.hpp"
#include "api/lstm.hpp"
#include "api/crop.hpp"
#include "api/concatenation.hpp"
#include "cldnn/primitives/reshape.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/fully_connected.hpp"
#include "cldnn/primitives/lstm.hpp"
#include "cldnn/primitives/crop.hpp"
#include "cldnn/primitives/concatenation.hpp"
namespace CLDNNPlugin {
cldnn::activation_func GetActivationFunc(std::string name) {

View File

@ -9,7 +9,7 @@
#include "ngraph/op/psroi_pooling.hpp"
#include "ngraph/op/deformable_psroi_pooling.hpp"
#include "api/roi_pooling.hpp"
#include "cldnn/primitives/roi_pooling.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/scatter_elements_update.hpp"
#include "ngraph/op/constant.hpp"
#include "api/scatter_elements_update.hpp"
#include "cldnn/primitives/scatter_elements_update.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/scatter_nd_update.hpp"
#include "ngraph/op/constant.hpp"
#include "api/scatter_nd_update.hpp"
#include "cldnn/primitives/scatter_nd_update.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/scatter_update.hpp"
#include "ngraph/op/constant.hpp"
#include "api/scatter_update.hpp"
#include "cldnn/primitives/scatter_update.hpp"
namespace CLDNNPlugin {

View File

@ -7,9 +7,9 @@
#include "ngraph/op/select.hpp"
#include "api/select.hpp"
#include "api/reorder.hpp"
#include "api/reshape.hpp"
#include "cldnn/primitives/select.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/primitives/reshape.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/shuffle_channels.hpp"
#include "api/shuffle_channels.hpp"
#include "cldnn/primitives/shuffle_channels.hpp"
namespace CLDNNPlugin {

View File

@ -8,8 +8,8 @@
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/log_softmax.hpp"
#include "api/softmax.hpp"
#include "api/activation.hpp"
#include "cldnn/primitives/softmax.hpp"
#include "cldnn/primitives/activation.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/space_to_batch.hpp"
#include "ngraph/op/constant.hpp"
#include "api/space_to_batch.hpp"
#include "cldnn/primitives/space_to_batch.hpp"
namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/space_to_depth.hpp"
#include "api/space_to_depth.hpp"
#include "cldnn/primitives/space_to_depth.hpp"
namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/split.hpp"
#include "ngraph/op/variadic_split.hpp"
#include "api/crop.hpp"
#include "cldnn/primitives/crop.hpp"
namespace CLDNNPlugin {

View File

@ -8,9 +8,9 @@
#include "ngraph/op/strided_slice.hpp"
#include "ngraph/op/constant.hpp"
#include "api/strided_slice.hpp"
#include "api/reshape.hpp"
#include "api/crop.hpp"
#include "cldnn/primitives/strided_slice.hpp"
#include "cldnn/primitives/reshape.hpp"
#include "cldnn/primitives/crop.hpp"
namespace CLDNNPlugin {

View File

@ -13,11 +13,11 @@
#include "ngraph/op/constant.hpp"
#include "ngraph/op/util/sub_graph_base.hpp"
#include "api/loop.hpp"
#include "api/mutable_data.hpp"
#include "api/data.hpp"
#include "api/reorder.hpp"
#include "api/topology.hpp"
#include "cldnn/primitives/loop.hpp"
#include "cldnn/primitives/mutable_data.hpp"
#include "cldnn/primitives/data.hpp"
#include "cldnn/primitives/reorder.hpp"
#include "cldnn/graph/topology.hpp"
#include <vector>
#include <algorithm>
@ -28,9 +28,8 @@ namespace CLDNNPlugin {
template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
auto mem = cldnn::memory::allocate(p.GetEngine(),
{ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
auto ptr = mem.pointer<int64_t>();
auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
*ptr.begin() = num;
return {id, mem};
}
@ -42,7 +41,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = cldnn::memory::allocate(p.GetEngine(), output_layout);
auto mem = p.GetEngine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
return md;
}

View File

@ -7,7 +7,7 @@
#include "ngraph/op/tile.hpp"
#include "api/tile.hpp"
#include "cldnn/primitives/tile.hpp"
namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include "ngraph/op/topk.hpp"
#include "api/arg_max_min.hpp"
#include "api/mutable_data.hpp"
#include "cldnn/primitives/arg_max_min.hpp"
#include "cldnn/primitives/mutable_data.hpp"
namespace CLDNNPlugin {
@ -71,7 +71,7 @@ void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>& op) {
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout);
auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, shared_memory);

View File

@ -8,7 +8,7 @@
#include "ngraph/op/transpose.hpp"
#include "ngraph/op/constant.hpp"
#include "api/permute.hpp"
#include "cldnn/primitives/permute.hpp"
namespace CLDNNPlugin {

View File

@ -41,7 +41,7 @@
#include "ngraph/op/hsigmoid.hpp"
#include "ngraph/op/round.hpp"
#include "api/activation.hpp"
#include "cldnn/primitives/activation.hpp"
namespace CLDNNPlugin {

View File

@ -52,13 +52,6 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift())
return false;
// Don't reshape the first dnn layer since it breaks groups recognition
auto prevLayer = InferenceEngine::CNNNetPrevLayerSkipCertain(layer, 0, [](InferenceEngine::CNNLayerPtr ptr) {
return LayerInfo(ptr).isNonValuesChangable();
});
IE_ASSERT(prevLayer != nullptr);
if (LayerInfo(prevLayer).isInput()) return false;
// Don't reshape diagonallayers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
}

View File

@ -85,8 +85,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
return LayerInfo(ptr).isNonValuesChangable();
});
IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = (LayerInfo(prevLayer).has32BOutput() || LayerInfo(inputLayer).isInput()) ?
nextLayer->outData[0]->getDims().back() :
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr);

View File

@ -42,7 +42,7 @@ static int32_t as_int32_t(T v) {
}
class OstreamHashWrapper final: public std::streambuf {
std::size_t m_res = {};
std::size_t m_res = 0;
public:
std::size_t getResult() const { return m_res; }
std::streamsize xsputn(const char* s, std::streamsize n) override {
@ -65,7 +65,7 @@ public:
//////////////////////////////////////////////////
std::string NetworkCompilationContext::calculateFileInfo(const std::string& filePath) {
size_t seed {};
size_t seed = 0;
auto absPath = filePath;
try {
absPath = FileUtils::absoluteFilePath(filePath);

View File

@ -270,6 +270,12 @@ template <typename T, typename... Args>
std::shared_ptr<Node> fold_reshape(Args&&... args) {
std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...);
if (node->get_output_size() == 1) {
// issue #57985: remove fold_reshape & reuse nGraph implementation
const auto values = as_type_ptr<opset1::Constant>(node->input_value(1).get_node_shared_ptr())->template cast_vector<int64_t>();
if (std::any_of(values.begin(), values.end(), [](const int64_t value) { return (value == 0) || (value == -1); })) {
return fold<opset1::Reshape>(std::forward<Args>(args)...);
}
OutputVector folded;
if (is_type<opset1::Constant>(node->input_value(0).get_node_shared_ptr()) &&
is_type<opset1::Constant>(node->input_value(1).get_node_shared_ptr())) {

View File

@ -683,7 +683,7 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
auto levels_1 = fq->get_levels() - 1.f;
const size_t DHW = D * H * W;
const size_t IDHW = IC * D * H * W;
const size_t IDHW = outChannelsShapeIndex == 0 ? IC * D * H * W : OC * D * H * W;
const auto values = constant->cast_vector<float>();
std::vector<float> quantizedValues(OC * IC * D * H * W);

View File

@ -106,7 +106,6 @@ void jit_load_emitter::emit_isa(const Xbyak::Reg64 &reg_src, int offset_byte, In
break;
case Precision::I32:
if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16)) {
h->uni_vroundps(Vmm(out_vec_idx), Vmm(out_vec_idx), 3); // rounding to zero
h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx));
}
break;
@ -511,6 +510,11 @@ size_t jit_store_emitter::aux_vecs_count() const {
size_t jit_store_emitter::get_inputs_num() const { return 1; }
void jit_store_emitter::emit_data() const {
if (emu_vcvtneps2bf16)
emu_vcvtneps2bf16->emit_data();
}
void jit_store_emitter::emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const {
@ -552,7 +556,6 @@ template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
switch (src_prc) {
case Precision::FP32:
if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16)) {
h->uni_vroundps(Vmm(in_vec_idx), Vmm(in_vec_idx), 3); // rounding to zero
h->uni_vcvtps2dq(Vmm(in_vec_idx), Vmm(in_vec_idx));
}
break;

View File

@ -18,8 +18,8 @@ struct load_emitter_context : public emitter_context {
load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8),
offset_byte_(0), is_fill_(false), fill_value_("zero") {}
load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero", int offset_byte = 0):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), is_fill_(is_fill), fill_value_(fill_value), offset_byte_(offset_byte) {}
load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, int offset_byte = 0, bool is_fill = false, std::string fill_value = "zero"):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), offset_byte_(offset_byte), is_fill_(is_fill), fill_value_(fill_value) {}
int offset_byte_;
int load_num_;
@ -124,6 +124,8 @@ public:
size_t get_inputs_num() const override;
void emit_data() const override;
std::shared_ptr<jit_emu_vcvtneps2bf16> get_emu_vcvtneps2bf16() const {
return emu_vcvtneps2bf16;
}

View File

@ -306,7 +306,7 @@ private:
inline void worker_tail_planar() {
Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32;
load_emitter->emit_code({static_cast<size_t>(reg_src.getIdx())}, {static_cast<size_t>(vmm_val.getIdx())},
std::make_shared<load_emitter_context>(jcp_.src_prc, dst_prc, tail_num, true, "zero"),
std::make_shared<load_emitter_context>(jcp_.src_prc, dst_prc, tail_num, 0, true),
{}, {load_pool_gpr_idxs});
if (jcp_.normalize_variance) {
@ -477,8 +477,7 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator
this->postamble();
load_emitter->emit_data();
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr)
store_emitter->get_emu_vcvtneps2bf16()->emit_data();
store_emitter->emit_data();
for (auto& inj : eltwise_injectors)
inj->prepare_table();

View File

@ -88,8 +88,7 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi
this->postamble();
load_emitter->emit_data();
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr)
store_emitter->get_emu_vcvtneps2bf16()->emit_data();
store_emitter->emit_data();
}
private:
@ -155,7 +154,7 @@ private:
Vmm vmm_max = get_acc_reg(i);
load_emitter->emit_code({static_cast<size_t>(reg_input.getIdx())}, {static_cast<size_t>(vmm_max.getIdx())},
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off),
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, i * src_c_off),
{}, load_pool_gpr_idxs);
}
@ -169,7 +168,7 @@ private:
Vmm vmm_src = get_src_reg(i);
load_emitter->emit_code({static_cast<size_t>(aux_reg_input1.getIdx())}, {static_cast<size_t>(vmm_src.getIdx())},
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off),
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, i * src_c_off),
{}, load_pool_gpr_idxs);
if (isa == cpu::x64::sse41) {
@ -222,7 +221,7 @@ private:
for (int i = 0; i < c_blocks; i++) {
const int src_c_off = i * jpp_.ih * jpp_.iw * jpp_.c_block * jpp_.src_data_size;
const auto load_context = std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", src_c_off);
const auto load_context = std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, src_c_off);
mov(aux_reg_input, reg_input);

View File

@ -12,9 +12,5 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::Function> f) {
ngraph::pass::Manager m(get_pass_config());
m.register_pass<Pruning>();
m.run_passes(f);
return false;
}

View File

@ -90,21 +90,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
}
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width) {
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
}
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width) {
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
}
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
@ -119,14 +104,6 @@ void calcRowArea_32F(float dst[], const float *src[], const Size& inSz,
calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf);
}
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
copyRow_8U_impl(in, out, length);
}
void copyRow_32F(const float in[], float out[], int length) {
copyRow_32F_impl(in, out, length);
}
// Resize (bi-linear, 32F)
void calcRowLinear_32F(float* dst[],
const float* src0[],
@ -708,6 +685,14 @@ void calcRowLinear_8UC1(uint8_t* dst[],
}
}
} // namespace neon
template void chanToPlaneRowImpl(neon_tag, const uint8_t* in, int chan, int chs, uint8_t* out, const int length);
template void chanToPlaneRowImpl(neon_tag, const float* in, int chan, int chs, float * out, const int length);
template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

View File

@ -167,26 +167,31 @@ void splitRow_32FC4(const float in[],
float out3[],
int length);
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width);
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width);
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length);
void copyRow_32F(const float in[],
float out[],
int length);
} // namespace neon
template<typename isa_tag_t, typename T>
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
extern template void chanToPlaneRowImpl(neon_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
extern template void chanToPlaneRowImpl(neon_tag, const float* in, const int chan, const int chs, float * out, const int length);
template<typename isa_tag_t>
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
extern template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
template<typename isa_tag_t>
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
extern template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

View File

@ -107,21 +107,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
}
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width) {
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
}
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width) {
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
}
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
@ -555,13 +540,6 @@ void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
}
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
copyRow_8U_impl(in, out, length);
}
void copyRow_32F(const float in[], float out[], int length) {
copyRow_32F_impl(in, out, length);
}
void calcRowLinear_32F(float *dst[],
const float *src0[],
const float *src1[],
@ -575,6 +553,15 @@ void calcRowLinear_32F(float *dst[],
}
} // namespace avx
template void chanToPlaneRowImpl(avx2_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
template void chanToPlaneRowImpl(avx2_tag, const float* in, const int chan, const int chs, float* out, const int length);
template void nv12ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* uv_row,
uint8_t** out_rows, const int buf_width);
template void i420ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

View File

@ -181,27 +181,29 @@ void splitRow_32FC4(const float in[],
float out2[],
float out3[],
int length);
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width);
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width);
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length);
void copyRow_32F(const float in[],
float out[],
int length);
} // namespace avx
template<typename isa_tag_t, typename T>
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
extern template void chanToPlaneRowImpl(avx2_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
extern template void chanToPlaneRowImpl(avx2_tag, const float* in, const int chan, const int chs, float * out, const int length);
template<typename isa_tag_t>
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row,
uint8_t** out_rows, const int buf_width);
extern template void nv12ToRgbRowImpl(avx2_tag, const uint8_t** y_rows,
const uint8_t* uv_row, uint8_t** out_rows,
const int buf_width);
template<typename isa_tag_t>
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
extern template void i420ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

View File

@ -101,21 +101,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
}
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width) {
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
}
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width) {
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
}
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
@ -636,14 +621,6 @@ void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
}
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
copyRow_8U_impl(in, out, length);
}
void copyRow_32F(const float in[], float out[], int length) {
copyRow_32F_impl(in, out, length);
}
void calcRowLinear_32F(float *dst[],
const float *src0[],
const float *src1[],
@ -657,6 +634,14 @@ void calcRowLinear_32F(float *dst[],
}
} // namespace avx512
template void chanToPlaneRowImpl(avx512_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
template void chanToPlaneRowImpl(avx512_tag, const float* in, const int chan, const int chs, float* out, const int length);
template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

View File

@ -180,27 +180,26 @@ void splitRow_32FC4(const float in[],
float out2[],
float out3[],
int length);
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width);
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width);
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length);
void copyRow_32F(const float in[],
float out[],
int length);
} // namespace avx512
template<typename isa_tag_t, typename T>
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
extern template void chanToPlaneRowImpl(avx512_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
extern template void chanToPlaneRowImpl(avx512_tag, const float* in, const int chan, const int chs, float* out, const int length);
template<typename isa_tag_t>
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
extern template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
template<typename isa_tag_t>
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
extern template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

View File

@ -1365,33 +1365,13 @@ void splitRow_32FC4(const float in[],
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
}
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width) {
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
}
template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan, const int chs, float* out, const int length);
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width) {
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
}
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length) {
copyRow_8U_impl(in, out, length);
}
void copyRow_32F(const float in[],
float out[],
int length) {
copyRow_32F_impl(in, out, length);
}
template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

View File

@ -180,25 +180,25 @@ void splitRow_32FC4(const float in[],
float out3[],
int length);
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width);
template<typename isa_tag_t, typename T>
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs,
T* out, const int length);
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width);
extern template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan,
const int chs, uint8_t* out, const int length);
extern template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan,
const int chs, float* out, const int length);
template<typename isa_tag_t>
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length);
extern template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
void copyRow_32F(const float in[],
float out[],
int length);
template<typename isa_tag_t>
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
extern template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

View File

@ -468,15 +468,86 @@ struct type_to_type {};
template <typename typelist>
struct type_dispatch_impl;
//FIXME: add test for type_dispatch
// Implementation detail of type_dispatch: expands the typelist and invokes the
// mapped action for the FIRST matching type only (guarded by `matched`).
// NOTE(review): the merged view contained a stale pre-merge copy of the fold
// expression (no `matched` guard) fused into this body; it is removed here.
template <template<typename ...> class typelist, typename... type>
struct type_dispatch_impl<typelist<type...>> {
    // Dispatch by type id: return type_to_value for the first list element
    // whose id (via type_to_id) equals type_id; otherwise default_value.
    template <typename result_t, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t>
    static result_t dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value) {
        result_t res = default_value;
        bool matched = false;
        // Pack expansion inside a braced initializer_list guarantees
        // left-to-right evaluation; `matched` stops after the first hit.
        std::initializer_list<int> ({
            !matched && (type_id == type_to_id(type_to_type<type>{})) ?
                (matched = true, res = type_to_value(type_to_type<type>{})), 0
                : 0
            ...
        });
        return res;
    }
    // Dispatch by predicate: return type_to_value for the first list element
    // for which pred(type_to_type<type>{}) holds; otherwise default_value.
    template <typename result_t, typename default_t, typename pred_t, typename type_to_value_t>
    static result_t dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value) {
        result_t res = default_value;
        bool matched = false;
        std::initializer_list<int> ({
            !matched && pred(type_to_type<type>{}) ?
                (matched = true, res = type_to_value(type_to_type<type>{})), 0
                : 0
        });
        return res;
    }
};
// concat<L, R>: metafunction gluing two typelists into a single list.
template<typename left_typelist, typename right_typelist>
struct concat;

// Convenience alias for concat<...>::type.
template<typename left_typelist, typename right_typelist>
using concat_t = typename concat<left_typelist, right_typelist>::type;

// The result carries the left list's template and holds all left elements
// followed by all right elements.
template<template<typename ...> class left_list, typename ... left_types,
         template<typename ...> class right_list, typename ... right_types>
struct concat<left_list<left_types...>, right_list<right_types...>> {
    using type = left_list<left_types..., right_types...>;
};
// is_same_t<T, U>: std::true_type / std::false_type depending on whether T == U.
template< class T, class U >
using is_same_t = typename std::is_same<T, U>::type;
// Compile-time if: if_c<true, T, E> yields T, if_c<false, T, E> yields E.
template<bool C, class T, class E> struct if_c_impl;
template<class T, class E> struct if_c_impl<true, T, E> {
using type = T;
};
template<class T, class E> struct if_c_impl<false, T, E> {
using type = E;
};
template<bool C, class T, class E>
using if_c = typename if_c_impl<C, T, E>::type;
// Same selection, but the condition is a trait exposing a static `value`.
template<class C, class T, class E>
using if_ = typename if_c_impl<C::value != 0, T, E>::type;
// remove<typelist, type>: metafunction yielding `typelist` with every
// occurrence of `type` filtered out. Used to exclude an ISA tag from the
// candidate set (e.g. dropping avx512_tag).
template<typename typelist, typename type>
struct remove;
template<typename typelist, typename type>
using remove_t = typename remove<typelist, type>::type;
// Recursive case: keep the head unless it equals `t`, then recurse on the tail.
template<template<typename ...> class list, typename head_t, typename ... types, typename t>
struct remove<list<head_t, types...>, t> {
using type = concat_t<
if_<is_same_t<head_t, t>, list<>, list<head_t>>,
remove_t<list<types...>, t>
>;
};
// Base case: removing from the empty list yields the empty list.
template<template<typename ...> class list, typename t>
struct remove<list<>, t> {
using type = list<>;
};
} // namespace
@ -490,6 +561,13 @@ result_t type_dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_val
std::forward<default_t>(default_value));
}
// Predicate-based type_dispatch: walks `typelist` and returns the value the
// functor produces for the first type satisfying `pred`; `default_value`
// otherwise. The result type is deduced from applying the functor to the
// head of the list.
template <typename typelist, typename default_t, typename pred_t, typename type_to_value_t,
typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {}))>
result_t type_dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value = {}) {
return type_dispatch_impl<typelist>::template dispatch<result_t>(std::forward<pred_t>(pred),
std::forward<type_to_value_t>(type_to_value),
std::forward<default_t>(default_value));
}
namespace {
struct cv_type_id {
@ -668,81 +746,47 @@ GAPI_FLUID_KERNEL(FSplit4, Split4, false) {
};
//----------------------------------------------------------------------
template<typename T>
static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, int length) {
// AVX512 implementation of wide universal intrinsics is slower than AVX2.
// It is turned off until the cause isn't found out.
#if 0
using isas_set = typelist<
#ifdef HAVE_AVX512
if (with_cpu_x86_avx512f()) {
if (std::is_same<T, uint8_t>::value && chs == 1) {
avx512::copyRow_8U(in, out, length);
return;
}
if (std::is_same<T, float>::value && chs == 1) {
avx512::copyRow_32F(reinterpret_cast<const float*>(in),
reinterpret_cast<float*>(out),
length);
return;
}
}
#endif // HAVE_AVX512
avx512_tag,
#endif
#ifdef HAVE_AVX2
avx2_tag,
#endif
#ifdef HAVE_SSE
sse42_tag,
#endif
#ifdef HAVE_NEON
neon_tag,
#endif
//scalar "ISA" have to be the last one in the list,
//as the search for supported ISA is performed until first match
scalar_tag>;
#ifdef HAVE_AVX512
bool is_present(avx512_tag) { return with_cpu_x86_avx512f(); }
#endif // HAVE_AVX512
#ifdef HAVE_AVX2
if (with_cpu_x86_avx2()) {
if (std::is_same<T, uint8_t>::value && chs == 1) {
avx::copyRow_8U(in, out, length);
return;
}
if (std::is_same<T, float>::value && chs == 1) {
avx::copyRow_32F(reinterpret_cast<const float*>(in),
reinterpret_cast<float*>(out),
length);
return;
}
}
bool is_present(avx2_tag) { return with_cpu_x86_avx2(); }
#endif // HAVE_AVX2
#ifdef HAVE_SSE
if (with_cpu_x86_sse42()) {
if (std::is_same<T, uint8_t>::value && chs == 1) {
copyRow_8U(in, out, length);
return;
}
if (std::is_same<T, float>::value && chs == 1) {
copyRow_32F(reinterpret_cast<const float*>(in),
reinterpret_cast<float*>(out),
length);
return;
}
}
#ifdef HAVE_SSE
bool is_present(sse42_tag) { return with_cpu_x86_sse42(); }
#endif // HAVE_SSE
#ifdef HAVE_NEON
if (std::is_same<T, uint8_t>::value && chs == 1) {
neon::copyRow_8U(in, out, length);
return;
}
if (std::is_same<T, float>::value && chs == 1) {
neon::copyRow_32F(reinterpret_cast<const float*>(in),
reinterpret_cast<float*>(out),
length);
return;
}
bool is_present(neon_tag) { return true; }
#endif // HAVE_NEON
const auto inT = reinterpret_cast<const T*>(in);
auto outT = reinterpret_cast< T*>(out);
//scalar version of kernels is always available
bool is_present(scalar_tag) { return true; }
for (int x = 0; x < length; x++) {
outT[x] = inT[x*chs + chan];
}
struct is_isa_present {
template< typename isa_tag_t>
bool operator()(type_to_type<isa_tag_t>) {
return is_present(isa_tag_t{});
}
};
// GAPI_OCV_KERNEL(OCVChanToPlane, ChanToPlane) {
// static void run(const cv::Mat &in, int chan, cv::Mat &out) {
@ -774,15 +818,225 @@ static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, i
// }
// };
namespace {
using chan_to_plane_supported_types = typelist<uint8_t, float>;
// Scalar (no-SIMD) extraction of one interleaved channel into a plane:
// out[x] = in[x * chs + chan] for `length` pixels.
template<typename T>
void chanToPlaneRowImpl(scalar_tag, const T* in, int chan, int chs, T* out, int length) {
    const T* src = in + chan;
    for (int x = 0; x < length; ++x, src += chs) {
        out[x] = *src;
    }
}
// type_dispatch action: maps an element type to a type-erased row-function
// pointer that forwards to chanToPlaneRowImpl for the given ISA tag.
template<typename isa_tag_t>
struct typed_chan_to_plane_row {
    using p_f = void (*)(const uint8_t* in, int chan, int chs, uint8_t* out, int length);

    template <typename type>
    p_f operator()(type_to_type<type>) {
        return [](const uint8_t* in, int chan, int chs, uint8_t* out, int length) {
            chanToPlaneRowImpl(isa_tag_t{},
                               reinterpret_cast<const type*>(in), chan, chs,
                               reinterpret_cast<type*>(out), length);
        };
    }
};
} //namespace
namespace {
using nv12_to_rgb_supported_types = typelist<uint8_t>;
// Scalar NV12 -> interleaved RGB for two rows at a time: one interleaved
// (U, V) pair is shared by a 2x2 block of luma samples.
void nv12ToRgbRowImpl(scalar_tag, const uint8_t** y_rows, const uint8_t* uv_row,
                      uint8_t** out_rows, const int buf_width) {
    for (int col = 0; col < buf_width; col += 2) {
        const uint8_t u = uv_row[col];
        const uint8_t v = uv_row[col + 1];
        int ruv = 0, guv = 0, buv = 0;
        uvToRGBuv(u, v, ruv, guv, buv);  // chroma contributions for the 2x2 block
        for (int dy = 0; dy < 2; ++dy) {
            for (int dx = 0; dx < 2; ++dx) {
                const uint8_t luma = y_rows[dy][col + dx];
                uint8_t r = 0, g = 0, b = 0;
                yRGBuvToRGB(luma, ruv, guv, buv, r, g, b);
                uint8_t* px = &out_rows[dy][3 * (col + dx)];
                px[0] = r;
                px[1] = g;
                px[2] = b;
            }
        }
    }
}
// type_dispatch action: maps an element type to a type-erased NV12->RGB row
// function bound to the given ISA tag.
template<typename isa_tag_t>
struct typed_nv12_to_rgb_row {
    using p_f = void (*)(const uint8_t** y_rows, const uint8_t* uv_row,
                         uint8_t** out_rows, const int buf_width);

    template <typename type>
    p_f operator()(type_to_type<type>) {
        return [](const uint8_t** y_rows, const uint8_t* uv_row,
                  uint8_t** out_rows, const int buf_width) {
            nv12ToRgbRowImpl(isa_tag_t{},
                             reinterpret_cast<const type**>(y_rows),
                             reinterpret_cast<const type*>(uv_row),
                             reinterpret_cast<type**>(out_rows),
                             buf_width);
        };
    }
};
} // namespace
namespace {
using i420_to_rgb_supported_types = typelist<uint8_t>;
// Scalar I420 -> interleaved RGB for two rows at a time. U and V come from
// separate half-width planes, so a 2x2 luma block shares u_row[i/2]/v_row[i/2].
// NOTE: the redundant `static` is dropped - the function already lives in an
// anonymous namespace (internal linkage), matching the sibling nv12ToRgbRowImpl;
// locals use uint8_t for consistency with that sibling (uchar is the same type).
void i420ToRgbRowImpl(scalar_tag, const uint8_t** y_rows,
                      const uint8_t* u_row,
                      const uint8_t* v_row,
                      uint8_t** out_rows,
                      const int buf_width) {
    for (int i = 0; i < buf_width; i += 2) {
        const uint8_t u = u_row[i / 2];
        const uint8_t v = v_row[i / 2];
        int ruv, guv, buv;
        uvToRGBuv(u, v, ruv, guv, buv);  // chroma contributions for the 2x2 block
        for (int y = 0; y < 2; y++) {
            for (int x = 0; x < 2; x++) {
                const uint8_t vy = y_rows[y][i + x];
                uint8_t r, g, b;
                yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
                out_rows[y][3 * (i + x)]     = r;
                out_rows[y][3 * (i + x) + 1] = g;
                out_rows[y][3 * (i + x) + 2] = b;
            }
        }
    }
}
// type_dispatch action: maps an element type to a type-erased I420->RGB row
// function bound to the given ISA tag.
template<typename isa_tag_t>
struct typed_i420_to_rgb_row {
    using p_f = void (*)(const uint8_t** y_rows, const uint8_t* u_row, const uint8_t* v_row,
                         uint8_t** out_rows, const int buf_width);

    template <typename type>
    p_f operator()(type_to_type<type>) {
        return [](const uint8_t** y_rows, const uint8_t* u_row, const uint8_t* v_row,
                  uint8_t** out_rows, const int buf_width) {
            i420ToRgbRowImpl(isa_tag_t{},
                             reinterpret_cast<const type**>(y_rows),
                             reinterpret_cast<const type*>(u_row),
                             reinterpret_cast<const type*>(v_row),
                             reinterpret_cast<type**>(out_rows),
                             buf_width);
        };
    }
};
} // namespace
// Generates the fluid color-conversion kernels for one ISA tag. Each kernel
// picks the right row implementation at run time via type_dispatch on the
// output depth.
// NOTE(review): a stale pre-merge `rowFunc` line (CV_8U ternary referencing
// the removed chanToPlaneRow<T>) was fused into FChanToPlane::run by the
// merged view, duplicating the declaration; it is removed here.
template <typename isa_tag_t>
struct choose_impl {
    GAPI_FLUID_KERNEL(FChanToPlane, ChanToPlane, false) {
        static const int Window = 1;
        static void run(const cv::gapi::fluid::View& in, int chan,
                        cv::gapi::fluid::Buffer& out) {
            GAPI_DbgAssert(is_cv_type_in_list<chan_to_plane_supported_types>(out.meta().depth));
            const auto rowFunc = type_dispatch<chan_to_plane_supported_types>(out.meta().depth, cv_type_id{}, typed_chan_to_plane_row<isa_tag_t>{}, nullptr);
            GAPI_DbgAssert(rowFunc);
            rowFunc(in.InLineB(0), chan, in.meta().chan, out.OutLineB(), in.length());
        }
    };

    GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) {
        static const int Window = 1;
        static const int LPI = 2;  // NV12 chroma row serves two luma rows
        static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
        static void run(const cv::gapi::fluid::View & in_y,
                        const cv::gapi::fluid::View & in_uv,
                        cv::gapi::fluid::Buffer & out) {
            GAPI_DbgAssert(is_cv_type_in_list<nv12_to_rgb_supported_types>(out.meta().depth));
            const uchar* uv_row = in_uv.InLineB(0);
            const uchar* y_rows[2] = { in_y.InLineB(0), in_y.InLineB(1) };
            uchar* out_rows[2] = { out.OutLineB(0), out.OutLineB(1) };
            int buf_width = out.length();
            const auto rowFunc = type_dispatch<nv12_to_rgb_supported_types>(out.meta().depth, cv_type_id{}, typed_nv12_to_rgb_row<isa_tag_t>{}, nullptr);
            GAPI_DbgAssert(rowFunc);
            rowFunc(y_rows, uv_row, out_rows, buf_width);
        }
    };

    GAPI_FLUID_KERNEL(FI420toRGB, I420toRGB, false) {
        static const int Window = 1;
        static const int LPI = 2;  // I420 chroma rows serve two luma rows
        static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
        static void run(const cv::gapi::fluid::View & in_y,
                        const cv::gapi::fluid::View & in_u,
                        const cv::gapi::fluid::View & in_v,
                        cv::gapi::fluid::Buffer & out) {
            GAPI_DbgAssert(is_cv_type_in_list<i420_to_rgb_supported_types>(out.meta().depth));
            const uchar* u_row = in_u.InLineB(0);
            const uchar* v_row = in_v.InLineB(0);
            const uchar* y_rows[2] = { in_y.InLineB(0), in_y.InLineB(1) };
            uchar* out_rows[2] = { out.OutLineB(0), out.OutLineB(1) };
            int buf_width = out.length();
            GAPI_DbgAssert(in_u.length() == in_v.length());
            const auto rowFunc = type_dispatch<i420_to_rgb_supported_types>(out.meta().depth, cv_type_id{}, typed_i420_to_rgb_row<isa_tag_t>{}, nullptr);
            GAPI_DbgAssert(rowFunc);
            rowFunc(y_rows, u_row, v_row, out_rows, buf_width);
        }
    };
};
namespace {
// type_dispatch action: registers the three color-conversion fluid kernels
// (I420->RGB, NV12->RGB, chan-to-plane) specialized for one ISA tag into the
// supplied kernel package.
struct ColorConversionISA {
// Non-owning reference to the package being filled.
cv::gapi::GKernelPackage& pckg;
ColorConversionISA(cv::gapi::GKernelPackage& _pckg) : pckg(_pckg) {}
template<typename isa_tag_t>
bool operator()(type_to_type<isa_tag_t>) {
pckg.include<typename choose_impl<isa_tag_t>::FI420toRGB>();
pckg.include<typename choose_impl<isa_tag_t>::FNV12toRGB>();
pckg.include<typename choose_impl<isa_tag_t>::FChanToPlane>();
//at the moment type_dispatch requires something to be returned by the lambda
return true;
}
};
} //namespace
// Builds the color-conversion kernel package: probes the CPU for the first
// available ISA (search order of isas_set) and registers that ISA's kernel
// specializations.
cv::gapi::GKernelPackage FColorConversionChooseISA() {
    // At the moment the AVX512 wide-intrinsics implementation is slower than
    // AVX2, so AVX512 is excluded from the candidate set for now.
    using isas = remove_t<isas_set, avx512_tag>;

    cv::gapi::GKernelPackage pkg;
    ColorConversionISA filler{pkg};
    type_dispatch<isas>(is_isa_present{}, filler, false);
    return pkg;
}
//----------------------------------------------------------------------
G_TYPED_KERNEL(ScalePlane8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_8u") {
@ -2234,180 +2488,6 @@ GAPI_FLUID_KERNEL(FScalePlaneArea8u, ScalePlaneArea8u, true) {
}
};
static const int ITUR_BT_601_CY = 1220542;
static const int ITUR_BT_601_CUB = 2116026;
static const int ITUR_BT_601_CUG = -409993;
static const int ITUR_BT_601_CVG = -852492;
static const int ITUR_BT_601_CVR = 1673527;
static const int ITUR_BT_601_SHIFT = 20;
// Precomputes the BT.601 chroma contributions for one (U, V) pair in fixed
// point (ITUR_BT_601_SHIFT fractional bits), including the rounding bias.
static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) {
    const int uu = static_cast<int>(u) - 128;  // chroma samples are biased by 128
    const int vv = static_cast<int>(v) - 128;
    const int rounding = 1 << (ITUR_BT_601_SHIFT - 1);
    ruv = rounding + ITUR_BT_601_CVR * vv;
    guv = rounding + ITUR_BT_601_CVG * vv + ITUR_BT_601_CUG * uu;
    buv = rounding + ITUR_BT_601_CUB * uu;
}
// Combines one luma sample (16 offset per BT.601) with precomputed chroma
// contributions and clamps each channel into uchar range.
static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv,
                               uchar& r, uchar& g, uchar& b) {
    const int y = std::max(0, static_cast<int>(vy) - 16) * ITUR_BT_601_CY;
    r = saturate_cast<uchar>((y + ruv) >> ITUR_BT_601_SHIFT);
    g = saturate_cast<uchar>((y + guv) >> ITUR_BT_601_SHIFT);
    b = saturate_cast<uchar>((y + buv) >> ITUR_BT_601_SHIFT);
}
static void calculate_nv12_to_rgb_fallback(const uchar **y_rows,
const uchar *uv_row,
uchar **out_rows,
int buf_width) {
for (int i = 0; i < buf_width; i += 2) {
uchar u = uv_row[i];
uchar v = uv_row[i + 1];
int ruv, guv, buv;
uvToRGBuv(u, v, ruv, guv, buv);
for (int y = 0; y < 2; y++) {
for (int x = 0; x < 2; x++) {
uchar vy = y_rows[y][i + x];
uchar r, g, b;
yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
out_rows[y][3*(i + x)] = r;
out_rows[y][3*(i + x) + 1] = g;
out_rows[y][3*(i + x) + 2] = b;
}
}
}
}
// Plain C++ I420 -> RGB fallback used when no SIMD path is available.
// U and V come from separate half-width planes; a 2x2 luma block shares
// u_row[i/2] / v_row[i/2].
static void calculate_i420_to_rgb_fallback(const uchar **y_rows,
const uchar *u_row,
const uchar *v_row,
uchar **out_rows,
int buf_width) {
for (int i = 0; i < buf_width; i += 2) {
uchar u = u_row[i / 2];
uchar v = v_row[i / 2];
int ruv, guv, buv;
// Chroma contributions are computed once per 2x2 block.
uvToRGBuv(u, v, ruv, guv, buv);
for (int y = 0; y < 2; y++) {
for (int x = 0; x < 2; x++) {
uchar vy = y_rows[y][i + x];
uchar r, g, b;
yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
out_rows[y][3*(i + x)] = r;
out_rows[y][3*(i + x) + 1] = g;
out_rows[y][3*(i + x) + 2] = b;
}
}
}
}
// Fluid kernel: NV12 -> RGB. Processes two output rows per iteration
// (LPI = 2) and picks the widest available SIMD implementation at run time,
// falling back to the plain C++ version otherwise.
GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) {
static const int Window = 1;
static const int LPI = 2;
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
static void run(const cv::gapi::fluid::View &in_y,
const cv::gapi::fluid::View &in_uv,
cv::gapi::fluid::Buffer &out) {
// One interleaved UV row serves two luma rows.
const uchar* uv_row = in_uv.InLineB(0);
const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)};
uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)};
int buf_width = out.length();
// AVX512 implementation of wide universal intrinsics is slower than AVX2.
// It is turned off until the cause isn't found out.
#if 0
#ifdef HAVE_AVX512
if (with_cpu_x86_avx512_core()) {
#define CV_AVX_512DQ 1
avx512::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
return;
}
#endif // HAVE_AVX512
#endif
#ifdef HAVE_AVX2
if (with_cpu_x86_avx2()) {
avx::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
return;
}
#endif // HAVE_AVX2
#ifdef HAVE_SSE
if (with_cpu_x86_sse42()) {
calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
return;
}
#endif // HAVE_SSE
#ifdef HAVE_NEON
// NEON builds have no run-time feature probe; always take the NEON path.
neon::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
return;
#endif // HAVE_NEON
// Scalar fallback: reached only when no SIMD path applied above.
calculate_nv12_to_rgb_fallback(y_rows, uv_row, out_rows, buf_width);
}
};
// Fluid kernel: I420 -> RGB (separate U and V planes). Processes two output
// rows per iteration (LPI = 2) and picks the widest available SIMD
// implementation at run time, falling back to the plain C++ version otherwise.
GAPI_FLUID_KERNEL(FI420toRGB, I420toRGB, false) {
static const int Window = 1;
static const int LPI = 2;
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
static void run(const cv::gapi::fluid::View &in_y,
const cv::gapi::fluid::View &in_u,
const cv::gapi::fluid::View &in_v,
cv::gapi::fluid::Buffer &out) {
// One U row and one V row (each half-width) serve two luma rows.
const uchar* u_row = in_u.InLineB(0);
const uchar* v_row = in_v.InLineB(0);
const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)};
uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)};
int buf_width = out.length();
GAPI_DbgAssert(in_u.length() == in_v.length());
// AVX512 implementation of wide universal intrinsics is slower than AVX2.
// It is turned off until the cause isn't found out.
#if 0
#ifdef HAVE_AVX512
if (with_cpu_x86_avx512_core()) {
#define CV_AVX_512DQ 1
avx512::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
return;
}
#endif // HAVE_AVX512
#endif
#ifdef HAVE_AVX2
if (with_cpu_x86_avx2()) {
avx::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
return;
}
#endif // HAVE_AVX2
#ifdef HAVE_SSE
if (with_cpu_x86_sse42()) {
calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
return;
}
#endif // HAVE_SSE
#ifdef HAVE_NEON
// NEON builds have no run-time feature probe; always take the NEON path.
neon::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
return;
#endif // HAVE_NEON
// Scalar fallback: reached only when no SIMD path applied above.
calculate_i420_to_rgb_fallback(y_rows, u_row, v_row, out_rows, buf_width);
}
};
namespace {
template <typename src_t, typename dst_t>
@ -2520,9 +2600,10 @@ GAPI_FLUID_KERNEL(FDivC, GDivC, false) {
using namespace kernels;
cv::gapi::GKernelPackage preprocKernels() {
return cv::gapi::kernels
< FChanToPlane
, FScalePlanes
return combine(
FColorConversionChooseISA(),
cv::gapi::kernels
<FScalePlanes
, FScalePlanes4
, FScalePlane
, FScalePlane32f
@ -2537,12 +2618,10 @@ cv::gapi::GKernelPackage preprocKernels() {
, FSplit2
, FSplit3
, FSplit4
, FNV12toRGB
, FI420toRGB
, FConvertDepth
, FSubC
, FDivC
>();
>());
}
} // namespace gapi

View File

@ -34,6 +34,12 @@ namespace InferenceEngine {
namespace gapi {
namespace kernels {
// Empty dispatch tags, one per instruction-set family. Used to select the
// ISA-specific kernel implementations via overloading / type_dispatch.
struct avx512_tag {};
struct avx2_tag {};
struct sse42_tag {};
struct neon_tag {};
// Plain C++ implementation; always available.
struct scalar_tag {};
// saturate_cast<DST>(x): OpenCV-style conversion that clamps to DST's
// representable range. Only the specializations these kernels need are defined.
template<typename DST, typename SRC> static inline DST saturate_cast(SRC x);
template<> inline short saturate_cast(int x) { return (std::min)(SHRT_MAX, (std::max)(SHRT_MIN, x)); }
// Rounds to nearest first (std::rint), then clamps via the int specialization.
template<> inline short saturate_cast(float x) { return saturate_cast<short>(static_cast<int>(std::rint(x))); }
@ -116,6 +122,31 @@ static inline Q8_8 mulaw(Q0_16 a, Q8_8 w) { return static_cast<Q8_8>((a * w) >>
static inline float mulas(float a, float s) { return a * s; }
static inline float mulaw(float a, float w) { return a * w; }
static const int ITUR_BT_601_CY = 1220542;
static const int ITUR_BT_601_CUB = 2116026;
static const int ITUR_BT_601_CUG = -409993;
static const int ITUR_BT_601_CVG = -852492;
static const int ITUR_BT_601_CVR = 1673527;
static const int ITUR_BT_601_SHIFT = 20;
// Precomputes the BT.601 chroma contributions (fixed point with
// ITUR_BT_601_SHIFT fractional bits, rounding bias included) for one
// (U, V) pair; the results feed yRGBuvToRGB per luma sample.
static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) {
int uu, vv;
uu = static_cast<int>(u) - 128;  // chroma samples are biased by 128
vv = static_cast<int>(v) - 128;
ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * vv;
guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * vv + ITUR_BT_601_CUG * uu;
buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * uu;
}
// Combines one luma sample with precomputed chroma contributions into an RGB
// triple (BT.601). Luma carries a 16 offset; channels are clamped to uchar.
static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv,
uchar& r, uchar& g, uchar& b) {
int yy = static_cast<int>(vy);
int y = std::max(0, yy - 16) * ITUR_BT_601_CY;
r = saturate_cast<uchar>((y + ruv) >> ITUR_BT_601_SHIFT);
g = saturate_cast<uchar>((y + guv) >> ITUR_BT_601_SHIFT);
b = saturate_cast<uchar>((y + buv) >> ITUR_BT_601_SHIFT);
}
} // namespace kernels
} // namespace gapi
} // namespace InferenceEngine

Some files were not shown because too many files have changed in this diff Show More