Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Steve Yoo 2021-06-17 08:28:23 +09:00
commit 6ce8d8ce66
888 changed files with 36386 additions and 20767 deletions

View File

@ -155,10 +155,9 @@ def getConfigurationsMap() {
CONFIGURATION_WORKFLOW = { configuration -> CONFIGURATION_WORKFLOW = { configuration ->
node("OpenVINO") { node("OpenVINO") {
String workdir = "${HOME}/workspace/${BUILD_NUMBER}_${env.CHANGE_ID}_${configuration.name}"
try { try {
PROJECT_NAME = "openvino" PROJECT_NAME = "openvino"
String workdir = "${HOME}/workspace/${BUILD_NUMBER}_${env.CHANGE_ID}_${configuration.name}"
stage("Clone repository") { stage("Clone repository") {
prepare_repository(workdir) prepare_repository(workdir)
} }
@ -185,10 +184,10 @@ CONFIGURATION_WORKFLOW = { configuration ->
} }
finally { finally {
stage("Cleanup") { stage("Cleanup") {
deleteDir()
String docker_container_name = get_docker_container_name(configuration) String docker_container_name = get_docker_container_name(configuration)
sh """ sh """
docker rm -f ${docker_container_name} docker rm -f ${docker_container_name}
rm -rf ${workdir}
""" """
} }
} }

View File

@ -63,41 +63,3 @@ jobs:
python3 -m xmlrunner discover -p *_test.py --output=../mo-ut-logs python3 -m xmlrunner discover -p *_test.py --output=../mo-ut-logs
working-directory: model-optimizer working-directory: model-optimizer
build_wheel:
name: Build Python wheel
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v2
- name: Install dependencies
run: |
python3 -m pip install --upgrade pip
python3 -m pip install wheel setuptools
python3 -m pip install tensorflow==2.3.0
- name: Build
run: |
python3 setup.py sdist bdist_wheel
working-directory: model-optimizer
- name: Test package content
run: |
echo "src = open('openvino_mo.egg-info/SOURCES.txt', 'rt').read().split()" | tee -a test_wheel.py
echo "ref = open('automation/package_BOM.txt', 'rt').read().split()" | tee -a test_wheel.py
echo "for name in ref:" | tee -a test_wheel.py
echo " if name.endswith('.py'):" | tee -a test_wheel.py
echo " assert name in src or './' + name in src, name + ' file missed'" | tee -a test_wheel.py
python3 test_wheel.py
working-directory: model-optimizer
- name: Test conversion
run: |
wget -q http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz
tar -xf mobilenet_v1_1.0_224.tgz
python3 -m pip install model-optimizer/dist/*.whl
python3 -m mo --input_model mobilenet_v1_1.0_224_frozen.pb --input_shape "[1,224,224,3]"
- uses: actions/upload-artifact@v2
with:
name: mo_wheel
path: "model-optimizer/dist/*.whl"

View File

@ -169,10 +169,11 @@ ie_shellcheck_process(DIRECTORY "${OpenVINO_MAIN_SOURCE_DIR}"
"${IE_MAIN_SOURCE_DIR}/thirdparty" "${IE_MAIN_SOURCE_DIR}/thirdparty"
"${IE_MAIN_SOURCE_DIR}/temp" "${IE_MAIN_SOURCE_DIR}/temp"
# TODO fix and enable back: # TODO fix and enable back:
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies" "${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/scripts/dependencies.sh"
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/demo" "${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies/install_NEO_OCL_driver.sh"
"${OpenVINO_MAIN_SOURCE_DIR}/ngraph" "${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies/install_openvino_dependencies.sh"
"${IE_MAIN_SOURCE_DIR}/scripts") "${OpenVINO_MAIN_SOURCE_DIR}/ngraph/python/tests/test_onnx/model_zoo_preprocess.sh"
)
# #
# cpack # cpack

View File

@ -11,18 +11,27 @@
* *element_type* * *element_type*
* **Description**: the type of element of output tensor * **Description**: the type of element of output tensor
* **Range of values**: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16 * **Range of values**: u1, u4, u8, u16, u32, u64, i4, i8, i16, i32, i64, f16, f32, boolean, bf16
* **Type**: string * **Type**: `string`
* **Default value**: None * **Default value**: None
* **Required**: *Yes* * **Required**: *yes*
* *shape* * *shape*
* **Description**: the shape of the output tensor * **Description**: the shape of the output tensor
* **Range of values**: list of non-negative integers, empty list is allowed that means 0D or scalar tensor * **Range of values**: list of non-negative integers, empty list is allowed, which means 0D or scalar tensor
* **Type**: int[] * **Type**: `int[]`
* **Default value**: None * **Default value**: None
* **Required**: *Yes* * **Required**: *yes*
**Outputs**
* **1**: Output tensor of type *T* and shape equal to *shape* attribute.
**Types**
* *T*: any type from *element type* values.
**Example** **Example**

View File

@ -8,9 +8,7 @@
**Detailed description**: **Detailed description**:
The *ExtractImagePatches* operation is similar to the TensorFlow* operation [ExtractImagePatches](https://www.tensorflow.org/api_docs/python/tf/image/extract_patches). The *ExtractImagePatches* operation extracts patches of shape `sizes` which are `strides` apart in the input image. The output elements are taken from the input at intervals given by the `rate` argument, as in dilated convolutions.
This op extracts patches of shape `sizes` which are `strides` apart in the input image. The output elements are taken from the input at intervals given by the `rate` argument, as in dilated convolutions.
The result is a 4D tensor containing image patches with size `size[0] * size[1] * depth` vectorized in the "depth" dimension. The result is a 4D tensor containing image patches with size `size[0] * size[1] * depth` vectorized in the "depth" dimension.
@ -92,20 +90,23 @@ The "auto_pad" attribute has no effect on the size of each patch, it determines
Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We use the symbol `x` to mark output patches. Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We use the symbol `x` to mark output patches.
1. `sizes="3,3", strides="5,5", rates="1,1", auto_pad="valid"` 1. `sizes="3,3", strides="5,5", rates="1,1", auto_pad="valid"`
\f[
  x   x   x    4   5   x   x   x   9 10 \begin{bmatrix}
  x   x   x  14 15   x   x   x 19 20 x & x & x & 4 & 5 & x & x & x & 9 & 10 \\
  x   x   x  24 25   x   x   x 29 30 x & x & x & 14 & 15 & x & x & x & 19 & 20 \\
31 32 33 34 35 36 37 38 39 40 x & x & x & 24 & 25 & x & x & x & 29 & 30 \\
41 42 43 44 45 46 47 48 49 50 31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 \\
  x   x   x  54 55   x   x   x 59 60 41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 \\
  x   x   x  64 65   x   x   x 69 70 x & x & x & 54 & 55 & x & x & x & 59 & 60 \\
  x   x   x  74 75   x   x   x 79 80 x & x & x & 64 & 65 & x & x & x & 69 & 70 \\
81 82 83 84 85 86 87 88 89 90 x & x & x & 74 & 75 & x & x & x & 79 & 80 \\
91 92 93 94 95 96 97 98 99 100 81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
91 & 92 & 93 & 94 & 95 & 96 & 97 & 98 & 99 & 100
\end{bmatrix}
\f]
output: output:
```
[[[[ 1 6] [[[[ 1 6]
[51 56]] [51 56]]
@ -132,24 +133,27 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
[[23 28] [[23 28]
[73 78]]]] [73 78]]]]
```
output shape: `[1, 9, 2, 2]` output shape: `[1, 9, 2, 2]`
2. `sizes="4,4", strides="8,8", rates="1,1", auto_pad="valid"` 2. `sizes="4,4", strides="8,8", rates="1,1", auto_pad="valid"`
\f[
  x   x   x   x    5   6   7   8   9 10 \begin{bmatrix}
  x   x   x   x  15 16 17 18 19 20 x & x & x & x & 5 & 6 & 7 & 8 & 9 & 10 \\
  x   x   x   x  25 26 27 28 29 30 x & x & x & x & 15 & 16 & 17 & 18 & 19 & 20 \\
  x   x   x   x  35 36 37 38 39 40 x & x & x & x & 25 & 26 & 27 & 28 & 29 & 30 \\
41 42 43 44 45 46 47 48 49 50 x & x & x & x & 35 & 36 & 37 & 38 & 39 & 40 \\
51 52 53 54 55 56 57 58 59 60 41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 \\
61 62 63 64 65 66 67 68 69 70 51 & 52 & 53 & 54 & 55 & 56 & 57 & 58 & 59 & 60 \\
71 72 73 74 75 76 77 78 79 80 61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 \\
81 82 83 84 85 86 87 88 89 90 71 & 72 & 73 & 74 & 75 & 76 & 77 & 78 & 79 & 80 \\
91 92 93 94 95 96 97 98 99 100 81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
91 & 92 & 93 & 94 & 95 & 96 & 97 & 98 & 99 & 100
\end{bmatrix}
\f]
output: output:
```
[[[[ 1]] [[[[ 1]]
[[ 2]] [[ 2]]
@ -181,27 +185,29 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
[[33]] [[33]]
[[34]]]] [[34]]]]
```
output shape: `[1, 16, 1, 1]` output shape: `[1, 16, 1, 1]`
3. `sizes="4,4", strides="9,9", rates="1,1", auto_pad="same_upper"` 3. `sizes="4,4", strides="9,9", rates="1,1", auto_pad="same_upper"`
\f[
  x   x   x   x    0   0   0   0   0   x   x   x   x \begin{bmatrix}
  x   x   x   x    4   5   6   7   8   x   x   x   x x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x\\
  x   x   x   x  14 15 16 17 18   x   x   x   x x & x & x & x & 4 & 5 & 6 & 7 & 8 & x & x & x & x\\
  x   x   x   x  24 25 26 27 28   x   x   x   x x & x & x & x & 14 & 15 & 16 & 17 & 18 & x & x & x & x\\
  0 31 32 33 34 35 36 37 38 39 40   0   0 x & x & x & x & 24 & 25 & 26 & 27 & 28 & x & x & x & x\\
  0 41 42 43 44 45 46 47 48 49 50   0   0 0 & 31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 & 0 & 0\\
  0 51 52 53 54 55 56 57 58 59 60   0   0 0 & 41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 & 0 & 0\\
  0 61 62 63 64 65 66 67 68 69 70   0   0 0 & 51 & 52 & 53 & 54 & 55 & 56 & 57 & 58 & 59 & 60 & 0 & 0\\
  0 71 72 73 74 75 76 77 78 79 80   0   0 0 & 61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 & 0 & 0\\
  x   x   x   x  84 85 86 87 88   x   x   x   x 0 & 71 & 72 & 73 & 74 & 75 & 76 & 77 & 78 & 79 & 80 & 0 & 0\\
  x   x   x   x  94 95 96 97 98   x   x   x   x x & x & x & x & 84 & 85 & 86 & 87 & 88 & x & x & x & x\\
  x   x   x   x    0   0   0   0   0   x   x   x   x x & x & x & x & 94 & 95 & 96 & 97 & 98 & x & x & x & x\\
  x   x   x   x    0   0   0   0   0   x   x   x   x x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x\\
x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x
\end{bmatrix}
\f]
output: output:
```
[[[[ 0 0] [[[[ 0 0]
[ 0 89]] [ 0 89]]
@ -249,25 +255,28 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
[[ 23 0] [[ 23 0]
[ 0 0]]]] [ 0 0]]]]
```
output shape: `[1, 16, 2, 2]` output shape: `[1, 16, 2, 2]`
4. `sizes="3,3", strides="5,5", rates="2,2", auto_pad="valid"` 4. `sizes="3,3", strides="5,5", rates="2,2", auto_pad="valid"`
This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches: This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches:
\f[
  x   2   x   4   x   y   7   y   9   y \begin{bmatrix}
11 12 13 14 15 16 17 18 19 20 x & 2 & x & 4 & x & y & 7 & y & 9 & y \\
  x  22   x 24   x   y 27   y 29   y 11 & 12 & 13 & 14 & 15 & 16 & 17 & 18 & 19 & 20 \\
31 32 33 34 35 36 37 38 39 40 x & 22 & x & 24 & x & y & 27 & y & 29 & y \\
  x  42   x 44   x   y 47   y 49   y 31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 \\
  z  52   z 54   z   k 57   k 59   k x & 42 & x & 44 & x & y & 47 & y & 49 & y \\
61 62 63 64 65 66 67 68 69 70 z & 52 & z & 54 & z & k & 57 & k & 59 & k \\
  z  72   z 74   z   k 77   k 79   k 61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 \\
81 82 83 84 85 86 87 88 89 90 z & 72 & z & 74 & z & k & 77 & k & 79 & k \\
  z  92   z 94   z   k 97   k 99   k 81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
z & 92 & z & 94 & z & k & 97 & k & 99 & k
\end{bmatrix}
\f]
output: output:
```
[[[[ 1 6] [[[[ 1 6]
[ 51 56]] [ 51 56]]
@ -294,26 +303,30 @@ This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches:
[[ 45 50] [[ 45 50]
[ 95 100]]]] [ 95 100]]]]
```
output_shape: `[1, 9, 2, 2]` output_shape: `[1, 9, 2, 2]`
5. `sizes="2,2", strides="3,3", rates="1,1", auto_pad="valid"` 5. `sizes="2,2", strides="3,3", rates="1,1", auto_pad="valid"`
Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature map with coordinate 0 contains numbers in a range `[1, 25]` and feature map with coordinate 1 contains numbers in a range `[26, 50]` Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature map with coordinate 0 contains numbers in a range `[1, 25]` and feature map with coordinate 1 contains numbers in a range `[26, 50]`
  x   x   3   x   x \f[
  6   7   8   x   x \begin{bmatrix}
11 12 13 14 15 x & x & 3 & x & x\\
  x   x  18   x   x x & x & 8 & x & x\\
  x   x  23   x   x 11 & 12 & 13 & 14 & 15\\
x & x & 18 & x & x\\
  x   x  28   x   x x & x & 23 & x & x
  x   x  33   x   x \end{bmatrix}\\
36 37 38 39 40 \begin{bmatrix}
  x   x  43   x   x x & x & 28 & x & x\\
  x   x  48   x   x x & x & 33 & x & x\\
36 & 37 & 38 & 39 & 40\\
x & x & 43 & x & x\\
x & x & 48 & x & x
\end{bmatrix}
\f]
output: output:
```
[[[[ 1 4] [[[[ 1 4]
[16 19]] [16 19]]
@ -337,5 +350,5 @@ Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature ma
[[32 35] [[32 35]
[47 50]]]] [47 50]]]]
```
output shape: `[1, 8, 2, 2]` output shape: `[1, 8, 2, 2]`

View File

@ -8,12 +8,37 @@
**Short description**: *ShuffleChannels* permutes data in the channel dimension of the input tensor. **Short description**: *ShuffleChannels* permutes data in the channel dimension of the input tensor.
**Detailed description**:
Input tensor of `data_shape` is always interpreted as 4D tensor with the following shape:
dim 0: data_shape[0] * data_shape[1] * ... * data_shape[axis-1]
(or 1 if axis == 0)
dim 1: group
dim 2: data_shape[axis] / group
dim 3: data_shape[axis+1] * data_shape[axis+2] * ... * data_shape[data_shape.size()-1]
(or 1 if axis points to last dimension)
Trailing and leading to `axis` dimensions are flattened and reshaped back to the original shape after channels shuffling.
The operation is equivalent to the following transformation of the input tensor `x` of shape `[N, C, H, W]` and `axis = 1`:
\f[
x' = reshape(x, [N, group, C / group, H * W])\\
x'' = transpose(x', [0, 2, 1, 3])\\
y = reshape(x'', [N, C, H, W])\\
\f]
where `group` is the layer attribute described below.
**Attributes**: **Attributes**:
* *axis* * *axis*
* **Description**: *axis* specifies the index of a channel dimension. * **Description**: *axis* specifies the index of a channel dimension.
* **Range of values**: an integer number in the range [-4, 3] * **Range of values**: an integer number in the range `[-rank(data_shape), rank(data_shape) - 1]`
* **Type**: `int` * **Type**: `int`
* **Default value**: 1 * **Default value**: 1
* **Required**: *No* * **Required**: *No*
@ -21,30 +46,22 @@
* *group* * *group*
* **Description**: *group* specifies the number of groups to split the channel dimension into. This number must evenly divide the channel dimension size. * **Description**: *group* specifies the number of groups to split the channel dimension into. This number must evenly divide the channel dimension size.
* **Range of values**: a positive integer * **Range of values**: a positive integer in the range `[1, data_shape[axis]]`
* **Type**: `int` * **Type**: `int`
* **Default value**: 1 * **Default value**: 1
* **Required**: *No* * **Required**: *No*
**Inputs**: **Inputs**:
* **1**: 4D input tensor of any supported data type. Required. * **1**: `data` input tensor of type *T* and rank greater or equal to 1. **Required.**
**Outputs**: **Outputs**:
* **1**: 4D input tensor with shape and element type as for the input tensor. * **1**: Output tensor with element type *T* and same shape as the input tensor.
**Mathematical Formulation** **Types**
The operation is the equivalent with the following transformation of the input tensor *x* of shape *[N, C, H, W]*: * *T*: any supported numeric type.
```
x' = reshape(x, [N, group, C / group, H * W])
x'' = transpose(x', [0, 2, 1, 3])
y = reshape(x'', [N, C, H, W])
```
where `group` is the layer parameter described above and the `axis = 1`.
**Example** **Example**

View File

@ -8,20 +8,20 @@
**Detailed description**: **Detailed description**:
The *SpaceToBatch* operation is similar to the TensorFlow* operation [SpaceToBatchND](https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd)
The operation is equivalent to the following transformation of the input tensor `data` of shape `[batch, D_1, D_2 ... D_{N - 1}]` and `block_shape`, `pads_begin`, `pads_end` of shapes `[N]` to *Y* output tensor. The operation is equivalent to the following transformation of the input tensor `data` of shape `[batch, D_1, D_2 ... D_{N - 1}]` and `block_shape`, `pads_begin`, `pads_end` of shapes `[N]` to *Y* output tensor.
Zero-pad the start and end of dimensions [D_0, ..., D_{N - 1}] of the input according to `pads_begin` and `pads_end`: Zero-pad the start and end of dimensions \f$[D_0, \dots, D_{N - 1}]\f$ of the input according to `pads_begin` and `pads_end`:
note: P_0 for batch dimension is expected to be 0 (no-padding).
x = [batch + P_0, D_1 + P_1, D_2 + P_2, ..., D_{N - 1} + P_{N - 1}], where P_i = pads_begin[i] + pads_end[i]
note: B_0 for batch is ignored. \f[x = [batch + P_0, D_1 + P_1, D_2 + P_2, \dots, D_{N - 1} + P_{N - 1}]\f]
x' = reshape(x, [batch, (D_1 + P_1) / B_1, B_1, (D_2 + P_2) / B_2, B_2, ..., (D_{N - 1} + P_{N - 1}) / B_{N - 1}, B_{N - 1}]), where B_i = block_shape[i] \f[x' = reshape(x, [batch, \frac{D_1 + P_1}{B_1}, B_1, \frac{D_2 + P_2}{B_2}, B_2, \dots, \frac{D_{N - 1} + P_{N - 1}}{B_{N - 1}}, B_{N - 1}])\f]
\f[x'' = transpose(x', [2, 4, \dots, (N - 1) + (N - 1), 0, 1, 3, \dots, N + (N - 1)])\f]
\f[y = reshape(x'', [batch \times B_1 \times \dots \times B_{N - 1}, \frac{D_1 + P_1}{B_1}, \frac{D_2 + P_2}{B_2}, \dots, \frac{D_{N - 1} + P_{N - 1}}{B_{N - 1}}])\f]
x'' = transpose(x', [2, 4, ..., (N - 1) + (N - 1), 0, 1, 3, ..., N + (N - 1)]) where
- \f$P_i\f$ = pads_begin[i] + pads_end[i]
y = reshape(x'', [batch * B_1 * ... * B_{N - 1}, (D_1 + P_1) / B_1, (D_2 + P_2) / B_2, ... , (D_{N - 1} + P_{N - 1}) / B_{N - 1}]) - \f$B_i\f$ = block_shape[i]
- \f$P_0\f$ for batch dimension is expected to be 0 (no-padding)
- \f$B_0\f$ for batch is ignored
**Attributes** **Attributes**
@ -36,7 +36,7 @@ The operation is equivalent to the following transformation of the input tensor
**Outputs** **Outputs**
* **1**: N-D tensor with shape `[batch * block_shape[0] * block_shape[1] * ... * block_shape[N - 1], (pads_begin[1] + D_1 + pads_end[1]) / block_shape[1], (pads_begin[2] + D_2 + pads_end[2]) / block_shape[2], ..., (pads_begin[N - 1] + D_{N - 1} + pads_end[N - 1]) / block_shape[N - 1]` of the same type as `data` input. * **1**: N-D tensor with shape `[batch * block_shape[0] * block_shape[1] * ... * block_shape[N - 1], (D_1 + pads_begin[1] + pads_end[1]) / block_shape[1], (D_2 + pads_begin[2] + pads_end[2]) / block_shape[2], ..., (D_{N - 1} + pads_begin[N - 1] + pads_end[N - 1]) / block_shape[N - 1]]` of the same type as `data` input.
**Types** **Types**

View File

@ -5,13 +5,12 @@
**Category**: Data movement operation **Category**: Data movement operation
**Short description**: *StridedSlice* extracts a strided slice of a tensor. **Short description**: *StridedSlice* extracts a strided slice of a tensor.
It is similar to generalized array indexing in Python\*.
**Attributes** **Attributes**
* *begin_mask* * *begin_mask*
* **Description**: *begin_mask* is a bit mask. *begin_mask[i]* equal to 1 means that the corresponding dimension of the `begin` input is ignored and the 'real' beginning of the tensor is used along corresponding dimension. * **Description**: *begin_mask* is a bit mask. *begin_mask[i]* equal to `1` means that the corresponding dimension of the `begin` input is ignored and the 'real' beginning of the tensor is used along corresponding dimension.
* **Range of values**: a list of `0`s and `1`s * **Range of values**: a list of `0`s and `1`s
* **Type**: `int[]` * **Type**: `int[]`
* **Default value**: None * **Default value**: None
@ -19,7 +18,7 @@
* *end_mask* * *end_mask*
* **Description**: *end_mask* is a bit mask. If *end_mask[i]* is 1, the corresponding dimension of the `end` input is ignored and the real 'end' of the tensor is used along corresponding dimension. * **Description**: *end_mask* is a bit mask. If *end_mask[i]* is `1`, the corresponding dimension of the `end` input is ignored and the real 'end' of the tensor is used along corresponding dimension.
* **Range of values**: a list of `0`s and `1`s * **Range of values**: a list of `0`s and `1`s
* **Type**: `int[]` * **Type**: `int[]`
* **Default value**: None * **Default value**: None
@ -27,7 +26,7 @@
* *new_axis_mask* * *new_axis_mask*
* **Description**: *new_axis_mask* is a bit mask. If *new_axis_mask[i]* is 1, a length 1 dimension is inserted on the `i`-th position of input tensor. * **Description**: *new_axis_mask* is a bit mask. If *new_axis_mask[i]* is `1`, a length 1 dimension is inserted on the `i`-th position of input tensor.
* **Range of values**: a list of `0`s and `1`s * **Range of values**: a list of `0`s and `1`s
* **Type**: `int[]` * **Type**: `int[]`
* **Default value**: `[0]` * **Default value**: `[0]`
@ -35,7 +34,7 @@
* *shrink_axis_mask* * *shrink_axis_mask*
* **Description**: *shrink_axis_mask* is a bit mask. If *shrink_axis_mask[i]* is 1, the dimension on the `i`-th position is deleted. * **Description**: *shrink_axis_mask* is a bit mask. If *shrink_axis_mask[i]* is `1`, the dimension on the `i`-th position is deleted.
* **Range of values**: a list of `0`s and `1`s * **Range of values**: a list of `0`s and `1`s
* **Type**: `int[]` * **Type**: `int[]`
* **Default value**: `[0]` * **Default value**: `[0]`
@ -51,21 +50,83 @@
**Inputs**: **Inputs**:
* **1**: Multidimensional input tensor to be sliced. Required. * **1**: `data` - input tensor to be sliced of type `T` and arbitrary shape. **Required.**
* **2**: `begin` input - 1D input tensor with begin indexes for input tensor slicing. Required. * **2**: `begin` - 1D tensor of type `T_IND` with begin indexes for input tensor slicing. **Required.**
Out-of-bounds values are silently clamped. If `begin_mask[i]` is 1, the value of `begin[i]` is ignored Out-of-bounds values are silently clamped. If `begin_mask[i]` is `1`, the value of `begin[i]` is ignored and the range of the appropriate dimension starts from `0`. Negative values mean indexing starts from the end. For example, if `data=[1,2,3]`, `begin[0]=-1` means `begin[0]=3`.
and the range of the appropriate dimension starts from 0.
Negative values mean indexing starts from the end. For example, if `foo=[1,2,3]`, `begin[0]=-1` means `begin[0]=3`.
* **3**: `end` input - 1D input tensor with end indexes for input tensor slicing. Required. * **3**: `end` - 1D tensor of type `T_IND` with end indexes for input tensor slicing. **Required.**
Out-of-bounds values will be silently clamped. If `end_mask[i]` is 1, the value of `end[i]` is ignored Out-of-bounds values will be silently clamped. If `end_mask[i]` is `1`, the value of `end[i]` is ignored and the full range of the appropriate dimension is used instead. Negative values mean indexing starts from the end. For example, if `data=[1,2,3]`, `end[0]=-1` means `end[0]=3`.
and the full range of the appropriate dimension is used instead.
Negative values mean indexing starts from the end. For example, if `foo=[1,2,3]`, `end[0]=-1` means `end[0]=3`.
* **4**: `stride` input - 1D input tensor with strides. Optional. * **4**: `stride` - 1D tensor of type `T_IND` with strides. **Optional.**
**Types**
* *T*: any supported type.
* *T_IND*: any supported integer type.
**Example** **Example**
Example of `begin_mask` & `end_mask` usage.
```xml
<layer ... type="StridedSlice" ...>
<data begin_mask="0,1,1" ellipsis_mask="0,0,0" end_mask="1,1,0" new_axis_mask="0,0,0" shrink_axis_mask="0,0,0"/>
<input>
<port id="0">
<dim>2</dim>
<dim>3</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>3</dim> <!-- begin: [1, 0, 0] -->
</port>
<port id="2">
<dim>3</dim> <!-- end: [0, 0, 2] -->
</port>
<port id="3">
<dim>3</dim> <!-- stride: [1, 1, 1] -->
</port>
</input>
<output>
<port id="4">
<dim>1</dim>
<dim>3</dim>
<dim>2</dim>
</port>
</output>
</layer>
```
Example of `new_axis_mask` usage.
```xml
<layer ... type="StridedSlice" ...>
<data begin_mask="0,1,1" ellipsis_mask="0,0,0" end_mask="0,1,1" new_axis_mask="1,0,0" shrink_axis_mask="0,0,0"/>
<input>
<port id="0">
<dim>2</dim>
<dim>3</dim>
<dim>4</dim>
</port>
<port id="1">
<dim>2</dim>
</port>
<port id="2">
<dim>2</dim>
</port>
<port id="3">
<dim>2</dim>
</port>
</input>
<output>
<port id="4">
<dim>1</dim>
<dim>2</dim>
<dim>3</dim>
<dim>4</dim>
</port>
</output>
</layer>
```
Example of `shrink_axis_mask` usage.
```xml ```xml
<layer ... type="StridedSlice" ...> <layer ... type="StridedSlice" ...>
<data begin_mask="1,0,1,1,1" ellipsis_mask="0,0,0,0,0" end_mask="1,0,1,1,1" new_axis_mask="0,0,0,0,0" shrink_axis_mask="0,1,0,0,0"/> <data begin_mask="1,0,1,1,1" ellipsis_mask="0,0,0,0,0" end_mask="1,0,1,1,1" new_axis_mask="0,0,0,0,0" shrink_axis_mask="0,1,0,0,0"/>

View File

@ -2,9 +2,10 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include "single_layer_tests/reshape.hpp"
#include <vector> #include <vector>
#include "single_layer_tests/reshape.hpp"
#include "common_test_utils/test_constants.hpp" #include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions; using namespace LayerTestsDefinitions;
@ -14,31 +15,45 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,
}; };
INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckDynBatch, ReshapeLayerTest, INSTANTIATE_TEST_CASE_P(
smoke_ReshapeCheckDynBatch, ReshapeLayerTestRevise,
::testing::Combine( ::testing::Combine(
::testing::Values(true), ::testing::Values(true), ::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})), ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})), ::testing::Values(std::vector<int64_t>({30, 30, 30, 30})),
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
::testing::Values(std::map<std::string, std::string>({}))), ::testing::Values(std::map<std::string, std::string>({}))),
ReshapeLayerTest::getTestCaseName); ReshapeLayerTestRevise::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTest, INSTANTIATE_TEST_CASE_P(
smoke_ReshapeCheck, ReshapeLayerTestRevise,
::testing::Combine( ::testing::Combine(
::testing::Values(true), ::testing::Values(true), ::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({10, 10, 10, 10})), ::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
::testing::Values(std::vector<size_t>({10, 0, 100})), ::testing::Values(std::vector<int64_t>({10, 0, 100})),
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
::testing::Values(std::map<std::string, std::string>({}))), ::testing::Values(std::map<std::string, std::string>({}))),
ReshapeLayerTest::getTestCaseName); ReshapeLayerTestRevise::getTestCaseName);
INSTANTIATE_TEST_CASE_P(
smoke_ReshapeCheckNegative, ReshapeLayerTestRevise,
::testing::Combine(
::testing::Values(true), ::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
::testing::Values(std::vector<int64_t>({10, -1, 100})),
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
::testing::Values(std::map<std::string, std::string>({}))),
ReshapeLayerTestRevise::getTestCaseName);
} // namespace } // namespace

View File

@ -68,6 +68,10 @@ if(ENABLE_WHEEL)
add_subdirectory(wheel) add_subdirectory(wheel)
endif() endif()
if (NGRAPH_PYTHON_BUILD_ENABLE)
add_dependencies(ie_api _pyngraph)
endif()
# install # install
ie_cpack_add_component(${PYTHON_VERSION}) ie_cpack_add_component(${PYTHON_VERSION})

View File

@ -55,7 +55,7 @@ fi
if [[ "${APPS_TO_RUN}" -ge 4 ]] ; then if [[ "${APPS_TO_RUN}" -ge 4 ]] ; then
# For more than 4 multidevice testing # For more than 4 multidevice testing
for (( VAR = 4; VAR <= ${APPS_TO_RUN}; ++VAR )); do for (( VAR = 4; VAR <= APPS_TO_RUN; ++VAR )); do
./${APP_NAME} --gtest_filter=*VPURegTest*YOLO*myriad* & ./${APP_NAME} --gtest_filter=*VPURegTest*YOLO*myriad* &
pids+=" $!" pids+=" $!"
done done

View File

@ -33,7 +33,7 @@ target_include_directories(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES> $<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
${CLDNN__OCL_ICD_INCDIRS} ${CLDNN__OCL_ICD_INCDIRS}
${CLDNN_TOP_FOLDER}) ${CLDNN_TOP_FOLDER}/api)
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})

View File

@ -5,7 +5,7 @@
#pragma once #pragma once
#include <ie_layouts.h> #include <ie_layouts.h>
#include <api/layout.hpp> #include <cldnn/runtime/layout.hpp>
#include "ngraph/type/element_type.hpp" #include "ngraph/type/element_type.hpp"

View File

@ -9,7 +9,7 @@
#include "cldnn_custom_layer.h" #include "cldnn_custom_layer.h"
#include <api/network.hpp> #include <cldnn/graph/network.hpp>
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -10,7 +10,7 @@
#include <map> #include <map>
#include <ie_common.h> #include <ie_common.h>
#include "pugixml.hpp" #include "pugixml.hpp"
#include "api/tensor.hpp" #include "cldnn/runtime/tensor.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -81,6 +81,8 @@
#include "cldnn_itt.h" #include "cldnn_itt.h"
#include "gpu/gpu_config.hpp" #include "gpu/gpu_config.hpp"
#include "cldnn/runtime/device_query.hpp"
#ifdef __linux__ #ifdef __linux__
# include <dlfcn.h> # include <dlfcn.h>
#endif #endif
@ -117,13 +119,13 @@ struct clDNNEngine::impl {
}; };
cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::string> &config) const { cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::string> &config) const {
auto device_info = device_map.begin()->second.get_info(); auto device_info = device_map.begin()->second->get_info();
if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) { if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) {
auto val = config.at(PluginConfigParams::KEY_DEVICE_ID); auto val = config.at(PluginConfigParams::KEY_DEVICE_ID);
if (device_map.find(val) == device_map.end()) { if (device_map.find(val) == device_map.end()) {
IE_THROW() << "Invalid device ID: " << val; IE_THROW() << "Invalid device ID: " << val;
} }
device_info = device_map.at(val).get_info(); device_info = device_map.at(val)->get_info();
} }
return device_info; return device_info;
@ -445,7 +447,8 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
RegisterPrimitives(); RegisterPrimitives();
// try loading clDNN engine and get info from it // try loading clDNN engine and get info from it
{ {
cldnn::device_query device_query; // Set OCL runtime which should be always available
cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl);
device_map = device_query.get_available_devices(); device_map = device_query.get_available_devices();
} }
// locate global custom kernel config // locate global custom kernel config
@ -851,8 +854,8 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
}; };
static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) { static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
auto freqGHz = info.core_frequency / 1000.f; auto freqGHz = info.gpu_frequency / 1000.f;
auto numEUs = info.cores_count; auto numEUs = info.execution_units_count;
auto opsPerComputeBlock = 0; auto opsPerComputeBlock = 0;
auto computeBlockIPC = 1.0f; auto computeBlockIPC = 1.0f;
switch (dt) { switch (dt) {
@ -894,8 +897,8 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
auto iter = device_map.find(device_id); auto iter = device_map.find(device_id);
auto device_info = iter != device_map.end() ? auto device_info = iter != device_map.end() ?
iter->second.get_info() : iter->second->get_info() :
device_map.begin()->second.get_info(); device_map.begin()->second->get_info();
if (name == METRIC_KEY(SUPPORTED_METRICS)) { if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics; std::vector<std::string> metrics;
@ -931,7 +934,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32); gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32);
IE_SET_METRIC_RETURN(DEVICE_GOPS, gops); IE_SET_METRIC_RETURN(DEVICE_GOPS, gops);
} else if (name == GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)) { } else if (name == GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)) {
IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.cores_count); IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.execution_units_count);
} else if (name == GPU_METRIC_KEY(UARCH_VERSION)) { } else if (name == GPU_METRIC_KEY(UARCH_VERSION)) {
std::stringstream s; std::stringstream s;
if (device_info.gfx_ver.major == 0 && device_info.gfx_ver.minor == 0 && device_info.gfx_ver.revision == 0) { if (device_info.gfx_ver.major == 0 && device_info.gfx_ver.minor == 0 && device_info.gfx_ver.revision == 0) {

View File

@ -7,7 +7,7 @@
#include <map> #include <map>
#include <string> #include <string>
#include <memory> #include <memory>
#include <api/engine.hpp> #include <cldnn/runtime/engine.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp> #include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp> #include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
#include "cldnn_remote_context.h" #include "cldnn_remote_context.h"
@ -22,7 +22,7 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin,
std::shared_ptr<impl> _impl; std::shared_ptr<impl> _impl;
// key: device_id, value: cldnn device // key: device_id, value: cldnn device
std::map<std::string, cldnn::device> device_map; std::map<std::string, cldnn::device::ptr> device_map;
std::mutex engine_mutex; std::mutex engine_mutex;
mutable CLDNNRemoteCLContext::Ptr m_defaultContext; mutable CLDNNRemoteCLContext::Ptr m_defaultContext;

View File

@ -2,13 +2,13 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include <list>
#include <set>
#include <unordered_set>
#include "ie_metric_helpers.hpp" #include "ie_metric_helpers.hpp"
#include <api/cldnn.hpp> #include <chrono>
#include <api/data.hpp> #include <cmath>
#include <algorithm>
#include "ie_metric_helpers.hpp"
#include <chrono> #include <chrono>
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
@ -27,7 +27,6 @@
#include "threading/ie_cpu_streams_executor.hpp" #include "threading/ie_cpu_streams_executor.hpp"
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" #include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace InferenceEngine::details; using namespace InferenceEngine::details;

View File

@ -2,22 +2,28 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include <cldnn/graph/network.hpp>
#include <cldnn/runtime/profiling.hpp>
#include "cldnn_graph.h"
#include "simple_math.h"
#include <cldnn/cldnn_config.hpp>
#include "cldnn_infer_request.h"
#include <description_buffer.hpp>
#include <threading/ie_executor_manager.hpp>
#include <exec_graph_info.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/variant.hpp>
#include <list> #include <list>
#include <set> #include <set>
#include <unordered_set> #include <unordered_set>
#include <sstream> #include <sstream>
#include <api/cldnn.hpp>
#include <api/network.hpp>
#include <api/profiling.hpp>
#include <api/custom_gpu_primitive.hpp>
#include <chrono> #include <chrono>
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
#include "cldnn_graph.h"
#include "simple_math.h"
#include <description_buffer.hpp>
#include "cldnn_infer_request.h"
#include <threading/ie_executor_manager.hpp>
#include <fstream> #include <fstream>
#include <utility> #include <utility>
#include <sys/types.h> #include <sys/types.h>
@ -71,12 +77,10 @@ void CLDNNGraph::Build() {
for (int b = m_bv_sz - 1; b >= 0; b--) { for (int b = m_bv_sz - 1; b >= 0; b--) {
auto network = BuildNetwork(m_program->GetCompiledProgram(b)); auto network = BuildNetwork(m_program->GetCompiledProgram(b));
m_networks.insert(m_networks.begin(), network); m_networks.insert(m_networks.begin(), network);
GetEngine()->release_pending_memory(network->get_id());
} }
} else { } else {
auto network = BuildNetwork(m_program->GetCompiledProgram()); auto network = BuildNetwork(m_program->GetCompiledProgram());
m_networks.emplace_back(network); m_networks.emplace_back(network);
GetEngine()->release_pending_memory(network->get_id());
} }
UpdateImplementationsMap(); UpdateImplementationsMap();
@ -499,7 +503,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
} }
}; };
std::map<cldnn::primitive_id, cldnn::event> executedPrimitives = GetNetwork()->get_executed_primitives(); std::map<cldnn::primitive_id, cldnn::event::ptr> executedPrimitives = GetNetwork()->get_executed_primitives();
auto allPrimitives = GetNetwork()->get_all_primitives(); auto allPrimitives = GetNetwork()->get_all_primitives();
// Get profiling info for all layers // Get profiling info for all layers
@ -521,7 +525,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
auto event = execIter->second; auto event = execIter->second;
executedPrimitives.erase(execIter); executedPrimitives.erase(execIter);
cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()}; cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event->get_profiling_info()};
collectTimings(cldnnInfo, perfCount); collectTimings(cldnnInfo, perfCount);
perfCount.num++; perfCount.num++;
@ -534,7 +538,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
pcIter = perfMap.find(executedID.first); pcIter = perfMap.find(executedID.first);
auto& perfCount = pcIter->second.second; auto& perfCount = pcIter->second.second;
cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second.get_profiling_info()}; cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second->get_profiling_info()};
collectTimings(cldnnInfo, perfCount); collectTimings(cldnnInfo, perfCount);
perfCount.num++; perfCount.num++;
@ -675,7 +679,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
executedPrimitives.find(primId) != executedPrimitives.end()) { executedPrimitives.find(primId) != executedPrimitives.end()) {
auto event = executedPrimitives.at(primId); auto event = executedPrimitives.at(primId);
cldnn::instrumentation::profiling_info cldnnInfo{primId, event.get_profiling_info()}; cldnn::instrumentation::profiling_info cldnnInfo{primId, event->get_profiling_info()};
// Collect timings // Collect timings
long long cpuTime = 0; long long cpuTime = 0;

View File

@ -17,8 +17,8 @@
#include "ie_blob.h" #include "ie_blob.h"
#include "cpp/ie_cnn_network.h" #include "cpp/ie_cnn_network.h"
#include <api/network.hpp> #include <cldnn/graph/network.hpp>
#include <api/topology.hpp> #include <cldnn/graph/topology.hpp>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp> #include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "cldnn_custom_layer.h" #include "cldnn_custom_layer.h"
@ -43,7 +43,7 @@ public:
const Config& getConfig() const { return m_config; } const Config& getConfig() const { return m_config; }
InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; } InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; }
std::shared_ptr<const cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); } std::shared_ptr<cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; } int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); } const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
size_t GetNetworksCount() const { return m_networks.size(); } size_t GetNetworksCount() const { return m_networks.size(); }

View File

@ -19,7 +19,7 @@ using namespace InferenceEngine;
namespace CLDNNPlugin { namespace CLDNNPlugin {
const char CLDNNInferRequest::fp32_suffix[] = "_fp32"; const char fp32_suffix[] = "_fp32";
const char str_not_allocated[] = "Input data was not allocated."; const char str_not_allocated[] = "Input data was not allocated.";
const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing"; const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing";
const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format"; const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format";
@ -110,7 +110,7 @@ Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* m
} }
} }
void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& inputMem) { void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach"); OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach");
auto impl = getContextImpl(m_graph->GetContext()); auto impl = getContextImpl(m_graph->GetContext());
impl->acquire_lock(); impl->acquire_lock();
@ -127,150 +127,57 @@ void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& in
void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) { void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc"); OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc");
cldnn::memory input_mem = cldnn::memory::allocate(*(m_graph->GetEngine()), layout); cldnn::memory::ptr input_mem = m_graph->GetEngine()->allocate_memory(layout);
input_attach(name, input_mem); input_attach(name, input_mem);
} }
void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory, template<typename T>
Blob::Ptr bptr, void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi, cldnn::stream& stream) {
buf_info* bi) { size_t n = (bi == nullptr) ? dst->size() : bi->buf_size;
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData");
size_t n = (bi == nullptr) ? bptr->size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset; size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
auto layout = outputMemory.get_layout(); auto layout = src->get_layout();
auto size = layout.size; auto size = layout.size;
auto l_padd = layout.data_padding.lower_size();
auto u_padd = layout.data_padding.upper_size();
auto h_padding = u_padd.spatial[0] + l_padd.spatial[0]; auto locked_dst = dst->buffer();
auto v_padding_l = (h_padding + size.spatial[0]) * u_padd.spatial[1]; auto dst_ptr = locked_dst.as<T*>();
auto v_padding_u = (h_padding + size.spatial[0]) * l_padd.spatial[1]; if (dst_ptr == nullptr) {
auto locked = bptr->buffer();
switch (bptr->getTensorDesc().getPrecision()) {
case Precision::FP32: {
auto out_f = locked.as<float*>();
if (out_f == nullptr) {
IE_THROW() << "Invalid output blob"; IE_THROW() << "Invalid output blob";
} }
auto resPtr = outputMemory.pointer<float>(); cldnn::mem_lock<T> src_lock{ src, stream };
float *resVec = out_f + offset; T* src_ptr = src_lock.data();
dst_ptr += offset;
if (h_padding || v_padding_l || v_padding_u) { if (layout.data_padding) {
size_t i = 0;
for (size_t b = 0; b < size.batch[0]; b++) { for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) { for (size_t f = 0; f < size.feature[0]; f++) {
i += v_padding_l; for (size_t w = 0; w < size.spatial[3]; w++) {
for (size_t z = 0; z < size.spatial[2]; z++) {
for (size_t y = 0; y < size.spatial[1]; y++) { for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0]; for (size_t x = 0; x < size.spatial[0]; x++) {
for (size_t x = 0; x < size.spatial[0]; x++, i++) { *dst_ptr++ = src_ptr[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))];
*resVec++ = resPtr[i]; }
}
} }
i += u_padd.spatial[0];
} }
i += v_padding_u;
} }
} }
} else { } else {
for (size_t i = 0; i < n; i++) { for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i]; dst_ptr[i] = src_ptr[i];
} }
} }
} }
break;
case Precision::FP16: {
auto out_f = locked.as<uint16_t*>();
if (out_f == nullptr) {
IE_THROW() << "Invalid output blob";
}
auto resPtr = outputMemory.pointer<uint16_t>();
uint16_t* resVec = out_f + offset;
if (h_padding || v_padding_l || v_padding_u) { void CLDNNInferRequest::copyOutputData(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) {
size_t i = 0; OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData");
for (size_t b = 0; b < size.batch[0]; b++) { auto& stream = m_graph->GetNetwork()->get_stream();
for (size_t f = 0; f < size.feature[0]; f++) { switch (dst->getTensorDesc().getPrecision()) {
i += v_padding_l; case Precision::FP32: copyResultToOutputBlob<float>(src, dst, bi, stream); break;
for (size_t y = 0; y < size.spatial[1]; y++) { case Precision::FP16: copyResultToOutputBlob<uint16_t>(src, dst, bi, stream); break;
i += l_padd.spatial[0]; case Precision::I32: copyResultToOutputBlob<int32_t>(src, dst, bi, stream); break;
for (size_t x = 0; x < size.spatial[0]; x++, i++) { case Precision::I64: copyResultToOutputBlob<int64_t>(src, dst, bi, stream); break;
*resVec++ = resPtr[i]; default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision";
}
i += u_padd.spatial[0];
}
i += v_padding_u;
}
}
} else {
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
}
}
}
break;
case Precision::I32: {
auto out_f = locked.as<int32_t*>();
if (out_f == nullptr) {
IE_THROW() << "Invalid output blob";
}
auto resPtr = outputMemory.pointer<int32_t>();
int32_t* resVec = out_f + offset;
if (h_padding || v_padding_l || v_padding_u) {
size_t i = 0;
for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) {
i += v_padding_l;
for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0];
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
*resVec++ = resPtr[i];
}
i += u_padd.spatial[0];
}
i += v_padding_u;
}
}
} else {
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
}
}
}
break;
case Precision::I64: {
auto out_f = locked.as<int64_t*>();
if (out_f == nullptr) {
IE_THROW() << "Invalid output blob";
}
auto resPtr = outputMemory.pointer<int64_t>();
int64_t* resVec = out_f + offset;
if (h_padding || v_padding_l || v_padding_u) {
size_t i = 0;
for (size_t b = 0; b < size.batch[0]; b++) {
for (size_t f = 0; f < size.feature[0]; f++) {
i += v_padding_l;
for (size_t y = 0; y < size.spatial[1]; y++) {
i += l_padd.spatial[0];
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
*resVec++ = resPtr[i];
}
i += u_padd.spatial[0];
}
i += v_padding_u;
}
}
} else {
for (size_t i = 0; i < n; i++) {
resVec[i] = resPtr[i];
}
}
}
break;
default:
IE_THROW() << "The plugin does not support output " << bptr->getTensorDesc().getPrecision() << " precision";
} }
} }
@ -279,7 +186,7 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
const cldnn::layout& inputLayout, const cldnn::layout& inputLayout,
const Blob &inputBlob, buf_info* bi) { const Blob &inputBlob, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData"); OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData");
size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset; size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
cldnn::primitive_id internalName = "parameter:" + inputName; cldnn::primitive_id internalName = "parameter:" + inputName;
@ -287,37 +194,37 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
switch (inputBlob.getTensorDesc().getPrecision()) { switch (inputBlob.getTensorDesc().getPrecision()) {
case Precision::FP32: { case Precision::FP32: {
float* blob_ptr = const_cast<float*>(locked.as<const float*>()) + offset; float* blob_ptr = const_cast<float*>(locked.as<const float*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break; break;
} }
case Precision::I32: { case Precision::I32: {
int32_t* blob_ptr = const_cast<int32_t*>(locked.as<const int32_t*>()) + offset; int32_t* blob_ptr = const_cast<int32_t*>(locked.as<const int32_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break; break;
} }
case Precision::I64: { case Precision::I64: {
int64_t* blob_ptr = const_cast<int64_t*>(locked.as<const int64_t*>()) + offset; int64_t* blob_ptr = const_cast<int64_t*>(locked.as<const int64_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break; break;
} }
case Precision::FP16: { case Precision::FP16: {
uint16_t* blob_ptr = const_cast<uint16_t*>(locked.as<const uint16_t*>()) + offset; uint16_t* blob_ptr = const_cast<uint16_t*>(locked.as<const uint16_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break; break;
} }
case Precision::I8: { case Precision::I8: {
int8_t* blob_ptr = const_cast<int8_t*>(locked.as<const int8_t*>()) + offset; int8_t* blob_ptr = const_cast<int8_t*>(locked.as<const int8_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break; break;
} }
case Precision::U8: { case Precision::U8: {
uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset; uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break; break;
} }
case Precision::BOOL: { case Precision::BOOL: {
uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset; uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset;
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
break; break;
} }
default: default:
@ -601,6 +508,7 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data)
void CLDNNInferRequest::AllocateInputs() { void CLDNNInferRequest::AllocateInputs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs"); OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs");
auto inputLayouts = m_graph->GetInputLayouts(); auto inputLayouts = m_graph->GetInputLayouts();
auto& stream = m_graph->GetNetwork()->get_stream();
// allocate inputs // allocate inputs
for (auto& ni : _networkInputs) { for (auto& ni : _networkInputs) {
std::string name = ni.first; std::string name = ni.first;
@ -623,25 +531,24 @@ void CLDNNInferRequest::AllocateInputs() {
input_alloc(UVName, inputLayouts.at(UVName)); input_alloc(UVName, inputLayouts.at(UVName));
size_t height = desc.getDims()[2], width = desc.getDims()[3]; size_t height = desc.getDims()[2], width = desc.getDims()[3];
cldnn::pointer<uint8_t> input_mem_ptr_Y = inputsMemory.at(YName).pointer<uint8_t>(); cldnn::mem_lock<uint8_t> input_mem_ptr_Y{inputsMemory.at(YName), stream};
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC); TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data()); auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data());
cldnn::pointer<uint8_t> input_mem_ptr_UV = inputsMemory.at(UVName).pointer<uint8_t>(); cldnn::mem_lock<uint8_t> input_mem_ptr_UV{ inputsMemory.at(UVName), stream };
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC); TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data()); auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data());
blobs.push_back(make_shared_blob<NV12Blob>(blobY, blobUV)); blobs.push_back(make_shared_blob<NV12Blob>(blobY, blobUV));
} }
_inputs[name] = desc.getDims()[0] == 1 ? blobs[0] : make_shared_blob<BatchedBlob>(blobs); _inputs[name] = desc.getDims()[0] == 1 ? blobs[0] : make_shared_blob<BatchedBlob>(blobs);
} else { } else {
if (inputLayouts.find(name) == inputLayouts.end()) { if (inputLayouts.find(name) == inputLayouts.end()) {
IE_THROW() << "Input layout for " << name << " is not found"; IE_THROW() << "Input layout for " << name << " is not found";
} }
cldnn::layout layout = inputLayouts.at(name); cldnn::layout layout = inputLayouts.at(name);
input_alloc(name, layout); input_alloc(name, layout);
cldnn::pointer<uint8_t> mem_ptr = inputsMemory.at(name).pointer<uint8_t>(); cldnn::mem_lock<uint8_t> mem_ptr{inputsMemory.at(name), stream};
_inputs[name] = createInputBlob(desc, mem_ptr.data()); _inputs[name] = createInputBlob(desc, mem_ptr.data());
if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
@ -685,8 +592,8 @@ void CLDNNInferRequest::AllocateOutputs() {
bool can_reuse_internal_mem = !m_useStreams; bool can_reuse_internal_mem = !m_useStreams;
for (auto& no : _networkOutputs) { for (auto& no : _networkOutputs) {
std::string outputID = m_graph->MapOutputName(no.first); std::string outputID = m_graph->MapOutputName(no.first);
cldnn::memory output_mem = m_graph->GetNetwork()->get_output_memory(outputID); cldnn::memory::ptr output_mem = m_graph->GetNetwork()->get_output_memory(outputID);
cldnn::pointer<uint8_t> output_mem_ptr = output_mem.pointer<uint8_t>(); cldnn::mem_lock<uint8_t> output_mem_ptr{output_mem, m_graph->GetNetwork()->get_stream()};
if (output_mem_ptr.data() == nullptr) { if (output_mem_ptr.data() == nullptr) {
IE_THROW() << "Empty output memory for primitive " << outputID; IE_THROW() << "Empty output memory for primitive " << outputID;
} }
@ -824,6 +731,7 @@ CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap
void CLDNNInferRequest::execAndParse() { void CLDNNInferRequest::execAndParse() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse"); OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse");
auto networkOutputs = m_graph->GetNetwork()->execute(); auto networkOutputs = m_graph->GetNetwork()->execute();
auto& stream = m_graph->GetNetwork()->get_stream();
// Collect outputs as requested by the model // Collect outputs as requested by the model
for (auto& no : _networkOutputs) { for (auto& no : _networkOutputs) {
@ -835,12 +743,12 @@ void CLDNNInferRequest::execAndParse() {
// mapping remote blobs not needed - // mapping remote blobs not needed -
// let the user take care of them explicitly // let the user take care of them explicitly
if (!bptr->is<gpu::ClBlob>()) { if (!bptr->is<gpu::ClBlob>()) {
auto out_ptr = outputMemory.pointer<uint8_t>(); cldnn::mem_lock<uint8_t> out_ptr{outputMemory, stream};
auto blob_ptr = bptr->buffer().as<uint8_t*>(); auto blob_ptr = bptr->buffer().as<uint8_t*>();
// If Async API is used, copy of output blobs is not needed, unless SetBlob function was called. // If Async API is used, copy of output blobs is not needed, unless SetBlob function was called.
// But in the case when old API is used we have to copy data to memory provided by user. // But in the case when old API is used we have to copy data to memory provided by user.
if (blob_ptr != &out_ptr[0]) { if (blob_ptr != out_ptr.data()) {
copyOutputData(outputMemory, bptr); copyOutputData(outputMemory, bptr);
} }
} }
@ -965,19 +873,20 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
IE_THROW() << "Input name mismatch."; IE_THROW() << "Input name mismatch.";
} }
auto inputLayout = m_graph->GetInputLayouts().at(inputName); auto inputLayout = m_graph->GetInputLayouts().at(inputName);
auto is_same_buffer = [](const Blob& blob, const cldnn::memory& memory) -> bool { auto is_same_buffer = [&](const Blob& blob, cldnn::memory::ptr memory) -> bool {
const std::string str_not_allocated("Input data was not allocated."); const std::string str_not_allocated("Input data was not allocated.");
cldnn::pointer<const uint8_t> ptr = memory.pointer<const uint8_t>(); cldnn::mem_lock<uint8_t> ptr{memory, m_graph->GetNetwork()->get_stream()};
const uint8_t* blob_ptr = blob.cbuffer().as<const uint8_t*>(); const uint8_t* blob_ptr = blob.cbuffer().as<const uint8_t*>();
const uint8_t* mem_ptr = ptr.data(); const uint8_t* mem_ptr = ptr.data();
if (blob_ptr == nullptr || mem_ptr == nullptr) { if (blob_ptr == nullptr || mem_ptr == nullptr) {
IE_THROW() << str_not_allocated; IE_THROW() << str_not_allocated;
} }
return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size()); return (blob_ptr == mem_ptr) && (blob.byteSize() == memory->size());
}; };
cldnn::primitive_id internalName = "parameter:" + inputName; cldnn::primitive_id internalName = "parameter:" + inputName;
const cldnn::memory& memory = inputsMemory.at(inputName); cldnn::memory::ptr memory = inputsMemory.at(inputName);
auto& stream = m_graph->GetNetwork()->get_stream();
auto _nw_ptr = m_graph->GetNetwork(); auto _nw_ptr = m_graph->GetNetwork();
auto prec = inputBlob.getTensorDesc().getPrecision(); auto prec = inputBlob.getTensorDesc().getPrecision();
@ -986,8 +895,8 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
_nw_ptr->set_input_data(internalName, memory); _nw_ptr->set_input_data(internalName, memory);
} else if (prec == Precision::I16 || prec == Precision::U16) { } else if (prec == Precision::I16 || prec == Precision::U16) {
// clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision // clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision
const cldnn::memory& fp32_mem = inputsMemory.at(inputName+fp32_suffix); cldnn::memory::ptr fp32_mem = inputsMemory.at(inputName+fp32_suffix);
cldnn::pointer<float> ptr = fp32_mem.pointer<float>(); cldnn::mem_lock<float> ptr {fp32_mem, stream};
if (prec == Precision::I16) { if (prec == Precision::I16) {
copyToFloat<int16_t>(ptr.data(), &inputBlob); copyToFloat<int16_t>(ptr.data(), &inputBlob);
} else { } else {
@ -1031,4 +940,4 @@ void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, co
} }
} }
}; // namespace CLDNNPlugin } // namespace CLDNNPlugin

View File

@ -46,7 +46,7 @@ public:
void EnableStreams() { m_useStreams = true; } void EnableStreams() { m_useStreams = true; }
protected: protected:
std::map<std::string, cldnn::memory> inputsMemory; std::map<std::string, cldnn::memory::ptr> inputsMemory;
std::map<std::string, cldnn::primitive_id> outputsMap; std::map<std::string, cldnn::primitive_id> outputsMap;
bool m_useProfiling; bool m_useProfiling;
@ -60,12 +60,12 @@ protected:
InferenceEngine::Blob::Ptr createInputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); InferenceEngine::Blob::Ptr createInputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
void copyOutputData(const cldnn::memory& outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr); void copyOutputData(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
void copyInputData(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName, void copyInputData(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob,
buf_info* bi = nullptr); buf_info* bi = nullptr);
void input_attach(cldnn::primitive_id name, cldnn::memory& inputMem); void input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem);
void input_alloc(cldnn::primitive_id name, const cldnn::layout& layout); void input_alloc(cldnn::primitive_id name, const cldnn::layout& layout);
void AllocateInputs(); void AllocateInputs();
void AllocateOutputs(); void AllocateOutputs();
@ -76,9 +76,6 @@ protected:
void PrepareInput(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob); void PrepareInput(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob); void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
private:
static const char fp32_suffix[];
}; };
}; // namespace CLDNNPlugin }; // namespace CLDNNPlugin

View File

@ -92,7 +92,7 @@ bool Program::CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops,
return true; return true;
} }
Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config, bool createTopologyOnly) Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config, bool createTopologyOnly)
: m_config(config) : m_config(config)
, m_engine(engine) , m_engine(engine)
, m_curBatch(-1) , m_curBatch(-1)
@ -128,11 +128,9 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cld
ChangeInputBatch(1U << static_cast<unsigned>(b)); ChangeInputBatch(1U << static_cast<unsigned>(b));
m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly)); m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly));
m_engine->release_pending_memory(0);
} }
} else { } else {
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly)); m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly));
m_engine->release_pending_memory(0);
} }
} }

View File

@ -15,8 +15,8 @@
#include "cldnn_config.h" #include "cldnn_config.h"
#include <api/engine.hpp> #include <cldnn/runtime/engine.hpp>
#include <api/topology.hpp> #include <cldnn/graph/topology.hpp>
// Forward declarations for cldnn part // Forward declarations for cldnn part
namespace cldnn { namespace cldnn {
@ -69,8 +69,8 @@ public:
class Program { class Program {
public: public:
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config, bool createTopologyOnly = false); Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config, bool createTopologyOnly = false);
Program(std::shared_ptr<const cldnn::engine> engine, const Config& config) : m_config(config), m_engine(engine), Program(std::shared_ptr<cldnn::engine> engine, const Config& config) : m_config(config), m_engine(engine),
m_curBatch(-1), queryMode(false), m_max_batch(1) {} m_curBatch(-1), queryMode(false), m_max_batch(1) {}
Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false), m_max_batch(1) {} Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false), m_max_batch(1) {}
@ -100,8 +100,8 @@ public:
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; } const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; } InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; }
InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; } InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; }
const cldnn::engine& GetEngine() const { return *m_engine; } cldnn::engine& GetEngine() const { return *m_engine; }
std::shared_ptr<const cldnn::engine> GetEnginePtr() const { return m_engine; } std::shared_ptr<cldnn::engine> GetEnginePtr() const { return m_engine; }
const Config& GetConfig() const { return m_config; } const Config& GetConfig() const { return m_config; }
int GetMaxBatchSizeForSingleProgram(); int GetMaxBatchSizeForSingleProgram();
@ -150,7 +150,7 @@ public:
private: private:
static factories_map_t factories_map; static factories_map_t factories_map;
std::vector<std::shared_ptr<cldnn::program>> m_programs; std::vector<std::shared_ptr<cldnn::program>> m_programs;
std::shared_ptr<const cldnn::engine> m_engine; std::shared_ptr<cldnn::engine> m_engine;
Config m_config; Config m_config;
std::shared_ptr<cldnn::topology> m_topology; std::shared_ptr<cldnn::topology> m_topology;

View File

@ -6,21 +6,23 @@
#include "cldnn_remote_context.h" #include "cldnn_remote_context.h"
#include "cldnn_itt.h" #include "cldnn_itt.h"
#include "cldnn/runtime/device_query.hpp"
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace InferenceEngine::gpu; using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details; using namespace InferenceEngine::details;
namespace CLDNNPlugin { namespace CLDNNPlugin {
static const char unsupported_str[] = "Unsupported shared object type ";
CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator; CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator;
CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context, CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout, const cldnn::layout& layout,
cldnn::shared_handle mem, cldnn::shared_handle mem,
cldnn::shared_surface surf, cldnn::shared_surface surf,
uint32_t plane, uint32_t plane,
BlobType mem_type) : BlobType mem_type) :
m_context(context), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane), m_context(context), m_stream(stream), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane),
_handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) { _handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
} }
@ -67,7 +69,6 @@ ParamMap CLDNNRemoteBlobImpl::getParams() const {
} }
bool CLDNNRemoteBlobImpl::deallocate() noexcept { bool CLDNNRemoteBlobImpl::deallocate() noexcept {
if (m_memObject != nullptr)
m_memObject.reset(); m_memObject.reset();
return m_memObject == nullptr; return m_memObject == nullptr;
} }
@ -86,32 +87,7 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() {
_impl->acquire_lock(); _impl->acquire_lock();
if (m_memObject == nullptr) { if (m_memObject == nullptr) {
auto eng = _impl->GetEngine(); allocate();
switch (m_mem_type) {
case BlobType::BT_BUF_INTERNAL:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::allocate(*eng, m_layout)));
break;
case BlobType::BT_BUF_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem)));
break;
#ifdef _WIN32
case BlobType::BT_SURF_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane)));
break;
case BlobType::BT_DX_BUF_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_dx_buffer(*eng, m_layout, m_mem)));
break;
#else
case BlobType::BT_SURF_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_surf, m_plane)));
break;
#endif
case BlobType::BT_IMG_SHARED:
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_image(*eng, m_layout, m_mem)));
break;
default:
IE_THROW() << unsupported_str << m_mem_type;
}
} }
_impl->release_lock(); _impl->release_lock();
@ -120,32 +96,38 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() {
void CLDNNRemoteBlobImpl::allocate() noexcept { void CLDNNRemoteBlobImpl::allocate() noexcept {
assert(m_memObject == nullptr); assert(m_memObject == nullptr);
std::shared_ptr<const cldnn::engine> eng = getContextImpl(m_context.lock())->GetEngine(); std::shared_ptr<cldnn::engine> eng = getContextImpl(m_context.lock())->GetEngine();
switch (m_mem_type) { switch (m_mem_type) {
case BlobType::BT_BUF_INTERNAL: case BlobType::BT_BUF_INTERNAL: {
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::allocate(*eng, m_layout))); m_memObject = eng->allocate_memory(m_layout);
break; break;
case BlobType::BT_BUF_SHARED: }
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem))); case BlobType::BT_BUF_SHARED: {
m_memObject = eng->share_buffer(m_layout, m_mem);
break; break;
}
#ifdef _WIN32 #ifdef _WIN32
case BlobType::BT_SURF_SHARED: case BlobType::BT_SURF_SHARED: {
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane))); m_memObject = eng->share_surface(m_layout, m_mem, m_plane);
break; break;
case BlobType::BT_DX_BUF_SHARED: }
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_dx_buffer(*eng, m_layout, m_mem))); case BlobType::BT_DX_BUF_SHARED: {
m_memObject = eng->share_dx_buffer(m_layout, m_mem);
break; break;
}
#else #else
case BlobType::BT_SURF_SHARED: case BlobType::BT_SURF_SHARED: {
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_surf, m_plane))); m_memObject = eng->share_surface(m_layout, m_surf, m_plane);
break; break;
}
#endif #endif
case BlobType::BT_IMG_SHARED: case BlobType::BT_IMG_SHARED: {
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_image(*eng, m_layout, m_mem))); m_memObject = eng->share_image(m_layout, m_mem);
break; break;
}
default: default:
m_memObject = nullptr; m_memObject.reset();
} }
} }
@ -165,7 +147,7 @@ std::shared_ptr<RemoteContext> CLDNNRemoteBlobImpl::getContext() const noexcept
} }
void CLDNNRemoteBlobImpl::lock() const { void CLDNNRemoteBlobImpl::lock() const {
lockedHolder = std::unique_ptr<cldnn::pointer<uint8_t>>(new cldnn::pointer<uint8_t>(m_memObject->pointer<uint8_t>())); lockedHolder = std::unique_ptr<cldnn::mem_lock<uint8_t>>(new cldnn::mem_lock<uint8_t>(m_memObject, m_stream));
auto ptr = lockedHolder->data(); auto ptr = lockedHolder->data();
_handle = reinterpret_cast<void*>(ptr); _handle = reinterpret_cast<void*>(ptr);
m_allocator.regLockedBlob(_handle, this); m_allocator.regLockedBlob(_handle, this);
@ -244,7 +226,11 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
} }
} }
cldnn::device_query device_query(_context_id, _va_device); // TODO: Parameterize this based on plugin config and compilation options
auto engine_type = cldnn::engine_types::ocl;
auto runtime_type = cldnn::runtime_types::ocl;
// Use actual runtime and engine types
cldnn::device_query device_query(engine_type, runtime_type, _context_id, _va_device);
auto device_map = device_query.get_available_devices(); auto device_map = device_query.get_available_devices();
auto iter = device_map.find(m_config.device_id); auto iter = device_map.find(m_config.device_id);
@ -252,28 +238,25 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
{ {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl::Create"); OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl::Create");
m_engine = std::make_shared<cldnn::engine>(dev, bool enable_profiling = (m_config.useProfiling ||
cldnn::engine_configuration((m_config.useProfiling ||
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) || (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)), (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache));
false, cldnn::queue_types queue_type = cldnn::queue_types::out_of_order;
m_config.dumpCustomKernels, bool use_unified_shared_memory = true;
std::string(), m_engine = cldnn::engine::create(engine_type, runtime_type, dev, cldnn::engine_configuration(enable_profiling,
std::string(), queue_type,
true,
std::string(),
m_config.sources_dumps_dir, m_config.sources_dumps_dir,
m_config.queuePriority, m_config.queuePriority,
m_config.queueThrottle, m_config.queueThrottle,
m_config.memory_pool_on, m_config.memory_pool_on,
m_config.throughput_streams, use_unified_shared_memory,
m_config.kernels_cache_dir, m_config.kernels_cache_dir,
m_config.n_threads)); m_config.n_threads));
} }
} }
ParamMap CLDNNExecutionContextImpl::getParams() const { ParamMap CLDNNExecutionContextImpl::getParams() const {
ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_context() } }; ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } };
switch (m_type) { switch (m_type) {
case OCL: case OCL:

View File

@ -4,15 +4,11 @@
#pragma once #pragma once
#include <string> #include <cldnn/runtime/memory.hpp>
#include <map> #include <cldnn/runtime/engine.hpp>
#include <memory>
#include <atomic>
#include <ie_parameter.hpp> #include <ie_parameter.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp> #include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include "cldnn_config.h" #include "cldnn_config.h"
#include <api/memory.hpp>
#include <api/engine.hpp>
#include "cldnn_common_utils.h" #include "cldnn_common_utils.h"
#ifndef NOMINMAX #ifndef NOMINMAX
@ -25,6 +21,11 @@
# include <gpu/gpu_context_api_va.hpp> # include <gpu/gpu_context_api_va.hpp>
#endif #endif
#include <string>
#include <map>
#include <memory>
#include <atomic>
namespace CLDNNPlugin { namespace CLDNNPlugin {
class CLDNNRemoteAllocator; class CLDNNRemoteAllocator;
@ -41,6 +42,7 @@ public:
}; };
explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout, const cldnn::layout& layout,
cldnn::shared_handle mem, cldnn::shared_handle mem,
cldnn::shared_surface surf, cldnn::shared_surface surf,
@ -63,11 +65,12 @@ public:
bool is_allocated() const noexcept; bool is_allocated() const noexcept;
bool is_locked() const noexcept; bool is_locked() const noexcept;
void allocate_if_needed(); void allocate_if_needed();
cldnn::memory& getMemory() { return *m_memObject; } cldnn::memory::ptr getMemory() { return m_memObject; }
protected: protected:
static CLDNNRemoteAllocator m_allocator; static CLDNNRemoteAllocator m_allocator;
std::weak_ptr<InferenceEngine::gpu::ClContext> m_context; std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;
cldnn::stream& m_stream;
// constructor stuff // constructor stuff
cldnn::shared_handle m_mem; cldnn::shared_handle m_mem;
@ -77,9 +80,9 @@ protected:
cldnn::layout m_layout; cldnn::layout m_layout;
BlobType m_mem_type; BlobType m_mem_type;
std::unique_ptr<cldnn::memory> m_memObject; cldnn::memory::ptr m_memObject;
mutable std::unique_ptr<cldnn::pointer<uint8_t>> lockedHolder; mutable std::unique_ptr<cldnn::mem_lock<uint8_t>> lockedHolder;
mutable void* _handle; mutable void* _handle;
mutable std::shared_ptr<InferenceEngine::IAllocator> _allocator; mutable std::shared_ptr<InferenceEngine::IAllocator> _allocator;
@ -93,13 +96,14 @@ public:
using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>; using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>;
explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context, explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const InferenceEngine::TensorDesc& desc, const InferenceEngine::TensorDesc& desc,
const cldnn::layout& layout, const cldnn::layout& layout,
cldnn::shared_handle mem, cldnn::shared_handle mem,
cldnn::shared_surface surf, cldnn::shared_surface surf,
uint32_t plane, uint32_t plane,
CLDNNRemoteBlobImpl::BlobType mem_type) CLDNNRemoteBlobImpl::BlobType mem_type)
: _impl(context, layout, mem, surf, plane, mem_type) : _impl(context, stream, layout, mem, surf, plane, mem_type)
, TpublicAPI(desc) {} , TpublicAPI(desc) {}
void allocate() noexcept override { _impl.allocate(); } void allocate() noexcept override { _impl.allocate(); }
@ -231,6 +235,7 @@ public:
} }
protected: protected:
// TODO: refactor to unique_ptr
std::shared_ptr<cldnn::engine> m_engine; std::shared_ptr<cldnn::engine> m_engine;
InferenceEngine::gpu_handle_param m_va_display; InferenceEngine::gpu_handle_param m_va_display;
Config m_config; Config m_config;
@ -267,6 +272,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
using namespace InferenceEngine; using namespace InferenceEngine;
using InferenceEngine::gpu::details::param_map_obj_getter; using InferenceEngine::gpu::details::param_map_obj_getter;
InferenceEngine::RemoteBlob::Ptr ret = nullptr; InferenceEngine::RemoteBlob::Ptr ret = nullptr;
auto& stream = _impl.GetEngine()->get_program_stream();
uint32_t plane = param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE)); uint32_t plane = param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
#ifdef _WIN32 #ifdef _WIN32
cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
@ -290,11 +296,11 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext> std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this()); (std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
#ifdef _WIN32 #ifdef _WIN32
ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this, ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this, stream,
tensorDesc, layout, mem, 0, plane, tensorDesc, layout, mem, 0, plane,
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED); CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
#else #else
ret = std::make_shared<CLDNNRemoteVASurface>(smart_this, ret = std::make_shared<CLDNNRemoteVASurface>(smart_this, stream,
tensorDesc, layout, nullptr, surf, plane, tensorDesc, layout, nullptr, surf, plane,
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED); CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
#endif #endif
@ -311,6 +317,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
InferenceEngine::RemoteBlob::Ptr ret = nullptr; InferenceEngine::RemoteBlob::Ptr ret = nullptr;
_impl.acquire_lock(); _impl.acquire_lock();
auto& stream = _impl.GetEngine()->get_program_stream();
// try to locate previously shared object // try to locate previously shared object
auto itr = shared_obj_reg.find(mem); auto itr = shared_obj_reg.find(mem);
@ -327,15 +334,15 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
switch (blob_type) { switch (blob_type) {
case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED: case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type); ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break; break;
case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED: case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
layout.format = ImageFormatFromLayout(tensorDesc.getLayout()); layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type); ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break; break;
#ifdef _WIN32 #ifdef _WIN32
case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED: case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type); ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break; break;
#endif #endif
default: default:
@ -354,7 +361,9 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
CldnnTensorFromIEDims(tensorDesc.getDims())); CldnnTensorFromIEDims(tensorDesc.getDims()));
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext> auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this()); (std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
auto& stream = _impl.GetEngine()->get_program_stream();
return std::make_shared<CLDNNRemoteCLbuffer>(smart_this, return std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
stream,
tensorDesc, tensorDesc,
layout, layout,
nullptr, 0, 0, nullptr, 0, 0,

View File

@ -8,7 +8,7 @@
#include "ngraph/op/batch_to_space.hpp" #include "ngraph/op/batch_to_space.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/batch_to_space.hpp" #include "cldnn/primitives/batch_to_space.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,9 +8,9 @@
#include "ngraph/op/broadcast.hpp" #include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/broadcast.hpp" #include "cldnn/primitives/broadcast.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/reshape.hpp" #include "cldnn/primitives/reshape.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/concat.hpp" #include "ngraph/op/concat.hpp"
#include "api/concatenation.hpp" #include "cldnn/primitives/concatenation.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -17,7 +17,7 @@
#include "ngraph/op/variadic_split.hpp" #include "ngraph/op/variadic_split.hpp"
#include "ngraph/op/util/op_types.hpp" #include "ngraph/op/util/op_types.hpp"
#include "api/data.hpp" #include "cldnn/primitives/data.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {
@ -169,9 +169,10 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
if (bufIter != p.blobMemCache.end()) { if (bufIter != p.blobMemCache.end()) {
constPrimID = bufIter->second; constPrimID = bufIter->second;
} else { } else {
auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false); cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false);
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor auto& stream = p.GetEngine().get_program_stream();
auto buf = tmpPointer.data(); cldnn::mem_lock<char> lock{mem, stream};
auto buf = lock.data();
auto bufSize = constLayout.bytes_count(); auto bufSize = constLayout.bytes_count();
// Do actual weights reorder and change O and I channels order // Do actual weights reorder and change O and I channels order

View File

@ -8,7 +8,7 @@
#include "ngraph/op/convert.hpp" #include "ngraph/op/convert.hpp"
#include "ngraph/op/convert_like.hpp" #include "ngraph/op/convert_like.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -13,11 +13,11 @@
#include "ngraph/op/fake_quantize.hpp" #include "ngraph/op/fake_quantize.hpp"
#include "ngraph/op/util/op_types.hpp" #include "ngraph/op/util/op_types.hpp"
#include "api/convolution.hpp" #include "cldnn/primitives/convolution.hpp"
#include "api/deconvolution.hpp" #include "cldnn/primitives/deconvolution.hpp"
#include "api/binary_convolution.hpp" #include "cldnn/primitives/binary_convolution.hpp"
#include "api/permute.hpp" #include "cldnn/primitives/permute.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,9 +8,9 @@
#include "ngraph/op/ctc_greedy_decoder.hpp" #include "ngraph/op/ctc_greedy_decoder.hpp"
#include "ngraph/op/ctc_greedy_decoder_seq_len.hpp" #include "ngraph/op/ctc_greedy_decoder_seq_len.hpp"
#include "api/ctc_greedy_decoder.hpp" #include "cldnn/primitives/ctc_greedy_decoder.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/mutable_data.hpp" #include "cldnn/primitives/mutable_data.hpp"
#include "transformations/utils/utils.hpp" #include "transformations/utils/utils.hpp"
@ -58,7 +58,7 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
std::size_t num_output = op->get_output_size(); std::size_t num_output = op->get_output_size();
std::vector<cldnn::memory> shared_memory; std::vector<cldnn::memory::ptr> shared_memory;
if (num_output == 2) { if (num_output == 2) {
auto mutable_precision = op->get_output_element_type(1); auto mutable_precision = op->get_output_element_type(1);
if (mutable_precision == ngraph::element::i64) { if (mutable_precision == ngraph::element::i64) {
@ -70,7 +70,7 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
DefaultFormatForDims(op->get_output_shape(1).size()), DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1))); CldnnTensorFromIEDims(op->get_output_shape(1)));
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayout)); shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout));
cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write"; cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]); auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]);

View File

@ -8,7 +8,7 @@
#include "ngraph/op/cum_sum.hpp" #include "ngraph/op/cum_sum.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/cum_sum.hpp" #include "cldnn/primitives/cum_sum.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -9,8 +9,8 @@
#include "ngraph/attribute_visitor.hpp" #include "ngraph/attribute_visitor.hpp"
#include "ngraph/node.hpp" #include "ngraph/node.hpp"
#include "api/custom_gpu_primitive.hpp" #include "cldnn/primitives/custom_gpu_primitive.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/depth_to_space.hpp" #include "ngraph/op/depth_to_space.hpp"
#include "api/depth_to_space.hpp" #include "cldnn/primitives/depth_to_space.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/detection_output.hpp" #include "ngraph/op/detection_output.hpp"
#include "api/detection_output.hpp" #include "cldnn/primitives/detection_output.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -25,10 +25,10 @@
#include "ngraph/op/power.hpp" #include "ngraph/op/power.hpp"
#include "ngraph/op/floor_mod.hpp" #include "ngraph/op/floor_mod.hpp"
#include "api/activation.hpp" #include "cldnn/primitives/activation.hpp"
#include "api/eltwise.hpp" #include "cldnn/primitives/eltwise.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/reshape.hpp" #include "cldnn/primitives/reshape.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -9,8 +9,8 @@
#include "ngraph/op/embeddingbag_offsets_sum.hpp" #include "ngraph/op/embeddingbag_offsets_sum.hpp"
#include "ngraph/op/embeddingbag_packedsum.hpp" #include "ngraph/op/embeddingbag_packedsum.hpp"
#include "api/embedding_bag.hpp" #include "cldnn/primitives/embedding_bag.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "transformations/utils/utils.hpp" #include "transformations/utils/utils.hpp"

View File

@ -7,7 +7,7 @@
#include "ngraph/op/extractimagepatches.hpp" #include "ngraph/op/extractimagepatches.hpp"
#include "api/extract_image_patches.hpp" #include "cldnn/primitives/extract_image_patches.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/fake_quantize.hpp" #include "ngraph/op/fake_quantize.hpp"
#include "api/quantize.hpp" #include "cldnn/primitives/quantize.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include "ngraph/op/gather_tree.hpp" #include "ngraph/op/gather_tree.hpp"
#include "api/gather_tree.hpp" #include "cldnn/primitives/gather_tree.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include "ngraph/op/gather.hpp" #include "ngraph/op/gather.hpp"
#include "api/gather.hpp" #include "cldnn/primitives/gather.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/gather_nd.hpp" #include "ngraph/op/gather_nd.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/gather_nd.hpp" #include "cldnn/primitives/gather_nd.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/grn.hpp" #include "ngraph/op/grn.hpp"
#include "api/grn.hpp" #include "cldnn/primitives/grn.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -9,7 +9,7 @@
#include "ngraph/op/interpolate.hpp" #include "ngraph/op/interpolate.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/resample.hpp" #include "cldnn/primitives/resample.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/lrn.hpp" #include "ngraph/op/lrn.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/lrn.hpp" #include "cldnn/primitives/lrn.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -9,11 +9,11 @@
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/fake_quantize.hpp" #include "ngraph/op/fake_quantize.hpp"
#include "api/gemm.hpp" #include "cldnn/primitives/gemm.hpp"
#include "api/fully_connected.hpp" #include "cldnn/primitives/fully_connected.hpp"
#include "api/reshape.hpp" #include "cldnn/primitives/reshape.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/permute.hpp" #include "cldnn/primitives/permute.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,8 @@
#include "ngraph/op/mvn.hpp" #include "ngraph/op/mvn.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/mvn.hpp" #include "cldnn/primitives/mvn.hpp"
#include <algorithm> #include <algorithm>
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -9,9 +9,9 @@
#include <ngraph/opsets/opset3.hpp> #include <ngraph/opsets/opset3.hpp>
#include <ngraph_ops/nms_ie_internal.hpp> #include <ngraph_ops/nms_ie_internal.hpp>
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/mutable_data.hpp" #include "cldnn/primitives/mutable_data.hpp"
#include "api/non_max_suppression.hpp" #include "cldnn/primitives/non_max_suppression.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {
@ -62,7 +62,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
std::size_t num_output = op->get_output_size(); std::size_t num_output = op->get_output_size();
std::vector<cldnn::memory> shared_memory; std::vector<cldnn::memory::ptr> shared_memory;
switch (num_output) { switch (num_output) {
case 3: { case 3: {
auto mutable_precision_second = op->get_output_element_type(2); auto mutable_precision_second = op->get_output_element_type(2);
@ -74,7 +74,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
DefaultFormatForDims(op->get_output_shape(2).size()), DefaultFormatForDims(op->get_output_shape(2).size()),
CldnnTensorFromIEDims(op->get_output_shape(2))); CldnnTensorFromIEDims(op->get_output_shape(2)));
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutSecond)); shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));
cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second"; cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back()); auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back());
@ -91,7 +91,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
cldnn::format::bfyx, cldnn::format::bfyx,
cldnn::tensor(outputIndices, 3, 1, 1)); cldnn::tensor(outputIndices, 3, 1, 1));
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutFirst)); shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));
cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first"; cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back()); auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back());

View File

@ -8,8 +8,8 @@
#include "ngraph/op/normalize_l2.hpp" #include "ngraph/op/normalize_l2.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/normalize.hpp" #include "cldnn/primitives/normalize.hpp"
#include "api/data.hpp" #include "cldnn/primitives/data.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {
@ -35,8 +35,8 @@ void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::Norma
// We create fake scale constant and fill it with ones to keep the same behavior as current primitive // We create fake scale constant and fill it with ones to keep the same behavior as current primitive
auto scale = std::make_shared<ngraph::op::v0::Constant>(op->get_output_element_type(0), ngraph::Shape{1}, std::vector<float>{1.0}); auto scale = std::make_shared<ngraph::op::v0::Constant>(op->get_output_element_type(0), ngraph::Shape{1}, std::vector<float>{1.0});
cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1}); cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1});
auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false); auto mem = p.GetEngine().allocate_memory(constLayout, false);
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor cldnn::mem_lock<int8_t> tmpPointer{mem, p.GetEngine().get_program_stream()};
auto buf = tmpPointer.data(); auto buf = tmpPointer.data();
auto bufSize = scale->get_output_tensor(0).size(); auto bufSize = scale->get_output_tensor(0).size();

View File

@ -8,7 +8,7 @@
#include "ngraph/op/one_hot.hpp" #include "ngraph/op/one_hot.hpp"
#include "api/one_hot.hpp" #include "cldnn/primitives/one_hot.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/pad.hpp" #include "ngraph/op/pad.hpp"
#include "api/border.hpp" #include "cldnn/primitives/border.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,10 +7,10 @@
#include "ngraph/op/parameter.hpp" #include "ngraph/op/parameter.hpp"
#include "api/input_layout.hpp" #include "cldnn/primitives/input_layout.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/data.hpp" #include "cldnn/primitives/data.hpp"
#include "api/concatenation.hpp" #include "cldnn/primitives/concatenation.hpp"
using namespace InferenceEngine; using namespace InferenceEngine;
@ -158,8 +158,8 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
if (bufIter != p.blobMemCache.end()) { if (bufIter != p.blobMemCache.end()) {
meanBlobID = bufIter->second; meanBlobID = bufIter->second;
} else { } else {
auto mem = cldnn::memory::allocate(p.GetEngine(), meanBlobLayout, 0, false); auto mem = p.GetEngine().allocate_memory(meanBlobLayout, false);
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor cldnn::mem_lock<int8_t> tmpPointer{ mem, p.GetEngine().get_program_stream() };
auto buf = tmpPointer.data(); auto buf = tmpPointer.data();
auto bufSize = meanBlobLayout.bytes_count(); auto bufSize = meanBlobLayout.bytes_count();

View File

@ -8,7 +8,7 @@
#include "ngraph/op/max_pool.hpp" #include "ngraph/op/max_pool.hpp"
#include "ngraph/op/avg_pool.hpp" #include "ngraph/op/avg_pool.hpp"
#include "api/pooling.hpp" #include "cldnn/primitives/pooling.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/prior_box.hpp" #include "ngraph/op/prior_box.hpp"
#include "ngraph/op/prior_box_clustered.hpp" #include "ngraph/op/prior_box_clustered.hpp"
#include "api/prior_box.hpp" #include "cldnn/primitives/prior_box.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include "ngraph/op/proposal.hpp" #include "ngraph/op/proposal.hpp"
#include "api/proposal.hpp" #include "cldnn/primitives/proposal.hpp"
#include "api/mutable_data.hpp" #include "cldnn/primitives/mutable_data.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {
@ -62,7 +62,7 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
DefaultFormatForDims(op->get_output_shape(1).size()), DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1))); CldnnTensorFromIEDims(op->get_output_shape(1)));
auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout); auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write"; cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory); auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory);

View File

@ -16,9 +16,9 @@
#include "ngraph/op/max.hpp" #include "ngraph/op/max.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/reduce.hpp" #include "cldnn/primitives/reduce.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/reshape.hpp" #include "cldnn/primitives/reshape.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/region_yolo.hpp" #include "ngraph/op/region_yolo.hpp"
#include "api/region_yolo.hpp" #include "cldnn/primitives/region_yolo.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/reorg_yolo.hpp" #include "ngraph/op/reorg_yolo.hpp"
#include "api/reorg_yolo.hpp" #include "cldnn/primitives/reorg_yolo.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -9,8 +9,8 @@
#include "ngraph/op/squeeze.hpp" #include "ngraph/op/squeeze.hpp"
#include "ngraph/op/unsqueeze.hpp" #include "ngraph/op/unsqueeze.hpp"
#include "api/reshape.hpp" #include "cldnn/primitives/reshape.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/result.hpp" #include "ngraph/op/result.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
using namespace InferenceEngine; using namespace InferenceEngine;

View File

@ -7,7 +7,7 @@
#include "ngraph/op/reverse_sequence.hpp" #include "ngraph/op/reverse_sequence.hpp"
#include "api/reverse_sequence.hpp" #include "cldnn/primitives/reverse_sequence.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,12 +8,12 @@
#include "ngraph/op/lstm_cell.hpp" #include "ngraph/op/lstm_cell.hpp"
#include "ngraph/op/lstm_sequence.hpp" #include "ngraph/op/lstm_sequence.hpp"
#include "api/reshape.hpp" #include "cldnn/primitives/reshape.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/fully_connected.hpp" #include "cldnn/primitives/fully_connected.hpp"
#include "api/lstm.hpp" #include "cldnn/primitives/lstm.hpp"
#include "api/crop.hpp" #include "cldnn/primitives/crop.hpp"
#include "api/concatenation.hpp" #include "cldnn/primitives/concatenation.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {
cldnn::activation_func GetActivationFunc(std::string name) { cldnn::activation_func GetActivationFunc(std::string name) {

View File

@ -9,7 +9,7 @@
#include "ngraph/op/psroi_pooling.hpp" #include "ngraph/op/psroi_pooling.hpp"
#include "ngraph/op/deformable_psroi_pooling.hpp" #include "ngraph/op/deformable_psroi_pooling.hpp"
#include "api/roi_pooling.hpp" #include "cldnn/primitives/roi_pooling.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/scatter_elements_update.hpp" #include "ngraph/op/scatter_elements_update.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/scatter_elements_update.hpp" #include "cldnn/primitives/scatter_elements_update.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/scatter_nd_update.hpp" #include "ngraph/op/scatter_nd_update.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/scatter_nd_update.hpp" #include "cldnn/primitives/scatter_nd_update.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/scatter_update.hpp" #include "ngraph/op/scatter_update.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/scatter_update.hpp" #include "cldnn/primitives/scatter_update.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,9 +7,9 @@
#include "ngraph/op/select.hpp" #include "ngraph/op/select.hpp"
#include "api/select.hpp" #include "cldnn/primitives/select.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/reshape.hpp" #include "cldnn/primitives/reshape.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/shuffle_channels.hpp" #include "ngraph/op/shuffle_channels.hpp"
#include "api/shuffle_channels.hpp" #include "cldnn/primitives/shuffle_channels.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,8 +8,8 @@
#include "ngraph/op/softmax.hpp" #include "ngraph/op/softmax.hpp"
#include "ngraph/op/log_softmax.hpp" #include "ngraph/op/log_softmax.hpp"
#include "api/softmax.hpp" #include "cldnn/primitives/softmax.hpp"
#include "api/activation.hpp" #include "cldnn/primitives/activation.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/space_to_batch.hpp" #include "ngraph/op/space_to_batch.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/space_to_batch.hpp" #include "cldnn/primitives/space_to_batch.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,7 +7,7 @@
#include "ngraph/op/space_to_depth.hpp" #include "ngraph/op/space_to_depth.hpp"
#include "api/space_to_depth.hpp" #include "cldnn/primitives/space_to_depth.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,7 +8,7 @@
#include "ngraph/op/split.hpp" #include "ngraph/op/split.hpp"
#include "ngraph/op/variadic_split.hpp" #include "ngraph/op/variadic_split.hpp"
#include "api/crop.hpp" #include "cldnn/primitives/crop.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -8,9 +8,9 @@
#include "ngraph/op/strided_slice.hpp" #include "ngraph/op/strided_slice.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/strided_slice.hpp" #include "cldnn/primitives/strided_slice.hpp"
#include "api/reshape.hpp" #include "cldnn/primitives/reshape.hpp"
#include "api/crop.hpp" #include "cldnn/primitives/crop.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -13,11 +13,11 @@
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "ngraph/op/util/sub_graph_base.hpp" #include "ngraph/op/util/sub_graph_base.hpp"
#include "api/loop.hpp" #include "cldnn/primitives/loop.hpp"
#include "api/mutable_data.hpp" #include "cldnn/primitives/mutable_data.hpp"
#include "api/data.hpp" #include "cldnn/primitives/data.hpp"
#include "api/reorder.hpp" #include "cldnn/primitives/reorder.hpp"
#include "api/topology.hpp" #include "cldnn/graph/topology.hpp"
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
@ -28,9 +28,8 @@ namespace CLDNNPlugin {
template<class DATA_TYPE> template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) { static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
auto mem = cldnn::memory::allocate(p.GetEngine(), auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
{ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
auto ptr = mem.pointer<int64_t>();
*ptr.begin() = num; *ptr.begin() = num;
return {id, mem}; return {id, mem};
} }
@ -42,7 +41,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size()); const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx)); const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor); cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = cldnn::memory::allocate(p.GetEngine(), output_layout); auto mem = p.GetEngine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
return md; return md;
} }

View File

@ -7,7 +7,7 @@
#include "ngraph/op/tile.hpp" #include "ngraph/op/tile.hpp"
#include "api/tile.hpp" #include "cldnn/primitives/tile.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -7,8 +7,8 @@
#include "ngraph/op/topk.hpp" #include "ngraph/op/topk.hpp"
#include "api/arg_max_min.hpp" #include "cldnn/primitives/arg_max_min.hpp"
#include "api/mutable_data.hpp" #include "cldnn/primitives/mutable_data.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {
@ -71,7 +71,7 @@ void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>& op) {
DefaultFormatForDims(op->get_output_shape(1).size()), DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1))); CldnnTensorFromIEDims(op->get_output_shape(1)));
auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout); auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write"; cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, shared_memory); auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, shared_memory);

View File

@ -8,7 +8,7 @@
#include "ngraph/op/transpose.hpp" #include "ngraph/op/transpose.hpp"
#include "ngraph/op/constant.hpp" #include "ngraph/op/constant.hpp"
#include "api/permute.hpp" #include "cldnn/primitives/permute.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -41,7 +41,7 @@
#include "ngraph/op/hsigmoid.hpp" #include "ngraph/op/hsigmoid.hpp"
#include "ngraph/op/round.hpp" #include "ngraph/op/round.hpp"
#include "api/activation.hpp" #include "cldnn/primitives/activation.hpp"
namespace CLDNNPlugin { namespace CLDNNPlugin {

View File

@ -52,13 +52,6 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift()) if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift())
return false; return false;
// Don't reshape the first dnn layer since it breaks groups recognition
auto prevLayer = InferenceEngine::CNNNetPrevLayerSkipCertain(layer, 0, [](InferenceEngine::CNNLayerPtr ptr) {
return LayerInfo(ptr).isNonValuesChangable();
});
IE_ASSERT(prevLayer != nullptr);
if (LayerInfo(prevLayer).isInput()) return false;
// Don't reshape diagonallayers with bias connection // Don't reshape diagonallayers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
} }

View File

@ -85,8 +85,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
return LayerInfo(ptr).isNonValuesChangable(); return LayerInfo(ptr).isNonValuesChangable();
}); });
IE_ASSERT(inputLayer != nullptr); IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = (LayerInfo(prevLayer).has32BOutput() || LayerInfo(inputLayer).isInput()) ? size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
nextLayer->outData[0]->getDims().back() :
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue); std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr); IE_ASSERT(diagLayer != nullptr);

View File

@ -42,7 +42,7 @@ static int32_t as_int32_t(T v) {
} }
class OstreamHashWrapper final: public std::streambuf { class OstreamHashWrapper final: public std::streambuf {
std::size_t m_res = {}; std::size_t m_res = 0;
public: public:
std::size_t getResult() const { return m_res; } std::size_t getResult() const { return m_res; }
std::streamsize xsputn(const char* s, std::streamsize n) override { std::streamsize xsputn(const char* s, std::streamsize n) override {
@ -65,7 +65,7 @@ public:
////////////////////////////////////////////////// //////////////////////////////////////////////////
std::string NetworkCompilationContext::calculateFileInfo(const std::string& filePath) { std::string NetworkCompilationContext::calculateFileInfo(const std::string& filePath) {
size_t seed {}; size_t seed = 0;
auto absPath = filePath; auto absPath = filePath;
try { try {
absPath = FileUtils::absoluteFilePath(filePath); absPath = FileUtils::absoluteFilePath(filePath);

View File

@ -270,6 +270,12 @@ template <typename T, typename... Args>
std::shared_ptr<Node> fold_reshape(Args&&... args) { std::shared_ptr<Node> fold_reshape(Args&&... args) {
std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...); std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...);
if (node->get_output_size() == 1) { if (node->get_output_size() == 1) {
// issue #57985: remove fold_reshape & reuse nGraph implementation
const auto values = as_type_ptr<opset1::Constant>(node->input_value(1).get_node_shared_ptr())->template cast_vector<int64_t>();
if (std::any_of(values.begin(), values.end(), [](const int64_t value) { return (value == 0) || (value == -1); })) {
return fold<opset1::Reshape>(std::forward<Args>(args)...);
}
OutputVector folded; OutputVector folded;
if (is_type<opset1::Constant>(node->input_value(0).get_node_shared_ptr()) && if (is_type<opset1::Constant>(node->input_value(0).get_node_shared_ptr()) &&
is_type<opset1::Constant>(node->input_value(1).get_node_shared_ptr())) { is_type<opset1::Constant>(node->input_value(1).get_node_shared_ptr())) {

View File

@ -683,7 +683,7 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
auto levels_1 = fq->get_levels() - 1.f; auto levels_1 = fq->get_levels() - 1.f;
const size_t DHW = D * H * W; const size_t DHW = D * H * W;
const size_t IDHW = IC * D * H * W; const size_t IDHW = outChannelsShapeIndex == 0 ? IC * D * H * W : OC * D * H * W;
const auto values = constant->cast_vector<float>(); const auto values = constant->cast_vector<float>();
std::vector<float> quantizedValues(OC * IC * D * H * W); std::vector<float> quantizedValues(OC * IC * D * H * W);

View File

@ -106,7 +106,6 @@ void jit_load_emitter::emit_isa(const Xbyak::Reg64 &reg_src, int offset_byte, In
break; break;
case Precision::I32: case Precision::I32:
if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16)) { if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16)) {
h->uni_vroundps(Vmm(out_vec_idx), Vmm(out_vec_idx), 3); // rounding to zero
h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx)); h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx));
} }
break; break;
@ -511,6 +510,11 @@ size_t jit_store_emitter::aux_vecs_count() const {
size_t jit_store_emitter::get_inputs_num() const { return 1; } size_t jit_store_emitter::get_inputs_num() const { return 1; }
// Forwards the emit_data() request to the emu_vcvtneps2bf16 helper.
// When no helper is set (the pointer is empty) the call does nothing.
void jit_store_emitter::emit_data() const {
    if (!emu_vcvtneps2bf16) {
        return;
    }
    emu_vcvtneps2bf16->emit_data();
}
void jit_store_emitter::emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs, void jit_store_emitter::emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs, const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
const emitter_context *emit_context) const { const emitter_context *emit_context) const {
@ -552,7 +556,6 @@ template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
switch (src_prc) { switch (src_prc) {
case Precision::FP32: case Precision::FP32:
if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16)) { if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16)) {
h->uni_vroundps(Vmm(in_vec_idx), Vmm(in_vec_idx), 3); // rounding to zero
h->uni_vcvtps2dq(Vmm(in_vec_idx), Vmm(in_vec_idx)); h->uni_vcvtps2dq(Vmm(in_vec_idx), Vmm(in_vec_idx));
} }
break; break;

View File

@ -18,8 +18,8 @@ struct load_emitter_context : public emitter_context {
load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8), load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8),
offset_byte_(0), is_fill_(false), fill_value_("zero") {} offset_byte_(0), is_fill_(false), fill_value_("zero") {}
load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero", int offset_byte = 0): load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, int offset_byte = 0, bool is_fill = false, std::string fill_value = "zero"):
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), is_fill_(is_fill), fill_value_(fill_value), offset_byte_(offset_byte) {} src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), offset_byte_(offset_byte), is_fill_(is_fill), fill_value_(fill_value) {}
int offset_byte_; int offset_byte_;
int load_num_; int load_num_;
@ -124,6 +124,8 @@ public:
size_t get_inputs_num() const override; size_t get_inputs_num() const override;
void emit_data() const override;
std::shared_ptr<jit_emu_vcvtneps2bf16> get_emu_vcvtneps2bf16() const { std::shared_ptr<jit_emu_vcvtneps2bf16> get_emu_vcvtneps2bf16() const {
return emu_vcvtneps2bf16; return emu_vcvtneps2bf16;
} }

View File

@ -306,7 +306,7 @@ private:
inline void worker_tail_planar() { inline void worker_tail_planar() {
Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32; Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32;
load_emitter->emit_code({static_cast<size_t>(reg_src.getIdx())}, {static_cast<size_t>(vmm_val.getIdx())}, load_emitter->emit_code({static_cast<size_t>(reg_src.getIdx())}, {static_cast<size_t>(vmm_val.getIdx())},
std::make_shared<load_emitter_context>(jcp_.src_prc, dst_prc, tail_num, true, "zero"), std::make_shared<load_emitter_context>(jcp_.src_prc, dst_prc, tail_num, 0, true),
{}, {load_pool_gpr_idxs}); {}, {load_pool_gpr_idxs});
if (jcp_.normalize_variance) { if (jcp_.normalize_variance) {
@ -477,8 +477,7 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator
this->postamble(); this->postamble();
load_emitter->emit_data(); load_emitter->emit_data();
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr) store_emitter->emit_data();
store_emitter->get_emu_vcvtneps2bf16()->emit_data();
for (auto& inj : eltwise_injectors) for (auto& inj : eltwise_injectors)
inj->prepare_table(); inj->prepare_table();

View File

@ -88,8 +88,7 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi
this->postamble(); this->postamble();
load_emitter->emit_data(); load_emitter->emit_data();
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr) store_emitter->emit_data();
store_emitter->get_emu_vcvtneps2bf16()->emit_data();
} }
private: private:
@ -155,7 +154,7 @@ private:
Vmm vmm_max = get_acc_reg(i); Vmm vmm_max = get_acc_reg(i);
load_emitter->emit_code({static_cast<size_t>(reg_input.getIdx())}, {static_cast<size_t>(vmm_max.getIdx())}, load_emitter->emit_code({static_cast<size_t>(reg_input.getIdx())}, {static_cast<size_t>(vmm_max.getIdx())},
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off), std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, i * src_c_off),
{}, load_pool_gpr_idxs); {}, load_pool_gpr_idxs);
} }
@ -169,7 +168,7 @@ private:
Vmm vmm_src = get_src_reg(i); Vmm vmm_src = get_src_reg(i);
load_emitter->emit_code({static_cast<size_t>(aux_reg_input1.getIdx())}, {static_cast<size_t>(vmm_src.getIdx())}, load_emitter->emit_code({static_cast<size_t>(aux_reg_input1.getIdx())}, {static_cast<size_t>(vmm_src.getIdx())},
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off), std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, i * src_c_off),
{}, load_pool_gpr_idxs); {}, load_pool_gpr_idxs);
if (isa == cpu::x64::sse41) { if (isa == cpu::x64::sse41) {
@ -222,7 +221,7 @@ private:
for (int i = 0; i < c_blocks; i++) { for (int i = 0; i < c_blocks; i++) {
const int src_c_off = i * jpp_.ih * jpp_.iw * jpp_.c_block * jpp_.src_data_size; const int src_c_off = i * jpp_.ih * jpp_.iw * jpp_.c_block * jpp_.src_data_size;
const auto load_context = std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", src_c_off); const auto load_context = std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, src_c_off);
mov(aux_reg_input, reg_input); mov(aux_reg_input, reg_input);

View File

@ -12,9 +12,5 @@
NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::Function> f) { bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::Function> f) {
ngraph::pass::Manager m(get_pass_config());
m.register_pass<Pruning>();
m.run_passes(f);
return false; return false;
} }

View File

@ -90,21 +90,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
} }
// Thin forwarding wrapper: hands all four arguments, unchanged and in the same
// order, to calculate_nv12_to_rgb_impl and returns nothing.
// NOTE(review): srcY / dstRGBx look like arrays of row pointers and srcUV a
// single chroma row — confirm against calculate_nv12_to_rgb_impl.
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width) {
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
}
// Thin forwarding wrapper: hands all five arguments, unchanged and in the same
// order, to calculate_i420_to_rgb_impl and returns nothing.
// NOTE(review): srcU / srcV are presumably the separate chroma rows of an I420
// image — confirm against calculate_i420_to_rgb_impl.
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width) {
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
}
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz, void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap, const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
int xmaxdf, const short xindex[], const Q0_16 xalpha[], int xmaxdf, const short xindex[], const Q0_16 xalpha[],
@ -119,14 +104,6 @@ void calcRowArea_32F(float dst[], const float *src[], const Size& inSz,
calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf); calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf);
} }
// Forwards the uint8_t row copy to copyRow_8U_impl with the same
// in / out / length arguments; no work is done in this wrapper itself.
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
copyRow_8U_impl(in, out, length);
}
// Forwards the float row copy to copyRow_32F_impl with the same
// in / out / length arguments; no work is done in this wrapper itself.
void copyRow_32F(const float in[], float out[], int length) {
copyRow_32F_impl(in, out, length);
}
// Resize (bi-linear, 32F) // Resize (bi-linear, 32F)
void calcRowLinear_32F(float* dst[], void calcRowLinear_32F(float* dst[],
const float* src0[], const float* src0[],
@ -708,6 +685,14 @@ void calcRowLinear_8UC1(uint8_t* dst[],
} }
} }
} // namespace neon } // namespace neon
// Explicit instantiation definitions for the neon_tag dispatch tag:
// chanToPlaneRowImpl for uint8_t and float element types, plus the
// nv12ToRgbRowImpl and i420ToRgbRowImpl row kernels.
// The template bodies themselves are not visible in this part of the file.
template void chanToPlaneRowImpl(neon_tag, const uint8_t* in, int chan, int chs, uint8_t* out, const int length);
template void chanToPlaneRowImpl(neon_tag, const float* in, int chan, int chs, float * out, const int length);
template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -167,26 +167,31 @@ void splitRow_32FC4(const float in[],
float out3[], float out3[],
int length); int length);
// Declaration only; the definition is not visible here.
// NOTE(review): parameters presumably are Y row pointers, one UV row, output
// row pointers and the row width — confirm against the definition.
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width);
void calculate_i420_to_rgb(const uchar **srcY, void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU, const uchar *srcU,
const uchar *srcV, const uchar *srcV,
uchar **dstRGBx, uchar **dstRGBx,
int width); int width);
// Declarations of the row-copy entry points for uint8_t and float rows.
// Each takes an input array, an output array and a length; the definitions
// are not visible here.
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length);
void copyRow_32F(const float in[],
float out[],
int length);
} // namespace neon } // namespace neon
// Function template declaration parameterised on an ISA dispatch tag type and
// the element type T. The first (unnamed) parameter is the tag value.
// NOTE(review): chan / chs presumably are the selected channel index and the
// channel count — confirm against the template definition.
template<typename isa_tag_t, typename T>
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
// Explicit instantiation declarations for neon_tag with uint8_t and float:
// `extern template` suppresses implicit instantiation in including translation
// units, so the matching explicit instantiation definitions must exist elsewhere.
extern template void chanToPlaneRowImpl(neon_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
extern template void chanToPlaneRowImpl(neon_tag, const float* in, const int chan, const int chs, float * out, const int length);
// Function template declaration parameterised on an ISA dispatch tag type;
// the first (unnamed) parameter is the tag value.
template<typename isa_tag_t>
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
// Explicit instantiation declaration for neon_tag: the definition is expected
// to come from an explicit instantiation in another translation unit.
extern template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
// Function template declaration parameterised on an ISA dispatch tag type;
// the first (unnamed) parameter is the tag value. Unlike nv12ToRgbRowImpl it
// takes two separate chroma row pointers (u_row and v_row).
template<typename isa_tag_t>
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
// Explicit instantiation declaration for neon_tag: the definition is expected
// to come from an explicit instantiation in another translation unit.
extern template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -107,21 +107,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
} }
// Thin forwarding wrapper: hands all four arguments, unchanged and in the same
// order, to calculate_nv12_to_rgb_impl and returns nothing.
// NOTE(review): srcY / dstRGBx look like arrays of row pointers and srcUV a
// single chroma row — confirm against calculate_nv12_to_rgb_impl.
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width) {
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
}
// Thin forwarding wrapper: hands all five arguments, unchanged and in the same
// order, to calculate_i420_to_rgb_impl and returns nothing.
// NOTE(review): srcU / srcV are presumably the separate chroma rows of an I420
// image — confirm against calculate_i420_to_rgb_impl.
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width) {
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
}
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz, void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap, const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
int xmaxdf, const short xindex[], const Q0_16 xalpha[], int xmaxdf, const short xindex[], const Q0_16 xalpha[],
@ -555,13 +540,6 @@ void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
} }
// Forwards the uint8_t row copy to copyRow_8U_impl with the same
// in / out / length arguments; no work is done in this wrapper itself.
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
copyRow_8U_impl(in, out, length);
}
// Forwards the float row copy to copyRow_32F_impl with the same
// in / out / length arguments; no work is done in this wrapper itself.
void copyRow_32F(const float in[], float out[], int length) {
copyRow_32F_impl(in, out, length);
}
void calcRowLinear_32F(float *dst[], void calcRowLinear_32F(float *dst[],
const float *src0[], const float *src0[],
const float *src1[], const float *src1[],
@ -575,6 +553,15 @@ void calcRowLinear_32F(float *dst[],
} }
} // namespace avx } // namespace avx
// Explicit instantiation definitions of the row kernels for avx2_tag.
// chanToPlaneRowImpl is instantiated for uint8_t and float elements; the two colour
// conversions operate on uint8_t data only.
// NOTE(review): these presumably pair with `extern template` declarations in the matching
// header so that other translation units link against the code emitted here - confirm.
template void chanToPlaneRowImpl(avx2_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
template void chanToPlaneRowImpl(avx2_tag, const float* in, const int chan, const int chs, float* out, const int length);
template void nv12ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* uv_row,
uint8_t** out_rows, const int buf_width);
template void i420ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -181,27 +181,29 @@ void splitRow_32FC4(const float in[],
float out2[], float out2[],
float out3[], float out3[],
int length); int length);
// Prototypes on the removed side of the diff.

// NV12 -> RGB conversion entry point (luma row pointers, one UV row, output row pointers).
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width);
// I420 -> RGB conversion entry point (luma row pointers, separate U and V rows).
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width);
// Row copy for 8-bit unsigned elements.
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length);
// Row copy for 32-bit float elements.
void copyRow_32F(const float in[],
float out[],
int length);
} // namespace avx } // namespace avx
// Tag-dispatched row kernels. Each primary template is only declared here; the
// `extern template` lines tell including translation units not to instantiate the avx2_tag
// versions implicitly, so an explicit instantiation must exist elsewhere.

// Channel extraction for element type T; the first (unnamed) argument selects the ISA.
template<typename isa_tag_t, typename T>
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
extern template void chanToPlaneRowImpl(avx2_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
extern template void chanToPlaneRowImpl(avx2_tag, const float* in, const int chan, const int chs, float * out, const int length);
// NV12 -> RGB row conversion.
template<typename isa_tag_t>
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row,
uint8_t** out_rows, const int buf_width);
extern template void nv12ToRgbRowImpl(avx2_tag, const uint8_t** y_rows,
const uint8_t* uv_row, uint8_t** out_rows,
const int buf_width);
// I420 -> RGB row conversion.
template<typename isa_tag_t>
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
extern template void i420ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -101,21 +101,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
} }
// Thin wrapper: forwards all four arguments unchanged to calculate_nv12_to_rgb_impl.
// (This definition is on the removed side of the diff.)
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width) {
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
}
// Thin wrapper: forwards all five arguments unchanged to calculate_i420_to_rgb_impl.
// (This definition is on the removed side of the diff.)
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width) {
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
}
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz, void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap, const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
int xmaxdf, const short xindex[], const Q0_16 xalpha[], int xmaxdf, const short xindex[], const Q0_16 xalpha[],
@ -636,14 +621,6 @@ void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
} }
// Thin wrapper: forwards to copyRow_8U_impl with the same input, output and length.
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
copyRow_8U_impl(in, out, length);
}
// Thin wrapper: forwards to copyRow_32F_impl with the same input, output and length.
void copyRow_32F(const float in[], float out[], int length) {
copyRow_32F_impl(in, out, length);
}
void calcRowLinear_32F(float *dst[], void calcRowLinear_32F(float *dst[],
const float *src0[], const float *src0[],
const float *src1[], const float *src1[],
@ -657,6 +634,14 @@ void calcRowLinear_32F(float *dst[],
} }
} // namespace avx512 } // namespace avx512
// Explicit instantiation definitions of the row kernels for avx512_tag.
// chanToPlaneRowImpl is instantiated for uint8_t and float elements; the two colour
// conversions operate on uint8_t data only.
// NOTE(review): these presumably pair with `extern template` declarations in the matching
// header - confirm.
template void chanToPlaneRowImpl(avx512_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
template void chanToPlaneRowImpl(avx512_tag, const float* in, const int chan, const int chs, float* out, const int length);
template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -180,27 +180,26 @@ void splitRow_32FC4(const float in[],
float out2[], float out2[],
float out3[], float out3[],
int length); int length);
// Prototypes on the removed side of the diff.

// NV12 -> RGB conversion entry point (luma row pointers, one UV row, output row pointers).
void calculate_nv12_to_rgb(const uchar **srcY,
const uchar *srcUV,
uchar **dstRGBx,
int width);
// I420 -> RGB conversion entry point (luma row pointers, separate U and V rows).
void calculate_i420_to_rgb(const uchar **srcY,
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width);
// Row copy for 8-bit unsigned elements.
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length);
// Row copy for 32-bit float elements.
void copyRow_32F(const float in[],
float out[],
int length);
} // namespace avx512 } // namespace avx512
// Tag-dispatched row kernels. Each primary template is only declared here; the
// `extern template` lines tell including translation units not to instantiate the avx512_tag
// versions implicitly, so an explicit instantiation must exist elsewhere.

// Channel extraction for element type T; the first (unnamed) argument selects the ISA.
template<typename isa_tag_t, typename T>
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
extern template void chanToPlaneRowImpl(avx512_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
extern template void chanToPlaneRowImpl(avx512_tag, const float* in, const int chan, const int chs, float* out, const int length);
// NV12 -> RGB row conversion.
template<typename isa_tag_t>
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
extern template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
// I420 -> RGB row conversion.
template<typename isa_tag_t>
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
extern template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -1365,33 +1365,13 @@ void splitRow_32FC4(const float in[],
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
} }
void calculate_nv12_to_rgb(const uchar **srcY, template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
const uchar *srcUV, template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan, const int chs, float* out, const int length);
uchar **dstRGBx,
int width) {
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
}
void calculate_i420_to_rgb(const uchar **srcY, template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
const uchar *srcU,
const uchar *srcV,
uchar **dstRGBx,
int width) {
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
}
void copyRow_8U(const uint8_t in[],
uint8_t out[],
int length) {
copyRow_8U_impl(in, out, length);
}
void copyRow_32F(const float in[],
float out[],
int length) {
copyRow_32F_impl(in, out, length);
}
template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -180,25 +180,25 @@ void splitRow_32FC4(const float in[],
float out3[], float out3[],
int length); int length);
void calculate_nv12_to_rgb(const uchar **srcY, template<typename isa_tag_t, typename T>
const uchar *srcUV, void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs,
uchar **dstRGBx, T* out, const int length);
int width);
void calculate_i420_to_rgb(const uchar **srcY, extern template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan,
const uchar *srcU, const int chs, uint8_t* out, const int length);
const uchar *srcV, extern template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan,
uchar **dstRGBx, const int chs, float* out, const int length);
int width); template<typename isa_tag_t>
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
void copyRow_8U(const uint8_t in[], extern template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
uint8_t out[],
int length);
void copyRow_32F(const float in[], template<typename isa_tag_t>
float out[], void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
int length); const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
extern template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row,
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -468,15 +468,86 @@ struct type_to_type {};
template <typename typelist> template <typename typelist>
struct type_dispatch_impl; struct type_dispatch_impl;
//FIXME: add test for type_dispatch
template <template<typename ...> class typelist, typename... type> template <template<typename ...> class typelist, typename... type>
struct type_dispatch_impl<typelist<type...>> { struct type_dispatch_impl<typelist<type...>> {
template <typename result_t, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t> template <typename result_t, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t>
static result_t dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value) { static result_t dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value) {
result_t res = default_value; result_t res = default_value;
std::initializer_list<int> ({(type_id == type_to_id(type_to_type<type>{}) ? (res = type_to_value(type_to_type<type>{})), 0 : 0)...}); bool matched = false;
std::initializer_list<int> ({
!matched && (type_id == type_to_id(type_to_type<type>{})) ?
(matched = true, res = type_to_value(type_to_type<type>{})), 0
: 0
...
});
return res; return res;
} }
// Predicate-driven dispatch: returns type_to_value(type_to_type<T>{}) for the first type T
// of the list for which pred(type_to_type<T>{}) is true, or default_value converted to
// result_t when no type satisfies the predicate.
template <typename result_t, typename default_t, typename pred_t, typename type_to_value_t>
static result_t dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value) {
// Start from the fallback; it is returned unchanged when nothing matches.
result_t res = default_value;
bool matched = false;
// The pack expands into one braced-list element per type. Elements of a braced list are
// evaluated left to right and `!matched &&` short-circuits, so after the first match
// neither pred nor type_to_value is invoked for the remaining types.
std::initializer_list<int> ({
!matched && pred(type_to_type<type>{}) ?
(matched = true, res = type_to_value(type_to_type<type>{})), 0
: 0
...
});
return res;
}
};
// Joins two variadic type lists into one.
// Only the partial specialization below is defined, so both arguments must be
// instantiations of variadic class templates.
template<class lhs_list, class rhs_list>
struct concat;

template<
    template<class...> class lhs_tmpl, class... lhs_items,
    template<class...> class rhs_tmpl, class... rhs_items>
struct concat<lhs_tmpl<lhs_items...>, rhs_tmpl<rhs_items...>> {
    // The result is built with the left-hand list template; the right-hand
    // template only contributes its elements.
    using type = lhs_tmpl<lhs_items..., rhs_items...>;
};

// Convenience alias for concat<...>::type.
template<class lhs_list, class rhs_list>
using concat_t = typename concat<lhs_list, rhs_list>::type;
// Type-level equality: std::true_type when both arguments are the same type,
// std::false_type otherwise.
template<class lhs_t, class rhs_t>
using is_same_t = std::integral_constant<bool, std::is_same<lhs_t, rhs_t>::value>;
// Compile-time conditional on a bool: the primary template covers the `true` case
// and the partial specialization covers `false`.
template<bool cond, class then_t, class else_t>
struct if_c_impl {
    using type = then_t;
};

template<class then_t, class else_t>
struct if_c_impl<false, then_t, else_t> {
    using type = else_t;
};

// then_t when cond is true, else_t otherwise.
template<bool cond, class then_t, class else_t>
using if_c = typename if_c_impl<cond, then_t, else_t>::type;

// Same selection, driven by a type that exposes a static `value` member;
// any non-zero value picks then_t.
template<class cond_t, class then_t, class else_t>
using if_ = typename if_c_impl<(cond_t::value != 0), then_t, else_t>::type;
// remove<typelist, type>::type is `typelist` with every occurrence of `type` dropped.
// Only partial specializations are defined, so `typelist` must be an instantiation of a
// variadic class template.
template<typename typelist, typename type>
struct remove;
// Convenience alias for remove<...>::type.
template<typename typelist, typename type>
using remove_t = typename remove<typelist, type>::type;
// Recursive step for a non-empty list: the head contributes an empty list when it is the
// removed type and a one-element list otherwise; that piece is concatenated with the result
// of removing `t` from the tail. The recursion ends at the empty-list specialization.
template<template<typename ...> class list, typename head_t, typename ... types, typename t>
struct remove<list<head_t, types...>, t> {
using type = concat_t<
if_<is_same_t<head_t, t>, list<>, list<head_t>>,
remove_t<list<types...>, t>
>;
};
template<template<typename ...> class list, typename t>
struct remove<list<>, t> {
using type = list<>;
}; };
} // namespace } // namespace
@ -490,6 +561,13 @@ result_t type_dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_val
std::forward<default_t>(default_value)); std::forward<default_t>(default_value));
} }
template <typename typelist, typename default_t, typename pred_t, typename type_to_value_t,
typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {}))>
result_t type_dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value = {}) {
return type_dispatch_impl<typelist>::template dispatch<result_t>(std::forward<pred_t>(pred),
std::forward<type_to_value_t>(type_to_value),
std::forward<default_t>(default_value));
}
namespace { namespace {
struct cv_type_id { struct cv_type_id {
@ -668,81 +746,47 @@ GAPI_FLUID_KERNEL(FSplit4, Split4, false) {
}; };
//---------------------------------------------------------------------- //----------------------------------------------------------------------
using isas_set = typelist<
template<typename T> #ifdef HAVE_AVX512
static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, int length) { avx512_tag,
// AVX512 implementation of wide universal intrinsics is slower than AVX2.
// It is turned off until the cause isn't found out.
#if 0
#ifdef HAVE_AVX512
if (with_cpu_x86_avx512f()) {
if (std::is_same<T, uint8_t>::value && chs == 1) {
avx512::copyRow_8U(in, out, length);
return;
}
if (std::is_same<T, float>::value && chs == 1) {
avx512::copyRow_32F(reinterpret_cast<const float*>(in),
reinterpret_cast<float*>(out),
length);
return;
}
}
#endif // HAVE_AVX512
#endif #endif
#ifdef HAVE_AVX2
avx2_tag,
#endif
#ifdef HAVE_SSE
sse42_tag,
#endif
#ifdef HAVE_NEON
neon_tag,
#endif
//scalar "ISA" has to be the last one in the list,
//as the search for a supported ISA stops at the first match
scalar_tag>;
#ifdef HAVE_AVX512
bool is_present(avx512_tag) { return with_cpu_x86_avx512f(); }
#endif // HAVE_AVX512
#ifdef HAVE_AVX2 #ifdef HAVE_AVX2
if (with_cpu_x86_avx2()) { bool is_present(avx2_tag) { return with_cpu_x86_avx2(); }
if (std::is_same<T, uint8_t>::value && chs == 1) { #endif // HAVE_AVX2
avx::copyRow_8U(in, out, length);
return;
}
if (std::is_same<T, float>::value && chs == 1) { #ifdef HAVE_SSE
avx::copyRow_32F(reinterpret_cast<const float*>(in), bool is_present(sse42_tag) { return with_cpu_x86_sse42(); }
reinterpret_cast<float*>(out), #endif // HAVE_SSE
length);
return;
}
}
#endif // HAVE_AVX2
#ifdef HAVE_SSE
if (with_cpu_x86_sse42()) {
if (std::is_same<T, uint8_t>::value && chs == 1) {
copyRow_8U(in, out, length);
return;
}
if (std::is_same<T, float>::value && chs == 1) { #ifdef HAVE_NEON
copyRow_32F(reinterpret_cast<const float*>(in), bool is_present(neon_tag) { return true; }
reinterpret_cast<float*>(out), #endif // HAVE_NEON
length);
return;
}
}
#endif // HAVE_SSE
#ifdef HAVE_NEON //scalar version of kernels is always available
if (std::is_same<T, uint8_t>::value && chs == 1) { bool is_present(scalar_tag) { return true; }
neon::copyRow_8U(in, out, length);
return;
}
if (std::is_same<T, float>::value && chs == 1) { struct is_isa_present {
neon::copyRow_32F(reinterpret_cast<const float*>(in), template< typename isa_tag_t>
reinterpret_cast<float*>(out), bool operator()(type_to_type<isa_tag_t>) {
length); return is_present(isa_tag_t{});
return;
} }
#endif // HAVE_NEON };
const auto inT = reinterpret_cast<const T*>(in);
auto outT = reinterpret_cast< T*>(out);
for (int x = 0; x < length; x++) {
outT[x] = inT[x*chs + chan];
}
}
// GAPI_OCV_KERNEL(OCVChanToPlane, ChanToPlane) { // GAPI_OCV_KERNEL(OCVChanToPlane, ChanToPlane) {
// static void run(const cv::Mat &in, int chan, cv::Mat &out) { // static void run(const cv::Mat &in, int chan, cv::Mat &out) {
@ -774,15 +818,225 @@ static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, i
// } // }
// }; // };
namespace {

// Element types the channel-extraction kernel can be dispatched on.
using chan_to_plane_supported_types = typelist<uint8_t, float>;

// Scalar fallback: copies channel `chan` of an interleaved row with `chs` channels
// into a planar row of `length` elements.
template<typename T>
void chanToPlaneRowImpl(scalar_tag, const T* in, int chan, int chs, T* out, int length) {
    int x = 0;
    while (x < length) {
        // Element x of the requested channel sits at x*chs + chan in the interleaved row.
        const int src_idx = x*chs + chan;
        out[x] = in[src_idx];
        ++x;
    }
}

// Maps an element type to a byte-pointer row function that reinterprets its buffers as
// that element type and calls chanToPlaneRowImpl for the ISA given by isa_tag_t.
template<typename isa_tag_t>
struct typed_chan_to_plane_row {
    using p_f = void (*)(const uint8_t* in, int chan, int chs, uint8_t* out, int length);

    template <typename type>
    p_f operator()(type_to_type<type>) {
        // A captureless lambda converts to the plain function pointer type p_f.
        p_f row_fn = [](const uint8_t* src_bytes, int chan_idx, int chan_count,
                        uint8_t* dst_bytes, int count) {
            chanToPlaneRowImpl(isa_tag_t{},
                               reinterpret_cast<const type*>(src_bytes),
                               chan_idx, chan_count,
                               reinterpret_cast<type*>(dst_bytes),
                               count);
        };
        return row_fn;
    }
};

} //namespace
namespace {

// Element types the NV12 -> RGB kernel can be dispatched on.
using nv12_to_rgb_supported_types = typelist<uint8_t>;

// Scalar fallback: converts two luma rows plus one interleaved UV row into two rows of
// packed 3-byte pixels, stored in r, g, b order.
// NOTE(review): the loop advances two columns at a time and always touches column i + 1,
// so it presumably expects an even buf_width - confirm against callers.
void nv12ToRgbRowImpl(scalar_tag, const uint8_t** y_rows, const uint8_t* uv_row,
                      uint8_t** out_rows, const int buf_width) {
    for (int col = 0; col < buf_width; col += 2) {
        // One (U, V) pair is shared by a 2x2 block of luma samples.
        const uint8_t u = uv_row[col];
        const uint8_t v = uv_row[col + 1];
        int ruv, guv, buv;
        uvToRGBuv(u, v, ruv, guv, buv);

        // Visit the block in the order (row 0: col, col+1), (row 1: col, col+1).
        for (int k = 0; k < 4; ++k) {
            const int row = k / 2;
            const int px  = col + k % 2;
            uint8_t r, g, b;
            yRGBuvToRGB(y_rows[row][px], ruv, guv, buv, r, g, b);
            uint8_t* dst = out_rows[row] + 3 * px;
            dst[0] = r;
            dst[1] = g;
            dst[2] = b;
        }
    }
}

// Maps an element type to a row function that reinterprets its buffers as that element
// type and calls nv12ToRgbRowImpl for the ISA given by isa_tag_t.
template<typename isa_tag_t>
struct typed_nv12_to_rgb_row {
    using p_f = void (*)(const uint8_t** y_rows, const uint8_t* uv_row,
                         uint8_t** out_rows, const int buf_width);

    template <typename type>
    p_f operator()(type_to_type<type>) {
        // A captureless lambda converts to the plain function pointer type p_f.
        p_f row_fn = [](const uint8_t** luma_rows, const uint8_t* chroma_row,
                        uint8_t** dst_rows, const int width) {
            nv12ToRgbRowImpl(isa_tag_t{},
                             reinterpret_cast<const type**>(luma_rows),
                             reinterpret_cast<const type*>(chroma_row),
                             reinterpret_cast<type**>(dst_rows),
                             width);
        };
        return row_fn;
    }
};

} // namespace
namespace {

// Element types the I420 -> RGB kernel can be dispatched on.
using i420_to_rgb_supported_types = typelist<uint8_t>;

// Scalar fallback: converts two luma rows plus separate U and V rows into two rows of
// packed 3-byte pixels, stored in r, g, b order.
// NOTE(review): the loop advances two columns at a time and always touches column i + 1,
// so it presumably expects an even buf_width - confirm against callers.
static void i420ToRgbRowImpl(scalar_tag, const uint8_t** y_rows,
                             const uint8_t* u_row,
                             const uint8_t* v_row,
                             uint8_t** out_rows,
                             const int buf_width) {
    for (int col = 0; col < buf_width; col += 2) {
        // Chroma is subsampled horizontally: one U and one V sample per two columns.
        const int chroma_idx = col / 2;
        const uchar u = u_row[chroma_idx];
        const uchar v = v_row[chroma_idx];
        int ruv, guv, buv;
        uvToRGBuv(u, v, ruv, guv, buv);

        // Visit the 2x2 block in the order (row 0: col, col+1), (row 1: col, col+1).
        for (int k = 0; k < 4; ++k) {
            const int row = k / 2;
            const int px  = col + k % 2;
            uchar r, g, b;
            yRGBuvToRGB(y_rows[row][px], ruv, guv, buv, r, g, b);
            uint8_t* dst = out_rows[row] + 3 * px;
            dst[0] = r;
            dst[1] = g;
            dst[2] = b;
        }
    }
}

// Maps an element type to a row function that reinterprets its buffers as that element
// type and calls i420ToRgbRowImpl for the ISA given by isa_tag_t.
template<typename isa_tag_t>
struct typed_i420_to_rgb_row {
    using p_f = void (*)(const uint8_t** y_rows, const uint8_t* u_row, const uint8_t* v_row,
                         uint8_t** out_rows, const int buf_width);

    template <typename type>
    p_f operator()(type_to_type<type>) {
        // A captureless lambda converts to the plain function pointer type p_f.
        p_f row_fn = [](const uint8_t** luma_rows, const uint8_t* u_plane_row,
                        const uint8_t* v_plane_row, uint8_t** dst_rows, const int width) {
            i420ToRgbRowImpl(isa_tag_t{},
                             reinterpret_cast<const type**>(luma_rows),
                             reinterpret_cast<const type*>(u_plane_row),
                             reinterpret_cast<const type*>(v_plane_row),
                             reinterpret_cast<type**>(dst_rows),
                             width);
        };
        return row_fn;
    }
};

} // namespace
template <typename isa_tag_t>
struct choose_impl {
GAPI_FLUID_KERNEL(FChanToPlane, ChanToPlane, false) { GAPI_FLUID_KERNEL(FChanToPlane, ChanToPlane, false) {
static const int Window = 1; static const int Window = 1;
static void run(const cv::gapi::fluid::View& in, int chan, static void run(const cv::gapi::fluid::View& in, int chan,
cv::gapi::fluid::Buffer& out) { cv::gapi::fluid::Buffer& out) {
const auto rowFunc = (in.meta().depth == CV_8U) ? &chanToPlaneRow<uint8_t> : &chanToPlaneRow<float>; GAPI_DbgAssert(is_cv_type_in_list<chan_to_plane_supported_types>(out.meta().depth));
const auto rowFunc = type_dispatch<chan_to_plane_supported_types>(out.meta().depth, cv_type_id{}, typed_chan_to_plane_row<isa_tag_t>{}, nullptr);
GAPI_DbgAssert(rowFunc);
rowFunc(in.InLineB(0), chan, in.meta().chan, out.OutLineB(), in.length()); rowFunc(in.InLineB(0), chan, in.meta().chan, out.OutLineB(), in.length());
} }
}; };
// Fluid kernel for NV12 -> RGB conversion, bound to the ISA selected by the enclosing
// template's isa_tag_t. Each run() call processes two output lines (LPI = 2).
GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) {
static const int Window = 1;
static const int LPI = 2;
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
static void run(const cv::gapi::fluid::View & in_y,
const cv::gapi::fluid::View & in_uv,
cv::gapi::fluid::Buffer & out) {
// Debug-only check that the output depth is one the row function supports.
GAPI_DbgAssert(is_cv_type_in_list<nv12_to_rgb_supported_types>(out.meta().depth));
// One UV line is paired with two luma lines and two output lines.
const uchar* uv_row = in_uv.InLineB(0);
const uchar* y_rows[2] = { in_y.InLineB(0), in_y.InLineB(1) };
uchar* out_rows[2] = { out.OutLineB(0), out.OutLineB(1) };
int buf_width = out.length();
// Pick the row function by output depth; nullptr is the result for an unsupported depth.
const auto rowFunc = type_dispatch<nv12_to_rgb_supported_types>(out.meta().depth, cv_type_id{}, typed_nv12_to_rgb_row<isa_tag_t>{}, nullptr);
// NOTE(review): the null check is a debug assertion only; presumably compiled out in
// release builds, in which case an unsupported depth would call through nullptr - confirm.
GAPI_DbgAssert(rowFunc);
rowFunc(y_rows, uv_row, out_rows, buf_width);
}
};
// Fluid kernel for I420 -> RGB conversion, bound to the ISA selected by the enclosing
// template's isa_tag_t. Each run() call processes two output lines (LPI = 2).
GAPI_FLUID_KERNEL(FI420toRGB, I420toRGB, false) {
static const int Window = 1;
static const int LPI = 2;
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
static void run(const cv::gapi::fluid::View & in_y,
const cv::gapi::fluid::View & in_u,
const cv::gapi::fluid::View & in_v,
cv::gapi::fluid::Buffer & out) {
// Debug-only check that the output depth is one the row function supports.
GAPI_DbgAssert(is_cv_type_in_list<i420_to_rgb_supported_types>(out.meta().depth));
// One U line and one V line are paired with two luma lines and two output lines.
const uchar* u_row = in_u.InLineB(0);
const uchar* v_row = in_v.InLineB(0);
const uchar* y_rows[2] = { in_y.InLineB(0), in_y.InLineB(1) };
uchar* out_rows[2] = { out.OutLineB(0), out.OutLineB(1) };
int buf_width = out.length();
// Both chroma planes must have the same line length.
GAPI_DbgAssert(in_u.length() == in_v.length());
// Pick the row function by output depth; nullptr is the result for an unsupported depth.
const auto rowFunc = type_dispatch<i420_to_rgb_supported_types>(out.meta().depth, cv_type_id{}, typed_i420_to_rgb_row<isa_tag_t>{}, nullptr);
// NOTE(review): the null check is a debug assertion only; presumably compiled out in
// release builds, in which case an unsupported depth would call through nullptr - confirm.
GAPI_DbgAssert(rowFunc);
rowFunc(y_rows, u_row, v_row, out_rows, buf_width);
}
};
};
namespace {

// Functor handed to type_dispatch: when invoked for an ISA tag it adds that ISA's
// colour-conversion and channel-extraction kernels to the referenced package.
struct ColorConversionISA {
    // Package that receives the kernels; held by reference, so it must outlive this object.
    cv::gapi::GKernelPackage& pckg;

    // explicit: a kernel package must never convert silently into this helper.
    explicit ColorConversionISA(cv::gapi::GKernelPackage& _pckg) : pckg(_pckg) {}

    // Registers the FI420toRGB, FNV12toRGB and FChanToPlane kernels of choose_impl<isa_tag_t>.
    template<typename isa_tag_t>
    bool operator()(type_to_type<isa_tag_t>) {
        pckg.include<typename choose_impl<isa_tag_t>::FI420toRGB>();
        pckg.include<typename choose_impl<isa_tag_t>::FNV12toRGB>();
        pckg.include<typename choose_impl<isa_tag_t>::FChanToPlane>();
        //at the moment type_dispatch requires something to be returned by the lambda
        return true;
    }
};

} //namespace
// Builds the kernel package holding the colour-conversion and channel-extraction kernels
// of the first ISA in the candidate list that is_isa_present reports as present.
cv::gapi::GKernelPackage FColorConversionChooseISA() {
    // At the moment AVX512 implementation of wide universal intrinsics is slower than AVX2.
    // So, disable it for now.
    using candidate_isas = remove_t<isas_set, avx512_tag>;

    cv::gapi::GKernelPackage result;
    ColorConversionISA add_kernels{result};

    // The dispatch result is not needed: registering the kernels is the side effect we want.
    type_dispatch<candidate_isas>(is_isa_present{}, add_kernels, false);

    return result;
}
//---------------------------------------------------------------------- //----------------------------------------------------------------------
G_TYPED_KERNEL(ScalePlane8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_8u") { G_TYPED_KERNEL(ScalePlane8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_8u") {
@ -2234,180 +2488,6 @@ GAPI_FLUID_KERNEL(FScalePlaneArea8u, ScalePlaneArea8u, true) {
} }
}; };
// Fixed-point YUV -> RGB coefficients, scaled by 2^ITUR_BT_601_SHIFT (2^20).
// Dividing by 2^20 gives approximately: CY 1.164, CUB 2.018, CUG -0.391, CVG -0.813, CVR 1.596.
// NOTE(review): the names indicate ITU-R BT.601; confirm against the standard if it matters.
static const int ITUR_BT_601_CY = 1220542;
static const int ITUR_BT_601_CUB = 2116026;
static const int ITUR_BT_601_CUG = -409993;
static const int ITUR_BT_601_CVG = -852492;
static const int ITUR_BT_601_CVR = 1673527;
// Number of fractional bits in the coefficients above.
static const int ITUR_BT_601_SHIFT = 20;
// Computes the chroma-only fixed-point contributions to R, G and B for one (u, v) pair.
// Each output already includes the rounding term for the final right shift.
static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) {
    // Half of one output unit in the fixed-point scale; makes the later shift round.
    const int half = 1 << (ITUR_BT_601_SHIFT - 1);
    // Chroma samples are centred on 128.
    const int du = static_cast<int>(u) - 128;
    const int dv = static_cast<int>(v) - 128;

    ruv = half + ITUR_BT_601_CVR * dv;
    guv = half + ITUR_BT_601_CVG * dv + ITUR_BT_601_CUG * du;
    buv = half + ITUR_BT_601_CUB * du;
}
// Combines one luma sample with precomputed chroma contributions and writes the
// saturated 8-bit r, g and b values.
static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv,
                               uchar& r, uchar& g, uchar& b) {
    // Luma below 16 is clamped to zero before scaling.
    const int luma = std::max(0, static_cast<int>(vy) - 16) * ITUR_BT_601_CY;

    r = saturate_cast<uchar>((luma + ruv) >> ITUR_BT_601_SHIFT);
    g = saturate_cast<uchar>((luma + guv) >> ITUR_BT_601_SHIFT);
    b = saturate_cast<uchar>((luma + buv) >> ITUR_BT_601_SHIFT);
}
static void calculate_nv12_to_rgb_fallback(const uchar **y_rows,
const uchar *uv_row,
uchar **out_rows,
int buf_width) {
for (int i = 0; i < buf_width; i += 2) {
uchar u = uv_row[i];
uchar v = uv_row[i + 1];
int ruv, guv, buv;
uvToRGBuv(u, v, ruv, guv, buv);
for (int y = 0; y < 2; y++) {
for (int x = 0; x < 2; x++) {
uchar vy = y_rows[y][i + x];
uchar r, g, b;
yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
out_rows[y][3*(i + x)] = r;
out_rows[y][3*(i + x) + 1] = g;
out_rows[y][3*(i + x) + 2] = b;
}
}
}
}
static void calculate_i420_to_rgb_fallback(const uchar **y_rows,
const uchar *u_row,
const uchar *v_row,
uchar **out_rows,
int buf_width) {
for (int i = 0; i < buf_width; i += 2) {
uchar u = u_row[i / 2];
uchar v = v_row[i / 2];
int ruv, guv, buv;
uvToRGBuv(u, v, ruv, guv, buv);
for (int y = 0; y < 2; y++) {
for (int x = 0; x < 2; x++) {
uchar vy = y_rows[y][i + x];
uchar r, g, b;
yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
out_rows[y][3*(i + x)] = r;
out_rows[y][3*(i + x) + 1] = g;
out_rows[y][3*(i + x) + 2] = b;
}
}
}
}
// Fluid kernel for NV12 -> RGB conversion with ISA selection done inside run():
// the first enabled and available branch handles the call, the scalar fallback otherwise.
// Each run() call processes two output lines (LPI = 2).
GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) {
static const int Window = 1;
static const int LPI = 2;
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
static void run(const cv::gapi::fluid::View &in_y,
const cv::gapi::fluid::View &in_uv,
cv::gapi::fluid::Buffer &out) {
// One UV line is paired with two luma lines and two output lines.
const uchar* uv_row = in_uv.InLineB(0);
const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)};
uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)};
int buf_width = out.length();
// The AVX512 implementation of wide universal intrinsics is slower than AVX2.
// It stays disabled (#if 0) until the cause is found.
#if 0
#ifdef HAVE_AVX512
if (with_cpu_x86_avx512_core()) {
#define CV_AVX_512DQ 1
avx512::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
return;
}
#endif // HAVE_AVX512
#endif
#ifdef HAVE_AVX2
if (with_cpu_x86_avx2()) {
avx::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
return;
}
#endif // HAVE_AVX2
#ifdef HAVE_SSE
if (with_cpu_x86_sse42()) {
calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
return;
}
#endif // HAVE_SSE
#ifdef HAVE_NEON
// No runtime check here: when HAVE_NEON is defined this branch always returns, which
// makes the scalar fallback below unreachable in such builds.
neon::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
return;
#endif // HAVE_NEON
calculate_nv12_to_rgb_fallback(y_rows, uv_row, out_rows, buf_width);
}
};
// Fluid kernel for I420 -> RGB conversion with ISA selection done inside run():
// the first enabled and available branch handles the call, the scalar fallback otherwise.
// Each run() call processes two output lines (LPI = 2).
GAPI_FLUID_KERNEL(FI420toRGB, I420toRGB, false) {
static const int Window = 1;
static const int LPI = 2;
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
static void run(const cv::gapi::fluid::View &in_y,
const cv::gapi::fluid::View &in_u,
const cv::gapi::fluid::View &in_v,
cv::gapi::fluid::Buffer &out) {
// One U line and one V line are paired with two luma lines and two output lines.
const uchar* u_row = in_u.InLineB(0);
const uchar* v_row = in_v.InLineB(0);
const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)};
uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)};
int buf_width = out.length();
// Both chroma planes must have the same line length.
GAPI_DbgAssert(in_u.length() == in_v.length());
// The AVX512 implementation of wide universal intrinsics is slower than AVX2.
// It stays disabled (#if 0) until the cause is found.
#if 0
#ifdef HAVE_AVX512
if (with_cpu_x86_avx512_core()) {
#define CV_AVX_512DQ 1
avx512::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
return;
}
#endif // HAVE_AVX512
#endif
#ifdef HAVE_AVX2
if (with_cpu_x86_avx2()) {
avx::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
return;
}
#endif // HAVE_AVX2
#ifdef HAVE_SSE
if (with_cpu_x86_sse42()) {
calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
return;
}
#endif // HAVE_SSE
#ifdef HAVE_NEON
// No runtime check here: when HAVE_NEON is defined this branch always returns, which
// makes the scalar fallback below unreachable in such builds.
neon::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
return;
#endif // HAVE_NEON
calculate_i420_to_rgb_fallback(y_rows, u_row, v_row, out_rows, buf_width);
}
};
namespace { namespace {
template <typename src_t, typename dst_t> template <typename src_t, typename dst_t>
@ -2520,9 +2600,10 @@ GAPI_FLUID_KERNEL(FDivC, GDivC, false) {
using namespace kernels; using namespace kernels;
cv::gapi::GKernelPackage preprocKernels() { cv::gapi::GKernelPackage preprocKernels() {
return cv::gapi::kernels return combine(
< FChanToPlane FColorConversionChooseISA(),
, FScalePlanes cv::gapi::kernels
<FScalePlanes
, FScalePlanes4 , FScalePlanes4
, FScalePlane , FScalePlane
, FScalePlane32f , FScalePlane32f
@ -2537,12 +2618,10 @@ cv::gapi::GKernelPackage preprocKernels() {
, FSplit2 , FSplit2
, FSplit3 , FSplit3
, FSplit4 , FSplit4
, FNV12toRGB
, FI420toRGB
, FConvertDepth , FConvertDepth
, FSubC , FSubC
, FDivC , FDivC
>(); >());
} }
} // namespace gapi } // namespace gapi

View File

@ -34,6 +34,12 @@ namespace InferenceEngine {
namespace gapi { namespace gapi {
namespace kernels { namespace kernels {
// Empty tag types, one per instruction-set flavour. They carry no data and exist only so
// that an implementation can be chosen by overload resolution or template argument.
struct avx512_tag {};
struct avx2_tag {};
struct sse42_tag {};
struct neon_tag {};
// Tag for the plain C++ implementation.
struct scalar_tag {};
template<typename DST, typename SRC> static inline DST saturate_cast(SRC x); template<typename DST, typename SRC> static inline DST saturate_cast(SRC x);
template<> inline short saturate_cast(int x) { return (std::min)(SHRT_MAX, (std::max)(SHRT_MIN, x)); } template<> inline short saturate_cast(int x) { return (std::min)(SHRT_MAX, (std::max)(SHRT_MIN, x)); }
template<> inline short saturate_cast(float x) { return saturate_cast<short>(static_cast<int>(std::rint(x))); } template<> inline short saturate_cast(float x) { return saturate_cast<short>(static_cast<int>(std::rint(x))); }
@ -116,6 +122,31 @@ static inline Q8_8 mulaw(Q0_16 a, Q8_8 w) { return static_cast<Q8_8>((a * w) >>
static inline float mulas(float a, float s) { return a * s; } static inline float mulas(float a, float s) { return a * s; }
static inline float mulaw(float a, float w) { return a * w; } static inline float mulaw(float a, float w) { return a * w; }
// Fixed-point YUV -> RGB coefficients, scaled by 2^ITUR_BT_601_SHIFT (2^20).
// Dividing by 2^20 gives approximately: CY 1.164, CUB 2.018, CUG -0.391, CVG -0.813, CVR 1.596.
// NOTE(review): the names indicate ITU-R BT.601; confirm against the standard if it matters.
static const int ITUR_BT_601_CY = 1220542;
static const int ITUR_BT_601_CUB = 2116026;
static const int ITUR_BT_601_CUG = -409993;
static const int ITUR_BT_601_CVG = -852492;
static const int ITUR_BT_601_CVR = 1673527;
// Number of fractional bits in the coefficients above.
static const int ITUR_BT_601_SHIFT = 20;
// Computes the chroma-only fixed-point contributions to R, G and B for one (u, v) pair.
// Each output already includes the rounding term for the final right shift.
static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) {
    // Half of one output unit in the fixed-point scale; makes the later shift round.
    const int half = 1 << (ITUR_BT_601_SHIFT - 1);
    // Chroma samples are centred on 128.
    const int du = static_cast<int>(u) - 128;
    const int dv = static_cast<int>(v) - 128;

    ruv = half + ITUR_BT_601_CVR * dv;
    guv = half + ITUR_BT_601_CVG * dv + ITUR_BT_601_CUG * du;
    buv = half + ITUR_BT_601_CUB * du;
}
// Combines one luma sample with precomputed chroma contributions and writes the
// saturated 8-bit r, g and b values.
static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv,
                               uchar& r, uchar& g, uchar& b) {
    // Luma below 16 is clamped to zero before scaling.
    const int luma = std::max(0, static_cast<int>(vy) - 16) * ITUR_BT_601_CY;

    r = saturate_cast<uchar>((luma + ruv) >> ITUR_BT_601_SHIFT);
    g = saturate_cast<uchar>((luma + guv) >> ITUR_BT_601_SHIFT);
    b = saturate_cast<uchar>((luma + buv) >> ITUR_BT_601_SHIFT);
}
} // namespace kernels } // namespace kernels
} // namespace gapi } // namespace gapi
} // namespace InferenceEngine } // namespace InferenceEngine

Some files were not shown because too many files have changed in this diff Show More