Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
6ce8d8ce66
5
.ci/openvino-onnx/Jenkinsfile
vendored
5
.ci/openvino-onnx/Jenkinsfile
vendored
@ -155,10 +155,9 @@ def getConfigurationsMap() {
|
|||||||
|
|
||||||
CONFIGURATION_WORKFLOW = { configuration ->
|
CONFIGURATION_WORKFLOW = { configuration ->
|
||||||
node("OpenVINO") {
|
node("OpenVINO") {
|
||||||
|
String workdir = "${HOME}/workspace/${BUILD_NUMBER}_${env.CHANGE_ID}_${configuration.name}"
|
||||||
try {
|
try {
|
||||||
PROJECT_NAME = "openvino"
|
PROJECT_NAME = "openvino"
|
||||||
String workdir = "${HOME}/workspace/${BUILD_NUMBER}_${env.CHANGE_ID}_${configuration.name}"
|
|
||||||
|
|
||||||
stage("Clone repository") {
|
stage("Clone repository") {
|
||||||
prepare_repository(workdir)
|
prepare_repository(workdir)
|
||||||
}
|
}
|
||||||
@ -185,10 +184,10 @@ CONFIGURATION_WORKFLOW = { configuration ->
|
|||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
stage("Cleanup") {
|
stage("Cleanup") {
|
||||||
deleteDir()
|
|
||||||
String docker_container_name = get_docker_container_name(configuration)
|
String docker_container_name = get_docker_container_name(configuration)
|
||||||
sh """
|
sh """
|
||||||
docker rm -f ${docker_container_name}
|
docker rm -f ${docker_container_name}
|
||||||
|
rm -rf ${workdir}
|
||||||
"""
|
"""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
38
.github/workflows/mo.yml
vendored
38
.github/workflows/mo.yml
vendored
@ -63,41 +63,3 @@ jobs:
|
|||||||
python3 -m xmlrunner discover -p *_test.py --output=../mo-ut-logs
|
python3 -m xmlrunner discover -p *_test.py --output=../mo-ut-logs
|
||||||
working-directory: model-optimizer
|
working-directory: model-optimizer
|
||||||
|
|
||||||
build_wheel:
|
|
||||||
name: Build Python wheel
|
|
||||||
runs-on: ubuntu-18.04
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
python3 -m pip install --upgrade pip
|
|
||||||
python3 -m pip install wheel setuptools
|
|
||||||
python3 -m pip install tensorflow==2.3.0
|
|
||||||
|
|
||||||
- name: Build
|
|
||||||
run: |
|
|
||||||
python3 setup.py sdist bdist_wheel
|
|
||||||
working-directory: model-optimizer
|
|
||||||
|
|
||||||
- name: Test package content
|
|
||||||
run: |
|
|
||||||
echo "src = open('openvino_mo.egg-info/SOURCES.txt', 'rt').read().split()" | tee -a test_wheel.py
|
|
||||||
echo "ref = open('automation/package_BOM.txt', 'rt').read().split()" | tee -a test_wheel.py
|
|
||||||
echo "for name in ref:" | tee -a test_wheel.py
|
|
||||||
echo " if name.endswith('.py'):" | tee -a test_wheel.py
|
|
||||||
echo " assert name in src or './' + name in src, name + ' file missed'" | tee -a test_wheel.py
|
|
||||||
python3 test_wheel.py
|
|
||||||
working-directory: model-optimizer
|
|
||||||
|
|
||||||
- name: Test conversion
|
|
||||||
run: |
|
|
||||||
wget -q http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz
|
|
||||||
tar -xf mobilenet_v1_1.0_224.tgz
|
|
||||||
python3 -m pip install model-optimizer/dist/*.whl
|
|
||||||
python3 -m mo --input_model mobilenet_v1_1.0_224_frozen.pb --input_shape "[1,224,224,3]"
|
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: mo_wheel
|
|
||||||
path: "model-optimizer/dist/*.whl"
|
|
||||||
|
@ -169,10 +169,11 @@ ie_shellcheck_process(DIRECTORY "${OpenVINO_MAIN_SOURCE_DIR}"
|
|||||||
"${IE_MAIN_SOURCE_DIR}/thirdparty"
|
"${IE_MAIN_SOURCE_DIR}/thirdparty"
|
||||||
"${IE_MAIN_SOURCE_DIR}/temp"
|
"${IE_MAIN_SOURCE_DIR}/temp"
|
||||||
# TODO fix and enable back:
|
# TODO fix and enable back:
|
||||||
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies"
|
"${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/scripts/dependencies.sh"
|
||||||
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/demo"
|
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies/install_NEO_OCL_driver.sh"
|
||||||
"${OpenVINO_MAIN_SOURCE_DIR}/ngraph"
|
"${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies/install_openvino_dependencies.sh"
|
||||||
"${IE_MAIN_SOURCE_DIR}/scripts")
|
"${OpenVINO_MAIN_SOURCE_DIR}/ngraph/python/tests/test_onnx/model_zoo_preprocess.sh"
|
||||||
|
)
|
||||||
|
|
||||||
#
|
#
|
||||||
# cpack
|
# cpack
|
||||||
|
@ -11,18 +11,27 @@
|
|||||||
* *element_type*
|
* *element_type*
|
||||||
|
|
||||||
* **Description**: the type of element of output tensor
|
* **Description**: the type of element of output tensor
|
||||||
* **Range of values**: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16
|
* **Range of values**: u1, u4, u8, u16, u32, u64, i4, i8, i16, i32, i64, f16, f32, boolean, bf16
|
||||||
* **Type**: string
|
* **Type**: `string`
|
||||||
* **Default value**: None
|
* **Default value**: None
|
||||||
* **Required**: *Yes*
|
* **Required**: *yes*
|
||||||
|
|
||||||
* *shape*
|
* *shape*
|
||||||
|
|
||||||
* **Description**: the shape of the output tensor
|
* **Description**: the shape of the output tensor
|
||||||
* **Range of values**: list of non-negative integers, empty list is allowed that means 0D or scalar tensor
|
* **Range of values**: list of non-negative integers, empty list is allowed, which means 0D or scalar tensor
|
||||||
* **Type**: int[]
|
* **Type**: `int[]`
|
||||||
* **Default value**: None
|
* **Default value**: None
|
||||||
* **Required**: *Yes*
|
* **Required**: *yes*
|
||||||
|
|
||||||
|
|
||||||
|
**Outputs**
|
||||||
|
|
||||||
|
* **1**: Output tensor of type *T* and shape equal to *shape* attribute.
|
||||||
|
|
||||||
|
**Types**
|
||||||
|
|
||||||
|
* *T*: any type from *element type* values.
|
||||||
|
|
||||||
**Example**
|
**Example**
|
||||||
|
|
||||||
|
@ -8,9 +8,7 @@
|
|||||||
|
|
||||||
**Detailed description**:
|
**Detailed description**:
|
||||||
|
|
||||||
The *ExtractImagePatches* operation is similar to the TensorFlow* operation [ExtractImagePatches](https://www.tensorflow.org/api_docs/python/tf/image/extract_patches).
|
The *ExtractImagePatches* operation extracts patches of shape `sizes` which are `strides` apart in the input image. The output elements are taken from the input at intervals given by the `rate` argument, as in dilated convolutions.
|
||||||
|
|
||||||
This op extracts patches of shape `sizes` which are `strides` apart in the input image. The output elements are taken from the input at intervals given by the `rate` argument, as in dilated convolutions.
|
|
||||||
|
|
||||||
The result is a 4D tensor containing image patches with size `size[0] * size[1] * depth` vectorized in the "depth" dimension.
|
The result is a 4D tensor containing image patches with size `size[0] * size[1] * depth` vectorized in the "depth" dimension.
|
||||||
|
|
||||||
@ -92,20 +90,23 @@ The "auto_pad" attribute has no effect on the size of each patch, it determines
|
|||||||
Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We use the symbol `x` to mark output patches.
|
Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We use the symbol `x` to mark output patches.
|
||||||
|
|
||||||
1. `sizes="3,3", strides="5,5", rates="1,1", auto_pad="valid"`
|
1. `sizes="3,3", strides="5,5", rates="1,1", auto_pad="valid"`
|
||||||
|
\f[
|
||||||
x x x 4 5 x x x 9 10
|
\begin{bmatrix}
|
||||||
x x x 14 15 x x x 19 20
|
x & x & x & 4 & 5 & x & x & x & 9 & 10 \\
|
||||||
x x x 24 25 x x x 29 30
|
x & x & x & 14 & 15 & x & x & x & 19 & 20 \\
|
||||||
31 32 33 34 35 36 37 38 39 40
|
x & x & x & 24 & 25 & x & x & x & 29 & 30 \\
|
||||||
41 42 43 44 45 46 47 48 49 50
|
31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 \\
|
||||||
x x x 54 55 x x x 59 60
|
41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 \\
|
||||||
x x x 64 65 x x x 69 70
|
x & x & x & 54 & 55 & x & x & x & 59 & 60 \\
|
||||||
x x x 74 75 x x x 79 80
|
x & x & x & 64 & 65 & x & x & x & 69 & 70 \\
|
||||||
81 82 83 84 85 86 87 88 89 90
|
x & x & x & 74 & 75 & x & x & x & 79 & 80 \\
|
||||||
91 92 93 94 95 96 97 98 99 100
|
81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
|
||||||
|
91 & 92 & 93 & 94 & 95 & 96 & 97 & 98 & 99 & 100
|
||||||
|
\end{bmatrix}
|
||||||
|
\f]
|
||||||
|
|
||||||
output:
|
output:
|
||||||
|
```
|
||||||
[[[[ 1 6]
|
[[[[ 1 6]
|
||||||
[51 56]]
|
[51 56]]
|
||||||
|
|
||||||
@ -132,24 +133,27 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
|
|||||||
|
|
||||||
[[23 28]
|
[[23 28]
|
||||||
[73 78]]]]
|
[73 78]]]]
|
||||||
|
```
|
||||||
output shape: `[1, 9, 2, 2]`
|
output shape: `[1, 9, 2, 2]`
|
||||||
|
|
||||||
2. `sizes="4,4", strides="8,8", rates="1,1", auto_pad="valid"`
|
2. `sizes="4,4", strides="8,8", rates="1,1", auto_pad="valid"`
|
||||||
|
\f[
|
||||||
x x x x 5 6 7 8 9 10
|
\begin{bmatrix}
|
||||||
x x x x 15 16 17 18 19 20
|
x & x & x & x & 5 & 6 & 7 & 8 & 9 & 10 \\
|
||||||
x x x x 25 26 27 28 29 30
|
x & x & x & x & 15 & 16 & 17 & 18 & 19 & 20 \\
|
||||||
x x x x 35 36 37 38 39 40
|
x & x & x & x & 25 & 26 & 27 & 28 & 29 & 30 \\
|
||||||
41 42 43 44 45 46 47 48 49 50
|
x & x & x & x & 35 & 36 & 37 & 38 & 39 & 40 \\
|
||||||
51 52 53 54 55 56 57 58 59 60
|
41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 \\
|
||||||
61 62 63 64 65 66 67 68 69 70
|
51 & 52 & 53 & 54 & 55 & 56 & 57 & 58 & 59 & 60 \\
|
||||||
71 72 73 74 75 76 77 78 79 80
|
61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 \\
|
||||||
81 82 83 84 85 86 87 88 89 90
|
71 & 72 & 73 & 74 & 75 & 76 & 77 & 78 & 79 & 80 \\
|
||||||
91 92 93 94 95 96 97 98 99 100
|
81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
|
||||||
|
91 & 92 & 93 & 94 & 95 & 96 & 97 & 98 & 99 & 100
|
||||||
|
\end{bmatrix}
|
||||||
|
\f]
|
||||||
|
|
||||||
output:
|
output:
|
||||||
|
```
|
||||||
[[[[ 1]]
|
[[[[ 1]]
|
||||||
|
|
||||||
[[ 2]]
|
[[ 2]]
|
||||||
@ -181,27 +185,29 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
|
|||||||
[[33]]
|
[[33]]
|
||||||
|
|
||||||
[[34]]]]
|
[[34]]]]
|
||||||
|
```
|
||||||
output shape: `[1, 16, 1, 1]`
|
output shape: `[1, 16, 1, 1]`
|
||||||
|
|
||||||
3. `sizes="4,4", strides="9,9", rates="1,1", auto_pad="same_upper"`
|
3. `sizes="4,4", strides="9,9", rates="1,1", auto_pad="same_upper"`
|
||||||
|
\f[
|
||||||
x x x x 0 0 0 0 0 x x x x
|
\begin{bmatrix}
|
||||||
x x x x 4 5 6 7 8 x x x x
|
x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x\\
|
||||||
x x x x 14 15 16 17 18 x x x x
|
x & x & x & x & 4 & 5 & 6 & 7 & 8 & x & x & x & x\\
|
||||||
x x x x 24 25 26 27 28 x x x x
|
x & x & x & x & 14 & 15 & 16 & 17 & 18 & x & x & x & x\\
|
||||||
0 31 32 33 34 35 36 37 38 39 40 0 0
|
x & x & x & x & 24 & 25 & 26 & 27 & 28 & x & x & x & x\\
|
||||||
0 41 42 43 44 45 46 47 48 49 50 0 0
|
0 & 31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 & 0 & 0\\
|
||||||
0 51 52 53 54 55 56 57 58 59 60 0 0
|
0 & 41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 & 0 & 0\\
|
||||||
0 61 62 63 64 65 66 67 68 69 70 0 0
|
0 & 51 & 52 & 53 & 54 & 55 & 56 & 57 & 58 & 59 & 60 & 0 & 0\\
|
||||||
0 71 72 73 74 75 76 77 78 79 80 0 0
|
0 & 61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 & 0 & 0\\
|
||||||
x x x x 84 85 86 87 88 x x x x
|
0 & 71 & 72 & 73 & 74 & 75 & 76 & 77 & 78 & 79 & 80 & 0 & 0\\
|
||||||
x x x x 94 95 96 97 98 x x x x
|
x & x & x & x & 84 & 85 & 86 & 87 & 88 & x & x & x & x\\
|
||||||
x x x x 0 0 0 0 0 x x x x
|
x & x & x & x & 94 & 95 & 96 & 97 & 98 & x & x & x & x\\
|
||||||
x x x x 0 0 0 0 0 x x x x
|
x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x\\
|
||||||
|
x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x
|
||||||
|
\end{bmatrix}
|
||||||
|
\f]
|
||||||
output:
|
output:
|
||||||
|
```
|
||||||
[[[[ 0 0]
|
[[[[ 0 0]
|
||||||
[ 0 89]]
|
[ 0 89]]
|
||||||
|
|
||||||
@ -249,25 +255,28 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u
|
|||||||
|
|
||||||
[[ 23 0]
|
[[ 23 0]
|
||||||
[ 0 0]]]]
|
[ 0 0]]]]
|
||||||
|
```
|
||||||
output shape: `[1, 16, 2, 2]`
|
output shape: `[1, 16, 2, 2]`
|
||||||
|
|
||||||
4. `sizes="3,3", strides="5,5", rates="2,2", auto_pad="valid"`
|
4. `sizes="3,3", strides="5,5", rates="2,2", auto_pad="valid"`
|
||||||
This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches:
|
This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches:
|
||||||
|
\f[
|
||||||
x 2 x 4 x y 7 y 9 y
|
\begin{bmatrix}
|
||||||
11 12 13 14 15 16 17 18 19 20
|
x & 2 & x & 4 & x & y & 7 & y & 9 & y \\
|
||||||
x 22 x 24 x y 27 y 29 y
|
11 & 12 & 13 & 14 & 15 & 16 & 17 & 18 & 19 & 20 \\
|
||||||
31 32 33 34 35 36 37 38 39 40
|
x & 22 & x & 24 & x & y & 27 & y & 29 & y \\
|
||||||
x 42 x 44 x y 47 y 49 y
|
31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 \\
|
||||||
z 52 z 54 z k 57 k 59 k
|
x & 42 & x & 44 & x & y & 47 & y & 49 & y \\
|
||||||
61 62 63 64 65 66 67 68 69 70
|
z & 52 & z & 54 & z & k & 57 & k & 59 & k \\
|
||||||
z 72 z 74 z k 77 k 79 k
|
61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 \\
|
||||||
81 82 83 84 85 86 87 88 89 90
|
z & 72 & z & 74 & z & k & 77 & k & 79 & k \\
|
||||||
z 92 z 94 z k 97 k 99 k
|
81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\
|
||||||
|
z & 92 & z & 94 & z & k & 97 & k & 99 & k
|
||||||
|
\end{bmatrix}
|
||||||
|
\f]
|
||||||
|
|
||||||
output:
|
output:
|
||||||
|
```
|
||||||
[[[[ 1 6]
|
[[[[ 1 6]
|
||||||
[ 51 56]]
|
[ 51 56]]
|
||||||
|
|
||||||
@ -294,26 +303,30 @@ This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches:
|
|||||||
|
|
||||||
[[ 45 50]
|
[[ 45 50]
|
||||||
[ 95 100]]]]
|
[ 95 100]]]]
|
||||||
|
```
|
||||||
output_shape: `[1, 9, 2, 2]`
|
output_shape: `[1, 9, 2, 2]`
|
||||||
|
|
||||||
5. `sizes="2,2", strides="3,3", rates="1,1", auto_pad="valid"`
|
5. `sizes="2,2", strides="3,3", rates="1,1", auto_pad="valid"`
|
||||||
Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature map with coordinate 0 contains numbers in a range `[1, 25]` and feature map with coordinate 1 contains numbers in a range `[26, 50]`
|
Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature map with coordinate 0 contains numbers in a range `[1, 25]` and feature map with coordinate 1 contains numbers in a range `[26, 50]`
|
||||||
|
|
||||||
x x 3 x x
|
\f[
|
||||||
6 7 8 x x
|
\begin{bmatrix}
|
||||||
11 12 13 14 15
|
x & x & 3 & x & x\\
|
||||||
x x 18 x x
|
x & x & 8 & x & x\\
|
||||||
x x 23 x x
|
11 & 12 & 13 & 14 & 15\\
|
||||||
|
x & x & 18 & x & x\\
|
||||||
x x 28 x x
|
x & x & 23 & x & x
|
||||||
x x 33 x x
|
\end{bmatrix}\\
|
||||||
36 37 38 39 40
|
\begin{bmatrix}
|
||||||
x x 43 x x
|
x & x & 28 & x & x\\
|
||||||
x x 48 x x
|
x & x & 33 & x & x\\
|
||||||
|
36 & 37 & 38 & 39 & 40\\
|
||||||
|
x & x & 43 & x & x\\
|
||||||
|
x & x & 48 & x & x
|
||||||
|
\end{bmatrix}
|
||||||
|
\f]
|
||||||
output:
|
output:
|
||||||
|
```
|
||||||
[[[[ 1 4]
|
[[[[ 1 4]
|
||||||
[16 19]]
|
[16 19]]
|
||||||
|
|
||||||
@ -337,5 +350,5 @@ Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature ma
|
|||||||
|
|
||||||
[[32 35]
|
[[32 35]
|
||||||
[47 50]]]]
|
[47 50]]]]
|
||||||
|
```
|
||||||
output shape: `[1, 8, 2, 2]`
|
output shape: `[1, 8, 2, 2]`
|
||||||
|
@ -8,12 +8,37 @@
|
|||||||
|
|
||||||
**Short description**: *ShuffleChannels* permutes data in the channel dimension of the input tensor.
|
**Short description**: *ShuffleChannels* permutes data in the channel dimension of the input tensor.
|
||||||
|
|
||||||
|
**Detailed description**:
|
||||||
|
|
||||||
|
Input tensor of `data_shape` is always interpreted as 4D tensor with the following shape:
|
||||||
|
|
||||||
|
dim 0: data_shape[0] * data_shape[1] * ... * data_shape[axis-1]
|
||||||
|
(or 1 if axis == 0)
|
||||||
|
dim 1: group
|
||||||
|
dim 2: data_shape[axis] / group
|
||||||
|
dim 3: data_shape[axis+1] * data_shape[axis+2] * ... * data_shape[data_shape.size()-1]
|
||||||
|
(or 1 if axis points to last dimension)
|
||||||
|
|
||||||
|
|
||||||
|
The dimensions leading and trailing `axis` are flattened, and the result is reshaped back to the original shape after the channels are shuffled.
|
||||||
|
|
||||||
|
|
||||||
|
The operation is equivalent to the following transformation of the input tensor `x` of shape `[N, C, H, W]` and `axis = 1`:
|
||||||
|
|
||||||
|
\f[
|
||||||
|
x' = reshape(x, [N, group, C / group, H * W])\\
|
||||||
|
x'' = transpose(x', [0, 2, 1, 3])\\
|
||||||
|
y = reshape(x'', [N, C, H, W])\\
|
||||||
|
\f]
|
||||||
|
|
||||||
|
where `group` is the layer attribute described below.
|
||||||
|
|
||||||
**Attributes**:
|
**Attributes**:
|
||||||
|
|
||||||
* *axis*
|
* *axis*
|
||||||
|
|
||||||
* **Description**: *axis* specifies the index of a channel dimension.
|
* **Description**: *axis* specifies the index of a channel dimension.
|
||||||
* **Range of values**: an integer number in the range [-4, 3]
|
* **Range of values**: an integer number in the range `[-rank(data_shape), rank(data_shape) - 1]`
|
||||||
* **Type**: `int`
|
* **Type**: `int`
|
||||||
* **Default value**: 1
|
* **Default value**: 1
|
||||||
* **Required**: *No*
|
* **Required**: *No*
|
||||||
@ -21,30 +46,22 @@
|
|||||||
* *group*
|
* *group*
|
||||||
|
|
||||||
* **Description**: *group* specifies the number of groups to split the channel dimension into. This number must evenly divide the channel dimension size.
|
* **Description**: *group* specifies the number of groups to split the channel dimension into. This number must evenly divide the channel dimension size.
|
||||||
* **Range of values**: a positive integer
|
* **Range of values**: a positive integer in the range `[1, data_shape[axis]]`
|
||||||
* **Type**: `int`
|
* **Type**: `int`
|
||||||
* **Default value**: 1
|
* **Default value**: 1
|
||||||
* **Required**: *No*
|
* **Required**: *No*
|
||||||
|
|
||||||
**Inputs**:
|
**Inputs**:
|
||||||
|
|
||||||
* **1**: 4D input tensor of any supported data type. Required.
|
* **1**: `data` input tensor of type *T* and rank greater than or equal to 1. **Required.**
|
||||||
|
|
||||||
**Outputs**:
|
**Outputs**:
|
||||||
|
|
||||||
* **1**: 4D input tensor with shape and element type as for the input tensor.
|
* **1**: Output tensor with element type *T* and same shape as the input tensor.
|
||||||
|
|
||||||
**Mathematical Formulation**
|
**Types**
|
||||||
|
|
||||||
The operation is the equivalent with the following transformation of the input tensor *x* of shape *[N, C, H, W]*:
|
* *T*: any supported numeric type.
|
||||||
|
|
||||||
```
|
|
||||||
x' = reshape(x, [N, group, C / group, H * W])
|
|
||||||
x'' = transpose(x', [0, 2, 1, 3])
|
|
||||||
y = reshape(x'', [N, C, H, W])
|
|
||||||
```
|
|
||||||
|
|
||||||
where `group` is the layer parameter described above and the `axis = 1`.
|
|
||||||
|
|
||||||
**Example**
|
**Example**
|
||||||
|
|
||||||
|
@ -8,20 +8,20 @@
|
|||||||
|
|
||||||
**Detailed description**:
|
**Detailed description**:
|
||||||
|
|
||||||
The *SpaceToBatch* operation is similar to the TensorFlow* operation [SpaceToBatchND](https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd)
|
|
||||||
|
|
||||||
The operation is equivalent to the following transformation of the input tensor `data` of shape `[batch, D_1, D_2 ... D_{N - 1}]` and `block_shape`, `pads_begin`, `pads_end` of shapes `[N]` to *Y* output tensor.
|
The operation is equivalent to the following transformation of the input tensor `data` of shape `[batch, D_1, D_2 ... D_{N - 1}]` and `block_shape`, `pads_begin`, `pads_end` of shapes `[N]` to *Y* output tensor.
|
||||||
|
|
||||||
Zero-pad the start and end of dimensions [D_0, ..., D_{N - 1}] of the input according to `pads_begin` and `pads_end`:
|
Zero-pad the start and end of dimensions \f$[D_0, \dots, D_{N - 1}]\f$ of the input according to `pads_begin` and `pads_end`:
|
||||||
note: P_0 for batch dimension is expected to be 0 (no-padding).
|
|
||||||
x = [batch + P_0, D_1 + P_1, D_2 + P_2, ..., D_{N - 1} + P_{N - 1}], where P_i = pads_begin[i] + pads_end[i]
|
|
||||||
|
|
||||||
note: B_0 for batch is ignored.
|
\f[x = [batch + P_0, D_1 + P_1, D_2 + P_2, \dots, D_{N - 1} + P_{N - 1}]\f]
|
||||||
x' = reshape(x, [batch, (D_1 + P_1) / B_1, B_1, (D_2 + P_2) / B_2, B_2, ..., (D_{N - 1} + P_{N - 1}) / B_{N - 1}, B_{N - 1}]), where B_i = block_shape[i]
|
\f[x' = reshape(x, [batch, \frac{D_1 + P_1}{B_1}, B_1, \frac{D_2 + P_2}{B_2}, B_2, \dots, \frac{D_{N - 1} + P_{N - 1}}{B_{N - 1}}, B_{N - 1}])\f]
|
||||||
|
\f[x'' = transpose(x', [2, 4, \dots, (N - 1) + (N - 1), 0, 1, 3, \dots, N + (N - 1)])\f]
|
||||||
|
\f[y = reshape(x'', [batch \times B_1 \times \dots \times B_{N - 1}, \frac{D_1 + P_1}{B_1}, \frac{D_2 + P_2}{B_2}, \dots, \frac{D_{N - 1} + P_{N - 1}}{B_{N - 1}}])\f]
|
||||||
|
|
||||||
x'' = transpose(x', [2, 4, ..., (N - 1) + (N - 1), 0, 1, 3, ..., N + (N - 1)])
|
where
|
||||||
|
- \f$P_i\f$ = pads_begin[i] + pads_end[i]
|
||||||
y = reshape(x'', [batch * B_1 * ... * B_{N - 1}, (D_1 + P_1) / B_1, (D_2 + P_2) / B_2, ... , (D_{N - 1} + P_{N - 1}) / B_{N - 1}])
|
- \f$B_i\f$ = block_shape[i]
|
||||||
|
- \f$P_0\f$ for batch dimension is expected to be 0 (no-padding)
|
||||||
|
- \f$B_0\f$ for batch is ignored
|
||||||
|
|
||||||
**Attributes**
|
**Attributes**
|
||||||
|
|
||||||
@ -36,7 +36,7 @@ The operation is equivalent to the following transformation of the input tensor
|
|||||||
|
|
||||||
**Outputs**
|
**Outputs**
|
||||||
|
|
||||||
* **1**: N-D tensor with shape `[batch * block_shape[0] * block_shape[1] * ... * block_shape[N - 1], (pads_begin[1] + D_1 + pads_end[1]) / block_shape[1], (pads_begin[2] + D_2 + pads_end[2]) / block_shape[2], ..., (pads_begin[N - 1] + D_{N - 1} + pads_end[N - 1]) / block_shape[N - 1]` of the same type as `data` input.
|
* **1**: N-D tensor with shape `[batch * block_shape[0] * block_shape[1] * ... * block_shape[N - 1], (D_1 + pads_begin[1] + pads_end[1]) / block_shape[1], (D_2 + pads_begin[2] + pads_end[2]) / block_shape[2], ..., (D_{N -1} + pads_begin[N - 1] + pads_end[N - 1]) / block_shape[N - 1]` of the same type as `data` input.
|
||||||
|
|
||||||
**Types**
|
**Types**
|
||||||
|
|
||||||
|
@ -5,13 +5,12 @@
|
|||||||
**Category**: Data movement operation
|
**Category**: Data movement operation
|
||||||
|
|
||||||
**Short description**: *StridedSlice* extracts a strided slice of a tensor.
|
**Short description**: *StridedSlice* extracts a strided slice of a tensor.
|
||||||
It is similar to generalized array indexing in Python\*.
|
|
||||||
|
|
||||||
**Attributes**
|
**Attributes**
|
||||||
|
|
||||||
* *begin_mask*
|
* *begin_mask*
|
||||||
|
|
||||||
* **Description**: *begin_mask* is a bit mask. *begin_mask[i]* equal to 1 means that the corresponding dimension of the `begin` input is ignored and the 'real' beginning of the tensor is used along corresponding dimension.
|
* **Description**: *begin_mask* is a bit mask. *begin_mask[i]* equal to `1` means that the corresponding dimension of the `begin` input is ignored and the 'real' beginning of the tensor is used along corresponding dimension.
|
||||||
* **Range of values**: a list of `0`s and `1`s
|
* **Range of values**: a list of `0`s and `1`s
|
||||||
* **Type**: `int[]`
|
* **Type**: `int[]`
|
||||||
* **Default value**: None
|
* **Default value**: None
|
||||||
@ -19,7 +18,7 @@
|
|||||||
|
|
||||||
* *end_mask*
|
* *end_mask*
|
||||||
|
|
||||||
* **Description**: *end_mask* is a bit mask. If *end_mask[i]* is 1, the corresponding dimension of the `end` input is ignored and the real 'end' of the tensor is used along corresponding dimension.
|
* **Description**: *end_mask* is a bit mask. If *end_mask[i]* is `1`, the corresponding dimension of the `end` input is ignored and the real 'end' of the tensor is used along corresponding dimension.
|
||||||
* **Range of values**: a list of `0`s and `1`s
|
* **Range of values**: a list of `0`s and `1`s
|
||||||
* **Type**: `int[]`
|
* **Type**: `int[]`
|
||||||
* **Default value**: None
|
* **Default value**: None
|
||||||
@ -27,7 +26,7 @@
|
|||||||
|
|
||||||
* *new_axis_mask*
|
* *new_axis_mask*
|
||||||
|
|
||||||
* **Description**: *new_axis_mask* is a bit mask. If *new_axis_mask[i]* is 1, a length 1 dimension is inserted on the `i`-th position of input tensor.
|
* **Description**: *new_axis_mask* is a bit mask. If *new_axis_mask[i]* is `1`, a length 1 dimension is inserted on the `i`-th position of input tensor.
|
||||||
* **Range of values**: a list of `0`s and `1`s
|
* **Range of values**: a list of `0`s and `1`s
|
||||||
* **Type**: `int[]`
|
* **Type**: `int[]`
|
||||||
* **Default value**: `[0]`
|
* **Default value**: `[0]`
|
||||||
@ -35,7 +34,7 @@
|
|||||||
|
|
||||||
* *shrink_axis_mask*
|
* *shrink_axis_mask*
|
||||||
|
|
||||||
* **Description**: *shrink_axis_mask* is a bit mask. If *shrink_axis_mask[i]* is 1, the dimension on the `i`-th position is deleted.
|
* **Description**: *shrink_axis_mask* is a bit mask. If *shrink_axis_mask[i]* is `1`, the dimension on the `i`-th position is deleted.
|
||||||
* **Range of values**: a list of `0`s and `1`s
|
* **Range of values**: a list of `0`s and `1`s
|
||||||
* **Type**: `int[]`
|
* **Type**: `int[]`
|
||||||
* **Default value**: `[0]`
|
* **Default value**: `[0]`
|
||||||
@ -51,21 +50,83 @@
|
|||||||
|
|
||||||
**Inputs**:
|
**Inputs**:
|
||||||
|
|
||||||
* **1**: Multidimensional input tensor to be sliced. Required.
|
* **1**: `data` - input tensor to be sliced of type `T` and arbitrary shape. **Required.**
|
||||||
|
|
||||||
* **2**: `begin` input - 1D input tensor with begin indexes for input tensor slicing. Required.
|
* **2**: `begin` - 1D tensor of type `T_IND` with begin indexes for input tensor slicing. **Required.**
|
||||||
Out-of-bounds values are silently clamped. If `begin_mask[i]` is 1, the value of `begin[i]` is ignored
|
Out-of-bounds values are silently clamped. If `begin_mask[i]` is `1`, the value of `begin[i]` is ignored and the range of the appropriate dimension starts from `0`. Negative values mean indexing starts from the end. For example, if `data=[1,2,3]`, `begin[0]=-1` means `begin[0]=3`.
|
||||||
and the range of the appropriate dimension starts from 0.
|
|
||||||
Negative values mean indexing starts from the end. For example, if `foo=[1,2,3]`, `begin[0]=-1` means `begin[0]=3`.
|
|
||||||
|
|
||||||
* **3**: `end` input - 1D input tensor with end indexes for input tensor slicing. Required.
|
* **3**: `end` - 1D tensor of type `T_IND` with end indexes for input tensor slicing. **Required.**
|
||||||
Out-of-bounds values will be silently clamped. If `end_mask[i]` is 1, the value of `end[i]` is ignored
|
Out-of-bounds values will be silently clamped. If `end_mask[i]` is `1`, the value of `end[i]` is ignored and the full range of the appropriate dimension is used instead. Negative values mean indexing starts from the end. For example, if `data=[1,2,3]`, `end[0]=-1` means `end[0]=3`.
|
||||||
and the full range of the appropriate dimension is used instead.
|
|
||||||
Negative values mean indexing starts from the end. For example, if `foo=[1,2,3]`, `end[0]=-1` means `end[0]=3`.
|
|
||||||
|
|
||||||
* **4**: `stride` input - 1D input tensor with strides. Optional.
|
* **4**: `stride` - 1D tensor of type `T_IND` with strides. **Optional.**
|
||||||
|
|
||||||
|
**Types**
|
||||||
|
* *T*: any supported type.
|
||||||
|
* *T_IND*: any supported integer type.
|
||||||
|
|
||||||
**Example**
|
**Example**
|
||||||
|
Example of `begin_mask` & `end_mask` usage.
|
||||||
|
```xml
|
||||||
|
<layer ... type="StridedSlice" ...>
|
||||||
|
<data begin_mask="0,1,1" ellipsis_mask="0,0,0" end_mask="1,1,0" new_axis_mask="0,0,0" shrink_axis_mask="0,0,0"/>
|
||||||
|
<input>
|
||||||
|
<port id="0">
|
||||||
|
<dim>2</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>4</dim>
|
||||||
|
</port>
|
||||||
|
<port id="1">
|
||||||
|
<dim>2</dim> <!-- begin: [1, 0, 0] -->
|
||||||
|
</port>
|
||||||
|
<port id="2">
|
||||||
|
<dim>2</dim> <!-- end: [0, 0, 2] -->
|
||||||
|
</port>
|
||||||
|
<port id="3">
|
||||||
|
<dim>2</dim> <!-- stride: [1, 1, 1] -->
|
||||||
|
</port>
|
||||||
|
</input>
|
||||||
|
<output>
|
||||||
|
<port id="4">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>2</dim>
|
||||||
|
</port>
|
||||||
|
</output>
|
||||||
|
</layer>
|
||||||
|
```
|
||||||
|
|
||||||
|
Example of `new_axis_mask` usage.
|
||||||
|
```xml
|
||||||
|
<layer ... type="StridedSlice" ...>
|
||||||
|
<data begin_mask="0,1,1" ellipsis_mask="0,0,0" end_mask="0,1,1" new_axis_mask="1,0,0" shrink_axis_mask="0,0,0"/>
|
||||||
|
<input>
|
||||||
|
<port id="0">
|
||||||
|
<dim>2</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>4</dim>
|
||||||
|
</port>
|
||||||
|
<port id="1">
|
||||||
|
<dim>2</dim>
|
||||||
|
</port>
|
||||||
|
<port id="2">
|
||||||
|
<dim>2</dim>
|
||||||
|
</port>
|
||||||
|
<port id="3">
|
||||||
|
<dim>2</dim>
|
||||||
|
</port>
|
||||||
|
</input>
|
||||||
|
<output>
|
||||||
|
<port id="4">
|
||||||
|
<dim>1</dim>
|
||||||
|
<dim>2</dim>
|
||||||
|
<dim>3</dim>
|
||||||
|
<dim>4</dim>
|
||||||
|
</port>
|
||||||
|
</output>
|
||||||
|
</layer>
|
||||||
|
```
|
||||||
|
|
||||||
|
Example of `shrink_axis_mask` usage.
|
||||||
```xml
|
```xml
|
||||||
<layer ... type="StridedSlice" ...>
|
<layer ... type="StridedSlice" ...>
|
||||||
<data begin_mask="1,0,1,1,1" ellipsis_mask="0,0,0,0,0" end_mask="1,0,1,1,1" new_axis_mask="0,0,0,0,0" shrink_axis_mask="0,1,0,0,0"/>
|
<data begin_mask="1,0,1,1,1" ellipsis_mask="0,0,0,0,0" end_mask="1,0,1,1,1" new_axis_mask="0,0,0,0,0" shrink_axis_mask="0,1,0,0,0"/>
|
||||||
|
@ -2,9 +2,10 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
|
#include "single_layer_tests/reshape.hpp"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "single_layer_tests/reshape.hpp"
|
|
||||||
#include "common_test_utils/test_constants.hpp"
|
#include "common_test_utils/test_constants.hpp"
|
||||||
|
|
||||||
using namespace LayerTestsDefinitions;
|
using namespace LayerTestsDefinitions;
|
||||||
@ -14,31 +15,45 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
|
|||||||
InferenceEngine::Precision::FP32,
|
InferenceEngine::Precision::FP32,
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckDynBatch, ReshapeLayerTest,
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
smoke_ReshapeCheckDynBatch, ReshapeLayerTestRevise,
|
||||||
::testing::Combine(
|
::testing::Combine(
|
||||||
::testing::Values(true),
|
::testing::Values(true), ::testing::ValuesIn(netPrecisions),
|
||||||
::testing::ValuesIn(netPrecisions),
|
|
||||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||||
::testing::Values(InferenceEngine::Layout::ANY),
|
::testing::Values(InferenceEngine::Layout::ANY),
|
||||||
::testing::Values(InferenceEngine::Layout::ANY),
|
::testing::Values(InferenceEngine::Layout::ANY),
|
||||||
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
|
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
|
||||||
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
|
::testing::Values(std::vector<int64_t>({30, 30, 30, 30})),
|
||||||
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
|
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
|
||||||
::testing::Values(std::map<std::string, std::string>({}))),
|
::testing::Values(std::map<std::string, std::string>({}))),
|
||||||
ReshapeLayerTest::getTestCaseName);
|
ReshapeLayerTestRevise::getTestCaseName);
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTest,
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
smoke_ReshapeCheck, ReshapeLayerTestRevise,
|
||||||
::testing::Combine(
|
::testing::Combine(
|
||||||
::testing::Values(true),
|
::testing::Values(true), ::testing::ValuesIn(netPrecisions),
|
||||||
::testing::ValuesIn(netPrecisions),
|
|
||||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||||
::testing::Values(InferenceEngine::Layout::ANY),
|
::testing::Values(InferenceEngine::Layout::ANY),
|
||||||
::testing::Values(InferenceEngine::Layout::ANY),
|
::testing::Values(InferenceEngine::Layout::ANY),
|
||||||
::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
|
::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
|
||||||
::testing::Values(std::vector<size_t>({10, 0, 100})),
|
::testing::Values(std::vector<int64_t>({10, 0, 100})),
|
||||||
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
|
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
|
||||||
::testing::Values(std::map<std::string, std::string>({}))),
|
::testing::Values(std::map<std::string, std::string>({}))),
|
||||||
ReshapeLayerTest::getTestCaseName);
|
ReshapeLayerTestRevise::getTestCaseName);
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
smoke_ReshapeCheckNegative, ReshapeLayerTestRevise,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::Values(true), ::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||||
|
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||||
|
::testing::Values(InferenceEngine::Layout::ANY),
|
||||||
|
::testing::Values(InferenceEngine::Layout::ANY),
|
||||||
|
::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
|
||||||
|
::testing::Values(std::vector<int64_t>({10, -1, 100})),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
|
||||||
|
::testing::Values(std::map<std::string, std::string>({}))),
|
||||||
|
ReshapeLayerTestRevise::getTestCaseName);
|
||||||
} // namespace
|
} // namespace
|
@ -68,6 +68,10 @@ if(ENABLE_WHEEL)
|
|||||||
add_subdirectory(wheel)
|
add_subdirectory(wheel)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (NGRAPH_PYTHON_BUILD_ENABLE)
|
||||||
|
add_dependencies(ie_api _pyngraph)
|
||||||
|
endif()
|
||||||
|
|
||||||
# install
|
# install
|
||||||
|
|
||||||
ie_cpack_add_component(${PYTHON_VERSION})
|
ie_cpack_add_component(${PYTHON_VERSION})
|
||||||
|
@ -55,7 +55,7 @@ fi
|
|||||||
|
|
||||||
if [[ "${APPS_TO_RUN}" -ge 4 ]] ; then
|
if [[ "${APPS_TO_RUN}" -ge 4 ]] ; then
|
||||||
# For more then 4 multidevice testing
|
# For more then 4 multidevice testing
|
||||||
for (( VAR = 4; VAR <= ${APPS_TO_RUN}; ++VAR )); do
|
for (( VAR = 4; VAR <= APPS_TO_RUN; ++VAR )); do
|
||||||
./${APP_NAME} --gtest_filter=*VPURegTest*YOLO*myriad* &
|
./${APP_NAME} --gtest_filter=*VPURegTest*YOLO*myriad* &
|
||||||
pids+=" $!"
|
pids+=" $!"
|
||||||
done
|
done
|
||||||
|
@ -33,7 +33,7 @@ target_include_directories(${TARGET_NAME} PRIVATE
|
|||||||
${CMAKE_CURRENT_SOURCE_DIR}
|
${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
|
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
|
||||||
${CLDNN__OCL_ICD_INCDIRS}
|
${CLDNN__OCL_ICD_INCDIRS}
|
||||||
${CLDNN_TOP_FOLDER})
|
${CLDNN_TOP_FOLDER}/api)
|
||||||
|
|
||||||
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
|
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <ie_layouts.h>
|
#include <ie_layouts.h>
|
||||||
#include <api/layout.hpp>
|
#include <cldnn/runtime/layout.hpp>
|
||||||
|
|
||||||
#include "ngraph/type/element_type.hpp"
|
#include "ngraph/type/element_type.hpp"
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
|
|
||||||
#include "cldnn_custom_layer.h"
|
#include "cldnn_custom_layer.h"
|
||||||
|
|
||||||
#include <api/network.hpp>
|
#include <cldnn/graph/network.hpp>
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <ie_common.h>
|
#include <ie_common.h>
|
||||||
#include "pugixml.hpp"
|
#include "pugixml.hpp"
|
||||||
#include "api/tensor.hpp"
|
#include "cldnn/runtime/tensor.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -81,6 +81,8 @@
|
|||||||
#include "cldnn_itt.h"
|
#include "cldnn_itt.h"
|
||||||
#include "gpu/gpu_config.hpp"
|
#include "gpu/gpu_config.hpp"
|
||||||
|
|
||||||
|
#include "cldnn/runtime/device_query.hpp"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
# include <dlfcn.h>
|
# include <dlfcn.h>
|
||||||
#endif
|
#endif
|
||||||
@ -117,13 +119,13 @@ struct clDNNEngine::impl {
|
|||||||
};
|
};
|
||||||
|
|
||||||
cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::string> &config) const {
|
cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::string> &config) const {
|
||||||
auto device_info = device_map.begin()->second.get_info();
|
auto device_info = device_map.begin()->second->get_info();
|
||||||
if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) {
|
if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) {
|
||||||
auto val = config.at(PluginConfigParams::KEY_DEVICE_ID);
|
auto val = config.at(PluginConfigParams::KEY_DEVICE_ID);
|
||||||
if (device_map.find(val) == device_map.end()) {
|
if (device_map.find(val) == device_map.end()) {
|
||||||
IE_THROW() << "Invalid device ID: " << val;
|
IE_THROW() << "Invalid device ID: " << val;
|
||||||
}
|
}
|
||||||
device_info = device_map.at(val).get_info();
|
device_info = device_map.at(val)->get_info();
|
||||||
}
|
}
|
||||||
|
|
||||||
return device_info;
|
return device_info;
|
||||||
@ -445,7 +447,8 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
|
|||||||
RegisterPrimitives();
|
RegisterPrimitives();
|
||||||
// try loading clDNN engine and get info from it
|
// try loading clDNN engine and get info from it
|
||||||
{
|
{
|
||||||
cldnn::device_query device_query;
|
// Set OCL runtime which should be always available
|
||||||
|
cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl);
|
||||||
device_map = device_query.get_available_devices();
|
device_map = device_query.get_available_devices();
|
||||||
}
|
}
|
||||||
// locate global custom kernel config
|
// locate global custom kernel config
|
||||||
@ -851,8 +854,8 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
|
|||||||
};
|
};
|
||||||
|
|
||||||
static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
|
static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
|
||||||
auto freqGHz = info.core_frequency / 1000.f;
|
auto freqGHz = info.gpu_frequency / 1000.f;
|
||||||
auto numEUs = info.cores_count;
|
auto numEUs = info.execution_units_count;
|
||||||
auto opsPerComputeBlock = 0;
|
auto opsPerComputeBlock = 0;
|
||||||
auto computeBlockIPC = 1.0f;
|
auto computeBlockIPC = 1.0f;
|
||||||
switch (dt) {
|
switch (dt) {
|
||||||
@ -894,8 +897,8 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
|
|||||||
|
|
||||||
auto iter = device_map.find(device_id);
|
auto iter = device_map.find(device_id);
|
||||||
auto device_info = iter != device_map.end() ?
|
auto device_info = iter != device_map.end() ?
|
||||||
iter->second.get_info() :
|
iter->second->get_info() :
|
||||||
device_map.begin()->second.get_info();
|
device_map.begin()->second->get_info();
|
||||||
|
|
||||||
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
|
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
|
||||||
std::vector<std::string> metrics;
|
std::vector<std::string> metrics;
|
||||||
@ -931,7 +934,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
|
|||||||
gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32);
|
gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32);
|
||||||
IE_SET_METRIC_RETURN(DEVICE_GOPS, gops);
|
IE_SET_METRIC_RETURN(DEVICE_GOPS, gops);
|
||||||
} else if (name == GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)) {
|
} else if (name == GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)) {
|
||||||
IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.cores_count);
|
IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.execution_units_count);
|
||||||
} else if (name == GPU_METRIC_KEY(UARCH_VERSION)) {
|
} else if (name == GPU_METRIC_KEY(UARCH_VERSION)) {
|
||||||
std::stringstream s;
|
std::stringstream s;
|
||||||
if (device_info.gfx_ver.major == 0 && device_info.gfx_ver.minor == 0 && device_info.gfx_ver.revision == 0) {
|
if (device_info.gfx_ver.major == 0 && device_info.gfx_ver.minor == 0 && device_info.gfx_ver.revision == 0) {
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <api/engine.hpp>
|
#include <cldnn/runtime/engine.hpp>
|
||||||
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
||||||
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
|
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
|
||||||
#include "cldnn_remote_context.h"
|
#include "cldnn_remote_context.h"
|
||||||
@ -22,7 +22,7 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin,
|
|||||||
std::shared_ptr<impl> _impl;
|
std::shared_ptr<impl> _impl;
|
||||||
|
|
||||||
// key: device_id, value: cldnn device
|
// key: device_id, value: cldnn device
|
||||||
std::map<std::string, cldnn::device> device_map;
|
std::map<std::string, cldnn::device::ptr> device_map;
|
||||||
std::mutex engine_mutex;
|
std::mutex engine_mutex;
|
||||||
|
|
||||||
mutable CLDNNRemoteCLContext::Ptr m_defaultContext;
|
mutable CLDNNRemoteCLContext::Ptr m_defaultContext;
|
||||||
|
@ -2,13 +2,13 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
#include <list>
|
|
||||||
#include <set>
|
|
||||||
#include <unordered_set>
|
|
||||||
|
|
||||||
#include "ie_metric_helpers.hpp"
|
#include "ie_metric_helpers.hpp"
|
||||||
#include <api/cldnn.hpp>
|
#include <chrono>
|
||||||
#include <api/data.hpp>
|
#include <cmath>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "ie_metric_helpers.hpp"
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -27,7 +27,6 @@
|
|||||||
#include "threading/ie_cpu_streams_executor.hpp"
|
#include "threading/ie_cpu_streams_executor.hpp"
|
||||||
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
|
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
|
||||||
|
|
||||||
|
|
||||||
using namespace InferenceEngine;
|
using namespace InferenceEngine;
|
||||||
using namespace InferenceEngine::details;
|
using namespace InferenceEngine::details;
|
||||||
|
|
||||||
|
@ -2,22 +2,28 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
|
#include <cldnn/graph/network.hpp>
|
||||||
|
#include <cldnn/runtime/profiling.hpp>
|
||||||
|
|
||||||
|
#include "cldnn_graph.h"
|
||||||
|
#include "simple_math.h"
|
||||||
|
#include <cldnn/cldnn_config.hpp>
|
||||||
|
#include "cldnn_infer_request.h"
|
||||||
|
|
||||||
|
#include <description_buffer.hpp>
|
||||||
|
#include <threading/ie_executor_manager.hpp>
|
||||||
|
#include <exec_graph_info.hpp>
|
||||||
|
|
||||||
|
#include <ie_ngraph_utils.hpp>
|
||||||
|
#include <ngraph/variant.hpp>
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <api/cldnn.hpp>
|
|
||||||
#include <api/network.hpp>
|
|
||||||
#include <api/profiling.hpp>
|
|
||||||
#include <api/custom_gpu_primitive.hpp>
|
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "cldnn_graph.h"
|
|
||||||
#include "simple_math.h"
|
|
||||||
#include <description_buffer.hpp>
|
|
||||||
#include "cldnn_infer_request.h"
|
|
||||||
#include <threading/ie_executor_manager.hpp>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
@ -71,12 +77,10 @@ void CLDNNGraph::Build() {
|
|||||||
for (int b = m_bv_sz - 1; b >= 0; b--) {
|
for (int b = m_bv_sz - 1; b >= 0; b--) {
|
||||||
auto network = BuildNetwork(m_program->GetCompiledProgram(b));
|
auto network = BuildNetwork(m_program->GetCompiledProgram(b));
|
||||||
m_networks.insert(m_networks.begin(), network);
|
m_networks.insert(m_networks.begin(), network);
|
||||||
GetEngine()->release_pending_memory(network->get_id());
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
auto network = BuildNetwork(m_program->GetCompiledProgram());
|
auto network = BuildNetwork(m_program->GetCompiledProgram());
|
||||||
m_networks.emplace_back(network);
|
m_networks.emplace_back(network);
|
||||||
GetEngine()->release_pending_memory(network->get_id());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
UpdateImplementationsMap();
|
UpdateImplementationsMap();
|
||||||
@ -499,7 +503,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::map<cldnn::primitive_id, cldnn::event> executedPrimitives = GetNetwork()->get_executed_primitives();
|
std::map<cldnn::primitive_id, cldnn::event::ptr> executedPrimitives = GetNetwork()->get_executed_primitives();
|
||||||
auto allPrimitives = GetNetwork()->get_all_primitives();
|
auto allPrimitives = GetNetwork()->get_all_primitives();
|
||||||
|
|
||||||
// Get profiling info for all layers
|
// Get profiling info for all layers
|
||||||
@ -521,7 +525,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
|
|||||||
auto event = execIter->second;
|
auto event = execIter->second;
|
||||||
executedPrimitives.erase(execIter);
|
executedPrimitives.erase(execIter);
|
||||||
|
|
||||||
cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()};
|
cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event->get_profiling_info()};
|
||||||
|
|
||||||
collectTimings(cldnnInfo, perfCount);
|
collectTimings(cldnnInfo, perfCount);
|
||||||
perfCount.num++;
|
perfCount.num++;
|
||||||
@ -534,7 +538,7 @@ void CLDNNGraph::UpdatePerfStatistics() {
|
|||||||
pcIter = perfMap.find(executedID.first);
|
pcIter = perfMap.find(executedID.first);
|
||||||
auto& perfCount = pcIter->second.second;
|
auto& perfCount = pcIter->second.second;
|
||||||
|
|
||||||
cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second.get_profiling_info()};
|
cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second->get_profiling_info()};
|
||||||
|
|
||||||
collectTimings(cldnnInfo, perfCount);
|
collectTimings(cldnnInfo, perfCount);
|
||||||
perfCount.num++;
|
perfCount.num++;
|
||||||
@ -675,7 +679,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
|
|||||||
executedPrimitives.find(primId) != executedPrimitives.end()) {
|
executedPrimitives.find(primId) != executedPrimitives.end()) {
|
||||||
auto event = executedPrimitives.at(primId);
|
auto event = executedPrimitives.at(primId);
|
||||||
|
|
||||||
cldnn::instrumentation::profiling_info cldnnInfo{primId, event.get_profiling_info()};
|
cldnn::instrumentation::profiling_info cldnnInfo{primId, event->get_profiling_info()};
|
||||||
|
|
||||||
// Collect timings
|
// Collect timings
|
||||||
long long cpuTime = 0;
|
long long cpuTime = 0;
|
||||||
|
@ -17,8 +17,8 @@
|
|||||||
#include "ie_blob.h"
|
#include "ie_blob.h"
|
||||||
#include "cpp/ie_cnn_network.h"
|
#include "cpp/ie_cnn_network.h"
|
||||||
|
|
||||||
#include <api/network.hpp>
|
#include <cldnn/graph/network.hpp>
|
||||||
#include <api/topology.hpp>
|
#include <cldnn/graph/topology.hpp>
|
||||||
|
|
||||||
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
|
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
|
||||||
#include "cldnn_custom_layer.h"
|
#include "cldnn_custom_layer.h"
|
||||||
@ -43,7 +43,7 @@ public:
|
|||||||
|
|
||||||
const Config& getConfig() const { return m_config; }
|
const Config& getConfig() const { return m_config; }
|
||||||
InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; }
|
InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; }
|
||||||
std::shared_ptr<const cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
|
std::shared_ptr<cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
|
||||||
int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
|
int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
|
||||||
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
|
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
|
||||||
size_t GetNetworksCount() const { return m_networks.size(); }
|
size_t GetNetworksCount() const { return m_networks.size(); }
|
||||||
|
@ -19,7 +19,7 @@ using namespace InferenceEngine;
|
|||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
const char CLDNNInferRequest::fp32_suffix[] = "_fp32";
|
const char fp32_suffix[] = "_fp32";
|
||||||
const char str_not_allocated[] = "Input data was not allocated.";
|
const char str_not_allocated[] = "Input data was not allocated.";
|
||||||
const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing";
|
const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing";
|
||||||
const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format";
|
const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format";
|
||||||
@ -110,7 +110,7 @@ Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* m
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& inputMem) {
|
void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem) {
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach");
|
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach");
|
||||||
auto impl = getContextImpl(m_graph->GetContext());
|
auto impl = getContextImpl(m_graph->GetContext());
|
||||||
impl->acquire_lock();
|
impl->acquire_lock();
|
||||||
@ -127,150 +127,57 @@ void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& in
|
|||||||
|
|
||||||
void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) {
|
void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) {
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc");
|
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc");
|
||||||
cldnn::memory input_mem = cldnn::memory::allocate(*(m_graph->GetEngine()), layout);
|
cldnn::memory::ptr input_mem = m_graph->GetEngine()->allocate_memory(layout);
|
||||||
input_attach(name, input_mem);
|
input_attach(name, input_mem);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory,
|
template<typename T>
|
||||||
Blob::Ptr bptr,
|
void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi, cldnn::stream& stream) {
|
||||||
buf_info* bi) {
|
size_t n = (bi == nullptr) ? dst->size() : bi->buf_size;
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData");
|
|
||||||
size_t n = (bi == nullptr) ? bptr->size() : bi->buf_size;
|
|
||||||
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
|
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
|
||||||
|
|
||||||
auto layout = outputMemory.get_layout();
|
auto layout = src->get_layout();
|
||||||
auto size = layout.size;
|
auto size = layout.size;
|
||||||
auto l_padd = layout.data_padding.lower_size();
|
|
||||||
auto u_padd = layout.data_padding.upper_size();
|
|
||||||
|
|
||||||
auto h_padding = u_padd.spatial[0] + l_padd.spatial[0];
|
auto locked_dst = dst->buffer();
|
||||||
auto v_padding_l = (h_padding + size.spatial[0]) * u_padd.spatial[1];
|
auto dst_ptr = locked_dst.as<T*>();
|
||||||
auto v_padding_u = (h_padding + size.spatial[0]) * l_padd.spatial[1];
|
if (dst_ptr == nullptr) {
|
||||||
|
|
||||||
auto locked = bptr->buffer();
|
|
||||||
switch (bptr->getTensorDesc().getPrecision()) {
|
|
||||||
case Precision::FP32: {
|
|
||||||
auto out_f = locked.as<float*>();
|
|
||||||
if (out_f == nullptr) {
|
|
||||||
IE_THROW() << "Invalid output blob";
|
IE_THROW() << "Invalid output blob";
|
||||||
}
|
}
|
||||||
auto resPtr = outputMemory.pointer<float>();
|
cldnn::mem_lock<T> src_lock{ src, stream };
|
||||||
float *resVec = out_f + offset;
|
T* src_ptr = src_lock.data();
|
||||||
|
dst_ptr += offset;
|
||||||
|
|
||||||
if (h_padding || v_padding_l || v_padding_u) {
|
if (layout.data_padding) {
|
||||||
size_t i = 0;
|
|
||||||
for (size_t b = 0; b < size.batch[0]; b++) {
|
for (size_t b = 0; b < size.batch[0]; b++) {
|
||||||
for (size_t f = 0; f < size.feature[0]; f++) {
|
for (size_t f = 0; f < size.feature[0]; f++) {
|
||||||
i += v_padding_l;
|
for (size_t w = 0; w < size.spatial[3]; w++) {
|
||||||
|
for (size_t z = 0; z < size.spatial[2]; z++) {
|
||||||
for (size_t y = 0; y < size.spatial[1]; y++) {
|
for (size_t y = 0; y < size.spatial[1]; y++) {
|
||||||
i += l_padd.spatial[0];
|
for (size_t x = 0; x < size.spatial[0]; x++) {
|
||||||
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
|
*dst_ptr++ = src_ptr[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))];
|
||||||
*resVec++ = resPtr[i];
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
i += u_padd.spatial[0];
|
|
||||||
}
|
}
|
||||||
i += v_padding_u;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
resVec[i] = resPtr[i];
|
dst_ptr[i] = src_ptr[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
case Precision::FP16: {
|
|
||||||
auto out_f = locked.as<uint16_t*>();
|
|
||||||
if (out_f == nullptr) {
|
|
||||||
IE_THROW() << "Invalid output blob";
|
|
||||||
}
|
|
||||||
auto resPtr = outputMemory.pointer<uint16_t>();
|
|
||||||
uint16_t* resVec = out_f + offset;
|
|
||||||
|
|
||||||
if (h_padding || v_padding_l || v_padding_u) {
|
void CLDNNInferRequest::copyOutputData(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) {
|
||||||
size_t i = 0;
|
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData");
|
||||||
for (size_t b = 0; b < size.batch[0]; b++) {
|
auto& stream = m_graph->GetNetwork()->get_stream();
|
||||||
for (size_t f = 0; f < size.feature[0]; f++) {
|
switch (dst->getTensorDesc().getPrecision()) {
|
||||||
i += v_padding_l;
|
case Precision::FP32: copyResultToOutputBlob<float>(src, dst, bi, stream); break;
|
||||||
for (size_t y = 0; y < size.spatial[1]; y++) {
|
case Precision::FP16: copyResultToOutputBlob<uint16_t>(src, dst, bi, stream); break;
|
||||||
i += l_padd.spatial[0];
|
case Precision::I32: copyResultToOutputBlob<int32_t>(src, dst, bi, stream); break;
|
||||||
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
|
case Precision::I64: copyResultToOutputBlob<int64_t>(src, dst, bi, stream); break;
|
||||||
*resVec++ = resPtr[i];
|
default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision";
|
||||||
}
|
|
||||||
i += u_padd.spatial[0];
|
|
||||||
}
|
|
||||||
i += v_padding_u;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (size_t i = 0; i < n; i++) {
|
|
||||||
resVec[i] = resPtr[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Precision::I32: {
|
|
||||||
auto out_f = locked.as<int32_t*>();
|
|
||||||
if (out_f == nullptr) {
|
|
||||||
IE_THROW() << "Invalid output blob";
|
|
||||||
}
|
|
||||||
auto resPtr = outputMemory.pointer<int32_t>();
|
|
||||||
int32_t* resVec = out_f + offset;
|
|
||||||
|
|
||||||
if (h_padding || v_padding_l || v_padding_u) {
|
|
||||||
size_t i = 0;
|
|
||||||
for (size_t b = 0; b < size.batch[0]; b++) {
|
|
||||||
for (size_t f = 0; f < size.feature[0]; f++) {
|
|
||||||
i += v_padding_l;
|
|
||||||
for (size_t y = 0; y < size.spatial[1]; y++) {
|
|
||||||
i += l_padd.spatial[0];
|
|
||||||
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
|
|
||||||
*resVec++ = resPtr[i];
|
|
||||||
}
|
|
||||||
i += u_padd.spatial[0];
|
|
||||||
}
|
|
||||||
i += v_padding_u;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (size_t i = 0; i < n; i++) {
|
|
||||||
resVec[i] = resPtr[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Precision::I64: {
|
|
||||||
auto out_f = locked.as<int64_t*>();
|
|
||||||
if (out_f == nullptr) {
|
|
||||||
IE_THROW() << "Invalid output blob";
|
|
||||||
}
|
|
||||||
auto resPtr = outputMemory.pointer<int64_t>();
|
|
||||||
int64_t* resVec = out_f + offset;
|
|
||||||
|
|
||||||
if (h_padding || v_padding_l || v_padding_u) {
|
|
||||||
size_t i = 0;
|
|
||||||
for (size_t b = 0; b < size.batch[0]; b++) {
|
|
||||||
for (size_t f = 0; f < size.feature[0]; f++) {
|
|
||||||
i += v_padding_l;
|
|
||||||
for (size_t y = 0; y < size.spatial[1]; y++) {
|
|
||||||
i += l_padd.spatial[0];
|
|
||||||
for (size_t x = 0; x < size.spatial[0]; x++, i++) {
|
|
||||||
*resVec++ = resPtr[i];
|
|
||||||
}
|
|
||||||
i += u_padd.spatial[0];
|
|
||||||
}
|
|
||||||
i += v_padding_u;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (size_t i = 0; i < n; i++) {
|
|
||||||
resVec[i] = resPtr[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
IE_THROW() << "The plugin does not support output " << bptr->getTensorDesc().getPrecision() << " precision";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -279,7 +186,7 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
|
|||||||
const cldnn::layout& inputLayout,
|
const cldnn::layout& inputLayout,
|
||||||
const Blob &inputBlob, buf_info* bi) {
|
const Blob &inputBlob, buf_info* bi) {
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData");
|
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData");
|
||||||
size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
|
|
||||||
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
|
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
|
||||||
|
|
||||||
cldnn::primitive_id internalName = "parameter:" + inputName;
|
cldnn::primitive_id internalName = "parameter:" + inputName;
|
||||||
@ -287,37 +194,37 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
|
|||||||
switch (inputBlob.getTensorDesc().getPrecision()) {
|
switch (inputBlob.getTensorDesc().getPrecision()) {
|
||||||
case Precision::FP32: {
|
case Precision::FP32: {
|
||||||
float* blob_ptr = const_cast<float*>(locked.as<const float*>()) + offset;
|
float* blob_ptr = const_cast<float*>(locked.as<const float*>()) + offset;
|
||||||
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
|
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Precision::I32: {
|
case Precision::I32: {
|
||||||
int32_t* blob_ptr = const_cast<int32_t*>(locked.as<const int32_t*>()) + offset;
|
int32_t* blob_ptr = const_cast<int32_t*>(locked.as<const int32_t*>()) + offset;
|
||||||
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
|
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Precision::I64: {
|
case Precision::I64: {
|
||||||
int64_t* blob_ptr = const_cast<int64_t*>(locked.as<const int64_t*>()) + offset;
|
int64_t* blob_ptr = const_cast<int64_t*>(locked.as<const int64_t*>()) + offset;
|
||||||
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
|
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Precision::FP16: {
|
case Precision::FP16: {
|
||||||
uint16_t* blob_ptr = const_cast<uint16_t*>(locked.as<const uint16_t*>()) + offset;
|
uint16_t* blob_ptr = const_cast<uint16_t*>(locked.as<const uint16_t*>()) + offset;
|
||||||
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
|
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Precision::I8: {
|
case Precision::I8: {
|
||||||
int8_t* blob_ptr = const_cast<int8_t*>(locked.as<const int8_t*>()) + offset;
|
int8_t* blob_ptr = const_cast<int8_t*>(locked.as<const int8_t*>()) + offset;
|
||||||
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
|
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Precision::U8: {
|
case Precision::U8: {
|
||||||
uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset;
|
uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset;
|
||||||
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
|
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Precision::BOOL: {
|
case Precision::BOOL: {
|
||||||
uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset;
|
uint8_t* blob_ptr = const_cast<uint8_t*>(locked.as<const uint8_t*>()) + offset;
|
||||||
network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
|
network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -601,6 +508,7 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data)
|
|||||||
void CLDNNInferRequest::AllocateInputs() {
|
void CLDNNInferRequest::AllocateInputs() {
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs");
|
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs");
|
||||||
auto inputLayouts = m_graph->GetInputLayouts();
|
auto inputLayouts = m_graph->GetInputLayouts();
|
||||||
|
auto& stream = m_graph->GetNetwork()->get_stream();
|
||||||
// allocate inputs
|
// allocate inputs
|
||||||
for (auto& ni : _networkInputs) {
|
for (auto& ni : _networkInputs) {
|
||||||
std::string name = ni.first;
|
std::string name = ni.first;
|
||||||
@ -623,25 +531,24 @@ void CLDNNInferRequest::AllocateInputs() {
|
|||||||
input_alloc(UVName, inputLayouts.at(UVName));
|
input_alloc(UVName, inputLayouts.at(UVName));
|
||||||
|
|
||||||
size_t height = desc.getDims()[2], width = desc.getDims()[3];
|
size_t height = desc.getDims()[2], width = desc.getDims()[3];
|
||||||
cldnn::pointer<uint8_t> input_mem_ptr_Y = inputsMemory.at(YName).pointer<uint8_t>();
|
cldnn::mem_lock<uint8_t> input_mem_ptr_Y{inputsMemory.at(YName), stream};
|
||||||
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
|
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
|
||||||
auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data());
|
auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data());
|
||||||
|
|
||||||
cldnn::pointer<uint8_t> input_mem_ptr_UV = inputsMemory.at(UVName).pointer<uint8_t>();
|
cldnn::mem_lock<uint8_t> input_mem_ptr_UV{ inputsMemory.at(UVName), stream };
|
||||||
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
|
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
|
||||||
auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data());
|
auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data());
|
||||||
|
|
||||||
blobs.push_back(make_shared_blob<NV12Blob>(blobY, blobUV));
|
blobs.push_back(make_shared_blob<NV12Blob>(blobY, blobUV));
|
||||||
}
|
}
|
||||||
_inputs[name] = desc.getDims()[0] == 1 ? blobs[0] : make_shared_blob<BatchedBlob>(blobs);
|
_inputs[name] = desc.getDims()[0] == 1 ? blobs[0] : make_shared_blob<BatchedBlob>(blobs);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
if (inputLayouts.find(name) == inputLayouts.end()) {
|
if (inputLayouts.find(name) == inputLayouts.end()) {
|
||||||
IE_THROW() << "Input layout for " << name << " is not found";
|
IE_THROW() << "Input layout for " << name << " is not found";
|
||||||
}
|
}
|
||||||
cldnn::layout layout = inputLayouts.at(name);
|
cldnn::layout layout = inputLayouts.at(name);
|
||||||
input_alloc(name, layout);
|
input_alloc(name, layout);
|
||||||
cldnn::pointer<uint8_t> mem_ptr = inputsMemory.at(name).pointer<uint8_t>();
|
cldnn::mem_lock<uint8_t> mem_ptr{inputsMemory.at(name), stream};
|
||||||
_inputs[name] = createInputBlob(desc, mem_ptr.data());
|
_inputs[name] = createInputBlob(desc, mem_ptr.data());
|
||||||
|
|
||||||
if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
|
if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
|
||||||
@ -685,8 +592,8 @@ void CLDNNInferRequest::AllocateOutputs() {
|
|||||||
bool can_reuse_internal_mem = !m_useStreams;
|
bool can_reuse_internal_mem = !m_useStreams;
|
||||||
for (auto& no : _networkOutputs) {
|
for (auto& no : _networkOutputs) {
|
||||||
std::string outputID = m_graph->MapOutputName(no.first);
|
std::string outputID = m_graph->MapOutputName(no.first);
|
||||||
cldnn::memory output_mem = m_graph->GetNetwork()->get_output_memory(outputID);
|
cldnn::memory::ptr output_mem = m_graph->GetNetwork()->get_output_memory(outputID);
|
||||||
cldnn::pointer<uint8_t> output_mem_ptr = output_mem.pointer<uint8_t>();
|
cldnn::mem_lock<uint8_t> output_mem_ptr{output_mem, m_graph->GetNetwork()->get_stream()};
|
||||||
if (output_mem_ptr.data() == nullptr) {
|
if (output_mem_ptr.data() == nullptr) {
|
||||||
IE_THROW() << "Empty output memory for primitive " << outputID;
|
IE_THROW() << "Empty output memory for primitive " << outputID;
|
||||||
}
|
}
|
||||||
@ -824,6 +731,7 @@ CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap
|
|||||||
void CLDNNInferRequest::execAndParse() {
|
void CLDNNInferRequest::execAndParse() {
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse");
|
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse");
|
||||||
auto networkOutputs = m_graph->GetNetwork()->execute();
|
auto networkOutputs = m_graph->GetNetwork()->execute();
|
||||||
|
auto& stream = m_graph->GetNetwork()->get_stream();
|
||||||
|
|
||||||
// Collect outputs as requested by the model
|
// Collect outputs as requested by the model
|
||||||
for (auto& no : _networkOutputs) {
|
for (auto& no : _networkOutputs) {
|
||||||
@ -835,12 +743,12 @@ void CLDNNInferRequest::execAndParse() {
|
|||||||
// mapping remote blobs not needed -
|
// mapping remote blobs not needed -
|
||||||
// let the user take care of them explicitly
|
// let the user take care of them explicitly
|
||||||
if (!bptr->is<gpu::ClBlob>()) {
|
if (!bptr->is<gpu::ClBlob>()) {
|
||||||
auto out_ptr = outputMemory.pointer<uint8_t>();
|
cldnn::mem_lock<uint8_t> out_ptr{outputMemory, stream};
|
||||||
auto blob_ptr = bptr->buffer().as<uint8_t*>();
|
auto blob_ptr = bptr->buffer().as<uint8_t*>();
|
||||||
|
|
||||||
// If Async API is used, copy of output blobs is not needed, unless SetBlob function was called.
|
// If Async API is used, copy of output blobs is not needed, unless SetBlob function was called.
|
||||||
// But in the case when old API is used we have to copy data to memory provided by user.
|
// But in the case when old API is used we have to copy data to memory provided by user.
|
||||||
if (blob_ptr != &out_ptr[0]) {
|
if (blob_ptr != out_ptr.data()) {
|
||||||
copyOutputData(outputMemory, bptr);
|
copyOutputData(outputMemory, bptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -965,19 +873,20 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
|
|||||||
IE_THROW() << "Input name mismatch.";
|
IE_THROW() << "Input name mismatch.";
|
||||||
}
|
}
|
||||||
auto inputLayout = m_graph->GetInputLayouts().at(inputName);
|
auto inputLayout = m_graph->GetInputLayouts().at(inputName);
|
||||||
auto is_same_buffer = [](const Blob& blob, const cldnn::memory& memory) -> bool {
|
auto is_same_buffer = [&](const Blob& blob, cldnn::memory::ptr memory) -> bool {
|
||||||
const std::string str_not_allocated("Input data was not allocated.");
|
const std::string str_not_allocated("Input data was not allocated.");
|
||||||
cldnn::pointer<const uint8_t> ptr = memory.pointer<const uint8_t>();
|
cldnn::mem_lock<uint8_t> ptr{memory, m_graph->GetNetwork()->get_stream()};
|
||||||
const uint8_t* blob_ptr = blob.cbuffer().as<const uint8_t*>();
|
const uint8_t* blob_ptr = blob.cbuffer().as<const uint8_t*>();
|
||||||
const uint8_t* mem_ptr = ptr.data();
|
const uint8_t* mem_ptr = ptr.data();
|
||||||
if (blob_ptr == nullptr || mem_ptr == nullptr) {
|
if (blob_ptr == nullptr || mem_ptr == nullptr) {
|
||||||
IE_THROW() << str_not_allocated;
|
IE_THROW() << str_not_allocated;
|
||||||
}
|
}
|
||||||
return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size());
|
return (blob_ptr == mem_ptr) && (blob.byteSize() == memory->size());
|
||||||
};
|
};
|
||||||
|
|
||||||
cldnn::primitive_id internalName = "parameter:" + inputName;
|
cldnn::primitive_id internalName = "parameter:" + inputName;
|
||||||
const cldnn::memory& memory = inputsMemory.at(inputName);
|
cldnn::memory::ptr memory = inputsMemory.at(inputName);
|
||||||
|
auto& stream = m_graph->GetNetwork()->get_stream();
|
||||||
auto _nw_ptr = m_graph->GetNetwork();
|
auto _nw_ptr = m_graph->GetNetwork();
|
||||||
auto prec = inputBlob.getTensorDesc().getPrecision();
|
auto prec = inputBlob.getTensorDesc().getPrecision();
|
||||||
|
|
||||||
@ -986,8 +895,8 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
|
|||||||
_nw_ptr->set_input_data(internalName, memory);
|
_nw_ptr->set_input_data(internalName, memory);
|
||||||
} else if (prec == Precision::I16 || prec == Precision::U16) {
|
} else if (prec == Precision::I16 || prec == Precision::U16) {
|
||||||
// clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision
|
// clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision
|
||||||
const cldnn::memory& fp32_mem = inputsMemory.at(inputName+fp32_suffix);
|
cldnn::memory::ptr fp32_mem = inputsMemory.at(inputName+fp32_suffix);
|
||||||
cldnn::pointer<float> ptr = fp32_mem.pointer<float>();
|
cldnn::mem_lock<float> ptr {fp32_mem, stream};
|
||||||
if (prec == Precision::I16) {
|
if (prec == Precision::I16) {
|
||||||
copyToFloat<int16_t>(ptr.data(), &inputBlob);
|
copyToFloat<int16_t>(ptr.data(), &inputBlob);
|
||||||
} else {
|
} else {
|
||||||
@ -1031,4 +940,4 @@ void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}; // namespace CLDNNPlugin
|
} // namespace CLDNNPlugin
|
||||||
|
@ -46,7 +46,7 @@ public:
|
|||||||
void EnableStreams() { m_useStreams = true; }
|
void EnableStreams() { m_useStreams = true; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::map<std::string, cldnn::memory> inputsMemory;
|
std::map<std::string, cldnn::memory::ptr> inputsMemory;
|
||||||
std::map<std::string, cldnn::primitive_id> outputsMap;
|
std::map<std::string, cldnn::primitive_id> outputsMap;
|
||||||
|
|
||||||
bool m_useProfiling;
|
bool m_useProfiling;
|
||||||
@ -60,12 +60,12 @@ protected:
|
|||||||
|
|
||||||
InferenceEngine::Blob::Ptr createInputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
|
InferenceEngine::Blob::Ptr createInputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
|
||||||
InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
|
InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
|
||||||
void copyOutputData(const cldnn::memory& outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
|
void copyOutputData(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
|
||||||
void copyInputData(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName,
|
void copyInputData(std::shared_ptr<cldnn::network> network, const cldnn::primitive_id &inputName,
|
||||||
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob,
|
const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob,
|
||||||
buf_info* bi = nullptr);
|
buf_info* bi = nullptr);
|
||||||
|
|
||||||
void input_attach(cldnn::primitive_id name, cldnn::memory& inputMem);
|
void input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem);
|
||||||
void input_alloc(cldnn::primitive_id name, const cldnn::layout& layout);
|
void input_alloc(cldnn::primitive_id name, const cldnn::layout& layout);
|
||||||
void AllocateInputs();
|
void AllocateInputs();
|
||||||
void AllocateOutputs();
|
void AllocateOutputs();
|
||||||
@ -76,9 +76,6 @@ protected:
|
|||||||
|
|
||||||
void PrepareInput(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
|
void PrepareInput(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
|
||||||
void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
|
void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
|
||||||
|
|
||||||
private:
|
|
||||||
static const char fp32_suffix[];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}; // namespace CLDNNPlugin
|
}; // namespace CLDNNPlugin
|
||||||
|
@ -92,7 +92,7 @@ bool Program::CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config, bool createTopologyOnly)
|
Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config, bool createTopologyOnly)
|
||||||
: m_config(config)
|
: m_config(config)
|
||||||
, m_engine(engine)
|
, m_engine(engine)
|
||||||
, m_curBatch(-1)
|
, m_curBatch(-1)
|
||||||
@ -128,11 +128,9 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cld
|
|||||||
|
|
||||||
ChangeInputBatch(1U << static_cast<unsigned>(b));
|
ChangeInputBatch(1U << static_cast<unsigned>(b));
|
||||||
m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly));
|
m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly));
|
||||||
m_engine->release_pending_memory(0);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly));
|
m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly));
|
||||||
m_engine->release_pending_memory(0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,8 +15,8 @@
|
|||||||
|
|
||||||
#include "cldnn_config.h"
|
#include "cldnn_config.h"
|
||||||
|
|
||||||
#include <api/engine.hpp>
|
#include <cldnn/runtime/engine.hpp>
|
||||||
#include <api/topology.hpp>
|
#include <cldnn/graph/topology.hpp>
|
||||||
|
|
||||||
// Forward declarations for cldnn part
|
// Forward declarations for cldnn part
|
||||||
namespace cldnn {
|
namespace cldnn {
|
||||||
@ -69,8 +69,8 @@ public:
|
|||||||
|
|
||||||
class Program {
|
class Program {
|
||||||
public:
|
public:
|
||||||
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config, bool createTopologyOnly = false);
|
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<cldnn::engine> engine, const Config& config, bool createTopologyOnly = false);
|
||||||
Program(std::shared_ptr<const cldnn::engine> engine, const Config& config) : m_config(config), m_engine(engine),
|
Program(std::shared_ptr<cldnn::engine> engine, const Config& config) : m_config(config), m_engine(engine),
|
||||||
m_curBatch(-1), queryMode(false), m_max_batch(1) {}
|
m_curBatch(-1), queryMode(false), m_max_batch(1) {}
|
||||||
Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false), m_max_batch(1) {}
|
Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false), m_max_batch(1) {}
|
||||||
|
|
||||||
@ -100,8 +100,8 @@ public:
|
|||||||
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
|
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
|
||||||
InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; }
|
InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; }
|
||||||
InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; }
|
InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; }
|
||||||
const cldnn::engine& GetEngine() const { return *m_engine; }
|
cldnn::engine& GetEngine() const { return *m_engine; }
|
||||||
std::shared_ptr<const cldnn::engine> GetEnginePtr() const { return m_engine; }
|
std::shared_ptr<cldnn::engine> GetEnginePtr() const { return m_engine; }
|
||||||
const Config& GetConfig() const { return m_config; }
|
const Config& GetConfig() const { return m_config; }
|
||||||
int GetMaxBatchSizeForSingleProgram();
|
int GetMaxBatchSizeForSingleProgram();
|
||||||
|
|
||||||
@ -150,7 +150,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
static factories_map_t factories_map;
|
static factories_map_t factories_map;
|
||||||
std::vector<std::shared_ptr<cldnn::program>> m_programs;
|
std::vector<std::shared_ptr<cldnn::program>> m_programs;
|
||||||
std::shared_ptr<const cldnn::engine> m_engine;
|
std::shared_ptr<cldnn::engine> m_engine;
|
||||||
Config m_config;
|
Config m_config;
|
||||||
|
|
||||||
std::shared_ptr<cldnn::topology> m_topology;
|
std::shared_ptr<cldnn::topology> m_topology;
|
||||||
|
@ -6,21 +6,23 @@
|
|||||||
#include "cldnn_remote_context.h"
|
#include "cldnn_remote_context.h"
|
||||||
#include "cldnn_itt.h"
|
#include "cldnn_itt.h"
|
||||||
|
|
||||||
|
#include "cldnn/runtime/device_query.hpp"
|
||||||
|
|
||||||
using namespace InferenceEngine;
|
using namespace InferenceEngine;
|
||||||
using namespace InferenceEngine::gpu;
|
using namespace InferenceEngine::gpu;
|
||||||
using namespace InferenceEngine::details;
|
using namespace InferenceEngine::details;
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
static const char unsupported_str[] = "Unsupported shared object type ";
|
|
||||||
CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator;
|
CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator;
|
||||||
|
|
||||||
CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
|
CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
|
||||||
|
cldnn::stream& stream,
|
||||||
const cldnn::layout& layout,
|
const cldnn::layout& layout,
|
||||||
cldnn::shared_handle mem,
|
cldnn::shared_handle mem,
|
||||||
cldnn::shared_surface surf,
|
cldnn::shared_surface surf,
|
||||||
uint32_t plane,
|
uint32_t plane,
|
||||||
BlobType mem_type) :
|
BlobType mem_type) :
|
||||||
m_context(context), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane),
|
m_context(context), m_stream(stream), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane),
|
||||||
_handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
|
_handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -67,7 +69,6 @@ ParamMap CLDNNRemoteBlobImpl::getParams() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CLDNNRemoteBlobImpl::deallocate() noexcept {
|
bool CLDNNRemoteBlobImpl::deallocate() noexcept {
|
||||||
if (m_memObject != nullptr)
|
|
||||||
m_memObject.reset();
|
m_memObject.reset();
|
||||||
return m_memObject == nullptr;
|
return m_memObject == nullptr;
|
||||||
}
|
}
|
||||||
@ -86,32 +87,7 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() {
|
|||||||
_impl->acquire_lock();
|
_impl->acquire_lock();
|
||||||
|
|
||||||
if (m_memObject == nullptr) {
|
if (m_memObject == nullptr) {
|
||||||
auto eng = _impl->GetEngine();
|
allocate();
|
||||||
switch (m_mem_type) {
|
|
||||||
case BlobType::BT_BUF_INTERNAL:
|
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::allocate(*eng, m_layout)));
|
|
||||||
break;
|
|
||||||
case BlobType::BT_BUF_SHARED:
|
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem)));
|
|
||||||
break;
|
|
||||||
#ifdef _WIN32
|
|
||||||
case BlobType::BT_SURF_SHARED:
|
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane)));
|
|
||||||
break;
|
|
||||||
case BlobType::BT_DX_BUF_SHARED:
|
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_dx_buffer(*eng, m_layout, m_mem)));
|
|
||||||
break;
|
|
||||||
#else
|
|
||||||
case BlobType::BT_SURF_SHARED:
|
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_surf, m_plane)));
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
case BlobType::BT_IMG_SHARED:
|
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_image(*eng, m_layout, m_mem)));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
IE_THROW() << unsupported_str << m_mem_type;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_impl->release_lock();
|
_impl->release_lock();
|
||||||
@ -120,32 +96,38 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() {
|
|||||||
void CLDNNRemoteBlobImpl::allocate() noexcept {
|
void CLDNNRemoteBlobImpl::allocate() noexcept {
|
||||||
assert(m_memObject == nullptr);
|
assert(m_memObject == nullptr);
|
||||||
|
|
||||||
std::shared_ptr<const cldnn::engine> eng = getContextImpl(m_context.lock())->GetEngine();
|
std::shared_ptr<cldnn::engine> eng = getContextImpl(m_context.lock())->GetEngine();
|
||||||
|
|
||||||
switch (m_mem_type) {
|
switch (m_mem_type) {
|
||||||
case BlobType::BT_BUF_INTERNAL:
|
case BlobType::BT_BUF_INTERNAL: {
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::allocate(*eng, m_layout)));
|
m_memObject = eng->allocate_memory(m_layout);
|
||||||
break;
|
break;
|
||||||
case BlobType::BT_BUF_SHARED:
|
}
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem)));
|
case BlobType::BT_BUF_SHARED: {
|
||||||
|
m_memObject = eng->share_buffer(m_layout, m_mem);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
case BlobType::BT_SURF_SHARED:
|
case BlobType::BT_SURF_SHARED: {
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane)));
|
m_memObject = eng->share_surface(m_layout, m_mem, m_plane);
|
||||||
break;
|
break;
|
||||||
case BlobType::BT_DX_BUF_SHARED:
|
}
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_dx_buffer(*eng, m_layout, m_mem)));
|
case BlobType::BT_DX_BUF_SHARED: {
|
||||||
|
m_memObject = eng->share_dx_buffer(m_layout, m_mem);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
case BlobType::BT_SURF_SHARED:
|
case BlobType::BT_SURF_SHARED: {
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_surf, m_plane)));
|
m_memObject = eng->share_surface(m_layout, m_surf, m_plane);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
case BlobType::BT_IMG_SHARED:
|
case BlobType::BT_IMG_SHARED: {
|
||||||
m_memObject = std::unique_ptr<cldnn::memory>(new cldnn::memory(cldnn::memory::share_image(*eng, m_layout, m_mem)));
|
m_memObject = eng->share_image(m_layout, m_mem);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
m_memObject = nullptr;
|
m_memObject.reset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,7 +147,7 @@ std::shared_ptr<RemoteContext> CLDNNRemoteBlobImpl::getContext() const noexcept
|
|||||||
}
|
}
|
||||||
|
|
||||||
void CLDNNRemoteBlobImpl::lock() const {
|
void CLDNNRemoteBlobImpl::lock() const {
|
||||||
lockedHolder = std::unique_ptr<cldnn::pointer<uint8_t>>(new cldnn::pointer<uint8_t>(m_memObject->pointer<uint8_t>()));
|
lockedHolder = std::unique_ptr<cldnn::mem_lock<uint8_t>>(new cldnn::mem_lock<uint8_t>(m_memObject, m_stream));
|
||||||
auto ptr = lockedHolder->data();
|
auto ptr = lockedHolder->data();
|
||||||
_handle = reinterpret_cast<void*>(ptr);
|
_handle = reinterpret_cast<void*>(ptr);
|
||||||
m_allocator.regLockedBlob(_handle, this);
|
m_allocator.regLockedBlob(_handle, this);
|
||||||
@ -244,7 +226,11 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cldnn::device_query device_query(_context_id, _va_device);
|
// TODO: Parameterize this based on plugin config and compilation options
|
||||||
|
auto engine_type = cldnn::engine_types::ocl;
|
||||||
|
auto runtime_type = cldnn::runtime_types::ocl;
|
||||||
|
// Use actual runtime and engine types
|
||||||
|
cldnn::device_query device_query(engine_type, runtime_type, _context_id, _va_device);
|
||||||
auto device_map = device_query.get_available_devices();
|
auto device_map = device_query.get_available_devices();
|
||||||
|
|
||||||
auto iter = device_map.find(m_config.device_id);
|
auto iter = device_map.find(m_config.device_id);
|
||||||
@ -252,28 +238,25 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
|
|||||||
|
|
||||||
{
|
{
|
||||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl::Create");
|
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecutionContextImpl::Create");
|
||||||
m_engine = std::make_shared<cldnn::engine>(dev,
|
bool enable_profiling = (m_config.useProfiling ||
|
||||||
cldnn::engine_configuration((m_config.useProfiling ||
|
|
||||||
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
|
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
|
||||||
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)),
|
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache));
|
||||||
false,
|
cldnn::queue_types queue_type = cldnn::queue_types::out_of_order;
|
||||||
m_config.dumpCustomKernels,
|
bool use_unified_shared_memory = true;
|
||||||
std::string(),
|
m_engine = cldnn::engine::create(engine_type, runtime_type, dev, cldnn::engine_configuration(enable_profiling,
|
||||||
std::string(),
|
queue_type,
|
||||||
true,
|
|
||||||
std::string(),
|
|
||||||
m_config.sources_dumps_dir,
|
m_config.sources_dumps_dir,
|
||||||
m_config.queuePriority,
|
m_config.queuePriority,
|
||||||
m_config.queueThrottle,
|
m_config.queueThrottle,
|
||||||
m_config.memory_pool_on,
|
m_config.memory_pool_on,
|
||||||
m_config.throughput_streams,
|
use_unified_shared_memory,
|
||||||
m_config.kernels_cache_dir,
|
m_config.kernels_cache_dir,
|
||||||
m_config.n_threads));
|
m_config.n_threads));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ParamMap CLDNNExecutionContextImpl::getParams() const {
|
ParamMap CLDNNExecutionContextImpl::getParams() const {
|
||||||
ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_context() } };
|
ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } };
|
||||||
|
|
||||||
switch (m_type) {
|
switch (m_type) {
|
||||||
case OCL:
|
case OCL:
|
||||||
|
@ -4,15 +4,11 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <string>
|
#include <cldnn/runtime/memory.hpp>
|
||||||
#include <map>
|
#include <cldnn/runtime/engine.hpp>
|
||||||
#include <memory>
|
|
||||||
#include <atomic>
|
|
||||||
#include <ie_parameter.hpp>
|
#include <ie_parameter.hpp>
|
||||||
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
||||||
#include "cldnn_config.h"
|
#include "cldnn_config.h"
|
||||||
#include <api/memory.hpp>
|
|
||||||
#include <api/engine.hpp>
|
|
||||||
#include "cldnn_common_utils.h"
|
#include "cldnn_common_utils.h"
|
||||||
|
|
||||||
#ifndef NOMINMAX
|
#ifndef NOMINMAX
|
||||||
@ -25,6 +21,11 @@
|
|||||||
# include <gpu/gpu_context_api_va.hpp>
|
# include <gpu/gpu_context_api_va.hpp>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
class CLDNNRemoteAllocator;
|
class CLDNNRemoteAllocator;
|
||||||
|
|
||||||
@ -41,6 +42,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
|
explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
|
||||||
|
cldnn::stream& stream,
|
||||||
const cldnn::layout& layout,
|
const cldnn::layout& layout,
|
||||||
cldnn::shared_handle mem,
|
cldnn::shared_handle mem,
|
||||||
cldnn::shared_surface surf,
|
cldnn::shared_surface surf,
|
||||||
@ -63,11 +65,12 @@ public:
|
|||||||
bool is_allocated() const noexcept;
|
bool is_allocated() const noexcept;
|
||||||
bool is_locked() const noexcept;
|
bool is_locked() const noexcept;
|
||||||
void allocate_if_needed();
|
void allocate_if_needed();
|
||||||
cldnn::memory& getMemory() { return *m_memObject; }
|
cldnn::memory::ptr getMemory() { return m_memObject; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
static CLDNNRemoteAllocator m_allocator;
|
static CLDNNRemoteAllocator m_allocator;
|
||||||
std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;
|
std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;
|
||||||
|
cldnn::stream& m_stream;
|
||||||
|
|
||||||
// constructor stuff
|
// constructor stuff
|
||||||
cldnn::shared_handle m_mem;
|
cldnn::shared_handle m_mem;
|
||||||
@ -77,9 +80,9 @@ protected:
|
|||||||
cldnn::layout m_layout;
|
cldnn::layout m_layout;
|
||||||
BlobType m_mem_type;
|
BlobType m_mem_type;
|
||||||
|
|
||||||
std::unique_ptr<cldnn::memory> m_memObject;
|
cldnn::memory::ptr m_memObject;
|
||||||
|
|
||||||
mutable std::unique_ptr<cldnn::pointer<uint8_t>> lockedHolder;
|
mutable std::unique_ptr<cldnn::mem_lock<uint8_t>> lockedHolder;
|
||||||
mutable void* _handle;
|
mutable void* _handle;
|
||||||
mutable std::shared_ptr<InferenceEngine::IAllocator> _allocator;
|
mutable std::shared_ptr<InferenceEngine::IAllocator> _allocator;
|
||||||
|
|
||||||
@ -93,13 +96,14 @@ public:
|
|||||||
using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>;
|
using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>;
|
||||||
|
|
||||||
explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
|
explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
|
||||||
|
cldnn::stream& stream,
|
||||||
const InferenceEngine::TensorDesc& desc,
|
const InferenceEngine::TensorDesc& desc,
|
||||||
const cldnn::layout& layout,
|
const cldnn::layout& layout,
|
||||||
cldnn::shared_handle mem,
|
cldnn::shared_handle mem,
|
||||||
cldnn::shared_surface surf,
|
cldnn::shared_surface surf,
|
||||||
uint32_t plane,
|
uint32_t plane,
|
||||||
CLDNNRemoteBlobImpl::BlobType mem_type)
|
CLDNNRemoteBlobImpl::BlobType mem_type)
|
||||||
: _impl(context, layout, mem, surf, plane, mem_type)
|
: _impl(context, stream, layout, mem, surf, plane, mem_type)
|
||||||
, TpublicAPI(desc) {}
|
, TpublicAPI(desc) {}
|
||||||
|
|
||||||
void allocate() noexcept override { _impl.allocate(); }
|
void allocate() noexcept override { _impl.allocate(); }
|
||||||
@ -231,6 +235,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
// TODO: refactor to unique_ptr
|
||||||
std::shared_ptr<cldnn::engine> m_engine;
|
std::shared_ptr<cldnn::engine> m_engine;
|
||||||
InferenceEngine::gpu_handle_param m_va_display;
|
InferenceEngine::gpu_handle_param m_va_display;
|
||||||
Config m_config;
|
Config m_config;
|
||||||
@ -267,6 +272,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
|
|||||||
using namespace InferenceEngine;
|
using namespace InferenceEngine;
|
||||||
using InferenceEngine::gpu::details::param_map_obj_getter;
|
using InferenceEngine::gpu::details::param_map_obj_getter;
|
||||||
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
|
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
|
||||||
|
auto& stream = _impl.GetEngine()->get_program_stream();
|
||||||
uint32_t plane = param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
|
uint32_t plane = param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
|
cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
|
||||||
@ -290,11 +296,11 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
|
|||||||
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
|
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
|
||||||
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
|
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this,
|
ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this, stream,
|
||||||
tensorDesc, layout, mem, 0, plane,
|
tensorDesc, layout, mem, 0, plane,
|
||||||
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
|
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
|
||||||
#else
|
#else
|
||||||
ret = std::make_shared<CLDNNRemoteVASurface>(smart_this,
|
ret = std::make_shared<CLDNNRemoteVASurface>(smart_this, stream,
|
||||||
tensorDesc, layout, nullptr, surf, plane,
|
tensorDesc, layout, nullptr, surf, plane,
|
||||||
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
|
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
|
||||||
#endif
|
#endif
|
||||||
@ -311,6 +317,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
|
|||||||
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
|
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
|
||||||
|
|
||||||
_impl.acquire_lock();
|
_impl.acquire_lock();
|
||||||
|
auto& stream = _impl.GetEngine()->get_program_stream();
|
||||||
|
|
||||||
// try to locate previously shared object
|
// try to locate previously shared object
|
||||||
auto itr = shared_obj_reg.find(mem);
|
auto itr = shared_obj_reg.find(mem);
|
||||||
@ -327,15 +334,15 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
|
|||||||
|
|
||||||
switch (blob_type) {
|
switch (blob_type) {
|
||||||
case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
|
case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
|
||||||
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
|
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||||
break;
|
break;
|
||||||
case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
|
case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
|
||||||
layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
|
layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
|
||||||
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
|
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||||
break;
|
break;
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
|
case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
|
||||||
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
|
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
default:
|
default:
|
||||||
@ -354,7 +361,9 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
|
|||||||
CldnnTensorFromIEDims(tensorDesc.getDims()));
|
CldnnTensorFromIEDims(tensorDesc.getDims()));
|
||||||
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
|
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
|
||||||
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
|
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
|
||||||
|
auto& stream = _impl.GetEngine()->get_program_stream();
|
||||||
return std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
|
return std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
|
||||||
|
stream,
|
||||||
tensorDesc,
|
tensorDesc,
|
||||||
layout,
|
layout,
|
||||||
nullptr, 0, 0,
|
nullptr, 0, 0,
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/batch_to_space.hpp"
|
#include "ngraph/op/batch_to_space.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/batch_to_space.hpp"
|
#include "cldnn/primitives/batch_to_space.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,9 +8,9 @@
|
|||||||
#include "ngraph/op/broadcast.hpp"
|
#include "ngraph/op/broadcast.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/broadcast.hpp"
|
#include "cldnn/primitives/broadcast.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/reshape.hpp"
|
#include "cldnn/primitives/reshape.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/concat.hpp"
|
#include "ngraph/op/concat.hpp"
|
||||||
|
|
||||||
#include "api/concatenation.hpp"
|
#include "cldnn/primitives/concatenation.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
#include "ngraph/op/variadic_split.hpp"
|
#include "ngraph/op/variadic_split.hpp"
|
||||||
#include "ngraph/op/util/op_types.hpp"
|
#include "ngraph/op/util/op_types.hpp"
|
||||||
|
|
||||||
#include "api/data.hpp"
|
#include "cldnn/primitives/data.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
@ -169,9 +169,10 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
|
|||||||
if (bufIter != p.blobMemCache.end()) {
|
if (bufIter != p.blobMemCache.end()) {
|
||||||
constPrimID = bufIter->second;
|
constPrimID = bufIter->second;
|
||||||
} else {
|
} else {
|
||||||
auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false);
|
cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false);
|
||||||
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor
|
auto& stream = p.GetEngine().get_program_stream();
|
||||||
auto buf = tmpPointer.data();
|
cldnn::mem_lock<char> lock{mem, stream};
|
||||||
|
auto buf = lock.data();
|
||||||
auto bufSize = constLayout.bytes_count();
|
auto bufSize = constLayout.bytes_count();
|
||||||
|
|
||||||
// Do actual weights reorder and change O and I channels order
|
// Do actual weights reorder and change O and I channels order
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/convert.hpp"
|
#include "ngraph/op/convert.hpp"
|
||||||
#include "ngraph/op/convert_like.hpp"
|
#include "ngraph/op/convert_like.hpp"
|
||||||
|
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -13,11 +13,11 @@
|
|||||||
#include "ngraph/op/fake_quantize.hpp"
|
#include "ngraph/op/fake_quantize.hpp"
|
||||||
#include "ngraph/op/util/op_types.hpp"
|
#include "ngraph/op/util/op_types.hpp"
|
||||||
|
|
||||||
#include "api/convolution.hpp"
|
#include "cldnn/primitives/convolution.hpp"
|
||||||
#include "api/deconvolution.hpp"
|
#include "cldnn/primitives/deconvolution.hpp"
|
||||||
#include "api/binary_convolution.hpp"
|
#include "cldnn/primitives/binary_convolution.hpp"
|
||||||
#include "api/permute.hpp"
|
#include "cldnn/primitives/permute.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,9 +8,9 @@
|
|||||||
#include "ngraph/op/ctc_greedy_decoder.hpp"
|
#include "ngraph/op/ctc_greedy_decoder.hpp"
|
||||||
#include "ngraph/op/ctc_greedy_decoder_seq_len.hpp"
|
#include "ngraph/op/ctc_greedy_decoder_seq_len.hpp"
|
||||||
|
|
||||||
#include "api/ctc_greedy_decoder.hpp"
|
#include "cldnn/primitives/ctc_greedy_decoder.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/mutable_data.hpp"
|
#include "cldnn/primitives/mutable_data.hpp"
|
||||||
|
|
||||||
#include "transformations/utils/utils.hpp"
|
#include "transformations/utils/utils.hpp"
|
||||||
|
|
||||||
@ -58,7 +58,7 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
|||||||
|
|
||||||
std::size_t num_output = op->get_output_size();
|
std::size_t num_output = op->get_output_size();
|
||||||
|
|
||||||
std::vector<cldnn::memory> shared_memory;
|
std::vector<cldnn::memory::ptr> shared_memory;
|
||||||
if (num_output == 2) {
|
if (num_output == 2) {
|
||||||
auto mutable_precision = op->get_output_element_type(1);
|
auto mutable_precision = op->get_output_element_type(1);
|
||||||
if (mutable_precision == ngraph::element::i64) {
|
if (mutable_precision == ngraph::element::i64) {
|
||||||
@ -70,7 +70,7 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
|||||||
DefaultFormatForDims(op->get_output_shape(1).size()),
|
DefaultFormatForDims(op->get_output_shape(1).size()),
|
||||||
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
||||||
|
|
||||||
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayout));
|
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout));
|
||||||
|
|
||||||
cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write";
|
cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write";
|
||||||
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]);
|
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/cum_sum.hpp"
|
#include "ngraph/op/cum_sum.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/cum_sum.hpp"
|
#include "cldnn/primitives/cum_sum.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -9,8 +9,8 @@
|
|||||||
#include "ngraph/attribute_visitor.hpp"
|
#include "ngraph/attribute_visitor.hpp"
|
||||||
#include "ngraph/node.hpp"
|
#include "ngraph/node.hpp"
|
||||||
|
|
||||||
#include "api/custom_gpu_primitive.hpp"
|
#include "cldnn/primitives/custom_gpu_primitive.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/depth_to_space.hpp"
|
#include "ngraph/op/depth_to_space.hpp"
|
||||||
|
|
||||||
#include "api/depth_to_space.hpp"
|
#include "cldnn/primitives/depth_to_space.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/detection_output.hpp"
|
#include "ngraph/op/detection_output.hpp"
|
||||||
|
|
||||||
#include "api/detection_output.hpp"
|
#include "cldnn/primitives/detection_output.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -25,10 +25,10 @@
|
|||||||
#include "ngraph/op/power.hpp"
|
#include "ngraph/op/power.hpp"
|
||||||
#include "ngraph/op/floor_mod.hpp"
|
#include "ngraph/op/floor_mod.hpp"
|
||||||
|
|
||||||
#include "api/activation.hpp"
|
#include "cldnn/primitives/activation.hpp"
|
||||||
#include "api/eltwise.hpp"
|
#include "cldnn/primitives/eltwise.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/reshape.hpp"
|
#include "cldnn/primitives/reshape.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -9,8 +9,8 @@
|
|||||||
#include "ngraph/op/embeddingbag_offsets_sum.hpp"
|
#include "ngraph/op/embeddingbag_offsets_sum.hpp"
|
||||||
#include "ngraph/op/embeddingbag_packedsum.hpp"
|
#include "ngraph/op/embeddingbag_packedsum.hpp"
|
||||||
|
|
||||||
#include "api/embedding_bag.hpp"
|
#include "cldnn/primitives/embedding_bag.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
|
|
||||||
#include "transformations/utils/utils.hpp"
|
#include "transformations/utils/utils.hpp"
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/extractimagepatches.hpp"
|
#include "ngraph/op/extractimagepatches.hpp"
|
||||||
|
|
||||||
#include "api/extract_image_patches.hpp"
|
#include "cldnn/primitives/extract_image_patches.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/fake_quantize.hpp"
|
#include "ngraph/op/fake_quantize.hpp"
|
||||||
|
|
||||||
#include "api/quantize.hpp"
|
#include "cldnn/primitives/quantize.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
|
|
||||||
#include "ngraph/op/gather_tree.hpp"
|
#include "ngraph/op/gather_tree.hpp"
|
||||||
|
|
||||||
#include "api/gather_tree.hpp"
|
#include "cldnn/primitives/gather_tree.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
|
|
||||||
#include "ngraph/op/gather.hpp"
|
#include "ngraph/op/gather.hpp"
|
||||||
|
|
||||||
#include "api/gather.hpp"
|
#include "cldnn/primitives/gather.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/gather_nd.hpp"
|
#include "ngraph/op/gather_nd.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/gather_nd.hpp"
|
#include "cldnn/primitives/gather_nd.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/grn.hpp"
|
#include "ngraph/op/grn.hpp"
|
||||||
|
|
||||||
#include "api/grn.hpp"
|
#include "cldnn/primitives/grn.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
#include "ngraph/op/interpolate.hpp"
|
#include "ngraph/op/interpolate.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/resample.hpp"
|
#include "cldnn/primitives/resample.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/lrn.hpp"
|
#include "ngraph/op/lrn.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/lrn.hpp"
|
#include "cldnn/primitives/lrn.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -9,11 +9,11 @@
|
|||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
#include "ngraph/op/fake_quantize.hpp"
|
#include "ngraph/op/fake_quantize.hpp"
|
||||||
|
|
||||||
#include "api/gemm.hpp"
|
#include "cldnn/primitives/gemm.hpp"
|
||||||
#include "api/fully_connected.hpp"
|
#include "cldnn/primitives/fully_connected.hpp"
|
||||||
#include "api/reshape.hpp"
|
#include "cldnn/primitives/reshape.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/permute.hpp"
|
#include "cldnn/primitives/permute.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,8 @@
|
|||||||
#include "ngraph/op/mvn.hpp"
|
#include "ngraph/op/mvn.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/mvn.hpp"
|
#include "cldnn/primitives/mvn.hpp"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
@ -9,9 +9,9 @@
|
|||||||
#include <ngraph/opsets/opset3.hpp>
|
#include <ngraph/opsets/opset3.hpp>
|
||||||
#include <ngraph_ops/nms_ie_internal.hpp>
|
#include <ngraph_ops/nms_ie_internal.hpp>
|
||||||
|
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/mutable_data.hpp"
|
#include "cldnn/primitives/mutable_data.hpp"
|
||||||
#include "api/non_max_suppression.hpp"
|
#include "cldnn/primitives/non_max_suppression.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
@ -62,7 +62,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
|
|||||||
|
|
||||||
std::size_t num_output = op->get_output_size();
|
std::size_t num_output = op->get_output_size();
|
||||||
|
|
||||||
std::vector<cldnn::memory> shared_memory;
|
std::vector<cldnn::memory::ptr> shared_memory;
|
||||||
switch (num_output) {
|
switch (num_output) {
|
||||||
case 3: {
|
case 3: {
|
||||||
auto mutable_precision_second = op->get_output_element_type(2);
|
auto mutable_precision_second = op->get_output_element_type(2);
|
||||||
@ -74,7 +74,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
|
|||||||
DefaultFormatForDims(op->get_output_shape(2).size()),
|
DefaultFormatForDims(op->get_output_shape(2).size()),
|
||||||
CldnnTensorFromIEDims(op->get_output_shape(2)));
|
CldnnTensorFromIEDims(op->get_output_shape(2)));
|
||||||
|
|
||||||
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutSecond));
|
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));
|
||||||
|
|
||||||
cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
|
cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
|
||||||
auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back());
|
auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back());
|
||||||
@ -91,7 +91,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
|
|||||||
cldnn::format::bfyx,
|
cldnn::format::bfyx,
|
||||||
cldnn::tensor(outputIndices, 3, 1, 1));
|
cldnn::tensor(outputIndices, 3, 1, 1));
|
||||||
|
|
||||||
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutFirst));
|
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));
|
||||||
|
|
||||||
cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
|
cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
|
||||||
auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back());
|
auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back());
|
||||||
|
@ -8,8 +8,8 @@
|
|||||||
#include "ngraph/op/normalize_l2.hpp"
|
#include "ngraph/op/normalize_l2.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/normalize.hpp"
|
#include "cldnn/primitives/normalize.hpp"
|
||||||
#include "api/data.hpp"
|
#include "cldnn/primitives/data.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
@ -35,8 +35,8 @@ void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::Norma
|
|||||||
// We create fake scale constant and fill it with ones to keep the same behavior as current primitive
|
// We create fake scale constant and fill it with ones to keep the same behavior as current primitive
|
||||||
auto scale = std::make_shared<ngraph::op::v0::Constant>(op->get_output_element_type(0), ngraph::Shape{1}, std::vector<float>{1.0});
|
auto scale = std::make_shared<ngraph::op::v0::Constant>(op->get_output_element_type(0), ngraph::Shape{1}, std::vector<float>{1.0});
|
||||||
cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1});
|
cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1});
|
||||||
auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false);
|
auto mem = p.GetEngine().allocate_memory(constLayout, false);
|
||||||
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor
|
cldnn::mem_lock<int8_t> tmpPointer{mem, p.GetEngine().get_program_stream()};
|
||||||
auto buf = tmpPointer.data();
|
auto buf = tmpPointer.data();
|
||||||
auto bufSize = scale->get_output_tensor(0).size();
|
auto bufSize = scale->get_output_tensor(0).size();
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/one_hot.hpp"
|
#include "ngraph/op/one_hot.hpp"
|
||||||
|
|
||||||
#include "api/one_hot.hpp"
|
#include "cldnn/primitives/one_hot.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/pad.hpp"
|
#include "ngraph/op/pad.hpp"
|
||||||
|
|
||||||
#include "api/border.hpp"
|
#include "cldnn/primitives/border.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,10 +7,10 @@
|
|||||||
|
|
||||||
#include "ngraph/op/parameter.hpp"
|
#include "ngraph/op/parameter.hpp"
|
||||||
|
|
||||||
#include "api/input_layout.hpp"
|
#include "cldnn/primitives/input_layout.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/data.hpp"
|
#include "cldnn/primitives/data.hpp"
|
||||||
#include "api/concatenation.hpp"
|
#include "cldnn/primitives/concatenation.hpp"
|
||||||
|
|
||||||
using namespace InferenceEngine;
|
using namespace InferenceEngine;
|
||||||
|
|
||||||
@ -158,8 +158,8 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
|
|||||||
if (bufIter != p.blobMemCache.end()) {
|
if (bufIter != p.blobMemCache.end()) {
|
||||||
meanBlobID = bufIter->second;
|
meanBlobID = bufIter->second;
|
||||||
} else {
|
} else {
|
||||||
auto mem = cldnn::memory::allocate(p.GetEngine(), meanBlobLayout, 0, false);
|
auto mem = p.GetEngine().allocate_memory(meanBlobLayout, false);
|
||||||
auto tmpPointer = mem.pointer<char>(); // implicitly maps buffer - unmap in destructor
|
cldnn::mem_lock<int8_t> tmpPointer{ mem, p.GetEngine().get_program_stream() };
|
||||||
auto buf = tmpPointer.data();
|
auto buf = tmpPointer.data();
|
||||||
auto bufSize = meanBlobLayout.bytes_count();
|
auto bufSize = meanBlobLayout.bytes_count();
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/max_pool.hpp"
|
#include "ngraph/op/max_pool.hpp"
|
||||||
#include "ngraph/op/avg_pool.hpp"
|
#include "ngraph/op/avg_pool.hpp"
|
||||||
|
|
||||||
#include "api/pooling.hpp"
|
#include "cldnn/primitives/pooling.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/prior_box.hpp"
|
#include "ngraph/op/prior_box.hpp"
|
||||||
#include "ngraph/op/prior_box_clustered.hpp"
|
#include "ngraph/op/prior_box_clustered.hpp"
|
||||||
|
|
||||||
#include "api/prior_box.hpp"
|
#include "cldnn/primitives/prior_box.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
|
|
||||||
#include "ngraph/op/proposal.hpp"
|
#include "ngraph/op/proposal.hpp"
|
||||||
|
|
||||||
#include "api/proposal.hpp"
|
#include "cldnn/primitives/proposal.hpp"
|
||||||
#include "api/mutable_data.hpp"
|
#include "cldnn/primitives/mutable_data.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
@ -62,7 +62,7 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
|
|||||||
DefaultFormatForDims(op->get_output_shape(1).size()),
|
DefaultFormatForDims(op->get_output_shape(1).size()),
|
||||||
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
||||||
|
|
||||||
auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout);
|
auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
|
||||||
|
|
||||||
cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
|
cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
|
||||||
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory);
|
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory);
|
||||||
|
@ -16,9 +16,9 @@
|
|||||||
#include "ngraph/op/max.hpp"
|
#include "ngraph/op/max.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/reduce.hpp"
|
#include "cldnn/primitives/reduce.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/reshape.hpp"
|
#include "cldnn/primitives/reshape.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/region_yolo.hpp"
|
#include "ngraph/op/region_yolo.hpp"
|
||||||
|
|
||||||
#include "api/region_yolo.hpp"
|
#include "cldnn/primitives/region_yolo.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/reorg_yolo.hpp"
|
#include "ngraph/op/reorg_yolo.hpp"
|
||||||
|
|
||||||
#include "api/reorg_yolo.hpp"
|
#include "cldnn/primitives/reorg_yolo.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -9,8 +9,8 @@
|
|||||||
#include "ngraph/op/squeeze.hpp"
|
#include "ngraph/op/squeeze.hpp"
|
||||||
#include "ngraph/op/unsqueeze.hpp"
|
#include "ngraph/op/unsqueeze.hpp"
|
||||||
|
|
||||||
#include "api/reshape.hpp"
|
#include "cldnn/primitives/reshape.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/result.hpp"
|
#include "ngraph/op/result.hpp"
|
||||||
|
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
|
|
||||||
using namespace InferenceEngine;
|
using namespace InferenceEngine;
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/reverse_sequence.hpp"
|
#include "ngraph/op/reverse_sequence.hpp"
|
||||||
|
|
||||||
#include "api/reverse_sequence.hpp"
|
#include "cldnn/primitives/reverse_sequence.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,12 +8,12 @@
|
|||||||
#include "ngraph/op/lstm_cell.hpp"
|
#include "ngraph/op/lstm_cell.hpp"
|
||||||
#include "ngraph/op/lstm_sequence.hpp"
|
#include "ngraph/op/lstm_sequence.hpp"
|
||||||
|
|
||||||
#include "api/reshape.hpp"
|
#include "cldnn/primitives/reshape.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/fully_connected.hpp"
|
#include "cldnn/primitives/fully_connected.hpp"
|
||||||
#include "api/lstm.hpp"
|
#include "cldnn/primitives/lstm.hpp"
|
||||||
#include "api/crop.hpp"
|
#include "cldnn/primitives/crop.hpp"
|
||||||
#include "api/concatenation.hpp"
|
#include "cldnn/primitives/concatenation.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
cldnn::activation_func GetActivationFunc(std::string name) {
|
cldnn::activation_func GetActivationFunc(std::string name) {
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
#include "ngraph/op/psroi_pooling.hpp"
|
#include "ngraph/op/psroi_pooling.hpp"
|
||||||
#include "ngraph/op/deformable_psroi_pooling.hpp"
|
#include "ngraph/op/deformable_psroi_pooling.hpp"
|
||||||
|
|
||||||
#include "api/roi_pooling.hpp"
|
#include "cldnn/primitives/roi_pooling.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/scatter_elements_update.hpp"
|
#include "ngraph/op/scatter_elements_update.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/scatter_elements_update.hpp"
|
#include "cldnn/primitives/scatter_elements_update.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/scatter_nd_update.hpp"
|
#include "ngraph/op/scatter_nd_update.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/scatter_nd_update.hpp"
|
#include "cldnn/primitives/scatter_nd_update.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/scatter_update.hpp"
|
#include "ngraph/op/scatter_update.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/scatter_update.hpp"
|
#include "cldnn/primitives/scatter_update.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,9 +7,9 @@
|
|||||||
|
|
||||||
#include "ngraph/op/select.hpp"
|
#include "ngraph/op/select.hpp"
|
||||||
|
|
||||||
#include "api/select.hpp"
|
#include "cldnn/primitives/select.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/reshape.hpp"
|
#include "cldnn/primitives/reshape.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/shuffle_channels.hpp"
|
#include "ngraph/op/shuffle_channels.hpp"
|
||||||
|
|
||||||
#include "api/shuffle_channels.hpp"
|
#include "cldnn/primitives/shuffle_channels.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,8 +8,8 @@
|
|||||||
#include "ngraph/op/softmax.hpp"
|
#include "ngraph/op/softmax.hpp"
|
||||||
#include "ngraph/op/log_softmax.hpp"
|
#include "ngraph/op/log_softmax.hpp"
|
||||||
|
|
||||||
#include "api/softmax.hpp"
|
#include "cldnn/primitives/softmax.hpp"
|
||||||
#include "api/activation.hpp"
|
#include "cldnn/primitives/activation.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/space_to_batch.hpp"
|
#include "ngraph/op/space_to_batch.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/space_to_batch.hpp"
|
#include "cldnn/primitives/space_to_batch.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/space_to_depth.hpp"
|
#include "ngraph/op/space_to_depth.hpp"
|
||||||
|
|
||||||
#include "api/space_to_depth.hpp"
|
#include "cldnn/primitives/space_to_depth.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/split.hpp"
|
#include "ngraph/op/split.hpp"
|
||||||
#include "ngraph/op/variadic_split.hpp"
|
#include "ngraph/op/variadic_split.hpp"
|
||||||
|
|
||||||
#include "api/crop.hpp"
|
#include "cldnn/primitives/crop.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -8,9 +8,9 @@
|
|||||||
#include "ngraph/op/strided_slice.hpp"
|
#include "ngraph/op/strided_slice.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/strided_slice.hpp"
|
#include "cldnn/primitives/strided_slice.hpp"
|
||||||
#include "api/reshape.hpp"
|
#include "cldnn/primitives/reshape.hpp"
|
||||||
#include "api/crop.hpp"
|
#include "cldnn/primitives/crop.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -13,11 +13,11 @@
|
|||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
#include "ngraph/op/util/sub_graph_base.hpp"
|
#include "ngraph/op/util/sub_graph_base.hpp"
|
||||||
|
|
||||||
#include "api/loop.hpp"
|
#include "cldnn/primitives/loop.hpp"
|
||||||
#include "api/mutable_data.hpp"
|
#include "cldnn/primitives/mutable_data.hpp"
|
||||||
#include "api/data.hpp"
|
#include "cldnn/primitives/data.hpp"
|
||||||
#include "api/reorder.hpp"
|
#include "cldnn/primitives/reorder.hpp"
|
||||||
#include "api/topology.hpp"
|
#include "cldnn/graph/topology.hpp"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -28,9 +28,8 @@ namespace CLDNNPlugin {
|
|||||||
|
|
||||||
template<class DATA_TYPE>
|
template<class DATA_TYPE>
|
||||||
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
|
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
|
||||||
auto mem = cldnn::memory::allocate(p.GetEngine(),
|
auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
|
||||||
{ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
|
cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
|
||||||
auto ptr = mem.pointer<int64_t>();
|
|
||||||
*ptr.begin() = num;
|
*ptr.begin() = num;
|
||||||
return {id, mem};
|
return {id, mem};
|
||||||
}
|
}
|
||||||
@ -42,7 +41,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
|
|||||||
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
|
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
|
||||||
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
|
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
|
||||||
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
|
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
|
||||||
auto mem = cldnn::memory::allocate(p.GetEngine(), output_layout);
|
auto mem = p.GetEngine().allocate_memory(output_layout);
|
||||||
auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
|
auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
|
||||||
return md;
|
return md;
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
#include "ngraph/op/tile.hpp"
|
#include "ngraph/op/tile.hpp"
|
||||||
|
|
||||||
#include "api/tile.hpp"
|
#include "cldnn/primitives/tile.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -7,8 +7,8 @@
|
|||||||
|
|
||||||
#include "ngraph/op/topk.hpp"
|
#include "ngraph/op/topk.hpp"
|
||||||
|
|
||||||
#include "api/arg_max_min.hpp"
|
#include "cldnn/primitives/arg_max_min.hpp"
|
||||||
#include "api/mutable_data.hpp"
|
#include "cldnn/primitives/mutable_data.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
@ -71,7 +71,7 @@ void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>& op) {
|
|||||||
DefaultFormatForDims(op->get_output_shape(1).size()),
|
DefaultFormatForDims(op->get_output_shape(1).size()),
|
||||||
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
||||||
|
|
||||||
auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout);
|
auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
|
||||||
|
|
||||||
cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write";
|
cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write";
|
||||||
auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, shared_memory);
|
auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, shared_memory);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "ngraph/op/transpose.hpp"
|
#include "ngraph/op/transpose.hpp"
|
||||||
#include "ngraph/op/constant.hpp"
|
#include "ngraph/op/constant.hpp"
|
||||||
|
|
||||||
#include "api/permute.hpp"
|
#include "cldnn/primitives/permute.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@
|
|||||||
#include "ngraph/op/hsigmoid.hpp"
|
#include "ngraph/op/hsigmoid.hpp"
|
||||||
#include "ngraph/op/round.hpp"
|
#include "ngraph/op/round.hpp"
|
||||||
|
|
||||||
#include "api/activation.hpp"
|
#include "cldnn/primitives/activation.hpp"
|
||||||
|
|
||||||
namespace CLDNNPlugin {
|
namespace CLDNNPlugin {
|
||||||
|
|
||||||
|
@ -52,13 +52,6 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
|
|||||||
if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift())
|
if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Don't reshape the first dnn layer since it breaks groups recognition
|
|
||||||
auto prevLayer = InferenceEngine::CNNNetPrevLayerSkipCertain(layer, 0, [](InferenceEngine::CNNLayerPtr ptr) {
|
|
||||||
return LayerInfo(ptr).isNonValuesChangable();
|
|
||||||
});
|
|
||||||
IE_ASSERT(prevLayer != nullptr);
|
|
||||||
if (LayerInfo(prevLayer).isInput()) return false;
|
|
||||||
|
|
||||||
// Don't reshape diagonallayers with bias connection
|
// Don't reshape diagonallayers with bias connection
|
||||||
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
|
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
|
||||||
}
|
}
|
||||||
|
@ -85,8 +85,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
|
|||||||
return LayerInfo(ptr).isNonValuesChangable();
|
return LayerInfo(ptr).isNonValuesChangable();
|
||||||
});
|
});
|
||||||
IE_ASSERT(inputLayer != nullptr);
|
IE_ASSERT(inputLayer != nullptr);
|
||||||
size_t weightsSize = (LayerInfo(prevLayer).has32BOutput() || LayerInfo(inputLayer).isInput()) ?
|
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
|
||||||
nextLayer->outData[0]->getDims().back() :
|
|
||||||
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1];
|
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1];
|
||||||
std::vector<float> weightsValues(weightsSize, fillValue);
|
std::vector<float> weightsValues(weightsSize, fillValue);
|
||||||
IE_ASSERT(diagLayer != nullptr);
|
IE_ASSERT(diagLayer != nullptr);
|
||||||
|
@ -42,7 +42,7 @@ static int32_t as_int32_t(T v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class OstreamHashWrapper final: public std::streambuf {
|
class OstreamHashWrapper final: public std::streambuf {
|
||||||
std::size_t m_res = {};
|
std::size_t m_res = 0;
|
||||||
public:
|
public:
|
||||||
std::size_t getResult() const { return m_res; }
|
std::size_t getResult() const { return m_res; }
|
||||||
std::streamsize xsputn(const char* s, std::streamsize n) override {
|
std::streamsize xsputn(const char* s, std::streamsize n) override {
|
||||||
@ -65,7 +65,7 @@ public:
|
|||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
|
|
||||||
std::string NetworkCompilationContext::calculateFileInfo(const std::string& filePath) {
|
std::string NetworkCompilationContext::calculateFileInfo(const std::string& filePath) {
|
||||||
size_t seed {};
|
size_t seed = 0;
|
||||||
auto absPath = filePath;
|
auto absPath = filePath;
|
||||||
try {
|
try {
|
||||||
absPath = FileUtils::absoluteFilePath(filePath);
|
absPath = FileUtils::absoluteFilePath(filePath);
|
||||||
|
@ -270,6 +270,12 @@ template <typename T, typename... Args>
|
|||||||
std::shared_ptr<Node> fold_reshape(Args&&... args) {
|
std::shared_ptr<Node> fold_reshape(Args&&... args) {
|
||||||
std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...);
|
std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...);
|
||||||
if (node->get_output_size() == 1) {
|
if (node->get_output_size() == 1) {
|
||||||
|
// issue #57985: remove fold_reshape & reuse nGraph implementation
|
||||||
|
const auto values = as_type_ptr<opset1::Constant>(node->input_value(1).get_node_shared_ptr())->template cast_vector<int64_t>();
|
||||||
|
if (std::any_of(values.begin(), values.end(), [](const int64_t value) { return (value == 0) || (value == -1); })) {
|
||||||
|
return fold<opset1::Reshape>(std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
OutputVector folded;
|
OutputVector folded;
|
||||||
if (is_type<opset1::Constant>(node->input_value(0).get_node_shared_ptr()) &&
|
if (is_type<opset1::Constant>(node->input_value(0).get_node_shared_ptr()) &&
|
||||||
is_type<opset1::Constant>(node->input_value(1).get_node_shared_ptr())) {
|
is_type<opset1::Constant>(node->input_value(1).get_node_shared_ptr())) {
|
||||||
|
@ -683,7 +683,7 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
|
|||||||
auto levels_1 = fq->get_levels() - 1.f;
|
auto levels_1 = fq->get_levels() - 1.f;
|
||||||
|
|
||||||
const size_t DHW = D * H * W;
|
const size_t DHW = D * H * W;
|
||||||
const size_t IDHW = IC * D * H * W;
|
const size_t IDHW = outChannelsShapeIndex == 0 ? IC * D * H * W : OC * D * H * W;
|
||||||
|
|
||||||
const auto values = constant->cast_vector<float>();
|
const auto values = constant->cast_vector<float>();
|
||||||
std::vector<float> quantizedValues(OC * IC * D * H * W);
|
std::vector<float> quantizedValues(OC * IC * D * H * W);
|
||||||
|
@ -106,7 +106,6 @@ void jit_load_emitter::emit_isa(const Xbyak::Reg64 ®_src, int offset_byte, In
|
|||||||
break;
|
break;
|
||||||
case Precision::I32:
|
case Precision::I32:
|
||||||
if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16)) {
|
if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16)) {
|
||||||
h->uni_vroundps(Vmm(out_vec_idx), Vmm(out_vec_idx), 3); // rounding to zero
|
|
||||||
h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx));
|
h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -511,6 +510,11 @@ size_t jit_store_emitter::aux_vecs_count() const {
|
|||||||
|
|
||||||
size_t jit_store_emitter::get_inputs_num() const { return 1; }
|
size_t jit_store_emitter::get_inputs_num() const { return 1; }
|
||||||
|
|
||||||
|
void jit_store_emitter::emit_data() const {
|
||||||
|
if (emu_vcvtneps2bf16)
|
||||||
|
emu_vcvtneps2bf16->emit_data();
|
||||||
|
}
|
||||||
|
|
||||||
void jit_store_emitter::emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
|
void jit_store_emitter::emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
|
||||||
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs,
|
||||||
const emitter_context *emit_context) const {
|
const emitter_context *emit_context) const {
|
||||||
@ -552,7 +556,6 @@ template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
|
|||||||
switch (src_prc) {
|
switch (src_prc) {
|
||||||
case Precision::FP32:
|
case Precision::FP32:
|
||||||
if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16)) {
|
if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16)) {
|
||||||
h->uni_vroundps(Vmm(in_vec_idx), Vmm(in_vec_idx), 3); // rounding to zero
|
|
||||||
h->uni_vcvtps2dq(Vmm(in_vec_idx), Vmm(in_vec_idx));
|
h->uni_vcvtps2dq(Vmm(in_vec_idx), Vmm(in_vec_idx));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -18,8 +18,8 @@ struct load_emitter_context : public emitter_context {
|
|||||||
load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8),
|
load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8),
|
||||||
offset_byte_(0), is_fill_(false), fill_value_("zero") {}
|
offset_byte_(0), is_fill_(false), fill_value_("zero") {}
|
||||||
|
|
||||||
load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero", int offset_byte = 0):
|
load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, int offset_byte = 0, bool is_fill = false, std::string fill_value = "zero"):
|
||||||
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), is_fill_(is_fill), fill_value_(fill_value), offset_byte_(offset_byte) {}
|
src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), offset_byte_(offset_byte), is_fill_(is_fill), fill_value_(fill_value) {}
|
||||||
|
|
||||||
int offset_byte_;
|
int offset_byte_;
|
||||||
int load_num_;
|
int load_num_;
|
||||||
@ -124,6 +124,8 @@ public:
|
|||||||
|
|
||||||
size_t get_inputs_num() const override;
|
size_t get_inputs_num() const override;
|
||||||
|
|
||||||
|
void emit_data() const override;
|
||||||
|
|
||||||
std::shared_ptr<jit_emu_vcvtneps2bf16> get_emu_vcvtneps2bf16() const {
|
std::shared_ptr<jit_emu_vcvtneps2bf16> get_emu_vcvtneps2bf16() const {
|
||||||
return emu_vcvtneps2bf16;
|
return emu_vcvtneps2bf16;
|
||||||
}
|
}
|
||||||
|
@ -306,7 +306,7 @@ private:
|
|||||||
inline void worker_tail_planar() {
|
inline void worker_tail_planar() {
|
||||||
Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32;
|
Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? Precision::FP32 : Precision::I32;
|
||||||
load_emitter->emit_code({static_cast<size_t>(reg_src.getIdx())}, {static_cast<size_t>(vmm_val.getIdx())},
|
load_emitter->emit_code({static_cast<size_t>(reg_src.getIdx())}, {static_cast<size_t>(vmm_val.getIdx())},
|
||||||
std::make_shared<load_emitter_context>(jcp_.src_prc, dst_prc, tail_num, true, "zero"),
|
std::make_shared<load_emitter_context>(jcp_.src_prc, dst_prc, tail_num, 0, true),
|
||||||
{}, {load_pool_gpr_idxs});
|
{}, {load_pool_gpr_idxs});
|
||||||
|
|
||||||
if (jcp_.normalize_variance) {
|
if (jcp_.normalize_variance) {
|
||||||
@ -477,8 +477,7 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator
|
|||||||
this->postamble();
|
this->postamble();
|
||||||
|
|
||||||
load_emitter->emit_data();
|
load_emitter->emit_data();
|
||||||
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr)
|
store_emitter->emit_data();
|
||||||
store_emitter->get_emu_vcvtneps2bf16()->emit_data();
|
|
||||||
|
|
||||||
for (auto& inj : eltwise_injectors)
|
for (auto& inj : eltwise_injectors)
|
||||||
inj->prepare_table();
|
inj->prepare_table();
|
||||||
|
@ -88,8 +88,7 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi
|
|||||||
this->postamble();
|
this->postamble();
|
||||||
|
|
||||||
load_emitter->emit_data();
|
load_emitter->emit_data();
|
||||||
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr)
|
store_emitter->emit_data();
|
||||||
store_emitter->get_emu_vcvtneps2bf16()->emit_data();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -155,7 +154,7 @@ private:
|
|||||||
Vmm vmm_max = get_acc_reg(i);
|
Vmm vmm_max = get_acc_reg(i);
|
||||||
|
|
||||||
load_emitter->emit_code({static_cast<size_t>(reg_input.getIdx())}, {static_cast<size_t>(vmm_max.getIdx())},
|
load_emitter->emit_code({static_cast<size_t>(reg_input.getIdx())}, {static_cast<size_t>(vmm_max.getIdx())},
|
||||||
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off),
|
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, i * src_c_off),
|
||||||
{}, load_pool_gpr_idxs);
|
{}, load_pool_gpr_idxs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,7 +168,7 @@ private:
|
|||||||
Vmm vmm_src = get_src_reg(i);
|
Vmm vmm_src = get_src_reg(i);
|
||||||
|
|
||||||
load_emitter->emit_code({static_cast<size_t>(aux_reg_input1.getIdx())}, {static_cast<size_t>(vmm_src.getIdx())},
|
load_emitter->emit_code({static_cast<size_t>(aux_reg_input1.getIdx())}, {static_cast<size_t>(vmm_src.getIdx())},
|
||||||
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off),
|
std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, i * src_c_off),
|
||||||
{}, load_pool_gpr_idxs);
|
{}, load_pool_gpr_idxs);
|
||||||
|
|
||||||
if (isa == cpu::x64::sse41) {
|
if (isa == cpu::x64::sse41) {
|
||||||
@ -222,7 +221,7 @@ private:
|
|||||||
|
|
||||||
for (int i = 0; i < c_blocks; i++) {
|
for (int i = 0; i < c_blocks; i++) {
|
||||||
const int src_c_off = i * jpp_.ih * jpp_.iw * jpp_.c_block * jpp_.src_data_size;
|
const int src_c_off = i * jpp_.ih * jpp_.iw * jpp_.c_block * jpp_.src_data_size;
|
||||||
const auto load_context = std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, false, "zero", src_c_off);
|
const auto load_context = std::make_shared<load_emitter_context>(jpp_.src_prc, Precision::FP32, step, src_c_off);
|
||||||
|
|
||||||
mov(aux_reg_input, reg_input);
|
mov(aux_reg_input, reg_input);
|
||||||
|
|
||||||
|
@ -12,9 +12,5 @@
|
|||||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
|
NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
|
||||||
|
|
||||||
bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::Function> f) {
|
bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::Function> f) {
|
||||||
ngraph::pass::Manager m(get_pass_config());
|
|
||||||
m.register_pass<Pruning>();
|
|
||||||
m.run_passes(f);
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
@ -90,21 +90,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
|
|||||||
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
|
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calculate_nv12_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcUV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width) {
|
|
||||||
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calculate_i420_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcU,
|
|
||||||
const uchar *srcV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width) {
|
|
||||||
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
|
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
|
||||||
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
|
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
|
||||||
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
|
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
|
||||||
@ -119,14 +104,6 @@ void calcRowArea_32F(float dst[], const float *src[], const Size& inSz,
|
|||||||
calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf);
|
calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
|
|
||||||
copyRow_8U_impl(in, out, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
void copyRow_32F(const float in[], float out[], int length) {
|
|
||||||
copyRow_32F_impl(in, out, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resize (bi-linear, 32F)
|
// Resize (bi-linear, 32F)
|
||||||
void calcRowLinear_32F(float* dst[],
|
void calcRowLinear_32F(float* dst[],
|
||||||
const float* src0[],
|
const float* src0[],
|
||||||
@ -708,6 +685,14 @@ void calcRowLinear_8UC1(uint8_t* dst[],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace neon
|
} // namespace neon
|
||||||
|
|
||||||
|
template void chanToPlaneRowImpl(neon_tag, const uint8_t* in, int chan, int chs, uint8_t* out, const int length);
|
||||||
|
template void chanToPlaneRowImpl(neon_tag, const float* in, int chan, int chs, float * out, const int length);
|
||||||
|
|
||||||
|
template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -167,26 +167,31 @@ void splitRow_32FC4(const float in[],
|
|||||||
float out3[],
|
float out3[],
|
||||||
int length);
|
int length);
|
||||||
|
|
||||||
void calculate_nv12_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcUV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void calculate_i420_to_rgb(const uchar **srcY,
|
void calculate_i420_to_rgb(const uchar **srcY,
|
||||||
const uchar *srcU,
|
const uchar *srcU,
|
||||||
const uchar *srcV,
|
const uchar *srcV,
|
||||||
uchar **dstRGBx,
|
uchar **dstRGBx,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
void copyRow_8U(const uint8_t in[],
|
|
||||||
uint8_t out[],
|
|
||||||
int length);
|
|
||||||
|
|
||||||
void copyRow_32F(const float in[],
|
|
||||||
float out[],
|
|
||||||
int length);
|
|
||||||
|
|
||||||
} // namespace neon
|
} // namespace neon
|
||||||
|
|
||||||
|
template<typename isa_tag_t, typename T>
|
||||||
|
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
|
||||||
|
|
||||||
|
extern template void chanToPlaneRowImpl(neon_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
|
||||||
|
extern template void chanToPlaneRowImpl(neon_tag, const float* in, const int chan, const int chs, float * out, const int length);
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
extern template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
extern template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -107,21 +107,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
|
|||||||
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
|
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calculate_nv12_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcUV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width) {
|
|
||||||
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calculate_i420_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcU,
|
|
||||||
const uchar *srcV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width) {
|
|
||||||
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
|
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
|
||||||
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
|
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
|
||||||
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
|
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
|
||||||
@ -555,13 +540,6 @@ void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
|||||||
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||||
}
|
}
|
||||||
|
|
||||||
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
|
|
||||||
copyRow_8U_impl(in, out, length);
|
|
||||||
}
|
|
||||||
void copyRow_32F(const float in[], float out[], int length) {
|
|
||||||
copyRow_32F_impl(in, out, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calcRowLinear_32F(float *dst[],
|
void calcRowLinear_32F(float *dst[],
|
||||||
const float *src0[],
|
const float *src0[],
|
||||||
const float *src1[],
|
const float *src1[],
|
||||||
@ -575,6 +553,15 @@ void calcRowLinear_32F(float *dst[],
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace avx
|
} // namespace avx
|
||||||
|
|
||||||
|
template void chanToPlaneRowImpl(avx2_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
|
||||||
|
template void chanToPlaneRowImpl(avx2_tag, const float* in, const int chan, const int chs, float* out, const int length);
|
||||||
|
|
||||||
|
template void nv12ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* uv_row,
|
||||||
|
uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
template void i420ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -181,27 +181,29 @@ void splitRow_32FC4(const float in[],
|
|||||||
float out2[],
|
float out2[],
|
||||||
float out3[],
|
float out3[],
|
||||||
int length);
|
int length);
|
||||||
|
|
||||||
void calculate_nv12_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcUV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void calculate_i420_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcU,
|
|
||||||
const uchar *srcV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void copyRow_8U(const uint8_t in[],
|
|
||||||
uint8_t out[],
|
|
||||||
int length);
|
|
||||||
|
|
||||||
void copyRow_32F(const float in[],
|
|
||||||
float out[],
|
|
||||||
int length);
|
|
||||||
|
|
||||||
} // namespace avx
|
} // namespace avx
|
||||||
|
|
||||||
|
|
||||||
|
template<typename isa_tag_t, typename T>
|
||||||
|
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
|
||||||
|
|
||||||
|
extern template void chanToPlaneRowImpl(avx2_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
|
||||||
|
extern template void chanToPlaneRowImpl(avx2_tag, const float* in, const int chan, const int chs, float * out, const int length);
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row,
|
||||||
|
uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
extern template void nv12ToRgbRowImpl(avx2_tag, const uint8_t** y_rows,
|
||||||
|
const uint8_t* uv_row, uint8_t** out_rows,
|
||||||
|
const int buf_width);
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
extern template void i420ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -101,21 +101,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[],
|
|||||||
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
|
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calculate_nv12_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcUV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width) {
|
|
||||||
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calculate_i420_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcU,
|
|
||||||
const uchar *srcV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width) {
|
|
||||||
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
|
void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz,
|
||||||
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
|
const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap,
|
||||||
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
|
int xmaxdf, const short xindex[], const Q0_16 xalpha[],
|
||||||
@ -636,14 +621,6 @@ void calcRowLinear_8U(C4, std::array<std::array<uint8_t*, 4>, 4> &dst,
|
|||||||
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
calcRowLinear_8UC_Impl<chanNum>(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi);
|
||||||
}
|
}
|
||||||
|
|
||||||
void copyRow_8U(const uint8_t in[], uint8_t out[], int length) {
|
|
||||||
copyRow_8U_impl(in, out, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
void copyRow_32F(const float in[], float out[], int length) {
|
|
||||||
copyRow_32F_impl(in, out, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calcRowLinear_32F(float *dst[],
|
void calcRowLinear_32F(float *dst[],
|
||||||
const float *src0[],
|
const float *src0[],
|
||||||
const float *src1[],
|
const float *src1[],
|
||||||
@ -657,6 +634,14 @@ void calcRowLinear_32F(float *dst[],
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace avx512
|
} // namespace avx512
|
||||||
|
|
||||||
|
template void chanToPlaneRowImpl(avx512_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
|
||||||
|
template void chanToPlaneRowImpl(avx512_tag, const float* in, const int chan, const int chs, float* out, const int length);
|
||||||
|
|
||||||
|
template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -180,27 +180,26 @@ void splitRow_32FC4(const float in[],
|
|||||||
float out2[],
|
float out2[],
|
||||||
float out3[],
|
float out3[],
|
||||||
int length);
|
int length);
|
||||||
|
|
||||||
void calculate_nv12_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcUV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void calculate_i420_to_rgb(const uchar **srcY,
|
|
||||||
const uchar *srcU,
|
|
||||||
const uchar *srcV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width);
|
|
||||||
|
|
||||||
void copyRow_8U(const uint8_t in[],
|
|
||||||
uint8_t out[],
|
|
||||||
int length);
|
|
||||||
|
|
||||||
void copyRow_32F(const float in[],
|
|
||||||
float out[],
|
|
||||||
int length);
|
|
||||||
|
|
||||||
} // namespace avx512
|
} // namespace avx512
|
||||||
|
|
||||||
|
|
||||||
|
template<typename isa_tag_t, typename T>
|
||||||
|
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length);
|
||||||
|
|
||||||
|
extern template void chanToPlaneRowImpl(avx512_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
|
||||||
|
extern template void chanToPlaneRowImpl(avx512_tag, const float* in, const int chan, const int chs, float* out, const int length);
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
extern template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
extern template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -1365,33 +1365,13 @@ void splitRow_32FC4(const float in[],
|
|||||||
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
|
splitRow_32FC4_Impl(in, out0, out1, out2, out3, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calculate_nv12_to_rgb(const uchar **srcY,
|
template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length);
|
||||||
const uchar *srcUV,
|
template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan, const int chs, float* out, const int length);
|
||||||
uchar **dstRGBx,
|
|
||||||
int width) {
|
|
||||||
calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width);
|
|
||||||
}
|
|
||||||
|
|
||||||
void calculate_i420_to_rgb(const uchar **srcY,
|
template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
const uchar *srcU,
|
|
||||||
const uchar *srcV,
|
|
||||||
uchar **dstRGBx,
|
|
||||||
int width) {
|
|
||||||
calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width);
|
|
||||||
}
|
|
||||||
|
|
||||||
void copyRow_8U(const uint8_t in[],
|
|
||||||
uint8_t out[],
|
|
||||||
int length) {
|
|
||||||
copyRow_8U_impl(in, out, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
void copyRow_32F(const float in[],
|
|
||||||
float out[],
|
|
||||||
int length) {
|
|
||||||
copyRow_32F_impl(in, out, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -180,25 +180,25 @@ void splitRow_32FC4(const float in[],
|
|||||||
float out3[],
|
float out3[],
|
||||||
int length);
|
int length);
|
||||||
|
|
||||||
void calculate_nv12_to_rgb(const uchar **srcY,
|
template<typename isa_tag_t, typename T>
|
||||||
const uchar *srcUV,
|
void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs,
|
||||||
uchar **dstRGBx,
|
T* out, const int length);
|
||||||
int width);
|
|
||||||
|
|
||||||
void calculate_i420_to_rgb(const uchar **srcY,
|
extern template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan,
|
||||||
const uchar *srcU,
|
const int chs, uint8_t* out, const int length);
|
||||||
const uchar *srcV,
|
extern template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan,
|
||||||
uchar **dstRGBx,
|
const int chs, float* out, const int length);
|
||||||
int width);
|
template<typename isa_tag_t>
|
||||||
|
void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
void copyRow_8U(const uint8_t in[],
|
extern template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width);
|
||||||
uint8_t out[],
|
|
||||||
int length);
|
|
||||||
|
|
||||||
void copyRow_32F(const float in[],
|
template<typename isa_tag_t>
|
||||||
float out[],
|
void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
int length);
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
extern template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row, uint8_t** out_rows, const int buf_width);
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
@ -468,15 +468,86 @@ struct type_to_type {};
|
|||||||
template <typename typelist>
|
template <typename typelist>
|
||||||
struct type_dispatch_impl;
|
struct type_dispatch_impl;
|
||||||
|
|
||||||
|
//FIXME: add test for type_dispatch
|
||||||
template <template<typename ...> class typelist, typename... type>
|
template <template<typename ...> class typelist, typename... type>
|
||||||
struct type_dispatch_impl<typelist<type...>> {
|
struct type_dispatch_impl<typelist<type...>> {
|
||||||
template <typename result_t, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t>
|
template <typename result_t, typename default_t, typename type_id_t, typename type_to_id_t, typename type_to_value_t>
|
||||||
static result_t dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value) {
|
static result_t dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value) {
|
||||||
result_t res = default_value;
|
result_t res = default_value;
|
||||||
|
|
||||||
std::initializer_list<int> ({(type_id == type_to_id(type_to_type<type>{}) ? (res = type_to_value(type_to_type<type>{})), 0 : 0)...});
|
bool matched = false;
|
||||||
|
std::initializer_list<int> ({
|
||||||
|
!matched && (type_id == type_to_id(type_to_type<type>{})) ?
|
||||||
|
(matched = true, res = type_to_value(type_to_type<type>{})), 0
|
||||||
|
: 0
|
||||||
|
...
|
||||||
|
});
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename result_t, typename default_t, typename pred_t, typename type_to_value_t>
|
||||||
|
static result_t dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value) {
|
||||||
|
result_t res = default_value;
|
||||||
|
|
||||||
|
bool matched = false;
|
||||||
|
std::initializer_list<int> ({
|
||||||
|
!matched && pred(type_to_type<type>{}) ?
|
||||||
|
(matched = true, res = type_to_value(type_to_type<type>{})), 0
|
||||||
|
: 0
|
||||||
|
...
|
||||||
|
});
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename left_typelsist, typename right_typelsist>
|
||||||
|
struct concat;
|
||||||
|
|
||||||
|
template<typename left_typelsist, typename right_typelsist>
|
||||||
|
using concat_t = typename concat<left_typelsist, right_typelsist>::type;
|
||||||
|
|
||||||
|
template<template<typename ...> class left_list, typename ... left_types, template<typename ...> class right_list, typename ... right_types>
|
||||||
|
struct concat<left_list<left_types...>, right_list<right_types...>>{
|
||||||
|
using type = left_list<left_types... , right_types...>;
|
||||||
|
};
|
||||||
|
|
||||||
|
template< class T, class U >
|
||||||
|
using is_same_t = typename std::is_same<T, U>::type;
|
||||||
|
|
||||||
|
template<bool C, class T, class E> struct if_c_impl;
|
||||||
|
|
||||||
|
template<class T, class E> struct if_c_impl<true, T, E> {
|
||||||
|
using type = T;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class T, class E> struct if_c_impl<false, T, E> {
|
||||||
|
using type = E;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<bool C, class T, class E>
|
||||||
|
using if_c = typename if_c_impl<C, T, E>::type;
|
||||||
|
|
||||||
|
template<class C, class T, class E>
|
||||||
|
using if_ = typename if_c_impl<C::value != 0, T, E>::type;
|
||||||
|
|
||||||
|
template<typename typelist, typename type>
|
||||||
|
struct remove;
|
||||||
|
|
||||||
|
template<typename typelist, typename type>
|
||||||
|
using remove_t = typename remove<typelist, type>::type;
|
||||||
|
|
||||||
|
|
||||||
|
template<template<typename ...> class list, typename head_t, typename ... types, typename t>
|
||||||
|
struct remove<list<head_t, types...>, t> {
|
||||||
|
using type = concat_t<
|
||||||
|
if_<is_same_t<head_t, t>, list<>, list<head_t>>,
|
||||||
|
remove_t<list<types...>, t>
|
||||||
|
>;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<template<typename ...> class list, typename t>
|
||||||
|
struct remove<list<>, t> {
|
||||||
|
using type = list<>;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
@ -490,6 +561,13 @@ result_t type_dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_val
|
|||||||
std::forward<default_t>(default_value));
|
std::forward<default_t>(default_value));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename typelist, typename default_t, typename pred_t, typename type_to_value_t,
|
||||||
|
typename result_t = decltype(std::declval<type_to_value_t>()(type_to_type<head_t<typelist>> {}))>
|
||||||
|
result_t type_dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value = {}) {
|
||||||
|
return type_dispatch_impl<typelist>::template dispatch<result_t>(std::forward<pred_t>(pred),
|
||||||
|
std::forward<type_to_value_t>(type_to_value),
|
||||||
|
std::forward<default_t>(default_value));
|
||||||
|
}
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct cv_type_id {
|
struct cv_type_id {
|
||||||
@ -668,81 +746,47 @@ GAPI_FLUID_KERNEL(FSplit4, Split4, false) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
using isas_set = typelist<
|
||||||
template<typename T>
|
#ifdef HAVE_AVX512
|
||||||
static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, int length) {
|
avx512_tag,
|
||||||
// AVX512 implementation of wide universal intrinsics is slower than AVX2.
|
|
||||||
// It is turned off until the cause isn't found out.
|
|
||||||
#if 0
|
|
||||||
#ifdef HAVE_AVX512
|
|
||||||
if (with_cpu_x86_avx512f()) {
|
|
||||||
if (std::is_same<T, uint8_t>::value && chs == 1) {
|
|
||||||
avx512::copyRow_8U(in, out, length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (std::is_same<T, float>::value && chs == 1) {
|
|
||||||
avx512::copyRow_32F(reinterpret_cast<const float*>(in),
|
|
||||||
reinterpret_cast<float*>(out),
|
|
||||||
length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // HAVE_AVX512
|
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAVE_AVX2
|
||||||
|
avx2_tag,
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
sse42_tag,
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
neon_tag,
|
||||||
|
#endif
|
||||||
|
//scalar "ISA" have to be the last one in the list,
|
||||||
|
//as the search for supported ISA is performed until first match
|
||||||
|
scalar_tag>;
|
||||||
|
#ifdef HAVE_AVX512
|
||||||
|
bool is_present(avx512_tag) { return with_cpu_x86_avx512f(); }
|
||||||
|
#endif // HAVE_AVX512
|
||||||
|
|
||||||
#ifdef HAVE_AVX2
|
#ifdef HAVE_AVX2
|
||||||
if (with_cpu_x86_avx2()) {
|
bool is_present(avx2_tag) { return with_cpu_x86_avx2(); }
|
||||||
if (std::is_same<T, uint8_t>::value && chs == 1) {
|
#endif // HAVE_AVX2
|
||||||
avx::copyRow_8U(in, out, length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (std::is_same<T, float>::value && chs == 1) {
|
#ifdef HAVE_SSE
|
||||||
avx::copyRow_32F(reinterpret_cast<const float*>(in),
|
bool is_present(sse42_tag) { return with_cpu_x86_sse42(); }
|
||||||
reinterpret_cast<float*>(out),
|
#endif // HAVE_SSE
|
||||||
length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // HAVE_AVX2
|
|
||||||
#ifdef HAVE_SSE
|
|
||||||
if (with_cpu_x86_sse42()) {
|
|
||||||
if (std::is_same<T, uint8_t>::value && chs == 1) {
|
|
||||||
copyRow_8U(in, out, length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (std::is_same<T, float>::value && chs == 1) {
|
#ifdef HAVE_NEON
|
||||||
copyRow_32F(reinterpret_cast<const float*>(in),
|
bool is_present(neon_tag) { return true; }
|
||||||
reinterpret_cast<float*>(out),
|
#endif // HAVE_NEON
|
||||||
length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // HAVE_SSE
|
|
||||||
|
|
||||||
#ifdef HAVE_NEON
|
//scalar version of kernels is always available
|
||||||
if (std::is_same<T, uint8_t>::value && chs == 1) {
|
bool is_present(scalar_tag) { return true; }
|
||||||
neon::copyRow_8U(in, out, length);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (std::is_same<T, float>::value && chs == 1) {
|
struct is_isa_present {
|
||||||
neon::copyRow_32F(reinterpret_cast<const float*>(in),
|
template< typename isa_tag_t>
|
||||||
reinterpret_cast<float*>(out),
|
bool operator()(type_to_type<isa_tag_t>) {
|
||||||
length);
|
return is_present(isa_tag_t{});
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
#endif // HAVE_NEON
|
};
|
||||||
|
|
||||||
const auto inT = reinterpret_cast<const T*>(in);
|
|
||||||
auto outT = reinterpret_cast< T*>(out);
|
|
||||||
|
|
||||||
for (int x = 0; x < length; x++) {
|
|
||||||
outT[x] = inT[x*chs + chan];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GAPI_OCV_KERNEL(OCVChanToPlane, ChanToPlane) {
|
// GAPI_OCV_KERNEL(OCVChanToPlane, ChanToPlane) {
|
||||||
// static void run(const cv::Mat &in, int chan, cv::Mat &out) {
|
// static void run(const cv::Mat &in, int chan, cv::Mat &out) {
|
||||||
@ -774,15 +818,225 @@ static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, i
|
|||||||
// }
|
// }
|
||||||
// };
|
// };
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using chan_to_plane_supported_types = typelist<uint8_t, float>;
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void chanToPlaneRowImpl(scalar_tag, const T* in, int chan, int chs, T* out, int length) {
|
||||||
|
for (int x = 0; x < length; x++) {
|
||||||
|
out[x] = in[x*chs + chan];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
struct typed_chan_to_plane_row {
|
||||||
|
using p_f = void (*)(const uint8_t* in, int chan, int chs, uint8_t* out, int length);
|
||||||
|
|
||||||
|
template <typename type>
|
||||||
|
p_f operator()(type_to_type<type> ) {
|
||||||
|
return [](const uint8_t* in, int chan, int chs, uint8_t* out, int length){
|
||||||
|
const auto inT = reinterpret_cast<const type*>(in);
|
||||||
|
auto outT = reinterpret_cast< type*>(out);
|
||||||
|
|
||||||
|
chanToPlaneRowImpl(isa_tag_t{}, inT, chan, chs, outT, length);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} //namespace
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using nv12_to_rgb_supported_types = typelist<uint8_t>;
|
||||||
|
|
||||||
|
void nv12ToRgbRowImpl(scalar_tag, const uint8_t** y_rows, const uint8_t* uv_row,
|
||||||
|
uint8_t** out_rows, const int buf_width) {
|
||||||
|
for (int i = 0; i < buf_width; i += 2) {
|
||||||
|
uint8_t u = uv_row[i];
|
||||||
|
uint8_t v = uv_row[i + 1];
|
||||||
|
int ruv, guv, buv;
|
||||||
|
uvToRGBuv(u, v, ruv, guv, buv);
|
||||||
|
|
||||||
|
for (int y = 0; y < 2; y++) {
|
||||||
|
for (int x = 0; x < 2; x++) {
|
||||||
|
uint8_t vy = y_rows[y][i + x];
|
||||||
|
uint8_t r, g, b;
|
||||||
|
yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
|
||||||
|
|
||||||
|
out_rows[y][3 * (i + x)] = r;
|
||||||
|
out_rows[y][3 * (i + x) + 1] = g;
|
||||||
|
out_rows[y][3 * (i + x) + 2] = b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
struct typed_nv12_to_rgb_row {
|
||||||
|
using p_f = void (*)(const uint8_t** y_rows, const uint8_t* uv_row,
|
||||||
|
uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
template <typename type>
|
||||||
|
p_f operator()(type_to_type<type>) {
|
||||||
|
return [](const uint8_t** y_rows, const uint8_t* uv_row,
|
||||||
|
uint8_t** out_rows, const int buf_width) {
|
||||||
|
const auto inT1 = reinterpret_cast<const type**>(y_rows);
|
||||||
|
const auto inT2 = reinterpret_cast<const type*>(uv_row);
|
||||||
|
auto outT = reinterpret_cast<type**>(out_rows);
|
||||||
|
|
||||||
|
nv12ToRgbRowImpl(isa_tag_t{}, inT1, inT2, outT, buf_width);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using i420_to_rgb_supported_types = typelist<uint8_t>;
|
||||||
|
|
||||||
|
static void i420ToRgbRowImpl(scalar_tag, const uint8_t** y_rows,
|
||||||
|
const uint8_t* u_row,
|
||||||
|
const uint8_t* v_row,
|
||||||
|
uint8_t** out_rows,
|
||||||
|
const int buf_width) {
|
||||||
|
for (int i = 0; i < buf_width; i += 2) {
|
||||||
|
uchar u = u_row[i / 2];
|
||||||
|
uchar v = v_row[i / 2];
|
||||||
|
int ruv, guv, buv;
|
||||||
|
uvToRGBuv(u, v, ruv, guv, buv);
|
||||||
|
|
||||||
|
for (int y = 0; y < 2; y++) {
|
||||||
|
for (int x = 0; x < 2; x++) {
|
||||||
|
uchar vy = y_rows[y][i + x];
|
||||||
|
uchar r, g, b;
|
||||||
|
yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
|
||||||
|
|
||||||
|
out_rows[y][3 * (i + x)] = r;
|
||||||
|
out_rows[y][3 * (i + x) + 1] = g;
|
||||||
|
out_rows[y][3 * (i + x) + 2] = b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
struct typed_i420_to_rgb_row {
|
||||||
|
using p_f = void (*)(const uint8_t** y_rows, const uint8_t* u_row, const uint8_t* v_row,
|
||||||
|
uint8_t** out_rows, const int buf_width);
|
||||||
|
|
||||||
|
template <typename type>
|
||||||
|
p_f operator()(type_to_type<type>) {
|
||||||
|
return [](const uint8_t** y_rows, const uint8_t* u_row, const uint8_t* v_row,
|
||||||
|
uint8_t** out_rows, const int buf_width) {
|
||||||
|
const auto inT1 = reinterpret_cast<const type**>(y_rows);
|
||||||
|
const auto inT2 = reinterpret_cast<const type*>(u_row);
|
||||||
|
const auto inT3 = reinterpret_cast<const type*>(v_row);
|
||||||
|
auto outT = reinterpret_cast<type**>(out_rows);
|
||||||
|
|
||||||
|
i420ToRgbRowImpl(isa_tag_t{}, inT1, inT2, inT3, outT, buf_width);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
template <typename isa_tag_t>
|
||||||
|
struct choose_impl {
|
||||||
GAPI_FLUID_KERNEL(FChanToPlane, ChanToPlane, false) {
|
GAPI_FLUID_KERNEL(FChanToPlane, ChanToPlane, false) {
|
||||||
static const int Window = 1;
|
static const int Window = 1;
|
||||||
static void run(const cv::gapi::fluid::View& in, int chan,
|
static void run(const cv::gapi::fluid::View& in, int chan,
|
||||||
cv::gapi::fluid::Buffer& out) {
|
cv::gapi::fluid::Buffer& out) {
|
||||||
const auto rowFunc = (in.meta().depth == CV_8U) ? &chanToPlaneRow<uint8_t> : &chanToPlaneRow<float>;
|
GAPI_DbgAssert(is_cv_type_in_list<chan_to_plane_supported_types>(out.meta().depth));
|
||||||
|
|
||||||
|
const auto rowFunc = type_dispatch<chan_to_plane_supported_types>(out.meta().depth, cv_type_id{}, typed_chan_to_plane_row<isa_tag_t>{}, nullptr);
|
||||||
|
|
||||||
|
GAPI_DbgAssert(rowFunc);
|
||||||
|
|
||||||
rowFunc(in.InLineB(0), chan, in.meta().chan, out.OutLineB(), in.length());
|
rowFunc(in.InLineB(0), chan, in.meta().chan, out.OutLineB(), in.length());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) {
|
||||||
|
static const int Window = 1;
|
||||||
|
static const int LPI = 2;
|
||||||
|
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
|
||||||
|
|
||||||
|
static void run(const cv::gapi::fluid::View & in_y,
|
||||||
|
const cv::gapi::fluid::View & in_uv,
|
||||||
|
cv::gapi::fluid::Buffer & out) {
|
||||||
|
GAPI_DbgAssert(is_cv_type_in_list<nv12_to_rgb_supported_types>(out.meta().depth));
|
||||||
|
|
||||||
|
const uchar* uv_row = in_uv.InLineB(0);
|
||||||
|
const uchar* y_rows[2] = { in_y.InLineB(0), in_y.InLineB(1) };
|
||||||
|
uchar* out_rows[2] = { out.OutLineB(0), out.OutLineB(1) };
|
||||||
|
|
||||||
|
int buf_width = out.length();
|
||||||
|
|
||||||
|
const auto rowFunc = type_dispatch<nv12_to_rgb_supported_types>(out.meta().depth, cv_type_id{}, typed_nv12_to_rgb_row<isa_tag_t>{}, nullptr);
|
||||||
|
|
||||||
|
GAPI_DbgAssert(rowFunc);
|
||||||
|
|
||||||
|
rowFunc(y_rows, uv_row, out_rows, buf_width);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
GAPI_FLUID_KERNEL(FI420toRGB, I420toRGB, false) {
|
||||||
|
static const int Window = 1;
|
||||||
|
static const int LPI = 2;
|
||||||
|
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
|
||||||
|
|
||||||
|
static void run(const cv::gapi::fluid::View & in_y,
|
||||||
|
const cv::gapi::fluid::View & in_u,
|
||||||
|
const cv::gapi::fluid::View & in_v,
|
||||||
|
cv::gapi::fluid::Buffer & out) {
|
||||||
|
GAPI_DbgAssert(is_cv_type_in_list<i420_to_rgb_supported_types>(out.meta().depth));
|
||||||
|
|
||||||
|
const uchar* u_row = in_u.InLineB(0);
|
||||||
|
const uchar* v_row = in_v.InLineB(0);
|
||||||
|
const uchar* y_rows[2] = { in_y.InLineB(0), in_y.InLineB(1) };
|
||||||
|
uchar* out_rows[2] = { out.OutLineB(0), out.OutLineB(1) };
|
||||||
|
|
||||||
|
int buf_width = out.length();
|
||||||
|
GAPI_DbgAssert(in_u.length() == in_v.length());
|
||||||
|
|
||||||
|
const auto rowFunc = type_dispatch<i420_to_rgb_supported_types>(out.meta().depth, cv_type_id{}, typed_i420_to_rgb_row<isa_tag_t>{}, nullptr);
|
||||||
|
|
||||||
|
GAPI_DbgAssert(rowFunc);
|
||||||
|
|
||||||
|
rowFunc(y_rows, u_row, v_row, out_rows, buf_width);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
struct ColorConversionISA {
|
||||||
|
cv::gapi::GKernelPackage& pckg;
|
||||||
|
|
||||||
|
ColorConversionISA(cv::gapi::GKernelPackage& _pckg) : pckg(_pckg) {}
|
||||||
|
|
||||||
|
template<typename isa_tag_t>
|
||||||
|
bool operator()(type_to_type<isa_tag_t>) {
|
||||||
|
pckg.include<typename choose_impl<isa_tag_t>::FI420toRGB>();
|
||||||
|
pckg.include<typename choose_impl<isa_tag_t>::FNV12toRGB>();
|
||||||
|
pckg.include<typename choose_impl<isa_tag_t>::FChanToPlane>();
|
||||||
|
//at the moment type_dispatch requires something to be returned by the lambda
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} //namespace
|
||||||
|
|
||||||
|
cv::gapi::GKernelPackage FColorConversionChooseISA() {
|
||||||
|
// At the moment AVX512 implementation of wide universal intrinsics is slower than AVX2.
|
||||||
|
// So, disable it for now.
|
||||||
|
using isas = remove_t<isas_set, avx512_tag>;
|
||||||
|
|
||||||
|
cv::gapi::GKernelPackage pckg;
|
||||||
|
ColorConversionISA ctpISA{pckg};
|
||||||
|
|
||||||
|
type_dispatch<isas>(is_isa_present{}, ctpISA, false);
|
||||||
|
|
||||||
|
return pckg;
|
||||||
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
G_TYPED_KERNEL(ScalePlane8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_8u") {
|
G_TYPED_KERNEL(ScalePlane8u, <cv::GMat(cv::GMat, Size, int)>, "com.intel.ie.scale_plane_8u") {
|
||||||
@ -2234,180 +2488,6 @@ GAPI_FLUID_KERNEL(FScalePlaneArea8u, ScalePlaneArea8u, true) {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int ITUR_BT_601_CY = 1220542;
|
|
||||||
static const int ITUR_BT_601_CUB = 2116026;
|
|
||||||
static const int ITUR_BT_601_CUG = -409993;
|
|
||||||
static const int ITUR_BT_601_CVG = -852492;
|
|
||||||
static const int ITUR_BT_601_CVR = 1673527;
|
|
||||||
static const int ITUR_BT_601_SHIFT = 20;
|
|
||||||
|
|
||||||
static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) {
|
|
||||||
int uu, vv;
|
|
||||||
uu = static_cast<int>(u) - 128;
|
|
||||||
vv = static_cast<int>(v) - 128;
|
|
||||||
|
|
||||||
ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * vv;
|
|
||||||
guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * vv + ITUR_BT_601_CUG * uu;
|
|
||||||
buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * uu;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv,
|
|
||||||
uchar& r, uchar& g, uchar& b) {
|
|
||||||
int yy = static_cast<int>(vy);
|
|
||||||
int y = std::max(0, yy - 16) * ITUR_BT_601_CY;
|
|
||||||
r = saturate_cast<uchar>((y + ruv) >> ITUR_BT_601_SHIFT);
|
|
||||||
g = saturate_cast<uchar>((y + guv) >> ITUR_BT_601_SHIFT);
|
|
||||||
b = saturate_cast<uchar>((y + buv) >> ITUR_BT_601_SHIFT);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void calculate_nv12_to_rgb_fallback(const uchar **y_rows,
|
|
||||||
const uchar *uv_row,
|
|
||||||
uchar **out_rows,
|
|
||||||
int buf_width) {
|
|
||||||
for (int i = 0; i < buf_width; i += 2) {
|
|
||||||
uchar u = uv_row[i];
|
|
||||||
uchar v = uv_row[i + 1];
|
|
||||||
int ruv, guv, buv;
|
|
||||||
uvToRGBuv(u, v, ruv, guv, buv);
|
|
||||||
|
|
||||||
for (int y = 0; y < 2; y++) {
|
|
||||||
for (int x = 0; x < 2; x++) {
|
|
||||||
uchar vy = y_rows[y][i + x];
|
|
||||||
uchar r, g, b;
|
|
||||||
yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
|
|
||||||
|
|
||||||
out_rows[y][3*(i + x)] = r;
|
|
||||||
out_rows[y][3*(i + x) + 1] = g;
|
|
||||||
out_rows[y][3*(i + x) + 2] = b;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void calculate_i420_to_rgb_fallback(const uchar **y_rows,
|
|
||||||
const uchar *u_row,
|
|
||||||
const uchar *v_row,
|
|
||||||
uchar **out_rows,
|
|
||||||
int buf_width) {
|
|
||||||
for (int i = 0; i < buf_width; i += 2) {
|
|
||||||
uchar u = u_row[i / 2];
|
|
||||||
uchar v = v_row[i / 2];
|
|
||||||
int ruv, guv, buv;
|
|
||||||
uvToRGBuv(u, v, ruv, guv, buv);
|
|
||||||
|
|
||||||
for (int y = 0; y < 2; y++) {
|
|
||||||
for (int x = 0; x < 2; x++) {
|
|
||||||
uchar vy = y_rows[y][i + x];
|
|
||||||
uchar r, g, b;
|
|
||||||
yRGBuvToRGB(vy, ruv, guv, buv, r, g, b);
|
|
||||||
|
|
||||||
out_rows[y][3*(i + x)] = r;
|
|
||||||
out_rows[y][3*(i + x) + 1] = g;
|
|
||||||
out_rows[y][3*(i + x) + 2] = b;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) {
|
|
||||||
static const int Window = 1;
|
|
||||||
static const int LPI = 2;
|
|
||||||
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
|
|
||||||
|
|
||||||
static void run(const cv::gapi::fluid::View &in_y,
|
|
||||||
const cv::gapi::fluid::View &in_uv,
|
|
||||||
cv::gapi::fluid::Buffer &out) {
|
|
||||||
const uchar* uv_row = in_uv.InLineB(0);
|
|
||||||
const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)};
|
|
||||||
uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)};
|
|
||||||
|
|
||||||
int buf_width = out.length();
|
|
||||||
|
|
||||||
// AVX512 implementation of wide universal intrinsics is slower than AVX2.
|
|
||||||
// It is turned off until the cause isn't found out.
|
|
||||||
#if 0
|
|
||||||
#ifdef HAVE_AVX512
|
|
||||||
if (with_cpu_x86_avx512_core()) {
|
|
||||||
#define CV_AVX_512DQ 1
|
|
||||||
avx512::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif // HAVE_AVX512
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_AVX2
|
|
||||||
if (with_cpu_x86_avx2()) {
|
|
||||||
avx::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif // HAVE_AVX2
|
|
||||||
#ifdef HAVE_SSE
|
|
||||||
if (with_cpu_x86_sse42()) {
|
|
||||||
calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif // HAVE_SSE
|
|
||||||
|
|
||||||
#ifdef HAVE_NEON
|
|
||||||
neon::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width);
|
|
||||||
return;
|
|
||||||
#endif // HAVE_NEON
|
|
||||||
|
|
||||||
calculate_nv12_to_rgb_fallback(y_rows, uv_row, out_rows, buf_width);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
GAPI_FLUID_KERNEL(FI420toRGB, I420toRGB, false) {
|
|
||||||
static const int Window = 1;
|
|
||||||
static const int LPI = 2;
|
|
||||||
static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB;
|
|
||||||
|
|
||||||
static void run(const cv::gapi::fluid::View &in_y,
|
|
||||||
const cv::gapi::fluid::View &in_u,
|
|
||||||
const cv::gapi::fluid::View &in_v,
|
|
||||||
cv::gapi::fluid::Buffer &out) {
|
|
||||||
const uchar* u_row = in_u.InLineB(0);
|
|
||||||
const uchar* v_row = in_v.InLineB(0);
|
|
||||||
const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)};
|
|
||||||
uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)};
|
|
||||||
|
|
||||||
int buf_width = out.length();
|
|
||||||
GAPI_DbgAssert(in_u.length() == in_v.length());
|
|
||||||
|
|
||||||
// AVX512 implementation of wide universal intrinsics is slower than AVX2.
|
|
||||||
// It is turned off until the cause isn't found out.
|
|
||||||
#if 0
|
|
||||||
#ifdef HAVE_AVX512
|
|
||||||
if (with_cpu_x86_avx512_core()) {
|
|
||||||
#define CV_AVX_512DQ 1
|
|
||||||
avx512::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif // HAVE_AVX512
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_AVX2
|
|
||||||
if (with_cpu_x86_avx2()) {
|
|
||||||
avx::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif // HAVE_AVX2
|
|
||||||
#ifdef HAVE_SSE
|
|
||||||
if (with_cpu_x86_sse42()) {
|
|
||||||
calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif // HAVE_SSE
|
|
||||||
|
|
||||||
#ifdef HAVE_NEON
|
|
||||||
neon::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width);
|
|
||||||
return;
|
|
||||||
#endif // HAVE_NEON
|
|
||||||
|
|
||||||
calculate_i420_to_rgb_fallback(y_rows, u_row, v_row, out_rows, buf_width);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
template <typename src_t, typename dst_t>
|
template <typename src_t, typename dst_t>
|
||||||
@ -2520,9 +2600,10 @@ GAPI_FLUID_KERNEL(FDivC, GDivC, false) {
|
|||||||
using namespace kernels;
|
using namespace kernels;
|
||||||
|
|
||||||
cv::gapi::GKernelPackage preprocKernels() {
|
cv::gapi::GKernelPackage preprocKernels() {
|
||||||
return cv::gapi::kernels
|
return combine(
|
||||||
< FChanToPlane
|
FColorConversionChooseISA(),
|
||||||
, FScalePlanes
|
cv::gapi::kernels
|
||||||
|
<FScalePlanes
|
||||||
, FScalePlanes4
|
, FScalePlanes4
|
||||||
, FScalePlane
|
, FScalePlane
|
||||||
, FScalePlane32f
|
, FScalePlane32f
|
||||||
@ -2537,12 +2618,10 @@ cv::gapi::GKernelPackage preprocKernels() {
|
|||||||
, FSplit2
|
, FSplit2
|
||||||
, FSplit3
|
, FSplit3
|
||||||
, FSplit4
|
, FSplit4
|
||||||
, FNV12toRGB
|
|
||||||
, FI420toRGB
|
|
||||||
, FConvertDepth
|
, FConvertDepth
|
||||||
, FSubC
|
, FSubC
|
||||||
, FDivC
|
, FDivC
|
||||||
>();
|
>());
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
|
@ -34,6 +34,12 @@ namespace InferenceEngine {
|
|||||||
namespace gapi {
|
namespace gapi {
|
||||||
namespace kernels {
|
namespace kernels {
|
||||||
|
|
||||||
|
struct avx512_tag {};
|
||||||
|
struct avx2_tag {};
|
||||||
|
struct sse42_tag {};
|
||||||
|
struct neon_tag {};
|
||||||
|
struct scalar_tag {};
|
||||||
|
|
||||||
template<typename DST, typename SRC> static inline DST saturate_cast(SRC x);
|
template<typename DST, typename SRC> static inline DST saturate_cast(SRC x);
|
||||||
template<> inline short saturate_cast(int x) { return (std::min)(SHRT_MAX, (std::max)(SHRT_MIN, x)); }
|
template<> inline short saturate_cast(int x) { return (std::min)(SHRT_MAX, (std::max)(SHRT_MIN, x)); }
|
||||||
template<> inline short saturate_cast(float x) { return saturate_cast<short>(static_cast<int>(std::rint(x))); }
|
template<> inline short saturate_cast(float x) { return saturate_cast<short>(static_cast<int>(std::rint(x))); }
|
||||||
@ -116,6 +122,31 @@ static inline Q8_8 mulaw(Q0_16 a, Q8_8 w) { return static_cast<Q8_8>((a * w) >>
|
|||||||
static inline float mulas(float a, float s) { return a * s; }
|
static inline float mulas(float a, float s) { return a * s; }
|
||||||
static inline float mulaw(float a, float w) { return a * w; }
|
static inline float mulaw(float a, float w) { return a * w; }
|
||||||
|
|
||||||
|
static const int ITUR_BT_601_CY = 1220542;
|
||||||
|
static const int ITUR_BT_601_CUB = 2116026;
|
||||||
|
static const int ITUR_BT_601_CUG = -409993;
|
||||||
|
static const int ITUR_BT_601_CVG = -852492;
|
||||||
|
static const int ITUR_BT_601_CVR = 1673527;
|
||||||
|
static const int ITUR_BT_601_SHIFT = 20;
|
||||||
|
|
||||||
|
static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) {
|
||||||
|
int uu, vv;
|
||||||
|
uu = static_cast<int>(u) - 128;
|
||||||
|
vv = static_cast<int>(v) - 128;
|
||||||
|
|
||||||
|
ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * vv;
|
||||||
|
guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * vv + ITUR_BT_601_CUG * uu;
|
||||||
|
buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * uu;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv,
|
||||||
|
uchar& r, uchar& g, uchar& b) {
|
||||||
|
int yy = static_cast<int>(vy);
|
||||||
|
int y = std::max(0, yy - 16) * ITUR_BT_601_CY;
|
||||||
|
r = saturate_cast<uchar>((y + ruv) >> ITUR_BT_601_SHIFT);
|
||||||
|
g = saturate_cast<uchar>((y + guv) >> ITUR_BT_601_SHIFT);
|
||||||
|
b = saturate_cast<uchar>((y + buv) >> ITUR_BT_601_SHIFT);
|
||||||
|
}
|
||||||
} // namespace kernels
|
} // namespace kernels
|
||||||
} // namespace gapi
|
} // namespace gapi
|
||||||
} // namespace InferenceEngine
|
} // namespace InferenceEngine
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user