Merge branch 'master' into river/cpu_plugin_api_2.0
This commit is contained in:
commit
91fe9fa5df
@ -449,6 +449,10 @@ jobs:
|
||||
python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
|
||||
displayName: 'Model Optimizer UT'
|
||||
|
||||
- script: |
|
||||
python3 -m pytest -s $(REPO_DIR)/tools/ovc/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-OpenVinoConversion.xml
|
||||
displayName: 'OpenVino Conversion UT'
|
||||
|
||||
- script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_cpu_func_tests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_func_tests.xml
|
||||
displayName: 'CPU FuncTests'
|
||||
condition: and(succeeded(), eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'OFF'))
|
||||
|
1
.github/CODEOWNERS
vendored
1
.github/CODEOWNERS
vendored
@ -99,6 +99,7 @@
|
||||
/tools/legacy/ @openvinotoolkit/openvino-samples-maintainers
|
||||
/tools/openvino_dev/ @openvinotoolkit/openvino-tools-maintainers @openvinotoolkit/openvino-ie-python-api-maintainers
|
||||
/tools/mo/ @openvinotoolkit/openvino-mo-maintainers
|
||||
/tools/ovc/ @openvinotoolkit/openvino-mo-maintainers
|
||||
/tools/pot/ @openvinotoolkit/openvino-pot-maintainers
|
||||
/thirdparty/open_model_zoo/ @openvinotoolkit/omz-maintainers @openvinotoolkit/openvino-pot-maintainers
|
||||
|
||||
|
1
.github/labeler.yml
vendored
1
.github/labeler.yml
vendored
@ -87,6 +87,7 @@
|
||||
|
||||
'category: MO':
|
||||
- 'tools/mo/**/*'
|
||||
- 'tools/ovc/**/*'
|
||||
|
||||
'category: ONNX FE':
|
||||
- 'src/frontends/onnx/**/*'
|
||||
|
@ -87,7 +87,7 @@ macro(ov_cpack_settings)
|
||||
# - 2022.1.1, 2022.2 do not have debian packages enabled, distributed only as archives
|
||||
# - 2022.3 is the first release where Debian updated packages are introduced, others 2022.3.X are LTS
|
||||
2022.3.0 2022.3.1 2022.3.2 2022.3.3 2022.3.4 2022.3.5
|
||||
2023.0.0
|
||||
2023.0.0 2023.0.1
|
||||
)
|
||||
|
||||
#
|
||||
|
@ -73,7 +73,7 @@ macro(ov_cpack_settings)
|
||||
# - 2022.1.1, 2022.2 do not have rpm packages enabled, distributed only as archives
|
||||
# - 2022.3 is the first release where RPM updated packages are introduced, others 2022.3.X are LTS
|
||||
2022.3.0 2022.3.1 2022.3.2 2022.3.3 2022.3.4 2022.3.5
|
||||
2023.0.0
|
||||
2023.0.0 2023.0.1
|
||||
)
|
||||
|
||||
find_host_program(rpmlint_PROGRAM NAMES rpmlint DOC "Path to rpmlint")
|
||||
|
1
docs/_static/css/custom.css
vendored
1
docs/_static/css/custom.css
vendored
@ -70,6 +70,7 @@ ul#navbar-main-elements li:first-of-type {
|
||||
|
||||
ul#navbar-main-elements > li:hover {
|
||||
text-decoration: underline;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
|
||||
|
10
docs/_static/css/homepage_style.css
vendored
10
docs/_static/css/homepage_style.css
vendored
@ -3,13 +3,13 @@
|
||||
#openvino-documentation > h1 {
|
||||
display: none;
|
||||
}
|
||||
img {
|
||||
cursor: default;
|
||||
}
|
||||
h1 {
|
||||
font-size: var(--pst-font-size-h2);
|
||||
margin-bottom: 3rem;
|
||||
/*font-size: var(--pst-font-size-h2);*/
|
||||
/*margin-bottom: 3rem;*/
|
||||
display: none!important;
|
||||
}
|
||||
|
||||
|
||||
#ov-homepage-banner, .openvino-diagram, .ov-homepage-higlight-grid {
|
||||
margin-bottom: 90px!important;
|
||||
}
|
||||
|
BIN
docs/_static/download/OV_2023_models_supported.pdf
vendored
BIN
docs/_static/download/OV_2023_models_supported.pdf
vendored
Binary file not shown.
@ -1 +0,0 @@
|
||||
const e=document.getElementById("selector");if(!e)throw new Error("cannot find selector document");window.addEventListener("message",i=>{e.style.height=i.data.height+"px"});var o,n;const t=(n=(o=e.contentDocument)==null?void 0:o.body)==null?void 0:n.offsetHeight;t&&(e.style.height=`${t}px`);
|
1
docs/_static/selector-tool/assets/index-f34d1fad.js
vendored
Normal file
1
docs/_static/selector-tool/assets/index-f34d1fad.js
vendored
Normal file
@ -0,0 +1 @@
|
||||
const e=document.getElementById("selector");if(!e)throw new Error("cannot find selector document");window.addEventListener("message",t=>{t.origin===window.origin&&(t.data.type!=="size"||!t.data.height||(e.style.height=t.data.height+"px"))});var n,o;const i=(o=(n=e.contentDocument)==null?void 0:n.body)==null?void 0:o.offsetHeight;i&&(e.style.height=`${i}px`);
|
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta name="version" content="0290a24" />
|
||||
<meta name="version" content="8db148d" />
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Download Intel® Distribution of OpenVINO™ Toolkit</title>
|
||||
@ -9,14 +9,11 @@
|
||||
name="description"
|
||||
content="Download a version of the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows, or macOS."
|
||||
/>
|
||||
<script
|
||||
type="module"
|
||||
crossorigin
|
||||
src="./assets/selector-363359f4.js"
|
||||
></script>
|
||||
<link rel="stylesheet" href="./assets/selector-5c3f26d1.css" />
|
||||
<script type="module" crossorigin src="./assets/selector-860516f5.js"></script>
|
||||
<link rel="stylesheet" href="./assets/selector-5c3f26d1.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
|
||||
</body>
|
||||
</html>
|
@ -28,6 +28,7 @@ copyright = '2023, Intel®'
|
||||
author = 'Intel®'
|
||||
|
||||
language = 'en'
|
||||
version_name = 'nightly'
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
|
||||
@ -48,7 +49,13 @@ extensions = [
|
||||
|
||||
html_baseurl = 'https://docs.openvino.ai/canonical/'
|
||||
|
||||
# -- Sitemap configuration ---------------------------
|
||||
|
||||
sitemap_url_scheme = "{link}"
|
||||
site_url = f'https://docs.openvino.ai/{version_name}/'
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
|
||||
html_favicon = '_static/favicon.ico'
|
||||
autodoc_default_flags = ['members']
|
||||
|
@ -1,7 +1,6 @@
|
||||
.. OpenVINO Toolkit documentation master file, created by
|
||||
sphinx-quickstart on Wed Jul 7 10:46:56 2021.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
============================
|
||||
OpenVINO 2023.0
|
||||
============================
|
||||
|
||||
.. meta::
|
||||
:google-site-verification: _YqumYQ98cmXUTwtzM_0WIIadtDc6r_TMYGbmGgNvrk
|
||||
@ -34,8 +33,6 @@
|
||||
:align: center
|
||||
|
||||
|
||||
|
||||
|
||||
.. grid:: 2 2 3 3
|
||||
:class-container: ov-homepage-higlight-grid
|
||||
|
||||
@ -75,11 +72,8 @@
|
||||
Reach for performance with post-training and training-time compression with NNCF
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Feature Overview
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
##############################
|
||||
|
||||
.. grid:: 1 2 2 2
|
||||
:class-container: ov-homepage-feature-grid
|
||||
@ -109,9 +103,6 @@ Feature Overview
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
@ -122,4 +113,5 @@ Feature Overview
|
||||
DOCUMENTATION <documentation>
|
||||
MODEL ZOO <model_zoo>
|
||||
RESOURCES <resources>
|
||||
RELEASE NOTES <https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html>
|
||||
RELEASE NOTES <release_notes>
|
||||
|
||||
|
@ -7,5 +7,5 @@ OpenVINO™ Documentation
|
||||
Install <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>
|
||||
Blog <https://blog.openvino.ai/>
|
||||
Forum <https://community.intel.com/t5/Intel-Distribution-of-OpenVINO/bd-p/distribution-openvino-toolkit>
|
||||
Training <https://www.intel.com/content/www/us/en/developer/tools/devcloud/edge/learn/certification.html>
|
||||
Support <https://www.intel.com/content/www/us/en/support/products/96066/software/development-software/openvino-toolkit.html>
|
||||
GitHub <https://github.com/openvinotoolkit>
|
||||
|
@ -20,8 +20,6 @@ To use a GPU device for OpenVINO inference, you must meet the following prerequi
|
||||
- `Intel® Graphics Compiler for OpenCL™ <https://github.com/intel/intel-graphics-compiler>`__
|
||||
- `OpenCL ICD loader package <https://github.com/KhronosGroup/OpenCL-ICD-Loader>`__
|
||||
|
||||
.. _wsl-instal:
|
||||
|
||||
Depending on your operating system, there may be different methods to install the above packages. Below are the instructions on how to install the packages on supported Linux distributions.
|
||||
|
||||
.. tab-set::
|
||||
@ -92,6 +90,8 @@ To check if the driver has been installed:
|
||||
|
||||
Your device driver has been updated and is now ready to use your GPU.
|
||||
|
||||
.. _wsl-install:
|
||||
|
||||
Windows Subsystem for Linux (WSL)
|
||||
#################################
|
||||
|
||||
|
@ -7,22 +7,27 @@ Supported operating systems for the Docker Base image:
|
||||
- Ubuntu 22.04 LTS
|
||||
- Ubuntu 20.04 LTS
|
||||
- RedHat UBI 8
|
||||
- Windows (WSL2)
|
||||
|
||||
.. important::
|
||||
|
||||
While Windows is listed as a supported system, there is no dedicated Docker Image for it. To work with Windows, use Windows Subsystem for Linux (WSL2).
|
||||
|
||||
The `Docker CI framework <https://github.com/openvinotoolkit/docker_ci/>`__ can generate a Dockerfile, build, test, and deploy an image using the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the OpenVINO™ image to your needs. You can get started easily with pre-built and published docker images. Details on how to get started can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__.
|
||||
|
||||
To start using them, the following conditions must be met:
|
||||
|
||||
- Linux OS or Windows Subsystem for Linux (WSL2)
|
||||
- Linux OS or Windows (under :ref:`Windows Subsystem for Linux (WSL2) <wsl-install>`)
|
||||
- Installed docker engine or compatible container engine
|
||||
- Permissions to run containers (sudo or docker group membership)
|
||||
|
||||
OpenVINO's `Docker <https://docs.docker.com/>`__ and `Bare Metal <https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html#doxid-ovms-docs-deploying-server>` distributions are identical, so the documentation applies to both.
|
||||
OpenVINO's `Docker <https://docs.docker.com/>`__ and `Bare Metal <https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html#doxid-ovms-docs-deploying-server>`__ distributions are identical, so the documentation applies to both.
|
||||
|
||||
.. note::
|
||||
|
||||
The OpenVINO development environment in a docker container is also available in the `notebook repository <https://github.com/openvinotoolkit/openvino_notebooks>`__ . It can be implemented in `OpenShift RedHat OpenData Science (RHODS) <https://github.com/openvinotoolkit/operator/blob/main/docs/notebook_in_rhods.md>`__.
|
||||
|
||||
ore information about Docker CI for Intel® Distribution of OpenVINO™ toolset can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
|
||||
More information about Docker CI for Intel® Distribution of OpenVINO™ toolset can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
|
||||
|
||||
* `Docker CI framework for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
|
||||
* `Get Started with DockerHub CI for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__
|
||||
|
@ -75,7 +75,7 @@ Step 1: Download and Install OpenVINO Core Components
|
||||
``C:\Program Files (x86)\Intel`` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer.
|
||||
|
||||
|
||||
2. Download the `OpenVINO Runtime archive file for Windows <https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/>`__ to your local ``Downloads`` folder.
|
||||
2. Download the `OpenVINO Runtime archive file for Windows <https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/windows/>`__ to your local ``Downloads`` folder.
|
||||
|
||||
If you prefer using the command line, run the following commands in the command prompt window you opened:
|
||||
|
||||
|
@ -24,9 +24,9 @@ Install OpenVINO
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<script type="module" crossorigin src="_static/selector-tool/assets/index-89e3365b.js"></script>
|
||||
<script type="module" crossorigin src="_static/selector-tool/assets/index-f34d1fad.js"></script>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<iframe id="selector" src="_static/selector-tool/selector-0290a24.html" style="width: 100%; border: none" title="Download Intel® Distribution of OpenVINO™ Toolkit"></iframe>
|
||||
<iframe id="selector" src="_static/selector-tool/selector-8db148d.html" style="width: 100%; border: none" title="Download Intel® Distribution of OpenVINO™ Toolkit"></iframe>
|
||||
|
||||
|
||||
OpenVINO installation package is distributed in two parts: OpenVINO Runtime and OpenVINO Development Tools.
|
||||
|
@ -9,6 +9,7 @@
|
||||
Use Archive <openvino_docs_install_guides_installing_openvino_from_archive_windows>
|
||||
Use PyPI <openvino_docs_install_guides_installing_openvino_pip>
|
||||
Use Conda Forge <openvino_docs_install_guides_installing_openvino_conda>
|
||||
Use Docker <openvino_docs_install_guides_installing_openvino_docker_linux>
|
||||
|
||||
|
||||
If you want to install OpenVINO™ Runtime on Windows, you have the following options:
|
||||
@ -16,6 +17,7 @@ If you want to install OpenVINO™ Runtime on Windows, you have the following op
|
||||
* :doc:`Install OpenVINO Runtime from an Archive File <openvino_docs_install_guides_installing_openvino_from_archive_windows>`
|
||||
* :doc:`Install OpenVINO Runtime using PyPI <openvino_docs_install_guides_installing_openvino_pip>`
|
||||
* :doc:`Install OpenVINO Runtime using Conda Forge <openvino_docs_install_guides_installing_openvino_conda>`
|
||||
* :doc:`Install OpenVINO using Docker <openvino_docs_install_guides_installing_openvino_docker_linux>`
|
||||
|
||||
For a full selection of distribution channels,
|
||||
see the `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__
|
||||
|
@ -17,13 +17,23 @@ Please file a github Issue on these with the label “pre-release” so we can g
|
||||
* NOT subject to official support.
|
||||
* Subject to change in the future.
|
||||
* Introduced to allow early testing and get early feedback from the community.
|
||||
|
||||
|
||||
.. dropdown:: OpenVINO Toolkit 2023.0.0.dev20230427
|
||||
|
||||
|
||||
.. dropdown:: OpenVINO Toolkit 2023.1.0.dev20230623
|
||||
:open:
|
||||
:animate: fade-in-slide-down
|
||||
:color: primary
|
||||
|
||||
The first pre-release for OpenVINO 2023.1, focused on fixing bugs and performance issues.
|
||||
|
||||
`Check on GitHub <https://github.com/openvinotoolkit/openvino/releases/tag/2023.1.0.dev20230623>`__
|
||||
|
||||
|
||||
.. dropdown:: OpenVINO Toolkit 2023.0.0.dev20230407
|
||||
:animate: fade-in-slide-down
|
||||
:color: secondary
|
||||
|
||||
Note that a new distribution channel has been introduced for C++ developers: `Conda Forge <https://anaconda.org/conda-forge/openvino>`__
|
||||
(the 2022.3.0 release is available there now).
|
||||
|
||||
|
18
docs/resources/release_notes.md
Normal file
18
docs/resources/release_notes.md
Normal file
@ -0,0 +1,18 @@
|
||||
# Release Notes {#release_notes}
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<meta http-equiv="Refresh" content="0; url='https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html'" />
|
||||
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
|
||||
prerelease_information
|
||||
|
||||
The official OpenVINO Release Notes are published at `intel.com <https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html>`__
|
||||
|
||||
|
||||
@endsphinxdirective
|
@ -8,7 +8,6 @@
|
||||
:hidden:
|
||||
|
||||
openvino_docs_performance_benchmarks
|
||||
prerelease_information
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
@ -6,12 +6,14 @@ The OpenVINO team continues the effort to support as many models out-of-the-box
|
||||
Based on our research and user feedback, we prioritize the most common models and test them
|
||||
before every release. These models are considered officially supported.
|
||||
|
||||
|
||||
.. button-link:: _static/download/OV_2023_models_supported.pdf
|
||||
:color: primary
|
||||
:outline:
|
||||
|
||||
:material-regular:`download;1.5em` Click for supported models [PDF]
|
||||
|
||||
The list is based on release 2023.0, as of June 01, 2023
|
||||
|
||||
| Note that the list provided here does not include all models supported by OpenVINO.
|
||||
| If your model is not included but is similar to those that are, it is still very likely to work.
|
||||
@ -22,30 +24,6 @@ before every release. These models are considered officially supported.
|
||||
* As OpenVINO™ is open source you can enhance it with your own contribution to the GitHub repository. To learn more, see the articles on :doc:`OpenVINO Extensibility <openvino_docs_Extensibility_UG_Intro>`.
|
||||
|
||||
|
||||
The following table summarizes the number of models supported by OpenVINO™ in different categories:
|
||||
|
||||
=========================================== ====================
|
||||
Model Categories: Number of Models:
|
||||
=========================================== ====================
|
||||
Object Detection 149
|
||||
Instance Segmentation 3
|
||||
Semantic Segmentation 19
|
||||
Image Processing, Enhancement 16
|
||||
Monodepth 2
|
||||
Colorization 2
|
||||
Behavior / Decision Prediction 1
|
||||
Action Recognition 2
|
||||
Time Series Forecasting 1
|
||||
Image Classification 68
|
||||
Image Classification, Dual Path Network 1
|
||||
Image Classification, Emotion 1
|
||||
Image Translation 1
|
||||
Natural language Processing 35
|
||||
Text Detection 18
|
||||
Audio Enhancement 3
|
||||
Sound Classification 2
|
||||
=========================================== ====================
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
|
@ -202,25 +202,25 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
|
||||
elif [ "$os" == "centos8" ] || [ "$os" == "rhel8" ] || [ "$os" == "almalinux8.7" ] ; then
|
||||
pkgs_core+=(
|
||||
"https://vault.centos.org/centos/8/AppStream/$arch/os/Packages/tbb-2018.2-9.el8.$arch.rpm"
|
||||
"https://download-ib01.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm"
|
||||
"https://dl.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm"
|
||||
"https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-2.1.2-6.el8.$arch.rpm"
|
||||
)
|
||||
pkgs_gpu+=("http://mirror.centos.org/centos/8-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.12-1.el8.$arch.rpm")
|
||||
pkgs_python+=(python38 python38-pip)
|
||||
pkgs_dev+=(
|
||||
"https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-devel-2.1.2-6.el8.$arch.rpm"
|
||||
"https://download-ib01.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/j/json-devel-3.6.1-2.el8.$arch.rpm"
|
||||
"https://dl.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/j/json-devel-3.6.1-2.el8.$arch.rpm"
|
||||
)
|
||||
extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm")
|
||||
elif [ "$os" == "rhel9.1" ] ; then
|
||||
pkgs_core=(
|
||||
"http://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/tbb-2020.3-8.el9.$arch.rpm"
|
||||
"https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/p/pugixml-1.13-1.el9.$arch.rpm"
|
||||
"https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-2.2.2-9.el9.$arch.rpm"
|
||||
"https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/p/pugixml-1.13-1.el9.$arch.rpm"
|
||||
"https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-2.2.2-9.el9.$arch.rpm"
|
||||
)
|
||||
pkgs_gpu+=("https://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.13-4.el9.$arch.rpm")
|
||||
pkgs_python=(python3 python3-pip)
|
||||
pkgs_dev+=("https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm")
|
||||
pkgs_dev+=("https://dl.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm")
|
||||
extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm")
|
||||
fi
|
||||
elif [ "$os" == "opensuse-leap15.3" ] ; then
|
||||
|
@ -1,2 +1,3 @@
|
||||
numpy>=1.16.6
|
||||
singledispatchmethod; python_version<'3.8'
|
||||
openvino-telemetry>=2023.0.0
|
||||
|
@ -511,10 +511,8 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests)
|
||||
if (code != InferenceEngine::StatusCode::OK) {
|
||||
IE_EXCEPTION_SWITCH(code,
|
||||
ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
|
||||
std::stringstream{}
|
||||
<< IE_LOCATION
|
||||
<< InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<=
|
||||
std::stringstream{});
|
||||
}
|
||||
|
||||
auto end_time = Time::now();
|
||||
|
@ -5,9 +5,9 @@
|
||||
# mypy: ignore-errors
|
||||
|
||||
|
||||
from openvino.tools.mo.moc_frontend.shape_utils import get_static_shape
|
||||
from openvino.tools.mo.utils.versions_checker import get_environment_setup # pylint: disable=no-name-in-module
|
||||
from openvino.tools.mo.utils.error import Error
|
||||
from openvino.tools.ovc.moc_frontend.shape_utils import get_static_shape
|
||||
from openvino.tools.ovc.environment_setup_utils import get_environment_setup # pylint: disable=no-name-in-module
|
||||
from openvino.tools.ovc.error import Error
|
||||
from distutils.version import LooseVersion
|
||||
import logging as log
|
||||
|
||||
|
@ -67,6 +67,13 @@ from openvino.runtime.ie_api import tensor_from_file
|
||||
from openvino.runtime.ie_api import compile_model
|
||||
|
||||
|
||||
# Model Conversion API
|
||||
try:
|
||||
from openvino.tools.ovc import convert_model, InputCutInfo, LayoutMap
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# Extend Node class to support binary operators
|
||||
Node.__add__ = opset11.add
|
||||
Node.__sub__ = opset11.subtract
|
||||
|
@ -58,7 +58,7 @@ class InferRequest(_InferRequestWrapper):
|
||||
Tensors for every input in form of:
|
||||
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
Data that is going to be copied:
|
||||
* `numpy.ndarray` which are not C contiguous
|
||||
* `numpy.ndarray` which are not C contiguous and/or not writable (WRITEABLE flag is set to False)
|
||||
* inputs whose data types do not match the Infer Request's inputs
|
||||
* inputs that should be in `BF16` data type
|
||||
* scalar inputs (i.e. `np.float_`/`int`/`float`)
|
||||
@ -118,7 +118,7 @@ class InferRequest(_InferRequestWrapper):
|
||||
Tensors for every input in form of:
|
||||
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
Data that is going to be copied:
|
||||
* `numpy.ndarray` which are not C contiguous
|
||||
* `numpy.ndarray` which are not C contiguous and/or not writable (WRITEABLE flag is set to False)
|
||||
* inputs whose data types do not match the Infer Request's inputs
|
||||
* inputs that should be in `BF16` data type
|
||||
* scalar inputs (i.e. `np.float_`/`int`/`float`)
|
||||
@ -246,7 +246,7 @@ class CompiledModel(CompiledModelBase):
|
||||
Tensors for every input in form of:
|
||||
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
Data that is going to be copied:
|
||||
* `numpy.ndarray` which are not C contiguous
|
||||
* `numpy.ndarray` which are not C contiguous and/or not writable (WRITEABLE flag is set to False)
|
||||
* inputs whose data types do not match the Infer Request's inputs
|
||||
* inputs that should be in `BF16` data type
|
||||
* scalar inputs (i.e. `np.float_`/`int`/`float`)
|
||||
@ -340,7 +340,7 @@ class AsyncInferQueue(AsyncInferQueueBase):
|
||||
Tensors for every input in form of:
|
||||
* `numpy.ndarray` and all the types that are castable to it, e.g. `torch.Tensor`
|
||||
Data that is going to be copied:
|
||||
* `numpy.ndarray` which are not C contiguous
|
||||
* `numpy.ndarray` which are not C contiguous and/or not writable (WRITEABLE flag is set to False)
|
||||
* inputs whose data types do not match the Infer Request's inputs
|
||||
* inputs that should be in `BF16` data type
|
||||
* scalar inputs (i.e. `np.float_`/`int`/`float`)
|
||||
|
@ -70,6 +70,11 @@ def _(
|
||||
tensor = Tensor(tensor_type, value.shape)
|
||||
tensor.data[:] = value.view(tensor_dtype)
|
||||
return tensor
|
||||
# Workaround for the "not writeable" edge case: always copy.
|
||||
if value.flags["WRITEABLE"] is False:
|
||||
tensor = Tensor(tensor_type, value.shape)
|
||||
tensor.data[:] = value.astype(tensor_dtype) if tensor_dtype != value.dtype else value
|
||||
return tensor
|
||||
# If types are mismatched, convert and always copy.
|
||||
if tensor_dtype != value.dtype:
|
||||
return Tensor(value.astype(tensor_dtype), shared_memory=False)
|
||||
|
@ -1112,3 +1112,33 @@ def test_mixed_dynamic_infer(device, shared_flag, input_data):
|
||||
else:
|
||||
assert not np.shares_memory(input_data[0], input_tensor0.data)
|
||||
assert not np.shares_memory(input_data[1], input_tensor1.data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("shared_flag", [True, False])
|
||||
@pytest.mark.parametrize(("input_data", "change_flags"), [
|
||||
({0: np.frombuffer(b"\x01\x02\x03\x04", np.uint8)}, False),
|
||||
({0: np.array([1, 2, 3, 4], dtype=np.uint8)}, True),
|
||||
])
|
||||
def test_not_writable_inputs_infer(device, shared_flag, input_data, change_flags):
|
||||
if change_flags is True:
|
||||
input_data[0].setflags(write=0)
|
||||
# identity model
|
||||
input_shape = [4]
|
||||
param_node = ops.parameter(input_shape, np.uint8, name="data0")
|
||||
core = Core()
|
||||
model = Model(param_node, [param_node])
|
||||
compiled = core.compile_model(model, "CPU")
|
||||
|
||||
results = compiled(input_data, shared_memory=shared_flag)
|
||||
|
||||
assert np.array_equal(results[0], input_data[0])
|
||||
|
||||
request = compiled.create_infer_request()
|
||||
results = request.infer(input_data, shared_memory=shared_flag)
|
||||
|
||||
assert np.array_equal(results[0], input_data[0])
|
||||
|
||||
input_tensor = request.get_input_tensor(0)
|
||||
|
||||
# Not writable inputs should always be copied.
|
||||
assert not np.shares_memory(input_data[0], input_tensor.data)
|
||||
|
@ -175,6 +175,18 @@ PY_INSTALL_CFG = {
|
||||
"install_dir": PY_PACKAGES_DIR,
|
||||
"binary_dir": OPENVINO_PYTHON_BINARY_DIR,
|
||||
},
|
||||
"ovc": {
|
||||
"entry_point": {
|
||||
"console_scripts": [
|
||||
"ovc = openvino.tools.ovc.main:main",
|
||||
],
|
||||
},
|
||||
"name": f"pyopenvino_{PYTHON_VERSION}",
|
||||
"prefix": f"{BUILD_BASE}/site-packages",
|
||||
"source_dir": f"{OPENVINO_SOURCE_DIR}/tools/ovc",
|
||||
"install_dir": PY_PACKAGES_DIR,
|
||||
"binary_dir": "ovc",
|
||||
},
|
||||
# "benchmark_app": { # noqa: E731
|
||||
# "entry_point": { # noqa: E731
|
||||
# "console_scripts": [ # noqa: E731
|
||||
@ -187,18 +199,6 @@ PY_INSTALL_CFG = {
|
||||
# "install_dir": PY_PACKAGES_DIR, # noqa: E731
|
||||
# "binary_dir": "benchmark_app", # noqa: E731
|
||||
# }, # noqa: E731
|
||||
# "model_optimizer": { # noqa: E731
|
||||
# "entry_point": { # noqa: E731
|
||||
# "console_scripts": [ # noqa: E731
|
||||
# "mo = openvino.tools.mo.main:main", # noqa: E731
|
||||
# ], # noqa: E731
|
||||
# }, # noqa: E731
|
||||
# "name": f"pyopenvino_{PYTHON_VERSION}", # noqa: E731
|
||||
# "prefix": f"{BUILD_BASE}/site-packages", # noqa: E731
|
||||
# "source_dir": f"{OPENVINO_SOURCE_DIR}/tools/mo", # noqa: E731
|
||||
# "install_dir": PY_PACKAGES_DIR, # noqa: E731
|
||||
# "binary_dir": "model_optimizer", # noqa: E731
|
||||
# }, # noqa: E731
|
||||
}
|
||||
|
||||
|
||||
|
@ -76,11 +76,19 @@ public:
|
||||
LinearIR::constExprIt loop_end_pos,
|
||||
size_t loop_depth, size_t vector_size);
|
||||
// Return Loop ID
|
||||
template <typename T>
|
||||
size_t mark_loop(LinearIR::constExprIt loop_begin_pos,
|
||||
LinearIR::constExprIt loop_end_pos,
|
||||
size_t work_amount, size_t work_amount_increment, size_t dim_idx,
|
||||
const std::vector<ExpressionPort>& entries,
|
||||
const std::vector<ExpressionPort>& exits);
|
||||
const std::vector<T>& entries,
|
||||
const std::vector<T>& exits) {
|
||||
const auto loop_info = std::make_shared<LoopManager::LoopInfo>(work_amount, work_amount_increment, dim_idx, entries, exits);
|
||||
const auto loop_id = this->add_loop_info(loop_info);
|
||||
for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
|
||||
insert_loop_id(*expr_it, loop_id);
|
||||
}
|
||||
return loop_id;
|
||||
}
|
||||
|
||||
void fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper = true);
|
||||
void fuse_loops(LinearIR::constExprIt loop_begin_target, LinearIR::constExprIt loop_end_target,
|
||||
@ -123,6 +131,8 @@ public:
|
||||
LinearIR::constExprIt& loop_end_pos,
|
||||
size_t loop_id, bool loop_ops_inserted = false);
|
||||
|
||||
LoopPort get_loop_port_by_expr_port(const ExpressionPort& expr_port, const size_t loop_id);
|
||||
|
||||
private:
|
||||
static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
|
||||
LinearIR::constExprIt loop_end_pos,
|
||||
|
@ -42,6 +42,12 @@ public:
|
||||
FuseLoops();
|
||||
bool run(LinearIR& linear_ir) override;
|
||||
|
||||
// This method checks that all ports which connect lower and upper loops are incremented.
|
||||
// This helps to avoid fusing for the ports with incomplete data
|
||||
static bool loop_ports_are_compatible(const LinearIR::LoopManagerPtr& loop_manager,
|
||||
const size_t loop_lower_id,
|
||||
const size_t loop_upper_id);
|
||||
|
||||
private:
|
||||
static bool can_be_fused(const LinearIR::LoopManager::LoopInfoPtr& loop_current,
|
||||
const LinearIR::LoopManager::LoopInfoPtr& loop_target);
|
||||
|
@ -0,0 +1,46 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pass.hpp"
|
||||
#include "snippets/lowered/loop_manager.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace snippets {
|
||||
namespace lowered {
|
||||
namespace pass {
|
||||
|
||||
/**
|
||||
* @interface SplitLoops
|
||||
* @brief If loop_1 has a larger increment but the same work amount as loop_2, which follows loop_1, then split loop_2
|
||||
* into two loops so that the outermost of the split loops can be fused with loop_1 using the pass `FuseLoops`.
|
||||
* Example:
|
||||
* Loop_1_begin Loop_1_begin Loop_1_begin
|
||||
* ... ... ...
|
||||
* Loop_1_end (wa = 128, inc = 32) Loop_1_end (wa = 128, inc = 32) Split_loop_2_begin
|
||||
* ... Splitting ... Fusing ...
|
||||
* Loop_2_begin => Split_loop_1_begin => Split_loop_2_end (wa = 32, inc = 1)
|
||||
* ... Split_loop_2_begin ...
|
||||
* Loop_2_end (wa = 128, inc = 1) ... Loop_1_end (wa = 128, inc = 32)
|
||||
* Split_loop_2_end (wa = 32, inc = 1)
|
||||
* Split_loop_1_end (wa = 128, inc = 32)
|
||||
* @ingroup snippets
|
||||
*/
|
||||
|
||||
class SplitLoops : public Pass {
|
||||
public:
|
||||
OPENVINO_RTTI("SplitLoops", "Pass")
|
||||
SplitLoops();
|
||||
bool run(LinearIR& linear_ir) override;
|
||||
|
||||
private:
|
||||
static bool can_be_split(const LinearIR::LoopManager::LoopInfoPtr& current,
|
||||
const LinearIR::LoopManager::LoopInfoPtr& target);
|
||||
};
|
||||
|
||||
} // namespace pass
|
||||
} // namespace lowered
|
||||
} // namespace snippets
|
||||
} // namespace ov
|
@ -104,12 +104,14 @@ public:
|
||||
ov::pass::Manager& pre_common,
|
||||
ov::pass::Manager& post_common,
|
||||
ov::pass::Manager& post_precision,
|
||||
lowered::pass::PassPipeline& target_lowered_markup_pipeline,
|
||||
lowered::pass::PassPipeline& target_lowered_pipeline,
|
||||
const void* compile_params = nullptr);
|
||||
snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr);
|
||||
snippets::Schedule generate(ov::pass::Manager& pre_common,
|
||||
ov::pass::Manager& post_common,
|
||||
ov::pass::Manager& post_precision,
|
||||
lowered::pass::PassPipeline& target_lowered_markup_pipeline,
|
||||
lowered::pass::PassPipeline& target_lowered_pipeline,
|
||||
const void* compile_params = nullptr);
|
||||
snippets::Schedule generate(const void* compile_params = nullptr);
|
||||
@ -144,7 +146,9 @@ public:
|
||||
private:
|
||||
void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
|
||||
void data_flow_transformations(ov::pass::Manager& pre_common, ov::pass::Manager& post_common, ov::pass::Manager& post_precision);
|
||||
void control_flow_transformations(lowered::LinearIR& linear_ir, lowered::pass::PassPipeline& target_pipeline);
|
||||
void control_flow_transformations(lowered::LinearIR& linear_ir,
|
||||
lowered::pass::PassPipeline& target_markup_pipeline,
|
||||
lowered::pass::PassPipeline& target_pipeline);
|
||||
void init_config();
|
||||
// Count of Subgraph virtual ports:
|
||||
// - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition)
|
||||
|
@ -113,6 +113,18 @@ void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir,
|
||||
}
|
||||
}
|
||||
|
||||
// Finds the loop port of loop `loop_id` that wraps the given expression port.
// Input expression ports are searched among the loop entry points, every other port
// among the loop exit points. Throws if no matching loop port exists.
LinearIR::LoopManager::LoopPort LinearIR::LoopManager::get_loop_port_by_expr_port(const ExpressionPort& expr_port, const size_t loop_id) {
    const auto& loop_info = get_loop_info(loop_id);
    const std::vector<LinearIR::LoopManager::LoopPort>& candidates =
        expr_port.get_type() == ExpressionPort::Input ? loop_info->entry_points : loop_info->exit_points;

    const auto found = std::find_if(candidates.cbegin(),
                                    candidates.cend(),
                                    [&expr_port](const LinearIR::LoopManager::LoopPort& loop_port) {
                                        return *loop_port.expr_port == expr_port;
                                    });
    if (found == candidates.cend()) {
        OPENVINO_THROW("Expression has not been found among loop ports. Loop id: " + std::to_string(loop_id));
    }
    return *found;
}
|
||||
|
||||
void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
|
||||
LinearIR::constExprIt loop_end_pos,
|
||||
std::vector<ExpressionPort> &entries,
|
||||
@ -211,18 +223,6 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos,
|
||||
}
|
||||
}
|
||||
|
||||
// Registers a new loop described by the given work amount, increment, dimension index and
// entry/exit ports, tags every expression in [loop_begin_pos, loop_end_pos) with the id of
// that loop and returns the id.
size_t LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos,
                                        LinearIR::constExprIt loop_end_pos,
                                        size_t work_amount, size_t work_amount_increment, size_t dim_idx,
                                        const std::vector<ExpressionPort>& entries,
                                        const std::vector<ExpressionPort>& exits) {
    const auto new_loop_info =
        std::make_shared<LoopManager::LoopInfo>(work_amount, work_amount_increment, dim_idx, entries, exits);
    const auto new_loop_id = add_loop_info(new_loop_info);

    auto expr_it = loop_begin_pos;
    while (expr_it != loop_end_pos) {
        insert_loop_id(*expr_it, new_loop_id);
        ++expr_it;
    }
    return new_loop_id;
}
|
||||
void LinearIR::LoopManager::fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper) {
|
||||
LinearIR::constExprIt loop_begin_target, loop_end_target;
|
||||
get_loop_bounds(linear_ir, fuse_into_upper ? loop_id_lower : loop_id_upper, loop_begin_target, loop_end_target);
|
||||
|
@ -24,6 +24,23 @@ using LoopInfoPtr = LoopManager::LoopInfoPtr;
|
||||
|
||||
// Constructs the pass; it only invokes the base Pass constructor and keeps no state of its own here.
FuseLoops::FuseLoops() : Pass() {}
|
||||
|
||||
// Checks every entry point of the lower loop whose producer expression belongs to the upper loop:
// both the entry point itself and the matching loop port of the upper loop must be incremented.
// Returns false as soon as one such connection is not incremented on either side, true otherwise.
bool FuseLoops::loop_ports_are_compatible(const LinearIR::LoopManagerPtr& loop_manager,
                                          const size_t loop_lower_id,
                                          const size_t loop_upper_id) {
    const auto lower_loop_info = loop_manager->get_loop_info(loop_lower_id);
    for (const auto& lower_entry : lower_loop_info->entry_points) {
        const auto& producer_port = lower_entry.expr_port->get_port_connector_ptr()->get_source();
        // Entries that are not fed from the upper loop are irrelevant for this check.
        if (!is_loop_id_found(producer_port.get_expr()->get_loop_ids(), loop_upper_id))
            continue;

        if (!lower_entry.is_incremented)
            return false;

        const auto upper_loop_port = loop_manager->get_loop_port_by_expr_port(producer_port, loop_upper_id);
        if (!upper_loop_port.is_incremented)
            return false;
    }
    return true;
}
|
||||
|
||||
bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr& loop_target) {
|
||||
auto current_work_amount = loop_current->work_amount;
|
||||
auto target_work_amount = loop_target->work_amount;
|
||||
@ -79,7 +96,7 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo
|
||||
LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) {
|
||||
const auto& loop_current = loop_manager->get_loop_info(current_loop_id);
|
||||
const auto& loop_target = loop_manager->get_loop_info(target_loop_id);
|
||||
if (!can_be_fused(loop_current, loop_target))
|
||||
if (!can_be_fused(loop_current, loop_target) || !loop_ports_are_compatible(loop_manager, current_loop_id, target_loop_id))
|
||||
return false;
|
||||
|
||||
// We can fuse Loop_up to Loop_down only in cases when other consumers of Loop_up are after Loop_down
|
||||
@ -129,7 +146,7 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo
|
||||
LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos) {
|
||||
const auto& loop_current = loop_manager->get_loop_info(current_loop_id);
|
||||
const auto& loop_target = loop_manager->get_loop_info(target_loop_id);
|
||||
if (!can_be_fused(loop_current, loop_target))
|
||||
if (!can_be_fused(loop_current, loop_target) || !loop_ports_are_compatible(loop_manager, target_loop_id, current_loop_id))
|
||||
return false;
|
||||
|
||||
// We can fuse Loop_down to Loop_up only in cases when other parents of Loop_down are before Loop_up
|
||||
|
@ -51,10 +51,15 @@ void InitLoops::init_ptr_increments(std::vector<LoopPort>& loop_inputs, std::vec
|
||||
const auto& layout = port->get_descriptor_ptr()->get_layout();
|
||||
const auto& shape = port->get_descriptor_ptr()->get_shape();
|
||||
const auto& dim = *(layout.rbegin() + dim_idx);
|
||||
// Ticket: 113106
|
||||
// WA: the current logic doesn't support the case with transposed output shape for brgemm layer
|
||||
// but for all existing cases planar layout can be used
|
||||
std::vector<size_t> planar(layout.size());
|
||||
std::iota(planar.begin(), planar.end(), 0);
|
||||
loop_output.ptr_increment = 0;
|
||||
// If relevant dim is not broadcasted, then ptr_increment is the dim stride in the new layout
|
||||
if (loop_output.is_incremented && !(shape[dim] == 1 && work_amount != 1)) {
|
||||
loop_output.ptr_increment = get_dim_stride(dim, layout, shape);
|
||||
loop_output.ptr_increment = get_dim_stride(dim, planar, shape);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,9 +4,10 @@
|
||||
|
||||
#include "snippets/lowered/pass/insert_buffers.hpp"
|
||||
|
||||
#include "snippets/itt.hpp"
|
||||
#include "snippets/lowered/linear_ir.hpp"
|
||||
#include "snippets/snippets_isa.hpp"
|
||||
#include "snippets/itt.hpp"
|
||||
#include "snippets/utils.hpp"
|
||||
|
||||
|
||||
namespace ov {
|
||||
@ -28,6 +29,49 @@ std::vector<size_t> get_buffer_loop_ids(const std::vector<size_t>& lhs, const st
|
||||
}
|
||||
return buffer_loop_ids;
|
||||
}
|
||||
|
||||
// Ticket: 113744
|
||||
// TODO: This logic covers only several specific cases so it should be generalized.
|
||||
ov::Shape compute_allocation_shape(const LinearIR::LoopManagerPtr& loop_manager,
|
||||
const std::vector<size_t>& buffer_loop_ids,
|
||||
const std::vector<size_t>& parent_loop_ids,
|
||||
const ov::Output<ov::Node>& parent_output,
|
||||
const int allocation_rank) {
|
||||
const size_t rank = allocation_rank >= 0 ? allocation_rank : parent_output.get_shape().size();
|
||||
ov::Shape allocation_shape(rank);
|
||||
const auto port = lowered::PortDescriptorUtils::get_port_descriptor_ptr(parent_output);
|
||||
const auto planar_shape = utils::get_reordered_planar_shape(ov::Shape{port->get_shape()}, port->get_layout());
|
||||
for (size_t i = 0; i < rank; ++i) {
|
||||
*(allocation_shape.rbegin() + i) = (planar_shape.rbegin() + i)->get_length();
|
||||
}
|
||||
|
||||
if (buffer_loop_ids.empty() || parent_loop_ids.empty()) {
|
||||
return allocation_shape;
|
||||
}
|
||||
|
||||
auto set_rest_dims_to_ones = [&](const int filled_dims_count) {
|
||||
for (int i = 0; i < static_cast<int>(allocation_shape.size()) - filled_dims_count; ++i) {
|
||||
allocation_shape[i] = 1;
|
||||
}
|
||||
};
|
||||
|
||||
// In some cases it's possible to allocate less shape
|
||||
// 1. Buffer and its parent are in the same loop: allocation size for the outer dimension can be extracted from loop increment
|
||||
// 2. Buffer is outside the parent's loops: allocation size can be extracted from the corresponding loop work amount
|
||||
// TODO: Use general logic with the help of memory counts for allocation shape computation
|
||||
if (buffer_loop_ids.back() == parent_loop_ids.back()) {
|
||||
const auto buffer_loop = loop_manager->get_loop_info(buffer_loop_ids.back());
|
||||
*(allocation_shape.rbegin() + 1) = buffer_loop->increment;
|
||||
set_rest_dims_to_ones(2);
|
||||
} else {
|
||||
for (size_t i = 0; i < std::min(rank, parent_loop_ids.size()); ++i) {
|
||||
const auto loop = loop_manager->get_loop_info(*(parent_loop_ids.rbegin() + i));
|
||||
*(allocation_shape.rbegin() + i) = loop->work_amount;
|
||||
}
|
||||
set_rest_dims_to_ones(static_cast<int>(parent_loop_ids.size()));
|
||||
}
|
||||
return allocation_shape;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
InsertBuffers::InsertBuffers(int32_t buffer_allocation_rank)
|
||||
@ -110,7 +154,12 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt
|
||||
// Current expr Loop identifies: 3, 4, 6
|
||||
// Need to insert between 2nd and 4th Loops - after 2nd Loop
|
||||
const auto pos = insertion_position(linear_ir, loop_manager, parent_expr, expr);
|
||||
const auto buffer = std::make_shared<op::Buffer>(parent->output(parent_port), m_buffer_allocation_rank);
|
||||
const auto allocation_shape = compute_allocation_shape(loop_manager,
|
||||
buffer_loop_ids,
|
||||
parent_loops,
|
||||
parent->output(parent_port),
|
||||
m_buffer_allocation_rank);
|
||||
const auto buffer = std::make_shared<op::Buffer>(parent->output(parent_port), allocation_shape);
|
||||
PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), parent_expr_output.get_descriptor_ptr()->clone());
|
||||
// Output connector is automatically filled from PortDescriptor
|
||||
const auto buffer_expr = linear_ir.create_expression(buffer, {input_connector});
|
||||
@ -183,7 +232,12 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt
|
||||
// Note: All potential consumers must have the same count of first equal Loop identifies and the same count of different last identifies
|
||||
const auto pos = insertion_position(linear_ir, loop_manager, expr, (*potential_consumers.begin()).get_expr());
|
||||
|
||||
auto buffer = std::make_shared<op::Buffer>(node->output(port), m_buffer_allocation_rank);
|
||||
const auto allocation_shape = compute_allocation_shape(loop_manager,
|
||||
buffer_loop_ids,
|
||||
current_loops,
|
||||
node->output(port),
|
||||
m_buffer_allocation_rank);
|
||||
auto buffer = std::make_shared<op::Buffer>(node->output(port), allocation_shape);
|
||||
PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), exit_port->get_descriptor_ptr()->clone());
|
||||
// We cannot insert Node output connector on Buffer output because not all consumers of Node needs Buffer
|
||||
// Example:
|
||||
|
96
src/common/snippets/src/lowered/pass/split_loops.cpp
Normal file
96
src/common/snippets/src/lowered/pass/split_loops.cpp
Normal file
@ -0,0 +1,96 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "snippets/lowered/pass/split_loops.hpp"
|
||||
|
||||
#include "snippets/lowered/pass/fuse_loops.hpp"
|
||||
#include "snippets/lowered/linear_ir.hpp"
|
||||
#include "snippets/lowered/loop_manager.hpp"
|
||||
#include "snippets/snippets_isa.hpp"
|
||||
#include "snippets/itt.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace snippets {
|
||||
namespace lowered {
|
||||
namespace pass {
|
||||
using LoopManager = LinearIR::LoopManager;
|
||||
using LoopInfoPtr = LoopManager::LoopInfoPtr;
|
||||
|
||||
// Constructs the pass; it only invokes the base Pass constructor and keeps no state of its own here.
SplitLoops::SplitLoops() : Pass() {}
|
||||
|
||||
bool SplitLoops::can_be_split(const LoopInfoPtr& current, const LoopInfoPtr& parent) {
|
||||
return current->work_amount == parent->work_amount && current->dim_idx == parent->dim_idx &&
|
||||
current->increment != parent->increment;
|
||||
}
|
||||
|
||||
// Walks over all expressions of the linear IR and, for the outermost loop of each expression,
// looks for a producer loop with the same work amount and dimension index but a different increment.
// The loop with the smaller increment gets its work amount reduced to the increment of the other loop
// and is additionally wrapped into a new loop that copies the work amount and increment of that other loop.
// FuseLoops is run afterwards if anything was split. Returns true when at least one loop was split.
bool SplitLoops::run(LinearIR& linear_ir) {
    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::SplitLoops")
    if (linear_ir.empty())
        return false;

    const auto& loop_manager = linear_ir.get_loop_manager();
    bool loop_was_split = false;
    for (const auto& expr : linear_ir) {
        const auto& loop_ids = expr->get_loop_ids();
        if (loop_ids.empty())
            continue;

        // Ticket: 113755
        // Note: we currently consider only the outermost loops for splitting
        // Splitting could also be done in a more general case, but the split loop and its parent must always
        // be in the same set of outer loops. Otherwise they won't be fused.
        const auto& outer_loop_id = loop_ids.front();
        const auto outer_loop = loop_manager->get_loop_info(outer_loop_id);
        for (const auto& entry : outer_loop->entry_points) {
            const auto& producer_port = entry.expr_port->get_port_connector_ptr()->get_source();
            const auto& producer_expr = producer_port.get_expr();
            const auto producer_loop_ids = producer_expr->get_loop_ids();
            if (producer_loop_ids.empty())
                continue;

            const auto& producer_loop_id = producer_loop_ids.front();
            // The returned port is not used below, but the lookup itself throws when the producer port
            // is not registered in the producer loop.
            // NOTE(review): confirm whether this call is meant as a validation step or is a leftover.
            const auto producer_loop_port = loop_manager->get_loop_port_by_expr_port(producer_port, producer_loop_id);
            // We don't split loops which are not compatible with the parent loop because such loops will not be fused
            if (!FuseLoops::loop_ports_are_compatible(loop_manager, outer_loop_id, producer_loop_id))
                continue;

            const auto producer_loop = loop_manager->get_loop_info(producer_loop_id);
            if (!can_be_split(outer_loop, producer_loop))
                continue;

            loop_was_split = true;
            // The loop with the smaller increment is the one that gets split.
            const bool split_producer = producer_loop->increment < outer_loop->increment;
            const auto& loop_to_split = split_producer ? producer_loop : outer_loop;
            const auto& loop_to_split_id = split_producer ? producer_loop_id : outer_loop_id;
            const auto& loop_to_fuse = split_producer ? outer_loop : producer_loop;
            // The existing loop now covers only one step of the loop it is going to be fused under.
            loop_to_split->work_amount = loop_to_fuse->increment;

            LinearIR::constExprIt split_begin_pos, split_end_pos;
            LoopManager::get_loop_bounds(linear_ir,
                                         loop_to_split->entry_points,
                                         loop_to_split->exit_points,
                                         split_begin_pos,
                                         split_end_pos,
                                         loop_to_split_id);
            // The new loop over the same expressions reuses the work amount and increment of the other loop.
            const auto new_loop_id = loop_manager->mark_loop(split_begin_pos,
                                                             split_end_pos,
                                                             loop_to_fuse->work_amount,
                                                             loop_to_fuse->increment,
                                                             loop_to_split->dim_idx,
                                                             loop_to_split->entry_points,
                                                             loop_to_split->exit_points);
            loop_manager->get_loop_info(new_loop_id)->outer_splited_loop = true;
            // Only one split is performed per visited expression.
            break;
        }
    }

    if (!loop_was_split)
        return false;

    // Ticket: 113666
    // FuseLoops pass is explicitly run here in order to avoid unnecessary computations
    // in case if loops are not split but FuseLoops is registered in pass manager after SplitLoops
    FuseLoops().run(linear_ir);
    return true;
}
|
||||
} // namespace pass
|
||||
} // namespace lowered
|
||||
} // namespace snippets
|
||||
} // namespace ov
|
@ -24,6 +24,7 @@
|
||||
#include "snippets/lowered/linear_ir.hpp"
|
||||
#include "snippets/lowered/pass/assign_registers.hpp"
|
||||
#include "snippets/lowered/pass/mark_loops.hpp"
|
||||
#include "snippets/lowered/pass/split_loops.hpp"
|
||||
#include "snippets/lowered/pass/fuse_loops.hpp"
|
||||
#include "snippets/lowered/pass/init_loops.hpp"
|
||||
#include "snippets/lowered/pass/insert_buffers.hpp"
|
||||
@ -507,6 +508,7 @@ void snippets::op::Subgraph::data_flow_transformations(ov::pass::Manager& pre_co
|
||||
}
|
||||
|
||||
void snippets::op::Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir,
|
||||
lowered::pass::PassPipeline& target_markup_pipeline,
|
||||
lowered::pass::PassPipeline& target_pipeline) {
|
||||
INTERNAL_OP_SCOPE(Subgraph);
|
||||
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::control_flow_transformations")
|
||||
@ -514,10 +516,15 @@ void snippets::op::Subgraph::control_flow_transformations(lowered::LinearIR& lin
|
||||
const size_t vector_size = get_generator()->get_target_machine()->get_lanes();
|
||||
const int32_t buffer_allocation_rank = static_cast<int32_t>(linear_ir.get_config().m_loop_depth);
|
||||
|
||||
// Ticket: 113666
|
||||
// TODO: Make pass pipeline with backend passes more flexible
|
||||
target_markup_pipeline.run(linear_ir);
|
||||
|
||||
lowered::pass::PassPipeline common_pipeline;
|
||||
common_pipeline.register_pass<lowered::pass::MarkLoops>(vector_size);
|
||||
common_pipeline.register_pass<lowered::pass::SoftmaxDecomposition>(vector_size);
|
||||
common_pipeline.register_pass<lowered::pass::FuseLoops>();
|
||||
common_pipeline.register_pass<lowered::pass::SplitLoops>();
|
||||
common_pipeline.register_pass<lowered::pass::MoveResultOutOfLoop>();
|
||||
common_pipeline.register_pass<lowered::pass::InsertBuffers>(buffer_allocation_rank);
|
||||
common_pipeline.register_pass<lowered::pass::InsertLoadStore>(vector_size);
|
||||
@ -557,22 +564,24 @@ snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& ou
|
||||
ov::pass::Manager& pre_common,
|
||||
ov::pass::Manager& post_common,
|
||||
ov::pass::Manager& post_precision,
|
||||
lowered::pass::PassPipeline& target_lowered_markup_pipeline,
|
||||
lowered::pass::PassPipeline& target_lowered_pipeline,
|
||||
const void* compile_params) {
|
||||
canonicalize(output_shapes, input_shapes);
|
||||
return generate(pre_common, post_common, post_precision, target_lowered_pipeline, compile_params);
|
||||
return generate(pre_common, post_common, post_precision, target_lowered_markup_pipeline, target_lowered_pipeline, compile_params);
|
||||
}
|
||||
|
||||
snippets::Schedule snippets::op::Subgraph::generate(const void* compile_params) {
|
||||
auto mngr = ov::pass::Manager();
|
||||
auto lowered = lowered::pass::PassPipeline();
|
||||
return generate(mngr, mngr, mngr, lowered, compile_params);
|
||||
return generate(mngr, mngr, mngr, lowered, lowered, compile_params);
|
||||
}
|
||||
|
||||
snippets::Schedule snippets::op::Subgraph::generate(
|
||||
ov::pass::Manager& pre_common,
|
||||
ov::pass::Manager& post_common,
|
||||
ov::pass::Manager& post_precision,
|
||||
lowered::pass::PassPipeline& target_lowered_markup_pipeline,
|
||||
lowered::pass::PassPipeline& target_lowered_pipeline,
|
||||
const void* compile_params) {
|
||||
INTERNAL_OP_SCOPE(Subgraph);
|
||||
@ -587,7 +596,7 @@ snippets::Schedule snippets::op::Subgraph::generate(
|
||||
lowering_config.m_loop_depth = tileRank;
|
||||
|
||||
lowered::LinearIR linear_ir = lowered::LinearIR(body_ptr(), lowering_config);
|
||||
control_flow_transformations(linear_ir, target_lowered_pipeline);
|
||||
control_flow_transformations(linear_ir, target_lowered_markup_pipeline, target_lowered_pipeline);
|
||||
|
||||
// actual code emission
|
||||
const auto& lowering_result = m_generator->generate(linear_ir, lowering_config, compile_params);
|
||||
|
@ -126,7 +126,8 @@ std::shared_ptr<ov::snippets::op::Subgraph> LoweringTests::getLoweredSubgraph(co
|
||||
}
|
||||
body_rt_info["PluginShapesOverride"] = new_shapes;
|
||||
subgraph->set_tile_rank(2);
|
||||
subgraph->generate(pre_dialect, post_precision, post_precision, lowered_pipeline);
|
||||
ov::snippets::lowered::pass::PassPipeline empty_pipeline;
|
||||
subgraph->generate(pre_dialect, post_precision, post_precision, empty_pipeline, lowered_pipeline);
|
||||
return subgraph;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,23 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/pass/graph_rewrite.hpp>
|
||||
#include <transformations_visibility.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace pass {
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief Converts Pad v12 to Pad v1
|
||||
*/
|
||||
class TRANSFORMATIONS_API ConvertPad12ToPad1 : public MatcherPass {
public:
    OPENVINO_RTTI("ConvertPad12ToPad1", "0");

    // Registers the matcher that replaces Pad v12 nodes with Pad v1 nodes.
    ConvertPad12ToPad1();
};
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
@ -0,0 +1,23 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/pass/graph_rewrite.hpp>
|
||||
#include <transformations_visibility.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace pass {
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
 * @brief Converts ScatterElementsUpdate v12 to ScatterElementsUpdate v3
|
||||
*/
|
||||
class TRANSFORMATIONS_API ConvertScatterElementsUpdate12ToScatterElementsUpdate3 : public MatcherPass {
public:
    OPENVINO_RTTI("ConvertScatterElementsUpdate12ToScatterElementsUpdate3", "0");

    // Registers the matcher that replaces ScatterElementsUpdate v12 nodes with ScatterElementsUpdate v3 nodes.
    ConvertScatterElementsUpdate12ToScatterElementsUpdate3();
};
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
@ -83,12 +83,13 @@
|
||||
#include "transformations/op_conversions/convert_minimum_to_power_and_max.hpp"
|
||||
#include "transformations/op_conversions/convert_mod.hpp"
|
||||
#include "transformations/op_conversions/convert_multiclass_nms_upgrade.hpp"
|
||||
#include "transformations/op_conversions/convert_pad12_downgrade.hpp"
|
||||
#include "transformations/op_conversions/convert_pad_to_group_conv.hpp"
|
||||
#include "transformations/op_conversions/convert_prior_box_v8_to_v0.hpp"
|
||||
#include "transformations/op_conversions/convert_reduce_to_pooling.hpp"
|
||||
#include "transformations/op_conversions/convert_roi_align_v3_to_v9.hpp"
|
||||
#include "transformations/op_conversions/convert_roi_align_v9_to_v3.hpp"
|
||||
#include "transformations/op_conversions/convert_scatter_elements_to_scatter.hpp"
|
||||
#include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp"
|
||||
#include "transformations/op_conversions/convert_softmax_downgrade.hpp"
|
||||
#include "transformations/op_conversions/convert_softmax_upgrade.hpp"
|
||||
#include "transformations/op_conversions/convert_space_to_depth.hpp"
|
||||
@ -213,6 +214,8 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model
|
||||
REGISTER_PASS(manager, ConvertXorToLogicalXor)
|
||||
REGISTER_PASS(manager, ConvertTopK11ToTopK3)
|
||||
REGISTER_PASS(manager, ConvertInterpolate11ToInterpolate4)
|
||||
REGISTER_PASS(manager, ConvertPad12ToPad1)
|
||||
REGISTER_PASS(manager, ConvertScatterElementsUpdate12ToScatterElementsUpdate3)
|
||||
|
||||
auto fq_fusions = manager.register_pass<GraphRewrite>();
|
||||
ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion)
|
||||
|
@ -0,0 +1,50 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/op_conversions/convert_pad12_downgrade.hpp"
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <openvino/op/pad.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
|
||||
// Registers a matcher on Pad v12 that replaces each matched node with a Pad v1 node
// using the same data, pads_begin, pads_end inputs and the same pad mode.
// When the v12 node has no explicit pad value input, a scalar zero constant of the data
// element type is supplied instead.
ov::pass::ConvertPad12ToPad1::ConvertPad12ToPad1() {
    MATCHER_SCOPE(ConvertPad12ToPad1);

    const auto pad_v12_pattern = pattern::wrap_type<ov::op::v12::Pad>();

    const matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto pad_v12 = std::dynamic_pointer_cast<ov::op::v12::Pad>(m.get_match_root());
        if (!pad_v12)
            return false;
        if (transformation_callback(pad_v12))
            return false;

        // The fourth input (pad value) is optional: fall back to a scalar zero when it is absent.
        const bool has_explicit_pad_value = pad_v12->get_input_size() == 4;
        const ov::Output<ov::Node> fill_value =
            has_explicit_pad_value
                ? pad_v12->input_value(3)
                : ov::Output<ov::Node>(
                      ov::op::v0::Constant::create(pad_v12->input_value(0).get_element_type(), ov::Shape{}, {0}));

        const std::shared_ptr<ov::Node> pad_v1 = std::make_shared<ov::op::v1::Pad>(pad_v12->input_value(0),
                                                                                   pad_v12->input_value(1),
                                                                                   pad_v12->input_value(2),
                                                                                   fill_value,
                                                                                   pad_v12->get_pad_mode());

        // Keep the name and runtime info of the replaced node.
        pad_v1->set_friendly_name(pad_v12->get_friendly_name());
        copy_runtime_info(pad_v12, pad_v1);
        replace_node(pad_v12, pad_v1);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(pad_v12_pattern, matcher_name);
    register_matcher(m, callback);
}
|
@ -0,0 +1,40 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp"
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <openvino/op/scatter_elements_update.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
|
||||
// Registers a matcher on ScatterElementsUpdate v12 that replaces each matched node with a
// ScatterElementsUpdate v3 node built from the same four inputs.
// Nodes whose reduction mode is not NONE are left untouched.
ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3::
    ConvertScatterElementsUpdate12ToScatterElementsUpdate3() {
    MATCHER_SCOPE(ConvertScatterElementsUpdate12ToScatterElementsUpdate3);

    const auto seu_v12_pattern = pattern::wrap_type<ov::op::v12::ScatterElementsUpdate>();

    const matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto scatter_v12 = std::dynamic_pointer_cast<ov::op::v12::ScatterElementsUpdate>(m.get_match_root());
        if (!scatter_v12)
            return false;
        if (transformation_callback(scatter_v12))
            return false;
        // The v3 node created below is built from inputs only, so a reduction cannot be carried over.
        if (scatter_v12->get_reduction() != ov::op::v12::ScatterElementsUpdate::Reduction::NONE)
            return false;

        const auto scatter_v3 = std::make_shared<ov::op::v3::ScatterElementsUpdate>(scatter_v12->input_value(0),
                                                                                    scatter_v12->input_value(1),
                                                                                    scatter_v12->input_value(2),
                                                                                    scatter_v12->input_value(3));

        // Keep the name and runtime info of the replaced node.
        scatter_v3->set_friendly_name(scatter_v12->get_friendly_name());
        copy_runtime_info(scatter_v12, scatter_v3);
        replace_node(scatter_v12, scatter_v3);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(seu_v12_pattern, matcher_name);
    register_matcher(m, callback);
}
|
@ -0,0 +1,86 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <memory>
|
||||
#include <openvino/opsets/opset1.hpp>
|
||||
#include <openvino/opsets/opset12.hpp>
|
||||
#include <openvino/pass/manager.hpp>
|
||||
#include <transformations/op_conversions/convert_pad12_downgrade.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
// Builds a model with a single Pad v12 node named "pad12" over an i16 {1, 3, 100, 100} input.
// pad_v == -1 (the default) means "no explicit pad value input"; any other value is passed
// to the node as a scalar i16 constant.
std::shared_ptr<ov::Model> create_v12_model(const ov::op::PadMode pad_mode, const int16_t pad_v = -1) {
    const auto data = std::make_shared<ov::opset12::Parameter>(ov::element::i16, ov::Shape{1, 3, 100, 100});
    const auto begin =
        std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 2, 1, 0});
    const auto end =
        std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 1, 1, 0});

    std::shared_ptr<ov::opset12::Pad> pad_node;
    if (pad_v == -1) {
        pad_node = std::make_shared<ov::opset12::Pad>(data, begin, end, pad_mode);
    } else {
        const auto fill =
            std::make_shared<ov::op::v0::Constant>(ov::element::i16, ov::Shape{}, std::vector<int16_t>{pad_v});
        pad_node = std::make_shared<ov::opset12::Pad>(data, begin, end, fill, pad_mode);
    }
    pad_node->set_friendly_name("pad12");

    return std::make_shared<ov::Model>(pad_node->outputs(), ov::ParameterVector{data});
}
|
||||
|
||||
// Builds the reference model: a single Pad v1 node named "pad1" over an i16 {1, 3, 100, 100} input,
// with the pad value always given as a scalar i16 constant holding pad_v.
std::shared_ptr<ov::Model> create_v1_model(const ov::op::PadMode pad_mode, const int16_t pad_v) {
    const auto data = std::make_shared<ov::opset1::Parameter>(ov::element::i16, ov::Shape{1, 3, 100, 100});
    const auto begin =
        std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 2, 1, 0});
    const auto end =
        std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{4}, std::vector<int64_t>{0, 1, 1, 0});
    const auto fill =
        std::make_shared<ov::op::v0::Constant>(ov::element::i16, ov::Shape{}, std::vector<int16_t>{pad_v});

    const auto pad_node = std::make_shared<ov::opset1::Pad>(data, begin, end, fill, pad_mode);
    pad_node->set_friendly_name("pad1");

    const ov::ParameterVector model_inputs{data};
    return std::make_shared<ov::Model>(pad_node->outputs(), model_inputs);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// CONSTANT mode without an explicit pad value: the reference Pad v1 carries a zero pad value.
TEST_F(TransformationTestsF, ConvertPad12ToPad1) {
    function = create_v12_model(ov::op::PadMode::CONSTANT);
    function_ref = create_v1_model(ov::op::PadMode::CONSTANT, 0);

    manager.register_pass<ov::pass::ConvertPad12ToPad1>();

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||||
|
||||
// CONSTANT mode with an explicit pad value of 5 on both the input and the reference model.
TEST_F(TransformationTestsF, ConvertPad12ToPad1_explicit_pad_value) {
    function = create_v12_model(ov::op::PadMode::CONSTANT, 5);
    function_ref = create_v1_model(ov::op::PadMode::CONSTANT, 5);

    manager.register_pass<ov::pass::ConvertPad12ToPad1>();

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||||
|
||||
// SYMMETRIC mode without an explicit pad value: the reference Pad v1 carries a zero pad value.
TEST_F(TransformationTestsF, ConvertPad12ToPad1_symmetric) {
    function = create_v12_model(ov::op::PadMode::SYMMETRIC);
    function_ref = create_v1_model(ov::op::PadMode::SYMMETRIC, 0);

    manager.register_pass<ov::pass::ConvertPad12ToPad1>();

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||||
|
||||
// SYMMETRIC mode with an explicit pad value of 5 on both the input and the reference model.
TEST_F(TransformationTestsF, ConvertPad12ToPad1_symmetric_explicit_pad_value) {
    function = create_v12_model(ov::op::PadMode::SYMMETRIC, 5);
    function_ref = create_v1_model(ov::op::PadMode::SYMMETRIC, 5);

    manager.register_pass<ov::pass::ConvertPad12ToPad1>();

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
@ -0,0 +1,78 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <memory>
|
||||
#include <openvino/opsets/opset12.hpp>
|
||||
#include <openvino/opsets/opset3.hpp>
|
||||
#include <openvino/pass/manager.hpp>
|
||||
#include <transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
using namespace testing;
|
||||
|
||||
namespace {
|
||||
using Reduction = ov::opset12::ScatterElementsUpdate::Reduction;
|
||||
|
||||
std::shared_ptr<ov::Model> create_v12_model(const Reduction reduction_type, const bool use_init_value) {
|
||||
const auto input = std::make_shared<ov::opset12::Parameter>(ov::element::f32, ov::Shape{1, 3, 100, 100});
|
||||
const auto indices = std::make_shared<ov::opset12::Parameter>(ov::element::i32, ov::Shape{1, 1, 5, 5});
|
||||
const auto updates = std::make_shared<ov::opset12::Parameter>(ov::element::f32, ov::Shape{1, 1, 5, 5});
|
||||
const auto axis = std::make_shared<ov::opset12::Parameter>(ov::element::i64, ov::Shape{});
|
||||
|
||||
const auto seu = std::make_shared<ov::opset12::ScatterElementsUpdate>(input,
|
||||
indices,
|
||||
updates,
|
||||
axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
|
||||
seu->set_friendly_name("seu12");
|
||||
|
||||
return std::make_shared<ov::Model>(seu->outputs(), ov::ParameterVector{input, indices, updates, axis});
|
||||
}
|
||||
|
||||
std::shared_ptr<ov::Model> create_v3_model() {
|
||||
const auto input = std::make_shared<ov::opset3::Parameter>(ov::element::f32, ov::Shape{1, 3, 100, 100});
|
||||
const auto indices = std::make_shared<ov::opset3::Parameter>(ov::element::i32, ov::Shape{1, 1, 5, 5});
|
||||
const auto updates = std::make_shared<ov::opset3::Parameter>(ov::element::f32, ov::Shape{1, 1, 5, 5});
|
||||
const auto axis = std::make_shared<ov::opset3::Parameter>(ov::element::i64, ov::Shape{});
|
||||
|
||||
const auto seu = std::make_shared<ov::opset3::ScatterElementsUpdate>(input, indices, updates, axis);
|
||||
|
||||
seu->set_friendly_name("seu3");
|
||||
|
||||
return std::make_shared<ov::Model>(seu->outputs(), ov::ParameterVector{input, indices, updates, axis});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Reduction::NONE with use_init_value == true: the v12 op is expected to become the v3 op.
TEST_F(TransformationTestsF, ConvertScatterElementsUpdate12ToScatterElementsUpdate3_no_reduction_use_init_value) {
    function = create_v12_model(Reduction::NONE, true);
    function_ref = create_v3_model();

    manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();

    // Compare both node attributes and constant contents.
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
|
||||
|
||||
// Reduction::NONE with use_init_value == false: the v12 op is expected to become the v3 op.
TEST_F(TransformationTestsF, ConvertScatterElementsUpdate12ToScatterElementsUpdate3_no_reduction) {
    function = create_v12_model(Reduction::NONE, false);
    function_ref = create_v3_model();

    manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();

    // Compare both node attributes and constant contents.
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
|
||||
|
||||
// Reduction::MEAN with use_init_value == true.
// NOTE(review): no function_ref is assigned here - presumably the fixture then checks
// that the pass leaves the model untouched; confirm against TransformationTestsF.
TEST_F(TransformationTestsF, ConvertScatterElementsUpdate12ToScatterElementsUpdate3_reduction_use_init_value) {
    function = create_v12_model(Reduction::MEAN, true);

    manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();
}
|
||||
|
||||
// Reduction::PROD with use_init_value == false.
// NOTE(review): no function_ref is assigned here - presumably the fixture then checks
// that the pass leaves the model untouched; confirm against TransformationTestsF.
TEST_F(TransformationTestsF, ConvertScatterElementsUpdate12ToScatterElementsUpdate3_reduction) {
    function = create_v12_model(Reduction::PROD, false);

    manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();
}
|
@ -1,15 +0,0 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/op/group_normalization.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace op {
|
||||
namespace v12 {
|
||||
using ov::op::v12::GroupNormalization;
|
||||
} // namespace v12
|
||||
} // namespace op
|
||||
} // namespace ngraph
|
@ -82,7 +82,6 @@
|
||||
#include "ngraph/op/grid_sample.hpp"
|
||||
#include "ngraph/op/grn.hpp"
|
||||
#include "ngraph/op/group_conv.hpp"
|
||||
#include "ngraph/op/group_normalization.hpp"
|
||||
#include "ngraph/op/gru_cell.hpp"
|
||||
#include "ngraph/op/gru_sequence.hpp"
|
||||
#include "ngraph/op/hard_sigmoid.hpp"
|
||||
|
@ -67,7 +67,6 @@ const NGRAPH_API OpSet& get_opset8();
|
||||
const NGRAPH_API OpSet& get_opset9();
|
||||
const NGRAPH_API OpSet& get_opset10();
|
||||
const NGRAPH_API OpSet& get_opset11();
|
||||
const NGRAPH_API OpSet& get_opset12();
|
||||
const NGRAPH_API std::map<std::string, std::function<const ngraph::OpSet&()>>& get_available_opsets();
|
||||
} // namespace ngraph
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
|
@ -1,15 +0,0 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ngraph/ops.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace opset12 {
|
||||
#define NGRAPH_OP(a, b) using b::a;
|
||||
#include "ngraph/opsets/opset12_tbl.hpp"
|
||||
#undef NGRAPH_OP
|
||||
} // namespace opset12
|
||||
} // namespace ngraph
|
@ -1,12 +0,0 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#ifndef NGRAPH_OP
|
||||
# warning "NGRAPH_OP not defined"
|
||||
# define NGRAPH_OP(x, y)
|
||||
#endif
|
||||
|
||||
#define _OPENVINO_OP_REG NGRAPH_OP
|
||||
#include "openvino/opsets/opset12_tbl.hpp"
|
||||
#undef _OPENVINO_OP_REG
|
@ -31,6 +31,13 @@ public:
|
||||
bool visit_attributes(AttributeVisitor& visitor) override;
|
||||
|
||||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
|
||||
private:
|
||||
bool evaluate_scatter_elements_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
|
||||
};
|
||||
} // namespace v3
|
||||
namespace v12 {
|
||||
@ -80,7 +87,12 @@ public:
|
||||
|
||||
bool has_evaluate() const override;
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
|
||||
private:
|
||||
bool evaluate_scatter_elements_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
|
||||
Reduction m_reduction = Reduction::NONE;
|
||||
bool m_use_init_val = true;
|
||||
};
|
||||
|
@ -33,12 +33,9 @@ public:
|
||||
bool evaluate_upper(TensorVector& output_values) const override;
|
||||
bool evaluate_label(TensorLabelVector& output_labels) const override;
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
|
||||
private:
|
||||
bool evaluate_scatter_element_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
|
||||
protected:
|
||||
bool is_supported_index_input_element_type() const;
|
||||
int64_t get_normalized_axis(const HostTensorVector& inputs) const;
|
||||
};
|
||||
} // namespace util
|
||||
} // namespace op
|
||||
|
@ -4,26 +4,57 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cfenv>
|
||||
#include <cstring>
|
||||
#include <iterator>
|
||||
|
||||
#include "ngraph/check.hpp"
|
||||
#include "ngraph/coordinate_transform.hpp"
|
||||
#include "ngraph/shape.hpp"
|
||||
#include "openvino/op/scatter_elements_update.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace runtime {
|
||||
namespace reference {
|
||||
using Reduction = ov::op::v12::ScatterElementsUpdate::Reduction;
|
||||
|
||||
template <typename DataType, typename IndicesType>
|
||||
void scatter_elem_update_with_reduction(const DataType* input_data,
|
||||
const IndicesType* indices,
|
||||
const DataType* updates,
|
||||
const int64_t axis,
|
||||
DataType* out_buf,
|
||||
const Shape& data_shape,
|
||||
const Shape& indices_shape,
|
||||
const ov::op::v12::ScatterElementsUpdate::Reduction reduction_type,
|
||||
const bool use_init_val);
|
||||
|
||||
template <typename DataType, typename IndicesType>
|
||||
void scatter_elem_update(const DataType* input_data,
|
||||
const IndicesType* indices,
|
||||
const DataType* updates,
|
||||
const int64_t& axis,
|
||||
const int64_t axis,
|
||||
DataType* out_buf,
|
||||
const Shape& data_shape,
|
||||
const Shape& indices_shape) {
|
||||
const Shape& indices_shape,
|
||||
const Reduction reduction_type = Reduction::NONE,
|
||||
const bool use_init_val = true) {
|
||||
// Copy inputs to out
|
||||
std::memcpy(out_buf, input_data, sizeof(DataType) * shape_size(data_shape));
|
||||
|
||||
if (reduction_type != Reduction::NONE) {
|
||||
scatter_elem_update_with_reduction(input_data,
|
||||
indices,
|
||||
updates,
|
||||
axis,
|
||||
out_buf,
|
||||
data_shape,
|
||||
indices_shape,
|
||||
reduction_type,
|
||||
use_init_val);
|
||||
return;
|
||||
}
|
||||
|
||||
// 3D example
|
||||
// output[indices[i][j][k]][j][k] = updates[i][j][k] if axis = 0,
|
||||
// output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1,
|
||||
@ -43,6 +74,160 @@ void scatter_elem_update(const DataType* input_data,
|
||||
out_buf[out_idx] = updates[indices_idx];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T reduction_neutral_value(const Reduction reduction_type) {
|
||||
switch (reduction_type) {
|
||||
case Reduction::MAX:
|
||||
return std::numeric_limits<T>::min();
|
||||
case Reduction::MIN:
|
||||
return std::numeric_limits<T>::max();
|
||||
case Reduction::PROD:
|
||||
return T{1};
|
||||
case Reduction::SUM:
|
||||
case Reduction::MEAN:
|
||||
return T{0};
|
||||
default:
|
||||
OPENVINO_THROW("Neutral value not available for this type of reduction");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::function<T(const T, const T)> reduction_functor_for(const Reduction reduction_type) {
|
||||
switch (reduction_type) {
|
||||
case Reduction::MAX:
|
||||
return [](const T a, const T b) {
|
||||
return a > b ? a : b;
|
||||
};
|
||||
case Reduction::MIN:
|
||||
return [](const T a, const T b) {
|
||||
return a < b ? a : b;
|
||||
};
|
||||
case Reduction::PROD:
|
||||
return std::multiplies<T>{};
|
||||
case Reduction::SUM:
|
||||
case Reduction::MEAN:
|
||||
return std::plus<T>{};
|
||||
default:
|
||||
OPENVINO_THROW("No functor available for this type of reduction");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
std::function<char(const char, const char)> reduction_functor_for<char>(const Reduction reduction_type) {
|
||||
switch (reduction_type) {
|
||||
case Reduction::MAX:
|
||||
return [](const char a, const char b) {
|
||||
return a > b ? a : b;
|
||||
};
|
||||
case Reduction::MIN:
|
||||
return [](const char a, const char b) {
|
||||
return a < b ? a : b;
|
||||
};
|
||||
case Reduction::PROD:
|
||||
return [](const char a, const char b) {
|
||||
return static_cast<bool>(a) && static_cast<bool>(b);
|
||||
};
|
||||
case Reduction::SUM:
|
||||
return [](const char a, const char b) {
|
||||
return static_cast<bool>(a) || static_cast<bool>(b);
|
||||
};
|
||||
default:
|
||||
OPENVINO_THROW("No functor available for this type of reduction");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Arithmetic mean for floating-point and class types: plain division of the accumulated
/// sum by the number of summed elements.
///
/// @param accumulator  Sum of the elements.
/// @param N            Number of elements that were summed.
/// @return             accumulator / N, converted to T.
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value || std::is_class<T>::value, T>::type arithmetic_mean(
    const T accumulator,
    const int32_t N) {
    const T mean = accumulator / N;
    return mean;
}
|
||||
|
||||
/// Arithmetic mean for integral types, rounded towards negative infinity (floor).
///
/// The quotient is computed with exact integer arithmetic. Going through `double` would
/// lose precision for 64-bit values above 2^53, and switching the floating-point rounding
/// mode is only well defined when FENV_ACCESS is enabled.
///
/// @param accumulator  Sum of the elements.
/// @param N            Number of elements that were summed; expected to be positive.
/// @return             floor(accumulator / N) as T.
template <typename T>
typename std::enable_if<std::is_integral<T>::value, T>::type arithmetic_mean(const T accumulator, const int32_t N) {
    if (std::is_signed<T>::value) {
        const int64_t dividend = static_cast<int64_t>(accumulator);
        const int64_t divisor = static_cast<int64_t>(N);
        int64_t quotient = dividend / divisor;
        const int64_t remainder = dividend % divisor;
        // Integer division truncates towards zero; step down by one when the exact
        // quotient is negative and not an integer, so that the result is the floor.
        if (remainder != 0 && ((remainder < 0) != (divisor < 0))) {
            --quotient;
        }
        return static_cast<T>(quotient);
    }

    // Unsigned values: truncation and floor coincide.
    return static_cast<T>(static_cast<uint64_t>(accumulator) / static_cast<uint64_t>(N));
}
|
||||
|
||||
/// Maps a possibly negative index onto a non-negative offset along a dimension.
///
/// @param idx        Index value; negative values count from the end of the dimension.
/// @param dim_value  Size of the dimension.
/// @return           idx + dim_value when idx is negative, idx otherwise. No range check
///                   is performed on the result.
template <typename T>
size_t normalize_index(const T idx, const size_t dim_value) {
    return idx < 0 ? static_cast<size_t>(idx + dim_value) : static_cast<size_t>(idx);
}
|
||||
|
||||
template <typename DataType, typename IndicesType>
|
||||
void scatter_elem_update_with_reduction(const DataType* input_data,
|
||||
const IndicesType* indices,
|
||||
const DataType* updates,
|
||||
const int64_t axis,
|
||||
DataType* out_buf,
|
||||
const Shape& data_shape,
|
||||
const Shape& indices_shape,
|
||||
const Reduction reduction_type,
|
||||
const bool use_init_val) {
|
||||
CoordinateTransformBasic indices_transform{indices_shape};
|
||||
CoordinateTransformBasic data_transform{data_shape};
|
||||
const auto indices_strides = row_major_strides(indices_shape);
|
||||
const auto data_strides = row_major_strides(data_shape);
|
||||
|
||||
struct Offsets {
|
||||
size_t idx_offset;
|
||||
size_t out_offset;
|
||||
};
|
||||
|
||||
std::vector<Offsets> idx_to_output_element;
|
||||
idx_to_output_element.reserve(shape_size(indices_shape));
|
||||
for (const Coordinate& indices_cord : indices_transform) {
|
||||
const size_t indices_offset =
|
||||
std::inner_product(indices_cord.begin(), indices_cord.end(), indices_strides.begin(), uint64_t(0));
|
||||
Coordinate out_cord(indices_cord);
|
||||
out_cord.at(axis) = normalize_index(indices[indices_offset], data_shape[axis]);
|
||||
const auto out_offset = std::inner_product(out_cord.begin(), out_cord.end(), data_strides.begin(), uint64_t(0));
|
||||
|
||||
idx_to_output_element.push_back({indices_offset, out_offset});
|
||||
}
|
||||
|
||||
// When this is false we need to substitute the copied values at target locations with values that will not affect
|
||||
// the particular reduction algorithms. Effectively what happens here is setting the initial value
|
||||
// for the reduction accumulators.
|
||||
if (!use_init_val) {
|
||||
const auto value = reduction_neutral_value<DataType>(reduction_type);
|
||||
for (const auto& offsets : idx_to_output_element) {
|
||||
out_buf[offsets.out_offset] = value;
|
||||
}
|
||||
}
|
||||
|
||||
// keeps the count of numbers included in the initial sums accumulated in the output tensor (reduction: MEAN)
|
||||
// the values in this map will later be used to divide the sums and calculate the final means
|
||||
// the key is the output tensor's element index and the value is the count
|
||||
std::unordered_map<size_t, int32_t> mean_reduction_counters;
|
||||
|
||||
const auto reduce = reduction_functor_for<DataType>(reduction_type);
|
||||
for (const auto& offsets : idx_to_output_element) {
|
||||
out_buf[offsets.out_offset] = reduce(out_buf[offsets.out_offset], updates[offsets.idx_offset]);
|
||||
if (reduction_type == Reduction::MEAN) {
|
||||
mean_reduction_counters[offsets.out_offset] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (reduction_type == Reduction::MEAN) {
|
||||
for (const auto& counter : mean_reduction_counters) {
|
||||
// include the initial value in the arithmetic mean divisor (if needed)
|
||||
const auto N = counter.second + static_cast<int32_t>(use_init_val);
|
||||
out_buf[counter.first] = arithmetic_mean<DataType>(out_buf[counter.first], N);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace reference
|
||||
} // namespace runtime
|
||||
} // namespace ngraph
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <scatter_elements_update_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/runtime/reference/scatter_elements_update.hpp"
|
||||
#include "openvino/core/validation_util.hpp"
|
||||
|
||||
using namespace std;
|
||||
@ -86,11 +87,246 @@ shared_ptr<Node> op::v12::ScatterElementsUpdate::clone_with_new_inputs(const Out
|
||||
}
|
||||
|
||||
bool op::v12::ScatterElementsUpdate::has_evaluate() const {
|
||||
if (m_reduction != Reduction::NONE) {
|
||||
return false;
|
||||
} else {
|
||||
return ScatterElementsUpdateBase::has_evaluate();
|
||||
return ScatterElementsUpdateBase::has_evaluate() ||
|
||||
(get_output_element_type(0) == element::boolean && is_supported_index_input_element_type());
|
||||
}
|
||||
|
||||
namespace scatter_elements_update {
|
||||
namespace {
|
||||
template <element::Type_t DT, element::Type_t IT, element::Type_t AT>
|
||||
bool evaluate(const HostTensorPtr& data,
|
||||
const HostTensorPtr& indices,
|
||||
const HostTensorPtr& updates,
|
||||
const HostTensorPtr& axis,
|
||||
const HostTensorPtr& out,
|
||||
const int64_t normalized_axis,
|
||||
const op::v12::ScatterElementsUpdate::Reduction reduction_type,
|
||||
const bool use_init_value) {
|
||||
using DataType = typename element_type_traits<DT>::value_type;
|
||||
using IndicesType = typename element_type_traits<IT>::value_type;
|
||||
|
||||
out->set_shape(data->get_shape());
|
||||
|
||||
ngraph::runtime::reference::scatter_elem_update<DataType, IndicesType>(data->get_data_ptr<DT>(),
|
||||
indices->get_data_ptr<IT>(),
|
||||
updates->get_data_ptr<DT>(),
|
||||
normalized_axis,
|
||||
out->get_data_ptr<DT>(),
|
||||
data->get_shape(),
|
||||
indices->get_shape(),
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#define TYPE_AXS_CASE(a, ...) \
|
||||
case element::Type_t::a: { \
|
||||
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_axs, _, a)); \
|
||||
rc = evaluate<DT, IT, element::Type_t::a>(__VA_ARGS__); \
|
||||
} break;
|
||||
|
||||
template <element::Type_t DT, element::Type_t IT>
|
||||
bool evaluate(const HostTensorPtr& arg0,
|
||||
const HostTensorPtr& arg1,
|
||||
const HostTensorPtr& arg2,
|
||||
const HostTensorPtr& arg3,
|
||||
const HostTensorPtr& out,
|
||||
const int64_t normalized_axis,
|
||||
const op::v12::ScatterElementsUpdate::Reduction reduction_type,
|
||||
const bool use_init_value) {
|
||||
auto axis_type = arg3->get_element_type();
|
||||
|
||||
// Dispatch specialization based on axis data type.
|
||||
bool rc = true;
|
||||
|
||||
switch (axis_type) {
|
||||
TYPE_AXS_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_AXS_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_AXS_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_AXS_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_AXS_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_AXS_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_AXS_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_AXS_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
default:
|
||||
rc = false;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
#define TYPE_IND_CASE(a, ...) \
|
||||
case element::Type_t::a: { \
|
||||
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_ind, _, a)); \
|
||||
rc = evaluate<DT, element::Type_t::a>(__VA_ARGS__); \
|
||||
} break;
|
||||
|
||||
template <element::Type_t DT>
|
||||
bool evaluate(const HostTensorPtr& arg0,
|
||||
const HostTensorPtr& arg1,
|
||||
const HostTensorPtr& arg2,
|
||||
const HostTensorPtr& arg3,
|
||||
const HostTensorPtr& out,
|
||||
const int64_t normalized_axis,
|
||||
const op::v12::ScatterElementsUpdate::Reduction reduction_type,
|
||||
const bool use_init_value) {
|
||||
auto indices_type = arg1->get_element_type();
|
||||
|
||||
// Dispatch specialization based on indicies data type.
|
||||
bool rc = true;
|
||||
|
||||
switch (indices_type) {
|
||||
TYPE_IND_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_IND_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_IND_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_IND_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_IND_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_IND_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_IND_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
TYPE_IND_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis, reduction_type, use_init_value);
|
||||
default:
|
||||
rc = false;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
bool evaluate_scatter_elements_update(
|
||||
const HostTensorPtr& arg0,
|
||||
const HostTensorPtr& arg1,
|
||||
const HostTensorPtr& arg2,
|
||||
const HostTensorPtr& arg3,
|
||||
const HostTensorPtr& out,
|
||||
const int64_t normalized_axis,
|
||||
const op::v12::ScatterElementsUpdate::Reduction reduction_type = op::v12::ScatterElementsUpdate::Reduction::NONE,
|
||||
const bool use_init_value = false) {
|
||||
bool rc = true;
|
||||
|
||||
switch (out->get_element_type()) {
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
|
||||
i16,
|
||||
arg0,
|
||||
arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
out,
|
||||
normalized_axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
|
||||
i32,
|
||||
arg0,
|
||||
arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
out,
|
||||
normalized_axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
|
||||
i64,
|
||||
arg0,
|
||||
arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
out,
|
||||
normalized_axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
|
||||
u32,
|
||||
arg0,
|
||||
arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
out,
|
||||
normalized_axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
|
||||
u64,
|
||||
arg0,
|
||||
arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
out,
|
||||
normalized_axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
|
||||
f16,
|
||||
arg0,
|
||||
arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
out,
|
||||
normalized_axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
|
||||
f32,
|
||||
arg0,
|
||||
arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
out,
|
||||
normalized_axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update,
|
||||
boolean,
|
||||
arg0,
|
||||
arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
out,
|
||||
normalized_axis,
|
||||
reduction_type,
|
||||
use_init_value);
|
||||
default:
|
||||
rc = false;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
} // namespace
|
||||
} // namespace scatter_elements_update
|
||||
|
||||
bool op::v3::ScatterElementsUpdate::evaluate_scatter_elements_update(const HostTensorVector& outputs,
|
||||
const HostTensorVector& inputs) const {
|
||||
const auto normalized_axis = get_normalized_axis(inputs);
|
||||
|
||||
return scatter_elements_update::evaluate_scatter_elements_update(inputs[0],
|
||||
inputs[1],
|
||||
inputs[2],
|
||||
inputs[3],
|
||||
outputs[0],
|
||||
normalized_axis);
|
||||
}
|
||||
|
||||
bool op::v12::ScatterElementsUpdate::evaluate_scatter_elements_update(const HostTensorVector& outputs,
|
||||
const HostTensorVector& inputs) const {
|
||||
const auto normalized_axis = get_normalized_axis(inputs);
|
||||
|
||||
return scatter_elements_update::evaluate_scatter_elements_update(inputs[0],
|
||||
inputs[1],
|
||||
inputs[2],
|
||||
inputs[3],
|
||||
outputs[0],
|
||||
normalized_axis,
|
||||
m_reduction,
|
||||
m_use_init_val);
|
||||
}
|
||||
|
||||
bool op::v3::ScatterElementsUpdate::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
|
||||
OV_OP_SCOPE(v3_ScatterElementsUpdate_evaluate);
|
||||
return evaluate_scatter_elements_update(outputs, inputs);
|
||||
}
|
||||
|
||||
bool op::v12::ScatterElementsUpdate::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
|
||||
OV_OP_SCOPE(v12_ScatterElementsUpdate_evaluate);
|
||||
return evaluate_scatter_elements_update(outputs, inputs);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
#include "bound_evaluate.hpp"
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/runtime/reference/scatter_elements_update.hpp"
|
||||
#include "openvino/core/validation_util.hpp"
|
||||
|
||||
namespace ov {
|
||||
@ -59,31 +58,35 @@ bool op::util::ScatterElementsUpdateBase::has_evaluate() const {
|
||||
OV_OP_SCOPE(util_ScatterElementsUpdateBase_has_evaluate);
|
||||
|
||||
switch (get_output_element_type(0)) {
|
||||
case ngraph::element::i16:
|
||||
case ngraph::element::i32:
|
||||
case ngraph::element::i64:
|
||||
case ngraph::element::u32:
|
||||
case ngraph::element::u64:
|
||||
case ngraph::element::f16:
|
||||
case ngraph::element::f32:
|
||||
case element::i16:
|
||||
case element::i32:
|
||||
case element::i64:
|
||||
case element::u32:
|
||||
case element::u64:
|
||||
case element::f16:
|
||||
case element::f32:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
return is_supported_index_input_element_type();
|
||||
}
|
||||
|
||||
bool op::util::ScatterElementsUpdateBase::is_supported_index_input_element_type() const {
|
||||
switch (get_input_element_type(1)) {
|
||||
case ngraph::element::i8:
|
||||
case ngraph::element::i16:
|
||||
case ngraph::element::i32:
|
||||
case ngraph::element::i64:
|
||||
case ngraph::element::u8:
|
||||
case ngraph::element::u16:
|
||||
case ngraph::element::u32:
|
||||
case ngraph::element::u64:
|
||||
break;
|
||||
case element::i8:
|
||||
case element::i16:
|
||||
case element::i32:
|
||||
case element::i64:
|
||||
case element::u8:
|
||||
case element::u16:
|
||||
case element::u32:
|
||||
case element::u64:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool op::util::ScatterElementsUpdateBase::evaluate_lower(ov::TensorVector& output_values) const {
|
||||
@ -104,126 +107,7 @@ bool op::util::ScatterElementsUpdateBase::evaluate_label(TensorLabelVector& outp
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
|
||||
namespace scatter_element_update {
|
||||
namespace {
|
||||
template <element::Type_t DT, element::Type_t IT, element::Type_t AT>
|
||||
bool evaluate(const HostTensorPtr& data,
|
||||
const HostTensorPtr& indices,
|
||||
const HostTensorPtr& updates,
|
||||
const HostTensorPtr& axis,
|
||||
const HostTensorPtr& out,
|
||||
const int64_t normalized_axis) {
|
||||
using DataType = typename element_type_traits<DT>::value_type;
|
||||
using IndicesType = typename element_type_traits<IT>::value_type;
|
||||
|
||||
out->set_shape(data->get_shape());
|
||||
|
||||
ngraph::runtime::reference::scatter_elem_update<DataType, IndicesType>(data->get_data_ptr<DT>(),
|
||||
indices->get_data_ptr<IT>(),
|
||||
updates->get_data_ptr<DT>(),
|
||||
normalized_axis,
|
||||
out->get_data_ptr<DT>(),
|
||||
data->get_shape(),
|
||||
indices->get_shape());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#define TYPE_AXS_CASE(a, ...) \
|
||||
case element::Type_t::a: { \
|
||||
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_axs, _, a)); \
|
||||
rc = evaluate<DT, IT, element::Type_t::a>(__VA_ARGS__); \
|
||||
} break;
|
||||
|
||||
template <element::Type_t DT, element::Type_t IT>
|
||||
bool evaluate(const HostTensorPtr& arg0,
|
||||
const HostTensorPtr& arg1,
|
||||
const HostTensorPtr& arg2,
|
||||
const HostTensorPtr& arg3,
|
||||
const HostTensorPtr& out,
|
||||
const int64_t normalized_axis) {
|
||||
auto axis_type = arg3->get_element_type();
|
||||
|
||||
// Dispatch specialization based on axis data type.
|
||||
bool rc = true;
|
||||
|
||||
switch (axis_type) {
|
||||
TYPE_AXS_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_AXS_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_AXS_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_AXS_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_AXS_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_AXS_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_AXS_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_AXS_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
default:
|
||||
rc = false;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
#define TYPE_IND_CASE(a, ...) \
|
||||
case element::Type_t::a: { \
|
||||
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_ind, _, a)); \
|
||||
rc = evaluate<DT, element::Type_t::a>(__VA_ARGS__); \
|
||||
} break;
|
||||
|
||||
template <element::Type_t DT>
|
||||
bool evaluate(const HostTensorPtr& arg0,
|
||||
const HostTensorPtr& arg1,
|
||||
const HostTensorPtr& arg2,
|
||||
const HostTensorPtr& arg3,
|
||||
const HostTensorPtr& out,
|
||||
const int64_t normalized_axis) {
|
||||
auto indices_type = arg1->get_element_type();
|
||||
|
||||
// Dispatch specialization based on indicies data type.
|
||||
bool rc = true;
|
||||
|
||||
switch (indices_type) {
|
||||
TYPE_IND_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_IND_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_IND_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_IND_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_IND_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_IND_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_IND_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
TYPE_IND_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
default:
|
||||
rc = false;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
bool evaluate_scatter_element_update(const HostTensorPtr& arg0,
|
||||
const HostTensorPtr& arg1,
|
||||
const HostTensorPtr& arg2,
|
||||
const HostTensorPtr& arg3,
|
||||
const HostTensorPtr& out,
|
||||
const int64_t normalized_axis) {
|
||||
bool rc = true;
|
||||
|
||||
switch (out->get_element_type()) {
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i16, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i32, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i64, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u32, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u64, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f16, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f32, arg0, arg1, arg2, arg3, out, normalized_axis);
|
||||
default:
|
||||
rc = false;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
} // namespace
|
||||
} // namespace scatter_element_update
|
||||
|
||||
bool op::util::ScatterElementsUpdateBase::evaluate_scatter_element_update(const HostTensorVector& outputs,
|
||||
const HostTensorVector& inputs) const {
|
||||
int64_t op::util::ScatterElementsUpdateBase::get_normalized_axis(const HostTensorVector& inputs) const {
|
||||
NGRAPH_CHECK(inputs[3]->get_element_type().is_integral_number(), "axis element type is not integral data type");
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
@ -243,20 +127,7 @@ bool op::util::ScatterElementsUpdateBase::evaluate_scatter_element_update(const
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
}
|
||||
|
||||
return scatter_element_update::evaluate_scatter_element_update(inputs[0],
|
||||
inputs[1],
|
||||
inputs[2],
|
||||
inputs[3],
|
||||
outputs[0],
|
||||
normalized_axis);
|
||||
return normalized_axis;
|
||||
}
|
||||
|
||||
bool op::util::ScatterElementsUpdateBase::evaluate(const HostTensorVector& outputs,
|
||||
const HostTensorVector& inputs) const {
|
||||
OV_OP_SCOPE(util_ScatterElementsUpdate_evaluate);
|
||||
return evaluate_scatter_element_update(outputs, inputs);
|
||||
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ov
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/deprecated.hpp"
|
||||
#include "ngraph/log.hpp"
|
||||
#include "ngraph/ops.hpp"
|
||||
#include "openvino/op/ops.hpp"
|
||||
#include "openvino/opsets/opset.hpp"
|
||||
#include "openvino/util/log.hpp"
|
||||
|
||||
@ -64,8 +64,7 @@ const std::map<std::string, std::function<const ngraph::OpSet&()>>& ngraph::get_
|
||||
_NGRAPH_REG_OPSET(opset8),
|
||||
_NGRAPH_REG_OPSET(opset9),
|
||||
_NGRAPH_REG_OPSET(opset10),
|
||||
_NGRAPH_REG_OPSET(opset11),
|
||||
_NGRAPH_REG_OPSET(opset12)};
|
||||
_NGRAPH_REG_OPSET(opset11)};
|
||||
#undef _NGRAPH_REG_OPSET
|
||||
return opset_map;
|
||||
}
|
||||
@ -275,8 +274,3 @@ const ngraph::OpSet& ngraph::get_opset11() {
|
||||
static OpSet opset(ov::get_opset11());
|
||||
return opset;
|
||||
}
|
||||
|
||||
const ngraph::OpSet& ngraph::get_opset12() {
|
||||
static OpSet opset(ov::get_opset12());
|
||||
return opset;
|
||||
}
|
||||
|
@ -1218,14 +1218,18 @@ TEST(eval, max_pool_v1_dynamic) {
|
||||
vector<float> out{1, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 0};
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_basic) {
|
||||
template <class T>
|
||||
class ScatterElementsUpdateEvalTest : public ::testing::Test {};
|
||||
TYPED_TEST_SUITE_P(ScatterElementsUpdateEvalTest);
|
||||
|
||||
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_static_scatter_elements_update_basic) {
|
||||
const Shape data_shape{3, 3};
|
||||
const Shape indices_shape{2, 3};
|
||||
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
|
||||
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate(
|
||||
@ -1241,7 +1245,7 @@ TEST(eval, evaluate_static_scatter_elements_update_basic) {
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_dynamic_scatter_elements_update_basic) {
|
||||
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_dynamic_scatter_elements_update_basic) {
|
||||
const Shape data_shape{3, 3};
|
||||
const Shape indices_shape{2, 3};
|
||||
|
||||
@ -1250,7 +1254,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_basic) {
|
||||
auto arg3 = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
|
||||
|
||||
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
|
||||
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate(
|
||||
@ -1267,7 +1271,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_basic) {
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_dynamic_scatter_elements_update_negative_axis) {
|
||||
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_dynamic_scatter_elements_update_negative_axis) {
|
||||
const Shape data_shape{3, 3};
|
||||
const Shape indices_shape{2, 3};
|
||||
const Shape axis_shape{};
|
||||
@ -1277,7 +1281,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_negative_axis) {
|
||||
auto arg3 = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
|
||||
|
||||
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
|
||||
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate(
|
||||
@ -1294,7 +1298,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_negative_axis) {
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_dynamic_scatter_elements_update_1d_axis) {
|
||||
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_dynamic_scatter_elements_update_1d_axis) {
|
||||
const Shape data_shape{3, 3};
|
||||
const Shape indices_shape{2, 3};
|
||||
|
||||
@ -1303,7 +1307,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_1d_axis) {
|
||||
auto arg3 = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
|
||||
|
||||
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
|
||||
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate(
|
||||
@ -1321,7 +1325,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_1d_axis) {
|
||||
}
|
||||
|
||||
// Disabled test for disabled reference implementation
|
||||
TEST(eval, DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16) {
|
||||
TYPED_TEST_P(ScatterElementsUpdateEvalTest, DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16) {
|
||||
const Shape data_shape{3, 3, 3};
|
||||
const Shape indices_shape{2, 2, 3};
|
||||
|
||||
@ -1330,7 +1334,7 @@ TEST(eval, DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16) {
|
||||
auto arg3 = make_shared<op::Parameter>(element::i16, PartialShape::dynamic());
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
|
||||
|
||||
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
|
||||
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(
|
||||
@ -1348,7 +1352,7 @@ TEST(eval, DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16) {
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_dynamic_scatter_elements_update_one_elem_i32) {
|
||||
TYPED_TEST_P(ScatterElementsUpdateEvalTest, evaluate_dynamic_scatter_elements_update_one_elem_i32) {
|
||||
const Shape data_shape{3, 3, 3};
|
||||
const Shape indices_shape{1, 1, 1};
|
||||
|
||||
@ -1357,7 +1361,7 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_one_elem_i32) {
|
||||
auto arg3 = make_shared<op::Parameter>(element::i32, PartialShape::dynamic());
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, PartialShape::dynamic());
|
||||
|
||||
auto scatter_elements_update = make_shared<op::v3::ScatterElementsUpdate>(arg1, arg2, arg3, arg4);
|
||||
auto scatter_elements_update = make_shared<TypeParam>(arg1, arg2, arg3, arg4);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(
|
||||
@ -1375,6 +1379,505 @@ TEST(eval, evaluate_dynamic_scatter_elements_update_one_elem_i32) {
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
REGISTER_TYPED_TEST_SUITE_P(ScatterElementsUpdateEvalTest,
|
||||
evaluate_dynamic_scatter_elements_update_one_elem_i32,
|
||||
DISABLED_evaluate_dynamic_scatter_elements_update_3d_i16,
|
||||
evaluate_dynamic_scatter_elements_update_1d_axis,
|
||||
evaluate_dynamic_scatter_elements_update_negative_axis,
|
||||
evaluate_dynamic_scatter_elements_update_basic,
|
||||
evaluate_static_scatter_elements_update_basic);
|
||||
|
||||
using OpVersions = ::testing::Types<ov::op::v3::ScatterElementsUpdate, ov::op::v12::ScatterElementsUpdate>;
|
||||
INSTANTIATE_TYPED_TEST_SUITE_P(eval, ScatterElementsUpdateEvalTest, OpVersions);
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_reduction_sum) {
|
||||
const Shape data_shape{10};
|
||||
const Shape indices_shape{4};
|
||||
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::SUM);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate(
|
||||
{result_tensor},
|
||||
{make_host_tensor<element::Type_t::f32>(data_shape,
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {5, 0, 7, 5}),
|
||||
make_host_tensor<element::Type_t::f32>(indices_shape, {5.0f, 6.0f, 1.5f, -5.0f}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<float>(result_tensor);
|
||||
const vector<float> out{6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 8.5f, 8.0f, 9.0f};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_reduction_prod_exclusive) {
|
||||
const Shape data_shape{10};
|
||||
const Shape indices_shape{4};
|
||||
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::PROD,
|
||||
false);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate(
|
||||
{result_tensor},
|
||||
{make_host_tensor<element::Type_t::f32>(data_shape,
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {1, 9, 4, 9}),
|
||||
make_host_tensor<element::Type_t::f32>(indices_shape, {5.0f, 6.0f, 1.5f, -2.0f}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<float>(result_tensor);
|
||||
const vector<float> out{0.0f, 5.0f, 2.0f, 3.0f, 1.5f, 5.0f, 6.0f, 7.0f, 8.0f, -12.0f};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_reduction_mean) {
|
||||
const Shape data_shape{3, 3};
|
||||
const Shape indices_shape{2, 2};
|
||||
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::MEAN,
|
||||
true);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate(
|
||||
{result_tensor},
|
||||
{make_host_tensor<element::Type_t::f32>(data_shape, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {2, 2, 0, 1}),
|
||||
make_host_tensor<element::Type_t::f32>(indices_shape, {10.f, 21.f, 25.f, 38.f}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {1})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<float>(result_tensor);
|
||||
const vector<float> out{1.0f, 2.0f, 11.33333f, 14.5f, 21.5f, 6.0f, 7.0f, 8.0f, 9.0f};
|
||||
for (size_t i = 0; i < cval.size(); ++i)
|
||||
EXPECT_NEAR(cval[i], out[i], 1e-5f);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_reduction_mean_exclusive) {
|
||||
const Shape data_shape{3, 3};
|
||||
const Shape indices_shape{2, 2};
|
||||
auto arg1 = make_shared<op::Parameter>(element::f32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::f32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::MEAN,
|
||||
false);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate(
|
||||
{result_tensor},
|
||||
{make_host_tensor<element::Type_t::f32>(data_shape, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {2, 2, 0, 1}),
|
||||
make_host_tensor<element::Type_t::f32>(indices_shape, {10.f, 21.f, 25.f, 38.f}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {1})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::f32);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<float>(result_tensor);
|
||||
const vector<float> out{1.0f, 2.0f, 15.5f, 25.f, 38.f, 6.0f, 7.0f, 8.0f, 9.0f};
|
||||
for (size_t i = 0; i < cval.size(); ++i)
|
||||
EXPECT_NEAR(cval[i], out[i], 1e-5f);
|
||||
}
|
||||
|
||||
TEST(eval, DISABLED_evaluate_static_scatter_elements_update_reduction_mean_ints) {
|
||||
// on MAC rounding towards -infinity doesn't work as expected, to be investigated
|
||||
const Shape data_shape{3, 3};
|
||||
const Shape indices_shape{2, 2};
|
||||
auto arg1 = make_shared<op::Parameter>(element::i32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::MEAN,
|
||||
true);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::i32>(data_shape, {1, 2, 3, 4, -5, 6, 7, 8, 9}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 1}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {-6, -2, 600, -120}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::i32);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<int32_t>(result_tensor);
|
||||
const vector<int32_t> out{-3, 2, 3, 4, -43, 6, 303, 8, 9};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_reduction_min) {
|
||||
const Shape data_shape{9};
|
||||
const Shape indices_shape{9};
|
||||
auto arg1 = make_shared<op::Parameter>(element::i32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::MIN,
|
||||
true);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(
|
||||
fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::i32>(data_shape, {-1000, 2, 3, 4, -5, 6, 7, -2, 8}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 5, 6, 7, 0}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {-999, 1, 3, 5, -4, 6, 8, 9, -1001}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::i32);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<int32_t>(result_tensor);
|
||||
const vector<int32_t> out{-1001, 1, 3, 4, -5, 6, 7, -2, 8};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_reduction_max) {
|
||||
const Shape data_shape{9};
|
||||
const Shape indices_shape{9};
|
||||
auto arg1 = make_shared<op::Parameter>(element::i32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::MAX,
|
||||
true);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(
|
||||
fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::i32>(data_shape, {-1000, 2, 3, 4, -5, 6, 7, -2, 8}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 5, 6, 7, 0}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {-999, 1, 3, 5, -4, 6, 8, 9, -1001}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::i32);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<int32_t>(result_tensor);
|
||||
const vector<int32_t> out{-999, 2, 3, 5, -4, 6, 8, 9, 8};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_reduction_max_exclusive) {
|
||||
const Shape data_shape{9};
|
||||
const Shape indices_shape{9};
|
||||
auto arg1 = make_shared<op::Parameter>(element::i32, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::MAX,
|
||||
false);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(
|
||||
fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::i32>(data_shape, {1000, 2, 3, 4, -5, 6, 7, -2, 8}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 2, 1, 3, 7, 5, 6, 7, 0}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {999, 10, 20, 30, -40, 6, 8, 9, 555}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::i32);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<int32_t>(result_tensor);
|
||||
const vector<int32_t> out{999, 20, 10, 30, -5, 6, 8, 9, 8};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_boolean_sum) {
|
||||
const Shape data_shape{5};
|
||||
const Shape indices_shape{6};
|
||||
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::SUM,
|
||||
true);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 0, 1, 0}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 1}),
|
||||
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 0, 1, 1, 1}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<char>(result_tensor);
|
||||
const vector<char> out{1, 1, 0, 1, 1};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_boolean_sum_exclusive) {
|
||||
const Shape data_shape{5};
|
||||
const Shape indices_shape{6};
|
||||
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::SUM,
|
||||
false);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 1, 1, 0}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 4, 4, 0}),
|
||||
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 1, 0, 1, 1, 1}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<char>(result_tensor);
|
||||
const vector<char> out{1, 1, 0, 1, 1};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_boolean_prod) {
|
||||
const Shape data_shape{5};
|
||||
const Shape indices_shape{6};
|
||||
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::PROD,
|
||||
true);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 0, 1, 1}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 1}),
|
||||
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 1, 1, 0, 1}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<char>(result_tensor);
|
||||
const vector<char> out{0, 0, 0, 1, 0};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_boolean_prod_exclusive) {
|
||||
const Shape data_shape{5};
|
||||
const Shape indices_shape{6};
|
||||
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::PROD,
|
||||
false);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 1, 1, 0}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 4, 4, 0}),
|
||||
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 1, 1, 1, 1}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<char>(result_tensor);
|
||||
const vector<char> out{0, 0, 1, 1, 1};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_boolean_min) {
|
||||
const Shape data_shape{6};
|
||||
const Shape indices_shape{8};
|
||||
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::MIN,
|
||||
true);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 0, 1, 1, 0}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 4, 5, 5}),
|
||||
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 0, 1, 0, 1, 1, 0}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<char>(result_tensor);
|
||||
const vector<char> out{0, 0, 0, 1, 0, 0};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
TEST(eval, evaluate_static_scatter_elements_update_boolean_min_exclusive) {
|
||||
const Shape data_shape{6};
|
||||
const Shape indices_shape{8};
|
||||
auto arg1 = make_shared<op::Parameter>(element::boolean, data_shape);
|
||||
auto arg2 = make_shared<op::Parameter>(element::i32, indices_shape);
|
||||
auto arg3 = make_shared<op::Parameter>(element::boolean, indices_shape);
|
||||
auto arg4 = make_shared<op::Parameter>(element::i64, Shape{});
|
||||
auto scatter_elements_update =
|
||||
make_shared<ov::op::v12::ScatterElementsUpdate>(arg1,
|
||||
arg2,
|
||||
arg3,
|
||||
arg4,
|
||||
ov::op::v12::ScatterElementsUpdate::Reduction::MIN,
|
||||
false);
|
||||
auto fun = make_shared<Function>(OutputVector{scatter_elements_update}, ParameterVector{arg1, arg2, arg3, arg4});
|
||||
auto result_tensor = make_shared<HostTensor>();
|
||||
ASSERT_TRUE(fun->evaluate({result_tensor},
|
||||
{make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 1, 0, 1, 0}),
|
||||
make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 4, 5, 5}),
|
||||
make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 0, 1, 1, 0, 1, 1, 1}),
|
||||
make_host_tensor<element::Type_t::i64>({}, {0})}));
|
||||
EXPECT_EQ(result_tensor->get_element_type(), element::boolean);
|
||||
EXPECT_EQ(result_tensor->get_shape(), data_shape);
|
||||
const auto cval = read_vector<char>(result_tensor);
|
||||
const vector<char> out{0, 0, 1, 1, 0, 1};
|
||||
ASSERT_EQ(cval, out);
|
||||
}
|
||||
|
||||
// ScatterElementsUpdate-12 with Reduction::MAX on boolean data; the last constructor
// argument is `true`. Judging by the expected values, the original data takes part in
// the reduction: each output element is the maximum of the data value and every update
// scattered onto it (indices 4 and 5 each receive two updates).
TEST(eval, evaluate_static_scatter_elements_update_boolean_max) {
    using ScatterOp = ov::op::v12::ScatterElementsUpdate;

    const Shape data_shape{6};
    const Shape indices_shape{8};

    const auto data_param = make_shared<op::Parameter>(element::boolean, data_shape);
    const auto indices_param = make_shared<op::Parameter>(element::i32, indices_shape);
    const auto updates_param = make_shared<op::Parameter>(element::boolean, indices_shape);
    const auto axis_param = make_shared<op::Parameter>(element::i64, Shape{});

    const auto scatter = make_shared<ScatterOp>(data_param,
                                                indices_param,
                                                updates_param,
                                                axis_param,
                                                ScatterOp::Reduction::MAX,
                                                true);
    const auto model = make_shared<Function>(OutputVector{scatter},
                                             ParameterVector{data_param, indices_param, updates_param, axis_param});

    // Concrete input values, created in the same order as the parameters above.
    const auto data = make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 0, 1, 1, 0});
    const auto indices = make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 4, 5, 5});
    const auto updates = make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 1, 0, 1, 0, 1, 0, 0});
    const auto axis = make_host_tensor<element::Type_t::i64>({}, {0});

    auto output = make_shared<HostTensor>();
    ASSERT_TRUE(model->evaluate({output}, {data, indices, updates, axis}));

    EXPECT_EQ(output->get_element_type(), element::boolean);
    EXPECT_EQ(output->get_shape(), data_shape);

    const vector<char> expected{1, 1, 0, 1, 1, 0};
    const auto actual = read_vector<char>(output);
    ASSERT_EQ(actual, expected);
}
|
||||
|
||||
// ScatterElementsUpdate-12 with Reduction::MAX on boolean data; the last constructor
// argument is `false`. Judging by the expected values, the original data is ignored at
// every index that receives an update (e.g. output[0] is 0 although data[0] is 1), so
// the result is the maximum over the updates alone.
TEST(eval, evaluate_static_scatter_elements_update_boolean_max_exclusive) {
    using ScatterOp = ov::op::v12::ScatterElementsUpdate;

    const Shape data_shape{6};
    const Shape indices_shape{8};

    const auto data_param = make_shared<op::Parameter>(element::boolean, data_shape);
    const auto indices_param = make_shared<op::Parameter>(element::i32, indices_shape);
    const auto updates_param = make_shared<op::Parameter>(element::boolean, indices_shape);
    const auto axis_param = make_shared<op::Parameter>(element::i64, Shape{});

    const auto scatter = make_shared<ScatterOp>(data_param,
                                                indices_param,
                                                updates_param,
                                                axis_param,
                                                ScatterOp::Reduction::MAX,
                                                false);
    const auto model = make_shared<Function>(OutputVector{scatter},
                                             ParameterVector{data_param, indices_param, updates_param, axis_param});

    // Concrete input values, created in the same order as the parameters above.
    const auto data = make_host_tensor<element::Type_t::boolean>(data_shape, {1, 0, 1, 0, 1, 0});
    const auto indices = make_host_tensor<element::Type_t::i32>(indices_shape, {0, 1, 2, 3, 4, 4, 5, 5});
    const auto updates = make_host_tensor<element::Type_t::boolean>(indices_shape, {0, 1, 1, 0, 0, 1, 0, 0});
    const auto axis = make_host_tensor<element::Type_t::i64>({}, {0});

    auto output = make_shared<HostTensor>();
    ASSERT_TRUE(model->evaluate({output}, {data, indices, updates, axis}));

    EXPECT_EQ(output->get_element_type(), element::boolean);
    EXPECT_EQ(output->get_shape(), data_shape);

    const vector<char> expected{0, 1, 1, 0, 1, 0};
    const auto actual = read_vector<char>(output);
    ASSERT_EQ(actual, expected);
}
|
||||
|
||||
// ScatterElementsUpdate-12 with Reduction::SUM and negative indices. The expected
// values show that negative indices count from the end of the axis (-5 -> 5, -3 -> 7)
// and that repeated indices accumulate: output[5] = 5 + 5 - 5, output[7] = 7 + 1.5.
TEST(eval, evaluate_static_scatter_elements_update_reduction_sum_negative_idx) {
    using ScatterOp = ov::op::v12::ScatterElementsUpdate;

    const Shape data_shape{10};
    const Shape indices_shape{4};

    const auto data_param = make_shared<op::Parameter>(element::f32, data_shape);
    const auto indices_param = make_shared<op::Parameter>(element::i32, indices_shape);
    const auto updates_param = make_shared<op::Parameter>(element::f32, indices_shape);
    const auto axis_param = make_shared<op::Parameter>(element::i64, Shape{});

    const auto scatter = make_shared<ScatterOp>(data_param,
                                                indices_param,
                                                updates_param,
                                                axis_param,
                                                ScatterOp::Reduction::SUM);
    const auto model = make_shared<Function>(OutputVector{scatter},
                                             ParameterVector{data_param, indices_param, updates_param, axis_param});

    // Concrete input values, created in the same order as the parameters above.
    const auto data =
        make_host_tensor<element::Type_t::f32>(data_shape,
                                               {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f});
    const auto indices = make_host_tensor<element::Type_t::i32>(indices_shape, {-5, 0, -3, -5});
    const auto updates = make_host_tensor<element::Type_t::f32>(indices_shape, {5.0f, 6.0f, 1.5f, -5.0f});
    const auto axis = make_host_tensor<element::Type_t::i64>({}, {0});

    auto output = make_shared<HostTensor>();
    ASSERT_TRUE(model->evaluate({output}, {data, indices, updates, axis}));

    EXPECT_EQ(output->get_element_type(), element::f32);
    EXPECT_EQ(output->get_shape(), data_shape);

    const vector<float> expected{6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 8.5f, 8.0f, 9.0f};
    const auto actual = read_vector<float>(output);
    ASSERT_EQ(actual, expected);
}
|
||||
|
||||
TEST(eval, topk_v1) {
|
||||
Shape shape{2, 3, 2};
|
||||
Shape rshape{2, 2, 2};
|
||||
|
@ -67,7 +67,7 @@ _OPENVINO_OP_REG(Greater, ngraph::op::v1)
|
||||
_OPENVINO_OP_REG(GreaterEqual, ngraph::op::v1)
|
||||
_OPENVINO_OP_REG(GroupConvolution, ngraph::op::v1)
|
||||
_OPENVINO_OP_REG(GroupConvolutionBackpropData, ngraph::op::v1)
|
||||
_OPENVINO_OP_REG(GroupNormalization, ngraph::op::v12)
|
||||
_OPENVINO_OP_REG(GroupNormalization, ov::op::v12)
|
||||
_OPENVINO_OP_REG(HardSigmoid, ngraph::op::v0)
|
||||
_OPENVINO_OP_REG(Interpolate, ngraph::op::v0)
|
||||
_OPENVINO_OP_REG(Interpolate, ngraph::op::v4)
|
||||
|
@ -164,8 +164,8 @@ protected:
|
||||
if (sts != OK) {
|
||||
IE_EXCEPTION_SWITCH(sts,
|
||||
ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
|
||||
std::stringstream{} << IE_LOCATION << desc.msg)
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<=
|
||||
std::stringstream{} << desc.msg)
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
_ptr = std::shared_ptr<T>(object, [](T* ptr) {
|
||||
|
@ -328,7 +328,14 @@ IE_SUPPRESS_DEPRECATED_END
|
||||
namespace details {
|
||||
template <typename ExceptionType>
|
||||
struct ExceptionTraits;
|
||||
}
|
||||
|
||||
template <>
|
||||
struct INFERENCE_ENGINE_1_0_DEPRECATED ExceptionTraits<InferenceEngineException> {
|
||||
static const char* string() {
|
||||
return "";
|
||||
}
|
||||
};
|
||||
} // namespace details
|
||||
|
||||
#define INFERENCE_ENGINE_DECLARE_EXCEPTION(ExceptionType, statusCode) \
|
||||
struct INFERENCE_ENGINE_1_0_DEPRECATED INFERENCE_ENGINE_API_CLASS(ExceptionType) final \
|
||||
@ -400,20 +407,45 @@ namespace details {
|
||||
/**
|
||||
* @brief Tag struct used to throw exception
|
||||
*/
|
||||
#ifndef NDEBUG
|
||||
template <typename ExceptionType>
|
||||
struct INFERENCE_ENGINE_1_0_DEPRECATED ThrowNow final {
|
||||
[[noreturn]] void operator<<=(const std::ostream& ostream) {
|
||||
std::ostringstream stream;
|
||||
stream << ostream.rdbuf();
|
||||
const char* const file;
|
||||
const int line;
|
||||
|
||||
[[noreturn]] static void create(const std::ostream& ostream, const char* file, int line) {
|
||||
std::stringstream stream;
|
||||
stream << '\n' << file << ':' << line << ' ';
|
||||
stream << ExceptionTraits<ExceptionType>::string() << ' ' << ostream.rdbuf();
|
||||
throw ExceptionType{stream.str()};
|
||||
}
|
||||
|
||||
[[noreturn]] void operator<<=(const std::ostream& ostream) {
|
||||
create(ostream, file, line);
|
||||
}
|
||||
};
|
||||
#else
|
||||
template <typename ExceptionType>
|
||||
struct INFERENCE_ENGINE_1_0_DEPRECATED ThrowNow final {
|
||||
[[noreturn]] static void create(const std::ostream& ostream) {
|
||||
std::stringstream stream;
|
||||
stream << ExceptionTraits<ExceptionType>::string() << ' ' << ostream.rdbuf();
|
||||
throw ExceptionType{stream.str()};
|
||||
}
|
||||
|
||||
[[noreturn]] void operator<<=(const std::ostream& ostream) {
|
||||
create(ostream);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
/// @cond
|
||||
#ifndef NDEBUG
|
||||
# define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__ << ' '
|
||||
# define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__ << ' '
|
||||
# define IE_LOCATION_PARAM __FILE__, __LINE__
|
||||
#else
|
||||
# define IE_LOCATION ""
|
||||
# define IE_LOCATION_PARAM
|
||||
#endif // NDEBUG
|
||||
|
||||
// WARNING: DO NOT USE THIS MACRO! Use openvino/util/pp.hpp macro library
|
||||
@ -430,13 +462,10 @@ struct INFERENCE_ENGINE_1_0_DEPRECATED ThrowNow final {
|
||||
// ENDWARNING
|
||||
|
||||
#define IE_THROW_0() \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError>{} <<= std::stringstream{} << IE_LOCATION
|
||||
(InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError>{IE_LOCATION_PARAM}) <<= std::stringstream {}
|
||||
|
||||
#define IE_THROW_1(ExceptionType) \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::ExceptionType>{} <<= \
|
||||
std::stringstream{} << IE_LOCATION \
|
||||
<< InferenceEngine::details::ExceptionTraits<InferenceEngine::ExceptionType>::string() \
|
||||
<< ' '
|
||||
#define IE_THROW_1(ExceptionType) \
|
||||
(InferenceEngine::details::ThrowNow<InferenceEngine::ExceptionType>{IE_LOCATION_PARAM}) <<= std::stringstream {}
|
||||
/// @endcond
|
||||
|
||||
/**
|
||||
@ -452,7 +481,7 @@ struct INFERENCE_ENGINE_1_0_DEPRECATED ThrowNow final {
|
||||
#ifdef NDEBUG
|
||||
# define IE_ASSERT(EXPRESSION) \
|
||||
if (!(EXPRESSION)) \
|
||||
IE_THROW(GeneralError) << " AssertionFailed: " << #EXPRESSION
|
||||
IE_THROW(GeneralError) << " AssertionError " #EXPRESSION
|
||||
#else
|
||||
/**
|
||||
* @private
|
||||
@ -470,9 +499,9 @@ struct NullStream {
|
||||
#endif // NDEBUG
|
||||
|
||||
/// @cond
|
||||
#define THROW_IE_EXCEPTION \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::details::InferenceEngineException>{} <<= std::stringstream{} \
|
||||
<< IE_LOCATION
|
||||
#define THROW_IE_EXCEPTION \
|
||||
(InferenceEngine::details::ThrowNow<InferenceEngine::details::InferenceEngineException>{IE_LOCATION_PARAM}) <<= \
|
||||
std::stringstream {}
|
||||
|
||||
#define IE_EXCEPTION_CASE(TYPE_ALIAS, STATUS_CODE, EXCEPTION_TYPE, ...) \
|
||||
case InferenceEngine::STATUS_CODE: { \
|
||||
|
@ -59,15 +59,16 @@ namespace InferenceEngine {
|
||||
}
|
||||
|
||||
#define CALL_STATUS_FNC(function, ...) \
|
||||
if (!actual) \
|
||||
if (!actual) { \
|
||||
IE_THROW() << "Wrapper used was not initialized."; \
|
||||
} \
|
||||
ResponseDesc resp; \
|
||||
auto res = actual->function(__VA_ARGS__, &resp); \
|
||||
if (res != OK) \
|
||||
IE_EXCEPTION_SWITCH( \
|
||||
res, \
|
||||
ExceptionType, \
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << resp.msg)
|
||||
(InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM}) <<= std::stringstream{} << resp.msg)
|
||||
|
||||
#define CALL_STATUS_FNC_NO_ARGS(function) \
|
||||
if (!actual) \
|
||||
@ -75,8 +76,9 @@ namespace InferenceEngine {
|
||||
ResponseDesc resp; \
|
||||
auto res = actual->function(&resp); \
|
||||
if (res != OK) \
|
||||
IE_EXCEPTION_SWITCH(res, \
|
||||
ExceptionType, \
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION)
|
||||
IE_EXCEPTION_SWITCH( \
|
||||
res, \
|
||||
ExceptionType, \
|
||||
(InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM}) <<= std::stringstream{})
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -31,10 +31,9 @@ std::shared_ptr<T> CreateExtensionFromLibrary(std::shared_ptr<void> _so) {
|
||||
ResponseDesc desc;
|
||||
StatusCode sts = reinterpret_cast<CreateF*>(create)(object, &desc);
|
||||
if (sts != OK) {
|
||||
IE_EXCEPTION_SWITCH(
|
||||
sts,
|
||||
ExceptionType,
|
||||
details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << desc.msg)
|
||||
IE_EXCEPTION_SWITCH(sts,
|
||||
ExceptionType,
|
||||
details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << desc.msg)
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
_ptr = std::shared_ptr<T>(object, [](T* ptr) {
|
||||
|
@ -18,14 +18,14 @@ public:
|
||||
TO_STATUS(IE_EXCEPTION_SWITCH(
|
||||
statusCode,
|
||||
ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION))
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<= std::stringstream{}))
|
||||
}
|
||||
|
||||
static InferenceEngine::StatusCode toStatusWrapperMsg(std::string& msg, InferenceEngine::ResponseDesc* resp) {
|
||||
TO_STATUS(IE_EXCEPTION_SWITCH(
|
||||
statusCode,
|
||||
ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << msg))
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<= std::stringstream{} << msg))
|
||||
}
|
||||
};
|
||||
|
||||
@ -72,7 +72,7 @@ TEST_F(ExceptionTests, throwAfterConvertStatusToClassContainMessage) {
|
||||
std::string refMessage = "Exception message!";
|
||||
auto actual = std::make_shared<WrapperClass<StatusCode::NOT_ALLOCATED>>();
|
||||
try {
|
||||
CALL_STATUS_FNC(toStatusWrapperMsg, refMessage)
|
||||
CALL_STATUS_FNC(toStatusWrapperMsg, refMessage);
|
||||
} catch (const NotAllocated& iex) {
|
||||
std::string actualMessage = iex.what();
|
||||
ASSERT_TRUE(actualMessage.find(refMessage) != std::string::npos);
|
||||
|
@ -36,7 +36,8 @@ TEST(ExceptionTests, ExceptionShowsCorrectMessageDebugVersion) {
|
||||
lineNum = __LINE__ + 1;
|
||||
IE_THROW() << message;
|
||||
} catch (InferenceEngine::Exception& iex) {
|
||||
std::string ref_message = std::string{"\n"} + __FILE__ + ":" + std::to_string(lineNum) + " " + message;
|
||||
std::string ref_message =
|
||||
std::string{"\n"} + __FILE__ + ":" + std::to_string(lineNum) + " [ GENERAL_ERROR ] " + message;
|
||||
ASSERT_STREQ(iex.what(), ref_message.c_str());
|
||||
}
|
||||
}
|
||||
@ -46,7 +47,7 @@ TEST(ExceptionTests, ExceptionShowsCorrectMessageReleaseVersion) {
|
||||
try {
|
||||
IE_THROW() << message;
|
||||
} catch (InferenceEngine::Exception& iex) {
|
||||
std::string ref_message = message;
|
||||
std::string ref_message = "[ GENERAL_ERROR ] " + message;
|
||||
ASSERT_STREQ(iex.what(), ref_message.c_str());
|
||||
}
|
||||
}
|
||||
|
@ -92,9 +92,9 @@ struct DeviceInformation {
|
||||
DeviceName unique_name;
|
||||
unsigned int device_priority;
|
||||
DeviceInformation(DeviceName dn = {}, ov::AnyMap conf = {},
|
||||
int nReq = -1, std::string defaultID = {}, DeviceName uName = {}, unsigned int priority = 0)
|
||||
: device_name(dn), config(conf),
|
||||
num_requests_per_devices(nReq), default_device_id(defaultID), unique_name(uName), device_priority(priority)
|
||||
int n_req = -1, std::string default_id = {}, DeviceName name = {}, unsigned int priority = 0)
|
||||
: device_name(std::move(dn)), config(std::move(conf)),
|
||||
num_requests_per_devices(n_req), default_device_id(std::move(default_id)), unique_name(std::move(name)), device_priority(priority)
|
||||
{}
|
||||
};
|
||||
|
||||
|
@ -282,8 +282,7 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
|
||||
auto ret = m_plugin_config.supported_properties(get_device_name());
|
||||
return ret;
|
||||
} else if (name == ov::device::full_name) {
|
||||
std::string device_name = { get_device_name() };
|
||||
return decltype(ov::device::full_name)::value_type {device_name};
|
||||
return decltype(ov::device::full_name)::value_type {get_device_name()};
|
||||
} else if (name == ov::device::capabilities.name()) {
|
||||
auto device_list = get_core()->get_available_devices();
|
||||
std::vector<std::string> capabilities;
|
||||
@ -538,7 +537,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
|
||||
queryconfig.apply_user_properties();
|
||||
auto full_property = queryconfig.get_full_properties();
|
||||
auto priorities = full_property.find(ov::device::priorities.name());
|
||||
if (!priorities->second.empty()) {
|
||||
if (priorities!= full_property.end() && !priorities->second.empty()) {
|
||||
auto meta_devices = parse_meta_devices(priorities->second.as<std::string>(), full_property);
|
||||
std::unordered_set<std::string> supported_layers;
|
||||
for (auto&& value : meta_devices) {
|
||||
|
@ -701,8 +701,8 @@ void StoreConvertEmitter::emit_isa(const std::vector<size_t> &in, const std::vec
|
||||
void StoreConvertEmitter::emit_data() const {
|
||||
store_emitter->emit_data();
|
||||
}
|
||||
size_t BrgemmEmitter::getBrgIdx(size_t mIdx, size_t kIdx, size_t nIdx) const {
|
||||
return mIdx * 4 + kIdx * 2 + nIdx;
|
||||
size_t BrgemmEmitter::getBrgIdx(size_t kIdx, size_t nIdx) const {
|
||||
return kIdx * 2 + nIdx;
|
||||
}
|
||||
BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa,
|
||||
const std::shared_ptr<ov::Node>& node) : jit_emitter(h, isa, node) {
|
||||
@ -758,10 +758,8 @@ BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl:
|
||||
return std::distance(layout.begin(), std::find(layout.begin(), layout.end(), idx));
|
||||
};
|
||||
|
||||
m_M = C_shape[get_ordered_idx(C_layout, C_layout.size() - 2)];
|
||||
m_K = A_shape[get_ordered_idx(A_layout, A_layout.size() - 1)];
|
||||
m_M_blk = matmulOptimalM;
|
||||
m_M_tail = m_M % m_M_blk;
|
||||
m_M = brgemm_node->get_input_count(0);
|
||||
m_N = C_shape[get_ordered_idx(C_layout, C_layout.size() - 1)];
|
||||
|
||||
auto brg0Prc = InferenceEngine::details::convertPrecision(brgemm_node->get_input_element_type(0));
|
||||
@ -780,34 +778,28 @@ BrgemmEmitter::BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl:
|
||||
: m_K;
|
||||
m_K_tail = m_K % m_K_blk;
|
||||
|
||||
size_t brg0BaseIdx = std::numeric_limits<size_t>::max();
|
||||
for (size_t m = 0; m < 2; m++) {
|
||||
for (size_t k = 0; k < 2; k++) {
|
||||
for (size_t n = 0; n < 2; n++) {
|
||||
auto& brgemmCtx = m_brgCtxs0[getBrgIdx(m, k, n)];
|
||||
for (size_t k = 0; k < 2; k++) {
|
||||
for (size_t n = 0; n < 2; n++) {
|
||||
auto& brgemmCtx = m_brgCtxs0[getBrgIdx(k, n)];
|
||||
|
||||
auto M_ = m ? m_M_tail
|
||||
: m_M < m_M_blk ? 0 : m_M_blk;
|
||||
auto N_ = n ? m_N_tail : m_N - m_N_tail;
|
||||
auto K_ = k ? m_K_tail : m_K - m_K_tail;
|
||||
auto beta = k && m_brgCtxs0[getBrgIdx(m, 0, n)].K != 0 ? 1.0f : 0.0f;
|
||||
auto M_ = m_M;
|
||||
auto N_ = n ? m_N_tail : m_N - m_N_tail;
|
||||
auto K_ = k ? m_K_tail : m_K - m_K_tail;
|
||||
auto beta = k && m_brgCtxs0[getBrgIdx(0, n)].K != 0 ? 1.0f : 0.0f;
|
||||
|
||||
brgemmCtx.M = M_;
|
||||
brgemmCtx.N = N_;
|
||||
brgemmCtx.K = K_;
|
||||
brgemmCtx.LDA = leading_dimensions[0];
|
||||
brgemmCtx.LDB = brgemm_node->is_with_data_repacking() ? rnd_up(m_N, m_N_blk) : leading_dimensions[1];
|
||||
brgemmCtx.LDC = leading_dimensions[2];
|
||||
brgemmCtx.dt_in0 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg0Prc));
|
||||
brgemmCtx.dt_in1 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg1Prc));
|
||||
brgemmCtx.beta = beta;
|
||||
brgemmCtx.M = M_;
|
||||
brgemmCtx.N = N_;
|
||||
brgemmCtx.K = K_;
|
||||
brgemmCtx.LDA = leading_dimensions[0];
|
||||
brgemmCtx.LDB = brgemm_node->is_with_data_repacking() ? rnd_up(m_N, m_N_blk) : leading_dimensions[1];
|
||||
brgemmCtx.LDC = leading_dimensions[2];
|
||||
brgemmCtx.dt_in0 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg0Prc));
|
||||
brgemmCtx.dt_in1 = static_cast<dnnl_data_type_t>(DnnlExtensionUtils::IEPrecisionToDataType(brg1Prc));
|
||||
brgemmCtx.beta = beta;
|
||||
|
||||
// don't create brgemm kernels for empty tiles
|
||||
if (M_ != 0 && K_ != 0 && N_ != 0) {
|
||||
if (brg0BaseIdx == std::numeric_limits<size_t>::max())
|
||||
brg0BaseIdx = getBrgIdx(m, k, n);
|
||||
initBrgemm(brgemmCtx, m_brgKernels0[getBrgIdx(m, k, n)], brgWithAMX);
|
||||
}
|
||||
// don't create brgemm kernels for empty tiles
|
||||
if (M_ != 0 && K_ != 0 && N_ != 0) {
|
||||
initBrgemm(brgemmCtx, m_brgKernels0[getBrgIdx(k, n)], brgWithAMX);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -878,36 +870,31 @@ void BrgemmEmitter::emit_impl(const std::vector<size_t>& in,
|
||||
}
|
||||
Xbyak::Reg64 output_0(static_cast<int>(out[0]));
|
||||
|
||||
for (size_t mb = 0; mb < div_up(m_M, m_M_blk); mb++) {
|
||||
const bool is_M_tail = (m_M - mb * m_M_blk < m_M_blk);
|
||||
size_t brgIdx0 = getBrgIdx(0, 0);
|
||||
size_t K0_step0 = m_brgCtxs0[brgIdx0].K;
|
||||
size_t K0_step1 = m_brgCtxs0[brgIdx0].K * m_brgCtxs0[brgIdx0].LDB;
|
||||
size_t N0_step0 = m_brgCtxs0[brgIdx0].N * m_brg0VnniFactor;
|
||||
size_t N0_step1 = m_brgCtxs0[brgIdx0].N;
|
||||
for (size_t n = 0; n < 2; n++) {
|
||||
for (size_t k = 0; k < 2; k++) {
|
||||
auto& brgemmCtx = m_brgCtxs0[getBrgIdx(k, n)];
|
||||
|
||||
size_t brgIdx0 = getBrgIdx(0, 0, 0);
|
||||
size_t K0_step0 = m_brgCtxs0[brgIdx0].K;
|
||||
size_t K0_step1 = m_brgCtxs0[brgIdx0].K * m_brgCtxs0[brgIdx0].LDB;
|
||||
size_t N0_step0 = m_brgCtxs0[brgIdx0].N * m_brg0VnniFactor;
|
||||
size_t N0_step1 = m_brgCtxs0[brgIdx0].N;
|
||||
for (size_t n = 0; n < 2; n++) {
|
||||
for (size_t k = 0; k < 2; k++) {
|
||||
size_t mIdx = is_M_tail ? 1 : 0;
|
||||
auto& brgemmCtx = m_brgCtxs0[getBrgIdx(mIdx, k, n)];
|
||||
if (brgemmCtx.K != 0 && brgemmCtx.N != 0) {
|
||||
const size_t in0_offset = m_load_offset_a + k * K0_step0 * io_data_size[0];
|
||||
const size_t in1_offset = m_load_offset_b + (k * K0_step1 + n * N0_step0) * io_data_size[1];
|
||||
const size_t in2_offset = m_load_offset_scratch + (m_with_comp ? n * N0_step1 * sizeof(int32_t) : 0);
|
||||
const size_t out0_offset = m_store_offset_c + n * N0_step1 * io_data_size[2];
|
||||
|
||||
if (brgemmCtx.K != 0 && brgemmCtx.N != 0) {
|
||||
const size_t in0_offset = m_load_offset_a + (k * K0_step0 + mb * m_M_blk * brgemmCtx.LDA) * io_data_size[0];
|
||||
const size_t in1_offset = m_load_offset_b + (k * K0_step1 + n * N0_step0) * io_data_size[1];
|
||||
const size_t in2_offset = m_load_offset_scratch + (m_with_comp ? n * N0_step1 * sizeof(int32_t) : 0);
|
||||
const size_t out0_offset = m_store_offset_c + (n * N0_step1 + mb * m_M_blk * brgemmCtx.LDC) * io_data_size[2];
|
||||
|
||||
emit_brgemm_kernel_call(m_brgKernels0[getBrgIdx(mIdx, k, n)].get(),
|
||||
brgemmCtx,
|
||||
input_0,
|
||||
input_1,
|
||||
input_2,
|
||||
output_0,
|
||||
in0_offset,
|
||||
in1_offset,
|
||||
in2_offset,
|
||||
out0_offset);
|
||||
}
|
||||
emit_brgemm_kernel_call(m_brgKernels0[getBrgIdx(k, n)].get(),
|
||||
brgemmCtx,
|
||||
input_0,
|
||||
input_1,
|
||||
input_2,
|
||||
output_0,
|
||||
in0_offset,
|
||||
in1_offset,
|
||||
in2_offset,
|
||||
out0_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -353,7 +353,7 @@ private:
|
||||
float beta;
|
||||
};
|
||||
void initBrgemm(brgemmCtx& ctx, std::unique_ptr<dnnl::impl::cpu::x64::brgemm_kernel_t>& brgKernel, bool use_amx) const;
|
||||
size_t getBrgIdx(size_t mIdx, size_t kIdx, size_t nIdx) const;
|
||||
size_t getBrgIdx(size_t kIdx, size_t nIdx) const;
|
||||
|
||||
void emit_brgemm_kernel_call(const dnnl::impl::cpu::x64::brgemm_kernel_t* brg_kernel, const brgemmCtx& ctx,
|
||||
Xbyak::Reg64 addr_A, Xbyak::Reg64 addr_B, Xbyak::Reg64 scratch, Xbyak::Reg64 addr_C,
|
||||
@ -362,11 +362,10 @@ private:
|
||||
static void kernel_execute(const dnnl::impl::cpu::x64::brgemm_kernel_t *brg_kernel, const void *A, const void *B, void *C, void *scratch, int with_comp);
|
||||
|
||||
static constexpr size_t BRGEMM_KERNELS_NUM = 8;
|
||||
static constexpr size_t matmulOptimalM = 32;
|
||||
brgemmCtx m_brgCtxs0[BRGEMM_KERNELS_NUM];
|
||||
std::unique_ptr<dnnl::impl::cpu::x64::brgemm_kernel_t> m_brgKernels0[BRGEMM_KERNELS_NUM];
|
||||
|
||||
size_t m_M, m_M_blk, m_M_tail;
|
||||
size_t m_M;
|
||||
size_t m_K, m_K_blk, m_K_tail;
|
||||
size_t m_N, m_N_blk, m_N_tail;
|
||||
size_t m_brg0VnniFactor;
|
||||
|
@ -1996,7 +1996,15 @@ void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph &graph) {
|
||||
|
||||
const auto &outputShape = child->getOutputShapeAtPort(0);
|
||||
VectorDims outputDims = outputShape.getDims();
|
||||
const auto channelPos = parent->getParentEdgeAt(0)->getParent()->getFusingAxis();
|
||||
|
||||
// We need to explicitly compute the port that holds the unfolded (non-constant) parent,
|
||||
// because there is no guarantee that the order of operands will be invariant
|
||||
// (i.e. zero) after all transformations, which may cause wrong channel-dim in
|
||||
// [Const-Shift -> Add <- Mul] topology with a constant-folded shift
|
||||
// (a Const node returns 1 by default as its channel dim).
|
||||
// Look into FQScaleshiftWithConstantShift test
|
||||
const auto nonConstPort = (parent->getParentEdgeAt(0)->getParent()->isConstant() ? 1 : 0);
|
||||
const auto channelPos = parent->getParentEdgeAt(nonConstPort)->getParent()->getFusingAxis();
|
||||
|
||||
if (outputShape.isDynamic()) {
|
||||
if (outputDims[channelPos] == Shape::UNDEFINED_DIM) {
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "utils/cpu_utils.hpp"
|
||||
#include "emitters/x64/cpu_generator.hpp"
|
||||
#include "transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.hpp"
|
||||
#include "transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp"
|
||||
#include "transformations/snippets/x64/pass/mul_add_to_fma.hpp"
|
||||
#include "transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.hpp"
|
||||
#include "transformations/snippets/x64/pass/remove_converts.hpp"
|
||||
@ -564,6 +565,9 @@ void Snippet::generate(const jit_snippets_compile_args* jcp) {
|
||||
CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::RemoveConverts);
|
||||
CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::MulAddToFMA);
|
||||
|
||||
ov::snippets::lowered::pass::PassPipeline control_flow_markup_pipeline;
|
||||
CPU_REGISTER_PASS_X64(control_flow_markup_pipeline, ov::intel_cpu::pass::BrgemmBlocking);
|
||||
|
||||
ov::snippets::lowered::pass::PassPipeline control_flow_pipeline;
|
||||
CPU_REGISTER_PASS_X64(control_flow_pipeline, ov::intel_cpu::pass::FuseLoadStoreConvert);
|
||||
|
||||
@ -571,6 +575,7 @@ void Snippet::generate(const jit_snippets_compile_args* jcp) {
|
||||
pre_dialect,
|
||||
post_dialect,
|
||||
post_precision,
|
||||
control_flow_markup_pipeline,
|
||||
control_flow_pipeline,
|
||||
reinterpret_cast<const void*>(jcp));
|
||||
}
|
||||
|
@ -0,0 +1,80 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "brgemm_blocking.hpp"
|
||||
|
||||
#include "openvino/pass/pattern/matcher.hpp"
|
||||
#include "openvino/pass/pattern/op/wrap_type.hpp"
|
||||
#include "snippets/itt.hpp"
|
||||
#include "snippets/lowered/linear_ir.hpp"
|
||||
#include "snippets/lowered/loop_manager.hpp"
|
||||
#include "snippets/snippets_isa.hpp"
|
||||
#include "transformations/snippets/x64/op/brgemm_cpu.hpp"
|
||||
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
namespace pass {
|
||||
using LoopManager = snippets::lowered::LinearIR::LoopManager;
|
||||
using LoopInfoPtr = LoopManager::LoopInfoPtr;
|
||||
using LoopPort = LoopManager::LoopPort;
|
||||
|
||||
// Default constructor: only value-initializes the base lowered Pass; nothing else is set up here.
BrgemmBlocking::BrgemmBlocking() : Pass() {}
|
||||
|
||||
/**
 * @brief Wraps every BrgemmCPU expression that is not yet inside a loop over dimension
 *        `dim_idx` into a new blocking loop, and sets the Brgemm's input count on port 0
 *        to the block size.
 * @param linear_ir Linear IR to process; its loop manager receives the new loops.
 * @return true if at least one blocking loop was created, false otherwise.
 */
bool BrgemmBlocking::run(snippets::lowered::LinearIR& linear_ir) {
    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmBlocking")
    if (linear_ir.empty())
        return false;

    // Ticket: 113745
    // TODO: make the block size configurable
    const auto block_size = 32;
    // Index of the blocked dimension, counted from the end of the layout (the assert below calls it "M").
    const auto dim_idx = 1;

    const auto& loop_manager = linear_ir.get_loop_manager();

    // Returns true if `expr` already sits in a loop over `dim_idx`. In that case the loop
    // increment must agree with the input count already stored in the Brgemm node.
    auto blocking_loop_exists = [&](const ov::snippets::lowered::ExpressionPtr& expr,
                                    const std::shared_ptr<ov::intel_cpu::BrgemmCPU>& brgemm) {
        const auto& loop_ids = expr->get_loop_ids();
        for (const auto& id : loop_ids) {
            const auto loop = loop_manager->get_loop_info(id);
            if (loop->dim_idx == dim_idx) {
                OPENVINO_ASSERT(brgemm->get_input_count(0) == loop->increment,
                                "Brgemm ", brgemm, " has input count (", brgemm->get_input_count(0),
                                ") which doesn't match the increment(", loop->increment, ") of loop by M");
                return true;
            }
        }
        return false;
    };

    bool modified = false;
    for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end(); expr_it++) {
        const auto& expr = *expr_it;
        const auto brgemm = ov::as_type_ptr<ov::intel_cpu::BrgemmCPU>(expr->get_node());
        if (!brgemm || blocking_loop_exists(expr, brgemm))
            continue;

        // The loop work amount is the size of the blocked dimension of the first input.
        const auto& input_shape_0 = expr->get_input_port_descriptor(0)->get_shape();
        const auto& input_layout_0 = expr->get_input_port_descriptor(0)->get_layout();
        const auto& dim = *(input_layout_0.rbegin() + dim_idx);
        const auto& m = input_shape_0[dim];

        brgemm->set_input_count(block_size);

        const auto work_amount = m;
        const auto increment = block_size;

        // Only the first input and the output are created with `true`; the second input and
        // the optional scratchpad are created with `false`.
        std::vector<LoopPort> entries{LoopPort(expr->get_input_port(0), true), LoopPort(expr->get_input_port(1), false)};
        if (brgemm->is_with_scratchpad())
            entries.emplace_back(expr->get_input_port(2), false);
        std::vector<LoopPort> exits{LoopPort(expr->get_output_port(0), true)};
        loop_manager->mark_loop(expr_it, std::next(expr_it), work_amount, increment, dim_idx, entries, exits);

        // The node and the loop manager were both changed, so report the modification.
        modified = true;
    }

    return modified;
}
|
||||
} // namespace pass
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
@ -0,0 +1,28 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "snippets/lowered/pass/pass.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
namespace pass {
|
||||
|
||||
/**
|
||||
* @interface BrgemmBlocking
|
||||
* @brief Covers BrgemmCPU with blocking loop by M
|
||||
* @ingroup snippets
|
||||
*/
|
||||
|
||||
// Lowered (LinearIR-level) pass declaration; the implementation lives in the matching .cpp file.
class BrgemmBlocking : public snippets::lowered::pass::Pass {
|
||||
public:
|
||||
// Registers run-time type information under the name "BrgemmBlocking" with parent "Pass".
OPENVINO_RTTI("BrgemmBlocking", "Pass")
|
||||
BrgemmBlocking();
|
||||
// Pass entry point, overriding the base class's run(); takes the LinearIR to process.
bool run(snippets::lowered::LinearIR& linear_ir) override;
|
||||
};
|
||||
|
||||
} // namespace pass
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
@ -21,7 +21,7 @@ namespace pass {
|
||||
class FuseLoadStoreConvert: public snippets::lowered::pass::Pass {
|
||||
public:
|
||||
FuseLoadStoreConvert() = default;
|
||||
OPENVINO_RTTI("FuseLoadStoreConvert", "LinearIRTransformation");
|
||||
OPENVINO_RTTI("FuseLoadStoreConvert", "Pass");
|
||||
bool run(snippets::lowered::LinearIR& linear_ir) override;
|
||||
|
||||
private:
|
||||
|
@ -0,0 +1,89 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace InferenceEngine;
|
||||
using namespace CPUTestUtils;
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
|
||||
using FQScaleshiftWithConstantShiftTestParams = Precision;
|
||||
|
||||
// Subgraph test fixture, parameterized by network precision. It builds
// Parameter -> MatMul(const weights) -> Add(const shift, matmul) -> FakeQuantize,
// with the constant shift as the FIRST operand of the Add.
class FQScaleshiftWithConstantShiftTest : public testing::WithParamInterface<FQScaleshiftWithConstantShiftTestParams>,
                                          public CPUTestsBase,
                                          virtual public LayerTestsUtils::LayerTestsCommon {
public:
    // Produces the test-case name suffix, e.g. "netPRC=<precision name>_".
    static std::string getTestCaseName(testing::TestParamInfo<FQScaleshiftWithConstantShiftTestParams> obj) {
        Precision netPrecision = obj.param;
        std::ostringstream name;
        name << "netPRC=" << netPrecision.name() << "_";
        return name.str();
    }

protected:
    void SetUp() override {
        targetDevice = CommonTestUtils::DEVICE_CPU;
        Precision netPrecision = this->GetParam();
        const auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        std::vector<SizeVector> inputShapes{{25, 14, 14, 768}};
        SizeVector weightsShape{768, 2304};
        SizeVector shiftShape{1, 1, 1, 2304};

        // avoid eliminations: the weights are all 2 except the first element,
        // and the shift values are 0, 1, 2, ...
        std::vector<int> weightsData(768 * 2304, 2);
        weightsData[0] = 1;
        std::vector<int> shiftData(1 * 1 * 1 * 2304);
        std::iota(shiftData.begin(), shiftData.end(), 0);

        // Node creation order is kept: shift constant, weights constant, then parameters.
        auto shiftConst = ngraph::opset5::Constant::create(ngraph::element::f32, shiftShape, shiftData);
        auto weightsConst = ngraph::opset5::Constant::create(ngraph::element::f32, weightsShape, weightsData);
        auto params = builder::makeParams(ngPrec, {inputShapes});
        const auto paramOutputs = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(params));

        const auto matmul = builder::makeMatMul(paramOutputs[0], weightsConst, false, false);
        auto add = ngraph::builder::makeEltwise(shiftConst, matmul, ngraph::helpers::EltwiseTypes::ADD);
        auto fakeQuantize =
            ngraph::builder::makeFakeQuantize(add, ngraph::element::f32, 256, {}, {-8.0f}, {7.0f}, {-8.0f}, {7.0f});

        ngraph::ParameterVector inputParams = {params[0]};
        function = makeNgraphFunction(ngPrec, inputParams, fakeQuantize, "FQScaleshiftWithConstantShift");
    }
};
|
||||
|
||||
/* Network with SS subgraph and FQ node. Shift in SS is constant-folded.
|
||||
* Test that FQ-SS fusing works correctly while comparing SS and FQ channel dims.
|
||||
Input Const
|
||||
\ /
|
||||
\ /
|
||||
\ /
|
||||
MatMul Const
|
||||
\ /
|
||||
\ /
|
||||
\ /
|
||||
Add
|
||||
|
|
||||
|
|
||||
FQ
|
||||
|
|
||||
|
|
||||
Output
|
||||
*/
|
||||
|
||||
// Parameterized test body: delegates everything to Run(), which is not defined in this file
// (presumably inherited from LayerTestsCommon and comparing plugin output with reference results, as the test name suggests — TODO confirm).
TEST_P(FQScaleshiftWithConstantShiftTest, CompareWithRefs) {
|
||||
Run();
|
||||
}
|
||||
|
||||
// Instantiates FQScaleshiftWithConstantShiftTest under the "smoke_Check" prefix with a single
// parameter value (Precision::FP32); test names are produced by the fixture's getTestCaseName.
namespace {
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Check, FQScaleshiftWithConstantShiftTest,
|
||||
::testing::Values(Precision::FP32),
|
||||
FQScaleshiftWithConstantShiftTest::getTestCaseName);
|
||||
} // namespace
|
||||
} // namespace SubgraphTestsDefinitions
|
@ -18,6 +18,7 @@
|
||||
#include "gna_lib_ver_selector.hpp"
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
#include "log/log.hpp"
|
||||
#include "openvino/opsets/opset12.hpp"
|
||||
|
||||
namespace std {
|
||||
inline std::ostream& operator<<(std::ostream& os, const std::set<ov::element::Type>& t) {
|
||||
@ -35,6 +36,7 @@ inline std::ostream& operator<<(std::ostream& os, const std::set<ov::element::Ty
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
using namespace target;
|
||||
using namespace opset12;
|
||||
namespace limitations {
|
||||
|
||||
class SupportedElementTypes {
|
||||
@ -689,22 +691,6 @@ void Limitations::init(const DeviceVersion& compile_target) {
|
||||
k_instance = std::shared_ptr<Limitations>(new Limitations(compile_target));
|
||||
}
|
||||
|
||||
// Returns true when exactly two dimensions of `shape` differ from 1.
bool Limitations::is_transpose_2d(const std::vector<size_t>& shape) {
    size_t non_unit_dims = 0;
    for (const size_t dim : shape) {
        if (dim != 1) {
            ++non_unit_dims;
        }
    }
    return non_unit_dims == 2;
}
|
||||
|
||||
bool Limitations::is_transpose_supported(const std::vector<size_t>& shape) {
|
||||
if (!is_transpose_2d(shape))
|
||||
return false;
|
||||
auto shape_no_1 = shape;
|
||||
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
|
||||
size_t min, max;
|
||||
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
|
||||
return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize;
|
||||
}
|
||||
|
||||
size_t Limitations::get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input) {
|
||||
auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
|
||||
return total_size / kBufferMaxSize + 1;
|
||||
@ -753,31 +739,38 @@ bool SupportedElementTypes::IsConstantTypeSupported(ov::element::Type elem_type,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Limitations::is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
|
||||
OPENVINO_ASSERT(node, "Transpose node is empty!");
|
||||
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
|
||||
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
|
||||
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
|
||||
bool Limitations::is_transpose_supported(const ov::Shape& shape) {
|
||||
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(shape);
|
||||
|
||||
// GNA transpose limitations:
|
||||
// - supports 2d transposes only
|
||||
// - smaller dimension should be less or equal to 8
|
||||
// - bigger dimension should be a multiple of Limitations::kNoOfInputsDivisor
|
||||
if (squeezed_shape.size() == 2 && min_input_dim <= 8 && ALIGN(max_input_dim, kNoOfInputsDivisor) == max_input_dim) {
|
||||
return true;
|
||||
// - bigger dimension should be a multiple of limitations::noOfInputsDivisor
|
||||
if (squeezed_shape.size() == 2) {
|
||||
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
|
||||
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
|
||||
if (min_input_dim <= 8 && max_input_dim % Limitations::kNoOfInputsDivisor == 0 &&
|
||||
max_input_dim <= kTransposeMaxSize) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
|
||||
// Node-based overload: asserts the node is non-null, then delegates to the shape-based check
// using the shape of the node's first input.
bool Limitations::is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
    OPENVINO_ASSERT(node, "Transpose node is empty!");
    const ov::Shape& input_shape = node->get_input_shape(0);
    return is_transpose_supported(input_shape);
}
|
||||
|
||||
bool Limitations::is_conv_supported(const std::shared_ptr<ov::intel_gna::op::GNAConvolution>& conv_gna,
|
||||
const InferenceEngine::Precision gna_precision,
|
||||
bool is_exception_allowed) {
|
||||
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
|
||||
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
|
||||
OPENVINO_ASSERT(conv_gna, "GNAConvolution node is empty!");
|
||||
size_t batch_size = conv_gna->input_value(0).get_shape()[0];
|
||||
if (batch_size != 1) {
|
||||
if (is_exception_allowed) {
|
||||
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
|
||||
", type: " + conv_ie->get_type_name() + ", and batch size(" +
|
||||
THROW_GNA_EXCEPTION << "topology with layer: " + conv_gna->get_friendly_name() +
|
||||
", type: " + conv_gna->get_type_name() + ", and batch size(" +
|
||||
std::to_string(batch_size) + ") != 1 not supported";
|
||||
}
|
||||
return false;
|
||||
@ -789,15 +782,15 @@ bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::Convolutio
|
||||
static_cast<uint32_t>(filter_stride_width));
|
||||
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
|
||||
error,
|
||||
conv_ie->get_friendly_name(),
|
||||
conv_ie->get_type_name());
|
||||
conv_gna->get_friendly_name(),
|
||||
conv_gna->get_type_name());
|
||||
};
|
||||
auto input_shape = conv_ie->input_value(0).get_shape();
|
||||
auto filter_shape = conv_ie->input_value(1).get_shape();
|
||||
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
|
||||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
|
||||
auto input_shape = conv_gna->input_value(0).get_shape();
|
||||
auto filter_shape = conv_gna->input_value(1).get_shape();
|
||||
if ((4 == filter_shape.size() && filter_shape[1] > 1 && filter_shape[2] > 1) ||
|
||||
(4 == input_shape.size() && input_shape[1] > 1 && input_shape[2] > 1)) {
|
||||
pass::helper::ConvData conv_data;
|
||||
pass::helper::GetConvData(conv_ie, conv_data);
|
||||
pass::helper::GetConvData(conv_gna, conv_data);
|
||||
if (gna_convolution_layer::isMappableFrom2DTo1D(static_cast<uint32_t>(conv_data.input_height),
|
||||
static_cast<uint32_t>(conv_data.input_width),
|
||||
static_cast<uint32_t>(conv_data.input_channel_count),
|
||||
@ -809,7 +802,7 @@ bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::Convolutio
|
||||
}
|
||||
|
||||
if (m_cnn_validator) {
|
||||
return m_cnn_validator->ValidateCnn2D(conv_ie->get_friendly_name(),
|
||||
return m_cnn_validator->ValidateCnn2D(conv_gna->get_friendly_name(),
|
||||
static_cast<uint32_t>(conv_data.input_height),
|
||||
static_cast<uint32_t>(conv_data.input_width),
|
||||
static_cast<uint32_t>(conv_data.input_channel_count),
|
||||
@ -824,10 +817,12 @@ bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::Convolutio
|
||||
is_exception_allowed);
|
||||
}
|
||||
}
|
||||
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
|
||||
|
||||
return check_dilation(conv_gna->get_dilations()[0],
|
||||
conv_gna->get_dilations()[conv_gna->get_dilations().size() - 1]);
|
||||
}
|
||||
|
||||
bool Limitations::is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
|
||||
bool Limitations::is_pooling_supported(const std::shared_ptr<ov::intel_gna::op::GNAMaxPool> max_pool,
|
||||
bool is_exception_allowed) {
|
||||
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
|
||||
auto kernels = max_pool->get_kernel();
|
||||
@ -869,6 +864,100 @@ bool Limitations::is_split_supported(const std::shared_ptr<ov::Node>& node, bool
|
||||
return is_aligned;
|
||||
}
|
||||
|
||||
// Checks whether the Concat axis equals the index returned by
// graph_utils::get_first_valuable_dim_id() for the Concat output shape.
//
// @param node  node expected to be a Concat; must not be null (asserted).
// @return true when the axes match; false when they differ or when `node` is not a Concat.
bool Limitations::is_concat_supported(const std::shared_ptr<const ov::Node>& node) {
    OPENVINO_ASSERT(node, "Concat node is empty!");

    const auto concat_node = std::dynamic_pointer_cast<const Concat>(node);
    if (!concat_node) {
        // The cast yields nullptr for any non-Concat node; dereferencing it would be undefined
        // behaviour, so report "not supported" the same way the transposed-concat checks do.
        log::debug() << "Concat node is empty!" << std::endl;
        return false;
    }

    const ov::Shape& output_shape = concat_node->get_output_shape(0);
    const auto axis = concat_node->get_axis();

    return graph_utils::get_first_valuable_dim_id(output_shape) == axis;
}
|
||||
|
||||
// Transposes the Concat output shape by the reversed `order` and checks that order[axis]
// equals the index returned by graph_utils::get_first_valuable_dim_id() for that shape.
// Returns false (with a debug log) when `node` is not a Concat.
bool Limitations::is_forward_transposed_concat_supported(const std::shared_ptr<const ov::Node>& node,
                                                         const AxisVector& order) {
    const auto concat = std::dynamic_pointer_cast<const Concat>(node);
    if (concat == nullptr) {
        log::debug() << "Concat node is empty!" << std::endl;
        return false;
    }

    const auto concat_axis = concat->get_axis();
    const ov::Shape& shape_after_transpose =
        graph_utils::transpose_shape(concat->get_output_shape(0), pass::helper::reverse_transpose_order(order));
    const auto expected_dim_id = static_cast<int64_t>(order[concat_axis]);

    return graph_utils::get_first_valuable_dim_id(shape_after_transpose) == expected_dim_id;
}
|
||||
|
||||
// Transposes the Concat output shape by `order` and checks that order[axis] equals the index
// returned by graph_utils::get_first_valuable_dim_id() for that shape.
// Returns false (with a debug log) when `node` is not a Concat.
bool Limitations::is_backward_transposed_concat_supported(const std::shared_ptr<const ov::Node>& node,
                                                          const AxisVector& order) {
    const auto concat = std::dynamic_pointer_cast<const Concat>(node);
    if (concat == nullptr) {
        log::debug() << "Concat node is empty!" << std::endl;
        return false;
    }

    const auto concat_axis = concat->get_axis();
    const ov::Shape& shape_after_transpose = graph_utils::transpose_shape(concat->get_output_shape(0), order);
    const auto expected_dim_id = static_cast<int64_t>(order[concat_axis]);

    return graph_utils::get_first_valuable_dim_id(shape_after_transpose) == expected_dim_id;
}
|
||||
|
||||
// Accepts Split or VariadicSplit nodes whose second input is a Constant holding the axis.
// Transposes the first output shape by the reversed `order` and checks that order[axis] equals
// the index returned by graph_utils::get_first_valuable_dim_id() for that shape.
// Returns false for any other node type or when the axis input is not a Constant.
bool Limitations::is_forward_transposed_split_supported(const std::shared_ptr<const ov::Node>& node,
                                                        const AxisVector& order) {
    // Try Split first, then VariadicSplit.
    std::shared_ptr<const ov::Node> split = std::dynamic_pointer_cast<const Split>(node);
    if (!split) {
        split = std::dynamic_pointer_cast<const VariadicSplit>(node);
    }
    if (!split) {
        log::debug() << "Split node is empty!" << std::endl;
        return false;
    }

    const ov::Shape& first_output_shape = split->get_output_shape(0);
    const auto axis_constant = as_type_ptr<Constant>(split->input_value(1).get_node_shared_ptr());
    if (!axis_constant) {
        return false;
    }
    const auto split_axis = axis_constant->get_axis_vector_val()[0];

    const ov::Shape& shape_after_transpose =
        graph_utils::transpose_shape(first_output_shape, pass::helper::reverse_transpose_order(order));
    const auto expected_dim_id = static_cast<int64_t>(order[split_axis]);

    return graph_utils::get_first_valuable_dim_id(shape_after_transpose) == expected_dim_id;
}
|
||||
|
||||
// Accepts Split or VariadicSplit nodes whose second input is a Constant holding the axis.
// Transposes the first output shape by the reversed `order` and checks that order[axis] equals
// the index returned by graph_utils::get_first_valuable_dim_id() for that shape.
// NOTE(review): the shape is transposed with the reversed order, exactly as in the forward
// variant — confirm this is intended for the backward case.
bool Limitations::is_backward_transposed_split_supported(const std::shared_ptr<const ov::Node>& node,
                                                         const AxisVector& order) {
    // Try Split first, then VariadicSplit.
    std::shared_ptr<const ov::Node> split = std::dynamic_pointer_cast<const Split>(node);
    if (!split) {
        split = std::dynamic_pointer_cast<const VariadicSplit>(node);
    }
    if (!split) {
        log::debug() << "Split node is empty!" << std::endl;
        return false;
    }

    const ov::Shape& first_output_shape = split->get_output_shape(0);
    const auto axis_constant = as_type_ptr<Constant>(split->input_value(1).get_node_shared_ptr());
    if (!axis_constant) {
        return false;
    }
    const auto split_axis = axis_constant->get_axis_vector_val()[0];

    const ov::Shape& shape_after_transpose =
        graph_utils::transpose_shape(first_output_shape, pass::helper::reverse_transpose_order(order));
    const int64_t expected_dim_id = order[split_axis];

    return graph_utils::get_first_valuable_dim_id(shape_after_transpose) == expected_dim_id;
}
|
||||
|
||||
bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
|
||||
const InferenceEngine::Precision gna_precision,
|
||||
bool is_exception_allowed) {
|
||||
@ -876,12 +965,13 @@ bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
|
||||
return SupportedElementTypes::IsParameterTypeSupported(node->get_element_type(), is_exception_allowed);
|
||||
} else if (ov::op::util::is_constant(node)) {
|
||||
return SupportedElementTypes::IsConstantTypeSupported(node->get_element_type(), is_exception_allowed);
|
||||
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
|
||||
return is_conv_supported(conv_ie, gna_precision, is_exception_allowed);
|
||||
} else if (auto conv = std::dynamic_pointer_cast<ov::intel_gna::op::GNAConvolution>(node)) {
|
||||
return is_conv_supported(conv, gna_precision, is_exception_allowed);
|
||||
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
|
||||
return is_fc_supported(fully_connected, is_exception_allowed);
|
||||
} else if (ov::intel_gna::graph_utils::is_pooling(node)) {
|
||||
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node), is_exception_allowed);
|
||||
return is_pooling_supported(std::dynamic_pointer_cast<ov::intel_gna::op::GNAMaxPool>(node),
|
||||
is_exception_allowed);
|
||||
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
|
||||
ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
|
||||
ov::intel_gna::graph_utils::is_crop_affined(node) ||
|
||||
@ -891,11 +981,11 @@ bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
|
||||
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
|
||||
(std::dynamic_pointer_cast<MatMul>(node) != nullptr)) {
|
||||
return true;
|
||||
} else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
|
||||
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
|
||||
if ((std::dynamic_pointer_cast<Split>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<VariadicSplit>(node) != nullptr)) {
|
||||
return is_split_supported(node, is_exception_allowed);
|
||||
}
|
||||
// TODO check concat are aligned when transformation will be moved to ngraph
|
||||
|
@ -20,6 +20,8 @@
|
||||
#include "legacy/ngraph_ops/fully_connected.hpp"
|
||||
#include "ngraph/opsets/opset7.hpp"
|
||||
#include "ngraph/opsets/opset9.hpp"
|
||||
#include "ops/gna_convolution.hpp"
|
||||
#include "ops/gna_max_pool.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
@ -173,8 +175,6 @@ public:
|
||||
*/
|
||||
static inline std::shared_ptr<Limitations> get_instance();
|
||||
|
||||
static bool is_transpose_2d(const std::vector<size_t>& shape);
|
||||
static bool is_transpose_supported(const std::vector<size_t>& shape);
|
||||
static size_t get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input);
|
||||
|
||||
/**
|
||||
@ -202,6 +202,13 @@ public:
|
||||
* @return true if supported
|
||||
*/
|
||||
static bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
|
||||
|
||||
/**
|
||||
* @brief Validates if transpose is supported by GNA
|
||||
* @param shape transpose
|
||||
* @return true if supported
|
||||
*/
|
||||
static bool is_transpose_supported(const ov::Shape& shape);
|
||||
/**
|
||||
* @brief Validates if transpose is supported by GNA
|
||||
* @param node transpose
|
||||
@ -209,13 +216,13 @@ public:
|
||||
*/
|
||||
static bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
|
||||
/**
|
||||
* @brief Validates if legacy convolution is supported by GNA
|
||||
* @param conv_ie convolution
|
||||
* @brief Validates if convolution is supported by GNA
|
||||
* @param conv_gna GNA convolution
|
||||
* @param gna_precision GNA inference precision
|
||||
* @param is_exception_allowed flag specifies whether exception is allowed
|
||||
* @return true if supported
|
||||
*/
|
||||
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
|
||||
bool is_conv_supported(const std::shared_ptr<ov::intel_gna::op::GNAConvolution>& conv_gna,
|
||||
const InferenceEngine::Precision gna_precision,
|
||||
bool is_exception_allowed = false);
|
||||
/**
|
||||
@ -224,9 +231,19 @@ public:
|
||||
* @param is_exception_allowed flag specifies whether exception is allowed
|
||||
* @return true if precision is found in supported
|
||||
*/
|
||||
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
|
||||
bool is_pooling_supported(const std::shared_ptr<ov::intel_gna::op::GNAMaxPool> max_pool,
|
||||
bool is_exception_allowed = false);
|
||||
|
||||
static bool is_concat_supported(const std::shared_ptr<const ov::Node>& node);
|
||||
static bool is_forward_transposed_concat_supported(const std::shared_ptr<const ov::Node>& node,
|
||||
const AxisVector& order);
|
||||
static bool is_backward_transposed_concat_supported(const std::shared_ptr<const ov::Node>& node,
|
||||
const AxisVector& order);
|
||||
static bool is_forward_transposed_split_supported(const std::shared_ptr<const ov::Node>& node,
|
||||
const AxisVector& order);
|
||||
static bool is_backward_transposed_split_supported(const std::shared_ptr<const ov::Node>& node,
|
||||
const AxisVector& order);
|
||||
|
||||
/**
|
||||
* @brief Validates if operation is supported by GNA
|
||||
* @param node operation
|
||||
|
@ -198,7 +198,8 @@ inline bool is_eltwise_add(const std::shared_ptr<ngraph::Node>& node) {
|
||||
}
|
||||
|
||||
inline bool is_pooling(const std::shared_ptr<ngraph::Node>& node) {
|
||||
return (std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node) != nullptr);
|
||||
return ((std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node) != nullptr) ||
|
||||
std::dynamic_pointer_cast<ov::intel_gna::op::GNAMaxPool>(node) != nullptr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -268,7 +269,7 @@ inline bool has_32bit_output(const std::shared_ptr<ngraph::Node>& node) {
|
||||
return ((std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Convolution>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ov::intel_gna::op::GNAConvolution>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Add>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::opset9::Multiply>(node) != nullptr) ||
|
||||
(std::dynamic_pointer_cast<ngraph::op::Eltwise>(node) != nullptr) ||
|
||||
@ -625,6 +626,20 @@ bool has_child_node(std::shared_ptr<ov::Node> node) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks if shape without dimensions == 1 is 2D
|
||||
*/
|
||||
inline bool is_shape_2d(const ov::Shape& shape) {
    // Two entries must remain once the shape has gone through squeeze_shape().
    const auto squeezed = graph_utils::squeeze_shape(shape);
    return squeezed.size() == 2;
}
|
||||
|
||||
/**
|
||||
* @brief Checks if node has N consumers
|
||||
*/
|
||||
inline bool has_n_consumers(const std::shared_ptr<ov::Node>& node, size_t n_consumers) {
    // Only output 0 is inspected; its target inputs are counted.
    const auto consumers = node->output(0).get_target_inputs();
    return consumers.size() == n_consumers;
}
|
||||
|
||||
} // namespace graph_utils
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -74,9 +74,11 @@ struct GnaDesc {
|
||||
}
|
||||
|
||||
// Creates an InferenceEngine::Data object named `name` whose tensor descriptor is built from
// model_precision, dims and model_layout. Deprecation warnings are suppressed for the body.
InferenceEngine::DataPtr to_ie_data() {
    OPENVINO_SUPPRESS_DEPRECATED_START
    const InferenceEngine::TensorDesc tensor_desc(model_precision, dims, model_layout);
    return std::make_shared<InferenceEngine::Data>(name, tensor_desc);
    OPENVINO_SUPPRESS_DEPRECATED_END
}
|
||||
};
|
||||
|
||||
@ -98,9 +100,11 @@ struct InputDesc : GnaDesc {
|
||||
}
|
||||
|
||||
// Creates an InferenceEngine::InputInfo and attaches the data object returned by to_ie_data().
// Deprecation warnings are suppressed for the body.
InferenceEngine::InputInfo::Ptr ToIEInputInfo() {
    OPENVINO_SUPPRESS_DEPRECATED_START
    auto ie_input = std::make_shared<InferenceEngine::InputInfo>();
    ie_input->setInputData(this->to_ie_data());
    return ie_input;
    OPENVINO_SUPPRESS_DEPRECATED_END
}
|
||||
};
|
||||
|
||||
|
@ -90,7 +90,7 @@ size_t LayerQuantizer::GetBiasSizeForLayer(InferenceEngine::WeightableLayer& wl)
|
||||
return wl._biases->size();
|
||||
} else if (LayerInfo(wl).isConvolution()) {
|
||||
// Calculating biases len using outdata dims: biases number should be equal to output channels number
|
||||
return InferenceEngine::GetDataDimByName(wl.outData.front(), InferenceEngine::DataDimName::C);
|
||||
return InferenceEngine::GetDataDimSizeNHWC(wl.outData.front(), InferenceEngine::DataDimName::C);
|
||||
} else {
|
||||
// Calculating biases size using outData dimensions
|
||||
return wl.outData.front()->getDims().back();
|
||||
|
@ -1265,7 +1265,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
|
||||
double weights_reducer = 1.0;
|
||||
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer*>(wl);
|
||||
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
|
||||
const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
const auto inDepth = GetDataDimSizeNHWC(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
weights_reducer = gna_convolution_layer::getWeightsReducer(*conv);
|
||||
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
|
||||
weights_reducer = std::max(1.0, weights_reducer);
|
||||
|
@ -307,12 +307,26 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) {
|
||||
|
||||
void GNAGraphCompiler::assertConvolutionLayoutProper(const InferenceEngine::DataPtr& data) {
|
||||
if (data->getLayout() != InferenceEngine::Layout::NHWC && data->getLayout() != InferenceEngine::Layout::NCHW &&
|
||||
data->getLayout() != InferenceEngine::Layout::NC) {
|
||||
data->getLayout() != InferenceEngine::Layout::NC && data->getLayout() != InferenceEngine::Layout::CHW) {
|
||||
THROW_GNA_EXCEPTION << "layer: \"Convolution\" with layout " << data->getLayout()
|
||||
<< " isn't currently supported on GNA";
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T>
|
||||
PropertyVector<T> property_vector_append(PropertyVector<T> properties, T value) {
|
||||
std::vector<T> new_values;
|
||||
for (size_t i = 0; i < properties.size(); ++i)
|
||||
new_values.push_back(properties[i]);
|
||||
new_values.push_back(value);
|
||||
|
||||
return PropertyVector<T>(new_values);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Create AMIntelDNN Convolutional1DComponent from ConvolutionLayer
|
||||
*
|
||||
@ -338,15 +352,24 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
|
||||
const auto outputs = layer->outData.front();
|
||||
assertConvolutionLayoutProper(inputs);
|
||||
|
||||
const auto in_batch = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::N);
|
||||
const auto in_channels = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::C);
|
||||
auto in_height = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::H);
|
||||
auto in_width = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::W);
|
||||
const auto in_batch = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::N);
|
||||
const auto in_channels = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::C);
|
||||
auto in_height = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::H);
|
||||
auto in_width = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::W);
|
||||
const auto out_batch = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::N);
|
||||
const auto out_channels = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::C);
|
||||
auto out_height = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::H);
|
||||
auto out_width = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::W);
|
||||
|
||||
const auto out_batch = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::N);
|
||||
const auto out_channels = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);
|
||||
auto out_height = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::H);
|
||||
auto out_width = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::W);
|
||||
if (inputs->getLayout() == InferenceEngine::Layout::CHW) {
|
||||
// convolution is ngraph-3D here. Make some fixes to work with it as it's ngraph-4D
|
||||
convolution._kernel_y = 1;
|
||||
convolution._dilation_y = 1;
|
||||
convolution._stride_y = 1;
|
||||
|
||||
convolution._padding = property_vector_append<unsigned int>(convolution._padding, 0);
|
||||
convolution._pads_end = property_vector_append<unsigned int>(convolution._pads_end, 0);
|
||||
}
|
||||
|
||||
if (in_height > 1 && in_width == 1 && !ShouldUseOnlyConv2DGnaIface()) {
|
||||
std::swap(in_height, in_width);
|
||||
@ -589,42 +612,12 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know
|
||||
// how kaldi will handle that
|
||||
if (!dnn->do_rotate_input) {
|
||||
if ((inputs->getLayout() != InferenceEngine::Layout::NHWC || transpose_h_w) &&
|
||||
LayerInfo(connectedInputLayer).isInput()) {
|
||||
// Kaldi features are opposite orientation
|
||||
dnn->do_rotate_input = true;
|
||||
dnn->num_rotate_rows = effectiveStride;
|
||||
dnn->num_rotate_columns = num_inputs / effectiveStride;
|
||||
} else {
|
||||
dnn->do_rotate_input = false;
|
||||
}
|
||||
}
|
||||
|
||||
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
||||
|
||||
// Transpose H with W or C with HW
|
||||
auto A = transpose_h_w ? in_kernel_h : in_channels;
|
||||
auto B = transpose_h_w ? in_kernel_w : convolution._kernel[X_AXIS];
|
||||
|
||||
std::vector<uint8_t> transposedWeights;
|
||||
for (uint32_t k = 0; k < num_filters; k++) {
|
||||
uint8_t* ptr_filt_current =
|
||||
convolution._weights->cbuffer().as<uint8_t*>() + k * A * B * convolution.precision.size();
|
||||
auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B);
|
||||
transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
|
||||
}
|
||||
if (transposedWeights.size() != convolution._weights->byteSize()) {
|
||||
THROW_GNA_LAYER_EXCEPTION(&convolution) << "weights was transposed incorrectly. " << transposedWeights.size()
|
||||
<< ' ' << convolution._weights->byteSize();
|
||||
}
|
||||
|
||||
if (num_conv_kernel_padding == 0) {
|
||||
gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
|
||||
ptr_weights,
|
||||
transposedWeights.data(),
|
||||
convolution._weights->cbuffer(),
|
||||
convolution._weights->byteSize());
|
||||
} else {
|
||||
auto paddedWeights = num_filter_coefficients * num_filters;
|
||||
@ -636,7 +629,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
layerName,
|
||||
num_conv_kernel_padding,
|
||||
cpSize,
|
||||
transposedWeights,
|
||||
convolution,
|
||||
num_filters,
|
||||
single_conv_kernel_size](void* data, std::size_t size) {
|
||||
if (paddedWeightsSize > size) {
|
||||
@ -648,7 +641,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
for (uint32_t i = 0; i < num_filters; i++) {
|
||||
ie_memcpy(dstPtr + offset,
|
||||
size - offset,
|
||||
transposedWeights.data() + single_conv_kernel_size * i * cpSize,
|
||||
convolution._weights->cbuffer().as<uint8_t*>() + single_conv_kernel_size * i * cpSize,
|
||||
single_conv_kernel_size * cpSize);
|
||||
offset += single_conv_kernel_size * cpSize;
|
||||
ie_memcpy(dstPtr + offset, size - offset, &padding_zeros[0], padding_zeros.size());
|
||||
@ -783,22 +776,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
|
||||
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
|
||||
|
||||
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know
|
||||
// how kaldi will handle that
|
||||
if (!dnn->do_rotate_input && inputs->getLayout() != InferenceEngine::Layout::NHWC &&
|
||||
LayerInfo(connectedInputLayer).isInput()) {
|
||||
// Kaldi features are opposite orientation
|
||||
dnn->do_rotate_input = true;
|
||||
dnn->num_rotate_rows = in_channels;
|
||||
if (in_height != 1) {
|
||||
dnn->num_rotate_rows *= convolution._stride_y;
|
||||
}
|
||||
if (in_width != 1) {
|
||||
dnn->num_rotate_rows *= convolution._stride_x;
|
||||
}
|
||||
dnn->num_rotate_columns = num_inputs / dnn->num_rotate_rows;
|
||||
}
|
||||
|
||||
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
||||
|
||||
const auto convolution_precision = convolution.precision.size();
|
||||
@ -815,7 +792,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
ALIGN(effective_single_kernel_size, Limitations::kConvEachKernelByteAlignment) - effective_single_kernel_size;
|
||||
for (uint32_t k = 0; k < convolution._out_depth; k++) {
|
||||
uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * single_kernel_size;
|
||||
auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW);
|
||||
auto transposed_part = copy_matrix(ptr_filt_current, convolution.precision.size(), in_channels, kernelHW);
|
||||
transposed_weights.insert(transposed_weights.end(), transposed_part.begin(), transposed_part.end());
|
||||
transposed_weights.resize(transposed_weights.size() + effective_single_kernel_size - single_kernel_size +
|
||||
kernel_pad);
|
||||
@ -997,13 +974,19 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto inputs = layer->insData.begin()->lock();
|
||||
auto outputs = *layer->outData.begin();
|
||||
|
||||
uint32_t w_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::W);
|
||||
uint32_t h_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::H);
|
||||
const uint32_t c_dim_in = InferenceEngine::GetDataDimByName(inputs, InferenceEngine::DataDimName::C);
|
||||
uint32_t w_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::W);
|
||||
uint32_t h_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::H);
|
||||
const uint32_t c_dim_in = GetDataDimSizeNHWC(inputs, InferenceEngine::DataDimName::C);
|
||||
|
||||
uint32_t w_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::W);
|
||||
uint32_t h_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::H);
|
||||
const uint32_t c_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);
|
||||
uint32_t w_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::W);
|
||||
uint32_t h_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::H);
|
||||
const uint32_t c_dim_out = GetDataDimSizeNHWC(outputs, InferenceEngine::DataDimName::C);
|
||||
|
||||
if (inputs->getLayout() == InferenceEngine::Layout::CHW) {
|
||||
// Pooling is ngraph-3D here. Make some fixes to work with it as it's ngraph-4D
|
||||
pooling._kernel = property_vector_append<unsigned int>(pooling._kernel, 1);
|
||||
pooling._stride = property_vector_append<unsigned int>(pooling._stride, 1);
|
||||
}
|
||||
|
||||
void* ptr_inputs = nullptr;
|
||||
void* ptr_outputs = nullptr;
|
||||
@ -2590,7 +2573,8 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
|
||||
if (layer->params.find("output_offset") != layer->params.end()) {
|
||||
output_offset = layer->GetParamAsInt("output_offset");
|
||||
}
|
||||
gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset);
|
||||
gnamem->getQueue(REGION_AUTO)
|
||||
->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset, num_data_bytes_out);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -2859,5 +2843,15 @@ std::vector<uint8_t> GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix,
|
||||
return temp_buffer;
|
||||
}
|
||||
|
||||
/**
 * @brief Copies a matrix of raw elements into a newly allocated byte buffer without reordering it
 * @param ptr_matrix pointer to the first byte of the source matrix
 * @param element_size size of a single matrix element in bytes
 * @param num_rows number of rows in the source matrix
 * @param num_cols number of columns in the source matrix
 * @return buffer of num_rows * num_cols * element_size bytes holding a copy of the source bytes
 */
std::vector<uint8_t> GNAGraphCompiler::copy_matrix(uint8_t* ptr_matrix,
                                                   size_t element_size,
                                                   uint32_t num_rows,
                                                   uint32_t num_cols) {
    // Widen before multiplying: num_rows * num_cols would otherwise be evaluated in 32 bits and could wrap
    const size_t dest_size = static_cast<size_t>(num_rows) * num_cols * element_size;
    std::vector<uint8_t> temp_buffer(dest_size);
    // memcpy requires valid pointers even for a zero-length copy, so skip the call for an empty matrix
    if (dest_size != 0) {
        ::memcpy(temp_buffer.data(), ptr_matrix, dest_size);
    }
    return temp_buffer;
}
|
||||
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -53,6 +53,10 @@ private:
|
||||
size_t element_size,
|
||||
uint32_t num_rows,
|
||||
uint32_t num_cols);
|
||||
std::vector<uint8_t> static copy_matrix(uint8_t* ptr_matrix,
|
||||
size_t element_size,
|
||||
uint32_t num_rows,
|
||||
uint32_t num_cols);
|
||||
|
||||
bool ShouldUseOnlyConv2DGnaIface() const;
|
||||
|
||||
|
@ -237,32 +237,6 @@ inline InferenceEngine::CNNLayerPtr FindPermutationAfterConvolutionInKaldiModel(
|
||||
return next;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief identifies if a model must be converted to NHWC, it must not be neither NHWC, nor Kaldi
|
||||
* @param layers model sorted layers
|
||||
*/
|
||||
inline bool MustBeConvertedFromNCHWToNHWC(const std::vector<InferenceEngine::CNNLayerPtr>& layers) {
|
||||
for (auto& l : layers) {
|
||||
if (!LayerInfo(l).isConvolution())
|
||||
continue;
|
||||
|
||||
InferenceEngine::CNNLayerPtr next;
|
||||
std::tie(std::ignore, next) = FindPermutationsAroundConvolutionInNHWCModel(l);
|
||||
if (next != nullptr)
|
||||
return false;
|
||||
// If a convolution has only 1-dimension input and output we should skip it
|
||||
auto in_dims = l->insData.begin()->lock()->getDims();
|
||||
auto out_dims = l->outData.front()->getDims();
|
||||
|
||||
if (ov::intel_gna::graph_utils::is_one_dim_shapes(in_dims, out_dims)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return FindPermutationAfterConvolutionInKaldiModel(l) == nullptr;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief returns transposition information for a layer based on the previous convolution or pooling dimensions order
|
||||
* @param layer layer from which transposition info search must be started
|
||||
|
@ -924,4 +924,38 @@ inline uint32_t GetDataDimByName(InferenceEngine::DataPtr data, DataDimName dimN
|
||||
return GetDimFromBack(dims, backOffsets[dimIxInNCHW]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief returns a size of a specified data dimension depending on the layout
|
||||
* NHWC specialization
|
||||
* @param data a pointer to the data
|
||||
* @param dimName dimension name
|
||||
*/
|
||||
inline uint32_t GetDataDimSizeNHWC(InferenceEngine::DataPtr data, DataDimName dimName) {
|
||||
uint32_t dimIxInNCHW = static_cast<uint32_t>(dimName);
|
||||
IE_ASSERT(dimIxInNCHW <= 3);
|
||||
|
||||
std::vector<uint32_t> backOffsets;
|
||||
switch (data->getLayout()) {
|
||||
case Layout::C:
|
||||
case Layout::NC:
|
||||
// 1 will be returned for offsets > 2
|
||||
backOffsets = std::vector<uint32_t>{2, 1, 3, 4};
|
||||
break;
|
||||
case Layout::HWC:
|
||||
// 1 will be returned for offset 4
|
||||
case Layout::NHWC:
|
||||
backOffsets = std::vector<uint32_t>{4, 3, 2, 1};
|
||||
break;
|
||||
case Layout::CHW:
|
||||
// 1 will be returned for offset 4
|
||||
case Layout::NCHW:
|
||||
backOffsets = std::vector<uint32_t>{4, 1, 3, 2};
|
||||
break;
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << data->getName() << " Unexpected layout " << data->getLayout();
|
||||
}
|
||||
auto dims = data->getDims();
|
||||
return GetDimFromBack(dims, backOffsets[dimIxInNCHW]);
|
||||
}
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -62,12 +62,10 @@ void GNAInferRequest::StartAsyncImpl() {
|
||||
std::exception_ptr exceptionPtr;
|
||||
if (res != InferenceEngine::StatusCode::OK) {
|
||||
try {
|
||||
IE_EXCEPTION_SWITCH(res,
|
||||
ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
|
||||
std::stringstream{}
|
||||
<< IE_LOCATION
|
||||
<< InferenceEngine::details::ExceptionTraits<ExceptionType>::string());
|
||||
IE_EXCEPTION_SWITCH(
|
||||
res,
|
||||
ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{IE_LOCATION_PARAM} <<= std::stringstream{});
|
||||
} catch (...) {
|
||||
exceptionPtr = std::current_exception();
|
||||
}
|
||||
|
@ -344,13 +344,13 @@ void GNAPlugin::PrePostProcess(InferenceEngine::Blob::Ptr input_blob,
|
||||
std::shared_ptr<ov::Model> model) {
|
||||
const ov::element::Type input_type = details::convertPrecision(input_blob->getTensorDesc().getPrecision());
|
||||
const ov::element::Type output_type = details::convertPrecision(output_blob->getTensorDesc().getPrecision());
|
||||
const ov::Shape& input_shape = model->get_parameters().front()->get_shape();
|
||||
const ov::Shape& output_shape = model->get_results().front()->get_shape();
|
||||
const ov::Shape& output_shape = output_blob->getTensorDesc().getDims();
|
||||
|
||||
for (const auto& param : model->get_parameters()) {
|
||||
param->set_element_type(input_type);
|
||||
}
|
||||
model->validate_nodes_and_infer_types();
|
||||
const ov::Shape& input_shape = model->get_parameters()[0]->get_output_shape(0);
|
||||
|
||||
ov::TensorVector inputs = {ov::Tensor(input_type, input_shape, input_blob->cbuffer().as<void*>())};
|
||||
ov::TensorVector results = {ov::Tensor(output_type, output_shape, output_blob->buffer().as<void*>())};
|
||||
@ -611,52 +611,6 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Collects transposition info for the inputs and outputs of the network and stores the non-empty results
 * in transpose_inputs_info and transpose_outputs_info, keyed by layer name.
 */
void GNAPlugin::FillInputsAndOutputsTranspositionInfo(const InferenceEngine::CNNNetwork& net) {
    OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "FillInputsAndOutputsTranspositionInfo");

    // Writes every part of a transposition info to the debug log
    const auto log_transposition_info = [](const std::vector<TranspositionInfo>& parts) {
        for (const auto& part : parts) {
            log::debug() << "transpose=" << part.transpose << " rows_num=" << part.num_transpose_rows
                         << " columns_num=" << part.num_transpose_columns << "\n";
        }
    };

    // Collect information for inputs transposition
    auto input_layers = CNNNetGetAllInputLayers(net);
    for (const auto& input_layer : input_layers) {
        if (LayerInfo(input_layer).isInput()) {
            auto info = FindTranspositionInfoFromNextLayers(input_layer);
            if (!info.empty()) {
                transpose_inputs_info.insert({input_layer->name, info});
                log::debug() << "Input " << input_layer->name << " transposition info: \n";
                log_transposition_info(info);
            }
        }
    }

    // Collect information for outputs transposition
    auto outputs_info = net.getOutputsInfo();
    for (const auto& output_port : outputs_info) {
        auto output_layer = getCreatorLayer(output_port.second).lock();
        if (LayerInfo(output_layer).isOutput()) {
            auto info = FindTranspositionInfoFromPrevLayers(output_layer);
            if (!info.empty()) {
                // Swap transposition info rows and columns since we need to transpose output back from NHWC to NCHW
                for (auto& part : info) {
                    if (part.transpose) {
                        std::swap(part.num_transpose_rows, part.num_transpose_columns);
                    }
                }
                transpose_outputs_info.insert({output_layer->name, info});
                log::debug() << "Output " << output_layer->name << " transposition info: \n";
                log_transposition_info(info);
            }
        }
    }
}
|
||||
|
||||
#ifdef PLOT
|
||||
void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer,
|
||||
InferenceEngine::ordered_properties& printed_properties,
|
||||
@ -751,10 +705,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
UpdateInputScaleFromNetwork(network);
|
||||
}
|
||||
|
||||
if (MustBeConvertedFromNCHWToNHWC(CNNNetSortTopologically(network))) {
|
||||
FillInputsAndOutputsTranspositionInfo(network);
|
||||
}
|
||||
|
||||
InferenceEngine::CNNNetwork newNet;
|
||||
|
||||
if (gnaFlags->sw_fp32) {
|
||||
@ -995,22 +945,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
}
|
||||
}
|
||||
|
||||
if (dnn->do_rotate_input && transpose_inputs_info.empty()) {
|
||||
for (auto& inputLayer : inputLayers) {
|
||||
transpose_inputs_info.insert(
|
||||
{inputLayer->name,
|
||||
{TranspositionInfo{dnn->do_rotate_input, dnn->num_rotate_rows, dnn->num_rotate_columns}}});
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Need to remove this conversion when ngraph NCHW->NHWC transformation is enabled
|
||||
if (!transpose_inputs_info.empty()) {
|
||||
ConvertTransposeMapToModel(transpose_inputs_info, inputs_ptr_->Get());
|
||||
}
|
||||
if (!transpose_outputs_info.empty()) {
|
||||
ConvertTransposeMapToModel(transpose_outputs_info, outputs_.Get());
|
||||
}
|
||||
|
||||
DumpXNNToFile();
|
||||
|
||||
#ifdef PLOT
|
||||
|
@ -37,6 +37,8 @@ class WorkerPool;
|
||||
class Worker;
|
||||
} // namespace request
|
||||
|
||||
using namespace ov::intel_gna::pre_post_processing;
|
||||
|
||||
class GNAPlugin : public InferenceEngine::IInferencePlugin {
|
||||
protected:
|
||||
std::string _pluginName = "GNA";
|
||||
@ -204,6 +206,13 @@ protected:
|
||||
InferenceEngine::Blob::Ptr output_blob,
|
||||
std::shared_ptr<ov::Model> model);
|
||||
|
||||
/**
|
||||
* Run ngraph model on CPU to modify inputs/outputs
|
||||
*/
|
||||
void pre_post_process(InferenceEngine::Blob::Ptr input_blob,
|
||||
InferenceEngine::Blob::Ptr output_blob,
|
||||
std::shared_ptr<ov::Model> model);
|
||||
|
||||
void ImportFrames(void* ptr_dst,
|
||||
const void* ptr_src,
|
||||
InferenceEngine::Precision input_precision,
|
||||
@ -246,14 +255,6 @@ protected:
|
||||
* @return true if the output is initiated, false otherwise
|
||||
*/
|
||||
bool TryToInitOutput(const std::string& portName, InferenceEngine::CNNLayerPtr layer);
|
||||
|
||||
/**
|
||||
* @brief Fills inputs and outputs transposition info for model conversion from NCHW to NHWC.
|
||||
* Information for transposition is found from convolution/pooling input or output dimensions.
|
||||
* @param net network whose input and output layers are examined
|
||||
*/
|
||||
void FillInputsAndOutputsTranspositionInfo(const InferenceEngine::CNNNetwork& net);
|
||||
|
||||
bool isFP32ModeActive() const;
|
||||
std::shared_ptr<request::ModelWrapper> createModelWrapperForLoadNetwork(bool trivial);
|
||||
std::shared_ptr<request::ModelWrapper> createModelWrapperForImportNetwork(uint32_t numberOfOperations);
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include "gna_itt.hpp"
|
||||
#include "legacy/net_pass.h"
|
||||
#include "legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp"
|
||||
#include "legacy/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.hpp"
|
||||
#include "ngraph/opsets/opset2.hpp"
|
||||
#include "ngraph/opsets/opset7.hpp"
|
||||
#include "openvino/pass/manager.hpp"
|
||||
#include "optimizer/gna_pass_manager.hpp"
|
||||
@ -18,7 +20,9 @@
|
||||
#include "transformations/common_optimizations/fq_reshape_fusion.hpp"
|
||||
#include "transformations/common_optimizations/pull_transpose_through_fq.hpp"
|
||||
#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp"
|
||||
#include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
|
||||
#include "transformations/common_optimizations/transpose_sinking.hpp"
|
||||
#include "transformations/common_optimizations/transpose_to_reshape.hpp"
|
||||
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
|
||||
#include "transformations/convert_dwsc_to_scaleshifts.hpp"
|
||||
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
|
||||
@ -28,6 +32,8 @@
|
||||
#include "transformations/decompose_mvn.hpp"
|
||||
#include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp"
|
||||
#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp"
|
||||
#include "transformations/fuse_conv_bias_activation.hpp"
|
||||
#include "transformations/gather_sinking.hpp"
|
||||
#include "transformations/handle_transposes_around_matmul.hpp"
|
||||
#include "transformations/init_node_info.hpp"
|
||||
#include "transformations/insert_copy_layer.hpp"
|
||||
@ -37,6 +43,7 @@
|
||||
#include "transformations/markup_fusable_transpose.hpp"
|
||||
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
|
||||
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
|
||||
#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp"
|
||||
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
|
||||
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
|
||||
#include "transformations/op_conversions/softsign_decomposition.hpp"
|
||||
@ -48,13 +55,28 @@
|
||||
#include "transformations/remove_in_out_processing.hpp"
|
||||
#include "transformations/remove_single_input_concat.hpp"
|
||||
#include "transformations/reorder_activation_and_pooling.hpp"
|
||||
#include "transformations/replace_gna_nhwc_layers.hpp"
|
||||
#include "transformations/reshape_transpose_substitute.hpp"
|
||||
#include "transformations/rotate_inputs.hpp"
|
||||
#include "transformations/split_convolution_with_large_buffer_size.hpp"
|
||||
#include "transformations/split_eltwise.hpp"
|
||||
#include "transformations/substitute_softsign.hpp"
|
||||
#include "transformations/swap_input_matmul_gna.hpp"
|
||||
#include "transformations/transpose_sinking/ts_concat.hpp"
|
||||
#include "transformations/transpose_sinking/ts_fuse.hpp"
|
||||
#include "transformations/transpose_sinking/ts_general.hpp"
|
||||
#include "transformations/transpose_sinking/ts_split.hpp"
|
||||
#include "transformations/ts_concat_forward.hpp"
|
||||
#include "transformations/ts_split_backward.hpp"
|
||||
#include "transformations/unfuse_reshape_and_transpose.hpp"
|
||||
#include "transformations/utils/transformation_helper.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
using namespace ov;
|
||||
using namespace ov::opset8;
|
||||
using namespace ov::intel_gna::limitations;
|
||||
using namespace ov::intel_gna::pass::helper;
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
@ -64,12 +86,13 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
|
||||
|
||||
fake_quantized = ov::op::util::has_op_with_type<ngraph::op::FakeQuantize>(model);
|
||||
const bool has_convolution = ov::op::util::has_op_with_type<ngraph::opset7::Convolution>(model);
|
||||
const bool has_maxpool = ov::op::util::has_op_with_type<ov::opset8::MaxPool>(model);
|
||||
const bool has_slice = ov::op::util::has_op_with_type<ov::opset8::Slice>(model);
|
||||
const bool has_matmul = ov::op::util::has_op_with_type<ngraph::opset7::MatMul>(model);
|
||||
const bool has_mvn = ov::op::util::has_op_with_type<ngraph::opset7::MVN>(model) ||
|
||||
const bool has_mvn = ov::op::util::has_op_with_type<ov::opset8::MVN>(model) ||
|
||||
ov::op::util::has_op_with_type<ov::op::v0::MVN>(model);
|
||||
ov::pass::Manager manager;
|
||||
manager.register_pass<ov::pass::InitNodeInfo>();
|
||||
|
||||
// In OV API 2.0(IRv10) default conversion to fp32 (inputs, outputs and weights) is disabled
|
||||
// and we need to run the ConvertPrecision transformation to support old networks.
|
||||
manager.register_pass<ov::pass::ConvertPrecision>(precisions_map{{ngraph::element::f16, ngraph::element::f32}});
|
||||
@ -104,7 +127,6 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
|
||||
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithBias>();
|
||||
manager.register_pass<ov::intel_gna::pass::SwapInputMatMul>();
|
||||
manager.register_pass<ov::intel_gna::pass::HandleTransposesAroundMatMul>();
|
||||
manager.register_pass<ov::intel_gna::pass::InsertTransposeAfterConvOrPool>();
|
||||
manager.register_pass<ov::intel_gna::pass::Unfuse2dto4dReshapeAndTranspose>();
|
||||
manager.register_pass<ov::intel_gna::pass::Unfuse4dto2dReshapeAndTranspose>();
|
||||
manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
|
||||
@ -112,11 +134,21 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
|
||||
manager.register_pass<ov::intel_gna::pass::RemoveSingleInputConcat>();
|
||||
manager.register_pass<ov::intel_gna::pass::SubstituteSoftsign>();
|
||||
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeLayerToBeEliminated>();
|
||||
if (!has_convolution && !has_matmul && !has_mvn) {
|
||||
// TODO: Remove this condition when the legacy layout transformation (NCHW->NHWC) is disabled
|
||||
manager.register_pass<ov::intel_gna::pass::RemoveInputsProcessing>(input_output_subgraphs);
|
||||
manager.register_pass<ov::intel_gna::pass::RemoveOutputsProcessing>(input_output_subgraphs);
|
||||
// TODO enable this transformation for networks without convolutions
|
||||
if (has_convolution || has_maxpool || has_mvn || has_matmul) {
|
||||
manager.register_pass<ov::intel_gna::pass::ReplaceGnaNHWCLayers>();
|
||||
manager.register_pass<ov::intel_gna::pass::InsertConvolutionTransposeHW>();
|
||||
manager.register_pass<ov::pass::TransposeSinkingGeneral>();
|
||||
manager.register_pass<ov::intel_gna::pass::GatherSinkingGeneral>();
|
||||
manager.register_pass<ov::pass::ReshapeSequenceFusion>();
|
||||
manager.register_pass<ov::pass::TransposeToReshape>();
|
||||
manager.register_pass<ov::intel_gna::pass::GnaConvolutionFusion>();
|
||||
manager.register_pass<ov::intel_gna::pass::TSConcatForward>();
|
||||
manager.register_pass<ov::intel_gna::pass::TSSplitBackward>();
|
||||
manager.register_pass<ov::pass::transpose_sinking::TSFuse>();
|
||||
}
|
||||
manager.register_pass<ov::intel_gna::pass::RemoveInputsProcessing>(input_output_subgraphs);
|
||||
manager.register_pass<ov::intel_gna::pass::RemoveOutputsProcessing>(input_output_subgraphs);
|
||||
manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
|
||||
manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
|
||||
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
|
||||
@ -160,6 +192,62 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
|
||||
{ov::element::u32, ov::element::i32}});
|
||||
const auto& pass_config = manager.get_pass_config();
|
||||
|
||||
pass_config->set_callback<ov::pass::transpose_sinking::TSConcatForward>(
|
||||
[](const std::shared_ptr<const ov::Node>& node) -> bool {
|
||||
const TransposeInfo transpose_info = get_first_input_transpose(node);
|
||||
if (transpose_info.isEmpty())
|
||||
return false;
|
||||
const bool is_supported = Limitations::is_forward_transposed_concat_supported(
|
||||
node,
|
||||
transpose_info.transpose_const->get_axis_vector_val());
|
||||
if (!is_supported)
|
||||
mark_input_transposes_as_nosinking(node);
|
||||
return !is_supported;
|
||||
});
|
||||
|
||||
pass_config->set_callback<ov::pass::transpose_sinking::TSConcatBackward>(
|
||||
[](const std::shared_ptr<const ov::Node>& node) -> bool {
|
||||
const TransposeInfo transpose_info = get_first_output_transpose(node);
|
||||
if (transpose_info.isEmpty())
|
||||
return false;
|
||||
return !Limitations::is_backward_transposed_concat_supported(
|
||||
node,
|
||||
transpose_info.transpose_const->get_axis_vector_val());
|
||||
});
|
||||
|
||||
pass_config->set_callback<ov::pass::transpose_sinking::TSSplitForward>(
|
||||
[](const std::shared_ptr<const ov::Node>& node) -> bool {
|
||||
const TransposeInfo transpose_info = get_first_input_transpose(node);
|
||||
if (transpose_info.isEmpty())
|
||||
return false;
|
||||
const bool is_supported = Limitations::is_forward_transposed_split_supported(
|
||||
node,
|
||||
transpose_info.transpose_const->get_axis_vector_val());
|
||||
if (!is_supported)
|
||||
mark_input_transposes_as_nosinking(node);
|
||||
return !is_supported;
|
||||
});
|
||||
|
||||
pass_config->set_callback<ov::pass::transpose_sinking::TSSplitBackward>(
|
||||
[](const std::shared_ptr<const ov::Node>& node) -> bool {
|
||||
const TransposeInfo transpose_info = get_first_output_transpose(node);
|
||||
if (transpose_info.isEmpty())
|
||||
return false;
|
||||
return !Limitations::is_backward_transposed_split_supported(
|
||||
node,
|
||||
transpose_info.transpose_const->get_axis_vector_val());
|
||||
});
|
||||
|
||||
/**
|
||||
* TransposeSinking doesn't currently support StridedSlice. We disable SliceToStridedSlice
|
||||
* transformation to prevent converting Slice to StridedSlice. This allows us to work with
|
||||
* networks that initially have Slice.
|
||||
* That could be removed after StridedSlice implementation in TransposeSinking
|
||||
*/
|
||||
if (has_slice && (has_convolution || has_maxpool || has_mvn)) {
|
||||
pass_config->disable<ov::pass::SliceToStridedSlice>();
|
||||
}
|
||||
|
||||
// Allowing FP16 Converts to be folded and FP16 constants to upgrade to FP32 data type
|
||||
pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
|
||||
pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
|
||||
@ -177,8 +265,23 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
|
||||
// Operations Max and Min aren't supported
|
||||
pass_config->disable<ov::pass::ConcatReduceFusion>();
|
||||
|
||||
pass_config->disable<ov::pass::ConcatReduceFusion>();
|
||||
manager.run_passes(model);
|
||||
|
||||
/**
|
||||
* As we disabled SliceToStridedSlice, we have after all transformations
|
||||
* Slice, that is not supported natively in our plugin. Here we convert
|
||||
* Slice -> StridedSlice -> CropIE
|
||||
* That could be removed after StridedSlice implementation in TransposeSinking
|
||||
*/
|
||||
if (has_slice && (has_convolution || has_maxpool || has_mvn)) {
|
||||
ov::pass::Manager manager;
|
||||
manager.register_pass<ov::pass::InitNodeInfo>();
|
||||
manager.register_pass<ov::pass::SliceToStridedSlice>(true);
|
||||
manager.register_pass<ngraph::pass::ConvertStridedSliceToCropMatcher>();
|
||||
manager.run_passes(model);
|
||||
}
|
||||
|
||||
is_ngraph_passes_used = true;
|
||||
}
|
||||
|
||||
@ -204,8 +307,6 @@ void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& ne
|
||||
passes->registerPass<FuseFQIntoWeightsPass>();
|
||||
passes->registerPass<MoveFakeQuantizeLayerIntoQuantParamsPass>();
|
||||
|
||||
passes->registerPass<TransposeWeightsFromNCHWToNHWCPass>();
|
||||
|
||||
passes->registerPass<SubstitutePReluPass>();
|
||||
|
||||
if (!is_ngraph_passes_used) {
|
||||
@ -221,7 +322,7 @@ void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& ne
|
||||
passes->registerPass<FlattenTrivialConcatPass>();
|
||||
passes->registerPass<InsertConcatAligningFilterPass>();
|
||||
passes->registerPass<ReorderConcatInputsPass>();
|
||||
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
|
||||
|
||||
// Keep legacy inserting of Identity layer here
|
||||
// because concat and split aligning passes are not moved to ngraph yet
|
||||
passes->registerPass<InsertIdentityLayerPass>();
|
||||
|
@ -59,11 +59,11 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
|
||||
const std::vector<KRT> reducers{{49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2}};
|
||||
auto reducer = 1.0;
|
||||
const auto inDepth =
|
||||
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
InferenceEngine::GetDataDimSizeNHWC(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
const auto inHeight =
|
||||
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
|
||||
InferenceEngine::GetDataDimSizeNHWC(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
|
||||
const auto inWidth =
|
||||
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
|
||||
InferenceEngine::GetDataDimSizeNHWC(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
|
||||
if (is3DInputOr2DKernel(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
|
||||
!isMappableFrom2DTo1D(inHeight,
|
||||
inWidth,
|
||||
|
@ -297,7 +297,7 @@ public:
|
||||
return isOfType("FakeQuantize");
|
||||
}
|
||||
bool isNonFunctional() const {
|
||||
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze") || isTrivialPermute();
|
||||
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze") || isTrivialPermute() || is_gather();
|
||||
}
|
||||
bool isReshape() const noexcept {
|
||||
return isOfType("reshape");
|
||||
@ -305,6 +305,9 @@ public:
|
||||
bool isPermute() const noexcept {
|
||||
return isOfType("permute");
|
||||
}
|
||||
bool is_gather() const noexcept {
|
||||
return isOfType("gather");
|
||||
}
|
||||
bool isPermuteFusable() const noexcept {
|
||||
return isPermute() &&
|
||||
(layer->params.count(ov::intel_gna::rt_info::GNATransposeFusable::get_type_info_static()) > 0);
|
||||
@ -349,11 +352,8 @@ public:
|
||||
bool isNonValuesChangable() const {
|
||||
return isNonFunctional() || isSplit() || isSlice() || isConcat();
|
||||
}
|
||||
bool is_gather() const noexcept {
|
||||
return isOfType("gather");
|
||||
}
|
||||
bool is_fq_non_sensitive() const {
|
||||
return isPermute() || is_gather() || isNonFunctional();
|
||||
return isPermute() || isNonFunctional();
|
||||
}
|
||||
bool isPooling() const noexcept {
|
||||
return isOfType("pooling");
|
||||
|
@ -133,6 +133,9 @@ public:
|
||||
// Stores the given padding type in m_auto_pad.
void set_auto_pad(const ov::op::PadType& auto_pad) {
|
||||
m_auto_pad = auto_pad;
|
||||
}
|
||||
// Returns the m_has_add_node flag.
bool has_add_node() const {
|
||||
return m_has_add_node;
|
||||
}
|
||||
// Returns m_has_add_node, i.e. the same flag that has_add_node() reports.
bool has_bias() const {
|
||||
return m_has_add_node;
|
||||
}
|
||||
|
@ -110,17 +110,6 @@ static void SumBlobs(Blob::Ptr& src_blob, Blob::Ptr& dst_blob) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates a newly allocated blob with the tensor descriptor of the given blob and copies the content
 * of the given blob into it.
 * @param readOnlyBlob blob to copy the descriptor and the data from
 * @param name name passed to the Data object the new blob is created from
 * @return the newly allocated copy
 */
static Blob::Ptr convertToRWBlob(const Blob::Ptr& readOnlyBlob, const std::string& name = {}) {
    auto writable_blob = Blob::CreateFromData(std::make_shared<Data>(name, readOnlyBlob->getTensorDesc()));
    writable_blob->allocate();

    // Byte sizes are the element count multiplied by the size of the blob precision
    const auto dst_size_bytes = writable_blob->size() * writable_blob->getTensorDesc().getPrecision().size();
    const auto src_size_bytes = readOnlyBlob->size() * readOnlyBlob->getTensorDesc().getPrecision().size();

    // buffer() is called inside the copy expression so the returned temporaries stay alive during the copy
    const auto ret = ie_memcpy(writable_blob->buffer().as<uint8_t*>(),
                               dst_size_bytes,
                               readOnlyBlob->buffer().as<uint8_t*>(),
                               src_size_bytes);
    IE_ASSERT(ret == 0);
    return writable_blob;
}
|
||||
|
||||
// indexes stored in pass manager
|
||||
static const char identityLayersCounterName[] = "identityLayerCounter";
|
||||
static const char diagonalLayersCounterName[] = "diagonalLayerCounter";
|
||||
@ -2419,225 +2408,6 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass ::run() {
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeWeightsFromNCHWToNHWCPass::run() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "TransposeWeightsFromNCHWToNHWCPass");
|
||||
if (!MustBeConvertedFromNCHWToNHWC(*pLayers))
|
||||
return;
|
||||
|
||||
auto printTranspositionInfo = [](const std::vector<TranspositionInfo>& transpositionInfo) {
|
||||
for (const auto& transpositionInfoPart : transpositionInfo) {
|
||||
log::debug() << "transpose=" << transpositionInfoPart.transpose
|
||||
<< " rows_num=" << transpositionInfoPart.num_transpose_rows
|
||||
<< " columns_num=" << transpositionInfoPart.num_transpose_columns << "\n";
|
||||
}
|
||||
};
|
||||
|
||||
auto transpInfoMatchWeightsSize =
|
||||
[](const std::vector<TranspositionInfo>& transpositionInfo, size_t weightsSize, const std::string& layerName) {
|
||||
size_t totalElements = 0;
|
||||
for (auto&& transpositionInfoPart : transpositionInfo) {
|
||||
totalElements += transpositionInfoPart.num_transpose_rows * transpositionInfoPart.num_transpose_columns;
|
||||
}
|
||||
if (totalElements != weightsSize) {
|
||||
THROW_GNA_EXCEPTION << layerName << " weights elements from transposition info (" << totalElements
|
||||
<< ") don't match input dimensions (" << weightsSize << ")";
|
||||
}
|
||||
};
|
||||
|
||||
for (auto&& l : *pLayers) {
|
||||
if (LayerInfo(l).isScaleShift()) {
|
||||
std::vector<TranspositionInfo> transpositionInfo;
|
||||
// Try to find a convolution in previous layers
|
||||
if (InferenceEngine::CNNNetHasPrevLayer(l.get())) {
|
||||
transpositionInfo = FindTranspositionInfoFromPrevLayers(InferenceEngine::CNNNetPrevLayer(l));
|
||||
// If no convolutions are found try to find them in next layers
|
||||
if (!FoundPartToTranspose(transpositionInfo) && !l->outData.empty() &&
|
||||
!getInputTo(l->outData[0]).empty()) {
|
||||
transpositionInfo = FindTranspositionInfoFromNextLayers(getInputTo(l->outData[0]).begin()->second);
|
||||
}
|
||||
}
|
||||
if (FoundPartToTranspose(transpositionInfo)) {
|
||||
if (l->input()->getDims().front() > 1) {
|
||||
THROW_GNA_EXCEPTION << l->name
|
||||
<< " Weights transposition is not supported for a layer with batch size > 1";
|
||||
}
|
||||
auto weightable = dynamic_cast<WeightableLayer*>(l.get());
|
||||
IE_ASSERT(weightable != nullptr);
|
||||
|
||||
size_t totalWeights = weightable->_weights->size();
|
||||
transpInfoMatchWeightsSize(transpositionInfo, totalWeights, l->name);
|
||||
|
||||
ConvertTensorFromNCHWToNHWC(weightable->precision.size(),
|
||||
1,
|
||||
weightable->_weights->size(),
|
||||
weightable->_weights->cbuffer().as<uint8_t*>(),
|
||||
true,
|
||||
transpositionInfo);
|
||||
if (weightable->_biases) {
|
||||
ConvertTensorFromNCHWToNHWC(weightable->precision.size(),
|
||||
1,
|
||||
weightable->_biases->size(),
|
||||
weightable->_biases->cbuffer().as<uint8_t*>(),
|
||||
true,
|
||||
transpositionInfo);
|
||||
}
|
||||
log::debug() << l->name << " weights and biases rows transposition info:\n";
|
||||
printTranspositionInfo(transpositionInfo);
|
||||
}
|
||||
}
|
||||
|
||||
if (LayerInfo(l).isFullyConnected()) {
|
||||
auto weightable = dynamic_cast<WeightableLayer*>(l.get());
|
||||
IE_ASSERT(weightable != nullptr);
|
||||
IE_ASSERT(weightable->_weights != nullptr);
|
||||
auto precision = weightable->precision.size();
|
||||
auto out_dims = l->outData[0]->getDims();
|
||||
auto in_dims = l->input()->getDims();
|
||||
auto weightsRows = InferenceEngine::details::product(std::begin(out_dims) + 1, std::end(out_dims));
|
||||
auto weightsColumns = InferenceEngine::details::product(std::begin(in_dims) + 1, std::end(in_dims));
|
||||
// Find a convolution in previous layers to rotate weights rows
|
||||
if (InferenceEngine::CNNNetHasPrevLayer(l.get())) {
|
||||
std::vector<TranspositionInfo> transpositionInfo;
|
||||
auto prevLayer = InferenceEngine::CNNNetPrevLayer(l);
|
||||
transpositionInfo = FindTranspositionInfoFromPrevLayers(prevLayer);
|
||||
if (FoundPartToTranspose(transpositionInfo)) {
|
||||
if (l->input()->getDims().front() > 1) {
|
||||
THROW_GNA_EXCEPTION
|
||||
<< l->name << " Weights transposition is not supported for a layer with batch size > 1";
|
||||
}
|
||||
if (LayerInfo(prevLayer).isSplit()) {
|
||||
// If we found a split it's not possible to rotate data
|
||||
THROW_GNA_EXCEPTION << l->name << " won't be transposed due to a split before it";
|
||||
}
|
||||
|
||||
transpInfoMatchWeightsSize(transpositionInfo, weightsColumns, l->name);
|
||||
|
||||
weightable->_weights = convertToRWBlob(weightable->_weights);
|
||||
|
||||
ConvertTensorFromNCHWToNHWC(precision,
|
||||
weightsRows,
|
||||
weightsColumns,
|
||||
weightable->_weights->buffer().as<uint8_t*>(),
|
||||
true,
|
||||
transpositionInfo);
|
||||
log::debug() << l->name << " weights rows transposition info:\n";
|
||||
printTranspositionInfo(transpositionInfo);
|
||||
}
|
||||
}
|
||||
// Find a convolution in next layers to rotate weights columns
|
||||
if (!l->outData.empty() && !getInputTo(l->outData[0]).empty()) {
|
||||
std::vector<TranspositionInfo> transpositionInfo;
|
||||
auto nextLayer = getInputTo(l->outData[0]).begin()->second;
|
||||
transpositionInfo = FindTranspositionInfoFromNextLayers(nextLayer);
|
||||
if (FoundPartToTranspose(transpositionInfo)) {
|
||||
if (l->outData[0]->getDims().front() > 1) {
|
||||
THROW_GNA_EXCEPTION
|
||||
<< l->name << " Weights transposition is not supported for a layer with batch size > 1";
|
||||
}
|
||||
if (LayerInfo(nextLayer).isConcat()) {
|
||||
// If we found a concat it's not possible to rotate data
|
||||
THROW_GNA_EXCEPTION << l->name << " won't be transposed due to a concat after it";
|
||||
}
|
||||
|
||||
transpInfoMatchWeightsSize(transpositionInfo, weightsRows, l->name);
|
||||
|
||||
weightable->_weights = convertToRWBlob(weightable->_weights);
|
||||
|
||||
ConvertTensorFromNCHWToNHWC(precision,
|
||||
weightsRows,
|
||||
weightsColumns,
|
||||
weightable->_weights->cbuffer().as<uint8_t*>(),
|
||||
false,
|
||||
transpositionInfo);
|
||||
log::debug() << l->name << " weights columns transposition info:\n";
|
||||
printTranspositionInfo(transpositionInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (LayerInfo(l).isEltwise()) {
|
||||
// We need to transpose a constant which is an eltwise input
|
||||
auto firstInput = InferenceEngine::CNNNetPrevLayer(l, 0);
|
||||
auto secondInput = InferenceEngine::CNNNetPrevLayer(l, 1);
|
||||
if (!LayerInfo(firstInput).isConst() && !LayerInfo(secondInput).isConst()) {
|
||||
continue;
|
||||
}
|
||||
// Make sure the constant is the second input
|
||||
if (LayerInfo(firstInput).isConst()) {
|
||||
std::swap(firstInput, secondInput);
|
||||
}
|
||||
// Find a convolution in previous or next layers
|
||||
auto transpositionInfo = FindTranspositionInfoFromPrevLayers(firstInput);
|
||||
if (!FoundPartToTranspose(transpositionInfo) && !l->outData.empty() && !getInputTo(l->outData[0]).empty()) {
|
||||
transpositionInfo = FindTranspositionInfoFromNextLayers(getInputTo(l->outData[0]).begin()->second);
|
||||
}
|
||||
if (FoundPartToTranspose(transpositionInfo)) {
|
||||
auto blob = secondInput->blobs["custom"];
|
||||
ConvertTensorFromNCHWToNHWC(blob->getTensorDesc().getPrecision().size(),
|
||||
1,
|
||||
blob->size(),
|
||||
blob->buffer().as<uint8_t*>(),
|
||||
true,
|
||||
transpositionInfo);
|
||||
log::debug() << secondInput->name << " data transposition info:\n";
|
||||
printTranspositionInfo(transpositionInfo);
|
||||
}
|
||||
}
|
||||
|
||||
if (LayerInfo(l).isConcat()) {
|
||||
auto concatLayer = LayerInfo(l).as<InferenceEngine::ConcatLayer*>();
|
||||
IE_ASSERT(concatLayer != nullptr);
|
||||
// If concatenation is along channel axis constant input transposition isn't required
|
||||
if (concatLayer->_axis <= 1)
|
||||
continue;
|
||||
|
||||
std::vector<InferenceEngine::CNNLayerPtr> constInputs;
|
||||
bool transpose = false;
|
||||
int nonConstInputIx = 0;
|
||||
// Check if non-const inputs are transposed
|
||||
for (int i = 0; InferenceEngine::CNNNetHasPrevLayer(l.get(), i); ++i) {
|
||||
auto input = InferenceEngine::CNNNetPrevLayer(l, i);
|
||||
if (LayerInfo(input).isConst()) {
|
||||
constInputs.push_back(input);
|
||||
continue;
|
||||
}
|
||||
auto transpositionInfo = FindTranspositionInfoFromPrevLayers(input);
|
||||
bool transposeInput = FoundPartToTranspose(transpositionInfo);
|
||||
if (nonConstInputIx == 0) {
|
||||
transpose = transposeInput;
|
||||
} else if (transposeInput != transpose) {
|
||||
THROW_GNA_EXCEPTION << "Concat layer " << l->name << " inputs have different layouts";
|
||||
}
|
||||
++nonConstInputIx;
|
||||
}
|
||||
if (!transpose)
|
||||
continue;
|
||||
|
||||
// Transpose all constant inputs
|
||||
for (auto&& input : constInputs) {
|
||||
auto rows = GetDataDimByName(input->outData[0], DataDimName::C);
|
||||
auto columns = GetDataDimByName(input->outData[0], DataDimName::H) *
|
||||
GetDataDimByName(input->outData[0], DataDimName::W);
|
||||
|
||||
auto blob = convertToRWBlob(input->blobs["custom"]);
|
||||
input->blobs["custom"] = blob;
|
||||
|
||||
// A constant should have the same number of channels since concatenation will be in height/width
|
||||
// dimension
|
||||
TranspositionInfo concatTranspositionInfo{true, rows, columns};
|
||||
ConvertTensorFromNCHWToNHWC(blob->getTensorDesc().getPrecision().size(),
|
||||
1,
|
||||
blob->size(),
|
||||
blob->buffer().as<uint8_t*>(),
|
||||
true,
|
||||
{concatTranspositionInfo});
|
||||
log::debug() << input->name << " data transposition info:\n";
|
||||
printTranspositionInfo({concatTranspositionInfo});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FuseFullyConnectedWithEltwisePass::run() {
|
||||
// This legacy pass removes the Eltwise (only if it performs SUM op) from between FC and Any.
|
||||
// The blob data of Const layer attached to Eltwise is added to biases blob data of FC layer.
|
||||
|
@ -214,14 +214,6 @@ DECL_PASS(FuseFQIntoWeights);
|
||||
*/
|
||||
DECL_PASS(MoveFakeQuantizeLayerIntoQuantParams);
|
||||
|
||||
/**
|
||||
* @brief convert FullyConnected, ScaleShift and Eltwise layers weights order from NCHW to NHWC.
|
||||
* Information for transposition is found from convolution/pooling input or output dimensions.
|
||||
* Convolution weights are transposed in finalizeConvolution1DPrimitive() method (gna_graph_compiler.cpp).
|
||||
* They are transposed for both NCHW and NHWC models, since MO always stores them in NCHW layout.
|
||||
*/
|
||||
DECL_PASS(TransposeWeightsFromNCHWToNHWC);
|
||||
|
||||
/**
|
||||
* @brief fuse FullyConnected and Eltwise layers, also in case there is a Reshape between them having input with only
|
||||
* one dimension > 1
|
||||
|
@ -26,7 +26,7 @@ struct MVNData {
|
||||
size_t W;
|
||||
size_t num_parts;
|
||||
float eps;
|
||||
op::MVNEpsMode eps_mode;
|
||||
ov::op::MVNEpsMode eps_mode;
|
||||
bool normalize_variance;
|
||||
element::Type element_type;
|
||||
std::string name;
|
||||
@ -138,7 +138,7 @@ static std::shared_ptr<Node> NormalizeVariance(const std::shared_ptr<opset8::MVN
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1},
|
||||
op::PadType::VALID);
|
||||
ov::op::PadType::VALID);
|
||||
transposed_avg_conv_3->set_friendly_name(mvn_data.name + "_Avg3");
|
||||
auto avg_conv_3 =
|
||||
std::make_shared<opset8::Transpose>(transposed_avg_conv_3,
|
||||
@ -156,7 +156,7 @@ static std::shared_ptr<Node> NormalizeVariance(const std::shared_ptr<opset8::MVN
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1},
|
||||
op::PadType::VALID);
|
||||
ov::op::PadType::VALID);
|
||||
transposed_avg_conv_4->set_friendly_name(mvn_data.name + "_Avg4");
|
||||
auto avg_conv_4 =
|
||||
std::make_shared<opset8::Transpose>(transposed_avg_conv_4,
|
||||
@ -243,7 +243,7 @@ static void Decompose(const std::shared_ptr<opset8::MVN> mvn, const MVNData& mvn
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1},
|
||||
op::PadType::VALID);
|
||||
ov::op::PadType::VALID);
|
||||
transposed_avg_conv_1->set_friendly_name(mvn_data.name + "_Avg1");
|
||||
auto avg_conv_1 =
|
||||
std::make_shared<opset8::Transpose>(transposed_avg_conv_1,
|
||||
@ -261,7 +261,7 @@ static void Decompose(const std::shared_ptr<opset8::MVN> mvn, const MVNData& mvn
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1},
|
||||
op::PadType::VALID);
|
||||
ov::op::PadType::VALID);
|
||||
transposed_avg_conv_2->set_friendly_name(mvn_data.name + "_Avg2");
|
||||
auto avg_conv_2 =
|
||||
std::make_shared<opset8::Transpose>(transposed_avg_conv_2,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user