commit 9ef10dddab
Merge branch 'master' into river/cpu_plugin_api_2.0
@@ -408,8 +408,8 @@ jobs:
    displayName: 'GNA UT'
    enabled: 'false' # TODO: fix

  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ieMultiPluginUnitTests.xml
    displayName: 'MULTI UT'
  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml
    displayName: 'AUTO UT'

  - script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_batch_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_batch_unit_tests.xml
    displayName: 'AutoBatch UT'
@@ -430,7 +430,6 @@ jobs:

  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
  - script: |
      export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.05.00.2116/linux/x64:$(LD_LIBRARY_PATH)
      python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph $(PYTHON_STATIC_ARGS) \
        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
        --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
@@ -439,8 +438,6 @@ jobs:

  # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
  - script: |
      # For python imports to import pybind_mock_frontend
      export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.05.00.2116/linux/x64:$(LD_LIBRARY_PATH)
      python3 -m pytest -sv $(INSTALL_TEST_DIR)/pyopenvino $(PYTHON_STATIC_ARGS) \
        --junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
        --ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py \
@@ -449,7 +446,6 @@ jobs:
    displayName: 'Python API 2.0 Tests'

  - script: |
      export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.05.00.2116/linux/x64:$(LD_LIBRARY_PATH)
      python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
    displayName: 'Model Optimizer UT'

@@ -306,8 +306,8 @@ jobs:
  - script: $(INSTALL_TEST_DIR)/ov_cpu_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_unit_tests.xml
    displayName: 'Intel CPU Unit Tests'

  - script: $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ieMultiPluginUnitTests.xml
    displayName: 'MULTI UT'
  - script: $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml
    displayName: 'AUTO UT'

  - script: $(INSTALL_TEST_DIR)/ov_template_func_tests --gtest_filter=*smoke* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-templateFuncTests.xml
    env:
@@ -196,8 +196,8 @@ jobs:
    displayName: 'Intel CPU Unit Tests'
    enabled: 'false'

  - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ieMultiPluginUnitTests.xml
    displayName: 'MULTI UT'
  - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml
    displayName: 'AUTO UT'
    enabled: 'false'

  - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_cpu_func_tests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_func_tests.xml
@@ -306,8 +306,8 @@ jobs:
  - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_gna_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ov_gna_unit_tests.xml
    displayName: 'GNA UT'

  - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ieMultiPluginUnitTests.xml
    displayName: 'MULTI UT'
  - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ov_auto_unit_tests.xml
    displayName: 'AUTO UT'

  - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_auto_batch_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ov_auto_batch_unit_tests.xml
    displayName: 'AutoBatch UT'
.github/labeler.yml (vendored, 1 line changed)
@@ -41,6 +41,7 @@

'category: dependency_changes':
- '**/requirement*.txt'
- '**/constraints*.txt'
- 'scripts/**/*'
- '.gitmodules'
- '**/setup.py'

@@ -126,7 +126,7 @@ ie_option(ENABLE_OV_IR_FRONTEND "Enable IR FrontEnd" ON)
ie_option(ENABLE_OV_TF_FRONTEND "Enable TensorFlow FrontEnd" ON)
ie_option(ENABLE_OV_TF_LITE_FRONTEND "Enable TensorFlow Lite FrontEnd" ON)
ie_dependent_option(ENABLE_SNAPPY_COMPRESSION "Enables compression support for TF FE" ON
    "ENABLE_OV_TF_FRONTEND" ON)
    "ENABLE_OV_TF_FRONTEND" OFF)

if(CMAKE_HOST_LINUX AND LINUX)
    # Debian packages are enabled on Ubuntu systems
@@ -22,7 +22,7 @@ Local Deployment Options

- using Debian / RPM packages - a recommended way for Linux operating systems;
- using PIP package manager on PyPI - the default approach for Python-based applications;
- using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to :doc:`Installing OpenVINO on Linux from Docker <openvino_docs_install_guides_installing_openvino_docker_linux>` and :doc:`Installing OpenVINO on Windows from Docker <openvino_docs_install_guides_installing_openvino_docker_windows>`.
- using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to :doc:`Installing OpenVINO on Linux from Docker <openvino_docs_install_guides_installing_openvino_docker_linux>`

Furthermore, to customize your OpenVINO Docker image, use the `Docker CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ to generate a Dockerfile and build the image.

@@ -44,7 +44,7 @@ The table below shows which distribution type can be used for what target opera
   * - RPM packages
     - Red Hat Enterprise Linux 8, 64-bit
   * - Docker images
     - Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit; Windows Server Core base LTSC 2019, 64-bit; Windows 10, version 20H2, 64-bit
     - Ubuntu 22.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit
   * - PyPI (PIP package manager)
     - See https://pypi.org/project/openvino
   * - :doc:`OpenVINO Deployment Manager <openvino_docs_install_guides_deployment_manager_tool>`
docs/_static/images/DeviceDriverVersion.PNG (vendored, 3 lines changed)
@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c2f144de249eddf1c159cbc1a27a06ad40f57442efcf75f2f49cc02626fc6875
size 13168

docs/_static/images/DeviceDriverVersion.svg (vendored, new file, 3 lines changed)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d3a47af0e18856603b93a15d9415ddddd4ae06058cdfa0e18597e9eb861bf589
size 51390

docs/_static/selector-tool/assets/icons-regular-7c563649.woff2 (vendored, new file, binary)
Binary file not shown.

docs/_static/selector-tool/assets/index-89e3365b.js (vendored, new file, 1 line changed)
@@ -0,0 +1 @@
const e=document.getElementById("selector");if(!e)throw new Error("cannot find selector document");window.addEventListener("message",i=>{e.style.height=i.data.height+"px"});var o,n;const t=(n=(o=e.contentDocument)==null?void 0:o.body)==null?void 0:n.offsetHeight;t&&(e.style.height=`${t}px`);

docs/_static/selector-tool/assets/intelone-bodytext-font-family-medium-a08af450.woff2 (vendored, new file, binary)
Binary file not shown.

docs/_static/selector-tool/assets/intelone-bodytext-font-family-regular-8fde65a1.woff2 (vendored, new file, binary)
Binary file not shown.

docs/_static/selector-tool/assets/selector-363359f4.js (vendored, new file, 54 lines changed)
File diff suppressed because one or more lines are too long

docs/_static/selector-tool/assets/selector-5c3f26d1.css (vendored, new file, 1 line changed)
File diff suppressed because one or more lines are too long

docs/_static/selector-tool/selector-0290a24.html (vendored, new file, 22 lines changed)
@@ -0,0 +1,22 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta name="version" content="0290a24" />
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Download Intel® Distribution of OpenVINO™ Toolkit</title>
    <meta
      name="description"
      content="Download a version of the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows, or macOS."
    />
    <script
      type="module"
      crossorigin
      src="./assets/selector-363359f4.js"
    ></script>
    <link rel="stylesheet" href="./assets/selector-5c3f26d1.css" />
  </head>
  <body>
    <div id="root"></div>
  </body>
</html>
@@ -25,7 +25,7 @@ To use sample applications, install OpenVINO Runtime via one of the following di

* Archive files (recommended) - :doc:`Linux <openvino_docs_install_guides_installing_openvino_from_archive_linux>` | :doc:`Windows <openvino_docs_install_guides_installing_openvino_from_archive_windows>` | :doc:`macOS <openvino_docs_install_guides_installing_openvino_from_archive_macos>`
* :doc:`APT <openvino_docs_install_guides_installing_openvino_apt>` or :doc:`YUM <openvino_docs_install_guides_installing_openvino_yum>` for Linux
* Docker image - :doc:`Linux <openvino_docs_install_guides_installing_openvino_docker_linux>` | :doc:`Windows <openvino_docs_install_guides_installing_openvino_docker_windows>`
* Docker image - :doc:`Linux <openvino_docs_install_guides_installing_openvino_docker_linux>`
* `Build from source <https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build.md>`__

Make sure that you also `install OpenCV <https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO>`__, as it's required for running sample applications.

@@ -1,66 +1,126 @@
# Configurations for Intel® Processor Graphics (GPU) with OpenVINO™ {#openvino_docs_install_guides_configurations_for_intel_gpu}

@sphinxdirective

.. _gpu guide:

To use the OpenVINO™ GPU plugin and offload inference to Intel® Processor Graphics (GPU), Intel® Graphics Driver must be properly configured on your system.

If Intel® Graphics Driver is already installed and you would like to keep it, you can skip the installation steps below.
To use the OpenVINO™ GPU plugin and offload inference to Intel® Processor Graphics (GPU), the Intel® Graphics Driver must be properly configured on the system.

Linux
#####

To install the latest available **Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver** for your operating system,
see its `installation guide on GitHub <https://github.com/intel/compute-runtime/releases/latest>`_.
To use a GPU device for OpenVINO inference, you must meet the following prerequisites:

.. note::
- Use a supported Linux kernel as per the `documentation <https://dgpu-docs.intel.com/driver/kernel-driver-types.html>`__
- Install ``intel-i915-dkms`` and ``xpu-smi`` kernel modules as described in the `installation documentation <https://dgpu-docs.intel.com/driver/installation.html>`__
- Install GPU Runtime packages:

   If you are using RedHat 8, you can install the OpenCL library as a prerequisite by using the following command:
   ``http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm``
  - `The Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver <https://github.com/intel/compute-runtime/releases/latest>`__
  - `Intel Graphics Memory Management Library <https://github.com/intel/gmmlib>`__
  - `Intel® Graphics Compiler for OpenCL™ <https://github.com/intel/intel-graphics-compiler>`__
  - `OpenCL ICD loader package <https://github.com/KhronosGroup/OpenCL-ICD-Loader>`__

.. _wsl-instal:

Depending on your operating system, there may be different methods to install the above packages. Below are the instructions on how to install the packages on supported Linux distributions.

You may consider installing one of the earlier versions of the driver, based on your particular setup needs.
.. tab-set::

For instructions and recommendations on the installation of a specific GPU driver release, as well as the list of supported hardware platforms, refer to the `Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver GitHub home page <https://github.com/intel/compute-runtime/>`__.
   .. tab-item:: Ubuntu 22.04 LTS
      :sync: ubuntu22

For instructions specific to discrete graphics platforms, refer to `the dgpu guide <https://dgpu-docs.intel.com/installation-guides/index.html>`__,
including installation guides for Intel® Arc™ A-Series Graphics, Intel® Data Center GPU Flex Series, Intel® Data Center GPU MAX Series, Intel® processor graphics Gen12, and Intel® Iris Xe MAX codename DG1.
      Download and install the `deb` packages published `here <https://github.com/intel/compute-runtime/releases/latest>`__ and install the apt package `ocl-icd-libopencl1` with the OpenCL ICD loader.

      Alternatively, you can add the apt repository by following the `installation guide <https://dgpu-docs.intel.com/driver/installation.html#ubuntu-install-steps>`__. Then install the `ocl-icd-libopencl1`, `intel-opencl-icd`, `intel-level-zero-gpu` and `level-zero` apt packages:

      .. code-block:: sh

         apt-get install -y ocl-icd-libopencl1 intel-opencl-icd intel-level-zero-gpu level-zero

   .. tab-item:: Ubuntu 20.04 LTS
      :sync: ubuntu20

      Ubuntu 20.04 LTS is not updated with the latest driver versions. You can install driver versions up to 22.43 from apt:

      .. code-block:: sh

         apt-get update && apt-get install -y --no-install-recommends curl gpg gpg-agent && \
         curl https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
         echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal-legacy main' | tee /etc/apt/sources.list.d/intel.gpu.focal.list && \
         apt-get update
         apt-get update && apt-get install -y --no-install-recommends intel-opencl-icd intel-level-zero-gpu level-zero

      Alternatively, download an older `deb` version from `here <https://github.com/intel/compute-runtime/releases>`__. Note that older driver versions might not include some of the bug fixes and might not be supported on some of the latest platforms. Check the supported hardware for the versions you are installing.

   .. tab-item:: RedHat UBI 8
      :sync: redhat8

      Follow the `guide <https://dgpu-docs.intel.com/driver/installation.html#rhel-install-steps>`__ to add the Yum repository.

      Install the following packages:

      .. code-block:: sh

         yum install intel-opencl level-zero intel-level-zero-gpu intel-igc-core intel-igc-cm intel-gmmlib intel-ocloc

      Install the OpenCL ICD Loader via:

      .. code-block:: sh

         rpm -ivh http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm

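Once the driver packages are installed, you can verify that OpenVINO actually sees the GPU by querying the available devices. A minimal check, assuming the ``openvino`` Python package is installed:

.. code-block:: python

   from openvino.runtime import Core

   core = Core()
   # "GPU" appears in this list once the driver stack is configured correctly
   print(core.available_devices)  # e.g. ['CPU', 'GPU']
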
.. _gpu guide windows:

Windows
#######

To install the Intel Graphics Driver for Windows on your system, follow the `driver installation guide <https://www.intel.com/content/www/us/en/support/articles/000005629/graphics.html>`_.
To install the Intel Graphics Driver for Windows, follow the `driver installation instructions <https://www.intel.com/content/www/us/en/support/articles/000005629/graphics.html>`_.

To check if you have this driver installed:
To check if the driver has been installed:

1. Type **device manager** in your **Search Windows** box and press Enter. The **Device Manager** opens.
2. Click the drop-down arrow to view the **Display adapters**. You can see the adapter that is installed in your computer:
1. Type **device manager** in the **Search Windows** field and press Enter. **Device Manager** will open.
2. Click the drop-down arrow to expand **Display Adapters**. You can see the adapter that is installed in your computer:

.. image:: _static/images/DeviceManager.PNG
   :width: 400

3. Right-click the adapter name and select **Properties**.
4. Click the **Driver** tab to see the driver version.
3. Right-click on the adapter name and select **Properties**.
4. Click the **Driver** tab to view the driver version.

.. image:: _static/images/DeviceDriverVersion.PNG
.. image:: _static/images/DeviceDriverVersion.svg
   :width: 400

You are done updating your device driver and ready to use your GPU.
Your device driver is now updated and your GPU is ready to use.

Additional info
###############
Windows Subsystem for Linux (WSL)
#################################

For your reference, the following versions of Intel® Graphics Driver were used in the OpenVINO internal validation:
WSL allows developers to run a GNU/Linux development environment on the Windows operating system. Using the GPU in WSL is very similar to using it in a native Linux environment.

.. note::

   Make sure your Intel graphics driver is updated to version **30.0.100.9955** or later. You can download and install the latest GPU host driver `here <https://www.intel.com/content/www/us/en/download/19344/intel-graphics-windows-dch-drivers.html>`__.

Below are the required steps to make it work with OpenVINO:

- Install the GPU drivers as described :ref:`above <wsl-instal>`.
- Run the following commands in PowerShell to update WSL2 to the latest version:

  .. code-block:: sh

     wsl --update
     wsl --shutdown

- When booting Ubuntu 20.04 or Ubuntu 22.04, install the same drivers as described above in the Linux section.

.. note::

   In WSL, the GPU device is accessed via the character device `/dev/dxg`, while on native Linux it is accessed via `/dev/dri`.

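A quick way to confirm which device node is present, assuming a Python interpreter is available inside the Linux or WSL environment:

.. code-block:: python

   from pathlib import Path

   # WSL2 exposes the GPU through /dev/dxg; native Linux uses /dev/dri
   for node in ("/dev/dxg", "/dev/dri"):
       print(node, "exists" if Path(node).exists() else "missing")
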
Additional Resources
####################

The following Intel® Graphics Driver versions were used during OpenVINO's internal validation:

+------------------+-------------------------------------------------------------------------------------------+
| Operating System | Driver version                                                                            |
@@ -80,24 +140,11 @@ For your reference, the following versions of Intel® Graphics Driver were used
What’s Next?
############

You can try out the toolkit with:

* `Python Quick Start Example <notebooks/201-vision-monodepth-with-output.html>`_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser.

Visit the :ref:`Tutorials <notebook tutorials>` page for more Jupyter Notebooks to get you started with OpenVINO, such as:

* `OpenVINO Python API Tutorial <notebooks/002-openvino-api-with-output.html>`__
* `Basic image classification program with Hello Image Classification <notebooks/001-hello-world-with-output.html>`__
* `Convert a PyTorch model and use it for image background removal <notebooks/205-vision-background-removal-with-output.html>`__

* `C++ Quick Start Example <openvino_docs_get_started_get_started_demos.html>`__ for step-by-step instructions on building and running a basic image classification C++ application.

Visit the :ref:`Samples <code samples>` page for other C++ example applications to get you started with OpenVINO, such as:

* `Basic object detection with the Hello Reshape SSD C++ sample <openvino_inference_engine_samples_hello_reshape_ssd_README.html>`_
* `Automatic speech recognition C++ sample <openvino_inference_engine_samples_speech_sample_README.html>`_

* :doc:`GPU Device <openvino_docs_OV_UG_supported_plugins_GPU>`
* :doc:`Install Intel® Distribution of OpenVINO™ toolkit for Linux from a Docker Image <openvino_docs_install_guides_installing_openvino_docker_linux>`
* `Docker CI framework for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
* `Get Started with DockerHub CI for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__
* `Dockerfiles with Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/dockerfiles/README.md>`__

@endsphinxdirective

@@ -1,180 +1,32 @@
# Install Intel® Distribution of OpenVINO™ toolkit for Linux from a Docker Image {#openvino_docs_install_guides_installing_openvino_docker_linux}

@sphinxdirective

This guide provides steps on creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Linux and using the image on different devices.
Supported operating systems for the Docker Base image:

System Requirements
###################
- Ubuntu 22.04 LTS
- Ubuntu 20.04 LTS
- RedHat UBI 8

.. tab:: Target Operating Systems with Python Versions
The `Docker CI framework <https://github.com/openvinotoolkit/docker_ci/>`__ can generate a Dockerfile, build, test, and deploy an image using the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer, and customize the OpenVINO™ image to your needs. You can get started easily with pre-built and published Docker images. Details on how to get started can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__.

   +----------------------------------------------+-------------------------+
   | Operating System                             | Included Python Version |
   +==============================================+=========================+
   | Ubuntu 18.04 long-term support (LTS), 64-bit | 3.8                     |
   +----------------------------------------------+-------------------------+
   | Ubuntu 20.04 long-term support (LTS), 64-bit | 3.8                     |
   +----------------------------------------------+-------------------------+
   | Red Hat Enterprise Linux 8, 64-bit           | 3.8                     |
   +----------------------------------------------+-------------------------+
To start using the images, the following conditions must be met:

.. tab:: Host Operating Systems
- Linux OS or Windows Subsystem for Linux (WSL2)
- An installed Docker engine or compatible container engine
- Permissions to run containers (sudo or docker group membership)

* Linux
* Windows Subsystem for Linux 2 (WSL2) on CPU or GPU
* macOS on CPU only

To launch a Linux image on WSL2 when trying to run inferences on a GPU, make sure that the following requirements are met:

* Only Windows 10 with 21H2 update or above installed and Windows 11 are supported.
* Intel GPU driver for Windows, version 30.0.100.9684 or newer needs to be installed. For more details, refer to
  `this article at intel.com <https://www.intel.com/content/www/us/en/artificial-intelligence/harness-the-power-of-intel-igpu-on-your-machine.html#articleparagraph_983312434>`__.
* Currently, the Docker images contain the preinstalled recommended version of OpenCL Runtime with WSL2 support.

Installation
#############

* Use a prebuilt image:

  1. `Get a prebuilt image from provided sources <#getting-a-prebuilt-image-from-provided-sources>`__
  2. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__
  3. `Run samples in the Docker image <#running-samples-in-docker-image>`__

* If you want to customize your image, you can also build a Docker image manually:

  1. `Prepare a Dockerfile <#preparing-a-dockerfile>`__
  2. `Configure the Docker image <#configuring-the-image-for-different-devices>`__
  3. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__
  4. `Run samples in the Docker image <#running-samples-in-docker-image>`__

Getting a Prebuilt Image from Provided Sources
++++++++++++++++++++++++++++++++++++++++++++++

You can find prebuilt images on:

- `Docker Hub <https://hub.docker.com/u/openvino>`__
- `Red Hat Quay.io <https://quay.io/organization/openvino>`__
- `Red Hat Ecosystem Catalog (runtime image) <https://catalog.redhat.com/software/containers/intel/openvino-runtime/606ff4d7ecb5241699188fb3>`__
- `Red Hat Ecosystem Catalog (development image) <https://catalog.redhat.com/software/containers/intel/openvino-dev/613a450dc9bc35f21dc4a1f7>`__
- `Azure Marketplace <https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino>`__

Preparing a Dockerfile
++++++++++++++++++++++

You can use the `available Dockerfiles on GitHub <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__
or generate a Dockerfile with your settings via the `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__,
which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You can also try our `Tutorials <https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials>`__,
which demonstrate the usage of Docker containers with OpenVINO.

Configuring the Image for Different Devices
+++++++++++++++++++++++++++++++++++++++++++

If you want to run inference on a CPU, no extra configuration is needed.
Go to `Run the image on different devices <#running-the-docker-image-on-different-devices>`__ for the next step.

If you want to run inference on a GPU, follow the instructions provided in the guide on
:doc:`Configuration for Intel GPU <openvino_docs_install_guides_configurations_for_intel_gpu>`.

Running the Docker Image on Different Devices
+++++++++++++++++++++++++++++++++++++++++++++

Running the Image on CPU
-------------------------

Run the Docker image with the following command:

.. code-block:: sh

   docker run -it --rm <image_name>

Note the following:

- The kernel reports the same information (for example, CPU and memory) to all containers as to a native application.
- All instructions available to the host process are available to the process in the container, including, for example, AVX2 and AVX512; there are no restrictions.
- Docker does not use virtualization or emulation. A process in Docker is a regular Linux process, isolated from the external world at the kernel level, so the performance loss is minor.

Running the Image on GPU
-------------------------
OpenVINO's `Docker <https://docs.docker.com/>`__ and `Bare Metal <https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html#doxid-ovms-docs-deploying-server>`__ distributions are identical, so the documentation applies to both.

.. note::

   Only Intel® integrated graphics are supported.
The OpenVINO development environment in a Docker container is also available in the `notebook repository <https://github.com/openvinotoolkit/openvino_notebooks>`__. It can be deployed in `Red Hat OpenShift Data Science (RHODS) <https://github.com/openvinotoolkit/operator/blob/main/docs/notebook_in_rhods.md>`__.

Note the following:

- The GPU is not available in the container by default. You must attach it to the container.
- The kernel driver must be installed on the host.
- In the container, a non-root user must be in the ``video`` and ``render`` groups.
  To add a user to the render group, follow the
  `Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu 20.04 <https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md>`__.

To make the GPU available in the container, attach the GPU to the container using the ``--device /dev/dri`` option and run the container:

* Ubuntu 18 or RHEL 8:

  .. code-block:: sh

     docker run -it --rm --device /dev/dri <image_name>

  .. note::

     If your host system is Ubuntu 20, follow the
     `Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04 <https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md>`__.

* WSL2:

  .. code-block:: sh

     docker run -it --rm --device /dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl <image_name>

  .. note::

     To launch a Linux image on WSL2, make sure that the additional `System Requirements <#system-requirements>`__ are met.

Running Samples in Docker Image
###############################

To run the ``Hello Classification Sample`` on a specific inference device, run the following commands:

.. tab-set::

   .. tab-item:: CPU

      .. code-block:: sh

         docker run -it --rm <image_name>
         /bin/bash -c "cd ~ && omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 /opt/intel/openvino/samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp CPU"

   .. tab-item:: GPU

      .. code-block:: sh

         docker run -itu root:root --rm --device /dev/dri:/dev/dri <image_name>
         /bin/bash -c "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp GPU"

Additional Resources
###############################

- `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ for Intel® Distribution of OpenVINO™ toolkit.
  The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
  You can reuse available Dockerfiles, add your layer, and customize the OpenVINO™ image for your needs.
- `Intel® Distribution of OpenVINO™ toolkit home page <https://software.intel.com/en-us/openvino-toolkit>`__
- `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__
More information about Docker CI for Intel® Distribution of OpenVINO™ toolkit can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__

* `Docker CI framework for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
* `Get Started with DockerHub CI for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__
* `Dockerfiles with Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/dockerfiles/README.md>`__

@endsphinxdirective

@@ -1,234 +0,0 @@
# Install Intel® Distribution of OpenVINO™ toolkit for Windows from Docker Image {#openvino_docs_install_guides_installing_openvino_docker_windows}

@sphinxdirective

This guide provides steps for creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Windows and using the Docker image on different devices.

.. _system-requirements-docker-windows:

System Requirements
####################

.. tab:: Target Operating System with Python Versions

   +------------------------------------+--------------------------+
   | Operating System                   | Supported Python Version |
   +====================================+==========================+
   | Windows Server Core base LTSC 2019 | 3.8                      |
   +------------------------------------+--------------------------+
   | Windows 10, version 20H2           | 3.8                      |
   +------------------------------------+--------------------------+

.. tab:: Host Operating Systems

   * Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions
   * Windows Server 2016 or higher

Additional Requirements for GPU
+++++++++++++++++++++++++++++++

To use GPU Acceleration in Windows containers, make sure that the following requirements for the Windows host, OpenVINO and Docker are met:

- `Windows requirements <https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration>`__:

  - The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher.
  - The container base image must be ``mcr.microsoft.com/windows:1809`` or higher. Windows Server Core and Nano Server container images are not currently supported.
  - The container host must be running Docker Engine 19.03 or higher.
  - The container host must have a GPU running display drivers of version WDDM 2.5 or higher.

- GPU requirement for OpenVINO: Intel Graphics Driver for Windows of version 15.65 or higher.
- `Docker isolation mode requirements <https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container>`__:

  - Windows host and container version tags must match.
  - `Windows host and container isolation process support <https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility>`__.

Installation Flow
####################

There are two ways to install OpenVINO with Docker. You can choose either of them according to your needs:

* Use a prebuilt image. Do the following steps:

  1. `Get a prebuilt image from provided sources <#getting-a-prebuilt-image-from-provided-sources>`__.
  2. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__.

* If you want to customize your image, you can also build a Docker image manually by using the following steps:

  1. `Prepare a Dockerfile <#preparing-a-dockerfile>`__.
  2. `Configure the Docker image <#configuring-the-docker-image-for-different-devices>`__.
  3. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__.

Getting a Prebuilt Image from Provided Sources
##############################################

You can find prebuilt images on:

- `Docker Hub <https://hub.docker.com/u/openvino>`__
- `Azure Marketplace <https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino>`__

Preparing a Dockerfile
######################

You can use the `available Dockerfiles on GitHub <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__ or generate a Dockerfile with your settings via the `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__, which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.

Configuring the Docker Image for Different Devices
##################################################

Installing Additional Dependencies for CPU
++++++++++++++++++++++++++++++++++++++++++

Installing CMake
----------------

To add CMake to the image, add the following commands to the Dockerfile:

.. code-block:: bat

   RUN powershell.exe -Command `
       Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; `
       Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; `
       Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force

   RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%"

In case of proxy issues, please add the ``ARG HTTPS_PROXY`` and ``-Proxy %%HTTPS_PROXY%%`` settings to the ``powershell.exe`` command in the Dockerfile. Then build a Docker image:

.. code-block:: bat

   docker build . -t <image_name> `
   --build-arg HTTPS_PROXY=<https://your_proxy_server:port>

Installing Microsoft Visual Studio Build Tools
----------------------------------------------

You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the `offline <https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019>`__ or `online <https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019>`__ installers for Build Tools.

Microsoft Visual Studio Build Tools are licensed as a supplement to your existing Microsoft Visual Studio license.

Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses.

To add MSBuild 2019 to the image, add the following commands to the Dockerfile:

.. code-block:: bat

   RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe

   RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache `
       --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" `
       --add Microsoft.VisualStudio.Workload.MSBuildTools `
       --add Microsoft.VisualStudio.Workload.UniversalBuildTools `
       --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended `
       --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 `
       --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 `
       --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 `
       --remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned

In case of proxy issues, please use the `offline installer for Build Tools <https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019>`__.

Configuring the Image for GPU
+++++++++++++++++++++++++++++

.. note::

   Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and follow the steps below to build the image manually.

1. Reuse one of the `available Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__. You can also use your own Dockerfile.
2. Check your `Windows host and container isolation process compatibility <https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility>`__.
3. Find the appropriate Windows container base image on `DockerHub <https://hub.docker.com/_/microsoft-windows>`__ and set up your host/container version in the ``FROM`` Dockerfile instruction.

   For example, in the ``openvino_c_dev_<version>.dockerfile``, change:

   .. code-block:: bat

      FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base

   to:

   .. code-block:: bat

      FROM mcr.microsoft.com/windows:20H2

4. Build the Docker image by running the following command:

   .. code-block:: bat

      docker build --build-arg package_url=<OpenVINO pkg> -f <Dockerfile> -t <image_name> .

5. Copy ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder to any ``temp`` directory:

   .. code-block:: bat

      mkdir C:\tmp
      copy C:\Windows\System32\OpenCL.dll C:\tmp

Running the Docker Image on Different Devices
#############################################

Running the Image on CPU
++++++++++++++++++++++++

To start the interactive session, run the following command:

.. code-block:: bat

   docker run -it --rm <image_name>

If you want to try some samples, run the image with the following command:

.. code-block:: bat

   docker run -it --rm <image_name>
   cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU"

Running the Image on GPU
++++++++++++++++++++++++

.. note::

   Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and `configure and build the image manually <#configuring-the-image-for-gpu>`__ before you can run inferences on a GPU.

1. To try inference on a GPU, run the image with the following command:

   .. code-block:: bat

      docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp <image_name>

   where

   - ``--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599`` is a reserved interface class GUID for a GPU device.
   - ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409`` is the path to the OpenCL driver home directory. To find it on your PC, search for directories matching the ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*`` pattern.
   - ``C:\tmp`` is the folder with the copy of ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder.

2. Copy ``OpenCL.dll`` to the ``C:\Windows\System32`` folder inside the container and set the appropriate registry entry. Now you can run inference on a GPU device:

   .. code-block:: bat

      copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0

   For example, run the ``Hello Classification Python`` sample with the following command:

   .. code-block:: bat

      omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU

Additional Resources
####################

- `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
- Intel® Distribution of OpenVINO™ toolkit home page: `https://software.intel.com/en-us/openvino-toolkit <https://software.intel.com/en-us/openvino-toolkit>`__
- `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__

@endsphinxdirective
@@ -22,11 +22,11 @@ Intel® Distribution of OpenVINO™ toolkit is a comprehensive toolkit for devel
Install OpenVINO
################

.. button-link:: https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html
   :color: primary
   :outline:
.. raw:: html

   Check out the OpenVINO Download Page :fas:`fa-external-link-alt`
   <script type="module" crossorigin src="_static/selector-tool/assets/index-89e3365b.js"></script>
   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
   <iframe id="selector" src="_static/selector-tool/selector-0290a24.html" style="width: 100%; border: none" title="Download Intel® Distribution of OpenVINO™ Toolkit"></iframe>

OpenVINO installation package is distributed in two parts: OpenVINO Runtime and OpenVINO Development Tools.
@@ -9,7 +9,6 @@
   Use Archive <openvino_docs_install_guides_installing_openvino_from_archive_windows>
   Use PyPI <openvino_docs_install_guides_installing_openvino_pip>
   Use Conda Forge <openvino_docs_install_guides_installing_openvino_conda>
   Use Docker <openvino_docs_install_guides_installing_openvino_docker_windows>

If you want to install OpenVINO™ Runtime on Windows, you have the following options:
@@ -17,7 +16,6 @@ If you want to install OpenVINO™ Runtime on Windows, you have the following op
* :doc:`Install OpenVINO Runtime from an Archive File <openvino_docs_install_guides_installing_openvino_from_archive_windows>`
* :doc:`Install OpenVINO Runtime using PyPI <openvino_docs_install_guides_installing_openvino_pip>`
* :doc:`Install OpenVINO Runtime using Conda Forge <openvino_docs_install_guides_installing_openvino_conda>`
* :doc:`Install OpenVINO using Docker <openvino_docs_install_guides_installing_openvino_docker_windows>`

For a full selection of distribution channels,
see the `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__
@@ -54,13 +54,14 @@ fi
# Selftest

if [ -n "$selftest" ] ; then
    for image in centos7 centos8 rhel8 rhel9.1 \
                 almalinux8.7 amzn2 \
                 fedora34 fedora35 fedora36 fedora37 fedora38 \
                 raspbian9 debian9 ubuntu18.04 \
                 raspbian10 debian10 ubuntu20.04 ubuntu20.10 ubuntu21.04 \
                 raspbian11 debian11 ubuntu21.10 ubuntu22.04 \
                 raspbian12 debian12 ubuntu22.10 ubuntu23.04 ; do
    for image in centos:7 centos:8 rhel:8 rhel:9.1 \
                 almalinux:8.7 amazonlinux:2 \
                 fedora:34 fedora:35 fedora:36 fedora:37 fedora:38 \
                 opensuse/leap:15.3 \
                 raspbian:9 debian:9 ubuntu:18.04 \
                 raspbian:10 debian:10 ubuntu:20.04 ubuntu:20.10 ubuntu:21.04 \
                 raspbian:11 debian:11 ubuntu:21.10 ubuntu:22.04 \
                 raspbian:12 debian:12 ubuntu:22.10 ubuntu:23.04 ; do
        for opt in "-h" "-p" "-e -p" "-n" "-n -e" "-y" "-y -e" ; do
            echo "||"
            echo "|| Test $image / '$opt'"
@@ -118,14 +119,14 @@ if [ "$os" == "raspbian9" ] || [ "$os" == "debian9" ] ; then
    # which are not supported by OpenVINO

    pkgs_core=(libpugixml1v5)
    pkgs_gpu=()
    pkgs_gpu=(ocl-icd-libopencl1)
    pkgs_python=()
    pkgs_dev=(pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo)

elif [ "$os" == "ubuntu18.04" ] ; then

    pkgs_core=(libtbb2 libpugixml1v5)
    pkgs_gpu=()
    pkgs_gpu=(ocl-icd-libopencl1)
    pkgs_python=(python3.8 libpython3.8 python3.8-venv python3-pip)
    pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo)

@@ -134,7 +135,7 @@ elif [ "$os" == "ubuntu20.04" ] || [ "$os" == "debian10" ] || [ "$os" == "raspbi
      [ "$os" == "ubuntu22.10" ] || [ "$os" == "ubuntu23.04" ] || [ "$os" == "debian12" ] || [ "$os" == "raspbian12" ]; then

    pkgs_core=(libpugixml1v5)
    pkgs_gpu=()
    pkgs_gpu=(ocl-icd-libopencl1)
    pkgs_python=(python3 python3-venv python3-pip)
    pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json3-dev make curl sudo)

@@ -195,6 +196,7 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||

    if [ "$os" == "centos7" ] || [ "$os" == "amzn2" ] ; then
        pkgs_core=("tbb.$arch" "pugixml.$arch" "gflags.$arch")
        pkgs_gpu+=("ocl-icd.$arch")
        pkgs_dev+=("gflags-devel.$arch")
        extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm")
    elif [ "$os" == "centos8" ] || [ "$os" == "rhel8" ] || [ "$os" == "almalinux8.7" ] ; then
@@ -203,9 +205,7 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
            "https://download-ib01.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm"
            "https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-2.1.2-6.el8.$arch.rpm"
        )
        pkgs_gpu+=(
            "http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm"
        )
        pkgs_gpu+=("http://mirror.centos.org/centos/8-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.12-1.el8.$arch.rpm")
        pkgs_python+=(python38 python38-pip)
        pkgs_dev+=(
            "https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-devel-2.1.2-6.el8.$arch.rpm"
@@ -218,13 +218,14 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
            "https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/p/pugixml-1.13-1.el9.$arch.rpm"
            "https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-2.2.2-9.el9.$arch.rpm"
        )
        pkgs_gpu+=("https://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.13-4.el9.$arch.rpm")
        pkgs_python=(python3 python3-pip)
        pkgs_dev+=("https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm")
        extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm")
    fi
elif [ "$os" == "opensuse-leap15.3" ] ; then
    pkgs_core=(libtbb2 libtbbmalloc2 libpugixml1)
    pkgs_gpu=()
    pkgs_gpu=(libOpenCL1)
    pkgs_python=(python39-base python39 python39-venv python39-pip)
    pkgs_dev=(cmake pkg-config gcc-c++ gcc gflags-devel-static zlib-devel nlohmann_json-devel make curl sudo)
else
@@ -122,7 +122,7 @@
 * @ingroup ov_c_api
 * @brief The definitions & operations about tensor
 *
 * @defgroup ov_remote_context_c_api ov_remote_context
 * @defgroup ov_remote_context_c_api Remote Context
 * @ingroup ov_c_api
 * @brief Set of functions representing RemoteContext
 */
@@ -1,6 +1,6 @@
# used in multiple components
onnx==1.13.1 # Python bindings, ONNX Frontend
numpy>=1.16.6,<1.25 # Python bindings, frontends
numpy>=1.16.6,<1.26 # Python bindings, frontends
protobuf>=3.18.1,<4.0.0 # Python bindings, frontends

# pytest
@@ -17,8 +17,8 @@ from openvino._pyopenvino import get_version
__version__ = get_version()

# main classes
from openvino._pyopenvino import FrontEndManager
from openvino._pyopenvino import FrontEnd
from openvino.frontend.frontend import FrontEndManager
from openvino.frontend.frontend import FrontEnd
from openvino._pyopenvino import InputModel
from openvino._pyopenvino import NodeContext
from openvino._pyopenvino import Place
src/bindings/python/src/openvino/frontend/frontend.py (new file, 44 lines changed)
@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from typing import Union

from openvino._pyopenvino import FrontEnd as FrontEndBase
from openvino._pyopenvino import FrontEndManager as FrontEndManagerBase
from openvino._pyopenvino import InputModel
from openvino.runtime import Model


class FrontEnd(FrontEndBase):
    def __init__(self, fe: FrontEndBase) -> None:
        super().__init__(fe)

    def convert(self, model: Union[Model, InputModel]) -> Model:
        converted_model = super().convert(model)
        if isinstance(model, InputModel):
            return Model(converted_model)
        return converted_model

    def convert_partially(self, model: InputModel) -> Model:
        return Model(super().convert_partially(model))

    def decode(self, model: InputModel) -> Model:
        return Model(super().decode(model))

    def normalize(self, model: Model) -> Model:
        return Model(super().normalize(model))


class FrontEndManager(FrontEndManagerBase):
    def load_by_framework(self, framework: str) -> Union[FrontEnd, None]:
        fe = super().load_by_framework(framework)
        if fe is not None:
            return FrontEnd(fe)
        return fe

    def load_by_model(self, model_path: str) -> Union[FrontEnd, None]:
        fe = super().load_by_model(model_path)
        if fe is not None:
            return FrontEnd(fe)
        return fe
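These wrappers exist so that frontend calls return the Python-level ``Model`` class instead of the pybind11 base type. A minimal usage sketch, assuming the ONNX frontend is available and a ``model.onnx`` file exists (both are illustrative):

.. code-block:: python

   from openvino.frontend import FrontEndManager

   fem = FrontEndManager()
   fe = fem.load_by_framework("onnx")   # wrapped FrontEnd, or None if unavailable
   input_model = fe.load("model.onnx")  # InputModel from the pybind layer
   ov_model = fe.convert(input_model)   # returns openvino.runtime.Model
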
@ -9,6 +9,7 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
from openvino.runtime import op, PartialShape, Type as OVType, OVAny, Shape

import typing
from packaging.version import parse
import torch
import numpy as np

@ -133,24 +134,27 @@ class TorchScriptPythonDecoder (Decoder):
        import inspect

        def prepare_example_inputs(inputs, input_signature):
            if inputs is not None:
                if isinstance(inputs, dict):
                    if input_signature is not None:
                        ordered_inputs = []
                        used_sign = []
                        for key in input_signature:
                            if key not in inputs:
                                continue
                            ordered_inputs.append(inputs[key])
                            used_sign.append(key)
                        inputs = ordered_inputs
                        input_signature = used_sign
                    else:
                        inputs = list(inputs.values())
                        input_signature = input_signature[:len(inputs)]
            if isinstance(inputs, torch.Tensor):
                inputs = [inputs]
            return inputs, input_signature
            is_torch_2 = parse(torch.__version__) >= parse("2.0.0")
            if isinstance(inputs, dict):
                ordered_inputs = []
                if input_signature is not None:
                    used_sign = []
                    for key in input_signature:
                        if key not in inputs:
                            continue
                        ordered_inputs.append(inputs[key])
                        used_sign.append(key)
                    input_signature = used_sign
                else:
                    ordered_inputs = list(inputs.values())
                if is_torch_2:
                    return {"example_kwarg_inputs": inputs}, input_signature
                else:
                    inputs = ordered_inputs
            if isinstance(inputs, torch.Tensor):
                inputs = [inputs]

            return {"example_inputs": inputs}, input_signature

        if isinstance(pt_module, torch.nn.Module):
            pt_module.eval()
@ -160,14 +164,14 @@ class TorchScriptPythonDecoder (Decoder):
        if example_inputs is None:
            scripted = torch.jit.script(pt_module)
        else:
            inputs, input_signature = prepare_example_inputs(example_inputs, input_signature)
            input_parameters, input_signature = prepare_example_inputs(example_inputs, input_signature)
            try:
                scripted = torch.jit.trace(pt_module, inputs)
                scripted = torch.jit.trace(pt_module, **input_parameters)
            except Exception:
                try:
                    scripted = torch.jit.script(pt_module)
                except Exception:
                    scripted = torch.jit.trace(pt_module, inputs, strict=False)
                    scripted = torch.jit.trace(pt_module, **input_parameters, strict=False)
        skip_freeze = False
        for n in scripted.inlined_graph.nodes():
            # TODO: switch off freezing for all traced models
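The version dispatch above can be condensed into a hedged sketch (the helper name is illustrative, not part of the decoder): on torch >= 2.0, dict example inputs are forwarded through torch.jit.trace's example_kwarg_inputs keyword so they are bound by argument name, while older versions only accept positional example_inputs:

from packaging.version import parse
import torch

def trace_with_examples(module, example):
    # dict inputs on torch >= 2.0: bind by keyword, preserving parameter names
    if isinstance(example, dict) and parse(torch.__version__) >= parse("2.0.0"):
        return torch.jit.trace(module, example_kwarg_inputs=example)
    if isinstance(example, torch.Tensor):
        example = [example]
    return torch.jit.trace(module, example_inputs=example)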
@ -15,7 +15,6 @@ __version__ = get_version()

# Openvino pybind bindings and python extended classes
from openvino._pyopenvino import Dimension
from openvino._pyopenvino import Model
from openvino._pyopenvino import Input
from openvino._pyopenvino import Output
from openvino._pyopenvino import Node
@ -36,6 +35,7 @@ from openvino._pyopenvino import RTMap
from openvino.runtime.ie_api import Core
from openvino.runtime.ie_api import CompiledModel
from openvino.runtime.ie_api import InferRequest
from openvino.runtime.ie_api import Model
from openvino.runtime.ie_api import AsyncInferQueue
from openvino._pyopenvino import Version
from openvino._pyopenvino import Tensor
@ -7,12 +7,12 @@ from pathlib import Path

import numpy as np

from openvino._pyopenvino import Model
from openvino._pyopenvino import Model as ModelBase
from openvino._pyopenvino import Core as CoreBase
from openvino._pyopenvino import CompiledModel as CompiledModelBase
from openvino._pyopenvino import AsyncInferQueue as AsyncInferQueueBase
from openvino._pyopenvino import ConstOutput
from openvino._pyopenvino import Tensor
from openvino._pyopenvino import Node

from openvino.runtime.utils.data_helpers import (
    OVDict,
@ -22,6 +22,21 @@ from openvino.runtime.utils.data_helpers import (
)


class Model(ModelBase):
    def __init__(self, *args: Any, **kwargs: Any) -> None:
        if args and not kwargs:
            if isinstance(args[0], ModelBase):
                super().__init__(args[0])
            elif isinstance(args[0], Node):
                super().__init__(*args)
            else:
                super().__init__(*args)
        if args and kwargs:
            super().__init__(*args, **kwargs)
        if kwargs and not args:
            super().__init__(**kwargs)


class InferRequest(_InferRequestWrapper):
    """InferRequest class represents infer request which can be run in asynchronous or synchronous manners."""

@ -160,6 +175,9 @@ class CompiledModel(CompiledModelBase):
        self._infer_request: Optional[InferRequest] = None
        super().__init__(other)

    def get_runtime_model(self) -> Model:
        return Model(super().get_runtime_model())

    def create_infer_request(self) -> InferRequest:
        """Creates an inference request object used to infer the compiled model.

@ -368,6 +386,11 @@ class Core(CoreBase):
    between several Core instances. The recommended way is to have a single
    Core instance per application.
    """
    def read_model(self, model: Union[str, bytes, object], weights: Union[object, str, bytes, Tensor] = None) -> Model:
        if weights is not None:
            return Model(super().read_model(model, weights))
        else:
            return Model(super().read_model(model))

    def compile_model(
        self,
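A hedged usage sketch of the effect of these overrides (the model path and device name are illustrative): read_model and get_runtime_model now hand back the extended Model class rather than the raw pybind object:

from openvino.runtime import Core

core = Core()
model = core.read_model("model.xml")            # openvino.runtime.ie_api.Model wrapper
compiled = core.compile_model(model, "CPU")
runtime_model = compiled.get_runtime_model()    # also wrapped into the Model class above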
@ -30,6 +30,11 @@ void regclass_frontend_FrontEnd(py::module m) {
    py::class_<FrontEnd, std::shared_ptr<FrontEnd>> fem(m, "FrontEnd", py::dynamic_attr(), py::module_local());
    fem.doc() = "openvino.frontend.FrontEnd wraps ov::frontend::FrontEnd";

    fem.def(py::init([](const std::shared_ptr<FrontEnd>& other) {
                return other;
            }),
            py::arg("other"));

    fem.def(
        "load",
        [](FrontEnd& self, const py::object& py_obj) {
@ -53,6 +53,11 @@ void regclass_graph_Model(py::module m) {
    py::class_<ov::Model, std::shared_ptr<ov::Model>> model(m, "Model", py::module_local());
    model.doc() = "openvino.runtime.Model wraps ov::Model";

    model.def(py::init([](const std::shared_ptr<ov::Model>& other) {
                  return other;
              }),
              py::arg("other"));

    model.def(py::init([](const ov::ResultVector& res,
                          const std::vector<std::shared_ptr<ov::Node>>& nodes,
                          const ov::ParameterVector& params,
@ -376,7 +376,7 @@ def test_get_perf_counts(device):
    request = exec_net.requests[0]
    request.infer({'data': img})
    pc = request.get_perf_counts()
    assert pc['29']["status"] == "EXECUTED"
    assert pc['29/WithoutBiases']["status"] == "EXECUTED"
    del exec_net
    del ie_core
    del net
@ -63,6 +63,9 @@ public:
    size_t get_loop_count() const { return m_map.size(); }
    const std::map<size_t, LoopInfoPtr>& get_map() const;

    // Return outer Loop IDs
    static std::vector<size_t> get_outer_expr_loops(const ExpressionPtr& expr, size_t loop_id);

    void mark_loop(LinearIR::constExprIt loop_begin_pos,
                   LinearIR::constExprIt loop_end_pos,
                   size_t loop_depth, size_t vector_size);
@ -74,6 +77,33 @@ public:
                   const std::vector<ExpressionPort>& entries,
                   const std::vector<ExpressionPort>& exits);

    void fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper = true);
    void fuse_loops(LinearIR::constExprIt loop_begin_target, LinearIR::constExprIt loop_end_target,
                    size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper = true);

    // The following methods update the ports of LoopInfo and preserve the order of ports.
    // Reminder: the order is important to find the Loop bounds (the first and the last expressions).
    //  - Update LoopPort - insert new loop target ports instead of the existing one.
    //  - Update ExpressionPort in the LoopPort - keeps the other port parameters. This is the softer method, since an ExpressionPort is a weaker condition than a LoopPort.
    template<typename T>
    void update_loop_port(size_t loop_id, const T& actual_port, const std::vector<T>& target_ports, bool is_entry = true);
    template<typename T>
    void update_loops_port(const std::vector<size_t>& loop_ids, const T& actual_port,
                           const std::vector<T>& target_ports, bool is_entry = true) {
        for (auto loop_id : loop_ids) {
            update_loop_port(loop_id, actual_port, target_ports, is_entry);
        }
    }
    // Sort Loop ports by expression locations in the Linear IR
    void sort_loop_ports(LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos, size_t loop_id);

    // When the previous expression has been replaced with new expressions (decomposition), the method updates the corresponding Loop.
    // If ports of the decomposed expression were Loop ports, these Loop ports may be updated via the parameters `entries` and `exits`.
    // Note: This method should be removed when Softmax decomposition is moved to the data flow pipeline, since
    //       all decompositions should be called in that pipeline.
    void expression_replacement(constExprIt new_expr_begin, constExprIt new_expr_end, const ExpressionPtr& decomposed_expr,
                                size_t loop_id, const std::vector<ExpressionPort>& new_entries, const std::vector<ExpressionPort>& exits);

    void get_loop_bounds(const LinearIR& linear_ir,
                         size_t loop_id,
                         LinearIR::constExprIt& loop_begin_pos,
@ -85,11 +115,19 @@ public:
                         LinearIR::constExprIt& loop_end_pos,
                         size_t loop_id);

private:
    static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
                                  LinearIR::constExprIt loop_end_pos,
                                  std::vector<ExpressionPort>& entries,
                                  std::vector<ExpressionPort>& exits);

    static void fuse_loop_ports(std::vector<LinearIR::LoopManager::LoopPort>& exit_points,
                                std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
                                size_t loop_id);

    /* ===== Methods for working with the Loop IDs of an Expression ===== */
    // Notes:
    //  - These methods don't update the corresponding LoopInfo
    //  - These methods should be private
    // TODO [112195] : fix these notes
    void replace_loop_id(const ExpressionPtr& expr, size_t prev_id, size_t new_id);
    void remove_loop_id(const ExpressionPtr& expr, size_t id);
    // Insert a loop ID before (as outer Loop) or after (as inner Loop) the target ID in the vector of identifiers
@ -100,12 +138,6 @@ public:
    void insert_loop_id(const ExpressionPtr& expr, size_t new_id, bool before = true, size_t target_id = SIZE_MAX);
    void insert_loop_ids(const ExpressionPtr& expr, const std::vector<size_t>& new_ids, bool before = true, size_t target_id = SIZE_MAX);

private:
    static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
                                  LinearIR::constExprIt loop_end_pos,
                                  std::vector<ExpressionPort>& entries,
                                  std::vector<ExpressionPort>& exits);

    std::map<size_t, LoopInfoPtr> m_map = {};
    size_t next_id = 0;
};
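The order-preserving contract of update_loop_port can be modeled with a small self-contained sketch (plain Python lists stand in for LoopInfo; all names are illustrative, not the real API):

# Splice the target ports into the position of the matched port, so the
# relative order of entry/exit points - which encodes the Loop bounds - survives.
def update_loop_port(ports, actual_port, target_ports):
    try:
        i = ports.index(actual_port)
    except ValueError:
        return  # an ExpressionPort may legitimately not be a Loop port
    ports[i:i + 1] = target_ports

entries = ["a", "b", "c"]
update_loop_port(entries, "b", ["b0", "b1"])
assert entries == ["a", "b0", "b1", "c"]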
@ -53,8 +53,8 @@ private:
                    const std::shared_ptr<ExpressionPort>& current_entry_point,
                    size_t current_loop_id, size_t target_loop_id,
                    LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos);
    static void fuse_points(std::vector<LinearIR::LoopManager::LoopPort>& exit_points, std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
                            LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos);
    static void move(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id,
                     LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, LinearIR::constExprIt pos);
};

} // namespace pass
@ -27,13 +27,9 @@ public:
    bool run(LinearIR& linear_ir) override;

private:
    size_t get_count(const PortDescriptorPtr& port_desc) const;
    bool insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it);
    bool insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it);
    void update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector<size_t>& loop_ids,
                      const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry = true);
    void update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info,
                     const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry = true);
    size_t get_count(const PortDescriptorPtr& port_desc) const;

    size_t m_vector_size;
};
@ -62,6 +62,13 @@ LoopInfoPtr LinearIR::LoopManager::get_loop_info(size_t index) const {
    return it->second;
}

std::vector<size_t> LinearIR::LoopManager::get_outer_expr_loops(const ExpressionPtr& expr, size_t loop_id) {
    const auto loop_ids = expr->get_loop_ids();
    const auto it = std::find(loop_ids.cbegin(), loop_ids.cend(), loop_id);
    OPENVINO_ASSERT(it != loop_ids.cend(), "Loop ID hasn't been found");
    return std::vector<size_t>(loop_ids.cbegin(), it);
}

void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir,
                                            size_t loop_id,
                                            LinearIR::constExprIt &loop_begin_pos,
@ -207,7 +214,172 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos,
        insert_loop_id(*expr_it, loop_id);
    }
}
void LinearIR::LoopManager::fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper) {
    LinearIR::constExprIt loop_begin_target, loop_end_target;
    get_loop_bounds(linear_ir, fuse_into_upper ? loop_id_lower : loop_id_upper, loop_begin_target, loop_end_target);
    fuse_loops(loop_begin_target, loop_end_target, loop_id_upper, loop_id_lower, fuse_into_upper);
}

void LinearIR::LoopManager::fuse_loops(LinearIR::constExprIt loop_begin_target, LinearIR::constExprIt loop_end_target,
                                       size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper) {
    OPENVINO_ASSERT(m_map.count(loop_id_upper) == 1 && m_map.count(loop_id_lower) == 1,
                    "Failed Loop Fusion: a Loop with such a Loop ID doesn't exist");

    const auto& loop_info_upper = m_map[loop_id_upper];
    const auto& loop_info_lower = m_map[loop_id_lower];

    auto entry_points_upper = loop_info_upper->entry_points;
    auto exit_points_upper = loop_info_upper->exit_points;
    auto entry_points_lower = loop_info_lower->entry_points;
    auto exit_points_lower = loop_info_lower->exit_points;
    fuse_loop_ports(exit_points_upper, entry_points_lower, loop_id_upper);

    std::vector<LoopManager::LoopPort> new_entries = entry_points_upper;
    new_entries.insert(new_entries.end(), entry_points_lower.begin(), entry_points_lower.end());
    std::vector<LoopManager::LoopPort> new_exits = exit_points_upper;
    new_exits.insert(new_exits.end(), exit_points_lower.begin(), exit_points_lower.end());

    auto& loop_info = fuse_into_upper ? loop_info_upper : loop_info_lower;
    loop_info->entry_points = new_entries;
    loop_info->exit_points = new_exits;

    const auto& from = fuse_into_upper ? loop_id_lower : loop_id_upper;
    const auto& to = fuse_into_upper ? loop_id_upper : loop_id_lower;
    for (auto it = loop_begin_target; it != loop_end_target; ++it) {
        const auto& expr = *it;
        replace_loop_id(expr, from, to);
    }

    remove_loop_info(from);
}

void LinearIR::LoopManager::fuse_loop_ports(std::vector<LinearIR::LoopManager::LoopPort>& exit_points,
                                            std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
                                            size_t loop_id) {
    auto is_loop_id_found = [](const std::vector<size_t>& ids, size_t id) {
        return std::find(ids.cbegin(), ids.cend(), id) != ids.cend();
    };

    std::vector<LinearIR::LoopManager::LoopPort> new_exit_points;
    for (const auto& exit_point : exit_points) {
        const auto consumers_inputs = exit_point.expr_port->get_connected_ports();

        std::set<LinearIR::LoopManager::LoopPort> mapped_entry_points;
        std::set<ExpressionPtr> outside_consumers;
        for (const auto& consumer_input : consumers_inputs) {
            const auto entry_point_it = std::find_if(entry_points.begin(), entry_points.end(),
                                                     [&consumer_input](const LoopManager::LoopPort& point) {
                                                         return *point.expr_port.get() == consumer_input;
                                                     });
            if (entry_point_it != entry_points.end()) {
                mapped_entry_points.insert(*entry_point_it);
                continue;
            }

            const auto& consumer = consumer_input.get_expr();
            const auto loop_ids = consumer->get_loop_ids();
            if (!is_loop_id_found(loop_ids, loop_id)) {
                outside_consumers.insert(consumer);
            }
        }

        // Remove entry points which are mapped
        auto last_point = entry_points.end();
        for (const auto& mapped_entry_point : mapped_entry_points) {
            last_point = std::remove(entry_points.begin(), last_point, mapped_entry_point);
        }
        entry_points.resize(entry_points.size() - mapped_entry_points.size());

        // Keep the exit point if there are consumers outside the loop after fusion
        if (!outside_consumers.empty()) {
            new_exit_points.push_back(exit_point);
        }
    }

    exit_points = new_exit_points;
}

template<>
void LinearIR::LoopManager::update_loop_port(size_t loop_id, const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports,
                                             bool is_entry) {
    const auto& loop_info = get_loop_info(loop_id);
    auto& ports = is_entry ? loop_info->entry_points : loop_info->exit_points;
    auto port_it = std::find_if(ports.begin(), ports.end(),
                                [&actual_port](const LoopPort& point) { return *point.expr_port.get() == actual_port; });
    // In some cases the actual ExpressionPort may not be a LoopPort. We shouldn't throw an exception here,
    // since an ExpressionPort is a weaker condition than a LoopPort:
    // for example, not all inner loop ports are ports of outer loops
    if (port_it == ports.end())
        return;

    // to keep the other parameters of the port, except the expression port
    std::vector<LoopPort> target_loop_ports(target_ports.size(), *port_it);
    std::transform(target_loop_ports.begin(), target_loop_ports.end(), target_ports.begin(), target_loop_ports.begin(),
                   [](LoopPort loop_port, const ExpressionPort& expr_port) {
                       LoopPort copy = std::move(loop_port); // to keep the loop port parameters
                       copy.expr_port = std::make_shared<ExpressionPort>(expr_port);
                       return copy;
                   });
    port_it = ports.erase(port_it);
    ports.insert(port_it, target_loop_ports.cbegin(), target_loop_ports.cend());
}

template<>
void LinearIR::LoopManager::update_loop_port(size_t loop_id, const LoopPort& actual_port, const std::vector<LoopPort>& target_ports,
                                             bool is_entry) {
    const auto& loop_info = get_loop_info(loop_id);
    auto& ports = is_entry ? loop_info->entry_points : loop_info->exit_points;
    auto port_it = std::find_if(ports.begin(), ports.end(),
                                [&actual_port](const LoopPort& point) { return point == actual_port; });
    OPENVINO_ASSERT(port_it != ports.end(), "Failed update_loop_port: the existing loop port has not been found");
    port_it = ports.erase(port_it);
    ports.insert(port_it, target_ports.cbegin(), target_ports.cend());
}

void LinearIR::LoopManager::expression_replacement(constExprIt new_expr_begin, constExprIt new_expr_end, const ExpressionPtr& decomposed_expr,
                                                   size_t loop_id, const std::vector<ExpressionPort>& entries, const std::vector<ExpressionPort>& exits) {
    for (auto it = new_expr_begin; it != new_expr_end; ++it) {
        insert_loop_id(*it, loop_id, true);
    }
    remove_loop_id(decomposed_expr, loop_id);

    auto new_entries = entries;
    auto new_exits = exits;
    if (new_entries.empty() || new_exits.empty()) {
        const auto loop_info = get_loop_info(loop_id);
        get_io_loop_ports(new_expr_begin, new_expr_end, new_entries, new_exits);
    }
    for (size_t i = 0; i < decomposed_expr->get_input_count(); ++i) {
        update_loop_port(loop_id, decomposed_expr->get_input_port(i), new_entries);
    }
    for (size_t i = 0; i < decomposed_expr->get_output_count(); ++i) {
        update_loop_port(loop_id, decomposed_expr->get_output_port(i), new_exits, false);
    }
}

void LinearIR::LoopManager::sort_loop_ports(LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos, size_t loop_id) {
    // The method sorts the Loop ports again
    // [113536] Please update this logic when expression numeration is implemented
    auto push = [](const std::vector<LoopPort>& ports, std::vector<LoopPort>& sorted_ports, const ExpressionPtr& expr) {
        for (const auto& port : ports) {
            if (port.expr_port->get_expr() == expr) {
                sorted_ports.push_back(port);
            }
        }
    };
    auto loop_info = get_loop_info(loop_id);
    const auto& loop_entries = loop_info->entry_points;
    const auto& loop_exits = loop_info->exit_points;
    std::vector<LoopPort> entries, exits;
    entries.reserve(loop_entries.size());
    exits.reserve(loop_exits.size());
    for (auto it = loop_begin_pos; it != loop_end_pos; ++it) {
        const auto& expr = *it;
        push(loop_entries, entries, expr);
        push(loop_exits, exits, expr);
    }
    loop_info->entry_points = entries;
    loop_info->exit_points = exits;
}

void LinearIR::LoopManager::insert_loop_id(const ExpressionPtr& expr, size_t new_id, bool before, size_t target_id) {
    OPENVINO_ASSERT(m_map.count(new_id) == 1, "Failed to mark an expression with a Loop ID: a Loop with this ID hasn't been registered");
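As a rough model of what fuse_loop_ports computes (illustrative Python, not the real data structures; for simplicity any unmapped reader is treated as living outside the loop): an upper-loop exit that feeds lower-loop entries consumes those entries, and the exit itself survives only if something outside the fused loop still reads it:

# exits/entries are port names; consumers maps an exit to the set of ports reading it.
def fuse_loop_ports(exits, entries, consumers):
    new_exits = []
    for e in exits:
        readers = consumers.get(e, set())
        mapped = {p for p in readers if p in entries}
        entries[:] = [p for p in entries if p not in mapped]  # drop mapped entries
        if readers - mapped:                                  # consumers outside remain
            new_exits.append(e)
    exits[:] = new_exits

exits, entries = ["u_out"], ["l_in", "x_in"]
fuse_loop_ports(exits, entries, {"u_out": {"l_in"}})
assert exits == [] and entries == ["x_in"]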
@ -42,46 +42,35 @@ bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr&
    return supported_work_amount && supported_increment && supported_dim_idxs;
}

void FuseLoops::fuse_points(std::vector<LinearIR::LoopManager::LoopPort>& exit_points,
                            std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
                            LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos) {
    std::vector<LinearIR::LoopManager::LoopPort> new_exit_points;
    for (const auto& exit_point : exit_points) {
        const auto consumers_inputs = exit_point.expr_port->get_connected_ports();

        std::set<LinearIR::LoopManager::LoopPort> mapped_entry_points;
        std::set<ExpressionPtr> outside_consumers;
        for (const auto& consumer_input : consumers_inputs) {
            const auto entry_point_it = std::find_if(entry_points.begin(), entry_points.end(),
                                                     [&consumer_input](const LoopManager::LoopPort& point) {
                                                         return *point.expr_port.get() == consumer_input;
                                                     });
            if (entry_point_it != entry_points.end()) {
                mapped_entry_points.insert(*entry_point_it);
                continue;
            }

            const auto& consumer = consumer_input.get_expr();
            const auto inside_it = std::find(loop_begin_pos, loop_end_pos, consumer);
            if (inside_it == loop_end_pos) {
                outside_consumers.insert(consumer);
            }
        }

        // Remove entry points which are mapped
        auto last_point = entry_points.end();
        for (const auto& mapped_entry_point : mapped_entry_points) {
            last_point = std::remove(entry_points.begin(), last_point, mapped_entry_point);
        }
        entry_points.resize(entry_points.size() - mapped_entry_points.size());

        // Keep the exit point if there are consumers outside after fusion
        if (!outside_consumers.empty()) {
            new_exit_points.push_back(exit_point);
        }
void FuseLoops::move(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id,
                     LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, LinearIR::constExprIt pos) {
    // Inner Loops can contain ports which are ports of outer Loops as well.
    // When we move these inner loops, we can corrupt the order of the LoopPorts of the outer Loops.
    // Therefore, we should first find the correct target loop bounds before moving anything.
    std::map<size_t, std::pair<LinearIR::constExprIt, LinearIR::constExprIt>> outer_loops;  // The map: LoopID -> [ LoopBegin, LoopEnd ]
    const auto outer_loop_ids = LinearIR::LoopManager::get_outer_expr_loops(*loop_begin_pos, loop_id);
    for (const auto& loop_id : outer_loop_ids) {
        LinearIR::constExprIt begin, end;
        loop_manager->get_loop_bounds(linear_ir, loop_id, begin, end);
        // save the previous iterator since the current iterator can be moved
        outer_loops[loop_id] = {std::prev(begin), end};
    }
    // Secondly, move the expressions
    for (auto it = loop_begin_pos; it != loop_end_pos;) {
        auto expr_it = it;
        // After moving, `it` will be in a new place in the current Loop,
        // but for the markup we need to have the expression from the target Loop.
        // Because of that we manually increment the iterator before moving
        it = std::next(it);
        linear_ir.move(expr_it, pos);
    }
    // Thirdly, sort the Loop ports of the outer Loops.
    for (auto& loop : outer_loops) {
        const auto loop_id = loop.first;
        auto begin = std::next(loop.second.first);
        auto end = loop.second.second;
        loop_manager->sort_loop_ports(begin, end, loop_id);
    }

    exit_points = new_exit_points;
}

bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager,
@ -93,9 +82,6 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo
    if (!can_be_fused(loop_current, loop_target))
        return false;

    LinearIR::constExprIt target_loop_begin_pos, target_loop_end_pos;
    loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos);

    // We can fuse Loop_up into Loop_down only when the other consumers of Loop_up are after Loop_down,
    // because Loop_up should be explicitly moved before Loop_down in the linear IR, and we must preserve the control dependency
    bool is_fusion_allowed = true;
@ -117,40 +103,19 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo
    if (!is_fusion_allowed)
        return false;

    // Update the entry and exit points in the current Loop information before moving, while the Loop iterators are still valid
    auto current_entry_points = loop_current->entry_points;
    auto current_exit_points = loop_current->exit_points;
    auto target_entry_points = loop_target->entry_points;
    auto target_exit_points = loop_target->exit_points;
    fuse_points(target_exit_points, current_entry_points, target_loop_begin_pos, target_loop_end_pos);

    const auto insertion_place = current_loop_begin_pos;
    const auto is_move_needed = target_loop_end_pos != current_loop_begin_pos;
    for (auto it = target_loop_begin_pos; it != target_loop_end_pos;) {
        auto expr_it = it;
        const auto& expr = *expr_it;
        // After moving, `it` will be in a new place in the current Loop,
        // but for the markup we need to have the expression from the target Loop.
        // Because of that we manually increment the iterator before moving
        it = std::next(it);
        loop_manager->replace_loop_id(expr, target_loop_id, current_loop_id);
        if (is_move_needed)
            linear_ir.move(expr_it, insertion_place);
    }

    // Update the current Loop bounds:
    current_loop_begin_pos = target_loop_begin_pos;

    LinearIR::constExprIt target_loop_begin_pos, target_loop_end_pos;
    loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos);
    loop_manager->fuse_loops(target_loop_begin_pos, target_loop_end_pos, target_loop_id, current_loop_id, false);
    // Update work_amount for the Loop (the increment is constant because the increments must be identical for fusion):
    loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount);

    std::vector<LoopManager::LoopPort> new_entries = target_entry_points;
    new_entries.insert(new_entries.end(), current_entry_points.begin(), current_entry_points.end());
    std::vector<LoopManager::LoopPort> new_exits = target_exit_points;
    new_exits.insert(new_exits.end(), current_exit_points.begin(), current_exit_points.end());
    const auto insertion_place = current_loop_begin_pos;
    const auto is_move_needed = target_loop_end_pos != current_loop_begin_pos;
    if (is_move_needed)
        move(linear_ir, loop_manager, current_loop_id, target_loop_begin_pos, target_loop_end_pos, insertion_place);

    loop_current->entry_points = new_entries;
    loop_current->exit_points = new_exits;
    // Update the current Loop bounds:
    current_loop_begin_pos = target_loop_begin_pos;

    return true;
}
@ -182,43 +147,17 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo

    LinearIR::constExprIt target_loop_begin_pos, target_loop_end_pos;
    loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos);

    // Update the entry and exit points in the current Loop information before moving, while the Loop iterators are still valid
    auto current_entry_points = loop_current->entry_points;
    auto current_exit_points = loop_current->exit_points;
    auto target_entry_points = loop_target->entry_points;
    auto target_exit_points = loop_target->exit_points;
    fuse_points(current_exit_points, target_entry_points, current_loop_begin_pos, current_loop_end_pos);

    const auto insertion_place = current_loop_end_pos;
    const auto is_move_needed = insertion_place != target_loop_begin_pos;
    for (auto it = target_loop_begin_pos; it != target_loop_end_pos;) {
        auto expr_it = it;
        const auto& expr = *expr_it;
        // After moving, `it` will be in a new place in the current Loop,
        // but for the markup we need to have the expression from the target Loop.
        // Because of that we manually increment the iterator before moving
        it = std::next(it);
        loop_manager->replace_loop_id(expr, target_loop_id, current_loop_id);
        if (is_move_needed)
            linear_ir.move(expr_it, insertion_place);
    }

    // Update the current Loop bounds:
    if (!is_move_needed)
        current_loop_end_pos = target_loop_end_pos;

    loop_manager->fuse_loops(target_loop_begin_pos, target_loop_end_pos, current_loop_id, target_loop_id);
    // Update work_amount for the Loop (the increment is constant because the increments must be identical for fusion):
    loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount);

    std::vector<LoopManager::LoopPort>& new_entries = current_entry_points;
    new_entries.insert(new_entries.end(), target_entry_points.begin(), target_entry_points.end());
    std::vector<LoopManager::LoopPort>& new_exits = current_exit_points;
    new_exits.insert(new_exits.end(), target_exit_points.begin(), target_exit_points.end());

    loop_current->entry_points = new_entries;
    loop_current->exit_points = new_exits;

    const auto insertion_place = current_loop_end_pos;
    const auto is_move_needed = insertion_place != target_loop_begin_pos;
    if (is_move_needed) {
        move(linear_ir, loop_manager, current_loop_id, target_loop_begin_pos, target_loop_end_pos, insertion_place);
    } else {
        current_loop_end_pos = target_loop_end_pos;
    }
    return true;
}

@ -292,7 +231,6 @@ bool FuseLoops::run(LinearIR& linear_ir) {
            if (fuse_upper_into_current(linear_ir, loop_manager, entry_point.expr_port, current_loop_id, upper_loop_id,
                                        current_loop_begin_pos, current_loop_end_pos)) {
                was_fusion_up = true;
                loop_manager->remove_loop_info(upper_loop_id);
                prev_fused_loops.insert(current_loop_id);
            }
        }
@ -339,7 +277,6 @@ bool FuseLoops::run(LinearIR& linear_ir) {
            if (fuse_lower_into_current(linear_ir, loop_manager, exit_point.expr_port, current_loop_id, lower_loop_id,
                                        current_loop_begin_pos, current_loop_end_pos)) {
                was_fusion_down = true;
                loop_manager->remove_loop_info(lower_loop_id);
                prev_fused_loops.insert(current_loop_id);
                // Need to check for possible fusion again because of new input expressions for the Loop
                break;
@ -19,24 +19,6 @@ using LoopInfoPtr = LoopManager::LoopInfoPtr;

InsertLoadStore::InsertLoadStore(size_t vector_size) : m_vector_size(vector_size) {}

void InsertLoadStore::update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector<size_t>& loop_ids,
                                   const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry) {
    for (auto loop_id : loop_ids) {
        update_loop(loop_manager->get_loop_info(loop_id), actual_port, target_ports, is_entry);
    }
}

void InsertLoadStore::update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info,
                                  const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry) {
    auto& ports = is_entry ? loop_info->entry_points : loop_info->exit_points;
    auto port_it = std::find_if(ports.begin(), ports.end(),
                                [&actual_port](const LoopManager::LoopPort& point) { return *point.expr_port.get() == actual_port; });
    if (port_it == ports.end())
        return;
    port_it = ports.erase(port_it);
    ports.insert(port_it, target_ports.cbegin(), target_ports.cend());
}

size_t InsertLoadStore::get_count(const PortDescriptorPtr& port_desc) const {
    const auto layout = port_desc->get_layout();
    const auto shape = port_desc->get_shape();
@ -75,7 +57,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr
        // Need to update all the corresponding Loops with the same Entry Point
        const auto prev_entry_point = consumer_input;
        const auto new_entry_point = load_expr->get_input_port(0);
        update_loops(loop_manager, loop_ids, prev_entry_point, {new_entry_point}, true);
        loop_manager->update_loops_port(loop_ids, prev_entry_point, {new_entry_point}, true);
        was_inserted = true;
    }

@ -122,7 +104,7 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp
    const auto new_exit_point = store_expr->get_output_port(0);
    const auto new_exit_points = should_be_saved ? std::vector<ExpressionPort>{prev_exit_point, new_exit_point}
                                                 : std::vector<ExpressionPort>{new_exit_point};
    update_loops(loop_manager, loop_ids, prev_exit_point, new_exit_points, false);
    loop_manager->update_loops_port(loop_ids, prev_exit_point, new_exit_points, false);
    return true;
}
@ -41,14 +41,9 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) {
        const auto tensor_out = softmax_expr->get_output_port_descriptor(0)->get_shape();
        const auto inner_work_amount = *(tensor_out.rbegin());

        expr_it = linear_ir.erase(expr_it);   // Remove Softmax

        std::vector<ExpressionPtr> new_exprs;

        // We need an iterator to the inserted element
        auto push_node = [&linear_ir, &expr_it, &new_exprs](const std::shared_ptr<Node>& n) {
        auto push_node = [&linear_ir, &expr_it](const std::shared_ptr<Node>& n) {
            const auto expr = linear_ir.insert(expr_it, n);
            new_exprs.push_back(*expr);
            return std::make_pair(expr, n);
        };

@ -102,35 +97,16 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) {
                                                 (*mul.first)->get_input_port(1)},
                                std::vector<ExpressionPort>{(*mul.first)->get_output_port(0)});

        // Move the other Loop IDs from Softmax
        for (const auto& expr : new_exprs) {
            if (expr->get_loop_ids().empty()) {
                expr->set_loop_ids(softmax_loop_ids);
                continue;
            }
            loop_manager->insert_loop_ids(expr, softmax_loop_ids, true, expr->get_loop_ids().back());
        }

        auto update_loop_bounds = [&softmax_expr](std::vector<LinearIR::LoopManager::LoopPort>& points,
                                                  const std::vector<ExpressionPort>& new_points,
                                                  const LinearIR::LoopManager::LoopInfoPtr& loop_info) {
            auto entry_found = std::find_if(points.begin(), points.end(), [&softmax_expr](const LinearIR::LoopManager::LoopPort& point) {
                return point.expr_port->get_expr() == softmax_expr;
            });
            if (entry_found != points.end()) {
                entry_found = points.erase(entry_found);
                points.insert(entry_found, new_points.begin(), new_points.end());
            }
        };

        // Update Loop info for the outer loops
        const auto entry_points = std::vector<ExpressionPort>{(*max.first)->get_input_port(0),
                                                              (*sub.first)->get_input_port(0)};
        const auto exit_points = std::vector<ExpressionPort>{(*mul.first)->get_output_port(0)};
        for (auto loop_id : softmax_loop_ids) {
            const auto loop_info = loop_manager->get_loop_info(loop_id);
            update_loop_bounds(loop_info->entry_points, std::vector<ExpressionPort>{(*max.first)->get_input_port(0),
                                                                                    (*sub.first)->get_input_port(0)}, loop_info);
            update_loop_bounds(loop_info->exit_points, std::vector<ExpressionPort>{(*mul.first)->get_output_port(0)}, loop_info);
            loop_manager->expression_replacement(vector_buffer_max.first, expr_it, softmax_expr, loop_id, entry_points, exit_points);
        }

        expr_it = linear_ir.erase(expr_it);   // Remove Softmax

        /* =========================================== */

        /* ============= Runtime Info ================ */
@ -8,6 +8,7 @@
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <openvino/op/util/pad_base.hpp>
#include <openvino/opsets/opset6.hpp>
#include <vector>

@ -28,7 +29,7 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
    auto pads_begin_pattern = pattern::wrap_type<opset6::Constant>();
    auto pads_end_pattern = pattern::wrap_type<opset6::Constant>();
    auto pad_value = pattern::wrap_type<opset6::Constant>();
    auto pad_pattern = pattern::wrap_type<opset6::Pad>(
    auto pad_pattern = pattern::wrap_type<op::util::PadBase>(
        {reshape_or_transpose_before_pattern, pads_begin_pattern, pads_end_pattern, pad_value});
    auto space_to_depth_pattern = pattern::wrap_type<opset6::SpaceToDepth>({pad_pattern}, pattern::has_static_shape());
    auto reshape_after_pattern =
@ -60,6 +61,20 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
               input_shape[2] == output_shape[2] && input_shape[3] == output_shape[3];
    };

    auto pads_are_negative = [](const std::shared_ptr<Node>& pads) -> bool {
        auto constant = ov::as_type_ptr<opset6::Constant>(pads);
        if (!constant)
            return true;

        for (auto pad : constant->cast_vector<int>()) {
            if (pad < 0) {
                return true;
            }
        }

        return false;
    };

    std::shared_ptr<Node> reshape_or_trans_before =
        get_reshape_or_transpose(reshape_before_pattern, trans_before_pattern);
    if (!reshape_or_trans_before)
@ -73,7 +88,7 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
    if (!check_input_output_shape(reshape_or_trans_after))
        return false;

    auto pad = std::dynamic_pointer_cast<opset6::Pad>(pattern_map.at(pad_pattern).get_node_shared_ptr());
    auto pad = std::dynamic_pointer_cast<op::util::PadBase>(pattern_map.at(pad_pattern).get_node_shared_ptr());
    if (!pad || pad->get_pad_mode() != op::PadMode::CONSTANT)
        return false;
    auto pad_value_const =
@ -84,6 +99,13 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
    if (pad_value.size() != 1 || pad_value[0] != 0.0f)
        return false;

    const auto pads_begin = pattern_map.at(pads_begin_pattern).get_node_shared_ptr();
    if (pads_are_negative(pads_begin))
        return false;
    const auto pads_end = pattern_map.at(pads_end_pattern).get_node_shared_ptr();
    if (pads_are_negative(pads_end))
        return false;

    auto space_to_depth = std::dynamic_pointer_cast<opset6::SpaceToDepth>(
        pattern_map.at(space_to_depth_pattern).get_node_shared_ptr());
    if (!space_to_depth)
@ -93,10 +115,8 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
    auto block_size = static_cast<int64_t>(space_to_depth->get_block_size());
    auto block_shape =
        opset6::Constant::create(element::i64, Shape{4}, std::vector<int64_t>{1, 1, block_size, block_size});
    auto space_to_batch = register_new_node<opset6::SpaceToBatch>(pattern_map.at(data_pattern),
                                                                  block_shape,
                                                                  pattern_map.at(pads_begin_pattern),
                                                                  pattern_map.at(pads_end_pattern));
    auto space_to_batch =
        register_new_node<opset6::SpaceToBatch>(pattern_map.at(data_pattern), block_shape, pads_begin, pads_end);
    space_to_batch->set_friendly_name(reshape_or_trans_after->get_friendly_name());

    copy_runtime_info(
@ -9,6 +9,7 @@
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <ngraph/pass/manager.hpp>
#include <openvino/op/pad.hpp>
#include <queue>
#include <string>
#include <transformations/common_optimizations/space_to_batch_fusion.hpp>
@ -52,6 +53,59 @@ TEST_F(TransformationTestsF, SpaceToBatchFusionTranspose) {
    }
}

TEST_F(TransformationTestsF, SpaceToBatchFusionTransposePad12) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});
        auto trans_before =
            std::make_shared<opset6::Transpose>(data, op::Constant::create(element::i64, Shape{4}, {1, 0, 2, 3}));
        auto pad = std::make_shared<op::v12::Pad>(trans_before,
                                                  op::Constant::create(element::i64, Shape{4}, {1, 1, 1, 1}),
                                                  op::Constant::create(element::i64, Shape{4}, {2, 2, 3, 3}),
                                                  op::Constant::create(element::f32, Shape{}, {0}),
                                                  op::PadMode::CONSTANT);
        auto space_to_depth =
            std::make_shared<opset6::SpaceToDepth>(pad, opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, 2);
        auto trans_after =
            std::make_shared<opset6::Transpose>(space_to_depth,
                                                op::Constant::create(element::i64, Shape{4}, {1, 0, 2, 3}));
        function = std::make_shared<Function>(NodeVector{trans_after}, ParameterVector{data});

        manager.register_pass<ov::pass::SpaceToBatchFusion>();
    }

    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});
        auto space_to_batch =
            std::make_shared<opset6::SpaceToBatch>(data,
                                                   op::Constant::create(element::i64, Shape{4}, {1, 1, 2, 2}),
                                                   op::Constant::create(element::i64, Shape{4}, {1, 1, 1, 1}),
                                                   op::Constant::create(element::i64, Shape{4}, {2, 2, 3, 3}));

        function_ref = std::make_shared<Function>(NodeVector{space_to_batch}, ParameterVector{data});
    }
}

TEST_F(TransformationTestsF, SpaceToBatchFusionTransposeNegativePads) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});
        auto trans_before =
            std::make_shared<opset6::Transpose>(data, op::Constant::create(element::i64, Shape{4}, {1, 0, 2, 3}));
        auto pad = std::make_shared<op::v12::Pad>(trans_before,
                                                  op::Constant::create(element::i64, Shape{4}, {1, 1, -1, -1}),
                                                  op::Constant::create(element::i64, Shape{4}, {2, 2, -3, -3}),
                                                  op::Constant::create(element::f32, Shape{}, {0}),
                                                  op::PadMode::CONSTANT);
        auto space_to_depth =
            std::make_shared<opset6::SpaceToDepth>(pad, opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, 4);
        auto trans_after =
            std::make_shared<opset6::Transpose>(space_to_depth,
                                                op::Constant::create(element::i64, Shape{4}, {1, 0, 2, 3}));
        function = std::make_shared<Function>(NodeVector{trans_after}, ParameterVector{data});

        manager.register_pass<ov::pass::SpaceToBatchFusion>();
    }
}

TEST_F(TransformationTestsF, SpaceToBatchFusionReshape) {
    {
        auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});
@ -163,6 +163,28 @@ inline int64_t file_size(const char* path) {
    return in.tellg();
}

/**
 * @brief Checks whether a file exists
 * @param[in] path The file name
 * @return true if the file exists
 */
inline bool file_exists(const char* path) {
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
    std::wstring widefilename = ov::util::string_to_wstring(path);
    const wchar_t* file_name = widefilename.c_str();
#elif defined(__ANDROID__) || defined(ANDROID)
    std::string file_name = path;
    std::string::size_type pos = file_name.find('!');
    if (pos != std::string::npos) {
        file_name = file_name.substr(0, pos);
    }
#else
    const char* file_name = path;
#endif
    std::ifstream in(file_name, std::ios_base::binary | std::ios_base::ate);
    return in.good();
}

#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

/**
@ -174,6 +196,14 @@ inline int64_t file_size(const std::wstring& path) {
    return file_size(wstring_to_string(path).c_str());
}

/**
 * @brief Returns true if the file exists
 * @param[in] path The file name
 * @return true if the file exists
 */
inline bool file_exists(const std::wstring& path) {
    return file_exists(wstring_to_string(path).c_str());
}
#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

/**
@ -187,13 +217,11 @@ inline int64_t file_size(const std::string& path) {

/**
 * @brief Returns true if the file exists
 * @param[in] path The path to the file
 * @param[in] path The file name
 * @return true if the file exists
 */
template <typename C,
          typename = typename std::enable_if<(std::is_same<C, char>::value || std::is_same<C, wchar_t>::value)>::type>
inline bool file_exists(const std::basic_string<C>& path) {
    return file_size(path) > 0;
inline bool file_exists(const std::string& path) {
    return file_exists(path.c_str());
}

std::string get_file_ext(const std::string& path);
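The Android branch above strips everything after '!' because a library can be addressed inside an application bundle; a one-line sketch of that convention (the path is hypothetical):

# The existence check is performed on the part of the path before '!'.
def effective_path(path: str) -> str:
    return path.split("!", 1)[0]

assert effective_path("/data/app.apk!/assets/model.bin") == "/data/app.apk"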
@ -4,7 +4,7 @@

#pragma once

#include "openvino/op/op.hpp"
#include "openvino/op/util/scatter_elements_update_base.hpp"

namespace ov {
namespace op {
@ -12,9 +12,9 @@ namespace v3 {
/// \brief ScatterElementsUpdate operation.
///
/// \ingroup ov_ops_cpp_api
class OPENVINO_API ScatterElementsUpdate : public Op {
class OPENVINO_API ScatterElementsUpdate : public util::ScatterElementsUpdateBase {
public:
    OPENVINO_OP("ScatterElementsUpdate", "opset3", op::Op);
    OPENVINO_OP("ScatterElementsUpdate", "opset3", util::ScatterElementsUpdateBase);

    ScatterElementsUpdate() = default;
    /// \brief Constructs a ScatterElementsUpdate node
@ -28,21 +28,74 @@ public:
                          const Output<Node>& updates,
                          const Output<Node>& axis);

    void validate_and_infer_types() override;
    bool visit_attributes(AttributeVisitor& visitor) override;

    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
    OPENVINO_SUPPRESS_DEPRECATED_START
    bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
    OPENVINO_SUPPRESS_DEPRECATED_END
    bool has_evaluate() const override;
    bool evaluate_lower(TensorVector& output_values) const override;
    bool evaluate_upper(TensorVector& output_values) const override;
    bool evaluate_label(TensorLabelVector& output_labels) const override;

private:
    bool evaluate_scatter_element_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
};
} // namespace v3
namespace v12 {
class OPENVINO_API ScatterElementsUpdate : public op::util::ScatterElementsUpdateBase {
public:
    OPENVINO_OP("ScatterElementsUpdate", "opset12", op::util::ScatterElementsUpdateBase);

    /// \brief Lists the supported reduction types for this version of the operator.
    ///        See the specification for the description of how reduction works with ScatterElementsUpdate.
    enum class Reduction { NONE, SUM, PROD, MIN, MAX, MEAN };

    ScatterElementsUpdate() = default;
    /// \brief Constructs a ScatterElementsUpdate node

    /// \param data Input data
    /// \param indices Data entry index that will be updated
    /// \param updates Update values
    /// \param axis Axis to scatter on
    ScatterElementsUpdate(const Output<Node>& data,
                          const Output<Node>& indices,
                          const Output<Node>& updates,
                          const Output<Node>& axis,
                          const Reduction reduction = Reduction::NONE,
                          const bool use_init_val = true);

    bool visit_attributes(AttributeVisitor& visitor) override;

    void validate_and_infer_types() override;

    Reduction get_reduction() const {
        return m_reduction;
    }

    void set_reduction(const Reduction reduction) {
        m_reduction = reduction;
    }

    bool get_use_init_val() const {
        return m_use_init_val;
    }

    void set_use_init_val(const bool use_init_val) {
        m_use_init_val = use_init_val;
    }

    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

    bool has_evaluate() const override;

private:
    Reduction m_reduction = Reduction::NONE;
    bool m_use_init_val = true;
};
} // namespace v12
OPENVINO_API
std::ostream& operator<<(std::ostream& s, const v12::ScatterElementsUpdate::Reduction& reduction);

} // namespace op
template <>
class OPENVINO_API AttributeAdapter<op::v12::ScatterElementsUpdate::Reduction>
    : public EnumAttributeAdapterBase<op::v12::ScatterElementsUpdate::Reduction> {
public:
    AttributeAdapter(op::v12::ScatterElementsUpdate::Reduction& value)
        : EnumAttributeAdapterBase<op::v12::ScatterElementsUpdate::Reduction>(value) {}

    OPENVINO_RTTI("AttributeAdapter<v12::ScatterElementsUpdate::Reduction>");
};
} // namespace ov
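A hedged sketch of the v12 reduction semantics in NumPy (my reading of the reduction/use_init_val attributes above, not the reference implementation): with use_init_val the destination value participates in the reduction, without it the reduced updates replace it.

import numpy as np

# Illustrative 1-D scatter-elements-update with reduction="sum".
def scatter_sum_1d(data, indices, updates, use_init_val=True):
    out = data.copy()
    touched = np.unique(indices)
    if not use_init_val:
        out[touched] = 0          # ignore the initial values at updated slots
    for i, upd in zip(indices, updates):
        out[i] += upd             # duplicate indices accumulate
    return out

data = np.array([1.0, 2.0, 3.0])
print(scatter_sum_1d(data, [0, 0, 2], [10.0, 20.0, 5.0]))         # [31.  2.  8.]
print(scatter_sum_1d(data, [0, 0, 2], [10.0, 20.0, 5.0], False))  # [30.  2.  5.]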
@ -0,0 +1,45 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/op/op.hpp"
#include "openvino/op/util/attr_types.hpp"

namespace ov {
namespace op {
namespace util {
class OPENVINO_API ScatterElementsUpdateBase : public Op {
public:
    OPENVINO_OP("ScatterElementsUpdateBase", "util");
    ScatterElementsUpdateBase() = default;

    /// \brief The common base class for all ScatterElementsUpdate operator versions
    ///
    /// \param data Input data
    /// \param indices Data entry index that will be updated
    /// \param updates Update values
    /// \param axis Axis to scatter on
    ScatterElementsUpdateBase(const Output<Node>& data,
                              const Output<Node>& indices,
                              const Output<Node>& updates,
                              const Output<Node>& axis);

    void validate_and_infer_types() override;

    bool has_evaluate() const override;
    bool evaluate_lower(TensorVector& output_values) const override;
    bool evaluate_upper(TensorVector& output_values) const override;
    bool evaluate_label(TensorLabelVector& output_labels) const override;

    OPENVINO_SUPPRESS_DEPRECATED_START
    bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
    OPENVINO_SUPPRESS_DEPRECATED_END

private:
    bool evaluate_scatter_element_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
};
} // namespace util
} // namespace op
} // namespace ov
@ -120,7 +120,6 @@ _OPENVINO_OP_REG(EmbeddingBagOffsetsSum, ov::op::v3)
_OPENVINO_OP_REG(GRUCell, ov::op::v3)
_OPENVINO_OP_REG(NonZero, ov::op::v3)
_OPENVINO_OP_REG(RNNCell, ov::op::v0)
_OPENVINO_OP_REG(ScatterElementsUpdate, ov::op::v3)
_OPENVINO_OP_REG(ScatterUpdate, ov::op::v3)
_OPENVINO_OP_REG(ShuffleChannels, ov::op::v0)
_OPENVINO_OP_REG(ShapeOf, ov::op::v3)
@ -207,3 +206,4 @@ _OPENVINO_OP_REG(TopK, ov::op::v11)
// New operations added in opset12
_OPENVINO_OP_REG(GroupNormalization, ov::op::v12)
_OPENVINO_OP_REG(Pad, ov::op::v12)
_OPENVINO_OP_REG(ScatterElementsUpdate, ov::op::v12)
@ -9,11 +9,11 @@
#include "utils.hpp"

namespace ov {
namespace op {
namespace v3 {

namespace op {
namespace util {
template <class TShape>
std::vector<TShape> shape_infer(const ScatterElementsUpdate* op,
std::vector<TShape> shape_infer(const util::ScatterElementsUpdateBase* op,
                                const std::vector<TShape>& input_shapes,
                                const std::map<size_t, HostTensorPtr>& constant_data = {}) {
    NODE_VALIDATION_CHECK(op, input_shapes.size() == 4);
@ -59,15 +59,24 @@ std::vector<TShape> shape_infer(const ScatterElementsUpdate* op,
    }
    return {data_shape};
}

} // namespace util
namespace v3 {
template <class TShape>
void shape_infer(const ScatterElementsUpdate* op,
                 const std::vector<TShape>& input_shapes,
                 std::vector<TShape>& output_shapes,
                 const std::map<size_t, HostTensorPtr>& constant_data = {}) {
    output_shapes = shape_infer(op, input_shapes, constant_data);
    output_shapes = util::shape_infer(op, input_shapes, constant_data);
}

} // namespace v3
namespace v12 {
template <class TShape>
void shape_infer(const ScatterElementsUpdate* op,
                 const std::vector<TShape>& input_shapes,
                 std::vector<TShape>& output_shapes,
                 const std::map<size_t, HostTensorPtr>& constant_data = {}) {
    output_shapes = util::shape_infer(op, input_shapes, constant_data);
}
} // namespace v12
} // namespace op
} // namespace ov
@ -2,25 +2,21 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "ngraph/op/scatter_elements_update.hpp"
#include "openvino/op/scatter_elements_update.hpp"

#include <scatter_elements_update_shape_inference.hpp>

#include "bound_evaluate.hpp"
#include "itt.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "ngraph/runtime/reference/scatter_elements_update.hpp"
#include "ngraph/validation_util.hpp"
#include "openvino/core/validation_util.hpp"

using namespace ngraph;
using namespace std;

namespace ov {
op::v3::ScatterElementsUpdate::ScatterElementsUpdate(const Output<Node>& data,
                                                     const Output<Node>& indices,
                                                     const Output<Node>& updates,
                                                     const Output<Node>& axis)
    : Op({data, indices, updates, axis}) {
    : ov::op::util::ScatterElementsUpdateBase(data, indices, updates, axis) {
    constructor_validate_and_infer_types();
}

@ -29,37 +25,6 @@ bool op::v3::ScatterElementsUpdate::visit_attributes(AttributeVisitor& visitor)
    return true;
}

void op::v3::ScatterElementsUpdate::validate_and_infer_types() {
    OV_OP_SCOPE(v3_ScatterElementsUpdate_validate_and_infer_types);
    element::Type data_et = get_input_element_type(0);
    element::Type indices_et = get_input_element_type(1);
    element::Type updates_et = get_input_element_type(2);
    element::Type axis_et = get_input_element_type(3);

    NODE_VALIDATION_CHECK(this,
                          indices_et.is_integral(),
                          "Indices element type must be integral_number, but is: ",
                          indices_et);

    NODE_VALIDATION_CHECK(this, axis_et.is_integral(), "Axis element type must be integral_number, but is: ", axis_et);

    element::Type merged_type;
    NODE_VALIDATION_CHECK(this,
                          element::Type::merge(merged_type, data_et, updates_et),
                          "Data type and updates type are required to be the same. ",
                          "Got: ",
                          data_et,
                          " and: ",
                          updates_et);

    OPENVINO_SUPPRESS_DEPRECATED_START
    const auto output_shape = shape_infer(this, get_node_input_partial_shapes(*this)).front();
    OPENVINO_SUPPRESS_DEPRECATED_END
    set_output_type(0, data_et, output_shape);
    if (output_shape.is_dynamic())
        set_input_is_relevant_to_shape(0);
}

shared_ptr<Node> op::v3::ScatterElementsUpdate::clone_with_new_inputs(const OutputVector& inputs) const {
    OV_OP_SCOPE(v3_ScatterElementsUpdate_clone_with_new_inputs);
    NODE_VALIDATION_CHECK(this,
@ -72,204 +37,78 @@ shared_ptr<Node> op::v3::ScatterElementsUpdate::clone_with_new_inputs(const Outp
    return make_shared<v3::ScatterElementsUpdate>(inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3));
}

namespace scatter_element_update {
namespace {
template <element::Type_t DT, element::Type_t IT, element::Type_t AT>
bool evaluate(const HostTensorPtr& data,
              const HostTensorPtr& indices,
              const HostTensorPtr& updates,
              const HostTensorPtr& axis,
              const HostTensorPtr& out,
              const int64_t normalized_axis) {
    using DataType = typename element_type_traits<DT>::value_type;
    using IndicesType = typename element_type_traits<IT>::value_type;

    out->set_shape(data->get_shape());

    runtime::reference::scatter_elem_update<DataType, IndicesType>(data->get_data_ptr<DT>(),
                                                                   indices->get_data_ptr<IT>(),
                                                                   updates->get_data_ptr<DT>(),
                                                                   normalized_axis,
                                                                   out->get_data_ptr<DT>(),
                                                                   data->get_shape(),
                                                                   indices->get_shape());
op::v12::ScatterElementsUpdate::ScatterElementsUpdate(const Output<Node>& data,
                                                      const Output<Node>& indices,
                                                      const Output<Node>& updates,
                                                      const Output<Node>& axis,
                                                      const Reduction reduction,
                                                      const bool use_init_val)
    : op::util::ScatterElementsUpdateBase(data, indices, updates, axis),
      m_reduction{reduction},
      m_use_init_val{use_init_val} {
    constructor_validate_and_infer_types();
}

bool op::v12::ScatterElementsUpdate::visit_attributes(AttributeVisitor& visitor) {
    OV_OP_SCOPE(v12_ScatterElementsUpdate_visit_attributes);
    visitor.on_attribute("reduction", m_reduction);
    visitor.on_attribute("use_init_val", m_use_init_val);
    return true;
}

#define TYPE_AXS_CASE(a, ...)                                      \
    case element::Type_t::a: {                                     \
        OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_axs, _, a)); \
        rc = evaluate<DT, IT, element::Type_t::a>(__VA_ARGS__);    \
    } break;
void op::v12::ScatterElementsUpdate::validate_and_infer_types() {
    OV_OP_SCOPE(v12_ScatterElementsUpdate_validate_and_infer_types);

template <element::Type_t DT, element::Type_t IT>
bool evaluate(const HostTensorPtr& arg0,
              const HostTensorPtr& arg1,
              const HostTensorPtr& arg2,
              const HostTensorPtr& arg3,
              const HostTensorPtr& out,
              const int64_t normalized_axis) {
    auto axis_type = arg3->get_element_type();

    // Dispatch specialization based on axis data type.
    bool rc = true;

    switch (axis_type) {
        TYPE_AXS_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
    default:
        rc = false;
        break;
    }
    return rc;
}

#define TYPE_IND_CASE(a, ...)                                      \
    case element::Type_t::a: {                                     \
        OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_ind, _, a)); \
        rc = evaluate<DT, element::Type_t::a>(__VA_ARGS__);        \
    } break;

template <element::Type_t DT>
bool evaluate(const HostTensorPtr& arg0,
              const HostTensorPtr& arg1,
              const HostTensorPtr& arg2,
              const HostTensorPtr& arg3,
              const HostTensorPtr& out,
              const int64_t normalized_axis) {
    auto indices_type = arg1->get_element_type();

    // Dispatch specialization based on indicies data type.
    bool rc = true;

    switch (indices_type) {
        TYPE_IND_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
    default:
        rc = false;
        break;
    }
    return rc;
}

bool evaluate_scatter_element_update(const HostTensorPtr& arg0,
                                     const HostTensorPtr& arg1,
                                     const HostTensorPtr& arg2,
                                     const HostTensorPtr& arg3,
                                     const HostTensorPtr& out,
                                     const int64_t normalized_axis) {
    bool rc = true;

    switch (out->get_element_type()) {
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i16, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i32, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i64, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u32, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u64, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f16, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f32, arg0, arg1, arg2, arg3, out, normalized_axis);
    default:
        rc = false;
        break;
    }
    return rc;
}
} // namespace
} // namespace scatter_element_update

bool op::v3::ScatterElementsUpdate::evaluate_scatter_element_update(const HostTensorVector& outputs,
                                                                    const HostTensorVector& inputs) const {
    NGRAPH_CHECK(inputs[3]->get_element_type().is_integral_number(), "axis element type is not integral data type");

    OPENVINO_SUPPRESS_DEPRECATED_START
    int64_t axis = host_tensor_2_vector<int64_t>(inputs[3])[0];
    OPENVINO_SUPPRESS_DEPRECATED_END
    const auto& input_rank = get_input_partial_shape(0).rank();
    int64_t normalized_axis = axis;

    if (normalized_axis < 0) {
        if (input_rank.is_static()) {
            OPENVINO_SUPPRESS_DEPRECATED_START
            normalized_axis = ngraph::normalize_axis(this, axis, input_rank);
            OPENVINO_SUPPRESS_DEPRECATED_END
        } else {
            OPENVINO_SUPPRESS_DEPRECATED_START
            normalized_axis = ngraph::normalize_axis(this, axis, static_cast<int64_t>(inputs[0]->get_shape().size()));
            OPENVINO_SUPPRESS_DEPRECATED_END
        }
    if (m_reduction == Reduction::MEAN) {
        NODE_VALIDATION_CHECK(this,
                              get_input_element_type(0) != element::boolean,
                              "The 'mean' reduction type is not supported for boolean tensors");
    }

    return scatter_element_update::evaluate_scatter_element_update(inputs[0],
                                                                   inputs[1],
                                                                   inputs[2],
                                                                   inputs[3],
                                                                   outputs[0],
                                                                   normalized_axis);
    ScatterElementsUpdateBase::validate_and_infer_types();
}

bool op::v3::ScatterElementsUpdate::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
    OV_OP_SCOPE(v3_ScatterElementsUpdate_evaluate);
    return evaluate_scatter_element_update(outputs, inputs);
shared_ptr<Node> op::v12::ScatterElementsUpdate::clone_with_new_inputs(const OutputVector& inputs) const {
    OV_OP_SCOPE(v12_ScatterElementsUpdate_clone_with_new_inputs);
    NODE_VALIDATION_CHECK(this,
                          inputs.size() == get_input_size(),
                          "clone_with_new_inputs() required inputs size: ",
                          get_input_size(),
                          "Got: ",
                          inputs.size());

    return make_shared<v12::ScatterElementsUpdate>(inputs.at(0),
                                                   inputs.at(1),
                                                   inputs.at(2),
                                                   inputs.at(3),
                                                   m_reduction,
                                                   m_use_init_val);
}

bool op::v3::ScatterElementsUpdate::has_evaluate() const {
    OV_OP_SCOPE(v3_ScatterElementsUpdate_has_evaluate);

    switch (get_output_element_type(0)) {
    case ngraph::element::i16:
    case ngraph::element::i32:
    case ngraph::element::i64:
    case ngraph::element::u32:
    case ngraph::element::u64:
    case ngraph::element::f16:
    case ngraph::element::f32:
        break;
    default:
bool op::v12::ScatterElementsUpdate::has_evaluate() const {
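    // The shared reference implementation only covers the plain (no-reduction)
    // case, so evaluation support is delegated to the base class for
    // Reduction::NONE and denied for every other reduction mode.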
    if (m_reduction != Reduction::NONE) {
        return false;
    } else {
        return ScatterElementsUpdateBase::has_evaluate();
    }
    switch (get_input_element_type(1)) {
    case ngraph::element::i8:
    case ngraph::element::i16:
    case ngraph::element::i32:
    case ngraph::element::i64:
    case ngraph::element::u8:
    case ngraph::element::u16:
    case ngraph::element::u32:
    case ngraph::element::u64:
        break;
    default:
        return false;
    }
    return true;
}

bool op::v3::ScatterElementsUpdate::evaluate_lower(ov::TensorVector& output_values) const {
    OV_OP_SCOPE(v3_ScatterNDUpdate_evaluate_lower);
    return get_input_tensor(1).has_and_set_bound() && ov::default_lower_bound_evaluator(this, output_values);
template <>
OPENVINO_API EnumNames<op::v12::ScatterElementsUpdate::Reduction>&
EnumNames<op::v12::ScatterElementsUpdate::Reduction>::get() {
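    // These strings are the values the "reduction" attribute takes when the
    // op is serialized and deserialized via the attribute visitor.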
    static auto enum_names = EnumNames<op::v12::ScatterElementsUpdate::Reduction>(
        "op::v12::ScatterElementsUpdate::Reduction",
        {{"none", op::v12::ScatterElementsUpdate::Reduction::NONE},
         {"sum", op::v12::ScatterElementsUpdate::Reduction::SUM},
         {"prod", op::v12::ScatterElementsUpdate::Reduction::PROD},
         {"min", op::v12::ScatterElementsUpdate::Reduction::MIN},
         {"max", op::v12::ScatterElementsUpdate::Reduction::MAX},
         {"mean", op::v12::ScatterElementsUpdate::Reduction::MEAN}});
    return enum_names;
}

bool op::v3::ScatterElementsUpdate::evaluate_upper(ov::TensorVector& output_values) const {
    OV_OP_SCOPE(v3_ScatterNDUpdate_evaluate_upper);
    return get_input_tensor(1).has_and_set_bound() && ov::default_upper_bound_evaluator(this, output_values);
}

bool op::v3::ScatterElementsUpdate::evaluate_label(TensorLabelVector& output_labels) const {
    OV_OP_SCOPE(v3_ScatterNDUpdate_evaluate_label);

    OPENVINO_SUPPRESS_DEPRECATED_START
    return ov::default_label_evaluator(this, {0, 2}, output_labels);
    OPENVINO_SUPPRESS_DEPRECATED_END
namespace op {
std::ostream& operator<<(std::ostream& s, const v12::ScatterElementsUpdate::Reduction& reduction) {
    return s << as_string(reduction);
}
} // namespace op
} // namespace ov
262
src/core/src/op/util/scatter_elements_update_base.cpp
Normal file
@ -0,0 +1,262 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/op/util/scatter_elements_update_base.hpp"

#include <scatter_elements_update_shape_inference.hpp>

#include "bound_evaluate.hpp"
#include "itt.hpp"
#include "ngraph/runtime/reference/scatter_elements_update.hpp"
#include "openvino/core/validation_util.hpp"

namespace ov {
namespace op {

ov::op::util::ScatterElementsUpdateBase::ScatterElementsUpdateBase(const Output<Node>& data,
                                                                   const Output<Node>& indices,
                                                                   const Output<Node>& updates,
                                                                   const Output<Node>& axis)
    : Op({data, indices, updates, axis}) {
    constructor_validate_and_infer_types();
}

void ov::op::util::ScatterElementsUpdateBase::validate_and_infer_types() {
    OV_OP_SCOPE(util_ScatterElementsUpdateBase_validate_and_infer_types);
    OPENVINO_SUPPRESS_DEPRECATED_START
    const element::Type& data_et = get_input_element_type(0);
    const element::Type& indices_et = get_input_element_type(1);
    const element::Type& updates_et = get_input_element_type(2);
    const element::Type& axis_et = get_input_element_type(3);

    NODE_VALIDATION_CHECK(this,
                          indices_et.is_integral(),
                          "Indices element type must be integral_number, but is: ",
                          indices_et);

    NODE_VALIDATION_CHECK(this, axis_et.is_integral(), "Axis element type must be integral_number, but is: ", axis_et);

    element::Type merged_type;
    NODE_VALIDATION_CHECK(this,
                          element::Type::merge(merged_type, data_et, updates_et),
                          "Data type and updates type are required to be the same. ",
                          "Got: ",
                          data_et,
                          " and: ",
                          updates_et);
    const auto output_shape = shape_infer(this, get_node_input_partial_shapes(*this)).front();
    OPENVINO_SUPPRESS_DEPRECATED_END
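    // The output type follows the data input; merging it with the updates
    // type lets a dynamic data type be refined from the updates input.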
    element::Type out_et = get_input_element_type(0);
    std::ignore = element::Type::merge(out_et, get_input_element_type(0), get_input_element_type(2));
    set_output_type(0, out_et, output_shape);
    if (output_shape.is_dynamic()) {
        set_input_is_relevant_to_shape(0);
    }
}

bool op::util::ScatterElementsUpdateBase::has_evaluate() const {
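    // Reference evaluation requires both the output (data) element type and
    // the indices element type to be among the supported cases below.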
    OV_OP_SCOPE(util_ScatterElementsUpdateBase_has_evaluate);

    switch (get_output_element_type(0)) {
    case ngraph::element::i16:
    case ngraph::element::i32:
    case ngraph::element::i64:
    case ngraph::element::u32:
    case ngraph::element::u64:
    case ngraph::element::f16:
    case ngraph::element::f32:
        break;
    default:
        return false;
    }
    switch (get_input_element_type(1)) {
    case ngraph::element::i8:
    case ngraph::element::i16:
    case ngraph::element::i32:
    case ngraph::element::i64:
    case ngraph::element::u8:
    case ngraph::element::u16:
    case ngraph::element::u32:
    case ngraph::element::u64:
        break;
    default:
        return false;
    }
    return true;
}

bool op::util::ScatterElementsUpdateBase::evaluate_lower(ov::TensorVector& output_values) const {
    OV_OP_SCOPE(util_ScatterElementsUpdateBase_evaluate_lower);
    return get_input_tensor(1).has_and_set_bound() && ov::default_lower_bound_evaluator(this, output_values);
}

bool op::util::ScatterElementsUpdateBase::evaluate_upper(ov::TensorVector& output_values) const {
    OV_OP_SCOPE(util_ScatterElementsUpdateBase_evaluate_upper);
    return get_input_tensor(1).has_and_set_bound() && ov::default_upper_bound_evaluator(this, output_values);
}

bool op::util::ScatterElementsUpdateBase::evaluate_label(TensorLabelVector& output_labels) const {
    OV_OP_SCOPE(util_ScatterElementsUpdateBase_evaluate_label);

    OPENVINO_SUPPRESS_DEPRECATED_START
    return ov::default_label_evaluator(this, {0, 2}, output_labels);
    OPENVINO_SUPPRESS_DEPRECATED_END
}

namespace scatter_element_update {
namespace {
template <element::Type_t DT, element::Type_t IT, element::Type_t AT>
bool evaluate(const HostTensorPtr& data,
              const HostTensorPtr& indices,
              const HostTensorPtr& updates,
              const HostTensorPtr& axis,
              const HostTensorPtr& out,
              const int64_t normalized_axis) {
    using DataType = typename element_type_traits<DT>::value_type;
    using IndicesType = typename element_type_traits<IT>::value_type;

    out->set_shape(data->get_shape());

    ngraph::runtime::reference::scatter_elem_update<DataType, IndicesType>(data->get_data_ptr<DT>(),
                                                                           indices->get_data_ptr<IT>(),
                                                                           updates->get_data_ptr<DT>(),
                                                                           normalized_axis,
                                                                           out->get_data_ptr<DT>(),
                                                                           data->get_shape(),
                                                                           indices->get_shape());

    return true;
}

#define TYPE_AXS_CASE(a, ...)                                      \
    case element::Type_t::a: {                                     \
        OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_axs, _, a)); \
        rc = evaluate<DT, IT, element::Type_t::a>(__VA_ARGS__);    \
    } break;

template <element::Type_t DT, element::Type_t IT>
bool evaluate(const HostTensorPtr& arg0,
              const HostTensorPtr& arg1,
              const HostTensorPtr& arg2,
              const HostTensorPtr& arg3,
              const HostTensorPtr& out,
              const int64_t normalized_axis) {
    auto axis_type = arg3->get_element_type();

    // Dispatch specialization based on axis data type.
    bool rc = true;

    switch (axis_type) {
        TYPE_AXS_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_AXS_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
    default:
        rc = false;
        break;
    }
    return rc;
}

#define TYPE_IND_CASE(a, ...)                                      \
    case element::Type_t::a: {                                     \
        OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_ind, _, a)); \
        rc = evaluate<DT, element::Type_t::a>(__VA_ARGS__);        \
    } break;

template <element::Type_t DT>
bool evaluate(const HostTensorPtr& arg0,
              const HostTensorPtr& arg1,
              const HostTensorPtr& arg2,
              const HostTensorPtr& arg3,
              const HostTensorPtr& out,
              const int64_t normalized_axis) {
    auto indices_type = arg1->get_element_type();

    // Dispatch specialization based on indices data type.
    bool rc = true;

    switch (indices_type) {
        TYPE_IND_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
        TYPE_IND_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
    default:
        rc = false;
        break;
    }
    return rc;
}

bool evaluate_scatter_element_update(const HostTensorPtr& arg0,
                                     const HostTensorPtr& arg1,
                                     const HostTensorPtr& arg2,
                                     const HostTensorPtr& arg3,
                                     const HostTensorPtr& out,
                                     const int64_t normalized_axis) {
    bool rc = true;

    switch (out->get_element_type()) {
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i16, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i32, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i64, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u32, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u64, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f16, arg0, arg1, arg2, arg3, out, normalized_axis);
        NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f32, arg0, arg1, arg2, arg3, out, normalized_axis);
    default:
        rc = false;
        break;
    }
    return rc;
}
} // namespace
} // namespace scatter_element_update

bool op::util::ScatterElementsUpdateBase::evaluate_scatter_element_update(const HostTensorVector& outputs,
                                                                          const HostTensorVector& inputs) const {
    NGRAPH_CHECK(inputs[3]->get_element_type().is_integral_number(), "axis element type is not integral data type");

    OPENVINO_SUPPRESS_DEPRECATED_START
    int64_t axis = host_tensor_2_vector<int64_t>(inputs[3])[0];
    OPENVINO_SUPPRESS_DEPRECATED_END
    const auto& input_rank = get_input_partial_shape(0).rank();
    int64_t normalized_axis = axis;
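    // A negative axis counts from the back of the data shape; normalize it to
    // the equivalent non-negative index, using the static rank when known and
    // the runtime shape of the data tensor otherwise.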

    if (normalized_axis < 0) {
        if (input_rank.is_static()) {
            OPENVINO_SUPPRESS_DEPRECATED_START
            normalized_axis = ngraph::normalize_axis(this, axis, input_rank);
            OPENVINO_SUPPRESS_DEPRECATED_END
        } else {
            OPENVINO_SUPPRESS_DEPRECATED_START
            normalized_axis = ngraph::normalize_axis(this, axis, static_cast<int64_t>(inputs[0]->get_shape().size()));
            OPENVINO_SUPPRESS_DEPRECATED_END
        }
    }

    return scatter_element_update::evaluate_scatter_element_update(inputs[0],
                                                                   inputs[1],
                                                                   inputs[2],
                                                                   inputs[3],
                                                                   outputs[0],
                                                                   normalized_axis);
}

bool op::util::ScatterElementsUpdateBase::evaluate(const HostTensorVector& outputs,
                                                   const HostTensorVector& inputs) const {
    OV_OP_SCOPE(util_ScatterElementsUpdate_evaluate);
    return evaluate_scatter_element_update(outputs, inputs);
}

} // namespace op
} // namespace ov
@ -8,85 +8,89 @@
#include "util/type_prop.hpp"

using namespace std;
using namespace ngraph;
using namespace ov;
using namespace testing;

TEST(type_prop, scatter_elements_update_output_shape) {
template <class T>
class ScatterElementsUpdateTest : public ::testing::Test {};
TYPED_TEST_SUITE_P(ScatterElementsUpdateTest);

TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_output_shape) {
    Shape data_shape{2, 4, 5, 7};
    Shape indices_shape{2, 2, 2, 2};
    Shape updates_shape{2, 2, 2, 2};
    Shape axis_shape{};
    Shape expected_output_shape{2, 4, 5, 7};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::Parameter>(element::i16, axis_shape);
    auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::v0::Parameter>(element::i16, axis_shape);

    auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
    auto scatter = make_shared<TypeParam>(data, indices, updates, axis);

    EXPECT_EQ(scatter->get_output_shape(0), expected_output_shape);
}

TEST(type_prop, scatter_elements_update_output_partial_dyn_shape) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_output_partial_dyn_shape) {
    PartialShape data_shape{2, Dimension::dynamic(), 5};
    set_shape_labels(data_shape, 10);
    PartialShape indices_shape{Dimension::dynamic(), 2, 2};
    PartialShape updates_shape{2, 2, Dimension::dynamic()};
    PartialShape axis_shape = PartialShape::dynamic();

    auto data = make_shared<op::Parameter>(element::f64, data_shape);
    auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::Parameter>(element::f64, updates_shape);
    auto axis = make_shared<op::Parameter>(element::i16, axis_shape);
    auto data = make_shared<op::v0::Parameter>(element::f64, data_shape);
    auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::v0::Parameter>(element::f64, updates_shape);
    auto axis = make_shared<op::v0::Parameter>(element::i16, axis_shape);

    auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
    auto scatter = make_shared<TypeParam>(data, indices, updates, axis);

    EXPECT_EQ(scatter->get_output_element_type(0), element::f64);
    EXPECT_EQ(scatter->get_output_partial_shape(0), data_shape);
    EXPECT_THAT(get_shape_labels(scatter->get_output_partial_shape(0)), ElementsAre(10, 11, 12));
}

TEST(type_prop, scatter_elements_update_data_has_interval_dimensions) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_data_has_interval_dimensions) {
    PartialShape data_shape{{5, 10}, -1, {-1, 3}, {8, -1}};
    set_shape_labels(data_shape, 10);

    const auto data = make_shared<op::Parameter>(element::i64, data_shape);
    const auto indices = make_shared<op::Parameter>(element::i16, PartialShape{1, 2, 2, {2, 3}});
    const auto updates = make_shared<op::Parameter>(element::i64, PartialShape{{0, 2}, -1, 2, -1});
    const auto axis = make_shared<op::Parameter>(element::i16, PartialShape::dynamic());
    const auto data = make_shared<op::v0::Parameter>(element::i64, data_shape);
    const auto indices = make_shared<op::v0::Parameter>(element::i16, PartialShape{1, 2, 2, {2, 3}});
    const auto updates = make_shared<op::v0::Parameter>(element::i64, PartialShape{{0, 2}, -1, 2, -1});
    const auto axis = make_shared<op::v0::Parameter>(element::i16, PartialShape::dynamic());

    const auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
    const auto scatter = make_shared<TypeParam>(data, indices, updates, axis);

    EXPECT_EQ(scatter->get_output_element_type(0), element::i64);
    EXPECT_EQ(scatter->get_output_partial_shape(0), data_shape);
    EXPECT_THAT(get_shape_labels(scatter->get_output_partial_shape(0)), ElementsAre(10, 11, 12, 13));
}

TEST(type_prop, scatter_elements_update_output_full_dyn_shape) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_output_full_dyn_shape) {
    PartialShape data_shape = PartialShape::dynamic();
    PartialShape indices_shape = PartialShape::dynamic();
    PartialShape updates_shape = PartialShape::dynamic();
    PartialShape axis_shape = PartialShape::dynamic();

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::Parameter>(element::i16, axis_shape);
    auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::v0::Parameter>(element::i16, axis_shape);

    auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
    auto scatter = make_shared<TypeParam>(data, indices, updates, axis);

    EXPECT_EQ(scatter->get_output_element_type(0), element::f32);
    EXPECT_EQ(scatter->get_output_partial_shape(0), data_shape);
}

TEST(type_prop, scatter_elements_update_default_ctor) {
    const auto data = make_shared<op::Parameter>(element::f32, PartialShape{2, 5, 5, 6});
    const auto indices = make_shared<op::Parameter>(element::i16, PartialShape{1, 2, 1, 3});
    const auto updates = make_shared<op::Parameter>(element::f32, PartialShape{1, 2, 1, 3});
    const auto axis = make_shared<op::Constant>(element::i16, Shape{}, -4);
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_default_ctor) {
    const auto data = make_shared<op::v0::Parameter>(element::f32, PartialShape{2, 5, 5, 6});
    const auto indices = make_shared<op::v0::Parameter>(element::i16, PartialShape{1, 2, 1, 3});
    const auto updates = make_shared<op::v0::Parameter>(element::f32, PartialShape{1, 2, 1, 3});
    const auto axis = make_shared<op::v0::Constant>(element::i16, Shape{}, -4);

    const auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
    const auto scatter = make_shared<TypeParam>(data, indices, updates, axis);
    scatter->set_arguments(OutputVector{data, indices, updates, axis});
    scatter->validate_and_infer_types();

@ -97,83 +101,116 @@ TEST(type_prop, scatter_elements_update_default_ctor) {
    EXPECT_THAT(get_shape_labels(scatter->get_output_partial_shape(0)), Each(ov::no_label));
}

TEST(type_prop, scatter_elements_update_preserve_partial_values_and_labels_via_evaluates_bounds) {
    const auto data = op::Constant::create(element::i64, Shape{4}, {2, 3, 15, 4});
    const auto indices = op::Constant::create(element::i64, Shape{2}, {3, 0});
TYPED_TEST_P(ScatterElementsUpdateTest,
             scatter_elements_update_preserve_partial_values_and_labels_via_evaluates_bounds) {
    const auto data = op::v0::Constant::create(element::i64, Shape{4}, {2, 3, 15, 4});
    const auto indices = op::v0::Constant::create(element::i64, Shape{2}, {3, 0});
    auto updates_shape = PartialShape{{10, 20}, {3, 4}};
    set_shape_labels(updates_shape, 20);
    const auto axis = make_shared<op::Constant>(element::i16, Shape{}, 0);
    const auto axis = make_shared<op::v0::Constant>(element::i16, Shape{}, 0);

    const auto shape_of_u = std::make_shared<op::ShapeOf>(std::make_shared<op::Parameter>(element::i64, updates_shape));
    const auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, shape_of_u, axis);
    const auto shape_of_u =
        std::make_shared<op::v0::ShapeOf>(std::make_shared<op::v0::Parameter>(element::i64, updates_shape));
    const auto scatter = make_shared<TypeParam>(data, indices, shape_of_u, axis);

    auto param = std::make_shared<op::Parameter>(element::f32, PartialShape{1});
    auto param = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{1});
    auto bc = std::make_shared<op::v3::Broadcast>(param, scatter, op::BroadcastType::BIDIRECTIONAL);

    EXPECT_EQ(bc->get_output_partial_shape(0), PartialShape({{3, 4}, 3, 15, {10, 20}}));
    EXPECT_THAT(get_shape_labels(bc->get_output_partial_shape(0)), ElementsAre(21, ov::no_label, ov::no_label, 20));
}

TEST(type_prop, scatter_elements_update_axis_validation) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_axis_validation) {
    Shape data_shape{2, 4, 5, 7};
    Shape indices_shape{2, 2, 2, 2};
    Shape updates_shape{2, 2, 2, 2};
    Shape axis_shape{};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::Constant>(element::i16, axis_shape, std::vector<int>{8});
    auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::v0::Constant>(element::i16, axis_shape, std::vector<int>{8});

    OV_EXPECT_THROW(auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis),
    OV_EXPECT_THROW(auto scatter = make_shared<TypeParam>(data, indices, updates, axis),
                    ov::AssertFailure,
                    HasSubstr("Parameter axis 8 out of the tensor rank range [-4, 3]"));
}

TEST(type_prop, scatter_elements_updates_indices_shape) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_updates_indices_shape) {
    Shape data_shape{2, 4, 5, 7};
    Shape indices_shape{3, 3, 3, 3};
    Shape updates_shape{2, 2, 2, 2};
    Shape axis_shape{};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::Constant>(element::i16, axis_shape, std::vector<int>{1});
    auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::v0::Constant>(element::i16, axis_shape, std::vector<int>{1});

    OV_EXPECT_THROW(auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis),
    OV_EXPECT_THROW(auto scatter = make_shared<TypeParam>(data, indices, updates, axis),
                    NodeValidationFailure,
                    HasSubstr("Indices and updates input shapes are required to be equal"));
}

TEST(type_prop, scatter_elements_updates_indices_rank) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_updates_indices_rank) {
    Shape data_shape{2, 4};
    Shape indices_shape{2, 2};
    Shape updates_shape{2, 2, 2, 2};
    Shape axis_shape{};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::Constant>(element::i16, axis_shape, std::vector<int>{1});
    auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::v0::Constant>(element::i16, axis_shape, std::vector<int>{1});

    OV_EXPECT_THROW(auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis),
    OV_EXPECT_THROW(auto scatter = make_shared<TypeParam>(data, indices, updates, axis),
                    NodeValidationFailure,
                    HasSubstr("Indices and updates input shapes are required to be equal"));
}

TEST(type_prop, scatter_elements_data_indices_rank) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_data_indices_rank) {
    Shape data_shape{2, 4, 5, 7};
    Shape indices_shape{2, 2};
    Shape updates_shape{2, 2};
    Shape axis_shape{};

    auto data = make_shared<op::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::Constant>(element::i16, axis_shape, std::vector<int>{1});
    auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
    auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
    auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
    auto axis = make_shared<op::v0::Constant>(element::i16, axis_shape, std::vector<int>{1});

    OV_EXPECT_THROW(auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis),
    OV_EXPECT_THROW(auto scatter = make_shared<TypeParam>(data, indices, updates, axis),
                    NodeValidationFailure,
                    HasSubstr("Indices rank and data rank are required to be equal"));
}

TEST(type_prop, scatter_elements_update_mean_reduction_of_bool) {
    const auto data = make_shared<op::v0::Parameter>(element::boolean, Shape{10});
    const auto indices = make_shared<op::v0::Parameter>(element::i32, Shape{2});
    const auto updates = make_shared<op::v0::Parameter>(element::boolean, Shape{2});
    const auto axis = make_shared<op::v0::Constant>(element::i32, Shape{1}, std::vector<int>{0});

    OV_EXPECT_THROW(
        std::ignore = make_shared<op::v12::ScatterElementsUpdate>(data,
                                                                  indices,
                                                                  updates,
                                                                  axis,
                                                                  op::v12::ScatterElementsUpdate::Reduction::MEAN),
        NodeValidationFailure,
        HasSubstr("The 'mean' reduction type is not supported for boolean tensors"));
}

REGISTER_TYPED_TEST_SUITE_P(ScatterElementsUpdateTest,
                            scatter_elements_update_output_shape,
                            scatter_elements_update_output_partial_dyn_shape,
                            scatter_elements_update_data_has_interval_dimensions,
                            scatter_elements_update_output_full_dyn_shape,
                            scatter_elements_update_default_ctor,
                            scatter_elements_update_preserve_partial_values_and_labels_via_evaluates_bounds,
                            scatter_elements_update_axis_validation,
                            scatter_elements_updates_indices_shape,
                            scatter_elements_updates_indices_rank,
                            scatter_elements_data_indices_rank);

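// Run the whole typed suite once per op version, so v3 and v12 share the same
// type-propagation checks.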
using OpVersions = ::testing::Types<op::v3::ScatterElementsUpdate, op::v12::ScatterElementsUpdate>;
INSTANTIATE_TYPED_TEST_SUITE_P(type_prop, ScatterElementsUpdateTest, OpVersions);
@ -2,27 +2,51 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/op/scatter_elements_update.hpp"

#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/util/attr_types.hpp"
#include "ngraph/opsets/opset3.hpp"
#include "util/visitor.hpp"

using namespace ngraph;
using namespace ov;
using ngraph::test::NodeBuilder;
using ngraph::test::ValueMap;

TEST(attributes, scatter_elements_update) {
    NodeBuilder::get_ops().register_factory<opset3::ScatterElementsUpdate>();
    NodeBuilder::get_ops().register_factory<op::v3::ScatterElementsUpdate>();

    auto data = std::make_shared<op::Parameter>(element::f32, Shape{2, 4, 5, 7});
    auto indices = std::make_shared<op::Parameter>(element::i16, Shape{2, 2, 2, 2});
    auto updates = std::make_shared<op::Parameter>(element::f32, Shape{2, 2, 2, 2});
    auto axis = std::make_shared<op::Parameter>(element::i16, Shape{});
    auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 4, 5, 7});
    auto indices = std::make_shared<op::v0::Parameter>(element::i16, Shape{2, 2, 2, 2});
    auto updates = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 2, 2, 2});
    auto axis = std::make_shared<op::v0::Parameter>(element::i16, Shape{});

    auto scatter = std::make_shared<opset3::ScatterElementsUpdate>(data, indices, updates, axis);
    auto scatter = std::make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
    NodeBuilder builder(scatter, {data, indices, updates, axis});

    const auto expected_attr_count = 0;
    EXPECT_EQ(builder.get_value_map_size(), expected_attr_count);
}

TEST(attributes, scatter_elements_update_v12) {
    NodeBuilder::get_ops().register_factory<op::v12::ScatterElementsUpdate>();

    auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 4, 5, 7});
    auto indices = std::make_shared<op::v0::Parameter>(element::i16, Shape{2, 2, 2, 2});
    auto updates = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 2, 2, 2});
    auto axis = std::make_shared<op::v0::Parameter>(element::i16, Shape{});

    auto scatter = std::make_shared<op::v12::ScatterElementsUpdate>(data,
                                                                    indices,
                                                                    updates,
                                                                    axis,
                                                                    op::v12::ScatterElementsUpdate::Reduction::PROD,
                                                                    false);
    NodeBuilder builder(scatter, {data, indices, updates, axis});
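    // Round-trip through NodeBuilder: the visited attributes are used to build
    // a fresh node, which is then compared against the original.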
    const auto g_scatter = ov::as_type_ptr<op::v12::ScatterElementsUpdate>(builder.create());

    const auto expected_attr_count = 2;
    EXPECT_EQ(builder.get_value_map_size(), expected_attr_count);
    EXPECT_EQ(g_scatter->get_reduction(), scatter->get_reduction());
    EXPECT_EQ(g_scatter->get_use_init_val(), scatter->get_use_init_val());
}
@ -150,7 +150,8 @@ INSTANTIATE_TEST_SUITE_P(ONNXOpExtensionViaCommonConstructor,
                         FrontEndOpExtensionTest::getTestCaseName);

TEST(ONNXOpExtensionViaCommonConstructor, onnx_op_extension_via_template_arg_with_custom_domain) {
    const auto ext = std::make_shared<onnx::OpExtension<ov::op::v0::Relu>>("CustomRelu", "my_custom_domain");
    const auto ext =
        std::make_shared<ov::frontend::onnx::OpExtension<ov::op::v0::Relu>>("CustomRelu", "my_custom_domain");

    auto fe = std::make_shared<ov::frontend::onnx::FrontEnd>();
    fe->add_extension(ext);
@ -163,7 +164,8 @@ TEST(ONNXOpExtensionViaCommonConstructor, onnx_op_extension_via_template_arg_wit
}

TEST(ONNXOpExtensionViaCommonConstructor, onnx_op_extension_via_ov_type_name_with_custom_domain) {
    const auto ext = std::make_shared<onnx::OpExtension<>>("opset1::Relu", "CustomRelu", "my_custom_domain");
    const auto ext =
        std::make_shared<ov::frontend::onnx::OpExtension<>>("opset1::Relu", "CustomRelu", "my_custom_domain");

    auto fe = std::make_shared<ov::frontend::onnx::FrontEnd>();
    fe->add_extension(ext);
@ -199,7 +201,8 @@ TEST(ONNXOpExtensionViaCommonConstructor, onnx_op_extension_mixed_legacy_and_new
        ov::util::path_join({TEST_ONNX_MODELS_DIRNAME, "relu_custom_domain.onnx"}));
    ov::Core core;
    core.add_extension(std::make_shared<OldApiNode>());
    const auto new_api_ext = std::make_shared<onnx::OpExtension<ov::op::v0::Relu>>("CustomRelu", "my_custom_domain");
    const auto new_api_ext =
        std::make_shared<ov::frontend::onnx::OpExtension<ov::op::v0::Relu>>("CustomRelu", "my_custom_domain");
    core.add_extension(new_api_ext);
    EXPECT_NO_THROW(core.read_model(input_model_path));
}
@ -18,6 +18,7 @@ namespace tensorflow {
#define VARIABLES_INDEX_FOOTER_SIZE 48
#define BLOCK_TRAILER_SIZE 5
#define SAVED_TENSOR_SLICES_KEY ""
#define META_GRAPH_DEFAULT_TAG "serve"

template <typename T>
static T smUnpack(char*& ptr, const char* ptr_end) {
@ -137,6 +137,9 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
        } else if (GraphIteratorProtoTxt::is_supported(model_path)) {
            // text protobuf format with checkpoints
            return true;
        } else if (GraphIteratorSavedModel::is_supported(model_path)) {
            // saved model format with tagged metagraphs
            return true;
        }
    }
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
@ -167,6 +170,9 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
        } else if (GraphIteratorProtoTxt::is_supported(model_path)) {
            // text protobuf format with checkpoints
            return true;
        } else if (GraphIteratorSavedModel::is_supported(model_path)) {
            // saved model format with tagged metagraphs
            return true;
        }
    }
#endif
@ -194,11 +200,7 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& va
            return std::make_shared<InputModel>(std::make_shared<GraphIteratorProto>(model_path), m_telemetry);
        } else if (GraphIteratorSavedModel::is_supported(model_path)) {
            std::shared_ptr<GraphIteratorSavedModel> graph_iterator;
            if (variants.size() > 1 && variants[1].is<std::string>()) {
                graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, variants[1].as<std::string>());
            } else {
                graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, std::string("serve"));
            }
            graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, std::string("serve"));
            return std::make_shared<InputModel>(graph_iterator,
                                                m_telemetry,
                                                graph_iterator->get_variables_index(),
@ -249,6 +251,18 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& va
                                                graph_iterator->get_checkpoint_v1_reader(),
                                                false);
            }
            auto saved_model_tags = paths[1];
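            // paths[1] appears to carry the comma-separated MetaGraph tags
            // supplied alongside the model path (for example "serve").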
            if (GraphIteratorSavedModel::is_supported(model_path)) {
                std::shared_ptr<GraphIteratorSavedModel> graph_iterator;
                graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, saved_model_tags);
                return std::make_shared<InputModel>(graph_iterator,
                                                    m_telemetry,
                                                    graph_iterator->get_variables_index(),
                                                    graph_iterator->get_saved_model_input_names(),
                                                    graph_iterator->get_saved_model_output_names(),
                                                    nullptr,
                                                    true);
            }
        }
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
    else if (variants[0].is<std::wstring>()) {
@ -258,13 +272,7 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& va
            return std::make_shared<InputModel>(std::make_shared<GraphIteratorProto>(model_path), m_telemetry);
        } else if (GraphIteratorSavedModel::is_supported(model_path)) {
            std::shared_ptr<GraphIteratorSavedModel> graph_iterator;
            if (variants.size() > 1 && variants[1].is<std::string>()) {
                graph_iterator = std::make_shared<GraphIteratorSavedModel>(
                    model_path,
                    ov::util::wstring_to_string(variants[1].as<std::wstring>()));
            } else {
                graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, std::string("serve"));
            }
            graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, std::string(META_GRAPH_DEFAULT_TAG));
            return std::make_shared<InputModel>(graph_iterator,
                                                m_telemetry,
                                                graph_iterator->get_variables_index(),
@ -315,6 +323,18 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& va
                                                graph_iterator->get_checkpoint_v1_reader(),
                                                false);
            }
            auto saved_model_tags = ov::util::wstring_to_string(paths[1]);
            if (GraphIteratorSavedModel::is_supported(model_path)) {
                std::shared_ptr<GraphIteratorSavedModel> graph_iterator;
                graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, saved_model_tags);
                return std::make_shared<InputModel>(graph_iterator,
                                                    m_telemetry,
                                                    graph_iterator->get_variables_index(),
                                                    graph_iterator->get_saved_model_input_names(),
                                                    graph_iterator->get_saved_model_output_names(),
                                                    nullptr,
                                                    true);
            }
        }
#endif
    else if (variants[0].is<GraphIterator::Ptr>()) {
@ -362,7 +382,8 @@ std::shared_ptr<ov::Model> FrontEnd::convert(const ov::frontend::InputModel::Ptr
            ++counter;
        }
        exception_message
            << "\nTo facilitate the conversion of unsupported operations, refer to Frontend Extension documentation: "
            << "\nTo facilitate the conversion of unsupported operations, refer to Frontend Extension "
               "documentation: "
               "https://docs.openvino.ai/latest/openvino_docs_Extensibility_UG_Frontend_Extensions.html \n";
    }

@ -72,6 +72,31 @@ std::basic_string<wchar_t> get_variables_index_name<wchar_t>() {
}
#endif

std::vector<std::string> GraphIteratorSavedModel::split_tags(const std::string tags) const {
    std::vector<std::string> tag_list = {};
    std::size_t len = tags.length();
    if (len == 0) {
        return tag_list;
    }
    std::string tag = "";
    std::size_t last_delimeter_pos = 0;
    std::size_t delimeter_pos = std::string::npos;
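    // Walk the comma-separated list, collecting a tag before each delimiter;
    // the final tag (or an empty string after a trailing comma) is appended
    // after the loop.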
    while ((delimeter_pos = tags.find_first_of(",", last_delimeter_pos)) != std::string::npos) {
        tag = tags.substr(last_delimeter_pos, delimeter_pos - last_delimeter_pos);
        tag_list.push_back(tag);
        last_delimeter_pos = delimeter_pos + 1;
    }
    if (last_delimeter_pos != std::string::npos) {
        if (last_delimeter_pos < len) {
            tag = tags.substr(last_delimeter_pos);
        } else {
            tag = "";
        }
        tag_list.push_back(tag);
    }
    return tag_list;
}

} // namespace tensorflow
} // namespace frontend
} // namespace ov
@ -70,84 +70,122 @@ private:
    bool read_saved_model(const std::basic_string<T>& path, const std::string& tags) {
        std::basic_string<T> save_model_path = path + get_saved_model_name<T>();
        std::ifstream sm_stream{save_model_path.c_str(), std::ifstream::in | std::ifstream::binary};
        FRONT_END_GENERAL_CHECK(sm_stream && sm_stream.is_open(), "Model file does not exist");
        FRONT_END_GENERAL_CHECK(sm_stream && sm_stream.is_open(), "[TensorFlow Frontend] Model file does not exist");

        std::basic_string<T> varIndexPath = path + get_variables_index_name<T>();
        if (ov::util::file_exists(varIndexPath)) {
            m_variables_index = std::make_shared<VariablesIndex>();
            std::ifstream vi_stream{varIndexPath.c_str(), std::ifstream::in | std::ifstream::binary};
            FRONT_END_GENERAL_CHECK(vi_stream && vi_stream.is_open(),
                                    "Saved Model's variable index file does not exist");
                                    "[TensorFlow Frontend] Saved Model's variable index file does not exist");
            FRONT_END_GENERAL_CHECK(m_variables_index->read_variables(vi_stream, path),
                                    "Saved Model's variable index file cannot be parsed");
                                    "[TensorFlow Frontend] Saved Model's variable index file cannot be parsed");
        }

        bool res = m_saved_model->ParseFromIstream(&sm_stream);
        FRONT_END_GENERAL_CHECK(res && m_saved_model->meta_graphs_size(), "Saved Model cannot be parsed");
        FRONT_END_GENERAL_CHECK(res && m_saved_model->meta_graphs_size(),
                                "[TensorFlow Frontend] Saved Model cannot be parsed");

        auto tag_list = split_tags(tags);

        // A SavedModel can contain several MetaGraphs with different tags. Look for the MetaGraph with the required tags
        for (const auto& meta_graph : m_saved_model->meta_graphs()) {
            if (!meta_graph.has_graph_def()) {
                continue;
            }

            if (m_saved_model->meta_graphs_size() > 1) {
                bool tag_found = false;
                for (const auto& tag : meta_graph.meta_info_def().tags()) {
                    if (tags.find(tag) != std::string::npos) {
                        tag_found = true;
                        break;
                    }
                }
                if (!tag_found) {
                    continue;
                }
            bool tag_found = false;
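            // A MetaGraph matches only when every tag it declares appears in
            // the user-supplied tag list (exact string match, no trimming).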

            if (meta_graph.meta_info_def().tags_size() > 0) {
                tag_found = std::all_of(meta_graph.meta_info_def().tags().begin(),
                                        meta_graph.meta_info_def().tags().end(),
                                        [&tag_list](const std::string& tag) {
                                            return std::find(tag_list.begin(), tag_list.end(), tag) != tag_list.end();
                                        });
            }

            std::map<std::string, const ::tensorflow::SignatureDef*> validSignatures = {};
            for (const auto& sit : meta_graph.signature_def()) {
                const std::string& key = sit.first;
                const ::tensorflow::SignatureDef& val = sit.second;
                if (is_valid_signature(val)) {
                    validSignatures[key] = &val;
                }
            if (tag_found) {
                return load_meta_graph(meta_graph);
            }

            // MetaGraph may have a list of signatures, but at this moment we need information only about
            // "serving_default" signature which contains information about inputs/outputs names for the
            // model. Situation when it is missing in a file also could be.
            auto serving_default = validSignatures.find("serving_default");

            if (serving_default != validSignatures.end()) {
                m_inputs_map = std::make_shared<std::map<std::string, std::string>>();
                m_outputs_map = std::make_shared<std::map<std::string, std::string>>();
                for (const auto& input : serving_default->second->inputs()) {
                    (*m_inputs_map)[input.second.name()] = input.first;
                }
                for (const auto& output : serving_default->second->outputs()) {
                    (*m_outputs_map)[output.second.name()] = output.first;
                }
            }

            m_graph_def = std::make_shared<::tensorflow::GraphDef>(meta_graph.graph_def());

            // Update variables map using information by resolving AssignVariableOp graph nodes
            std::map<std::string, std::string> var_map;
            VariablesIndex::map_assignvariable(m_graph_def, var_map);
            if (var_map.size() > 0 && m_variables_index.get() != nullptr) {
                for (auto var : var_map) {
                    m_variables_index->map_variable(var.first, var.second);
                }
            }

            initialize_decoders_and_library();

            return true;
        }

        FRONT_END_GENERAL_CHECK(false, "Saved Model doesn't contain MetaGraph with requested tag");
        // Alternate behavior for working with "default tag" to support additional cases for read_model
        if (tags == META_GRAPH_DEFAULT_TAG) {
            // If we have only one MetaGraph - try to use it
            if (m_saved_model->meta_graphs_size() == 1 && m_saved_model->meta_graphs(0).has_graph_def()) {
                return load_meta_graph(m_saved_model->meta_graphs(0));
            }

            // If MetaGraph with tag == META_GRAPH_DEFAULT_TAG already found - we shouldn't reach this place.
            // Otherwise we try to find a MetaGraph with no tags as an alternative
            for (const auto& meta_graph : m_saved_model->meta_graphs()) {
                if (!meta_graph.has_graph_def()) {
                    continue;
                }

                if (meta_graph.meta_info_def().tags_size() == 0) {
                    return load_meta_graph(meta_graph);
                }
            }

            FRONT_END_GENERAL_CHECK(false,
                                    "[TensorFlow Frontend] Saved Model doesn't contain any applicable MetaGraph");
        }

        FRONT_END_GENERAL_CHECK(false,
                                "[TensorFlow Frontend] Saved Model doesn't contain MetaGraph with requested tag");

        return false;
    }

    /// \brief Loads the given meta-graph
    bool load_meta_graph(const ::tensorflow::MetaGraphDef& meta_graph) {
        std::map<std::string, const ::tensorflow::SignatureDef*> validSignatures = {};
        for (const auto& sit : meta_graph.signature_def()) {
            const std::string& key = sit.first;
            const ::tensorflow::SignatureDef& val = sit.second;
            if (is_valid_signature(val)) {
                validSignatures[key] = &val;
            }
        }

        // A MetaGraph may have a list of signatures, but at this moment we need information only about
        // the "serving_default" signature, which contains the inputs/outputs names for the
        // model. It may also be missing from the file.
        auto serving_default = validSignatures.find("serving_default");

        if (serving_default != validSignatures.end()) {
            m_inputs_map = std::make_shared<std::map<std::string, std::string>>();
            m_outputs_map = std::make_shared<std::map<std::string, std::string>>();
            for (const auto& input : serving_default->second->inputs()) {
                (*m_inputs_map)[input.second.name()] = input.first;
            }
            for (const auto& output : serving_default->second->outputs()) {
                (*m_outputs_map)[output.second.name()] = output.first;
            }
        }

        m_graph_def = std::make_shared<::tensorflow::GraphDef>(meta_graph.graph_def());

        // Update variables map using information by resolving AssignVariableOp graph nodes
        std::map<std::string, std::string> var_map;
        VariablesIndex::map_assignvariable(m_graph_def, var_map);
        if (var_map.size() > 0 && m_variables_index.get() != nullptr) {
            for (auto var : var_map) {
                m_variables_index->map_variable(var.first, var.second);
            }
        }

        initialize_decoders_and_library();

        return true;
    }

/// \brief Splitting tags by using "," delimeter
|
||||
/// \param[in] tags String with tags separated by ","
|
||||
/// \return Returns vector with splitted tags, no trimming is used. When you pass "tag1, tag2"
|
||||
/// you will have a vector ["tag1", " tag2"]. Because TensorFlow saves tags without trimming
|
||||
std::vector<std::string> split_tags(const std::string tags) const;
|
||||
}; // GraphIteratorSavedModel
|
||||
|
||||
} // namespace tensorflow
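For orientation, the documented split_tags behavior (comma split, no trimming) could look like the following. This is a hypothetical sketch only, not the actual implementation in the commit:

// Hypothetical sketch of a split_tags-style helper consistent with the
// documented behavior above (split on ',' and keep whitespace untrimmed).
#include <string>
#include <vector>

std::vector<std::string> split_by_comma(const std::string& tags) {
    std::vector<std::string> result;
    std::string::size_type pos = 0;
    std::string::size_type endpos = 0;
    while ((endpos = tags.find(',', pos)) != std::string::npos) {
        result.push_back(tags.substr(pos, endpos - pos));  // no trimming, by design
        pos = endpos + 1;
    }
    result.push_back(tags.substr(pos));  // trailing tag, or the whole string if no comma
    return result;
}
// split_by_comma("tag1, tag2") yields {"tag1", " tag2"}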

@ -214,6 +214,14 @@ void InputModel::InputModelTFImpl::load_places() {
producer_op_name,
producer_output_port_name,
producer_output_port_idx);
if (is_conditional_edge(producer_op_name)) {
// strip the "^" mark that indicates an (execution) conditional dependency;
// for example, "^sub_op" means a dependency on a producer node named "sub_op".
// If a node has dependent operation nodes but no data consumers,
// it is not terminating and will not be connected to a Result node.
producer_op_name = producer_op_name.substr(1);
}

op_names_with_consumers.insert(producer_op_name);
} catch (const std::exception&) {
FRONT_END_THROW("[ ERROR ] Exception happened when preparing input " + std::to_string(input_port_idx) +

@ -67,6 +67,10 @@ OutputVector translate_varhandle_op(const NodeContext& node) {
auto shape = node.get_attribute<::ov::PartialShape>("shape").get_shape();
bool result = var_index->get_mapped_variable(var_name, &entry_data, &entry_size);

if (!result) {
result = var_index->get_variable(var_name, &entry_data, &entry_size);
}

TENSORFLOW_OP_VALIDATION(node, result, "[TensorFlow Frontend] Internal error: Cannot find requested variable.");

::tensorflow::BundleEntryProto entry;
@ -124,3 +124,17 @@ TEST_F(FrontEndConversionWithReferenceTestsF, SavedModelBroadcastIssue) {
model_ref = make_shared<Model>(OutputVector{x}, ParameterVector{});
}
}

TEST_F(FrontEndConversionWithReferenceTestsF, SavedModelMultiGraph) {
// The test verifies loading of a MetaGraph with empty tags as the default
// and loading variables that have no corresponding RestoreV2
{ model = convert_model("saved_model_multi-graph"); }
{
// create a reference graph
auto x = make_shared<Constant>(element::f32, Shape{2, 3}, vector<float>{1, 2, 3, 3, 2, 1});
auto y = make_shared<Parameter>(element::f32, Shape{1});
auto add = make_shared<Add>(x, y);

model_ref = make_shared<Model>(OutputVector{add}, ParameterVector{y});
}
}

@ -700,3 +700,19 @@ TEST_F(FrontEndConversionWithReferenceTestsF, PartitionedCallsWithConvInBodyGrap
model_ref = make_shared<Model>(OutputVector{conv}, ParameterVector{input1, filter});
}
}

TEST_F(FrontEndConversionWithReferenceTestsF, ControlDependencyNumberOutputs) {
// The test checks the number of outputs of the resulting model.
// If a node has dependents via a conditional edge, it is not terminating
// and should not be connected to a Result node.
{ model = convert_model("control_dependency/control_dependency.pb"); }
{
auto input1 = make_shared<Parameter>(f32, Shape{2, 3});
auto input2 = make_shared<Parameter>(f32, Shape{2, 3});

// the AddV2 node is excluded since it is not terminating
auto sub = make_shared<Subtract>(input1, input2);

model_ref = make_shared<Model>(OutputVector{sub}, ParameterVector{input1, input2});
}
}

@ -0,0 +1,24 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import sys

import numpy as np
import tensorflow as tf

tf.compat.v1.reset_default_graph()

# Create the graph and model
with tf.compat.v1.Session() as sess:
    input1 = tf.compat.v1.placeholder(tf.float32, [2, 3], 'input1')
    input2 = tf.compat.v1.placeholder(tf.float32, [2, 3], 'input2')

    add = tf.add(input1, input2, name="add")
    with tf.control_dependencies([add]):
        sub = tf.subtract(input1, input2, name="sub")

    tf.compat.v1.global_variables_initializer()
    tf_net = sess.graph_def

tf.io.write_graph(tf_net, os.path.join(sys.argv[1], "control_dependency"), 'control_dependency.pb', False)
@ -0,0 +1,48 @@
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import sys

import tensorflow as tf

export_dir = os.path.join(sys.argv[1], "saved_model_multi-graph")

# Slash replacement is required; otherwise this fails on Windows
builder = tf.compat.v1.saved_model.Builder(export_dir if os.name != 'nt' else export_dir.replace("/", "\\"))

# Create the graph and model
with tf.compat.v1.Session(graph=tf.Graph()) as sess:
    x_value = [[1.,2.,3.],[3.,2.,1.]]
    z_value = [[2.,2.,1.],[1.,1.,2.]]
    tf_x = tf.compat.v1.Variable(x_value, name="custom_variable_name")
    tf_y = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1], name='y')
    tf_z = tf.constant(z_value)
    tf_add = tf.add(tf_x, tf_y, name="AddOperation")
    tf_identity = tf.identity(tf_add, name="AddIdentity")
    tf.subtract(tf_identity, tf_z, name="SubOperation")
    sess.run(tf.compat.v1.global_variables_initializer())

    builder.add_meta_graph_and_variables(sess, ["train"])

with tf.compat.v1.Session(graph=tf.Graph()) as sess:
    x_value = [[1.,2.,3.],[3.,2.,1.]]
    tf_x = tf.compat.v1.Variable(x_value, name="custom_variable_name")
    tf_y = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1], name='y')
    tf_add = tf.add(tf_x, tf_y, name="AddOperation")
    sess.run(tf.compat.v1.global_variables_initializer())

    saver = tf.compat.v1.train.Saver(var_list=None, defer_build=True)
    builder.add_meta_graph([], saver=saver)

with tf.compat.v1.Session(graph=tf.Graph()) as sess:
    x_value = [[1.,2.,3.],[3.,2.,1.]]
    tf_x = tf.compat.v1.Variable(x_value, name="custom_variable_name")
    tf_y = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1], name='y')
    tf_add = tf.subtract(tf_x, tf_y, name="SubOperation")
    sess.run(tf.compat.v1.global_variables_initializer())

    saver = tf.compat.v1.train.Saver(var_list=None, defer_build=True)
    builder.add_meta_graph(["test","test2"], saver=saver)

builder.save()
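Note: both generator scripts above take the destination directory as their first command-line argument (sys.argv[1]); a hypothetical invocation is "python3 saved_model_multi-graph.py /tmp/models" (script and path names illustrative only).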
@ -168,24 +168,28 @@ OPENVINO_RUNTIME_API std::vector<std::vector<int>> get_proc_type_table();
* extend to support other CPU core types like ARM.
*
* The following are two examples of the processor type table.
* 1. Processor table of two socket CPUs XEON server
* 1. Processor table of 4 numa nodes and 2 socket server
*
* ALL_PROC | MAIN_CORE_PROC | EFFICIENT_CORE_PROC | HYPER_THREADING_PROC
* 96 48 0 48 // Total number of two sockets
* 48 24 0 24 // Number of socket one
* 48 24 0 24 // Number of socket two
* ALL_PROC | MAIN_CORE_PROC | EFFICIENT_CORE_PROC | HYPER_THREADING_PROC | PROC_NUMA_NODE_ID | PROC_SOCKET_ID
* 96 48 0 48 -1 -1
* 24 12 0 12 0 0
* 24 12 0 12 1 0
* 24 12 0 12 2 1
* 24 12 0 12 3 1
*
* 2. Processor table of one socket CPU desktop
* 2. Processor table of 1 numa node desktop
*
* ALL_PROC | MAIN_CORE_PROC | EFFICIENT_CORE_PROC | HYPER_THREADING_PROC
* 32 8 16 8 // Total number of one socket
* ALL_PROC | MAIN_CORE_PROC | EFFICIENT_CORE_PROC | HYPER_THREADING_PROC | PROC_NUMA_NODE_ID | PROC_SOCKET_ID
* 32 8 16 8 -1 -1
*/
enum ColumnOfProcessorTypeTable {
ALL_PROC = 0, //!< All processors, regardless of backend cpu
MAIN_CORE_PROC = 1, //!< Processor based on physical core of Intel Performance-cores
EFFICIENT_CORE_PROC = 2, //!< Processor based on Intel Efficient-cores
HYPER_THREADING_PROC = 3, //!< Processor based on logical core of Intel Performance-cores
PROC_TYPE_TABLE_SIZE = 4 //!< Size of processor type table
PROC_NUMA_NODE_ID = 4, //!< Numa node id of processors in this row
PROC_SOCKET_ID = 5, //!< Socket id of processors in this row
PROC_TYPE_TABLE_SIZE = 6 //!< Size of processor type table
};
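A hedged sketch of how a caller might read the extended table, assuming only the enum above and the ov::get_proc_type_table() declaration visible in this hunk:

// Sketch only: print physical-core counts per NUMA node using the new columns.
#include <iostream>
#include "openvino/runtime/system_conf.hpp"

void print_main_cores_per_numa_node() {
    const auto table = ov::get_proc_type_table();
    // When several NUMA rows follow, row 0 is the whole-platform summary (ids are -1).
    for (const auto& row : table) {
        std::cout << "numa " << row[ov::PROC_NUMA_NODE_ID] << " socket " << row[ov::PROC_SOCKET_ID]
                  << ": " << row[ov::MAIN_CORE_PROC] << " P-cores of " << row[ov::ALL_PROC]
                  << " logical processors\n";
    }
}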

/**
@ -229,24 +233,25 @@ OPENVINO_RUNTIME_API void set_cpu_used(const std::vector<int>& cpu_ids, const in
* 1. Four processors on two P-cores
* 2. Four processors on four E-cores sharing an L2 cache
*
* PROCESSOR_ID | SOCKET_ID | CORE_ID | CORE_TYPE | GROUP_ID | Used
* 0 0 0 3 0 0
* 1 0 0 1 0 0
* 2 0 1 3 1 0
* 3 0 1 1 1 0
* 4 0 2 2 2 0
* 5 0 3 2 2 0
* 6 0 4 2 2 0
* 7 0 5 2 2 0
* PROCESSOR_ID | NUMA_NODE_ID | SOCKET_ID | CORE_ID | CORE_TYPE | GROUP_ID | Used
* 0 0 0 0 3 0 0
* 1 0 0 0 1 0 0
* 2 0 0 1 3 1 0
* 3 0 0 1 1 1 0
* 4 0 0 2 2 2 0
* 5 0 0 3 2 2 0
* 6 0 0 4 2 2 0
* 7 0 0 5 2 2 0
*/
enum ColumnOfCPUMappingTable {
CPU_MAP_PROCESSOR_ID = 0, //!< column for processor id of the processor
CPU_MAP_SOCKET_ID = 1, //!< column for socket id of the processor
CPU_MAP_CORE_ID = 2, //!< column for hardware core id of the processor
CPU_MAP_CORE_TYPE = 3, //!< column for CPU core type corresponding to the processor
CPU_MAP_GROUP_ID = 4, //!< column for group id of the processor. Processors in one group have dependency.
CPU_MAP_USED_FLAG = 5, //!< column for resource management of the processor
CPU_MAP_TABLE_SIZE = 6 //!< Size of CPU mapping table
CPU_MAP_NUMA_NODE_ID = 1, //!< column for node id of the processor
CPU_MAP_SOCKET_ID = 2, //!< column for socket id of the processor
CPU_MAP_CORE_ID = 3, //!< column for hardware core id of the processor
CPU_MAP_CORE_TYPE = 4, //!< column for CPU core type corresponding to the processor
CPU_MAP_GROUP_ID = 5, //!< column for group id of the processor. Processors in one group have dependency.
CPU_MAP_USED_FLAG = 6, //!< column for resource management of the processor
CPU_MAP_TABLE_SIZE = 7 //!< Size of CPU mapping table
};
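A hedged companion sketch for the mapping table above. `mapping` stands for whatever CPU mapping table the runtime produced (the accessor is not shown in this excerpt, so the parameter is an assumption):

// Sketch only: collect processor ids of unused P-cores on a given NUMA node,
// assuming `mapping` is laid out per ColumnOfCPUMappingTable.
#include <vector>

std::vector<int> main_cores_on_node(const std::vector<std::vector<int>>& mapping, int numa_node) {
    std::vector<int> ids;
    for (const auto& row : mapping) {
        if (row[ov::CPU_MAP_NUMA_NODE_ID] == numa_node && row[ov::CPU_MAP_CORE_TYPE] == ov::MAIN_CORE_PROC &&
            row[ov::CPU_MAP_USED_FLAG] == 0) {
            ids.push_back(row[ov::CPU_MAP_PROCESSOR_ID]);
        }
    }
    return ids;
}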

} // namespace ov

@ -0,0 +1,144 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once

#include <atomic>
#include <cstddef>
#include <mutex>
#include <queue>
#include <type_traits>

#include "openvino/core/parallel.hpp"

#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
# include <tbb/concurrent_priority_queue.h>
# include <tbb/concurrent_queue.h>
#endif

namespace ov {
namespace threading {

template <typename T>
class ThreadSafeQueueWithSize {
public:
void push(T value) {
std::lock_guard<std::mutex> lock(_mutex);
_queue.push(std::move(value));
}
bool try_pop(T& value) {
std::lock_guard<std::mutex> lock(_mutex);
if (!_queue.empty()) {
value = std::move(_queue.front());
_queue.pop();
return true;
} else {
return false;
}
}
size_t size() {
std::lock_guard<std::mutex> lock(_mutex);
return _queue.size();
}

protected:
std::queue<T> _queue;
std::mutex _mutex;
};
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
template <typename T>
using ThreadSafeQueue = tbb::concurrent_queue<T>;
template <typename T>
using ThreadSafeBoundedQueue = tbb::concurrent_bounded_queue<T>;
template <typename T>
class ThreadSafeBoundedPriorityQueue {
public:
ThreadSafeBoundedPriorityQueue() = default;
bool try_push(T&& value) {
if (_capacity) {
_pqueue.push(std::move(value));
return true;
}
return false;
}
bool try_pop(T& value) {
return _capacity ? _pqueue.try_pop(value) : false;
}
void set_capacity(std::size_t newCapacity) {
_capacity = newCapacity;
}

protected:
tbb::concurrent_priority_queue<T, std::greater<T>> _pqueue;
std::atomic_bool _capacity{false};
};
#else
template <typename T>
using ThreadSafeQueue = ThreadSafeQueueWithSize<T>;
template <typename T>
class ThreadSafeBoundedQueue {
public:
ThreadSafeBoundedQueue() = default;
bool try_push(T value) {
std::lock_guard<std::mutex> lock(_mutex);
if (_capacity) {
_queue.push(std::move(value));
}
return _capacity;
}
bool try_pop(T& value) {
std::lock_guard<std::mutex> lock(_mutex);
if (_capacity && !_queue.empty()) {
value = std::move(_queue.front());
_queue.pop();
return true;
} else {
return false;
}
}
void set_capacity(std::size_t newCapacity) {
std::lock_guard<std::mutex> lock(_mutex);
_capacity = newCapacity;
}

protected:
std::queue<T> _queue;
std::mutex _mutex;
bool _capacity = false;
};
template <typename T>
class ThreadSafeBoundedPriorityQueue {
public:
ThreadSafeBoundedPriorityQueue() = default;
bool try_push(T value) {
std::lock_guard<std::mutex> lock(_mutex);
if (_capacity) {
_queue.push(std::move(value));
}
return _capacity;
}
bool try_pop(T& value) {
std::lock_guard<std::mutex> lock(_mutex);
if (_capacity && !_queue.empty()) {
value = std::move(_queue.top());
_queue.pop();
return true;
} else {
return false;
}
}
void set_capacity(std::size_t newCapacity) {
std::lock_guard<std::mutex> lock(_mutex);
_capacity = newCapacity;
}

protected:
std::priority_queue<T, std::vector<T>, std::greater<T>> _queue;
std::mutex _mutex;
bool _capacity = false;
};
#endif
} // namespace threading
} // namespace ov
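A short usage sketch for the bounded queue above. The comments describe the non-TBB fallback semantics, where the "capacity" is effectively an on/off flag; the demo function name is illustrative:

// Sketch only: bounded-queue handshake as exposed by the classes above.
#include <iostream>

void bounded_queue_demo() {
    ov::threading::ThreadSafeBoundedQueue<int> queue;

    queue.set_capacity(4);      // enables the queue (nonzero capacity)
    if (!queue.try_push(42)) {  // would fail before set_capacity()
        std::cout << "push rejected\n";
    }

    int value = 0;
    if (queue.try_pop(value)) {  // succeeds: retrieves the 42 pushed above
        std::cout << "popped " << value << "\n";
    }
}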
@ -3,8 +3,8 @@
//

/**
* @brief A header file that provides the minimal required Streams Executor API.
* @file streams_executor.hpp
* @brief A header file that provides a set of CPU map and parser functions.
* @file cpu_map_info.hpp
*/
#pragma once

@ -22,6 +22,7 @@ public:
~CPU(){};
int _processors = 0;
int _numa_nodes = 0;
int _sockets = 0;
int _cores = 0;
std::vector<std::vector<int>> _proc_type_table;
std::vector<std::vector<int>> _cpu_mapping_table;
@ -34,18 +35,37 @@ public:
CPU& cpu_info();

#ifdef __linux__
/**
* @brief Parse node information to update _sockets, _proc_type_table and _cpu_mapping_table on Linux
* @param[in] node_info_table node information for this platform.
* @param[in] _numa_nodes total number of NUMA nodes in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _proc_type_table summary table of the number of processors per type
* @param[out] _cpu_mapping_table CPU mapping table for each processor
* @return
*/
void parse_node_info_linux(const std::vector<std::string> node_info_table,
const int& _numa_nodes,
int& _sockets,
std::vector<std::vector<int>>& _proc_type_table,
std::vector<std::vector<int>>& _cpu_mapping_table);
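For reference, each node_info_table entry is the raw contents of /sys/devices/system/node/node<N>/cpulist. A hedged sketch of feeding such strings in (the literal cpulist values and the pre-filled socket count are illustrative only):

// Sketch only: cpulist strings as read from /sys/devices/system/node/node*/cpulist.
// A two-node machine might yield {"0-27,56-83", "28-55,84-111"} (values illustrative).
#include <string>
#include <vector>

void node_parse_example(std::vector<std::vector<int>>& proc_type_table,
                        std::vector<std::vector<int>>& cpu_mapping_table) {
    const std::vector<std::string> node_info_table = {"0-27,56-83", "28-55,84-111"};
    const int numa_nodes = 2;
    int sockets = 2;  // assumed pre-filled by the cache/frequency parser before this call
    ov::parse_node_info_linux(node_info_table, numa_nodes, sockets, proc_type_table, cpu_mapping_table);
}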

/**
* @brief Parse CPU cache information on Linux
* @param[in] _system_info_table system information for this platform.
* @param[in] system_info_table CPU information for this platform.
* @param[in] node_info_table node information for this platform.
* @param[out] _processors total number of processors in the system.
* @param[out] _numa_nodes total number of NUMA nodes in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _cores total number of physical CPU cores in the system
* @param[out] _proc_type_table summary table of the number of processors per type
* @param[out] _cpu_mapping_table CPU mapping table for each processor
* @return
*/
void parse_cache_info_linux(const std::vector<std::vector<std::string>> _system_info_table,
void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_info_table,
const std::vector<std::string> node_info_table,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -53,16 +73,20 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> _system_

/**
* @brief Parse CPU frequency information on Linux
* @param[in] _system_info_table system information for this platform.
* @param[in] system_info_table CPU information for this platform.
* @param[in] node_info_table node information for this platform.
* @param[out] _processors total number of processors in the system.
* @param[out] _numa_nodes total number of NUMA nodes in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _cores total number of physical CPU cores in the system
* @param[out] _proc_type_table summary table of the number of processors per type
* @param[out] _cpu_mapping_table CPU mapping table for each processor
* @return
*/
void parse_freq_info_linux(const std::vector<std::vector<std::string>> _system_info_table,
void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_info_table,
const std::vector<std::string> node_info_table,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -106,6 +130,7 @@ void get_cpu_mapping_from_cores(const int _processors,
* @param[in] base_ptr buffer object pointer of Windows system information
* @param[in] len buffer object length of Windows system information
* @param[out] _processors total number of processors in the system.
* @param[out] _numa_nodes total number of NUMA nodes in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _cores total number of physical CPU cores in the system
* @param[out] _proc_type_table summary table of the number of processors per type
@ -115,6 +140,7 @@ void get_cpu_mapping_from_cores(const int _processors,
void parse_processor_info_win(const char* base_ptr,
const unsigned long len,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -15,12 +15,13 @@
#include "ie_common.h"
#include "openvino/core/except.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"

namespace ov {

CPU::CPU() {
std::vector<std::vector<std::string>> system_info_table;
std::vector<std::string> node_info_table;

_num_threads = parallel_get_max_threads();
auto get_cache_info_linux = [&]() {
@ -99,6 +100,21 @@ CPU::CPU() {
return 0;
};

auto get_node_info_linux = [&]() {
int node_index = 0;

while (1) {
std::ifstream cache_file("/sys/devices/system/node/node" + std::to_string(node_index) + "/cpulist");
if (!cache_file.is_open()) {
break;
}
std::string cache_info;
std::getline(cache_file, cache_info);
node_info_table.push_back(cache_info);
node_index++;
}
};

auto check_valid_cpu = [&]() {
cpu_set_t mask;
CPU_ZERO(&mask);
@ -131,10 +147,14 @@ CPU::CPU() {
}
};

get_node_info_linux();

if (!get_cache_info_linux()) {
parse_cache_info_linux(system_info_table,
node_info_table,
_processors,
_numa_nodes,
_sockets,
_cores,
_proc_type_table,
_cpu_mapping_table);
@ -143,8 +163,10 @@ CPU::CPU() {
if ((_proc_type_table.size() == 0) || (_proc_type_table[0][MAIN_CORE_PROC] == 0)) {
if (!get_freq_info_linux()) {
parse_freq_info_linux(system_info_table,
node_info_table,
_processors,
_numa_nodes,
_sockets,
_cores,
_proc_type_table,
_cpu_mapping_table);
@ -177,7 +199,10 @@ CPU::CPU() {
}
}
_processors = processors.size();

_numa_nodes = sockets.size() == 0 ? 1 : sockets.size();
_sockets = _numa_nodes;

for (auto&& socket : sockets) {
_cores += socket.second;
}
@ -203,8 +228,77 @@ CPU::CPU() {
};
}

void parse_node_info_linux(const std::vector<std::string> node_info_table,
const int& _numa_nodes,
int& _sockets,
std::vector<std::vector<int>>& _proc_type_table,
std::vector<std::vector<int>>& _cpu_mapping_table) {
std::vector<std::vector<int>> nodes_table;
int node_index = 0;

for (auto& one_info : node_info_table) {
int core_1 = 0;
int core_2 = 0;
std::string::size_type pos = 0;
std::string::size_type endpos = 0;
std::string sub_str = "";

if (((endpos = one_info.find('-', pos)) == std::string::npos) &&
((endpos = one_info.find(',', pos)) != std::string::npos)) {
while (endpos != std::string::npos) {
sub_str = one_info.substr(pos);
core_1 = std::stoi(sub_str);
nodes_table.push_back({core_1, core_1, node_index});
endpos = one_info.find(',', pos);
pos = endpos + 1;
}
} else {
while (endpos != std::string::npos) {
if ((endpos = one_info.find('-', pos)) != std::string::npos) {
sub_str = one_info.substr(pos, endpos - pos);
core_1 = std::stoi(sub_str);
sub_str = one_info.substr(endpos + 1);
core_2 = std::stoi(sub_str);
nodes_table.push_back({core_1, core_2, node_index});
pos = one_info.find(',', endpos);
if (pos == std::string::npos) {
break;
} else {
pos = pos + 1;
}
}
}
}
node_index++;
}

_proc_type_table.assign((node_info_table.size() == 1) ? 1 : node_info_table.size() + 1,
std::vector<int>({0, 0, 0, 0, -1, -1}));

for (auto& row : nodes_table) {
for (int i = row[0]; i <= row[1]; i++) {
_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] = row[2];
if (_sockets > _numa_nodes) {
_cpu_mapping_table[i][CPU_MAP_SOCKET_ID] = row[2];
}
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]]++;
if (node_info_table.size() != 1) {
_proc_type_table[row[2] + 1][ALL_PROC]++;
_proc_type_table[row[2] + 1][_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]]++;
}
}
node_index = (node_info_table.size() != 1) ? row[2] + 1 : 0;
_proc_type_table[node_index][PROC_NUMA_NODE_ID] = _cpu_mapping_table[row[0]][CPU_MAP_NUMA_NODE_ID];
_proc_type_table[node_index][PROC_SOCKET_ID] = _cpu_mapping_table[row[0]][CPU_MAP_SOCKET_ID];
}
_sockets = (_sockets > _numa_nodes) ? _numa_nodes : _sockets;
}

void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_info_table,
const std::vector<std::string> node_info_table,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -224,7 +318,7 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i

if (((endpos = system_info_table[nproc][0].find(',', pos)) != std::string::npos) ||
((endpos = system_info_table[nproc][0].find('-', pos)) != std::string::npos)) {
sub_str = system_info_table[nproc][0].substr(pos, endpos);
sub_str = system_info_table[nproc][0].substr(pos, endpos - pos);
core_1 = std::stoi(sub_str);
sub_str = system_info_table[nproc][0].substr(endpos + 1);
core_2 = std::stoi(sub_str);
@ -246,13 +340,12 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
_cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = n_group;

_cores++;
n_group++;

_proc_type_table[0][ALL_PROC] += 2;
_proc_type_table[0][MAIN_CORE_PROC]++;
_proc_type_table[0][HYPER_THREADING_PROC]++;
} else if ((endpos = system_info_table[nproc][1].find('-', pos)) != std::string::npos) {
sub_str = system_info_table[nproc][1].substr(pos, endpos);
sub_str = system_info_table[nproc][1].substr(pos, endpos - pos);
core_1 = std::stoi(sub_str);
sub_str = system_info_table[nproc][1].substr(endpos + 1);
core_2 = std::stoi(sub_str);
@ -268,8 +361,6 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][EFFICIENT_CORE_PROC]++;
}

n_group++;
} else {
core_1 = std::stoi(system_info_table[nproc][0]);

@ -279,16 +370,23 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
_cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = n_group;

_cores++;
n_group++;

_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][MAIN_CORE_PROC]++;
}

n_group++;
_proc_type_table[0][PROC_NUMA_NODE_ID] = (_proc_type_table[0][PROC_NUMA_NODE_ID] == -1)
? _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID]
: _proc_type_table[0][PROC_NUMA_NODE_ID];
_proc_type_table[0][PROC_SOCKET_ID] = (_proc_type_table[0][PROC_SOCKET_ID] == -1)
? _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]
: _proc_type_table[0][PROC_SOCKET_ID];
}
return;
};

std::vector<int> line_value_0(PROC_TYPE_TABLE_SIZE, 0);
std::vector<int> line_value_0({0, 0, 0, 0, -1, -1});

for (int n = 0; n < _processors; n++) {
if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) {
@ -308,19 +406,21 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i

while (1) {
if ((endpos = system_info_table[n][2].find('-', pos)) != std::string::npos) {
sub_str = system_info_table[n][2].substr(pos, endpos);
sub_str = system_info_table[n][2].substr(pos, endpos - pos);
core_1 = std::stoi(sub_str);
sub_str = system_info_table[n][2].substr(endpos + 1);
core_2 = std::stoi(sub_str);

for (int m = core_1; m <= core_2; m++) {
_cpu_mapping_table[m][CPU_MAP_SOCKET_ID] = _sockets;
_cpu_mapping_table[m][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[m][CPU_MAP_SOCKET_ID];
update_proc_map_info(m);
}
} else if (pos != std::string::npos) {
sub_str = system_info_table[n][2].substr(pos);
core_1 = std::stoi(sub_str);
_cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets;
_cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
update_proc_map_info(core_1);
endpos = pos;
}
@ -334,15 +434,22 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
_sockets++;
}
}
if (_sockets > 1) {
_proc_type_table.push_back(_proc_type_table[0]);
_proc_type_table[0] = line_value_0;

for (int m = 1; m <= _sockets; m++) {
for (int n = 0; n < PROC_TYPE_TABLE_SIZE; n++) {
_proc_type_table[0][n] += _proc_type_table[m][n];
if ((node_info_table.size() == 0) || (node_info_table.size() == (unsigned)_sockets)) {
if (_sockets > 1) {
_proc_type_table.push_back(_proc_type_table[0]);
_proc_type_table[0] = line_value_0;

for (int m = 1; m <= _sockets; m++) {
for (int n = 0; n < PROC_NUMA_NODE_ID; n++) {
_proc_type_table[0][n] += _proc_type_table[m][n];
}
}
}
_numa_nodes = _sockets;
} else {
_numa_nodes = node_info_table.size();
parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table);
}
};

@ -358,11 +465,10 @@ void get_cpu_mapping_from_cores(const int _processors,
const auto socket_offset = big_phys_cores / _numa_nodes;
const auto threads_per_core = hyper_thread ? 2 : 1;
const auto step = num_small_cores_phys > 0 ? 2 : 1;
std::vector<int> pro_all_table;
std::vector<int> pro_all_table = {0, 0, 0, 0, -1, -1};

_cpu_mapping_table.resize(_processors, std::vector<int>(CPU_MAP_TABLE_SIZE, -1));
_proc_type_table.assign(_numa_nodes, std::vector<int>(PROC_TYPE_TABLE_SIZE, 0));
pro_all_table.resize(PROC_TYPE_TABLE_SIZE, 0);
_proc_type_table.assign(_numa_nodes, std::vector<int>({0, 0, 0, 0, -1, -1}));

for (int t = 0; t < threads_per_core; t++) {
int start = t == 0 ? 0 : (num_small_cores_phys > 0 ? 1 : big_phys_cores);
@ -374,10 +480,17 @@ void get_cpu_mapping_from_cores(const int _processors,
_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE] =
hyper_thread ? (t == 0 ? HYPER_THREADING_PROC : MAIN_CORE_PROC) : MAIN_CORE_PROC;
_cpu_mapping_table[cur_id][CPU_MAP_GROUP_ID] = i;
_cpu_mapping_table[cur_id][CPU_MAP_NUMA_NODE_ID] = socket_id;
_cpu_mapping_table[cur_id][CPU_MAP_SOCKET_ID] = socket_id;

_proc_type_table[socket_id][_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE]]++;
_proc_type_table[socket_id][ALL_PROC]++;
_proc_type_table[socket_id][PROC_NUMA_NODE_ID] = (_proc_type_table[socket_id][PROC_NUMA_NODE_ID] == -1)
? socket_id
: _proc_type_table[socket_id][PROC_NUMA_NODE_ID];
_proc_type_table[socket_id][PROC_SOCKET_ID] = (_proc_type_table[socket_id][PROC_SOCKET_ID] == -1)
? socket_id
: _proc_type_table[socket_id][PROC_SOCKET_ID];
pro_all_table[_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE]]++;
pro_all_table[ALL_PROC]++;
}
@ -389,6 +502,7 @@ void get_cpu_mapping_from_cores(const int _processors,
_cpu_mapping_table[cur_id][CPU_MAP_CORE_ID] = big_phys_cores + j;
_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC;
_cpu_mapping_table[cur_id][CPU_MAP_GROUP_ID] = big_phys_cores + j / 4;
_cpu_mapping_table[cur_id][CPU_MAP_NUMA_NODE_ID] = 0;
_cpu_mapping_table[cur_id][CPU_MAP_SOCKET_ID] = 0;

_proc_type_table[0][_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE]]++;
@ -403,7 +517,9 @@ void get_cpu_mapping_from_cores(const int _processors,
}

void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_info_table,
const std::vector<std::string> node_info_table,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -413,6 +529,7 @@ void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_in
bool ht_enabled = false;

_processors = system_info_table.size();
_numa_nodes = 0;
_sockets = 0;
_cores = 0;
_cpu_mapping_table.resize(_processors, std::vector<int>(CPU_MAP_TABLE_SIZE, -1));
@ -432,19 +549,21 @@ void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_in
if (((endpos1 = system_info_table[n][0].find(',', pos)) != std::string::npos) ||
((endpos2 = system_info_table[n][0].find('-', pos)) != std::string::npos)) {
endpos1 = (endpos1 != std::string::npos) ? endpos1 : endpos2;
sub_str = system_info_table[n][0].substr(pos, endpos1);
sub_str = system_info_table[n][0].substr(pos, endpos1 - pos);
core_1 = std::stoi(sub_str);
sub_str = system_info_table[n][0].substr(endpos1 + 1);
core_2 = std::stoi(sub_str);

_cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1;
_cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]);
_cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
_cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores;
_cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC;
_cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores;

_cpu_mapping_table[core_2][CPU_MAP_PROCESSOR_ID] = core_2;
_cpu_mapping_table[core_2][CPU_MAP_SOCKET_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
_cpu_mapping_table[core_2][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
_cpu_mapping_table[core_2][CPU_MAP_CORE_ID] = _cpu_mapping_table[core_1][CPU_MAP_CORE_ID];
_cpu_mapping_table[core_2][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC;
_cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID];
@ -452,12 +571,12 @@ void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_in
ht_enabled = true;
int core_freq = std::stoi(system_info_table[core_1][2]);
freq_max = std::max(core_freq, freq_max);

} else if (system_info_table[n][0].size() > 0) {
core_1 = std::stoi(system_info_table[n][0]);

_cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1;
_cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]);
_cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
_cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores;

int core_freq = std::stoi(system_info_table[core_1][2]);
@ -476,28 +595,40 @@ void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_in
}
}

if ((_sockets >= 1) && (ecore_enabled)) {
_sockets = 0;
}
_sockets = (_sockets > 0) ? _sockets + 1 : 1;

if (_sockets >= 1) {
_proc_type_table.resize(_sockets + 2, std::vector<int>(PROC_TYPE_TABLE_SIZE, 0));
for (int n = 0; n < _processors; n++) {
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] + 1][ALL_PROC]++;

_proc_type_table[0][_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
_proc_type_table[_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] + 1][_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
if (node_info_table.size() == 0) {
if ((_sockets > 1) && (ecore_enabled)) {
_sockets = 1; // Workaround for a developing platform that lacks CPU cache and NUMA node information.
// Wrong socket information would otherwise create a separate socket ID per CPU core.
}
_sockets++;
if (_sockets > 1) {
_proc_type_table.resize(_sockets + 1, std::vector<int>({0, 0, 0, 0, -1, -1}));
for (int n = 0; n < _processors; n++) {
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] + 1][ALL_PROC]++;

_proc_type_table[0][_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
_proc_type_table[_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] + 1]
[_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
}
for (int n = 0; n < _sockets; n++) {
_proc_type_table[n + 1][PROC_NUMA_NODE_ID] = n;
_proc_type_table[n + 1][PROC_SOCKET_ID] = n;
};
} else {
_proc_type_table.resize(1, std::vector<int>({0, 0, 0, 0, 0, 0}));
for (int n = 0; n < _processors; n++) {
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
_cpu_mapping_table[n][CPU_MAP_NUMA_NODE_ID] = 0;
_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] = 0;
}
}
_numa_nodes = _sockets;
} else {
_proc_type_table.resize(1, std::vector<int>(PROC_TYPE_TABLE_SIZE, 0));
for (int n = 0; n < _processors; n++) {
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] = 0;
}
_sockets = 1;
_numa_nodes = node_info_table.size();
parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table);
}
};

@ -507,7 +638,7 @@ void update_valid_processor_linux(const std::vector<int> phy_core_list,
std::vector<std::vector<int>>& _proc_type_table,
std::vector<std::vector<int>>& _cpu_mapping_table) {
for (auto& row : _proc_type_table) {
std::fill(row.begin(), row.end(), 0);
std::fill(row.begin(), row.begin() + PROC_NUMA_NODE_ID, 0);
}
_cores = 0;
for (auto& row : _cpu_mapping_table) {
@ -540,7 +671,7 @@ void update_valid_processor_linux(const std::vector<int> phy_core_list,
}

if ((_proc_type_table.size() > 1) && (_proc_type_table[0][ALL_PROC] == _proc_type_table[1][ALL_PROC])) {
_proc_type_table.pop_back();
_proc_type_table.erase(_proc_type_table.begin());
}
}
_sockets = _proc_type_table.size() == 1 ? 1 : _proc_type_table.size() - 1;

@ -8,7 +8,7 @@

#include "dev/threading/parallel_custom_arena.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"

namespace ov {

@ -13,7 +13,7 @@

#include "dev/threading/parallel_custom_arena.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"

namespace ov {

@ -34,6 +34,7 @@ CPU::CPU() {
len,
_processors,
_numa_nodes,
_sockets,
_cores,
_proc_type_table,
_cpu_mapping_table);
@ -42,6 +43,7 @@ CPU::CPU() {
void parse_processor_info_win(const char* base_ptr,
const unsigned long len,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -49,7 +51,7 @@ void parse_processor_info_win(const char* base_ptr,
std::vector<int> list;
std::vector<int> proc_info;

std::vector<int> proc_init_line(PROC_TYPE_TABLE_SIZE, 0);
std::vector<int> proc_init_line({0, 0, 0, 0, -1, -1});
std::vector<int> cpu_init_line(CPU_MAP_TABLE_SIZE, -1);

char* info_ptr = (char*)base_ptr;
@ -107,6 +109,7 @@ void parse_processor_info_win(const char* base_ptr,
if (2 == list_len) {
proc_info = cpu_init_line;
proc_info[CPU_MAP_PROCESSOR_ID] = list[0] + base_proc;
proc_info[CPU_MAP_NUMA_NODE_ID] = _sockets;
proc_info[CPU_MAP_SOCKET_ID] = _sockets;
proc_info[CPU_MAP_CORE_ID] = _cores;
proc_info[CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC;
@ -115,6 +118,7 @@ void parse_processor_info_win(const char* base_ptr,

proc_info = cpu_init_line;
proc_info[CPU_MAP_PROCESSOR_ID] = list[1] + base_proc;
proc_info[CPU_MAP_NUMA_NODE_ID] = _sockets;
proc_info[CPU_MAP_SOCKET_ID] = _sockets;
proc_info[CPU_MAP_CORE_ID] = _cores;
proc_info[CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC;
@ -128,6 +132,7 @@ void parse_processor_info_win(const char* base_ptr,
} else {
proc_info = cpu_init_line;
proc_info[CPU_MAP_PROCESSOR_ID] = list[0] + base_proc;
proc_info[CPU_MAP_NUMA_NODE_ID] = _sockets;
proc_info[CPU_MAP_SOCKET_ID] = _sockets;
proc_info[CPU_MAP_CORE_ID] = _cores;
if ((_processors > group_start) && (_processors <= group_end)) {
@ -185,11 +190,17 @@ void parse_processor_info_win(const char* base_ptr,
_proc_type_table[0] = proc_init_line;

for (int m = 1; m <= _sockets; m++) {
for (int n = 0; n < PROC_TYPE_TABLE_SIZE; n++) {
for (int n = 0; n <= HYPER_THREADING_PROC; n++) {
_proc_type_table[0][n] += _proc_type_table[m][n];
}
_proc_type_table[m][PROC_SOCKET_ID] = m - 1;
_proc_type_table[m][PROC_NUMA_NODE_ID] = m - 1;
}
} else {
_proc_type_table[0][PROC_SOCKET_ID] = 0;
_proc_type_table[0][PROC_NUMA_NODE_ID] = 0;
}
_numa_nodes = _sockets;
}

int get_number_of_cpu_cores(bool bigCoresOnly) {

@ -16,7 +16,7 @@
#include "dev/threading/parallel_custom_arena.hpp"
#include "ie_common.h"
#include "openvino/core/visibility.hpp"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"
#include "threading/ie_cpu_streams_info.hpp"

#ifdef __APPLE__
@ -341,11 +341,11 @@ void set_cpu_used(const std::vector<int>& cpu_ids, const int used) {
all_table.resize(PROC_TYPE_TABLE_SIZE, 0);
for (int i = 0; i < cpu._processors; i++) {
if (cpu._cpu_mapping_table[i][CPU_MAP_USED_FLAG] < PLUGIN_USED_START &&
cpu._cpu_mapping_table[i][CPU_MAP_SOCKET_ID] >= 0 &&
cpu._cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] >= 0 &&
cpu._cpu_mapping_table[i][CPU_MAP_CORE_TYPE] >= ALL_PROC) {
cpu._proc_type_table[cpu._cpu_mapping_table[i][CPU_MAP_SOCKET_ID] + start]
cpu._proc_type_table[cpu._cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] + start]
[cpu._cpu_mapping_table[i][CPU_MAP_CORE_TYPE]]++;
cpu._proc_type_table[cpu._cpu_mapping_table[i][CPU_MAP_SOCKET_ID] + start][ALL_PROC]++;
cpu._proc_type_table[cpu._cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] + start][ALL_PROC]++;
all_table[cpu._cpu_mapping_table[i][CPU_MAP_CORE_TYPE]]++;
all_table[ALL_PROC]++;
}
1121 src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp (new file; diff suppressed because it is too large)
992 src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp (new file)
@ -0,0 +1,992 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <common_test_utils/test_common.hpp>

#include "ie_system_conf.h"
#include "os/cpu_map_info.hpp"

using namespace testing;
using namespace ov;

namespace {

#ifdef __linux__

struct LinuxCpuMapTestCase {
int _processors;
int _numa_nodes;
int _sockets;
int _cores;
std::vector<std::vector<int>> _proc_type_table;
std::vector<std::vector<int>> _cpu_mapping_table;
std::vector<std::vector<std::string>> system_info_table;
std::vector<std::string> node_info_table;
};

class LinuxCpuMapFreqParserTests : public CommonTestUtils::TestsCommon,
public testing::WithParamInterface<std::tuple<LinuxCpuMapTestCase>> {
public:
void SetUp() override {
const auto& test_data = std::get<0>(GetParam());

int test_processors = 0;
int test_numa_nodes = 0;
int test_sockets = 0;
int test_cores = 0;
std::vector<std::vector<int>> test_proc_type_table;
std::vector<std::vector<int>> test_cpu_mapping_table;

ov::parse_freq_info_linux(test_data.system_info_table,
test_data.node_info_table,
test_processors,
test_numa_nodes,
test_sockets,
test_cores,
test_proc_type_table,
test_cpu_mapping_table);

ASSERT_EQ(test_data._processors, test_processors);
ASSERT_EQ(test_data._numa_nodes, test_numa_nodes);
ASSERT_EQ(test_data._sockets, test_sockets);
ASSERT_EQ(test_data._cores, test_cores);
ASSERT_EQ(test_data._proc_type_table, test_proc_type_table);
ASSERT_EQ(test_data._cpu_mapping_table, test_cpu_mapping_table);
}
};

LinuxCpuMapTestCase freq_2sockets_112cores_hyperthreading = {
224, // param[expected out]: total 224 logical processors on this simulated platform
2, // param[expected out]: total 2 numa nodes on this simulated platform
2, // param[expected out]: total 2 sockets on this simulated platform
112, // param[expected out]: total 112 CPU cores on this simulated platform
{{224, 112, 0, 112, -1, -1},
{112, 56, 0, 56, 0, 0},
{112, 56, 0, 56, 1, 1}}, // param[expected out]: The proc_type_table of this simulated platform
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 0, 0, 11, HYPER_THREADING_PROC, 11, -1},
{12, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 0, 0, 13, HYPER_THREADING_PROC, 13, -1},
{14, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 0, 0, 15, HYPER_THREADING_PROC, 15, -1},
{16, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 0, 0, 17, HYPER_THREADING_PROC, 17, -1},
{18, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 0, 0, 19, HYPER_THREADING_PROC, 19, -1},
{20, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 0, 0, 21, HYPER_THREADING_PROC, 21, -1},
{22, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 0, 0, 23, HYPER_THREADING_PROC, 23, -1},
{24, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {25, 0, 0, 25, HYPER_THREADING_PROC, 25, -1},
{26, 0, 0, 26, HYPER_THREADING_PROC, 26, -1}, {27, 0, 0, 27, HYPER_THREADING_PROC, 27, -1},
{28, 0, 0, 28, HYPER_THREADING_PROC, 28, -1}, {29, 0, 0, 29, HYPER_THREADING_PROC, 29, -1},
{30, 0, 0, 30, HYPER_THREADING_PROC, 30, -1}, {31, 0, 0, 31, HYPER_THREADING_PROC, 31, -1},
{32, 0, 0, 32, HYPER_THREADING_PROC, 32, -1}, {33, 0, 0, 33, HYPER_THREADING_PROC, 33, -1},
{34, 0, 0, 34, HYPER_THREADING_PROC, 34, -1}, {35, 0, 0, 35, HYPER_THREADING_PROC, 35, -1},
{36, 0, 0, 36, HYPER_THREADING_PROC, 36, -1}, {37, 0, 0, 37, HYPER_THREADING_PROC, 37, -1},
{38, 0, 0, 38, HYPER_THREADING_PROC, 38, -1}, {39, 0, 0, 39, HYPER_THREADING_PROC, 39, -1},
{40, 0, 0, 40, HYPER_THREADING_PROC, 40, -1}, {41, 0, 0, 41, HYPER_THREADING_PROC, 41, -1},
{42, 0, 0, 42, HYPER_THREADING_PROC, 42, -1}, {43, 0, 0, 43, HYPER_THREADING_PROC, 43, -1},
{44, 0, 0, 44, HYPER_THREADING_PROC, 44, -1}, {45, 0, 0, 45, HYPER_THREADING_PROC, 45, -1},
{46, 0, 0, 46, HYPER_THREADING_PROC, 46, -1}, {47, 0, 0, 47, HYPER_THREADING_PROC, 47, -1},
{48, 0, 0, 48, HYPER_THREADING_PROC, 48, -1}, {49, 0, 0, 49, HYPER_THREADING_PROC, 49, -1},
{50, 0, 0, 50, HYPER_THREADING_PROC, 50, -1}, {51, 0, 0, 51, HYPER_THREADING_PROC, 51, -1},
{52, 0, 0, 52, HYPER_THREADING_PROC, 52, -1}, {53, 0, 0, 53, HYPER_THREADING_PROC, 53, -1},
{54, 0, 0, 54, HYPER_THREADING_PROC, 54, -1}, {55, 0, 0, 55, HYPER_THREADING_PROC, 55, -1},
{56, 1, 1, 56, HYPER_THREADING_PROC, 56, -1}, {57, 1, 1, 57, HYPER_THREADING_PROC, 57, -1},
{58, 1, 1, 58, HYPER_THREADING_PROC, 58, -1}, {59, 1, 1, 59, HYPER_THREADING_PROC, 59, -1},
{60, 1, 1, 60, HYPER_THREADING_PROC, 60, -1}, {61, 1, 1, 61, HYPER_THREADING_PROC, 61, -1},
{62, 1, 1, 62, HYPER_THREADING_PROC, 62, -1}, {63, 1, 1, 63, HYPER_THREADING_PROC, 63, -1},
{64, 1, 1, 64, HYPER_THREADING_PROC, 64, -1}, {65, 1, 1, 65, HYPER_THREADING_PROC, 65, -1},
{66, 1, 1, 66, HYPER_THREADING_PROC, 66, -1}, {67, 1, 1, 67, HYPER_THREADING_PROC, 67, -1},
{68, 1, 1, 68, HYPER_THREADING_PROC, 68, -1}, {69, 1, 1, 69, HYPER_THREADING_PROC, 69, -1},
{70, 1, 1, 70, HYPER_THREADING_PROC, 70, -1}, {71, 1, 1, 71, HYPER_THREADING_PROC, 71, -1},
{72, 1, 1, 72, HYPER_THREADING_PROC, 72, -1}, {73, 1, 1, 73, HYPER_THREADING_PROC, 73, -1},
{74, 1, 1, 74, HYPER_THREADING_PROC, 74, -1}, {75, 1, 1, 75, HYPER_THREADING_PROC, 75, -1},
{76, 1, 1, 76, HYPER_THREADING_PROC, 76, -1}, {77, 1, 1, 77, HYPER_THREADING_PROC, 77, -1},
{78, 1, 1, 78, HYPER_THREADING_PROC, 78, -1}, {79, 1, 1, 79, HYPER_THREADING_PROC, 79, -1},
{80, 1, 1, 80, HYPER_THREADING_PROC, 80, -1}, {81, 1, 1, 81, HYPER_THREADING_PROC, 81, -1},
{82, 1, 1, 82, HYPER_THREADING_PROC, 82, -1}, {83, 1, 1, 83, HYPER_THREADING_PROC, 83, -1},
{84, 1, 1, 84, HYPER_THREADING_PROC, 84, -1}, {85, 1, 1, 85, HYPER_THREADING_PROC, 85, -1},
{86, 1, 1, 86, HYPER_THREADING_PROC, 86, -1}, {87, 1, 1, 87, HYPER_THREADING_PROC, 87, -1},
{88, 1, 1, 88, HYPER_THREADING_PROC, 88, -1}, {89, 1, 1, 89, HYPER_THREADING_PROC, 89, -1},
{90, 1, 1, 90, HYPER_THREADING_PROC, 90, -1}, {91, 1, 1, 91, HYPER_THREADING_PROC, 91, -1},
{92, 1, 1, 92, HYPER_THREADING_PROC, 92, -1}, {93, 1, 1, 93, HYPER_THREADING_PROC, 93, -1},
{94, 1, 1, 94, HYPER_THREADING_PROC, 94, -1}, {95, 1, 1, 95, HYPER_THREADING_PROC, 95, -1},
{96, 1, 1, 96, HYPER_THREADING_PROC, 96, -1}, {97, 1, 1, 97, HYPER_THREADING_PROC, 97, -1},
{98, 1, 1, 98, HYPER_THREADING_PROC, 98, -1}, {99, 1, 1, 99, HYPER_THREADING_PROC, 99, -1},
{100, 1, 1, 100, HYPER_THREADING_PROC, 100, -1}, {101, 1, 1, 101, HYPER_THREADING_PROC, 101, -1},
{102, 1, 1, 102, HYPER_THREADING_PROC, 102, -1}, {103, 1, 1, 103, HYPER_THREADING_PROC, 103, -1},
{104, 1, 1, 104, HYPER_THREADING_PROC, 104, -1}, {105, 1, 1, 105, HYPER_THREADING_PROC, 105, -1},
{106, 1, 1, 106, HYPER_THREADING_PROC, 106, -1}, {107, 1, 1, 107, HYPER_THREADING_PROC, 107, -1},
{108, 1, 1, 108, HYPER_THREADING_PROC, 108, -1}, {109, 1, 1, 109, HYPER_THREADING_PROC, 109, -1},
{110, 1, 1, 110, HYPER_THREADING_PROC, 110, -1}, {111, 1, 1, 111, HYPER_THREADING_PROC, 111, -1},
{112, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {113, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{114, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {115, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{116, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {117, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{118, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {119, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{120, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {121, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{122, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {123, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
{124, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {125, 0, 0, 13, MAIN_CORE_PROC, 13, -1},
{126, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {127, 0, 0, 15, MAIN_CORE_PROC, 15, -1},
{128, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {129, 0, 0, 17, MAIN_CORE_PROC, 17, -1},
{130, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, {131, 0, 0, 19, MAIN_CORE_PROC, 19, -1},
{132, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {133, 0, 0, 21, MAIN_CORE_PROC, 21, -1},
{134, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {135, 0, 0, 23, MAIN_CORE_PROC, 23, -1},
{136, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {137, 0, 0, 25, MAIN_CORE_PROC, 25, -1},
{138, 0, 0, 26, MAIN_CORE_PROC, 26, -1}, {139, 0, 0, 27, MAIN_CORE_PROC, 27, -1},
{140, 0, 0, 28, MAIN_CORE_PROC, 28, -1}, {141, 0, 0, 29, MAIN_CORE_PROC, 29, -1},
{142, 0, 0, 30, MAIN_CORE_PROC, 30, -1}, {143, 0, 0, 31, MAIN_CORE_PROC, 31, -1},
{144, 0, 0, 32, MAIN_CORE_PROC, 32, -1}, {145, 0, 0, 33, MAIN_CORE_PROC, 33, -1},
{146, 0, 0, 34, MAIN_CORE_PROC, 34, -1}, {147, 0, 0, 35, MAIN_CORE_PROC, 35, -1},
{148, 0, 0, 36, MAIN_CORE_PROC, 36, -1}, {149, 0, 0, 37, MAIN_CORE_PROC, 37, -1},
{150, 0, 0, 38, MAIN_CORE_PROC, 38, -1}, {151, 0, 0, 39, MAIN_CORE_PROC, 39, -1},
{152, 0, 0, 40, MAIN_CORE_PROC, 40, -1}, {153, 0, 0, 41, MAIN_CORE_PROC, 41, -1},
{154, 0, 0, 42, MAIN_CORE_PROC, 42, -1}, {155, 0, 0, 43, MAIN_CORE_PROC, 43, -1},
{156, 0, 0, 44, MAIN_CORE_PROC, 44, -1}, {157, 0, 0, 45, MAIN_CORE_PROC, 45, -1},
{158, 0, 0, 46, MAIN_CORE_PROC, 46, -1}, {159, 0, 0, 47, MAIN_CORE_PROC, 47, -1},
{160, 0, 0, 48, MAIN_CORE_PROC, 48, -1}, {161, 0, 0, 49, MAIN_CORE_PROC, 49, -1},
{162, 0, 0, 50, MAIN_CORE_PROC, 50, -1}, {163, 0, 0, 51, MAIN_CORE_PROC, 51, -1},
{164, 0, 0, 52, MAIN_CORE_PROC, 52, -1}, {165, 0, 0, 53, MAIN_CORE_PROC, 53, -1},
{166, 0, 0, 54, MAIN_CORE_PROC, 54, -1}, {167, 0, 0, 55, MAIN_CORE_PROC, 55, -1},
{168, 1, 1, 56, MAIN_CORE_PROC, 56, -1}, {169, 1, 1, 57, MAIN_CORE_PROC, 57, -1},
{170, 1, 1, 58, MAIN_CORE_PROC, 58, -1}, {171, 1, 1, 59, MAIN_CORE_PROC, 59, -1},
{172, 1, 1, 60, MAIN_CORE_PROC, 60, -1}, {173, 1, 1, 61, MAIN_CORE_PROC, 61, -1},
{174, 1, 1, 62, MAIN_CORE_PROC, 62, -1}, {175, 1, 1, 63, MAIN_CORE_PROC, 63, -1},
{176, 1, 1, 64, MAIN_CORE_PROC, 64, -1}, {177, 1, 1, 65, MAIN_CORE_PROC, 65, -1},
{178, 1, 1, 66, MAIN_CORE_PROC, 66, -1}, {179, 1, 1, 67, MAIN_CORE_PROC, 67, -1},
{180, 1, 1, 68, MAIN_CORE_PROC, 68, -1}, {181, 1, 1, 69, MAIN_CORE_PROC, 69, -1},
{182, 1, 1, 70, MAIN_CORE_PROC, 70, -1}, {183, 1, 1, 71, MAIN_CORE_PROC, 71, -1},
{184, 1, 1, 72, MAIN_CORE_PROC, 72, -1}, {185, 1, 1, 73, MAIN_CORE_PROC, 73, -1},
{186, 1, 1, 74, MAIN_CORE_PROC, 74, -1}, {187, 1, 1, 75, MAIN_CORE_PROC, 75, -1},
{188, 1, 1, 76, MAIN_CORE_PROC, 76, -1}, {189, 1, 1, 77, MAIN_CORE_PROC, 77, -1},
{190, 1, 1, 78, MAIN_CORE_PROC, 78, -1}, {191, 1, 1, 79, MAIN_CORE_PROC, 79, -1},
{192, 1, 1, 80, MAIN_CORE_PROC, 80, -1}, {193, 1, 1, 81, MAIN_CORE_PROC, 81, -1},
{194, 1, 1, 82, MAIN_CORE_PROC, 82, -1}, {195, 1, 1, 83, MAIN_CORE_PROC, 83, -1},
{196, 1, 1, 84, MAIN_CORE_PROC, 84, -1}, {197, 1, 1, 85, MAIN_CORE_PROC, 85, -1},
{198, 1, 1, 86, MAIN_CORE_PROC, 86, -1}, {199, 1, 1, 87, MAIN_CORE_PROC, 87, -1},
{200, 1, 1, 88, MAIN_CORE_PROC, 88, -1}, {201, 1, 1, 89, MAIN_CORE_PROC, 89, -1},
{202, 1, 1, 90, MAIN_CORE_PROC, 90, -1}, {203, 1, 1, 91, MAIN_CORE_PROC, 91, -1},
{204, 1, 1, 92, MAIN_CORE_PROC, 92, -1}, {205, 1, 1, 93, MAIN_CORE_PROC, 93, -1},
{206, 1, 1, 94, MAIN_CORE_PROC, 94, -1}, {207, 1, 1, 95, MAIN_CORE_PROC, 95, -1},
{208, 1, 1, 96, MAIN_CORE_PROC, 96, -1}, {209, 1, 1, 97, MAIN_CORE_PROC, 97, -1},
{210, 1, 1, 98, MAIN_CORE_PROC, 98, -1}, {211, 1, 1, 99, MAIN_CORE_PROC, 99, -1},
{212, 1, 1, 100, MAIN_CORE_PROC, 100, -1}, {213, 1, 1, 101, MAIN_CORE_PROC, 101, -1},
{214, 1, 1, 102, MAIN_CORE_PROC, 102, -1}, {215, 1, 1, 103, MAIN_CORE_PROC, 103, -1},
{216, 1, 1, 104, MAIN_CORE_PROC, 104, -1}, {217, 1, 1, 105, MAIN_CORE_PROC, 105, -1},
{218, 1, 1, 106, MAIN_CORE_PROC, 106, -1}, {219, 1, 1, 107, MAIN_CORE_PROC, 107, -1},
{220, 1, 1, 108, MAIN_CORE_PROC, 108, -1}, {221, 1, 1, 109, MAIN_CORE_PROC, 109, -1},
{222, 1, 1, 110, MAIN_CORE_PROC, 110, -1}, {223, 1, 1, 111, MAIN_CORE_PROC, 111, -1},
}, // param[expected out]: The cpu_mapping_table of this simulated platform
{
{"0,112", "0", "2001000"}, {"1,113", "0", "2001000"}, {"2,114", "0", "2001000"},
{"3,115", "0", "2001000"}, {"4,116", "0", "2001000"}, {"5,117", "0", "2001000"},
{"6,118", "0", "2001000"}, {"7,119", "0", "2001000"}, {"8,120", "0", "2001000"},
{"9,121", "0", "2001000"}, {"10,122", "0", "2001000"}, {"11,123", "0", "2001000"},
{"12,124", "0", "2001000"}, {"13,125", "0", "2001000"}, {"14,126", "0", "2001000"},
{"15,127", "0", "2001000"}, {"16,128", "0", "2001000"}, {"17,129", "0", "2001000"},
{"18,130", "0", "2001000"}, {"19,131", "0", "2001000"}, {"20,132", "0", "2001000"},
{"21,133", "0", "2001000"}, {"22,134", "0", "2001000"}, {"23,135", "0", "2001000"},
{"24,136", "0", "2001000"}, {"25,137", "0", "2001000"}, {"26,138", "0", "2001000"},
{"27,139", "0", "2001000"}, {"28,140", "0", "2001000"}, {"29,141", "0", "2001000"},
{"30,142", "0", "2001000"}, {"31,143", "0", "2001000"}, {"32,144", "0", "2001000"},
{"33,145", "0", "2001000"}, {"34,146", "0", "2001000"}, {"35,147", "0", "2001000"},
{"36,148", "0", "2001000"}, {"37,149", "0", "2001000"}, {"38,150", "0", "2001000"},
{"39,151", "0", "2001000"}, {"40,152", "0", "2001000"}, {"41,153", "0", "2001000"},
{"42,154", "0", "2001000"}, {"43,155", "0", "2001000"}, {"44,156", "0", "2001000"},
{"45,157", "0", "2001000"}, {"46,158", "0", "2001000"}, {"47,159", "0", "2001000"},
{"48,160", "0", "2001000"}, {"49,161", "0", "2001000"}, {"50,162", "0", "2001000"},
{"51,163", "0", "2001000"}, {"52,164", "0", "2001000"}, {"53,165", "0", "2001000"},
{"54,166", "0", "2001000"}, {"55,167", "0", "2001000"}, {"56,168", "1", "2001000"},
{"57,169", "1", "2001000"}, {"58,170", "1", "2001000"}, {"59,171", "1", "2001000"},
{"60,172", "1", "2001000"}, {"61,173", "1", "2001000"}, {"62,174", "1", "2001000"},
{"63,175", "1", "2001000"}, {"64,176", "1", "2001000"}, {"65,177", "1", "2001000"},
{"66,178", "1", "2001000"}, {"67,179", "1", "2001000"}, {"68,180", "1", "2001000"},
{"69,181", "1", "2001000"}, {"70,182", "1", "2001000"}, {"71,183", "1", "2001000"},
{"72,184", "1", "2001000"}, {"73,185", "1", "2001000"}, {"74,186", "1", "2001000"},
{"75,187", "1", "2001000"}, {"76,188", "1", "2001000"}, {"77,189", "1", "2001000"},
{"78,190", "1", "2001000"}, {"79,191", "1", "2001000"}, {"80,192", "1", "2001000"},
{"81,193", "1", "2001000"}, {"82,194", "1", "2001000"}, {"83,195", "1", "2001000"},
{"84,196", "1", "2001000"}, {"85,197", "1", "2001000"}, {"86,198", "1", "2001000"},
{"87,199", "1", "2001000"}, {"88,200", "1", "2001000"}, {"89,201", "1", "2001000"},
{"90,202", "1", "2001000"}, {"91,203", "1", "2001000"}, {"92,204", "1", "2001000"},
{"93,205", "1", "2001000"}, {"94,206", "1", "2001000"}, {"95,207", "1", "2001000"},
{"96,208", "1", "2001000"}, {"97,209", "1", "2001000"}, {"98,210", "1", "2001000"},
|
||||
{"99,211", "1", "2001000"}, {"100,212", "1", "2001000"}, {"101,213", "1", "2001000"},
|
||||
{"102,214", "1", "2001000"}, {"103,215", "1", "2001000"}, {"104,216", "1", "2001000"},
|
||||
{"105,217", "1", "2001000"}, {"106,218", "1", "2001000"}, {"107,219", "1", "2001000"},
|
||||
{"108,220", "1", "2001000"}, {"109,221", "1", "2001000"}, {"110,222", "1", "2001000"},
|
||||
{"111,223", "1", "2001000"}, {"0,112", "0", "2001000"}, {"1,113", "0", "2001000"},
|
||||
{"2,114", "0", "2001000"}, {"3,115", "0", "2001000"}, {"4,116", "0", "2001000"},
|
||||
{"5,117", "0", "2001000"}, {"6,118", "0", "2001000"}, {"7,119", "0", "2001000"},
|
||||
{"8,120", "0", "2001000"}, {"9,121", "0", "2001000"}, {"10,122", "0", "2001000"},
|
||||
{"11,123", "0", "2001000"}, {"12,124", "0", "2001000"}, {"13,125", "0", "2001000"},
|
||||
{"14,126", "0", "2001000"}, {"15,127", "0", "2001000"}, {"16,128", "0", "2001000"},
|
||||
{"17,129", "0", "2001000"}, {"18,130", "0", "2001000"}, {"19,131", "0", "2001000"},
|
||||
{"20,132", "0", "2001000"}, {"21,133", "0", "2001000"}, {"22,134", "0", "2001000"},
|
||||
{"23,135", "0", "2001000"}, {"24,136", "0", "2001000"}, {"25,137", "0", "2001000"},
|
||||
{"26,138", "0", "2001000"}, {"27,139", "0", "2001000"}, {"28,140", "0", "2001000"},
|
||||
{"29,141", "0", "2001000"}, {"30,142", "0", "2001000"}, {"31,143", "0", "2001000"},
|
||||
{"32,144", "0", "2001000"}, {"33,145", "0", "2001000"}, {"34,146", "0", "2001000"},
|
||||
{"35,147", "0", "2001000"}, {"36,148", "0", "2001000"}, {"37,149", "0", "2001000"},
|
||||
{"38,150", "0", "2001000"}, {"39,151", "0", "2001000"}, {"40,152", "0", "2001000"},
|
||||
{"41,153", "0", "2001000"}, {"42,154", "0", "2001000"}, {"43,155", "0", "2001000"},
|
||||
{"44,156", "0", "2001000"}, {"45,157", "0", "2001000"}, {"46,158", "0", "2001000"},
|
||||
{"47,159", "0", "2001000"}, {"48,160", "0", "2001000"}, {"49,161", "0", "2001000"},
|
||||
{"50,162", "0", "2001000"}, {"51,163", "0", "2001000"}, {"52,164", "0", "2001000"},
|
||||
{"53,165", "0", "2001000"}, {"54,166", "0", "2001000"}, {"55,167", "0", "2001000"},
|
||||
{"56,168", "1", "2001000"}, {"57,169", "1", "2001000"}, {"58,170", "1", "2001000"},
|
||||
{"59,171", "1", "2001000"}, {"60,172", "1", "2001000"}, {"61,173", "1", "2001000"},
|
||||
{"62,174", "1", "2001000"}, {"63,175", "1", "2001000"}, {"64,176", "1", "2001000"},
|
||||
{"65,177", "1", "2001000"}, {"66,178", "1", "2001000"}, {"67,179", "1", "2001000"},
|
||||
{"68,180", "1", "2001000"}, {"69,181", "1", "2001000"}, {"70,182", "1", "2001000"},
|
||||
{"71,183", "1", "2001000"}, {"72,184", "1", "2001000"}, {"73,185", "1", "2001000"},
|
||||
{"74,186", "1", "2001000"}, {"75,187", "1", "2001000"}, {"76,188", "1", "2001000"},
|
||||
{"77,189", "1", "2001000"}, {"78,190", "1", "2001000"}, {"79,191", "1", "2001000"},
|
||||
{"80,192", "1", "2001000"}, {"81,193", "1", "2001000"}, {"82,194", "1", "2001000"},
|
||||
{"83,195", "1", "2001000"}, {"84,196", "1", "2001000"}, {"85,197", "1", "2001000"},
|
||||
{"86,198", "1", "2001000"}, {"87,199", "1", "2001000"}, {"88,200", "1", "2001000"},
|
||||
{"89,201", "1", "2001000"}, {"90,202", "1", "2001000"}, {"91,203", "1", "2001000"},
|
||||
{"92,204", "1", "2001000"}, {"93,205", "1", "2001000"}, {"94,206", "1", "2001000"},
|
||||
{"95,207", "1", "2001000"}, {"96,208", "1", "2001000"}, {"97,209", "1", "2001000"},
|
||||
{"98,210", "1", "2001000"}, {"99,211", "1", "2001000"}, {"100,212", "1", "2001000"},
|
||||
{"101,213", "1", "2001000"}, {"102,214", "1", "2001000"}, {"103,215", "1", "2001000"},
|
||||
{"104,216", "1", "2001000"}, {"105,217", "1", "2001000"}, {"106,218", "1", "2001000"},
|
||||
{"107,219", "1", "2001000"}, {"108,220", "1", "2001000"}, {"109,221", "1", "2001000"},
|
||||
{"110,222", "1", "2001000"}, {"111,223", "1", "2001000"},
|
||||
}, // param[in]: The CPU frequency information table of this simulated platform
|
||||
{{"0-55,112-167"}, {"56-111,168-223"}}, // param[in]: The numa node information table of this simulated platform
|
||||
};
|
||||
LinuxCpuMapTestCase freq_2sockets_48cores_hyperthreading = {
    96,
    2,
    2,
    48,
    {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
        {10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 0, 0, 11, HYPER_THREADING_PROC, 11, -1},
        {12, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 0, 0, 13, HYPER_THREADING_PROC, 13, -1},
        {14, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 0, 0, 15, HYPER_THREADING_PROC, 15, -1},
        {16, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 0, 0, 17, HYPER_THREADING_PROC, 17, -1},
        {18, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 0, 0, 19, HYPER_THREADING_PROC, 19, -1},
        {20, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 0, 0, 21, HYPER_THREADING_PROC, 21, -1},
        {22, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 0, 0, 23, HYPER_THREADING_PROC, 23, -1},
        {24, 1, 1, 24, HYPER_THREADING_PROC, 24, -1}, {25, 1, 1, 25, HYPER_THREADING_PROC, 25, -1},
        {26, 1, 1, 26, HYPER_THREADING_PROC, 26, -1}, {27, 1, 1, 27, HYPER_THREADING_PROC, 27, -1},
        {28, 1, 1, 28, HYPER_THREADING_PROC, 28, -1}, {29, 1, 1, 29, HYPER_THREADING_PROC, 29, -1},
        {30, 1, 1, 30, HYPER_THREADING_PROC, 30, -1}, {31, 1, 1, 31, HYPER_THREADING_PROC, 31, -1},
        {32, 1, 1, 32, HYPER_THREADING_PROC, 32, -1}, {33, 1, 1, 33, HYPER_THREADING_PROC, 33, -1},
        {34, 1, 1, 34, HYPER_THREADING_PROC, 34, -1}, {35, 1, 1, 35, HYPER_THREADING_PROC, 35, -1},
        {36, 1, 1, 36, HYPER_THREADING_PROC, 36, -1}, {37, 1, 1, 37, HYPER_THREADING_PROC, 37, -1},
        {38, 1, 1, 38, HYPER_THREADING_PROC, 38, -1}, {39, 1, 1, 39, HYPER_THREADING_PROC, 39, -1},
        {40, 1, 1, 40, HYPER_THREADING_PROC, 40, -1}, {41, 1, 1, 41, HYPER_THREADING_PROC, 41, -1},
        {42, 1, 1, 42, HYPER_THREADING_PROC, 42, -1}, {43, 1, 1, 43, HYPER_THREADING_PROC, 43, -1},
        {44, 1, 1, 44, HYPER_THREADING_PROC, 44, -1}, {45, 1, 1, 45, HYPER_THREADING_PROC, 45, -1},
        {46, 1, 1, 46, HYPER_THREADING_PROC, 46, -1}, {47, 1, 1, 47, HYPER_THREADING_PROC, 47, -1},
        {48, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {49, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {50, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {51, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {52, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {53, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {54, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {55, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
        {56, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {57, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
        {58, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {59, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
        {60, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {61, 0, 0, 13, MAIN_CORE_PROC, 13, -1},
        {62, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {63, 0, 0, 15, MAIN_CORE_PROC, 15, -1},
        {64, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {65, 0, 0, 17, MAIN_CORE_PROC, 17, -1},
        {66, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, {67, 0, 0, 19, MAIN_CORE_PROC, 19, -1},
        {68, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {69, 0, 0, 21, MAIN_CORE_PROC, 21, -1},
        {70, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {71, 0, 0, 23, MAIN_CORE_PROC, 23, -1},
        {72, 1, 1, 24, MAIN_CORE_PROC, 24, -1}, {73, 1, 1, 25, MAIN_CORE_PROC, 25, -1},
        {74, 1, 1, 26, MAIN_CORE_PROC, 26, -1}, {75, 1, 1, 27, MAIN_CORE_PROC, 27, -1},
        {76, 1, 1, 28, MAIN_CORE_PROC, 28, -1}, {77, 1, 1, 29, MAIN_CORE_PROC, 29, -1},
        {78, 1, 1, 30, MAIN_CORE_PROC, 30, -1}, {79, 1, 1, 31, MAIN_CORE_PROC, 31, -1},
        {80, 1, 1, 32, MAIN_CORE_PROC, 32, -1}, {81, 1, 1, 33, MAIN_CORE_PROC, 33, -1},
        {82, 1, 1, 34, MAIN_CORE_PROC, 34, -1}, {83, 1, 1, 35, MAIN_CORE_PROC, 35, -1},
        {84, 1, 1, 36, MAIN_CORE_PROC, 36, -1}, {85, 1, 1, 37, MAIN_CORE_PROC, 37, -1},
        {86, 1, 1, 38, MAIN_CORE_PROC, 38, -1}, {87, 1, 1, 39, MAIN_CORE_PROC, 39, -1},
        {88, 1, 1, 40, MAIN_CORE_PROC, 40, -1}, {89, 1, 1, 41, MAIN_CORE_PROC, 41, -1},
        {90, 1, 1, 42, MAIN_CORE_PROC, 42, -1}, {91, 1, 1, 43, MAIN_CORE_PROC, 43, -1},
        {92, 1, 1, 44, MAIN_CORE_PROC, 44, -1}, {93, 1, 1, 45, MAIN_CORE_PROC, 45, -1},
        {94, 1, 1, 46, MAIN_CORE_PROC, 46, -1}, {95, 1, 1, 47, MAIN_CORE_PROC, 47, -1},
    },
    {
        {"0,48", "0", "3600000"}, {"1,49", "0", "3600000"}, {"2,50", "0", "3600000"}, {"3,51", "0", "3600000"},
        {"4,52", "0", "3600000"}, {"5,53", "0", "3600000"}, {"6,54", "0", "3600000"}, {"7,55", "0", "3600000"},
        {"8,56", "0", "3600000"}, {"9,57", "0", "3600000"}, {"10,58", "0", "3600000"}, {"11,59", "0", "3600000"},
        {"12,60", "0", "3600000"}, {"13,61", "0", "3600000"}, {"14,62", "0", "3600000"}, {"15,63", "0", "3600000"},
        {"16,64", "0", "3600000"}, {"17,65", "0", "3600000"}, {"18,66", "0", "3600000"}, {"19,67", "0", "3600000"},
        {"20,68", "0", "3600000"}, {"21,69", "0", "3600000"}, {"22,70", "0", "3600000"}, {"23,71", "0", "3600000"},
        {"24,72", "1", "3600000"}, {"25,73", "1", "3600000"}, {"26,74", "1", "3600000"}, {"27,75", "1", "3600000"},
        {"28,76", "1", "3600000"}, {"29,77", "1", "3600000"}, {"30,78", "1", "3600000"}, {"31,79", "1", "3600000"},
        {"32,80", "1", "3600000"}, {"33,81", "1", "3600000"}, {"34,82", "1", "3600000"}, {"35,83", "1", "3600000"},
        {"36,84", "1", "3600000"}, {"37,85", "1", "3600000"}, {"38,86", "1", "3600000"}, {"39,87", "1", "3600000"},
        {"40,88", "1", "3600000"}, {"41,89", "1", "3600000"}, {"42,90", "1", "3600000"}, {"43,91", "1", "3600000"},
        {"44,92", "1", "3600000"}, {"45,93", "1", "3600000"}, {"46,94", "1", "3600000"}, {"47,95", "1", "3600000"},
        {"0,48", "0", "3600000"}, {"1,49", "0", "3600000"}, {"2,50", "0", "3600000"}, {"3,51", "0", "3600000"},
        {"4,52", "0", "3600000"}, {"5,53", "0", "3600000"}, {"6,54", "0", "3600000"}, {"7,55", "0", "3600000"},
        {"8,56", "0", "3600000"}, {"9,57", "0", "3600000"}, {"10,58", "0", "3600000"}, {"11,59", "0", "3600000"},
        {"12,60", "0", "3600000"}, {"13,61", "0", "3600000"}, {"14,62", "0", "3600000"}, {"15,63", "0", "3600000"},
        {"16,64", "0", "3600000"}, {"17,65", "0", "3600000"}, {"18,66", "0", "3600000"}, {"19,67", "0", "3600000"},
        {"20,68", "0", "3600000"}, {"21,69", "0", "3600000"}, {"22,70", "0", "3600000"}, {"23,71", "0", "3600000"},
        {"24,72", "1", "3600000"}, {"25,73", "1", "3600000"}, {"26,74", "1", "3600000"}, {"27,75", "1", "3600000"},
        {"28,76", "1", "3600000"}, {"29,77", "1", "3600000"}, {"30,78", "1", "3600000"}, {"31,79", "1", "3600000"},
        {"32,80", "1", "3600000"}, {"33,81", "1", "3600000"}, {"34,82", "1", "3600000"}, {"35,83", "1", "3600000"},
        {"36,84", "1", "3600000"}, {"37,85", "1", "3600000"}, {"38,86", "1", "3600000"}, {"39,87", "1", "3600000"},
        {"40,88", "1", "3600000"}, {"41,89", "1", "3600000"}, {"42,90", "1", "3600000"}, {"43,91", "1", "3600000"},
        {"44,92", "1", "3600000"}, {"45,93", "1", "3600000"}, {"46,94", "1", "3600000"}, {"47,95", "1", "3600000"},
    },
    {
        {"0-23,48-71"},
        {"24-47,72-95"},
    },
};
LinuxCpuMapTestCase freq_2sockets_48cores_hyperthreading_1 = {
    96,
    4,
    2,
    48,
    {{96, 48, 0, 48, -1, -1},
     {24, 12, 0, 12, 0, 0},
     {24, 12, 0, 12, 1, 0},
     {24, 12, 0, 12, 2, 1},
     {24, 12, 0, 12, 3, 1}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
        {10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 0, 0, 11, HYPER_THREADING_PROC, 11, -1},
        {12, 1, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 1, 0, 13, HYPER_THREADING_PROC, 13, -1},
        {14, 1, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 1, 0, 15, HYPER_THREADING_PROC, 15, -1},
        {16, 1, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 1, 0, 17, HYPER_THREADING_PROC, 17, -1},
        {18, 1, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 1, 0, 19, HYPER_THREADING_PROC, 19, -1},
        {20, 1, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 1, 0, 21, HYPER_THREADING_PROC, 21, -1},
        {22, 1, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 1, 0, 23, HYPER_THREADING_PROC, 23, -1},
        {24, 2, 1, 24, HYPER_THREADING_PROC, 24, -1}, {25, 2, 1, 25, HYPER_THREADING_PROC, 25, -1},
        {26, 2, 1, 26, HYPER_THREADING_PROC, 26, -1}, {27, 2, 1, 27, HYPER_THREADING_PROC, 27, -1},
        {28, 2, 1, 28, HYPER_THREADING_PROC, 28, -1}, {29, 2, 1, 29, HYPER_THREADING_PROC, 29, -1},
        {30, 2, 1, 30, HYPER_THREADING_PROC, 30, -1}, {31, 2, 1, 31, HYPER_THREADING_PROC, 31, -1},
        {32, 2, 1, 32, HYPER_THREADING_PROC, 32, -1}, {33, 2, 1, 33, HYPER_THREADING_PROC, 33, -1},
        {34, 2, 1, 34, HYPER_THREADING_PROC, 34, -1}, {35, 2, 1, 35, HYPER_THREADING_PROC, 35, -1},
        {36, 3, 1, 36, HYPER_THREADING_PROC, 36, -1}, {37, 3, 1, 37, HYPER_THREADING_PROC, 37, -1},
        {38, 3, 1, 38, HYPER_THREADING_PROC, 38, -1}, {39, 3, 1, 39, HYPER_THREADING_PROC, 39, -1},
        {40, 3, 1, 40, HYPER_THREADING_PROC, 40, -1}, {41, 3, 1, 41, HYPER_THREADING_PROC, 41, -1},
        {42, 3, 1, 42, HYPER_THREADING_PROC, 42, -1}, {43, 3, 1, 43, HYPER_THREADING_PROC, 43, -1},
        {44, 3, 1, 44, HYPER_THREADING_PROC, 44, -1}, {45, 3, 1, 45, HYPER_THREADING_PROC, 45, -1},
        {46, 3, 1, 46, HYPER_THREADING_PROC, 46, -1}, {47, 3, 1, 47, HYPER_THREADING_PROC, 47, -1},
        {48, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {49, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {50, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {51, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {52, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {53, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {54, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {55, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
        {56, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {57, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
        {58, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {59, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
        {60, 1, 0, 12, MAIN_CORE_PROC, 12, -1}, {61, 1, 0, 13, MAIN_CORE_PROC, 13, -1},
        {62, 1, 0, 14, MAIN_CORE_PROC, 14, -1}, {63, 1, 0, 15, MAIN_CORE_PROC, 15, -1},
        {64, 1, 0, 16, MAIN_CORE_PROC, 16, -1}, {65, 1, 0, 17, MAIN_CORE_PROC, 17, -1},
        {66, 1, 0, 18, MAIN_CORE_PROC, 18, -1}, {67, 1, 0, 19, MAIN_CORE_PROC, 19, -1},
        {68, 1, 0, 20, MAIN_CORE_PROC, 20, -1}, {69, 1, 0, 21, MAIN_CORE_PROC, 21, -1},
        {70, 1, 0, 22, MAIN_CORE_PROC, 22, -1}, {71, 1, 0, 23, MAIN_CORE_PROC, 23, -1},
        {72, 2, 1, 24, MAIN_CORE_PROC, 24, -1}, {73, 2, 1, 25, MAIN_CORE_PROC, 25, -1},
        {74, 2, 1, 26, MAIN_CORE_PROC, 26, -1}, {75, 2, 1, 27, MAIN_CORE_PROC, 27, -1},
        {76, 2, 1, 28, MAIN_CORE_PROC, 28, -1}, {77, 2, 1, 29, MAIN_CORE_PROC, 29, -1},
        {78, 2, 1, 30, MAIN_CORE_PROC, 30, -1}, {79, 2, 1, 31, MAIN_CORE_PROC, 31, -1},
        {80, 2, 1, 32, MAIN_CORE_PROC, 32, -1}, {81, 2, 1, 33, MAIN_CORE_PROC, 33, -1},
        {82, 2, 1, 34, MAIN_CORE_PROC, 34, -1}, {83, 2, 1, 35, MAIN_CORE_PROC, 35, -1},
        {84, 3, 1, 36, MAIN_CORE_PROC, 36, -1}, {85, 3, 1, 37, MAIN_CORE_PROC, 37, -1},
        {86, 3, 1, 38, MAIN_CORE_PROC, 38, -1}, {87, 3, 1, 39, MAIN_CORE_PROC, 39, -1},
        {88, 3, 1, 40, MAIN_CORE_PROC, 40, -1}, {89, 3, 1, 41, MAIN_CORE_PROC, 41, -1},
        {90, 3, 1, 42, MAIN_CORE_PROC, 42, -1}, {91, 3, 1, 43, MAIN_CORE_PROC, 43, -1},
        {92, 3, 1, 44, MAIN_CORE_PROC, 44, -1}, {93, 3, 1, 45, MAIN_CORE_PROC, 45, -1},
        {94, 3, 1, 46, MAIN_CORE_PROC, 46, -1}, {95, 3, 1, 47, MAIN_CORE_PROC, 47, -1},
    },
    {
        {"0,48", "0", "3600000"}, {"1,49", "0", "3600000"}, {"2,50", "0", "3600000"}, {"3,51", "0", "3600000"},
        {"4,52", "0", "3600000"}, {"5,53", "0", "3600000"}, {"6,54", "0", "3600000"}, {"7,55", "0", "3600000"},
        {"8,56", "0", "3600000"}, {"9,57", "0", "3600000"}, {"10,58", "0", "3600000"}, {"11,59", "0", "3600000"},
        {"12,60", "0", "3600000"}, {"13,61", "0", "3600000"}, {"14,62", "0", "3600000"}, {"15,63", "0", "3600000"},
        {"16,64", "0", "3600000"}, {"17,65", "0", "3600000"}, {"18,66", "0", "3600000"}, {"19,67", "0", "3600000"},
        {"20,68", "0", "3600000"}, {"21,69", "0", "3600000"}, {"22,70", "0", "3600000"}, {"23,71", "0", "3600000"},
        {"24,72", "1", "3600000"}, {"25,73", "1", "3600000"}, {"26,74", "1", "3600000"}, {"27,75", "1", "3600000"},
        {"28,76", "1", "3600000"}, {"29,77", "1", "3600000"}, {"30,78", "1", "3600000"}, {"31,79", "1", "3600000"},
        {"32,80", "1", "3600000"}, {"33,81", "1", "3600000"}, {"34,82", "1", "3600000"}, {"35,83", "1", "3600000"},
        {"36,84", "1", "3600000"}, {"37,85", "1", "3600000"}, {"38,86", "1", "3600000"}, {"39,87", "1", "3600000"},
        {"40,88", "1", "3600000"}, {"41,89", "1", "3600000"}, {"42,90", "1", "3600000"}, {"43,91", "1", "3600000"},
        {"44,92", "1", "3600000"}, {"45,93", "1", "3600000"}, {"46,94", "1", "3600000"}, {"47,95", "1", "3600000"},
        {"0,48", "0", "3600000"}, {"1,49", "0", "3600000"}, {"2,50", "0", "3600000"}, {"3,51", "0", "3600000"},
        {"4,52", "0", "3600000"}, {"5,53", "0", "3600000"}, {"6,54", "0", "3600000"}, {"7,55", "0", "3600000"},
        {"8,56", "0", "3600000"}, {"9,57", "0", "3600000"}, {"10,58", "0", "3600000"}, {"11,59", "0", "3600000"},
        {"12,60", "0", "3600000"}, {"13,61", "0", "3600000"}, {"14,62", "0", "3600000"}, {"15,63", "0", "3600000"},
        {"16,64", "0", "3600000"}, {"17,65", "0", "3600000"}, {"18,66", "0", "3600000"}, {"19,67", "0", "3600000"},
        {"20,68", "0", "3600000"}, {"21,69", "0", "3600000"}, {"22,70", "0", "3600000"}, {"23,71", "0", "3600000"},
        {"24,72", "1", "3600000"}, {"25,73", "1", "3600000"}, {"26,74", "1", "3600000"}, {"27,75", "1", "3600000"},
        {"28,76", "1", "3600000"}, {"29,77", "1", "3600000"}, {"30,78", "1", "3600000"}, {"31,79", "1", "3600000"},
        {"32,80", "1", "3600000"}, {"33,81", "1", "3600000"}, {"34,82", "1", "3600000"}, {"35,83", "1", "3600000"},
        {"36,84", "1", "3600000"}, {"37,85", "1", "3600000"}, {"38,86", "1", "3600000"}, {"39,87", "1", "3600000"},
        {"40,88", "1", "3600000"}, {"41,89", "1", "3600000"}, {"42,90", "1", "3600000"}, {"43,91", "1", "3600000"},
        {"44,92", "1", "3600000"}, {"45,93", "1", "3600000"}, {"46,94", "1", "3600000"}, {"47,95", "1", "3600000"},
    },
    {
        {"0-11,48-59"},
        {"12-23,60-71"},
        {"24-35,72-83"},
        {"36-47,84-95"},
    },
};
LinuxCpuMapTestCase freq_2sockets_24cores_hyperthreading = {
    48,
    2,
    2,
    24,
    {{48, 24, 0, 24, -1, -1}, {24, 12, 0, 12, 0, 0}, {24, 12, 0, 12, 1, 1}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 1, 1, 1, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 1, 1, 3, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 1, 1, 5, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 1, 1, 7, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 1, 1, 9, HYPER_THREADING_PROC, 9, -1},
        {10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 1, 1, 11, HYPER_THREADING_PROC, 11, -1},
        {12, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 1, 1, 13, HYPER_THREADING_PROC, 13, -1},
        {14, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 1, 1, 15, HYPER_THREADING_PROC, 15, -1},
        {16, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 1, 1, 17, HYPER_THREADING_PROC, 17, -1},
        {18, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 1, 1, 19, HYPER_THREADING_PROC, 19, -1},
        {20, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 1, 1, 21, HYPER_THREADING_PROC, 21, -1},
        {22, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 1, 1, 23, HYPER_THREADING_PROC, 23, -1},
        {24, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {25, 1, 1, 1, MAIN_CORE_PROC, 1, -1},
        {26, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {27, 1, 1, 3, MAIN_CORE_PROC, 3, -1},
        {28, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {29, 1, 1, 5, MAIN_CORE_PROC, 5, -1},
        {30, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {31, 1, 1, 7, MAIN_CORE_PROC, 7, -1},
        {32, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {33, 1, 1, 9, MAIN_CORE_PROC, 9, -1},
        {34, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {35, 1, 1, 11, MAIN_CORE_PROC, 11, -1},
        {36, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {37, 1, 1, 13, MAIN_CORE_PROC, 13, -1},
        {38, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {39, 1, 1, 15, MAIN_CORE_PROC, 15, -1},
        {40, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {41, 1, 1, 17, MAIN_CORE_PROC, 17, -1},
        {42, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, {43, 1, 1, 19, MAIN_CORE_PROC, 19, -1},
        {44, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {45, 1, 1, 21, MAIN_CORE_PROC, 21, -1},
        {46, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {47, 1, 1, 23, MAIN_CORE_PROC, 23, -1},
    },
    {
        {"0,24", "0", "3500000"}, {"1,25", "1", "3500000"}, {"2,26", "0", "3500000"}, {"3,27", "1", "3500000"},
        {"4,28", "0", "3500000"}, {"5,29", "1", "3500000"}, {"6,30", "0", "3500000"}, {"7,31", "1", "3500000"},
        {"8,32", "0", "3500000"}, {"9,33", "1", "3500000"}, {"10,34", "0", "3500000"}, {"11,35", "1", "3500000"},
        {"12,36", "0", "3500000"}, {"13,37", "1", "3500000"}, {"14,38", "0", "3500000"}, {"15,39", "1", "3500000"},
        {"16,40", "0", "3500000"}, {"17,41", "1", "3500000"}, {"18,42", "0", "3500000"}, {"19,43", "1", "3500000"},
        {"20,44", "0", "3500000"}, {"21,45", "1", "3500000"}, {"22,46", "0", "3500000"}, {"23,47", "1", "3500000"},
        {"0,24", "0", "3500000"}, {"1,25", "1", "3500000"}, {"2,26", "0", "3500000"}, {"3,27", "1", "3500000"},
        {"4,28", "0", "3500000"}, {"5,29", "1", "3500000"}, {"6,30", "0", "3500000"}, {"7,31", "1", "3500000"},
        {"8,32", "0", "3500000"}, {"9,33", "1", "3500000"}, {"10,34", "0", "3500000"}, {"11,35", "1", "3500000"},
        {"12,36", "0", "3500000"}, {"13,37", "1", "3500000"}, {"14,38", "0", "3500000"}, {"15,39", "1", "3500000"},
        {"16,40", "0", "3500000"}, {"17,41", "1", "3500000"}, {"18,42", "0", "3500000"}, {"19,43", "1", "3500000"},
        {"20,44", "0", "3500000"}, {"21,45", "1", "3500000"}, {"22,46", "0", "3500000"}, {"23,47", "1", "3500000"},
    },
    {
        {"0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46"},
        {"1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47"},
    },
};
LinuxCpuMapTestCase freq_2sockets_24cores_hyperthreading_1 = {
    48,
    4,
    2,
    24,
    {{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 2, 1, 1, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 2, 1, 3, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 2, 1, 5, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 2, 1, 7, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 2, 1, 9, HYPER_THREADING_PROC, 9, -1},
        {10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 2, 1, 11, HYPER_THREADING_PROC, 11, -1},
        {12, 1, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 3, 1, 13, HYPER_THREADING_PROC, 13, -1},
        {14, 1, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 3, 1, 15, HYPER_THREADING_PROC, 15, -1},
        {16, 1, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 3, 1, 17, HYPER_THREADING_PROC, 17, -1},
        {18, 1, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 3, 1, 19, HYPER_THREADING_PROC, 19, -1},
        {20, 1, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 3, 1, 21, HYPER_THREADING_PROC, 21, -1},
        {22, 1, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 3, 1, 23, HYPER_THREADING_PROC, 23, -1},
        {24, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {25, 2, 1, 1, MAIN_CORE_PROC, 1, -1},
        {26, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {27, 2, 1, 3, MAIN_CORE_PROC, 3, -1},
        {28, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {29, 2, 1, 5, MAIN_CORE_PROC, 5, -1},
        {30, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {31, 2, 1, 7, MAIN_CORE_PROC, 7, -1},
        {32, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {33, 2, 1, 9, MAIN_CORE_PROC, 9, -1},
        {34, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {35, 2, 1, 11, MAIN_CORE_PROC, 11, -1},
        {36, 1, 0, 12, MAIN_CORE_PROC, 12, -1}, {37, 3, 1, 13, MAIN_CORE_PROC, 13, -1},
        {38, 1, 0, 14, MAIN_CORE_PROC, 14, -1}, {39, 3, 1, 15, MAIN_CORE_PROC, 15, -1},
        {40, 1, 0, 16, MAIN_CORE_PROC, 16, -1}, {41, 3, 1, 17, MAIN_CORE_PROC, 17, -1},
        {42, 1, 0, 18, MAIN_CORE_PROC, 18, -1}, {43, 3, 1, 19, MAIN_CORE_PROC, 19, -1},
        {44, 1, 0, 20, MAIN_CORE_PROC, 20, -1}, {45, 3, 1, 21, MAIN_CORE_PROC, 21, -1},
        {46, 1, 0, 22, MAIN_CORE_PROC, 22, -1}, {47, 3, 1, 23, MAIN_CORE_PROC, 23, -1},
    },
    {
        {"0,24", "0", "3500000"}, {"1,25", "1", "3500000"}, {"2,26", "0", "3500000"}, {"3,27", "1", "3500000"},
        {"4,28", "0", "3500000"}, {"5,29", "1", "3500000"}, {"6,30", "0", "3500000"}, {"7,31", "1", "3500000"},
        {"8,32", "0", "3500000"}, {"9,33", "1", "3500000"}, {"10,34", "0", "3500000"}, {"11,35", "1", "3500000"},
        {"12,36", "0", "3500000"}, {"13,37", "1", "3500000"}, {"14,38", "0", "3500000"}, {"15,39", "1", "3500000"},
        {"16,40", "0", "3500000"}, {"17,41", "1", "3500000"}, {"18,42", "0", "3500000"}, {"19,43", "1", "3500000"},
        {"20,44", "0", "3500000"}, {"21,45", "1", "3500000"}, {"22,46", "0", "3500000"}, {"23,47", "1", "3500000"},
        {"0,24", "0", "3500000"}, {"1,25", "1", "3500000"}, {"2,26", "0", "3500000"}, {"3,27", "1", "3500000"},
        {"4,28", "0", "3500000"}, {"5,29", "1", "3500000"}, {"6,30", "0", "3500000"}, {"7,31", "1", "3500000"},
        {"8,32", "0", "3500000"}, {"9,33", "1", "3500000"}, {"10,34", "0", "3500000"}, {"11,35", "1", "3500000"},
        {"12,36", "0", "3500000"}, {"13,37", "1", "3500000"}, {"14,38", "0", "3500000"}, {"15,39", "1", "3500000"},
        {"16,40", "0", "3500000"}, {"17,41", "1", "3500000"}, {"18,42", "0", "3500000"}, {"19,43", "1", "3500000"},
        {"20,44", "0", "3500000"}, {"21,45", "1", "3500000"}, {"22,46", "0", "3500000"}, {"23,47", "1", "3500000"},
    },
    {
        {"0,2,4,6,8,10,24,26,28,30,32,34"},
        {"12,14,16,18,20,22,36,38,40,42,44,46"},
        {"1,3,5,7,9,11,25,27,29,31,33,35"},
        {"13,15,17,19,21,23,37,39,41,43,45,47"},
    },
};
LinuxCpuMapTestCase freq_2sockets_20cores_hyperthreading = {
    40,
    2,
    2,
    20,
    {{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
        {10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1}, {11, 1, 1, 11, HYPER_THREADING_PROC, 11, -1},
        {12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1}, {13, 1, 1, 13, HYPER_THREADING_PROC, 13, -1},
        {14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1}, {15, 1, 1, 15, HYPER_THREADING_PROC, 15, -1},
        {16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1}, {17, 1, 1, 17, HYPER_THREADING_PROC, 17, -1},
        {18, 1, 1, 18, HYPER_THREADING_PROC, 18, -1}, {19, 1, 1, 19, HYPER_THREADING_PROC, 19, -1},
        {20, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {22, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {24, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {26, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
        {28, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {29, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
        {30, 1, 1, 10, MAIN_CORE_PROC, 10, -1}, {31, 1, 1, 11, MAIN_CORE_PROC, 11, -1},
        {32, 1, 1, 12, MAIN_CORE_PROC, 12, -1}, {33, 1, 1, 13, MAIN_CORE_PROC, 13, -1},
        {34, 1, 1, 14, MAIN_CORE_PROC, 14, -1}, {35, 1, 1, 15, MAIN_CORE_PROC, 15, -1},
        {36, 1, 1, 16, MAIN_CORE_PROC, 16, -1}, {37, 1, 1, 17, MAIN_CORE_PROC, 17, -1},
        {38, 1, 1, 18, MAIN_CORE_PROC, 18, -1}, {39, 1, 1, 19, MAIN_CORE_PROC, 19, -1},
    },
    {
        {"0,20", "0", "3000000"}, {"1,21", "0", "3000000"}, {"2,22", "0", "3000000"}, {"3,23", "0", "3000000"},
        {"4,24", "0", "3000000"}, {"5,25", "0", "3000000"}, {"6,26", "0", "3000000"}, {"7,27", "0", "3000000"},
        {"8,28", "0", "3000000"}, {"9,29", "0", "3000000"}, {"10,30", "1", "3000000"}, {"11,31", "1", "3000000"},
        {"12,32", "1", "3000000"}, {"13,33", "1", "3000000"}, {"14,34", "1", "3000000"}, {"15,35", "1", "3000000"},
        {"16,36", "1", "3000000"}, {"17,37", "1", "3000000"}, {"18,38", "1", "3000000"}, {"19,39", "1", "3000000"},
        {"0,20", "0", "3000000"}, {"1,21", "0", "3000000"}, {"2,22", "0", "3000000"}, {"3,23", "0", "3000000"},
        {"4,24", "0", "3000000"}, {"5,25", "0", "3000000"}, {"6,26", "0", "3000000"}, {"7,27", "0", "3000000"},
        {"8,28", "0", "3000000"}, {"9,29", "0", "3000000"}, {"10,30", "1", "3000000"}, {"11,31", "1", "3000000"},
        {"12,32", "1", "3000000"}, {"13,33", "1", "3000000"}, {"14,34", "1", "3000000"}, {"15,35", "1", "3000000"},
        {"16,36", "1", "3000000"}, {"17,37", "1", "3000000"}, {"18,38", "1", "3000000"}, {"19,39", "1", "3000000"},
    },
    {{"0-9,20-29"}, {"10-19,30-39"}},
};
LinuxCpuMapTestCase freq_2sockets_20cores_hyperthreading_1 = {
    40,
    2,
    2,
    20,
    {{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
        {10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1}, {11, 1, 1, 11, HYPER_THREADING_PROC, 11, -1},
        {12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1}, {13, 1, 1, 13, HYPER_THREADING_PROC, 13, -1},
        {14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1}, {15, 1, 1, 15, HYPER_THREADING_PROC, 15, -1},
        {16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1}, {17, 1, 1, 17, HYPER_THREADING_PROC, 17, -1},
        {18, 1, 1, 18, HYPER_THREADING_PROC, 18, -1}, {19, 1, 1, 19, HYPER_THREADING_PROC, 19, -1},
        {20, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {22, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {24, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {26, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
        {28, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {29, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
        {30, 1, 1, 10, MAIN_CORE_PROC, 10, -1}, {31, 1, 1, 11, MAIN_CORE_PROC, 11, -1},
        {32, 1, 1, 12, MAIN_CORE_PROC, 12, -1}, {33, 1, 1, 13, MAIN_CORE_PROC, 13, -1},
        {34, 1, 1, 14, MAIN_CORE_PROC, 14, -1}, {35, 1, 1, 15, MAIN_CORE_PROC, 15, -1},
        {36, 1, 1, 16, MAIN_CORE_PROC, 16, -1}, {37, 1, 1, 17, MAIN_CORE_PROC, 17, -1},
        {38, 1, 1, 18, MAIN_CORE_PROC, 18, -1}, {39, 1, 1, 19, MAIN_CORE_PROC, 19, -1},
    },
    {
        {"0,20", "0", "3000000"}, {"1,21", "0", "3000000"}, {"2,22", "0", "3000000"}, {"3,23", "0", "3000000"},
        {"4,24", "0", "3000000"}, {"5,25", "0", "3000000"}, {"6,26", "0", "3000000"}, {"7,27", "0", "3000000"},
        {"8,28", "0", "3000000"}, {"9,29", "0", "3000000"}, {"10,30", "1", "3000000"}, {"11,31", "1", "3000000"},
        {"12,32", "1", "3000000"}, {"13,33", "1", "3000000"}, {"14,34", "1", "3000000"}, {"15,35", "1", "3000000"},
        {"16,36", "1", "3000000"}, {"17,37", "1", "3000000"}, {"18,38", "1", "3000000"}, {"19,39", "1", "3000000"},
        {"0,20", "0", "3000000"}, {"1,21", "0", "3000000"}, {"2,22", "0", "3000000"}, {"3,23", "0", "3000000"},
        {"4,24", "0", "3000000"}, {"5,25", "0", "3000000"}, {"6,26", "0", "3000000"}, {"7,27", "0", "3000000"},
        {"8,28", "0", "3000000"}, {"9,29", "0", "3000000"}, {"10,30", "1", "3000000"}, {"11,31", "1", "3000000"},
        {"12,32", "1", "3000000"}, {"13,33", "1", "3000000"}, {"14,34", "1", "3000000"}, {"15,35", "1", "3000000"},
        {"16,36", "1", "3000000"}, {"17,37", "1", "3000000"}, {"18,38", "1", "3000000"}, {"19,39", "1", "3000000"},
    },
    {},
};
LinuxCpuMapTestCase freq_2sockets_20cores = {
    20,
    2,
    2,
    20,
    {{20, 20, 0, 0, -1, -1}, {10, 10, 0, 0, 0, 0}, {10, 10, 0, 0, 1, 1}},
    {
        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {3, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {4, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {5, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {6, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {7, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
        {8, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {9, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
        {10, 1, 1, 10, MAIN_CORE_PROC, 10, -1}, {11, 1, 1, 11, MAIN_CORE_PROC, 11, -1},
        {12, 1, 1, 12, MAIN_CORE_PROC, 12, -1}, {13, 1, 1, 13, MAIN_CORE_PROC, 13, -1},
        {14, 1, 1, 14, MAIN_CORE_PROC, 14, -1}, {15, 1, 1, 15, MAIN_CORE_PROC, 15, -1},
        {16, 1, 1, 16, MAIN_CORE_PROC, 16, -1}, {17, 1, 1, 17, MAIN_CORE_PROC, 17, -1},
        {18, 1, 1, 18, MAIN_CORE_PROC, 18, -1}, {19, 1, 1, 19, MAIN_CORE_PROC, 19, -1},
    },
    {
        {"0", "0", "3000000"}, {"1", "0", "3000000"}, {"2", "0", "3000000"}, {"3", "0", "3000000"},
        {"4", "0", "3000000"}, {"5", "0", "3000000"}, {"6", "0", "3000000"}, {"7", "0", "3000000"},
        {"8", "0", "3000000"}, {"9", "0", "3000000"}, {"10", "1", "3000000"}, {"11", "1", "3000000"},
        {"12", "1", "3000000"}, {"13", "1", "3000000"}, {"14", "1", "3000000"}, {"15", "1", "3000000"},
        {"16", "1", "3000000"}, {"17", "1", "3000000"}, {"18", "1", "3000000"}, {"19", "1", "3000000"},
    },
    {{"0-9"}, {"10-19"}},
};
LinuxCpuMapTestCase freq_1sockets_32cores_hyperthreading = {
    64,
    1,
    1,
    32,
    {{64, 32, 0, 32, 0, 0}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
        {10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 0, 0, 11, HYPER_THREADING_PROC, 11, -1},
        {12, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 0, 0, 13, HYPER_THREADING_PROC, 13, -1},
        {14, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 0, 0, 15, HYPER_THREADING_PROC, 15, -1},
        {16, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 0, 0, 17, HYPER_THREADING_PROC, 17, -1},
        {18, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 0, 0, 19, HYPER_THREADING_PROC, 19, -1},
        {20, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 0, 0, 21, HYPER_THREADING_PROC, 21, -1},
        {22, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 0, 0, 23, HYPER_THREADING_PROC, 23, -1},
        {24, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {25, 0, 0, 25, HYPER_THREADING_PROC, 25, -1},
        {26, 0, 0, 26, HYPER_THREADING_PROC, 26, -1}, {27, 0, 0, 27, HYPER_THREADING_PROC, 27, -1},
        {28, 0, 0, 28, HYPER_THREADING_PROC, 28, -1}, {29, 0, 0, 29, HYPER_THREADING_PROC, 29, -1},
        {30, 0, 0, 30, HYPER_THREADING_PROC, 30, -1}, {31, 0, 0, 31, HYPER_THREADING_PROC, 31, -1},
        {32, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {33, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {34, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {35, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {36, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {37, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {38, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {39, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
        {40, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {41, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
        {42, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {43, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
        {44, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {45, 0, 0, 13, MAIN_CORE_PROC, 13, -1},
        {46, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {47, 0, 0, 15, MAIN_CORE_PROC, 15, -1},
        {48, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {49, 0, 0, 17, MAIN_CORE_PROC, 17, -1},
        {50, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, {51, 0, 0, 19, MAIN_CORE_PROC, 19, -1},
        {52, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {53, 0, 0, 21, MAIN_CORE_PROC, 21, -1},
        {54, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {55, 0, 0, 23, MAIN_CORE_PROC, 23, -1},
        {56, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {57, 0, 0, 25, MAIN_CORE_PROC, 25, -1},
        {58, 0, 0, 26, MAIN_CORE_PROC, 26, -1}, {59, 0, 0, 27, MAIN_CORE_PROC, 27, -1},
        {60, 0, 0, 28, MAIN_CORE_PROC, 28, -1}, {61, 0, 0, 29, MAIN_CORE_PROC, 29, -1},
        {62, 0, 0, 30, MAIN_CORE_PROC, 30, -1}, {63, 0, 0, 31, MAIN_CORE_PROC, 31, -1},
    },
    {
        {"0,32", "0", "3400000"}, {"1,33", "0", "3400000"}, {"2,34", "0", "3400000"}, {"3,35", "0", "3400000"},
        {"4,36", "0", "3400000"}, {"5,37", "0", "3400000"}, {"6,38", "0", "3400000"}, {"7,39", "0", "3400000"},
        {"8,40", "0", "3400000"}, {"9,41", "0", "3400000"}, {"10,42", "0", "3400000"}, {"11,43", "0", "3400000"},
        {"12,44", "0", "3400000"}, {"13,45", "0", "3400000"}, {"14,46", "0", "3400000"}, {"15,47", "0", "3400000"},
        {"16,48", "0", "3400000"}, {"17,49", "0", "3400000"}, {"18,50", "0", "3400000"}, {"19,51", "0", "3400000"},
        {"20,52", "0", "3400000"}, {"21,53", "0", "3400000"}, {"22,54", "0", "3400000"}, {"23,55", "0", "3400000"},
        {"24,56", "0", "3400000"}, {"25,57", "0", "3400000"}, {"26,58", "0", "3400000"}, {"27,59", "0", "3400000"},
        {"28,60", "0", "3400000"}, {"29,61", "0", "3400000"}, {"30,62", "0", "3400000"}, {"31,63", "0", "3400000"},
        {"0,32", "0", "3400000"}, {"1,33", "0", "3400000"}, {"2,34", "0", "3400000"}, {"3,35", "0", "3400000"},
        {"4,36", "0", "3400000"}, {"5,37", "0", "3400000"}, {"6,38", "0", "3400000"}, {"7,39", "0", "3400000"},
        {"8,40", "0", "3400000"}, {"9,41", "0", "3400000"}, {"10,42", "0", "3400000"}, {"11,43", "0", "3400000"},
        {"12,44", "0", "3400000"}, {"13,45", "0", "3400000"}, {"14,46", "0", "3400000"}, {"15,47", "0", "3400000"},
        {"16,48", "0", "3400000"}, {"17,49", "0", "3400000"}, {"18,50", "0", "3400000"}, {"19,51", "0", "3400000"},
        {"20,52", "0", "3400000"}, {"21,53", "0", "3400000"}, {"22,54", "0", "3400000"}, {"23,55", "0", "3400000"},
        {"24,56", "0", "3400000"}, {"25,57", "0", "3400000"}, {"26,58", "0", "3400000"}, {"27,59", "0", "3400000"},
        {"28,60", "0", "3400000"}, {"29,61", "0", "3400000"}, {"30,62", "0", "3400000"}, {"31,63", "0", "3400000"},
    },
    {{"0-63"}},
};
LinuxCpuMapTestCase freq_1sockets_16cores_hyperthreading = {
    24,
    1,
    1,
    16,
    {{24, 8, 8, 8, 0, 0}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
        {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
        {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
        {14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
        {16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1}, {17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
        {18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1}, {19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
        {20, 0, 0, 12, EFFICIENT_CORE_PROC, 12, -1}, {21, 0, 0, 13, EFFICIENT_CORE_PROC, 13, -1},
        {22, 0, 0, 14, EFFICIENT_CORE_PROC, 14, -1}, {23, 0, 0, 15, EFFICIENT_CORE_PROC, 15, -1},
    },
    {
        {"0-1", "0", "5376760"}, {"0-1", "0", "5376760"}, {"2-3", "0", "5376760"}, {"2-3", "0", "5376760"},
        {"4-5", "0", "5376760"}, {"4-5", "0", "5376760"}, {"6-7", "0", "5376760"}, {"6-7", "0", "5376760"},
        {"8-9", "0", "5400000"}, {"8-9", "0", "5400000"}, {"10-11", "0", "5400000"}, {"10-11", "0", "5400000"},
        {"12-13", "0", "5376760"}, {"12-13", "0", "5376760"}, {"14-15", "0", "5376760"}, {"14-15", "0", "5376760"},
        {"16", "0", "4200000"}, {"17", "0", "4200000"}, {"18", "0", "4200000"}, {"19", "0", "4200000"},
        {"20", "0", "4200000"}, {"21", "0", "4200000"}, {"22", "0", "4200000"}, {"23", "0", "4200000"},
    },
    {},
};
LinuxCpuMapTestCase freq_1sockets_16cores = {
    16,
    1,
    1,
    16,
    {{16, 8, 8, 0, 0, 0}},
    {
        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
        {3, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
        {5, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
        {7, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
        {8, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
        {9, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
        {10, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
        {11, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
        {12, 0, 0, 12, EFFICIENT_CORE_PROC, 12, -1},
        {13, 0, 0, 13, EFFICIENT_CORE_PROC, 13, -1},
        {14, 0, 0, 14, EFFICIENT_CORE_PROC, 14, -1},
        {15, 0, 0, 15, EFFICIENT_CORE_PROC, 15, -1},
    },
    {
        {"0", "0", "5376760"},
        {"1", "0", "5376760"},
        {"2", "0", "5376760"},
        {"3", "0", "5376760"},
        {"4", "0", "5400000"},
        {"5", "0", "5400000"},
        {"6", "0", "5376760"},
        {"7", "0", "5376760"},
        {"8", "0", "4200000"},
        {"9", "0", "4200000"},
        {"10", "0", "4200000"},
        {"11", "0", "4200000"},
        {"12", "0", "4200000"},
        {"13", "0", "4200000"},
        {"14", "0", "4200000"},
        {"15", "0", "4200000"},
    },
    {{"0-15"}},
};
LinuxCpuMapTestCase freq_1sockets_16cores_1_hyperthreading = {
    22,
    1,
    1,
    16,
    {{22, 6, 10, 6, 0, 0}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
        {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
        {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {12, 0, 0, 6, EFFICIENT_CORE_PROC, 6, -1}, {13, 0, 0, 7, EFFICIENT_CORE_PROC, 7, -1},
        {14, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1}, {15, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
        {16, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1}, {17, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
        {18, 0, 0, 12, EFFICIENT_CORE_PROC, 12, -1}, {19, 0, 0, 13, EFFICIENT_CORE_PROC, 13, -1},
        {20, 0, 0, 14, EFFICIENT_CORE_PROC, 14, -1}, {21, 0, 0, 15, EFFICIENT_CORE_PROC, 15, -1},
    },
    {
        {"0-1", "2", "3200040"}, {"0-1", "2", "3200040"}, {"2-3", "3", "3200040"}, {"2-3", "3", "3200040"},
        {"4-5", "4", "3200040"}, {"4-5", "4", "3200040"}, {"6-7", "5", "3200040"}, {"6-7", "5", "3200040"},
        {"8-9", "6", "3200040"}, {"8-9", "6", "3200040"}, {"10-11", "7", "3200040"}, {"10-11", "7", "3200040"},
        {"12", "0", "3100000"}, {"13", "0", "3100000"}, {"14", "0", "3100000"}, {"15", "0", "3100000"},
        {"16", "1", "3100000"}, {"17", "1", "3100000"}, {"18", "1", "3100000"}, {"19", "1", "3100000"},
        {"20", "8", "1600011"}, {"21", "8", "1600011"},
    },
    {},
};
LinuxCpuMapTestCase freq_1sockets_12cores_hyperthreading = {
    14,
    1,
    1,
    12,
    {{14, 2, 10, 2, 0, 0}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
        {3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {4, 0, 0, 2, EFFICIENT_CORE_PROC, 2, -1},
        {5, 0, 0, 3, EFFICIENT_CORE_PROC, 3, -1},
        {6, 0, 0, 4, EFFICIENT_CORE_PROC, 4, -1},
        {7, 0, 0, 5, EFFICIENT_CORE_PROC, 5, -1},
        {8, 0, 0, 6, EFFICIENT_CORE_PROC, 6, -1},
        {9, 0, 0, 7, EFFICIENT_CORE_PROC, 7, -1},
        {10, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
        {11, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
        {12, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
        {13, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
    },
    {
        {"0-1", "2", "4100000"},
        {"0-1", "2", "4100000"},
        {"2-3", "3", "4100000"},
        {"2-3", "3", "4100000"},
        {"4", "0", "3100000"},
        {"5", "0", "3100000"},
        {"6", "0", "3100000"},
        {"7", "0", "3100000"},
        {"8", "1", "3100000"},
        {"9", "1", "3100000"},
        {"10", "1", "3100000"},
        {"11", "1", "3100000"},
        {"12", "8", "2100000"},
        {"13", "8", "2100000"},
    },
    {{"0-13"}},
};
LinuxCpuMapTestCase freq_1sockets_8cores_hyperthreading = {
    16,
    1,
    1,
    8,
    {{16, 8, 0, 8, 0, 0}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
        {3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
        {5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
        {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
        {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
        {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
        {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
        {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
        {13, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
        {14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
        {15, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
    },
    {
        {"0-1", "0", "6100000"},
        {"0-1", "0", "6100000"},
        {"2-3", "0", "6100000"},
        {"2-3", "0", "6100000"},
        {"4-5", "0", "6100000"},
        {"4-5", "0", "6100000"},
        {"6-7", "0", "6100000"},
        {"6-7", "0", "6100000"},
        {"8-9", "0", "6300000"},
        {"8-9", "0", "6300000"},
        {"10-11", "0", "6300000"},
        {"10-11", "0", "6300000"},
        {"12-13", "0", "6100000"},
        {"12-13", "0", "6100000"},
        {"14-15", "0", "6100000"},
        {"14-15", "0", "6100000"},
    },
    {},
};
LinuxCpuMapTestCase freq_1sockets_8cores_hyperthreading_1 = {
    16,
    1,
    1,
    8,
    {{16, 8, 0, 8, 0, 0}},
    {
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
        {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
        {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
        {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
        {8, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {9, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {10, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
        {11, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {12, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
        {13, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {14, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
        {15, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
    },
    {
        {"0,8", "0", "4700000"},
        {"1,9", "0", "4800000"},
        {"2,10", "0", "4800000"},
        {"3,11", "0", "4700000"},
        {"4,12", "0", "4700000"},
        {"5,13", "0", "4700000"},
        {"6,14", "0", "4700000"},
        {"7,15", "0", "4700000"},
        {"0,8", "0", "4700000"},
        {"1,9", "0", "4800000"},
        {"2,10", "0", "4800000"},
        {"3,11", "0", "4700000"},
        {"4,12", "0", "4700000"},
        {"5,13", "0", "4700000"},
        {"6,14", "0", "4700000"},
        {"7,15", "0", "4700000"},
    },
    {{"0-15"}},
};
LinuxCpuMapTestCase freq_1sockets_4cores = {
    4,
    1,
    1,
    4,
    {{4, 4, 0, 0, 0, 0}},
    {
        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
        {3, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
    },
    {
        {"0", "0", "1800000"},
        {"1", "0", "1800000"},
        {"2", "0", "1800000"},
        {"3", "0", "1800000"},
    },
    {},
};

TEST_P(LinuxCpuMapFreqParserTests, LinuxFreq) {}

INSTANTIATE_TEST_SUITE_P(CPUMap,
                         LinuxCpuMapFreqParserTests,
                         testing::Values(freq_2sockets_112cores_hyperthreading,
                                         freq_2sockets_48cores_hyperthreading,
                                         freq_2sockets_48cores_hyperthreading_1,
                                         freq_2sockets_24cores_hyperthreading,
                                         freq_2sockets_24cores_hyperthreading_1,
                                         freq_2sockets_20cores_hyperthreading,
                                         freq_2sockets_20cores_hyperthreading_1,
                                         freq_2sockets_20cores,
                                         freq_1sockets_32cores_hyperthreading,
                                         freq_1sockets_16cores_hyperthreading,
                                         freq_1sockets_16cores,
                                         freq_1sockets_16cores_1_hyperthreading,
                                         freq_1sockets_12cores_hyperthreading,
                                         freq_1sockets_8cores_hyperthreading,
                                         freq_1sockets_8cores_hyperthreading_1,
                                         freq_1sockets_4cores));

#endif
} // namespace
File diff suppressed because it is too large
@ -7,7 +7,7 @@
#include <common_test_utils/test_common.hpp>

#include "ie_system_conf.h"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"

using namespace testing;
using namespace ov;
@ -53,366 +53,366 @@ public:
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_1 = {
    {}, // param[in]: The logical processors selected in this simulation case do not include the physical core of
        // Pcore
    {{40, 20, 0, 20},
     {20, 10, 0, 10},
     {20, 10, 0, 10}}, // param[in]: The proc_type_table of simulated platform which is 2 sockets, 20 Pcores
                       // and 40 logical processors with hyper-threading enabled.
    {{40, 20, 0, 20, -1, -1},
     {20, 10, 0, 10, 0, 0},
     {20, 10, 0, 10, 1, 1}}, // param[in]: The proc_type_table of simulated platform which is 2 sockets, 20 Pcores
                             // and 40 logical processors with hyper-threading enabled.
    {
        {0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {2, 0, 2, HYPER_THREADING_PROC, 2, -1},
        {4, 0, 4, HYPER_THREADING_PROC, 4, -1},
        {6, 0, 6, HYPER_THREADING_PROC, 6, -1},
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
    }, // param[in]: This simulation case selects logical processors 0, 2, 4 and 6, which are marked as logical cores of
       // Pcore in the original cpu_mapping_table.
    1, // param[expected out]: Since all selected logical processors are in one socket, the number of sockets changes
       // to 1.
    4, // param[expected out]: Since only 4 logical processors are selected, the number of cores changes to 4.
    {{4, 4, 0, 0}}, // param[expected out]: The proc_type_table changes to 4 Pcores only
    {{4, 4, 0, 0, 0, 0}}, // param[expected out]: The proc_type_table changes to 4 Pcores only
    {
        {0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {2, 0, 2, MAIN_CORE_PROC, 2, -1},
        {4, 0, 4, MAIN_CORE_PROC, 4, -1},
        {6, 0, 6, MAIN_CORE_PROC, 6, -1},
        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
        {4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
        {6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
    }, // param[expected out]: cpu_mapping_table changes to physical cores of Pcore.
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_2 = {
    {1, 3, 5, 7},
    {{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
    {{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
    {
        {21, 0, 1, MAIN_CORE_PROC, 1, -1},
        {23, 0, 3, MAIN_CORE_PROC, 3, -1},
        {25, 0, 5, MAIN_CORE_PROC, 5, -1},
        {27, 0, 7, MAIN_CORE_PROC, 7, -1},
        {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
    },
    1,
    4,
    {{4, 4, 0, 0}},
    {{4, 4, 0, 0, 0, 0}},
    {
        {21, 0, 1, MAIN_CORE_PROC, 1, -1},
        {23, 0, 3, MAIN_CORE_PROC, 3, -1},
        {25, 0, 5, MAIN_CORE_PROC, 5, -1},
        {27, 0, 7, MAIN_CORE_PROC, 7, -1},
        {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
    },
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_3 = {
    {1, 3, 5, 7},
    {{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
    {{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
    {
        {0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {2, 0, 2, HYPER_THREADING_PROC, 2, -1},
        {4, 0, 4, HYPER_THREADING_PROC, 4, -1},
        {6, 0, 6, HYPER_THREADING_PROC, 6, -1},
        {21, 0, 1, MAIN_CORE_PROC, 1, -1},
        {23, 0, 3, MAIN_CORE_PROC, 3, -1},
        {25, 0, 5, MAIN_CORE_PROC, 5, -1},
        {27, 0, 7, MAIN_CORE_PROC, 7, -1},
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
        {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
        {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
    },
    1,
    8,
    {{8, 8, 0, 0}},
    {{8, 8, 0, 0, 0, 0}},
    {
        {0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {2, 0, 2, MAIN_CORE_PROC, 2, -1},
        {4, 0, 4, MAIN_CORE_PROC, 4, -1},
        {6, 0, 6, MAIN_CORE_PROC, 6, -1},
        {21, 0, 1, MAIN_CORE_PROC, 1, -1},
        {23, 0, 3, MAIN_CORE_PROC, 3, -1},
        {25, 0, 5, MAIN_CORE_PROC, 5, -1},
        {27, 0, 7, MAIN_CORE_PROC, 7, -1},
        {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
        {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
        {4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
        {6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
        {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
        {23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
        {25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
        {27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
    },
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_4 = {
    {0, 2, 4, 6},
    {{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
    {{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
    {
        {0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {2, 0, 2, HYPER_THREADING_PROC, 2, -1},
        {4, 0, 4, HYPER_THREADING_PROC, 4, -1},
        {6, 0, 6, HYPER_THREADING_PROC, 6, -1},
        {20, 0, 0, MAIN_CORE_PROC, 0, -1},
        {22, 0, 2, MAIN_CORE_PROC, 2, -1},
        {24, 0, 4, MAIN_CORE_PROC, 4, -1},
        {26, 0, 6, MAIN_CORE_PROC, 6, -1},
        {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
        {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
        {4, 0, 0, 4, HYPER_THREADING_PROC, 4,
|
||||
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
},
|
||||
1,
|
||||
4,
|
||||
{{8, 4, 0, 4}},
|
||||
{{8, 4, 0, 4, 0, 0}},
|
||||
{
|
||||
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_5 = {
|
||||
{},
|
||||
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
|
||||
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
|
||||
{
|
||||
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{10, 1, 10, HYPER_THREADING_PROC, 10, -1},
|
||||
{12, 1, 12, HYPER_THREADING_PROC, 12, -1},
|
||||
{14, 1, 14, HYPER_THREADING_PROC, 14, -1},
|
||||
{16, 1, 16, HYPER_THREADING_PROC, 16, -1},
|
||||
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1},
|
||||
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1},
|
||||
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1},
|
||||
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1},
|
||||
},
|
||||
2,
|
||||
8,
|
||||
{{8, 8, 0, 0}, {4, 4, 0, 0}, {4, 4, 0, 0}},
|
||||
{{8, 8, 0, 0, -1, -1}, {4, 4, 0, 0, 0, 0}, {4, 4, 0, 0, 1, 1}},
|
||||
{
|
||||
{0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{2, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{4, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{6, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{10, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{12, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{14, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{16, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{10, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{12, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{14, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{16, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_6 = {
|
||||
{0, 2, 4, 6, 10, 12, 14, 16},
|
||||
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
|
||||
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
|
||||
{
|
||||
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{30, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{32, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{34, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{36, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
},
|
||||
2,
|
||||
8,
|
||||
{{8, 8, 0, 0}, {4, 4, 0, 0}, {4, 4, 0, 0}},
|
||||
{{8, 8, 0, 0, -1, -1}, {4, 4, 0, 0, 0, 0}, {4, 4, 0, 0, 1, 1}},
|
||||
{
|
||||
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{30, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{32, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{34, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{36, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_7 = {
|
||||
{0, 2, 4, 6},
|
||||
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
|
||||
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
|
||||
{
|
||||
{10, 1, 10, HYPER_THREADING_PROC, 10, -1},
|
||||
{12, 1, 12, HYPER_THREADING_PROC, 12, -1},
|
||||
{14, 1, 14, HYPER_THREADING_PROC, 14, -1},
|
||||
{16, 1, 16, HYPER_THREADING_PROC, 16, -1},
|
||||
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1},
|
||||
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1},
|
||||
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1},
|
||||
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1},
|
||||
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
},
|
||||
2,
|
||||
8,
|
||||
{{8, 8, 0, 0}, {4, 4, 0, 0}, {4, 4, 0, 0}},
|
||||
{{8, 8, 0, 0, -1, -1}, {4, 4, 0, 0, 0, 0}, {4, 4, 0, 0, 1, 1}},
|
||||
{
|
||||
{10, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{12, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{14, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{16, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{10, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{12, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{14, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{16, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_8 = {
|
||||
{0, 2, 4, 6, 10, 12, 14, 16},
|
||||
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
|
||||
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
|
||||
{
|
||||
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{10, 1, 10, HYPER_THREADING_PROC, 10, -1},
|
||||
{12, 1, 12, HYPER_THREADING_PROC, 12, -1},
|
||||
{14, 1, 14, HYPER_THREADING_PROC, 14, -1},
|
||||
{16, 1, 16, HYPER_THREADING_PROC, 16, -1},
|
||||
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{30, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{32, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{34, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{36, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1},
|
||||
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1},
|
||||
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1},
|
||||
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1},
|
||||
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
},
|
||||
2,
|
||||
8,
|
||||
{{16, 8, 0, 8}, {8, 4, 0, 4}, {8, 4, 0, 4}},
|
||||
{{16, 8, 0, 8, -1, -1}, {8, 4, 0, 4, 0, 0}, {8, 4, 0, 4, 1, 1}},
|
||||
{
|
||||
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{10, 1, 10, HYPER_THREADING_PROC, 10, -1},
|
||||
{12, 1, 12, HYPER_THREADING_PROC, 12, -1},
|
||||
{14, 1, 14, HYPER_THREADING_PROC, 14, -1},
|
||||
{16, 1, 16, HYPER_THREADING_PROC, 16, -1},
|
||||
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{30, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{32, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{34, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{36, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1},
|
||||
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1},
|
||||
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1},
|
||||
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1},
|
||||
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
|
||||
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
|
||||
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
|
||||
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_1 = {
|
||||
{},
|
||||
{{24, 8, 8, 8}},
|
||||
{{24, 8, 8, 8, 0, 0}},
|
||||
{
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
1,
|
||||
4,
|
||||
{{4, 0, 4, 0}},
|
||||
{{4, 0, 4, 0, 0, 0}},
|
||||
{
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_2 = {
|
||||
{},
|
||||
{{24, 8, 8, 8}},
|
||||
{{24, 8, 8, 8, 0, 0}},
|
||||
{
|
||||
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
1,
|
||||
8,
|
||||
{{8, 4, 4, 0}},
|
||||
{{8, 4, 4, 0, 0, 0}},
|
||||
{
|
||||
{0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{2, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{4, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{6, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_3 = {
|
||||
{0, 1, 2, 3},
|
||||
{{24, 8, 8, 8}},
|
||||
{{24, 8, 8, 8, 0, 0}},
|
||||
{
|
||||
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
1,
|
||||
8,
|
||||
{{8, 4, 4, 0}},
|
||||
{{8, 4, 4, 0, 0, 0}},
|
||||
{
|
||||
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_4 = {
|
||||
{0, 1, 2, 3},
|
||||
{{24, 8, 8, 8}},
|
||||
{{24, 8, 8, 8, 0, 0}},
|
||||
{
|
||||
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{2, 0, 1, HYPER_THREADING_PROC, 1, -1},
|
||||
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{4, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{6, 0, 3, HYPER_THREADING_PROC, 3, -1},
|
||||
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
|
||||
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
|
||||
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
1,
|
||||
8,
|
||||
{{12, 4, 4, 4}},
|
||||
{{12, 4, 4, 4, 0, 0}},
|
||||
{
|
||||
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{2, 0, 1, HYPER_THREADING_PROC, 1, -1},
|
||||
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{4, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{6, 0, 3, HYPER_THREADING_PROC, 3, -1},
|
||||
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
|
||||
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
|
||||
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
|
||||
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
|
||||
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
};
|
||||
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_5 = {
|
||||
{0, 1, 2, 3},
|
||||
{{24, 8, 8, 8}},
|
||||
{{24, 8, 8, 8, 0, 0}},
|
||||
{
|
||||
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{8, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{10, 0, 5, HYPER_THREADING_PROC, 5, -1},
|
||||
{12, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{14, 0, 7, HYPER_THREADING_PROC, 7, -1},
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
|
||||
{10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
|
||||
{12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
|
||||
{14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
1,
|
||||
12,
|
||||
{{12, 8, 4, 0}},
|
||||
{{12, 8, 4, 0, 0, 0}},
|
||||
{
|
||||
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{8, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{10, 0, 5, MAIN_CORE_PROC, 5, -1},
|
||||
{12, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{14, 0, 7, MAIN_CORE_PROC, 7, -1},
|
||||
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
|
||||
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
|
||||
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
|
||||
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
|
||||
{8, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
|
||||
{10, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
|
||||
{12, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
|
||||
{14, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
|
||||
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
|
||||
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
|
||||
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
|
||||
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
|
||||
},
|
||||
};
|
||||
|
||||
|
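A note on reading the tables above, since the diff interleaves pre- and post-change rows: this merge adds a NUMA-node column to both tables, so 4-column `proc_type_table` rows and 6-column `cpu_mapping_table` rows are the old layout, while 6- and 7-column rows are the new one. The commented sketch below spells out the column layout as it appears from the data; the enum-style names follow OpenVINO's streams-executor conventions but should be treated as assumptions rather than an exact copy of the header:

```cpp
// proc_type_table row: one summary entry for the platform plus one per socket/NUMA node.
// Old layout: {ALL_PROC, MAIN_CORE_PROC, EFFICIENT_CORE_PROC, HYPER_THREADING_PROC}
// New layout appends:   {..., PROC_NUMA_NODE_ID, PROC_SOCKET_ID}
// e.g. {40, 20, 0, 20, -1, -1}: 40 logical processors, 20 Pcores, 0 Ecores,
// 20 hyper-threading siblings; -1/-1 marks the whole-platform summary row.

// cpu_mapping_table row: one entry per logical processor.
// Old layout: {PROCESSOR_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, USED_FLAG}
// New layout inserts a NUMA node id after the processor id:
//             {PROCESSOR_ID, NUMA_NODE_ID, SOCKET_ID, CORE_ID, CORE_TYPE, GROUP_ID, USED_FLAG}
// e.g. {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1}: processor 21 on NUMA node 0,
// socket 0, core 1, a Pcore, in group 1, not yet assigned (-1).
```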
@ -3,9 +3,9 @@

The main responsibility of the AUTO plugin is to provide a unified device that enables developers to code deep learning applications once and deploy them anywhere.

Other capabilities of the AUTO plugin include:
* Static device selection, which intelligently loads a network to one device or multiple devices.
* CPU acceleration to start inferencing while the target device is still loading the network.
* Model priority support for loading multiple networks to multiple devices.
* Static device selection, which intelligently compiles a model to one device or multiple devices.
* CPU acceleration to start inferencing while the target device is still compiling the model.
* Model priority support for compiling multiple models to multiple devices.

The component is written in `C++`. If you want to contribute to the AUTO plugin, follow [the common coding style rules](../../../docs/dev/coding_style.md).

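To make the capabilities listed in the README section above concrete, here is a minimal sketch of deploying through the AUTO device with the OpenVINO 2.0 C++ API; the model path and the device priority list are placeholders, and both properties are optional:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // "model.xml" is a placeholder path to an OpenVINO IR model.
    auto model = core.read_model("model.xml");
    // AUTO selects the target device; CPU can serve the first inferences
    // while a slower device (e.g. GPU) is still compiling the model.
    auto compiled = core.compile_model(model,
                                       "AUTO",
                                       ov::device::priorities("GPU,CPU"),
                                       ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
    auto request = compiled.create_infer_request();
    request.infer();  // fill input tensors before this call in real code
    return 0;
}
```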
@ -18,25 +18,25 @@ Auto unit test is a set of unit tests using gmock, each of which is for testing

2. Build

```bash
make ieMultiPluginUnitTests
make ov_auto_unit_tests
```

3. You can find `ieMultiPluginUnitTests` in the *bin* directory after the build
3. You can find `ov_auto_unit_tests` in the *bin* directory after the build

### Run unit test

You can run _`ieMultiPluginUnitTests`_ in the *bin* directory, which is the output of the OpenVINO build
You can run _`ov_auto_unit_tests`_ in the *bin* directory, which is the output of the OpenVINO build

If you want to run a specific unit test, you can use the `gtest_filter` option as follows:

```
./ieMultiPluginUnitTests --gtest_filter='*filter_name*'
./ov_auto_unit_tests --gtest_filter='*filter_name*'
```

Then, you can get a result similar to:

```bash
openvino/bin/intel64/Release$ ./ieMultiPluginUnitTests --gtest_filter=*AutoReleaseHelperTest*cpuLoadFailure_accelerateorLoadFailure*
openvino/bin/intel64/Release$ ./ov_auto_unit_tests --gtest_filter=*AutoReleaseHelperTest*cpuLoadFailure_accelerateorLoadFailure*
Running main() from /home/openvino/thirdparty/gtest/gtest/googletest/src/gtest_main.cc
Note: Google Test filter = *AutoReleaseHelperTest*cpuLoadFailure_accelerateorLoadFailure*
[==========] Running 1 test from 1 test suite.
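If you are unsure which tests exist, googletest can enumerate them: running `./ov_auto_unit_tests --gtest_list_tests` prints every suite and test name, which is a convenient way to build a `--gtest_filter` expression.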
@ -4,33 +4,28 @@

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "async_infer_request.hpp"

namespace MultiDevicePlugin {
AsyncInferRequest::AsyncInferRequest(const Schedule::Ptr& schedule,
                                     const IInferPtr& inferRequest,
                                     const IE::ITaskExecutor::Ptr& callbackExecutor):
    AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor),
    _schedule(schedule),
    _inferRequest(inferRequest) {
    auto pipeline = _schedule->GetPipeline(_inferRequest, &_workerInferRequest);
ov::auto_plugin::AsyncInferRequest::AsyncInferRequest(const Schedule::Ptr& schedule,
                                                      const std::shared_ptr<ov::auto_plugin::InferRequest>& request,
                                                      const std::shared_ptr<ov::threading::ITaskExecutor>& callback_executor) :
    IAsyncInferRequest(request, nullptr, callback_executor),
    m_schedule(schedule),
    m_inferrequest(request) {
    auto pipeline = m_schedule->get_async_pipeline(m_inferrequest, &m_worker_inferrequest);
    if (pipeline.size() > 0) {
        _pipeline = std::move(pipeline);
        m_pipeline = std::move(pipeline);
    }
}

void AsyncInferRequest::Infer_ThreadUnsafe() {
    InferUsingAsync();
std::vector<ov::ProfilingInfo> ov::auto_plugin::AsyncInferRequest::get_profiling_info() const {
    check_state();
    auto scheduled_request = std::dynamic_pointer_cast<InferRequest>(m_inferrequest);
    return scheduled_request->get_profiling_info();
}

std::map<std::string, IE::InferenceEngineProfileInfo>
AsyncInferRequest::GetPerformanceCounts() const {
    CheckState();
    auto multiDeviceInfer = std::dynamic_pointer_cast<MultiDeviceInferRequest>(_inferRequest);
    return multiDeviceInfer->GetPerformanceCounts();
void ov::auto_plugin::AsyncInferRequest::infer_thread_unsafe() {
    start_async_thread_unsafe();
}

AsyncInferRequest::~AsyncInferRequest() {
    StopAndWait();
ov::auto_plugin::AsyncInferRequest::~AsyncInferRequest() {
    stop_and_wait();
}

} // namespace MultiDevicePlugin

@ -8,27 +8,23 @@
#include "schedule.hpp"
#include "infer_request.hpp"

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif

namespace MultiDevicePlugin {
class AsyncInferRequest : public IE::AsyncInferRequestThreadSafeDefault {
namespace ov {
namespace auto_plugin {
// ! [async_infer_request:header]
class AsyncInferRequest : public ov::IAsyncInferRequest {
public:
    using Ptr = std::shared_ptr<AsyncInferRequest>;
    explicit AsyncInferRequest(const Schedule::Ptr& schedule, const IInferPtr& inferRequest,
                               const IE::ITaskExecutor::Ptr& callbackExecutor);
    void Infer_ThreadUnsafe() override;
    std::map<std::string, IE::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
    ~AsyncInferRequest();
    AsyncInferRequest(const Schedule::Ptr& schedule,
                      const std::shared_ptr<ov::auto_plugin::InferRequest>& request,
                      const std::shared_ptr<ov::threading::ITaskExecutor>& callback_executor);

protected:
    Schedule::Ptr _schedule;
    WorkerInferRequest* _workerInferRequest = nullptr;
    IInferPtr _inferRequest;
    ~AsyncInferRequest();
    void infer_thread_unsafe() override;
    std::vector<ov::ProfilingInfo> get_profiling_info() const override;
private:
    Schedule::Ptr m_schedule;
    WorkerInferRequest* m_worker_inferrequest = nullptr;
    ISyncInferPtr m_inferrequest;
};

} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov
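The class in the diff above is the plugin-internal side of asynchronous inference; application code reaches the same machinery through `ov::InferRequest`. A minimal sketch of the caller-side flow, assuming a `compiled` model obtained as in the earlier example:

```cpp
#include <openvino/openvino.hpp>

void run_async(ov::CompiledModel& compiled) {
    ov::InferRequest request = compiled.create_infer_request();
    request.set_callback([](std::exception_ptr ex) {
        // A non-null ex signals that asynchronous inference failed;
        // the exception is also surfaced when wait() is called.
        if (!ex) {
            // outputs are ready to read here
        }
    });
    request.start_async();  // dispatched through the plugin's async pipeline
    request.wait();
}
```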
src/plugins/auto/src/auto_compiled_model.cpp (new file, 252 lines)
@ -0,0 +1,252 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "auto_compiled_model.hpp"
#include "common.hpp"
#include <memory>

#include "async_infer_request.hpp"
#include "itt.hpp"
#include "openvino/runtime/exec_model_info.hpp"
#include "openvino/runtime/properties.hpp"
#include "plugin.hpp"

namespace ov {
namespace auto_plugin {
AutoCompiledModel::AutoCompiledModel(const std::shared_ptr<ov::Model>& model,
                                     const std::shared_ptr<const ov::IPlugin>& plugin,
                                     ScheduleContext::Ptr context,
                                     Schedule::Ptr scheduler)
    : CompiledModel(model, plugin, context, scheduler),
      m_model(model),
      m_context(context) {
    m_scheduler = std::dynamic_pointer_cast<AutoSchedule>(scheduler);
}

void AutoCompiledModel::set_property(const ov::AnyMap& properties) {
    OPENVINO_NOT_IMPLEMENTED;
}

std::shared_ptr<const ov::Model> AutoCompiledModel::get_runtime_model() const {
    OPENVINO_ASSERT(m_context->m_hw_compiled_model);
    return m_context->m_hw_compiled_model->get_runtime_model();
}

ov::Any AutoCompiledModel::get_property(const std::string& name) const {
    const auto& add_ro_properties = [](const std::string& name, std::vector<ov::PropertyName>& properties) {
        properties.emplace_back(ov::PropertyName{name, ov::PropertyMutability::RO});
    };
    const auto& default_ro_properties = []() {
        std::vector<ov::PropertyName> ro_properties{ov::model_name,
                                                    ov::supported_properties,
                                                    ov::execution_devices,
                                                    ov::hint::performance_mode,
                                                    ov::optimal_number_of_infer_requests,
                                                    ov::device::priorities,
                                                    ov::device::properties,
                                                    ov::hint::model_priority,
                                                    ov::loaded_from_cache};
        return ro_properties;
    };
    const auto& default_rw_properties = []() {
        std::vector<ov::PropertyName> rw_properties{};
        return rw_properties;
    };
    const auto& to_string_vector = [](const std::vector<ov::PropertyName>& properties) {
        std::vector<std::string> ret;
        for (const auto& property : properties) {
            ret.emplace_back(property);
        }
        return ret;
    };
    if (name == ov::supported_properties) {
        auto ro_properties = default_ro_properties();
        auto rw_properties = default_rw_properties();

        std::vector<ov::PropertyName> supported_properties;
        supported_properties.reserve(ro_properties.size() + rw_properties.size());
        supported_properties.insert(supported_properties.end(), ro_properties.begin(), ro_properties.end());
        supported_properties.insert(supported_properties.end(), rw_properties.begin(), rw_properties.end());
        return decltype(ov::supported_properties)::value_type(supported_properties);
    } else if (name == ov::hint::performance_mode) {
        return m_context->m_performance_hint;
    } else if (name == ov::device::priorities) {
        // device priority does not support change on-the-fly
        return decltype(ov::device::priorities)::value_type(m_context->m_str_devices);
    } else if (name == ov::device::properties) {
        ov::AnyMap all_devices = {};
        {
            std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
            if (m_scheduler->m_compile_context[FALLBACKDEVICE].m_is_already) {
                all_devices = get_device_supported_properties(m_scheduler->m_compile_context[FALLBACKDEVICE]);
            }
        }
        std::lock_guard<std::mutex> lock(m_context->m_mutex);
        if (m_scheduler->m_compile_context[ACTUALDEVICE].m_is_already) {
            all_devices = get_device_supported_properties(m_scheduler->m_compile_context[ACTUALDEVICE]);
        } else {
            all_devices = get_device_supported_properties(m_scheduler->m_compile_context[CPU]);
        }
        return all_devices;
    } else if (name == ov::hint::model_priority) {
        auto value = m_context->m_model_priority;
        if (m_context->m_ov_core->is_new_api()) {
            return value ? ((value > 1) ? ov::hint::Priority::LOW :
                    ov::hint::Priority::MEDIUM) : ov::hint::Priority::HIGH;
        } else {
            OPENVINO_SUPPRESS_DEPRECATED_START
            return value ? ((value > 1) ? CONFIG_VALUE(MODEL_PRIORITY_LOW) : CONFIG_VALUE(
                        MODEL_PRIORITY_MED)) : CONFIG_VALUE(MODEL_PRIORITY_HIGH);
            OPENVINO_SUPPRESS_DEPRECATED_END
        }
    } else if (name == ov::optimal_number_of_infer_requests) {
        const unsigned int default_num_for_tput = 4u;
        const unsigned int default_num_for_latency = 1u;
        unsigned int real = 0;
        if (m_scheduler->m_compile_context[ACTUALDEVICE].m_is_already) {
            real = m_scheduler->m_compile_context[ACTUALDEVICE].
                m_compiled_model->get_property(name).as<unsigned int>();
        } else {
            OPENVINO_ASSERT(m_scheduler->m_compile_context[CPU].m_is_already == true);
            std::unique_lock<std::mutex> lock(m_context->m_mutex);
            auto device_info = m_scheduler->m_compile_context[ACTUALDEVICE].m_device_info;
            lock.unlock();
            unsigned int optimal_batch_size = 0;
            unsigned int requests = 0;
            bool tput_enabled_in_plugin = false;
            auto actual_dev_supported_properties = m_context->m_ov_core->get_property(device_info.device_name, ov::supported_properties);
            try {
                // for benchmark through AUTO:CPU,GPU
                // SetConfig directly set to CPU/GPU in this case
                if (std::find(actual_dev_supported_properties.begin(), actual_dev_supported_properties.end(), ov::hint::performance_mode)
                    != actual_dev_supported_properties.end())
                    tput_enabled_in_plugin =
                        m_context->m_ov_core->get_property(device_info.device_name,
                            ov::hint::performance_mode) == ov::hint::PerformanceMode::THROUGHPUT;
            } catch (const ov::Exception&) {
                LOG_DEBUG_TAG("get_property:%s for %s", "PERF_HINT config not supported",
                              device_info.device_name.c_str());
            }
            const auto& mode = device_info.config.find(ov::hint::performance_mode.name());
            if (tput_enabled_in_plugin ||
                (mode != device_info.config.end() && mode->second == ov::hint::PerformanceMode::THROUGHPUT)) {
                unsigned int upper_bound_streams_num = 0;
                if (std::find(actual_dev_supported_properties.begin(), actual_dev_supported_properties.end(), ov::range_for_streams)
                    != actual_dev_supported_properties.end()) {
                    try {
                        auto range_of_streams = m_context->m_ov_core->get_property(device_info.device_name,
                                                                                   ov::range_for_streams);
                        upper_bound_streams_num = std::get<1>(range_of_streams);
                    } catch (const ov::Exception&) {
                        LOG_DEBUG_TAG("get_property range_for_streams failed");
                    }
                }
                if (!m_context->m_batching_disabled) {
                    if (std::find(actual_dev_supported_properties.begin(), actual_dev_supported_properties.end(), ov::optimal_batch_size)
                        != actual_dev_supported_properties.end()) {
                        try {
                            optimal_batch_size = m_context->m_ov_core->get_property(device_info.device_name,
                                                                                    ov::optimal_batch_size, {ov::hint::model(m_model)});
                            LOG_DEBUG_TAG("BATCHING:%s:%ld", "optimal batch size",
                                          optimal_batch_size);
                        } catch (const ov::Exception&) {
                            LOG_DEBUG_TAG("BATCHING:%s", "property optimal_batch_size not supported");
                        }
                    }
                }
                if (optimal_batch_size > 1) {
                    // batching is supported with the device
                    // go with auto-batching
                    try {
                        // check if the app has set a preferred value
                        requests =
                            m_context->m_ov_core->get_property(device_info.device_name, ov::hint::num_requests);
                        const auto& reqs = device_info.config.find(ov::hint::num_requests.name());
                        if (reqs != device_info.config.end()) {
                            requests = reqs->second.as<unsigned int>();
                        }
                        LOG_DEBUG_TAG("BATCHING:%s:%ld", "user requested size", requests);
                        if (!requests) { // no limitations from user
                            requests = optimal_batch_size * upper_bound_streams_num * 2;
                            LOG_DEBUG_TAG("BATCHING:%s:%ld", "deduced size:", requests);
                        }
                    } catch (const ov::Exception& iie) {
                        LOG_WARNING_TAG("deducing the optimal infer request num for auto-batch failed: %s",
                                        iie.what());
                    }
                    real = (std::max)(requests, optimal_batch_size);
                } else if (device_info.device_name.find("VPUX") != std::string::npos) {
                    real = 8u;
                } else {
                    real = upper_bound_streams_num ? 2 * upper_bound_streams_num : default_num_for_tput;
                }
            } else {
                real = default_num_for_latency;
            }
        }
        return decltype(ov::optimal_number_of_infer_requests)::value_type {real};
    } else if (name == ov::execution_devices) {
        ov::Any execution_devices;
        auto get_execution_devices = [&execution_devices](std::string exe_devices_string) {
            std::vector<std::string> exe_devices = {};
            if (exe_devices_string == "CPU_HELP")
                exe_devices_string = "(CPU)";
            exe_devices.push_back(exe_devices_string);
            execution_devices = decltype(ov::execution_devices)::value_type {exe_devices};
        };
        {
            std::lock_guard<std::mutex> lock(m_context->m_mutex);
            for (int i = 0; i < CONTEXTNUM; i++) {
                if (m_scheduler->m_compile_context[i].m_is_enabled && m_scheduler->m_compile_context[i].m_is_already) {
                    if (i == 0 && !m_scheduler->m_compile_context[CPU].m_compiled_model._ptr) {
                        continue;
                    } else {
                        get_execution_devices(m_scheduler->m_compile_context[i].m_worker_name);
                        break;
                    }
                }
            }
        }
        return execution_devices;
    } else if (name == ov::model_name) {
        std::lock_guard<std::mutex> lock(m_context->m_mutex);
        {
            if (m_scheduler->m_compile_context[CPU].m_is_enabled && m_scheduler->m_compile_context[CPU].m_is_already)
                return m_scheduler->m_compile_context[CPU].m_compiled_model->get_property(name);
            return m_scheduler->m_compile_context[ACTUALDEVICE].m_compiled_model->get_property(name);
        }
    OPENVINO_SUPPRESS_DEPRECATED_START
    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        auto ro_properties = default_ro_properties();
        add_ro_properties(METRIC_KEY(SUPPORTED_METRICS), ro_properties);
        add_ro_properties(METRIC_KEY(SUPPORTED_CONFIG_KEYS), ro_properties);
        return to_string_vector(ro_properties);
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        auto rw_properties = default_rw_properties();
        return to_string_vector(rw_properties);
    OPENVINO_SUPPRESS_DEPRECATED_END
    } else if (name == ov::loaded_from_cache) {
        std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
        if (m_scheduler->m_compile_context[FALLBACKDEVICE].m_is_already) {
            return m_scheduler->m_compile_context[FALLBACKDEVICE].m_compiled_model->get_property(name).as<bool>();
        }
        if (m_scheduler->m_compile_context[ACTUALDEVICE].m_is_already) {
            return m_scheduler->m_compile_context[ACTUALDEVICE].
                m_compiled_model->get_property(name).as<bool>();
        } else {
            OPENVINO_ASSERT(m_scheduler->m_compile_context[CPU].m_is_already == true);
            std::lock_guard<std::mutex> lock(m_context->m_mutex);
            return m_scheduler->m_compile_context[CPU].
                m_compiled_model->get_property(name).as<bool>();
        }
    }
    OPENVINO_THROW(get_log_tag(), ": not supported property ", name);
}

void AutoCompiledModel::export_model(std::ostream& model_stream) const {
    OPENVINO_NOT_IMPLEMENTED;
}
} // namespace auto_plugin
} // namespace ov
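Two things in `get_property()` above are worth unpacking. First, when the throughput hint is active, auto-batching is possible, and the user sets no request limit, the pool is sized as `optimal_batch_size * upper_bound_streams_num * 2`: for example, an optimal batch of 32 and an upper stream bound of 2 would yield 128 requests. Second, these properties are what an application sees when querying a compiled AUTO model; a minimal sketch of such a query, using standard OpenVINO 2.0 property names:

```cpp
#include <openvino/openvino.hpp>
#include <iostream>

void report(const ov::CompiledModel& compiled) {
    // Both queries below are answered by AutoCompiledModel::get_property().
    auto n_requests = compiled.get_property(ov::optimal_number_of_infer_requests);
    auto devices = compiled.get_property(ov::execution_devices);
    std::cout << "optimal number of infer requests: " << n_requests << "\n";
    for (const auto& device : devices)
        std::cout << "executing on: " << device << "\n";
}
```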
src/plugins/auto/src/auto_compiled_model.hpp (new file, 38 lines)
@ -0,0 +1,38 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once

#include "compiled_model.hpp"
#include "auto_schedule.hpp"

namespace ov {
namespace auto_plugin {

class AutoCompiledModel : public CompiledModel {
public:
    AutoCompiledModel(const std::shared_ptr<ov::Model>& model,
                      const std::shared_ptr<const ov::IPlugin>& plugin,
                      ScheduleContext::Ptr context,
                      Schedule::Ptr scheduler);

    // implement pure virtual methods from a base class ov::ICompiledModel
    void export_model(std::ostream& model) const override;

    std::shared_ptr<const ov::Model> get_runtime_model() const override;

    void set_property(const ov::AnyMap& properties) override;

    ov::Any get_property(const std::string& name) const override;

private:
    friend class InferRequest;
    friend class Plugin;
    std::shared_ptr<ov::Model> m_model;
    ScheduleContext::Ptr m_context;
    AutoSchedule::Ptr m_scheduler;
};
} // namespace auto_plugin
} // namespace ov
@ -1,283 +0,0 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#include <ie_metric_helpers.hpp>
|
||||
#include "ie_performance_hints.hpp"
|
||||
#include "auto_executable_network.hpp"
|
||||
|
||||
// ------------------------------AutoExecutableNetwork----------------------------
|
||||
//
|
||||
namespace MultiDevicePlugin {
|
||||
AutoExecutableNetwork::AutoExecutableNetwork(AutoScheduleContext::Ptr& context, const AutoSchedule::Ptr& schedule)
|
||||
:ExecutableNetwork(schedule, context),
|
||||
_autoSContext(context),
|
||||
_autoSchedule(schedule) {
|
||||
}
|
||||
|
||||
std::shared_ptr<IE::RemoteContext> AutoExecutableNetwork::GetContext() const {
|
||||
if (_autoSchedule->_pCTPUTLoadContext) {
|
||||
for (size_t i = 0; i < _autoSchedule->_nCTputDeviceNums; i++) {
|
||||
if (_autoSchedule->_pCTPUTLoadContext[i].isAlready) {
|
||||
return _autoSchedule->_pCTPUTLoadContext[i].executableNetwork->GetContext();
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
} else {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) {
|
||||
return _autoSchedule->_loadContext[FALLBACKDEVICE].executableNetwork->GetContext();
|
||||
} else {
|
||||
_autoSchedule->WaitActualNetworkReady();
|
||||
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetContext();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AutoExecutableNetwork::SetConfig(const std::map<std::string, IE::Parameter>
|
||||
& config) {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
IE::Parameter AutoExecutableNetwork::GetConfig(const std::string& name) const {
|
||||
IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config";
|
||||
}
|
||||
|
||||
IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
|
||||
if (name == ov::supported_properties) {
|
||||
return decltype(ov::supported_properties)::value_type {
|
||||
// Metrics
|
||||
ov::PropertyName{ov::supported_properties.name(), ov::PropertyMutability::RO},
|
||||
ov::PropertyName{ov::hint::performance_mode.name(), ov::PropertyMutability::RO},
|
||||
ov::PropertyName{ov::model_name.name(), ov::PropertyMutability::RO},
|
||||
ov::PropertyName{ov::optimal_number_of_infer_requests.name(), ov::PropertyMutability::RO},
|
||||
ov::PropertyName{ov::hint::model_priority.name(), ov::PropertyMutability::RO},
|
||||
ov::PropertyName{ov::device::priorities.name(), ov::PropertyMutability::RO},
|
||||
ov::PropertyName{ov::device::properties.name(), ov::PropertyMutability::RO},
|
||||
ov::PropertyName{ov::execution_devices.name(), ov::PropertyMutability::RO}};
|
||||
} else if (name == ov::hint::performance_mode) {
|
||||
auto value = _autoSContext->_performanceHint;
|
||||
if (!_autoSContext->_core->isNewAPI())
|
||||
return value;
|
||||
if (value == InferenceEngine::PluginConfigParams::THROUGHPUT) {
|
||||
return ov::hint::PerformanceMode::THROUGHPUT;
|
||||
} else if (value == InferenceEngine::PluginConfigParams::LATENCY) {
|
||||
return ov::hint::PerformanceMode::LATENCY;
|
||||
} else if (value == InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT) {
|
||||
return ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT;
|
||||
} else if (value == "UNDEFINED") {
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
return ov::hint::PerformanceMode::UNDEFINED;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
} else {
|
||||
OPENVINO_THROW("Unsupported value of ov::hint::PerformanceMode");
|
||||
}
|
||||
} else if (name == ov::device::priorities) {
|
||||
auto value = _autoSContext->_config.find(ov::device::priorities.name());
|
||||
return decltype(ov::device::priorities)::value_type {value->second.as<std::string>()};
|
||||
} else if (name == ov::device::properties) {
|
||||
ov::AnyMap all_devices = {};
|
||||
auto get_device_supported_metrics = [&all_devices] (const AutoLoadContext& context) {
|
||||
ov::AnyMap device_properties = {};
|
||||
auto device_supported_metrics = context.executableNetwork->GetMetric(METRIC_KEY(SUPPORTED_METRICS));
|
||||
for (auto&& property_name : device_supported_metrics.as<std::vector<std::string>>()) {
|
||||
device_properties[property_name] = context.executableNetwork->GetMetric(property_name);
|
||||
}
|
||||
auto device_supported_configs = context.executableNetwork->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
|
||||
for (auto&& property_name : device_supported_configs.as<std::vector<std::string>>()) {
|
||||
device_properties[property_name] = context.executableNetwork->GetConfig(property_name);
|
||||
}
|
||||
all_devices[context.deviceInfo.deviceName] = device_properties;
|
||||
};
|
||||
if (_autoSchedule->_pCTPUTLoadContext) {
|
||||
for (size_t i = 0; i < _autoSchedule->_nCTputDeviceNums; i++) {
|
||||
if (_autoSchedule->_pCTPUTLoadContext[i].isAlready) {
|
||||
get_device_supported_metrics(_autoSchedule->_pCTPUTLoadContext[i]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) {
|
||||
get_device_supported_metrics(_autoSchedule->_loadContext[FALLBACKDEVICE]);
|
||||
}
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
|
||||
get_device_supported_metrics(_autoSchedule->_loadContext[ACTUALDEVICE]);
|
||||
} else {
|
||||
get_device_supported_metrics(_autoSchedule->_loadContext[CPU]);
|
||||
}
|
||||
}
|
||||
return all_devices;
|
||||
} else if (name == ov::hint::model_priority) {
|
||||
auto value = _autoSContext->_modelPriority;
|
||||
if (_autoSContext->_core->isNewAPI()) {
|
||||
return value ? ((value > 1) ? ov::hint::Priority::LOW :
|
||||
ov::hint::Priority::MEDIUM) : ov::hint::Priority::HIGH;
|
||||
} else {
|
||||
return value ? ((value > 1) ? CONFIG_VALUE(MODEL_PRIORITY_LOW) : CONFIG_VALUE(
|
||||
MODEL_PRIORITY_MED)) : CONFIG_VALUE(MODEL_PRIORITY_HIGH);
|
||||
}
|
||||
} else if (name == ov::optimal_number_of_infer_requests) {
|
||||
const unsigned int defaultNumForTPUT = 4u;
|
||||
const unsigned int defaultNumForLatency = 1u;
|
||||
unsigned int real = 0;
|
||||
if (_autoSchedule->_pCTPUTLoadContext) {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
unsigned int res = 0u;
|
||||
for (size_t i = 0; i < _autoSchedule->_nCTputDeviceNums; i++) {
|
||||
try {
|
||||
if (_autoSchedule->_pCTPUTLoadContext[i].isAlready) {
|
||||
res += (_autoSchedule->_pCTPUTLoadContext[i])
|
||||
.executableNetwork->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))
|
||||
.as<unsigned int>();
|
||||
}
|
||||
} catch (const IE::Exception& iie) {
|
||||
IE_THROW()
|
||||
<< "Every device used in cumulative mode should "
|
||||
<< "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
|
||||
<< "Failed to query the metric for with error:" <<
|
||||
iie.what();
|
||||
}
|
||||
}
|
||||
return decltype(ov::optimal_number_of_infer_requests)::value_type {res};
|
||||
}
|
||||
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
|
||||
real = _autoSchedule->_loadContext[ACTUALDEVICE].
|
||||
executableNetwork->GetMetric(name).as<unsigned int>();
|
||||
} else {
|
||||
IE_ASSERT(_autoSchedule->_loadContext[CPU].isAlready == true);
|
||||
std::unique_lock<std::mutex> lock(_autoSContext->_confMutex);
|
||||
auto deviceInfo = _autoSchedule->_loadContext[ACTUALDEVICE].deviceInfo;
|
||||
lock.unlock();
|
||||
unsigned int optimalBatchSize = 0;
|
||||
unsigned int requests = 0;
|
||||
bool bThroughputEnabledInPlugin = false;
|
||||
try {
|
||||
// for benchmark through AUTO:CPU,GPU
|
||||
// SetConfig directly set to CPU/GPU in this case
|
||||
bThroughputEnabledInPlugin =
|
||||
_autoSContext->_core->GetConfig(deviceInfo.deviceName,
|
||||
CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == CONFIG_VALUE(THROUGHPUT);
|
||||
} catch (const IE::Exception&) {
|
||||
LOG_DEBUG_TAG("GetMetric:%s for %s", "PERF_HINT config not supported",
|
||||
deviceInfo.deviceName.c_str());
|
||||
}
|
||||
const auto& mode = deviceInfo.config.find(CONFIG_KEY(PERFORMANCE_HINT));
|
||||
if (bThroughputEnabledInPlugin ||
|
||||
(mode != deviceInfo.config.end() && mode->second == CONFIG_VALUE(THROUGHPUT))) {
|
||||
unsigned int upperBoundStreamsNum = 0;
|
||||
std::map<std::string, IE::Parameter> options;
|
||||
options["MODEL_PTR"] = std::const_pointer_cast<ngraph::Function>
|
||||
(_autoSContext->_network.getFunction());
|
||||
try {
|
||||
auto rangeOfStreams = _autoSContext->_core->GetMetric(deviceInfo.deviceName,
|
||||
METRIC_KEY(RANGE_FOR_STREAMS),
|
||||
options).as<std::tuple<unsigned int, unsigned int>>();
|
||||
upperBoundStreamsNum = std::get<1>(rangeOfStreams);
|
||||
} catch (const IE::Exception&) {
|
||||
LOG_DEBUG_TAG("GetMetric RANGE_FOR_STREAMS failed");
|
||||
}
|
||||
if (!_autoSContext->_batchingDisabled) {
|
||||
try {
|
||||
optimalBatchSize = _autoSContext->_core->GetMetric(deviceInfo.deviceName,
|
||||
METRIC_KEY(OPTIMAL_BATCH_SIZE), options).as<unsigned int>();
|
||||
LOG_DEBUG_TAG("BATCHING:%s:%ld", "optimal batch size",
|
||||
optimalBatchSize);
|
||||
} catch (const IE::Exception&) {
|
||||
LOG_DEBUG_TAG("BATCHING:%s", "metric OPTIMAL_BATCH_SIZE not supported");
|
||||
}
|
||||
}
|
||||
if (optimalBatchSize > 1) {
|
||||
// batching is supported with the device
|
||||
// go with auto-batching
|
||||
try {
|
||||
// check if app have set preferred value
|
||||
auto res =
|
||||
_autoSContext->_core->GetConfig(deviceInfo.deviceName,
|
||||
CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
|
||||
requests = IE::PerfHintsConfig::CheckPerformanceHintRequestValue(res);
|
||||
const auto& reqs = deviceInfo.config.find(CONFIG_KEY(
|
||||
PERFORMANCE_HINT_NUM_REQUESTS));
|
||||
                        if (reqs != deviceInfo.config.end()) {
                            requests = static_cast<unsigned int>
                                (IE::PerfHintsConfig::CheckPerformanceHintRequestValue(reqs->second));
                        }
                        LOG_DEBUG_TAG("BATCHING:%s:%ld", "user requested size", requests);
                        if (!requests) { // no limitations from user
                            requests = optimalBatchSize * upperBoundStreamsNum * 2;
                            LOG_DEBUG_TAG("BATCHING:%s:%ld", "deduced size:", requests);
                        }
                    } catch (const IE::Exception& iie) {
                        LOG_WARNING_TAG("deduce optimal infer request num for auto-batch failed :%s",
                                        iie.what());
                    }
                    real = (std::max)(requests, optimalBatchSize);
                } else if (deviceInfo.deviceName.find("VPUX") != std::string::npos) {
                    real = 8u;
                } else {
                    real = upperBoundStreamsNum ? 2 * upperBoundStreamsNum : defaultNumForTPUT;
                }
            } else {
                real = defaultNumForLatency;
            }
        }
        return decltype(ov::optimal_number_of_infer_requests)::value_type {real};
    } else if (name == ov::execution_devices) {
        ov::Any execution_devices;
        auto GetExecutionDevices = [&execution_devices](std::string ExeDevicesString) {
            std::vector<std::string> exeDevices = {};
            if (ExeDevicesString == "CPU_HELP")
                ExeDevicesString = "(CPU)";
            exeDevices.push_back(ExeDevicesString);
            execution_devices = decltype(ov::execution_devices)::value_type {exeDevices};
        };
        if (_autoSchedule->_pCTPUTLoadContext) {
            std::vector<std::string> exeDevices = {};
            std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
            for (auto const & n : _autoSContext->_devicePriorities) {
                exeDevices.push_back(n.deviceName);
            }
            execution_devices = decltype(ov::execution_devices)::value_type {exeDevices};
        } else {
            std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
            for (int i = 0; i < CONTEXTNUM; i++) {
                if (_autoSchedule->_loadContext[i].isEnabled && _autoSchedule->_loadContext[i].isAlready) {
                    if (i == 0 && !_autoSchedule->_loadContext[CPU].executableNetwork._ptr) {
                        continue;
                    } else {
                        GetExecutionDevices(_autoSchedule->_loadContext[i].workName);
                        break;
                    }
                }
            }
        }
        return execution_devices;
    } else if (name == ov::model_name) {
        std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
        if (_autoSchedule->_pCTPUTLoadContext) {
            for (size_t i = 0; i < _autoSchedule->_nCTputDeviceNums; i++) {
                if (_autoSchedule->_pCTPUTLoadContext[i].isAlready) {
                    return _autoSchedule->_pCTPUTLoadContext[i].executableNetwork->GetMetric(name);
                }
            }
            IE_THROW() << "No valid executable network found to get " << name;
        } else {
            if (_autoSchedule->_loadContext[CPU].isEnabled && _autoSchedule->_loadContext[CPU].isAlready)
                return _autoSchedule->_loadContext[CPU].executableNetwork->GetMetric(name);
            return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
        }
    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
                             {METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
                              METRIC_KEY(SUPPORTED_METRICS),
                              METRIC_KEY(NETWORK_NAME),
                              METRIC_KEY(SUPPORTED_CONFIG_KEYS)});
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, {});
    }
    IE_THROW() << "Unsupported metric key: " << name;
}
} // namespace MultiDevicePlugin
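For context, the values this GetMetric chain computes are what an application reaches through ov::CompiledModel::get_property on the 2.0 API side. A minimal sketch of querying them from application code, assuming a placeholder model path "model.xml":

    #include <iostream>
    #include "openvino/openvino.hpp"

    int main() {
        ov::Core core;
        // "model.xml" is a placeholder path; "AUTO" lets the plugin pick devices.
        auto compiled = core.compile_model("model.xml", "AUTO");
        // Same value the ov::optimal_number_of_infer_requests branch above computes.
        auto nireq = compiled.get_property(ov::optimal_number_of_infer_requests);
        // Same list the ov::execution_devices branch above assembles.
        auto devices = compiled.get_property(ov::execution_devices);
        std::cout << "optimal number of infer requests: " << nireq << "\n";
        for (const auto& d : devices)
            std::cout << "execution device: " << d << "\n";
        return 0;
    }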
@@ -1,39 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once

#include <string>
#include <map>

#include "auto_schedule.hpp"
#include "executable_network.hpp"

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif

namespace MultiDevicePlugin {
class AutoExecutableNetwork : public ExecutableNetwork {
    friend IInferPtr AutoSchedule::CreateInferRequest();
public:
    using Ptr = std::shared_ptr<AutoExecutableNetwork>;

    explicit AutoExecutableNetwork(AutoScheduleContext::Ptr& context, const AutoSchedule::Ptr& schedule);

    void SetConfig(const std::map<std::string, IE::Parameter>& config) override;
    IE::Parameter GetConfig(const std::string& name) const override;
    IE::Parameter GetMetric(const std::string& name) const override;
    std::shared_ptr<IE::RemoteContext> GetContext() const override;
    virtual ~AutoExecutableNetwork() = default;

private:
    AutoScheduleContext::Ptr _autoSContext;
    AutoSchedule::Ptr _autoSchedule;
};
} // namespace MultiDevicePlugin
File diff suppressed because it is too large
@@ -6,109 +6,38 @@
#pragma once

#include "schedule.hpp"
#include "async_infer_request.hpp"

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
struct ThisRequestExecutor : public IE::ITaskExecutor {
    explicit ThisRequestExecutor(WorkerInferRequest** ptr, MultiImmediateExecutor::Ptr executor = nullptr): _workptrptr{ptr}, _fallbackExec(executor) {}
    void run(IE::Task task) override {
        (*_workptrptr)->_task = std::move(task);
        (*_workptrptr)->_fallbackExec = _fallbackExec;
        (*_workptrptr)->_inferRequest->StartAsync();
    };
    WorkerInferRequest** _workptrptr = nullptr;
    MultiImmediateExecutor::Ptr _fallbackExec;
};
struct AutoLoadContext {
    std::atomic<bool> isEnabled = {false};
    std::atomic<bool> isAlready = {false};
    std::atomic<bool> isLoadSuccess = {false};
    std::atomic<bool> isReloadSuccess = {false};
    std::future<void> future;
    std::promise<void> promise;
    SoExecNetwork executableNetwork;
    DeviceInformation deviceInfo;
    std::vector<DeviceInformation> metaDevices;
    std::string networkPrecision;
    std::string errMessage;
    IE::Task task;
    // ACTUALDEVICE's workName is the same as its deviceName;
    // CPU_HELP's workName is "CPU_HELP", and its deviceName is "CPU".
    // workName is added because ACTUALDEVICE and CPU may both be CPU,
    // in which case they cannot share the same workerQueue.
    std::string workName = "";
};
namespace ov {
namespace auto_plugin {

enum AutoLoadContextIndex {
    CPU = 0,
    ACTUALDEVICE = 1,
    FALLBACKDEVICE = 2,
    CONTEXTNUM = 3
};
class AutoSchedule : public Schedule, public IE::ITaskExecutor {
class AutoSchedule : public Schedule {
public:
    using Ptr = std::shared_ptr<AutoSchedule>;
    void init(const ScheduleContext::Ptr& sContext) override;
    IInferPtr CreateInferRequest() override;
    IInferPtr CreateInferRequestImpl(IE::InputsDataMap networkInputs, IE::OutputsDataMap networkOutputs) override;
    IInferPtr CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                     const std::vector<std::shared_ptr<const ov::Node>>& outputs) override;
    void run(IE::Task inferTask) override;
    Pipeline GetPipeline(const IInferPtr& syncRequestImpl, WorkerInferRequest** WorkerInferRequest) override;
    void WaitActualNetworkReady() const;
    virtual ~AutoSchedule();

public:
    static thread_local WorkerInferRequest* _thisWorkerInferRequest;
    // have to use the const char* ptr rather than std::string due to a bug in old gcc versions,
    // the bug is e.g. manifesting on the old CentOS (and its 4.8.x gcc) used in our testing
    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81880
    static thread_local const char* _thisPreferredDeviceName;
    AutoLoadContext _loadContext[CONTEXTNUM];
    std::unique_ptr<AutoLoadContext[]> _pCTPUTLoadContext = nullptr;
    size_t _nCTputDeviceNums = 0;

protected:
    void GenerateWorkers(const std::string& device, const SoExecNetwork& executableNetwork);
    bool ScheduleToWorkerInferRequest(IE::Task, DeviceName preferred_device = "");
    static bool RunPipelineTask(IE::Task& inferPipelineTask, NotBusyPriorityWorkerRequests& idleWorkerRequests,
                                const DeviceName& preferred_device);
    std::string GetLogTag() const noexcept;
    DeviceMap<NotBusyPriorityWorkerRequests> _idleWorkerRequests;
    AutoScheduleContext::Ptr _autoSContext;
    std::atomic_size_t _numRequestsCreated = {0};
    DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
    AutoCompileContext m_compile_context[CONTEXTNUM];

private:
    void init() override;
    // release actual task
    // ov::threading::Task release_actualdevice_task;
    bool schedule_to_worker_infer_request(ov::threading::Task, DeviceName preferred_device = "") override;
    void wait_actual_compiled_model_ready() const;
    /**
     * @brief wait for one of the executable networks to finish loading.
     * @return An SoPtr object holding an available executable network loaded to a HW device.
     * @note An exception will be thrown if loading fails on all HW devices.
     * @brief wait for one of the compiled models to finish loading.
     * @return An SoPtr object holding an available compiled model loaded to a HW device.
     * @note An exception will be thrown if loading fails on all HW devices.
     */
    SoExecNetwork WaitFirstNetworkReady();
    void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network, bool isCumulative);
    bool selectOtherDevice(const std::string& currentDeviceName);
    IE::Task releaseActualdeviceTask;

private:
    IE::ThreadSafeQueue<IE::Task> _inferPipelineTasks;
    DeviceMap<std::unique_ptr<IE::ThreadSafeQueue<IE::Task>>> _inferPipelineTasksDeviceSpecific;
    SoExecNetwork _passthroughExeNet;
    Time _cpuHelpReleaseTime;
    size_t _cpuHelpInferCount = 0;
    double _cpuHelpFps = 0.0;
    std::string _LogTag;
    IE::IStreamsExecutor::Ptr _executor;
    mutable std::once_flag _oc;
    std::once_flag _firstLoadOC;
    std::future<void> _firstLoadFuture;
    std::promise<void> _firstLoadPromise;
    bool _exitFlag = {false};
    SoCompiledModel wait_first_compiled_model_ready() override;
    void try_to_compile_model(AutoCompileContext& context, const std::shared_ptr<ov::Model>& model) override;
    bool select_other_device(const std::string& cur_dev_name) override;
    size_t m_cpuhelp_infer_count = 0;
    double m_cpuhelp_fps = 0.0;
    mutable std::once_flag m_oc;
    std::once_flag m_firstload_oc;
    std::future<void> m_firstload_future;
    std::promise<void> m_firstload_promise;
    bool m_exitflag = {false};
};

} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov
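The Stage/Pipeline pair that GetPipeline above returns couples every pipeline step with the executor that must run it; ThisRequestExecutor is one such executor, which parks the task on a worker infer request. A self-contained toy sketch of that shape (the Task, ITaskExecutor, and InlineExecutor types below are stand-ins, not the plugin's real classes):

    #include <functional>
    #include <iostream>
    #include <memory>
    #include <utility>
    #include <vector>

    // Toy stand-ins for ov::threading::Task / ITaskExecutor, only to show the shape.
    using Task = std::function<void()>;
    struct ITaskExecutor {
        virtual ~ITaskExecutor() = default;
        virtual void run(Task task) = 0;
    };
    struct InlineExecutor : ITaskExecutor {
        void run(Task task) override { task(); }  // run the stage task immediately
    };

    using Stage = std::pair<std::shared_ptr<ITaskExecutor>, Task>;
    using Pipeline = std::vector<Stage>;

    int main() {
        auto exec = std::make_shared<InlineExecutor>();
        Pipeline pipeline = {
            {exec, [] { std::cout << "stage 1: submit to device\n"; }},
            {exec, [] { std::cout << "stage 2: check for exception\n"; }},
        };
        // A scheduler walks the pipeline, handing each task to its executor.
        for (auto& stage : pipeline)
            stage.first->run(stage.second);
        return 0;
    }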
@@ -1,98 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "async_infer_request.hpp"
#include "plugin.hpp"
#include "bind_multi_schedule.hpp"
// ------------------------------MultiSchedule----------------------------
namespace MultiDevicePlugin {

void BinderMultiSchedule::init(const ScheduleContext::Ptr& sContext) {
    AutoSchedule::init(sContext);
    LOG_INFO_TAG("enable bind buffer for AUTO");
}

Pipeline BinderMultiSchedule::GetPipeline(const IInferPtr& syncInferRequest, WorkerInferRequest** workerInferRequest) {
    Pipeline pipeline;
    struct RequestExecutor : ITaskExecutor {
        explicit RequestExecutor(InferenceEngine::SoIInferRequestInternal& inferRequest) : _inferRequest(inferRequest) {
            _inferRequest->SetCallback([this](std::exception_ptr exceptionPtr) mutable {
                _exceptionPtr = exceptionPtr;
                auto capturedTask = std::move(_task);
                capturedTask();
            });
        }
        void run(InferenceEngine::Task task) override {
            _task = std::move(task);
            _inferRequest->StartAsync();
        };
        InferenceEngine::SoIInferRequestInternal& _inferRequest;
        std::exception_ptr _exceptionPtr;
        InferenceEngine::Task _task;
    };
    auto requestExecutor = std::make_shared<RequestExecutor>(
        std::static_pointer_cast<MultiDeviceInferRequest>(syncInferRequest)->GetSharedRequest());
    pipeline.emplace_back(requestExecutor, [requestExecutor] {
        if (nullptr != requestExecutor->_exceptionPtr) {
            std::rethrow_exception(requestExecutor->_exceptionPtr);
        }
    });
    return pipeline;
}

BinderMultiSchedule::~BinderMultiSchedule() {
}

IInferPtr BinderMultiSchedule::CreateInferRequestImpl(
    const std::vector<std::shared_ptr<const ov::Node>>& inputs,
    const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
    auto num = _numRequestsCreated++;
    size_t sum = 0;
    SoInfer request_to_share_blobs_with;
    // borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
    // this allows to potentially save on the data-copy later (if the requests are scheduled in the same order)
    for (const auto& device : _autoSContext->_devicePrioritiesInitial) {
        auto& dev_requests = _workerRequests[device.deviceName];
        if ((num - sum) < dev_requests.size()) {
            request_to_share_blobs_with = dev_requests.at(num - sum)._inferRequest;
            break;
        }
        sum += dev_requests.size();
    }
    if (!request_to_share_blobs_with) {
        IE_THROW() <<
            "binder mode does not allow oversubscription of infer requests,"
            " please use the optimal number of infer requests";
    }
    auto syncImpl = std::make_shared<MultiDeviceInferRequest>(inputs, outputs, request_to_share_blobs_with);
    return syncImpl;
}

IInferPtr BinderMultiSchedule::CreateInferRequestImpl(IE::InputsDataMap networkInputs,
                                                      IE::OutputsDataMap networkOutputs) {
    auto num = _numRequestsCreated++;
    SoInfer request_to_share_blobs_with;
    size_t sum = 0;
    // borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
    // this allows to potentially save on the data-copy later (if the requests are scheduled in the same order)
    for (const auto& device : _autoSContext->_devicePrioritiesInitial) {
        auto& dev_requests = _workerRequests[device.deviceName];
        if ((num - sum) < dev_requests.size()) {
            request_to_share_blobs_with = dev_requests.at(num - sum)._inferRequest;
            break;
        }
        sum += dev_requests.size();
    }
    if (!request_to_share_blobs_with) {
        IE_THROW() <<
            "binder mode does not allow oversubscription of infer requests,"
            " please use the optimal number of infer requests";
    }
    auto syncImpl = std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with);
    return syncImpl;
}

} // namespace MultiDevicePlugin
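Both CreateInferRequestImpl overloads above share the same blob-borrowing arithmetic: the num-th user-facing request is mapped onto a worker request by walking the per-device pools in priority order. A standalone sketch of just that mapping, with made-up pool sizes:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Each pair: device name and how many worker requests it owns (hypothetical sizes).
    static std::pair<std::string, size_t> find_worker(size_t num,
            const std::vector<std::pair<std::string, size_t>>& pools) {
        size_t sum = 0;
        for (const auto& pool : pools) {
            if (num - sum < pool.second)           // request lands in this device's pool
                return {pool.first, num - sum};    // device name + index within its pool
            sum += pool.second;                    // skip past this pool, as in the loop above
        }
        return {"", 0};  // oversubscription: no worker left, the plugin throws here
    }

    int main() {
        std::vector<std::pair<std::string, size_t>> pools = {{"GPU", 4}, {"CPU", 2}};
        for (size_t num = 0; num < 7; ++num) {
            auto hit = find_worker(num, pools);
            std::cout << "request " << num << " -> "
                      << (hit.first.empty() ? "throw" : hit.first + "[" + std::to_string(hit.second) + "]")
                      << "\n";
        }
        return 0;
    }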
@@ -1,28 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once

#include "auto_schedule.hpp"

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif

namespace MultiDevicePlugin {
class BinderMultiSchedule : public AutoSchedule {
public:
    using Ptr = std::shared_ptr<BinderMultiSchedule>;
    IInferPtr CreateInferRequestImpl(IE::InputsDataMap networkInputs, IE::OutputsDataMap networkOutputs) override;
    IE::IInferRequestInternal::Ptr CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                                          const std::vector<std::shared_ptr<const ov::Node>>& outputs) override;
    void init(const ScheduleContext::Ptr& sContext) override;
    Pipeline GetPipeline(const IInferPtr& syncRequestImpl, WorkerInferRequest** WorkerInferRequest) override;
    virtual ~BinderMultiSchedule();
};
} // namespace MultiDevicePlugin
@@ -10,14 +10,13 @@
#include "ie_icore.hpp"
#include "ie_metric_helpers.hpp"
#include <ie_plugin_config.hpp>
#include "cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp"
#include "threading/ie_executor_manager.hpp"
#include "threading/ie_immediate_executor.hpp"
#include "threading/ie_istreams_executor.hpp"
#include "threading/ie_itask_executor.hpp"
#include "threading/ie_thread_safe_containers.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/iasync_infer_request.hpp"
#include "openvino/runtime/threading/itask_executor.hpp"
#include "openvino/runtime/remote_tensor.hpp"
#include "openvino/runtime/threading/thread_safe_containers.hpp"
#include "utils/log_util.hpp"
#include <ie_performance_hints.hpp>
#include "openvino/runtime/auto/properties.hpp"
#include "ngraph/opsets/opset1.hpp"
#include "transformations/utils/utils.hpp"
@@ -26,191 +25,226 @@

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#define auto_plugin mock_auto_plugin
#else
#define MOCKTESTMACRO
#endif

namespace MultiDevicePlugin {
namespace IE = InferenceEngine;
namespace ov {
namespace auto_plugin {
using DeviceName = std::string;
using IInferPtr = IE::IInferRequestInternal::Ptr;
using IExecNetwork = IE::IExecutableNetworkInternal;
using SoInfer = IE::SoIInferRequestInternal;
using SoExecNetwork = IE::SoExecutableNetworkInternal;
using IASyncInferPtr = std::shared_ptr<ov::IAsyncInferRequest>;
using ISyncInferPtr = std::shared_ptr<ov::ISyncInferRequest>;
using SoAsyncInferRequest = ov::SoPtr<ov::IAsyncInferRequest>;
using SoCompiledModel = ov::SoPtr<ov::ICompiledModel>;
using Time = std::chrono::time_point<std::chrono::steady_clock>;
using Stage = std::pair<std::shared_ptr<ov::threading::ITaskExecutor>, ov::threading::Task>;
using Pipeline = std::vector<Stage>;

template<typename T>
using DeviceMap = std::unordered_map<DeviceName, T>;

struct MultiImmediateExecutor : public IE::ITaskExecutor {
// Bell to do: check whether this is needed, or whether the plain immediate executor is enough
struct AutoImmediateExecutor : public ov::threading::ITaskExecutor {
public:
    /**
     * @brief A shared pointer to an ImmediateExecutor object
     */
    using Ptr = std::shared_ptr<MultiImmediateExecutor>;
    using Ptr = std::shared_ptr<AutoImmediateExecutor>;

    /**
     * @brief Destroys the object.
     */
    ~MultiImmediateExecutor() override = default;
    ~AutoImmediateExecutor() override = default;

    void run(IE::Task task) override {
        _task = std::move(task);
        _task();
    void run(ov::threading::Task task) override {
        immediate_task = std::move(task);
        immediate_task();
    }
    IE::Task _task;
};

struct DeviceInformation {
    DeviceName deviceName;
    std::map<std::string, std::string> config;
    int numRequestsPerDevices;
    std::string defaultDeviceID;
    DeviceName uniqueName;
    unsigned int devicePriority;
    DeviceInformation(DeviceName dn = {}, std::map<std::string, std::string> conf = {},
        int nReq = -1, std::string defaultID = {}, DeviceName uName = {}, unsigned int priority = 0)
        : deviceName(dn), config(conf),
          numRequestsPerDevices(nReq), defaultDeviceID(defaultID), uniqueName(uName), devicePriority(priority)
    {}
    ov::threading::Task immediate_task;
};

struct WorkerInferRequest {
    SoInfer _inferRequest;
    IE::Task _task;
    std::exception_ptr _exceptionPtr = nullptr;
    std::list<Time> _startTimes;
    std::list<Time> _endTimes;
    int _index = 0;
    MultiImmediateExecutor::Ptr _fallbackExec;
    SoAsyncInferRequest m_inferrequest;
    ov::threading::Task m_task;
    std::exception_ptr m_exception_ptr = nullptr;
    std::list<Time> m_start_times;
    std::list<Time> m_end_times;
    int m_index = 0;
    AutoImmediateExecutor::Ptr m_fallback_exec;
};

struct ThisRequestExecutor : public ov::threading::ITaskExecutor {
    explicit ThisRequestExecutor(WorkerInferRequest** ptr, AutoImmediateExecutor::Ptr executor = nullptr): m_workptrptr{ptr}, m_fallback_exec(executor) {}
    void run(ov::threading::Task task) override {
        (*m_workptrptr)->m_task = std::move(task);
        (*m_workptrptr)->m_fallback_exec = m_fallback_exec;
        (*m_workptrptr)->m_inferrequest->start_async();
    };
    WorkerInferRequest** m_workptrptr = nullptr;
    AutoImmediateExecutor::Ptr m_fallback_exec;
};

struct DeviceInformation {
    DeviceName device_name;
    ov::AnyMap config;
    int num_requests_per_devices;
    std::string default_device_id;
    DeviceName unique_name;
    unsigned int device_priority;
    DeviceInformation(DeviceName dn = {}, ov::AnyMap conf = {},
        int nReq = -1, std::string defaultID = {}, DeviceName uName = {}, unsigned int priority = 0)
        : device_name(dn), config(conf),
          num_requests_per_devices(nReq), default_device_id(defaultID), unique_name(uName), device_priority(priority)
    {}
};

struct deviceChecker {
    template <typename T,
              typename std::enable_if<std::is_same<typename std::decay<T>::type, std::string>::value, bool>::type = true,
              typename U = typename std::vector<T>::const_iterator>
    U checkAndReturnIfDeviceInList(const std::string& target, const std::vector<T>& deviceList, bool exactMatch = false) {
        if (exactMatch) {
            return std::find_if(deviceList.begin(), deviceList.end(),
    U check_and_return_if_device_in_list(const std::string& target, const std::vector<T>& device_list, bool exact_match = false) {
        if (exact_match) {
            return std::find_if(device_list.begin(), device_list.end(),
                                [&target](const T& d) { return d == target; });
        }
        return std::find_if(deviceList.begin(), deviceList.end(),
        return std::find_if(device_list.begin(), device_list.end(),
                            [&target](const T & d) {
                                return d.find(target) != std::string::npos;
                            });
    }
    template <typename T,
              typename std::enable_if<std::is_same<typename std::decay<T>::type, std::string>::value, bool>::type = true>
    bool checkIfDeviceInList(const std::string& target, const std::vector<T>& deviceList, bool exactMatch = false) {
        if (exactMatch) {
            return std::find_if(deviceList.begin(), deviceList.end(),
                                [&target](const T& d) { return d == target; }) != deviceList.cend();
    bool check_if_device_in_list(const std::string& target, const std::vector<T>& device_list, bool exact_match = false) {
        if (exact_match) {
            return std::find_if(device_list.begin(), device_list.end(),
                                [&target](const T& d) { return d == target; }) != device_list.cend();
        }
        return std::find_if(deviceList.begin(), deviceList.end(),
        return std::find_if(device_list.begin(), device_list.end(),
                            [&target](const T& d) {
                                return d.find(target) != std::string::npos;
                            }) != deviceList.end();
                            }) != device_list.end();
    }
    template <typename T,
              typename std::enable_if<std::is_same<typename std::decay<T>::type, DeviceInformation>::value, bool>::type = true,
              typename U = typename std::vector<T>::const_iterator>
    U checkAndReturnIfDeviceInList(const std::string& target, const std::vector<T>& deviceList, bool exactMatch = false) {
        if (exactMatch) {
            return std::find_if(deviceList.begin(), deviceList.end(),
                                [&target](const T& d) { return d.deviceName == target; });
    U check_and_return_if_device_in_list(const std::string& target, const std::vector<T>& device_list, bool exact_match = false) {
        if (exact_match) {
            return std::find_if(device_list.begin(), device_list.end(),
                                [&target](const T& d) { return d.device_name == target; });
        }
        return std::find_if(deviceList.begin(), deviceList.end(),
        return std::find_if(device_list.begin(), device_list.end(),
                            [&target](const T& d) {
                                return d.deviceName.find(target) != std::string::npos;
                                return d.device_name.find(target) != std::string::npos;
                            });
    }
    template <typename T,
              typename std::enable_if<std::is_same<typename std::decay<T>::type, DeviceInformation>::value, bool>::type = true>
    bool checkIfDeviceInList(const std::string& target, const std::vector<T>& deviceList, bool exactMatch = false) {
        if (exactMatch) {
            return std::find_if(deviceList.begin(), deviceList.end(),
                                [&target](const T& d) { return d.deviceName == target; }) != deviceList.end();
    bool check_if_device_in_list(const std::string& target, const std::vector<T>& device_list, bool exact_match = false) {
        if (exact_match) {
            return std::find_if(device_list.begin(), device_list.end(),
                                [&target](const T& d) { return d.device_name == target; }) != device_list.end();
        }
        return std::find_if(deviceList.begin(), deviceList.end(),
        return std::find_if(device_list.begin(), device_list.end(),
                            [&target](const T& d) {
                                return d.deviceName.find(target) != std::string::npos;
                            }) != deviceList.end();
                                return d.device_name.find(target) != std::string::npos;
                            }) != device_list.end();
    }
};
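deviceChecker above provides the same lookup in two flavours, exact and substring, over either plain strings or DeviceInformation entries. A trimmed, standalone sketch of the two matching modes on plain strings:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // Substring match (default) vs. exact match, as in check_if_device_in_list.
    static bool device_in_list(const std::string& target,
                               const std::vector<std::string>& device_list,
                               bool exact_match = false) {
        if (exact_match) {
            return std::find(device_list.begin(), device_list.end(), target) != device_list.end();
        }
        return std::find_if(device_list.begin(), device_list.end(),
                            [&target](const std::string& d) {
                                return d.find(target) != std::string::npos;
                            }) != device_list.end();
    }

    int main() {
        std::vector<std::string> devices = {"GPU.0", "GPU.1", "CPU"};
        std::cout << device_in_list("GPU", devices) << "\n";        // 1: substring hits "GPU.0"
        std::cout << device_in_list("GPU", devices, true) << "\n";  // 0: no exact "GPU" entry
        return 0;
    }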
using NotBusyPriorityWorkerRequests = IE::ThreadSafeBoundedPriorityQueue<std::pair<int, WorkerInferRequest*>>;
using NotBusyWorkerRequests = IE::ThreadSafeBoundedQueue<WorkerInferRequest*>;
using NotBusyPriorityWorkerRequests = ov::threading::ThreadSafeBoundedPriorityQueue<std::pair<int, WorkerInferRequest*>>;
using NotBusyWorkerRequests = ov::threading::ThreadSafeBoundedQueue<WorkerInferRequest*>;
using TaskQueue = ov::threading::ThreadSafeQueue<ov::threading::Task>;

template <typename T>
struct IdleGuard {};
template<>
struct IdleGuard<NotBusyWorkerRequests> {
    explicit IdleGuard(WorkerInferRequest* workerInferRequestPtr, NotBusyWorkerRequests& notBusyWorkerRequests) :
        _workerInferRequestPtr{workerInferRequestPtr},
        _notBusyWorkerRequests{&notBusyWorkerRequests} {
    explicit IdleGuard(WorkerInferRequest* worker_inferrequest_ptr, NotBusyWorkerRequests& not_busy_worker_requests) :
        m_worker_inferrequest_ptr{worker_inferrequest_ptr},
        m_not_busy_worker_requests{&not_busy_worker_requests} {
    }
    ~IdleGuard() {
        if (nullptr != _notBusyWorkerRequests) {
            _notBusyWorkerRequests->try_push(_workerInferRequestPtr);
        if (nullptr != m_not_busy_worker_requests) {
            m_not_busy_worker_requests->try_push(m_worker_inferrequest_ptr);
        }
    }
    NotBusyWorkerRequests* Release() {
        auto notBusyWorkerRequests = _notBusyWorkerRequests;
        _notBusyWorkerRequests = nullptr;
        return notBusyWorkerRequests;
    NotBusyWorkerRequests* release() {
        auto not_busy_worker_requests = m_not_busy_worker_requests;
        m_not_busy_worker_requests = nullptr;
        return not_busy_worker_requests;
    }
    WorkerInferRequest* _workerInferRequestPtr = nullptr;
    NotBusyWorkerRequests* _notBusyWorkerRequests = nullptr;
    WorkerInferRequest* m_worker_inferrequest_ptr = nullptr;
    NotBusyWorkerRequests* m_not_busy_worker_requests = nullptr;
};

template<>
struct IdleGuard<NotBusyPriorityWorkerRequests> {
    explicit IdleGuard(WorkerInferRequest* workerInferRequestPtr, NotBusyPriorityWorkerRequests& notBusyWorkerRequests) :
        _workerInferRequestPtr{workerInferRequestPtr},
        _notBusyWorkerRequests{&notBusyWorkerRequests} {
    explicit IdleGuard(WorkerInferRequest* worker_inferrequest_ptr, NotBusyPriorityWorkerRequests& not_busy_worker_requests) :
        m_worker_inferrequest_ptr{worker_inferrequest_ptr},
        m_not_busy_worker_requests{&not_busy_worker_requests} {
    }
    ~IdleGuard() {
        if (nullptr != _notBusyWorkerRequests) {
            _notBusyWorkerRequests->try_push(std::make_pair(_workerInferRequestPtr->_index, _workerInferRequestPtr));
        if (nullptr != m_not_busy_worker_requests) {
            m_not_busy_worker_requests->try_push(std::make_pair(m_worker_inferrequest_ptr->m_index, m_worker_inferrequest_ptr));
        }
    }
    NotBusyPriorityWorkerRequests* Release() {
        auto notBusyWorkerRequests = _notBusyWorkerRequests;
        _notBusyWorkerRequests = nullptr;
        return notBusyWorkerRequests;
    NotBusyPriorityWorkerRequests* release() {
        auto not_busy_worker_requests_queue = m_not_busy_worker_requests;
        m_not_busy_worker_requests = nullptr;
        return not_busy_worker_requests_queue;
    }
    WorkerInferRequest* _workerInferRequestPtr = nullptr;
    NotBusyPriorityWorkerRequests* _notBusyWorkerRequests = nullptr;
    WorkerInferRequest* m_worker_inferrequest_ptr = nullptr;
    NotBusyPriorityWorkerRequests* m_not_busy_worker_requests = nullptr;
};
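Both IdleGuard specializations are RAII helpers: unless release() is called, the destructor pushes the worker back into the idle queue, including on exception paths. A toy illustration of that contract (the IdleQueue type below is a plain stand-in for the thread-safe queues above):

    #include <deque>
    #include <iostream>

    struct Worker { int id; };

    // Toy idle "queue"; the real code uses a thread-safe bounded queue.
    struct IdleQueue {
        std::deque<Worker*> q;
        void try_push(Worker* w) { q.push_back(w); }
    };

    struct IdleGuard {
        IdleGuard(Worker* w, IdleQueue& q) : m_worker(w), m_queue(&q) {}
        ~IdleGuard() {
            if (m_queue)               // not released: return the worker to the idle pool
                m_queue->try_push(m_worker);
        }
        IdleQueue* release() {         // caller takes over responsibility for re-queueing
            auto* q = m_queue;
            m_queue = nullptr;
            return q;
        }
        Worker* m_worker;
        IdleQueue* m_queue;
    };

    int main() {
        IdleQueue idle;
        Worker w{7};
        {
            IdleGuard guard(&w, idle);
            // scheduling failed or threw: the guard re-queues the worker automatically
        }
        std::cout << "idle workers: " << idle.q.size() << "\n";  // 1
        return 0;
    }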
class ScheduleContext : public std::enable_shared_from_this<ScheduleContext> {

class Plugin;
class ScheduleContext : public std::enable_shared_from_this<ScheduleContext> {
public:
    using Ptr = std::shared_ptr<ScheduleContext>;
    std::shared_ptr<IE::ICore> _core;
    std::weak_ptr<IExecNetwork> _executableNetwork;
    std::string _LogTag;
    std::shared_ptr<ov::ICore> m_ov_core;
    std::weak_ptr<ov::ICompiledModel> m_compiled_model;
    std::string m_log_tag;
    std::vector<DeviceInformation> m_device_priorities;
    std::vector<DeviceInformation> m_device_priorities_initial;
    bool m_need_perf_counters;
    bool m_batching_disabled = false;
    bool m_startup_fallback = true;
    bool m_runtime_fallback = true;
    bool m_bind_buffer = false;
    std::shared_ptr<ov::Model> m_model;
    std::string m_model_path;
    std::shared_ptr<const ov::IPlugin> m_plugin;
    std::string m_str_devices;
    unsigned int m_model_priority = 0;
    ov::Any m_performance_hint;
    std::mutex m_mutex;
    std::mutex m_fallback_mutex;
    SoCompiledModel m_hw_compiled_model;
    std::string m_model_precision;
    virtual ~ScheduleContext() = default;
};

class MultiDeviceInferencePlugin;
class AutoScheduleContext : public ScheduleContext {
public:
    using Ptr = std::shared_ptr<AutoScheduleContext>;
    std::vector<DeviceInformation> _devicePriorities;
    std::vector<DeviceInformation> _devicePrioritiesInitial;
    std::unordered_map<std::string, IE::Parameter> _config;
    bool _needPerfCounters;
    bool _batchingDisabled = {false};
    bool _startupfallback = true;
    bool _runtimeFallback = true;
    std::string _modelPath;
    IE::CNNNetwork _network;
    std::string _strDevices;
    unsigned int _modelPriority = 0;
    std::string _performanceHint;
    std::mutex _confMutex;
    std::mutex _fallbackMutex;
    MultiDeviceInferencePlugin* _plugin;
    SoExecNetwork _hwExecutableNetwork;
    virtual ~AutoScheduleContext() = default;
struct AutoCompileContext {
    std::atomic<bool> m_is_enabled = {false};
    std::atomic<bool> m_is_already = {false};
    std::atomic<bool> m_is_load_success = {false};
    std::atomic<bool> m_is_reload_success = {false};
    std::future<void> m_future;
    std::promise<void> m_promise;
    SoCompiledModel m_compiled_model;
    DeviceInformation m_device_info;
    std::vector<DeviceInformation> m_meta_devices;
    std::string m_model_precision;
    std::string m_err_message;
    ov::threading::Task m_task;
    std::string m_worker_name = "";
};
} // namespace MultiDevicePlugin

enum AutoCompileContextIndex {
    CPU = 0,
    ACTUALDEVICE = 1,
    FALLBACKDEVICE = 2,
    CONTEXTNUM = 3
};
} // namespace auto_plugin
} // namespace ov
src/plugins/auto/src/compiled_model.cpp (new file, 87 lines)
@@ -0,0 +1,87 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "compiled_model.hpp"
#include "common.hpp"
#include <memory>

#include "async_infer_request.hpp"
#include "itt.hpp"
#include "openvino/op/util/op_types.hpp"
#include "openvino/runtime/exec_model_info.hpp"
#include "openvino/runtime/properties.hpp"
#include "plugin.hpp"
#include "transformations/rt_info/fused_names_attribute.hpp"
#include "transformations/utils/utils.hpp"

ov::auto_plugin::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
                                              const std::shared_ptr<const ov::IPlugin>& plugin,
                                              ScheduleContext::Ptr context,
                                              Schedule::Ptr scheduler)
    : ov::ICompiledModel(model, plugin),
      m_context(context),
      m_scheduler(scheduler) {
    scheduler->launch(context);
    m_inputs_outputs_from_hardware = (model == nullptr);
}

const std::vector<ov::Output<const ov::Node>>& ov::auto_plugin::CompiledModel::outputs() const {
    if (m_inputs_outputs_from_hardware && m_context->m_hw_compiled_model)
        return m_context->m_hw_compiled_model->outputs();
    return ov::ICompiledModel::outputs();
}

const std::vector<ov::Output<const ov::Node>>& ov::auto_plugin::CompiledModel::inputs() const {
    if (m_inputs_outputs_from_hardware && m_context->m_hw_compiled_model)
        return m_context->m_hw_compiled_model->inputs();
    return ov::ICompiledModel::inputs();
}

ov::auto_plugin::ISyncInferPtr ov::auto_plugin::CompiledModel::create_sync_infer_request() const {
    return m_scheduler->create_sync_infer_request();
}

ov::auto_plugin::IASyncInferPtr ov::auto_plugin::CompiledModel::create_infer_request() const {
    const_cast<CompiledModel*>(this)->set_compile_model_for_context();
    auto internal_request = create_sync_infer_request();
    auto async_infer_request = std::make_shared<AsyncInferRequest>(
        m_scheduler,
        std::static_pointer_cast<InferRequest>(internal_request),
        get_callback_executor());
    return async_infer_request;
}

std::string ov::auto_plugin::CompiledModel::get_log_tag() const noexcept {
    return m_context->m_log_tag;
}

ov::AnyMap ov::auto_plugin::CompiledModel::get_device_supported_properties(AutoCompileContext& context) {
    ov::AnyMap all_devices;
    ov::AnyMap device_properties = {};
    OPENVINO_ASSERT(context.m_compiled_model);
    auto device_supported_properties = context.m_compiled_model->get_property(ov::supported_properties.name());
    for (auto&& property_name : device_supported_properties.as<std::vector<ov::PropertyName>>()) {
        // due to an LTO issue, explicitly do the conversion here
        std::string query_name = property_name;
        device_properties[property_name] = context.m_compiled_model->get_property(query_name);
    }
    all_devices[context.m_device_info.device_name] = device_properties;
    return all_devices;
}

void ov::auto_plugin::CompiledModel::set_compile_model_for_context() {
    std::call_once(m_oc, [this]() {
        m_context->m_compiled_model = shared_from_this();
    });
}

std::shared_ptr<const ov::auto_plugin::Plugin> ov::auto_plugin::CompiledModel::get_auto_plugin() {
    auto plugin = get_plugin();
    OPENVINO_ASSERT(plugin);
    auto auto_plugin = std::static_pointer_cast<const ov::auto_plugin::Plugin>(plugin);
    OPENVINO_ASSERT(auto_plugin);
    return auto_plugin;
}
src/plugins/auto/src/compiled_model.hpp (new file, 37 lines)
@@ -0,0 +1,37 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "schedule.hpp"

namespace ov {
namespace auto_plugin {
class Schedule;
class CompiledModel : public ov::ICompiledModel {
public:
    CompiledModel(const std::shared_ptr<ov::Model>& model,
                  const std::shared_ptr<const ov::IPlugin>& plugin,
                  ScheduleContext::Ptr context,
                  Schedule::Ptr scheduler);

    std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;
    std::shared_ptr<const Plugin> get_auto_plugin();
    const std::vector<ov::Output<const ov::Node>>& outputs() const override;
    const std::vector<ov::Output<const ov::Node>>& inputs() const override;

protected:
    std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
    std::string get_log_tag() const noexcept;
    static ov::AnyMap get_device_supported_properties(AutoCompileContext& context);

private:
    ScheduleContext::Ptr m_context;
    Schedule::Ptr m_scheduler;
    std::once_flag m_oc;
    bool m_inputs_outputs_from_hardware;
    void set_compile_model_for_context();
};
} // namespace auto_plugin
} // namespace ov
src/plugins/auto/src/cumulative_compiled_model.cpp (new file, 157 lines)
@@ -0,0 +1,157 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cumulative_compiled_model.hpp"
#include "common.hpp"
#include <memory>

#include "async_infer_request.hpp"
#include "itt.hpp"
#include "openvino/runtime/exec_model_info.hpp"
#include "openvino/runtime/properties.hpp"
#include "plugin.hpp"
#include "ie_plugin_config.hpp"

namespace ov {
namespace auto_plugin {
AutoCumuCompiledModel::AutoCumuCompiledModel(const std::shared_ptr<ov::Model>& model,
                                             const std::shared_ptr<const ov::IPlugin>& plugin,
                                             ScheduleContext::Ptr context,
                                             Schedule::Ptr scheduler)
    : CompiledModel(model, plugin, context, scheduler),
      m_model(model),
      m_context(context) {
    m_scheduler = std::dynamic_pointer_cast<CumuSchedule>(scheduler);
}

void AutoCumuCompiledModel::set_property(const ov::AnyMap& properties) {
    OPENVINO_NOT_IMPLEMENTED;
}

std::shared_ptr<const ov::Model> AutoCumuCompiledModel::get_runtime_model() const {
    if (m_context->m_hw_compiled_model)
        return m_context->m_hw_compiled_model->get_runtime_model();
    OPENVINO_NOT_IMPLEMENTED;
}

ov::Any AutoCumuCompiledModel::get_property(const std::string& name) const {
    const auto& add_ro_properties = [](const std::string& name, std::vector<ov::PropertyName>& properties) {
        properties.emplace_back(ov::PropertyName{name, ov::PropertyMutability::RO});
    };
    const auto& default_ro_properties = []() {
        std::vector<ov::PropertyName> ro_properties{ov::model_name,
                                                    ov::supported_properties,
                                                    ov::execution_devices,
                                                    ov::hint::performance_mode,
                                                    ov::optimal_number_of_infer_requests,
                                                    ov::device::properties,
                                                    ov::hint::model_priority,
                                                    ov::loaded_from_cache};
        return ro_properties;
    };
    const auto& default_rw_properties = []() {
        std::vector<ov::PropertyName> rw_properties{ov::device::priorities};
        return rw_properties;
    };
    const auto& to_string_vector = [](const std::vector<ov::PropertyName>& properties) {
        std::vector<std::string> ret;
        for (const auto& property : properties) {
            ret.emplace_back(property);
        }
        return ret;
    };
    if (name == ov::supported_properties) {
        auto ro_properties = default_ro_properties();
        auto rw_properties = default_rw_properties();

        std::vector<ov::PropertyName> supported_properties;
        supported_properties.reserve(ro_properties.size() + rw_properties.size());
        supported_properties.insert(supported_properties.end(), ro_properties.begin(), ro_properties.end());
        supported_properties.insert(supported_properties.end(), rw_properties.begin(), rw_properties.end());
        return decltype(ov::supported_properties)::value_type(supported_properties);
    } else if (name == ov::hint::performance_mode) {
        return m_context->m_performance_hint;
    } else if (name == ov::device::priorities) {
        // device priority does not support change on-the-fly
        return decltype(ov::device::priorities)::value_type(m_context->m_str_devices);
    } else if (name == ov::device::properties) {
        ov::AnyMap all_devices = {};
        for (size_t i = 0; i < m_scheduler->m_n_ctput_devicenums; i++) {
            if (m_scheduler->m_p_ctput_loadcontext[i].m_is_already) {
                auto temp = get_device_supported_properties(m_scheduler->m_p_ctput_loadcontext[i]);
                all_devices.insert(temp.begin(), temp.end());
            }
        }
        return all_devices;
    } else if (name == ov::hint::model_priority) {
        auto value = m_context->m_model_priority;
        if (m_context->m_ov_core->is_new_api()) {
            return value ? ((value > 1) ? ov::hint::Priority::LOW :
                    ov::hint::Priority::MEDIUM) : ov::hint::Priority::HIGH;
        } else {
            OPENVINO_SUPPRESS_DEPRECATED_START
            return value ? ((value > 1) ? CONFIG_VALUE(MODEL_PRIORITY_LOW) : CONFIG_VALUE(
                    MODEL_PRIORITY_MED)) : CONFIG_VALUE(MODEL_PRIORITY_HIGH);
            OPENVINO_SUPPRESS_DEPRECATED_END
        }
    } else if (name == ov::optimal_number_of_infer_requests) {
        std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
        unsigned int res = 0u;
        for (size_t i = 0; i < m_scheduler->m_n_ctput_devicenums; i++) {
            try {
                if (m_scheduler->m_p_ctput_loadcontext[i].m_is_already) {
                    res += (m_scheduler->m_p_ctput_loadcontext[i])
                               .m_compiled_model->get_property(ov::optimal_number_of_infer_requests.name())
                               .as<unsigned int>();
                }
            } catch (const ov::Exception& err) {
                OPENVINO_THROW("Every device used in cumulative mode should support the OPTIMAL_NUMBER_OF_INFER_REQUESTS property of a compiled model. ",
                               "Failed to query the property with error: ", err.what());
            }
        }
        return decltype(ov::optimal_number_of_infer_requests)::value_type {res};
    } else if (name == ov::execution_devices) {
        std::vector<std::string> exeDevices = {};
        std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
        for (auto const & n : m_context->m_device_priorities) {
            exeDevices.push_back(n.device_name);
        }
        return decltype(ov::execution_devices)::value_type {exeDevices};
    } else if (name == ov::model_name) {
        std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
        for (size_t i = 0; i < m_scheduler->m_n_ctput_devicenums; i++) {
            if (m_scheduler->m_p_ctput_loadcontext[i].m_is_already) {
                return m_scheduler->m_p_ctput_loadcontext[i].m_compiled_model->get_property(name);
            }
        }
        OPENVINO_THROW("No valid compiled model found to get ", name);
    OPENVINO_SUPPRESS_DEPRECATED_START
    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
        auto ro_properties = default_ro_properties();
        add_ro_properties(METRIC_KEY(SUPPORTED_METRICS), ro_properties);
        add_ro_properties(METRIC_KEY(SUPPORTED_CONFIG_KEYS), ro_properties);
        return to_string_vector(ro_properties);
    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
        auto rw_properties = default_rw_properties();
        return to_string_vector(rw_properties);
    OPENVINO_SUPPRESS_DEPRECATED_END
    } else if (name == ov::loaded_from_cache) {
        bool loaded_from_cache = true;
        std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
        for (size_t i = 0; i < m_scheduler->m_n_ctput_devicenums; i++) {
            if (m_scheduler->m_p_ctput_loadcontext[i].m_is_already) {
                loaded_from_cache &= (m_scheduler->m_p_ctput_loadcontext[i].m_compiled_model->get_property(name).as<bool>());
            }
        }
        return loaded_from_cache;
    }
    OPENVINO_THROW(get_log_tag(), ": not supported property ", name);
}

void AutoCumuCompiledModel::export_model(std::ostream& model_stream) const {
    OPENVINO_NOT_IMPLEMENTED;
}
} // namespace auto_plugin
} // namespace ov
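Two details of AutoCumuCompiledModel::get_property above deserve a worked illustration: ov::hint::model_priority inverts the internal integer (0 means the highest priority), and ov::optimal_number_of_infer_requests is the sum over all devices that compiled successfully. A standalone sketch of both, with made-up per-device counts:

    #include <iostream>
    #include <numeric>
    #include <string>
    #include <vector>

    // Mirrors the ternary chain above: value ? ((value > 1) ? LOW : MEDIUM) : HIGH
    static std::string priority_name(unsigned int value) {
        return value ? ((value > 1) ? "LOW" : "MEDIUM") : "HIGH";
    }

    int main() {
        for (unsigned int v : {0u, 1u, 2u, 5u})
            std::cout << "internal " << v << " -> ov::hint::Priority::" << priority_name(v) << "\n";

        // Cumulative mode: the optimal request count is summed across loaded devices.
        std::vector<unsigned int> per_device = {4, 6};  // e.g. GPU: 4, CPU: 6 (made-up numbers)
        unsigned int total = std::accumulate(per_device.begin(), per_device.end(), 0u);
        std::cout << "optimal_number_of_infer_requests = " << total << "\n";  // 10
        return 0;
    }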
src/plugins/auto/src/cumulative_compiled_model.hpp (new file, 38 lines)
@@ -0,0 +1,38 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once

#include "compiled_model.hpp"
#include "cumulative_schedule.hpp"

namespace ov {
namespace auto_plugin {

class AutoCumuCompiledModel : public CompiledModel {
public:
    AutoCumuCompiledModel(const std::shared_ptr<ov::Model>& model,
                          const std::shared_ptr<const ov::IPlugin>& plugin,
                          ScheduleContext::Ptr context,
                          Schedule::Ptr scheduler);

    // implement pure virtual methods from the base class ov::ICompiledModel
    void export_model(std::ostream& model) const override;

    std::shared_ptr<const ov::Model> get_runtime_model() const override;

    void set_property(const ov::AnyMap& properties) override;

    ov::Any get_property(const std::string& name) const override;

private:
    friend class InferRequest;
    friend class Plugin;
    std::shared_ptr<ov::Model> m_model;
    ScheduleContext::Ptr m_context;
    CumuSchedule::Ptr m_scheduler;
};
} // namespace auto_plugin
} // namespace ov
src/plugins/auto/src/cumulative_schedule.cpp (new file, 254 lines)
@@ -0,0 +1,254 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cumulative_schedule.hpp"
#include "async_infer_request.hpp"
#include "plugin.hpp"

// ------------------------------CumuSchedule----------------------------
namespace ov {
namespace auto_plugin {
bool CumuSchedule::select_other_device(const std::string& cur_dev_name) {
    {
        std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);

        auto remove_inferfail_device = [&](const std::string& device_name) {
            if (m_context->m_device_priorities.size() > 1) {
                const auto current_device_iter =
                    deviceChecker().check_and_return_if_device_in_list<DeviceInformation>(device_name, m_context->m_device_priorities);
                if (current_device_iter != m_context->m_device_priorities.end()) {
                    m_context->m_device_priorities.erase(current_device_iter);
                    return true;
                }
            }
            return false;
        };

        if (m_p_ctput_loadcontext) {
            return remove_inferfail_device(cur_dev_name);
        }
        return false;
    }
}

void CumuSchedule::init() {
    if (m_context->m_bind_buffer) {
        // disable runtime fallback, as it is not applicable in bind mode
        m_context->m_runtime_fallback = false;
        LOG_INFO_TAG("disable runtime fallback in bind mode");
    }
    std::string profilingTask = "CumuSchedule::CumuSchedule:compile_model";
    const auto& valid_devices = m_context->m_device_priorities;
    {
        // Total number of devices in CTPUT
        m_n_ctput_devicenums = valid_devices.size();
        // Generate contexts for loading each device
        m_p_ctput_loadcontext.reset(new AutoCompileContext[m_n_ctput_devicenums]);
        int idx = 0;
        DeviceInformation cpu_device_information;
        for (auto& device : valid_devices) {
            if (device.device_name.find("CPU") == std::string::npos) {
                m_p_ctput_loadcontext[idx].m_device_info = device;
                m_p_ctput_loadcontext[idx].m_device_info.config[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT;
                idx++;
            } else {
                cpu_device_information = device;
                cpu_device_information.config.insert(
                    {ov::affinity.name(), ov::Any(ov::Affinity::CORE).as<std::string>()});
            }
        }
        if (!cpu_device_information.device_name.empty()) {
            m_p_ctput_loadcontext[idx].m_device_info = cpu_device_information;
            m_p_ctput_loadcontext[idx].m_device_info.config[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT;
        }
    }
    if (m_context->m_log_tag == "MULTI") {
        // MULTI's performance hint is always throughput
        m_context->m_performance_hint = ov::hint::PerformanceMode::THROUGHPUT;
    }

    auto load_device_task = [&](AutoCompileContext* context_ptr,
                                const std::shared_ptr<ov::Model>& model) {
        try_to_compile_model(*context_ptr, model);
        if (context_ptr->m_is_load_success) {
            if (context_ptr->m_worker_name.empty()) {
                context_ptr->m_worker_name = context_ptr->m_device_info.device_name;
            }
            generate_workers(context_ptr->m_worker_name, context_ptr->m_compiled_model);
            context_ptr->m_is_already = true;
            // the reload-success flag is only used for m_compile_context[FALLBACKDEVICE]
            context_ptr->m_is_reload_success = true;
            auto& device_name = context_ptr->m_device_info.device_name;
            LOG_INFO_TAG("device:%s compiling model finished", device_name.c_str());
            DEBUG_RUN([this, &context_ptr, &device_name] {
                auto supported_config_keys = context_ptr->m_compiled_model->get_property(ov::supported_properties.name()).as<std::vector<ov::PropertyName>>();
                std::lock_guard<std::mutex> lock(m_context->m_mutex);
                for (const auto& cfg : supported_config_keys) {
                    try {
                        LOG_DEBUG_TAG("device:%s, GetConfig:%s=%s",
                                      device_name.c_str(),
                                      cfg.c_str(),
                                      context_ptr->m_compiled_model->get_property(cfg).as<std::string>().c_str());
                    } catch (const ov::Exception&) {
                    }
                }
            });
        }
        // Handle device load failure in case of ctput
        if (!context_ptr->m_is_load_success) {
            std::string failedDeviceName = context_ptr->m_device_info.device_name;
            std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
            const auto DeviceIter = deviceChecker().check_and_return_if_device_in_list(failedDeviceName, m_context->m_device_priorities);
            // Remove failed device from m_device_priorities
            if (DeviceIter != m_context->m_device_priorities.end()) {
                m_context->m_device_priorities.erase(DeviceIter);
            }
        }
    };
    m_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
        ov::threading::IStreamsExecutor::Config{"CTPUTDeviceAsyncLoad",
                                                static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
                                                0 /*default threads per stream, workaround for ticket 62376*/,
                                                ov::threading::IStreamsExecutor::ThreadBindingType::NONE});
    std::vector<ov::threading::Task> other_devices_loads;
    std::vector<ov::threading::Task> cpu_loads;
    for (size_t i = 0; i < m_n_ctput_devicenums; i++) {
        auto* context_ptr = &m_p_ctput_loadcontext[i];
        auto model = m_context->m_model;
        m_p_ctput_loadcontext[i].m_task = std::bind(load_device_task, context_ptr, model);
        if (i == m_n_ctput_devicenums - 1 &&
            m_p_ctput_loadcontext[i].m_device_info.device_name.find("CPU") != std::string::npos) {
            cpu_loads.push_back(m_p_ctput_loadcontext[i].m_task);
        } else {
            other_devices_loads.push_back(m_p_ctput_loadcontext[i].m_task);
        }
    }
    OV_ITT_SCOPED_TASK(itt::domains::AutoPlugin, openvino::itt::handle(profilingTask));
    for (auto&& device : m_context->m_device_priorities) {
        // initialize the containers before running the async tasks; if left uninitialized, inference will hang
        m_idle_worker_requests[device.device_name];
        m_worker_requests[device.device_name];
        m_infer_pipeline_tasks_device_specific[device.device_name] = nullptr;
    }
    // load devices other than CPU first
    if (other_devices_loads.size() > 0) {
        // Wait for the devices other than CPU to compile the model
        m_executor->run_and_wait(other_devices_loads);
    }
    // Finally load the CPU
    if (cpu_loads.size() > 0) {
        // Wait for CPU to compile the model
        m_executor->run_and_wait(cpu_loads);
    }
    if (m_n_ctput_devicenums == 1 && m_p_ctput_loadcontext[0].m_is_already) {
        m_passthrough_compiled_model = m_p_ctput_loadcontext[0].m_compiled_model;
        m_context->m_hw_compiled_model = m_passthrough_compiled_model;
    }
    m_context->m_hw_compiled_model = wait_first_compiled_model_ready();
}

void CumuSchedule::try_to_compile_model(AutoCompileContext& context, const std::shared_ptr<ov::Model>& model) {
    auto& device = context.m_device_info.device_name;
    auto& device_config = context.m_device_info.config;
    bool cur_dev_is_gpu = (device.find("GPU") != std::string::npos);
    {
        std::lock_guard<std::mutex> lock(m_context->m_mutex);
        if (cur_dev_is_gpu) {
            // if the user has not set the number of compiling threads,
            // limit the thread count used for compilation
            int max_threads = 0;
            try {
                max_threads = m_context->m_ov_core->get_property(device, ov::compilation_num_threads);
            } catch (const ov::Exception&) {
                LOG_DEBUG_TAG("cannot get MAX_NUM_THREADS from GPU");
            }
            if (max_threads == static_cast<int>(std::thread::hardware_concurrency())) {
                int thread_num = max_threads / 2;
                device_config.insert(ov::compilation_num_threads(thread_num));
                LOG_DEBUG_TAG("gpu streams number for compiling: %d", thread_num);
            } else {
                // the user has set the number of compiling threads;
                // use the user's value anyway
                LOG_DEBUG_TAG("user defined compiling threads: %d", max_threads);
            }
        }
    }
    try {
        if (!(m_context->m_model_path.empty())) {
            context.m_compiled_model = m_context->m_ov_core->compile_model(m_context->m_model_path, device, device_config);
        } else {
            context.m_compiled_model = m_context->m_ov_core->compile_model(model, device, device_config);
        }
        context.m_is_load_success = true;
    } catch (const ov::Exception& e) {
        context.m_err_message += device + ":" + e.what();
        context.m_is_load_success = false;
    } catch (const std::exception& e) {
        context.m_err_message += device + ":" + e.what();
        context.m_is_load_success = false;
    }
}
SoCompiledModel CumuSchedule::wait_first_compiled_model_ready() {
    std::ostringstream result;
    result << "compile model failed, ";
    for (size_t i = 0; i < m_n_ctput_devicenums; i++) {
        // check if the device compiled the model successfully
        if (m_p_ctput_loadcontext[i].m_is_already) {
            return m_p_ctput_loadcontext[i].m_compiled_model;
        } else {
            result << m_p_ctput_loadcontext[i].m_err_message.c_str();
            result << "; ";
        }
    }
    OPENVINO_THROW("[", get_log_tag(), "] ", result.str());
}

bool CumuSchedule::schedule_to_worker_infer_request(ov::threading::Task pipeline_task, DeviceName preferred_device) {
    std::vector<DeviceInformation> devices;
    // AUTO work mode
    // Devices that fail inference will be removed from the priority list in the callback, so a lock is needed here
    std::unique_lock<std::mutex> lock(m_context->m_fallback_mutex);
    if (!preferred_device.empty()) {
        devices = m_context->m_device_priorities;
        if (!deviceChecker().check_if_device_in_list<DeviceInformation>(preferred_device, devices)) {
            lock.unlock();
            OPENVINO_THROW("The preferred device should be the selected device");
        }
    } else {
        devices = m_context->m_device_priorities;
    }
    lock.unlock();
    for (auto&& device : devices) {
        if (!preferred_device.empty() && (device.device_name != preferred_device)) {
            continue;
        }
        if (run_pipeline_task(pipeline_task, m_idle_worker_requests[device.device_name], preferred_device)) {
            return true;
        }
    }
    // no vacant requests this time, storing the task to the respective queue
    if (!preferred_device.empty()) {
        m_infer_pipeline_tasks_device_specific[preferred_device]->push(std::move(pipeline_task));
    } else {
        m_infer_pipeline_tasks.push(std::move(pipeline_task));
    }
    return false;
}

CumuSchedule::~CumuSchedule() {
    if (m_context) {
        std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
        m_context->m_device_priorities.clear();
    }
    /* NOTE: The only threads that use `MultiSchedule` are the worker infer requests' threads.
     * The AsyncInferRequest destructor should wait for all asynchronous tasks triggered by the request.
     */
    for (auto&& idleWorker : m_idle_worker_requests) {
        // stop accepting any idle requests back (for re-scheduling)
        idleWorker.second.set_capacity(0);
    }
}
} // namespace auto_plugin
} // namespace ov
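CumuSchedule::init above deliberately queues the CPU compile task last, so the CPU is free to serve the other devices' compilations first. A compact sketch of that partition-then-run ordering (run_and_wait here is a sequential stand-in for the stream executor, and the device list is hypothetical):

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    using Task = std::function<void()>;

    static void run_and_wait(const std::vector<Task>& tasks) {
        for (const auto& t : tasks) t();  // the real executor runs these on a thread pool
    }

    int main() {
        std::vector<std::string> devices = {"GPU", "NPU", "CPU"};  // hypothetical priority list
        std::vector<Task> other_device_loads, cpu_loads;
        for (const auto& d : devices) {
            Task load = [d] { std::cout << "compile model on " << d << "\n"; };
            // Partition exactly as in init(): CPU is queued separately and runs last.
            (d.find("CPU") != std::string::npos ? cpu_loads : other_device_loads).push_back(load);
        }
        run_and_wait(other_device_loads);  // non-CPU devices first
        run_and_wait(cpu_loads);           // CPU last
        return 0;
    }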
src/plugins/auto/src/cumulative_schedule.hpp (new file, 29 lines)
@ -0,0 +1,29 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once

#include "schedule.hpp"
#include "async_infer_request.hpp"

namespace ov {
namespace auto_plugin {

class CumuSchedule : public Schedule {
public:
    using Ptr = std::shared_ptr<CumuSchedule>;
    virtual ~CumuSchedule();
    std::unique_ptr<AutoCompileContext[]> m_p_ctput_loadcontext = nullptr;
    size_t m_n_ctput_devicenums = 0;

private:
    void init() override;
    SoCompiledModel wait_first_compiled_model_ready() override;
    bool schedule_to_worker_infer_request(ov::threading::Task, DeviceName preferred_device = "") override;
    void try_to_compile_model(AutoCompileContext& context, const std::shared_ptr<ov::Model>& model) override;
    bool select_other_device(const std::string& cur_dev_name) override;
};
} // namespace auto_plugin
} // namespace ov
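The `ctput` members above back AUTO's cumulative-throughput mode, where one load context exists per device. From the application side this schedule is selected through the public 2.0 API; a minimal usage sketch (the model path is a placeholder):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    // CUMULATIVE_THROUGHPUT makes AUTO compile and run on all listed devices,
    // which is what CumuSchedule's per-device load contexts serve.
    auto compiled = core.compile_model(model, "AUTO",
        ov::device::priorities("GPU", "CPU"),
        ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
    return 0;
}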
@ -1,44 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "executable_network.hpp"
// ------------------------------ExecutableNetwork----------------------------
namespace MultiDevicePlugin {
using namespace InferenceEngine;

ExecutableNetwork::ExecutableNetwork(const Schedule::Ptr& schedule,
                                     const ScheduleContext::Ptr& sContext):
    _schedule(schedule),
    _sContext(sContext) {
    _schedule->init(_sContext);
}

ExecutableNetwork::~ExecutableNetwork() {
}

IInferRequestInternal::Ptr ExecutableNetwork::CreateInferRequest() {
    SetExeNetworkForContext();
    return _schedule->CreateInferRequest();
}

void ExecutableNetwork::SetExeNetworkForContext() {
    // Different APIs may call this function, so use std::call_once here
    // for every AutoSchedule instance
    std::call_once(_oc, [this]() {
        _sContext->_executableNetwork = shared_from_this();
    });
}

std::string ExecutableNetwork::GetLogTag() const noexcept {
    return _sContext->_LogTag;
}

std::shared_ptr<ngraph::Function> ExecutableNetwork::GetExecGraphInfo() {
    // TODO: When there are more than 2 executable networks, such as MULTI:GPU,CPU
    auto autoSContext = std::dynamic_pointer_cast<AutoScheduleContext>(_sContext);
    return autoSContext->_hwExecutableNetwork->GetExecGraphInfo();
}

} // namespace MultiDevicePlugin
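The removed SetExeNetworkForContext above relies on std::call_once plus enable_shared_from_this to publish the network to its context exactly once, whichever API path gets there first. A distilled sketch of that pattern (hypothetical types):

#include <memory>
#include <mutex>

struct Context;

class Network : public std::enable_shared_from_this<Network> {
public:
    explicit Network(std::shared_ptr<Context> ctx) : m_ctx(std::move(ctx)) {}
    void publish();  // defined below, after Context is complete

private:
    std::shared_ptr<Context> m_ctx;
    std::once_flag m_once;
};

struct Context {
    std::shared_ptr<Network> network;  // back-pointer, set lazily
};

void Network::publish() {
    // several API entry points may race here; the context still sees the
    // shared pointer installed exactly once (requires the Network to be
    // owned by a shared_ptr, as enable_shared_from_this demands)
    std::call_once(m_once, [this] { m_ctx->network = shared_from_this(); });
}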
@ -1,34 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "schedule.hpp"

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif

namespace MultiDevicePlugin {
class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
    using Ptr = std::shared_ptr<ExecutableNetwork>;
    ExecutableNetwork(const Schedule::Ptr& schedule, const ScheduleContext::Ptr& sContext);
    IInferPtr CreateInferRequest() override;
    std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
    ~ExecutableNetwork() override;

protected:
    std::string GetLogTag() const noexcept;
private:
    Schedule::Ptr _schedule;
    ScheduleContext::Ptr _sContext;
    std::once_flag _oc;
    void SetExeNetworkForContext();
};
} // namespace MultiDevicePlugin
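The MULTIUNITTEST block above is a compile-time testing seam: methods marked MOCKTESTMACRO become virtual only in unit-test builds so a mock subclass can override them, and compile to plain non-virtual calls in release builds. In isolation the pattern looks like this (illustrative class):

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#else
#define MOCKTESTMACRO
#endif

class DeviceSelector {
public:
    // virtual (and thus overridable by a gmock subclass) only when
    // MULTIUNITTEST is defined; a plain inlineable call otherwise
    MOCKTESTMACRO int select_device() const { return 0; }
};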
@ -2,138 +2,117 @@
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////

#include "infer_request.hpp"
#include <ngraph/node.hpp>
#include <transformations/utils/utils.hpp>
#include <ie_input_info.hpp>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include <blob_factory.hpp>
#include <debug.h>

namespace MultiDevicePlugin {
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <utility>

using namespace InferenceEngine;
#include "itt.hpp"
#include "openvino/core/except.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/tensor.hpp"
#include "plugin.hpp"

// ------------------------------MultiDeviceInferRequest----------------------------
MultiDeviceInferRequest::MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                                 const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                                 const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
                                                 InferenceEngine::RemoteContext::Ptr ctx)
    : IInferRequestInternal(inputs, outputs),
      _sharedRequest(request_to_share_blobs_with) {
    for (const std::shared_ptr<const ov::Node>& in : inputs) {
        modelInputsMap[ov::op::util::get_ie_output_name(ngraph::Output<const ngraph::Node>(in))] = in;
    }
    for (const std::shared_ptr<const ov::Node>& out : outputs) {
        modelOutputsMap[ov::op::util::get_ie_output_name(out->input_value(0))] = out;
    }
    CreateInferRequest(request_to_share_blobs_with, ctx);
}
using Time = std::chrono::high_resolution_clock;

MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs,
                                                 const OutputsDataMap& networkOutputs,
                                                 const SoIInferRequestInternal& request_to_share_blobs_with,
                                                 InferenceEngine::RemoteContext::Ptr ctx)
    : IInferRequestInternal(networkInputs, networkOutputs),
      _sharedRequest(request_to_share_blobs_with) {
    CreateInferRequest(request_to_share_blobs_with, ctx);
}
namespace {

void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
                                                 InferenceEngine::RemoteContext::Ptr ctx) {
    if (request_to_share_blobs_with) {
        // do not need to touch multi memory blobs
        return;
    }
    // Allocate all input blobs
    for (const auto& it : _networkInputs) {
        auto l = it.second->getLayout();
        auto p = it.second->getPrecision();
        auto dims = it.second->getTensorDesc().getDims();

        TensorDesc desc = TensorDesc(p, dims, l);
        if (ctx) {
            _inputs[it.first] = ctx->CreateHostBlob(desc);
        } else {
            _inputs[it.first] = make_blob_with_precision(desc);
        }
        _inputs[it.first]->allocate();
    }
    // Allocate all output blobs
    for (const auto& it : _networkOutputs) {
        auto l = it.second->getLayout();
        auto p = it.second->getPrecision();
        auto dims = it.second->getTensorDesc().getDims();
        // for 1.0 API, dims is not dynamic anyway
        if (InferenceEngine::details::product(dims) == 0 && !modelOutputsMap.empty()) {
            // replace the dims with one from dynamic shape
            const auto outputNodeItr = modelOutputsMap.find(it.first);
            if (outputNodeItr != modelOutputsMap.end()) {
                const auto shape = outputNodeItr->second->get_input_partial_shape(0);
                // update dims
                dims = shape.get_max_shape();
            }
        }

        TensorDesc desc = TensorDesc(p, dims, l);
        if (ctx) {
            _outputs[it.first] = ctx->CreateHostBlob(desc);
        } else {
            _outputs[it.first] = make_blob_with_precision(desc);
        }
        _outputs[it.first]->allocate();
    }
}
void MultiDeviceInferRequest::SetBlobsToAnotherRequest(const SoIInferRequestInternal& req) {
    for (const auto& it : _networkInputs) {
        auto& name = it.first;
        // this request is already in BUSY state, so using the internal functions safely
        auto blob = GetBlob(name);
        if (req->GetBlob(name) != blob)
            req->SetBlob(name, blob);
    }
    for (const auto& it : _networkOutputs) {
        auto& name = it.first;
        // this request is already in BUSY state, so using the internal functions safely
        auto blob = GetBlob(name);
        if (req->GetBlob(name) != blob)
            req->SetBlob(name, blob);
    }
}

void MultiDeviceInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& blob) {
    if (_sharedRequest)
        _sharedRequest->SetBlob(name, blob);
    else
        IInferRequestInternal::SetBlob(name, blob);
}

InferenceEngine::Blob::Ptr MultiDeviceInferRequest::GetBlob(const std::string& name) {
    if (_sharedRequest)
        return _sharedRequest->GetBlob(name);
    else
        return IInferRequestInternal::GetBlob(name);
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> MultiDeviceInferRequest::GetPerformanceCounts() const {
    if (_sharedRequest) {
        return _sharedRequest->GetPerformanceCounts();
    } else {
        // get the profiling info directly from target infer request
        // not thread-safe for plugin like GPU, see CVS-86034
        if (_scheduledRequest)
            return _scheduledRequest->GetPerformanceCounts();
        else
            IE_THROW() << "Performance counters were not enabled";
    }
}

std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> MultiDeviceInferRequest::QueryState() {
    if (_sharedRequest)
        return _sharedRequest->QueryState();
    IE_THROW(NotImplemented);
}

void allocate_tensor_impl(ov::Tensor& tensor, const ov::element::Type& element_type, const ov::Shape& shape) {
    if (!tensor || tensor.get_element_type() != element_type) {
        tensor = ov::Tensor(element_type, shape);
    } else {
        tensor.set_shape(shape);
    }
}
}  // namespace
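allocate_tensor_impl reuses an existing allocation when only the shape changes and reallocates when the element type changes or the tensor is empty. A free-standing copy of that contract, exercised directly (illustrative only; the real helper lives in the anonymous namespace above):

#include <openvino/runtime/tensor.hpp>
#include <iostream>

// `ensure` is a hypothetical stand-in for allocate_tensor_impl
static void ensure(ov::Tensor& t, ov::element::Type et, const ov::Shape& s) {
    if (!t || t.get_element_type() != et)
        t = ov::Tensor(et, s);   // element type changed (or tensor empty): fresh allocation
    else
        t.set_shape(s);          // same element type: reshape in place where possible
}

int main() {
    ov::Tensor t;
    ensure(t, ov::element::f32, {1, 3});   // allocates
    ensure(t, ov::element::f32, {2, 3});   // reshapes (may grow the buffer)
    ensure(t, ov::element::i64, {2, 3});   // type switch: reallocates
    std::cout << t.get_byte_size() << "\n";
    return 0;
}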

ov::auto_plugin::InferRequest::InferRequest(const std::shared_ptr<const ov::auto_plugin::CompiledModel>& model,
                                            const SoAsyncInferRequest& request_to_share_tensors_with)
    : ov::ISyncInferRequest(model),
      m_shared_request(request_to_share_tensors_with) {
    if (!m_shared_request) {
        // Allocate input/output tensors
        for (const auto& input : get_inputs()) {
            allocate_tensor(input, [input](ov::Tensor& tensor) {
                // Can add a check to avoid double work in case of shared tensors
                allocate_tensor_impl(tensor,
                                     input.get_element_type(),
                                     input.get_partial_shape().is_dynamic() ? ov::Shape{0} : input.get_shape());
            });
        }
        for (const auto& output : get_outputs()) {
            allocate_tensor(output, [output](ov::Tensor& tensor) {
                // Can add a check to avoid double work in case of shared tensors
                allocate_tensor_impl(tensor,
                                     output.get_element_type(),
                                     output.get_partial_shape().is_dynamic() ? ov::Shape{0} : output.get_shape());
            });
        }
    } else {
        for (const auto& input : get_inputs()) {
            ov::ISyncInferRequest::set_tensor(input, ov::Tensor(m_shared_request->get_tensor(input), m_shared_request._so));
        }
        for (const auto& output : get_outputs()) {
            ov::ISyncInferRequest::set_tensor(output, ov::Tensor(m_shared_request->get_tensor(output), m_shared_request._so));
        }
    }
}

} // namespace MultiDevicePlugin

const ov::auto_plugin::SoAsyncInferRequest& ov::auto_plugin::InferRequest::get_shared_request() {
    return m_shared_request;
}

void ov::auto_plugin::InferRequest::set_scheduled_request(SoAsyncInferRequest request) {
    m_scheduled_request = request;
}

void ov::auto_plugin::InferRequest::set_tensors_to_another_request(const SoAsyncInferRequest& req) {
    for (const auto& it : get_inputs()) {
        // this request is already in BUSY state, so using the internal functions safely
        auto tensor = get_tensor(it);
        auto type = tensor.get_element_type();
        bool is_remote = tensor.is<ov::RemoteTensor>() || req->get_tensor(it).is<ov::RemoteTensor>();
        if (is_remote || req->get_tensor(it).data(type) != tensor.data(type))
            req->set_tensor(it, tensor);
    }
    for (const auto& it : get_outputs()) {
        // this request is already in BUSY state, so using the internal functions safely
        auto tensor = get_tensor(it);
        auto type = tensor.get_element_type();
        bool is_remote = tensor.is<ov::RemoteTensor>() || req->get_tensor(it).is<ov::RemoteTensor>();
        if (is_remote || req->get_tensor(it).data(type) != tensor.data(type))
            req->set_tensor(it, tensor);
    }
}

void ov::auto_plugin::InferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) {
    if (m_shared_request)
        m_shared_request->set_tensor(port, tensor);
    ov::ISyncInferRequest::set_tensor(port, tensor);
}

void ov::auto_plugin::InferRequest::infer() {
    OPENVINO_NOT_IMPLEMENTED;
}

std::vector<ov::ProfilingInfo> ov::auto_plugin::InferRequest::get_profiling_info() const {
    if (m_shared_request)
        return m_shared_request->get_profiling_info();
    if (m_scheduled_request)
        return m_scheduled_request->get_profiling_info();
    OPENVINO_NOT_IMPLEMENTED;
}

ov::auto_plugin::InferRequest::~InferRequest() = default;

std::vector<std::shared_ptr<ov::IVariableState>> ov::auto_plugin::InferRequest::query_state() const {
    if (m_shared_request)
        return m_shared_request->query_state();
    OPENVINO_NOT_IMPLEMENTED;
}
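From the application side, the path above is reached through ov::InferRequest::get_profiling_info(); counters must be enabled when the model is compiled. A minimal sketch (placeholder model path):

#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");   // placeholder path
    auto compiled = core.compile_model(model, "AUTO", ov::enable_profiling(true));
    ov::InferRequest req = compiled.create_infer_request();
    req.infer();
    for (const auto& pi : req.get_profiling_info()) {
        // entries come from the device request that actually ran the inference
        std::cout << pi.node_name << ": " << pi.real_time.count() << " us\n";
    }
    return 0;
}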
@ -14,44 +14,30 @@
#include <utility>
#include <memory>
#include <string>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include "ie_remote_context.hpp"
#include "plugin.hpp"

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif

namespace MultiDevicePlugin {

class MultiDeviceInferRequest : public InferenceEngine::IInferRequestInternal {
namespace ov {
namespace auto_plugin {
class CompiledModel;
class InferRequest : public ov::ISyncInferRequest {
public:
    using Ptr = std::shared_ptr<MultiDeviceInferRequest>;
    explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
                                     const InferenceEngine::OutputsDataMap& networkOutputs,
                                     const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
                                     InferenceEngine::RemoteContext::Ptr ctx = nullptr);
    explicit MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                     const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                     const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
                                     InferenceEngine::RemoteContext::Ptr ctx = nullptr);
    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
    void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& blob) override;
    InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
    std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState() override;
    // Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request)
    void SetBlobsToAnotherRequest(const InferenceEngine::SoIInferRequestInternal& req);
    InferenceEngine::SoIInferRequestInternal& GetSharedRequest() { return _sharedRequest; }
    InferenceEngine::SoIInferRequestInternal _scheduledRequest;
    explicit InferRequest(const std::shared_ptr<const ov::auto_plugin::CompiledModel>& compiled_model,
                          const SoAsyncInferRequest& request_to_share_tensors_with);
    ~InferRequest();

    void infer() override;
    std::vector<std::shared_ptr<ov::IVariableState>> query_state() const override;
    std::vector<ov::ProfilingInfo> get_profiling_info() const override;

    const SoAsyncInferRequest& get_shared_request();
    void set_scheduled_request(SoAsyncInferRequest request);
    // Auto-Device impl specific: sets the data (tensors from the device-less requests to the specific device request)
    void set_tensors_to_another_request(const SoAsyncInferRequest& req);
    void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override;

private:
    void CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
                            InferenceEngine::RemoteContext::Ptr ctx);
    InferenceEngine::SoIInferRequestInternal _sharedRequest;
    std::unordered_map<std::string, std::shared_ptr<const ov::Node>> modelInputsMap;
    std::unordered_map<std::string, std::shared_ptr<const ov::Node>> modelOutputsMap;
    SoAsyncInferRequest m_shared_request;
    SoAsyncInferRequest m_scheduled_request;
};

} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov
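For reference, the public counterpart of the set_tensor/get_tensor pair above, with a caller-owned input tensor. A sketch assuming a single-input, single-output model with a static f32 input shape (placeholder path):

#include <algorithm>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "AUTO");  // placeholder path
    ov::InferRequest req = compiled.create_infer_request();

    ov::Tensor input(ov::element::f32, compiled.input().get_shape());  // caller-owned buffer
    std::fill_n(input.data<float>(), input.get_size(), 0.0f);          // real data in practice
    req.set_tensor(compiled.input(), input);   // routed to the scheduled device request
    req.infer();
    ov::Tensor output = req.get_tensor(compiled.output());
    return static_cast<int>(output.get_size());
}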
@ -4,17 +4,18 @@

/**
 * @brief Defines openvino domains for tracing
 * @file multi_itt.h
 * @file itt.h
 */

#pragma once

#include <openvino/itt.hpp>

namespace MultiDevicePlugin {
namespace ov {
namespace auto_plugin {
namespace itt {
namespace domains {
    OV_ITT_DOMAIN(MULTIPlugin);
}
}
    OV_ITT_DOMAIN(AutoPlugin);
}
} // namespace itt
} // namespace auto_plugin
} // namespace ov
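The renamed domain is consumed through OpenVINO's ITT macros; a typical call site inside the plugin would look like this (the function name is illustrative):

#include "itt.hpp"

namespace ov {
namespace auto_plugin {
void example_traced_function() {  // hypothetical call site
    // the scope shows up under the AutoPlugin domain in ITT/VTune traces
    OV_ITT_SCOPED_TASK(itt::domains::AutoPlugin, "example_traced_function");
    // ... traced work ...
}
} // namespace auto_plugin
} // namespace ov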
File diff suppressed because it is too large
@ -10,70 +10,81 @@
#include <string>
#include <list>

#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include "openvino/runtime/iplugin.hpp"
#include "utils/log_util.hpp"
#include "common.hpp"
#include "plugin_config.hpp"
#include "compiled_model.hpp"

#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace ov {
namespace auto_plugin {

namespace MultiDevicePlugin {

class MultiDeviceInferencePlugin : public InferenceEngine::IInferencePlugin {
class Plugin : public ov::IPlugin {
public:
    MultiDeviceInferencePlugin();
    ~MultiDeviceInferencePlugin() = default;
    Plugin();
    ~Plugin() = default;

    InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
                                                                        const std::map<std::string, std::string>& config) override;
    void set_property(const ov::AnyMap& properties) override;

    ov::SoPtr<InferenceEngine::IExecutableNetworkInternal> LoadNetwork(const std::string& modelPath,
                                                                       const std::map<std::string, std::string>& config) override;
    ov::Any get_property(const std::string& name, const ov::AnyMap& arguments) const override;

    void SetConfig(const std::map<std::string, std::string>& config) override;
    InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
    InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
                                                     const std::map<std::string, std::string>& config) const override;
    InferenceEngine::Parameter GetMetric(const std::string& name,
                                         const std::map<std::string, InferenceEngine::Parameter>& options) const override;
    ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
                                    const ov::AnyMap& properties) const override;

    MOCKTESTMACRO std::vector<MultiDevicePlugin::DeviceInformation> ParseMetaDevices(const std::string& devicesRequestsCfg,
                                                                                     const std::map<std::string, std::string>& config) const;
    std::shared_ptr<ov::ICompiledModel> compile_model(const std::shared_ptr<const ov::Model>& model,
                                                      const ov::AnyMap& properties) const override;

    MOCKTESTMACRO std::string GetDeviceList(const std::map<std::string, std::string>& config) const;
    std::shared_ptr<ov::ICompiledModel> compile_model(const std::shared_ptr<const ov::Model>& model,
                                                      const ov::AnyMap& properties,
                                                      const ov::RemoteContext& context) const override;

    MOCKTESTMACRO std::list<DeviceInformation> GetValidDevice(const std::vector<DeviceInformation>& metaDevices,
                                                              const std::string& networkPrecision = METRIC_VALUE(FP32));
    std::shared_ptr<ov::ICompiledModel> compile_model(const std::string& model_path,
                                                      const ov::AnyMap& properties) const override;

    MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices,
                                                 const std::string& networkPrecision = METRIC_VALUE(FP32),
    MOCKTESTMACRO std::vector<auto_plugin::DeviceInformation> parse_meta_devices(const std::string& devices_requests_cfg,
                                                                                 const ov::AnyMap& properties) const;

    MOCKTESTMACRO std::string get_device_list(const ov::AnyMap& properties) const;

    MOCKTESTMACRO std::list<DeviceInformation> get_valid_device(const std::vector<DeviceInformation>& meta_devices,
                                                                const std::string& model_precision = "FP32") const;

    MOCKTESTMACRO DeviceInformation select_device(const std::vector<DeviceInformation>& meta_devices,
                                                  const std::string& model_precision = "FP32",
                                                  unsigned int priority = 0);
    void UnregisterPriority(const unsigned int& priority, const std::string& deviceName);
    void RegisterPriority(const unsigned int& priority, const std::string& deviceName);
    void unregister_priority(const unsigned int& priority, const std::string& device_name);
    void register_priority(const unsigned int& priority, const std::string& device_name);

    std::shared_ptr<ov::IRemoteContext> create_context(const ov::AnyMap& remote_properties) const override;

    std::shared_ptr<ov::IRemoteContext> get_default_context(const ov::AnyMap& remote_properties) const override;

    std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
                                                     const ov::AnyMap& properties) const override;

    std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
                                                     const ov::RemoteContext& context,
                                                     const ov::AnyMap& properties) const override;

protected:
    ov::AnyMap PreProcessConfig(const std::map<std::string, std::string>& orig_config) const;
    ov::AnyMap pre_process_config(const ov::AnyMap& orig_config) const;

private:
    InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetworkImpl(const std::string& modelPath,
                                                                     InferenceEngine::CNNNetwork network,
                                                                     const std::map<std::string, std::string>& config,
                                                                     const std::string& networkPrecision = METRIC_VALUE(FP32));
    std::vector<DeviceInformation> FilterDevice(const std::vector<DeviceInformation>& metaDevices,
                                                const std::map<std::string, std::string>& config);
    std::vector<DeviceInformation> FilterDeviceByNetwork(const std::vector<DeviceInformation>& metaDevices,
                                                         InferenceEngine::CNNNetwork network);
    std::string GetLogTag() const noexcept;
    static std::mutex _mtx;
    static std::map<unsigned int, std::list<std::string>> _priorityMap;
    std::string _LogTag;
    PluginConfig _pluginConfig;
    std::shared_ptr<ov::ICompiledModel> compile_model_impl(const std::string& model_path,
                                                           const std::shared_ptr<const ov::Model>& model,
                                                           const ov::AnyMap& properties,
                                                           const std::string& model_precision = "FP32") const;
    std::vector<DeviceInformation> filter_device(const std::vector<DeviceInformation>& meta_devices,
                                                 const ov::AnyMap& properties) const;
    std::vector<DeviceInformation> filter_device_by_model(const std::vector<DeviceInformation>& meta_devices,
                                                          const std::shared_ptr<const ov::Model>& model) const;
    std::string get_log_tag() const noexcept;
    static std::mutex m_mtx;
    static std::map<unsigned int, std::list<std::string>> m_priority_map;
    PluginConfig m_plugin_config;
    mutable SoCompiledModel m_hw_compiledmodel;
};

} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov
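The new Plugin class splits configuration into plugin-wide properties (set_property) and per-compilation properties (compile_model). A minimal sketch of both paths from the application side (placeholder model path):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // plugin-wide default, routed into Plugin::set_property
    core.set_property("AUTO", ov::log::level(ov::log::Level::WARNING));
    // per-compilation properties, routed into Plugin::compile_model
    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled = core.compile_model(model, "AUTO", ov::device::priorities("GPU", "CPU"));
    return 0;
}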

@ -3,11 +3,12 @@
//
#include "plugin_config.hpp"

namespace MultiDevicePlugin {
namespace ov {
namespace auto_plugin {
// AUTO will enable the blocklist if:
// 1. No device priority is passed to AUTO/MULTI (e.g. core.compile_model(model, "AUTO", configs);)
// 2. No valid device is parsed out of the device priority (e.g. core.compile_model(model, "AUTO:-CPU,-GPU", configs);).
const std::set<std::string> PluginConfig::_deviceBlocklist = {"VPUX", "GNA", "notIntelGPU"};
const std::set<std::string> PluginConfig::device_block_list = {"VPUX", "GNA", "notIntelGPU"};

PluginConfig::PluginConfig() {
    set_default();
@ -52,9 +53,8 @@ void PluginConfig::set_property(const ov::AnyMap& properties) {
            // when the user calls set_property to pass some config to the plugin, we respect it and pass the config through in this case
            user_properties[name] = val;
            if (kv.first == ov::log::level.name()) {
                auto log_level = kv.second.as<std::string>();
                if (!setLogLevel(log_level)) {
                    IE_THROW() << "Unsupported log level: " << log_level;
                if (!set_log_level(kv.second)) {
                    OPENVINO_THROW("Unsupported log level: ", kv.second.as<std::string>());
                }
            }
        } else {
@ -108,9 +108,8 @@ void PluginConfig::apply_user_properties() {
    for (auto& kv : user_properties) {
        full_properties[kv.first] = kv.second;
        if (kv.first == ov::log::level.name()) {
            auto log_level = kv.second.as<std::string>();
            if (!setLogLevel(log_level)) {
                IE_THROW() << "Unsupported log level: " << log_level;
            if (!set_log_level(kv.second)) {
                OPENVINO_THROW("Unsupported log level: ", kv.second.as<std::string>());
            }
        }
    }
@ -120,4 +119,5 @@ ov::AnyMap PluginConfig::get_full_properties() {
    return full_properties;
}

} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov
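The validation above surfaces to applications as an exception from set_property. A sketch of both the accepted and rejected paths (assumes an invalid value such as "VERBOSE" is rejected by this check):

#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    core.set_property("AUTO", ov::log::level(ov::log::Level::DEBUG));  // accepted
    try {
        // a value outside ov::log::Level is rejected by PluginConfig::set_property
        core.set_property("AUTO", {{ov::log::level.name(), "VERBOSE"}});
    } catch (const ov::Exception& e) {
        std::cerr << e.what() << '\n';
    }
    return 0;
}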
Some files were not shown because too many files have changed in this diff