Merge branch 'master' into river/cpu_plugin_api_2.0

River.Li 2023-06-23 10:27:56 +08:00
commit 9ef10dddab
307 changed files with 17889 additions and 12050 deletions

View File

@ -408,8 +408,8 @@ jobs:
displayName: 'GNA UT'
enabled: 'false' # TODO: fix
- script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ieMultiPluginUnitTests.xml
displayName: 'MULTI UT'
- script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml
displayName: 'AUTO UT'
- script: $(RUN_PREFIX) $(INSTALL_TEST_DIR)/ov_auto_batch_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_batch_unit_tests.xml
displayName: 'AutoBatch UT'
@ -430,7 +430,6 @@ jobs:
# Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
- script: |
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.05.00.2116/linux/x64:$(LD_LIBRARY_PATH)
python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph $(PYTHON_STATIC_ARGS) \
--junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
--ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py \
@ -439,8 +438,6 @@ jobs:
# Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time
- script: |
# For python imports to import pybind_mock_frontend
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.05.00.2116/linux/x64:$(LD_LIBRARY_PATH)
python3 -m pytest -sv $(INSTALL_TEST_DIR)/pyopenvino $(PYTHON_STATIC_ARGS) \
--junitxml=$(INSTALL_TEST_DIR)/TEST-Pyngraph.xml \
--ignore=$(INSTALL_TEST_DIR)/pyopenvino/tests/test_utils/test_utils.py \
@ -449,7 +446,6 @@ jobs:
displayName: 'Python API 2.0 Tests'
- script: |
export LD_LIBRARY_PATH=$(REPO_DIR)/temp/gna_03.05.00.2116/linux/x64:$(LD_LIBRARY_PATH)
python3 -m pytest -s $(INSTALL_TEST_DIR)/mo/unit_tests --junitxml=$(INSTALL_TEST_DIR)/TEST-ModelOptimizer.xml
displayName: 'Model Optimizer UT'

View File

@ -306,8 +306,8 @@ jobs:
- script: $(INSTALL_TEST_DIR)/ov_cpu_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_unit_tests.xml
displayName: 'Intel CPU Unit Tests'
- script: $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ieMultiPluginUnitTests.xml
displayName: 'MULTI UT'
- script: $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml
displayName: 'AUTO UT'
- script: $(INSTALL_TEST_DIR)/ov_template_func_tests --gtest_filter=*smoke* --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-templateFuncTests.xml
env:

View File

@ -196,8 +196,8 @@ jobs:
displayName: 'Intel CPU Unit Tests'
enabled: 'false'
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ieMultiPluginUnitTests.xml
displayName: 'MULTI UT'
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_auto_unit_tests.xml
displayName: 'AUTO UT'
enabled: 'false'
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_cpu_func_tests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:$(INSTALL_TEST_DIR)/TEST-ov_cpu_func_tests.xml

View File

@ -306,8 +306,8 @@ jobs:
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_gna_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ov_gna_unit_tests.xml
displayName: 'GNA UT'
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieMultiPluginUnitTests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ieMultiPluginUnitTests.xml
displayName: 'MULTI UT'
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_auto_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ov_auto_unit_tests.xml
displayName: 'AUTO UT'
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ov_auto_batch_unit_tests --gtest_output=xml:$(INSTALL_TEST_DIR)\TEST-ov_auto_batch_unit_tests.xml
displayName: 'AutoBatch UT'

.github/labeler.yml vendored
View File

@ -41,6 +41,7 @@
'category: dependency_changes':
- '**/requirement*.txt'
- '**/constraints*.txt'
- 'scripts/**/*'
- '.gitmodules'
- '**/setup.py'

View File

@ -126,7 +126,7 @@ ie_option(ENABLE_OV_IR_FRONTEND "Enable IR FrontEnd" ON)
ie_option(ENABLE_OV_TF_FRONTEND "Enable TensorFlow FrontEnd" ON)
ie_option(ENABLE_OV_TF_LITE_FRONTEND "Enable TensorFlow Lite FrontEnd" ON)
ie_dependent_option(ENABLE_SNAPPY_COMPRESSION "Enables compression support for TF FE" ON
"ENABLE_OV_TF_FRONTEND" ON)
"ENABLE_OV_TF_FRONTEND" OFF)
if(CMAKE_HOST_LINUX AND LINUX)
# Debian packages are enabled on Ubuntu systems

View File

@ -22,7 +22,7 @@ Local Deployment Options
- using Debian / RPM packages - a recommended way for Linux operating systems;
- using PIP package manager on PyPI - the default approach for Python-based applications;
- using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to :doc:`Installing OpenVINO on Linux from Docker <openvino_docs_install_guides_installing_openvino_docker_linux>` and :doc:`Installing OpenVINO on Windows from Docker <openvino_docs_install_guides_installing_openvino_docker_windows>`.
- using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to :doc:`Installing OpenVINO on Linux from Docker <openvino_docs_install_guides_installing_openvino_docker_linux>`
Furthermore, to customize your OpenVINO Docker image, use the `Docker CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ to generate a Dockerfile and build the image.
@ -44,7 +44,7 @@ The table below shows which distribution type can be used for what target operat
* - RPM packages
- Red Hat Enterprise Linux 8, 64-bit
* - Docker images
- Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit; Windows Server Core base LTSC 2019, 64-bit; Windows 10, version 20H2, 64-bit
- Ubuntu 22.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit
* - PyPI (PIP package manager)
- See https://pypi.org/project/openvino
* - :doc:`OpenVINO Deployment Manager <openvino_docs_install_guides_deployment_manager_tool>`

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c2f144de249eddf1c159cbc1a27a06ad40f57442efcf75f2f49cc02626fc6875
size 13168

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d3a47af0e18856603b93a15d9415ddddd4ae06058cdfa0e18597e9eb861bf589
size 51390

Binary file not shown.

View File

@ -0,0 +1 @@
const e=document.getElementById("selector");if(!e)throw new Error("cannot find selector document");window.addEventListener("message",i=>{e.style.height=i.data.height+"px"});var o,n;const t=(n=(o=e.contentDocument)==null?void 0:o.body)==null?void 0:n.offsetHeight;t&&(e.style.height=`${t}px`);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,22 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="version" content="0290a24" />
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Download Intel® Distribution of OpenVINO™ Toolkit</title>
<meta
name="description"
content="Download a version of the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows, or macOS."
/>
<script
type="module"
crossorigin
src="./assets/selector-363359f4.js"
></script>
<link rel="stylesheet" href="./assets/selector-5c3f26d1.css" />
</head>
<body>
<div id="root"></div>
</body>
</html>

View File

@ -25,7 +25,7 @@ To use sample applications, install OpenVINO Runtime via one of the following di
* Archive files (recommended) - :doc:`Linux <openvino_docs_install_guides_installing_openvino_from_archive_linux>` | :doc:`Windows <openvino_docs_install_guides_installing_openvino_from_archive_windows>` | :doc:`macOS <openvino_docs_install_guides_installing_openvino_from_archive_macos>`
* :doc:`APT <openvino_docs_install_guides_installing_openvino_apt>` or :doc:`YUM <openvino_docs_install_guides_installing_openvino_yum>` for Linux
* Docker image - :doc:`Linux <openvino_docs_install_guides_installing_openvino_docker_linux>` | :doc:`Windows <openvino_docs_install_guides_installing_openvino_docker_windows>`
* Docker image - :doc:`Linux <openvino_docs_install_guides_installing_openvino_docker_linux>`
* `Build from source <https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build.md>`__
Make sure that you also `install OpenCV <https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO>`__ , as it's required for running sample applications.

View File

@ -1,66 +1,126 @@
# Configurations for Intel® Processor Graphics (GPU) with OpenVINO™ {#openvino_docs_install_guides_configurations_for_intel_gpu}
@sphinxdirective
.. _gpu guide:
To use the OpenVINO™ GPU plugin and offload inference to Intel® Processor Graphics (GPU), Intel® Graphics Driver must be properly configured on your system.
If Intel® Graphics Driver is already installed and you would like to keep it, you can skip the installation steps below.
To use the OpenVINO™ GPU plug-in and offload inference to Intel® Processor Graphics (GPU), the Intel® graphics driver must be properly configured on the system.
Linux
#####
To install the latest available **Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver** for your operating system,
see its `installation guide on GitHub <https://github.com/intel/compute-runtime/releases/latest>`_.
To use a GPU device for OpenVINO inference, you must meet the following prerequisites:
.. note::
- Use a supported Linux kernel as per the `documentation <https://dgpu-docs.intel.com/driver/kernel-driver-types.html>`__
- Install ``intel-i915-dkms`` and ``xpu-smi`` kernel modules as described in the `installation documentation <https://dgpu-docs.intel.com/driver/installation.html>`__
- Install GPU Runtime packages:
If you are using RedHat 8, you can install the OpenCL library as a prerequisite by using the following command:
``http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm``
- `The Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM) Driver <https://github.com/intel/compute-runtime/releases/latest>`__
- `Intel Graphics Memory Management Library <https://github.com/intel/gmmlib>`__
- `Intel® Graphics Compiler for OpenCL™ <https://github.com/intel/intel-graphics-compiler>`__
- `OpenCL ICD loader package <https://github.com/KhronosGroup/OpenCL-ICD-Loader>`__
.. _wsl-instal:
Depending on your operating system, there may be different methods to install the above packages. Below are the instructions on how to install the packages on supported Linux distributions.
You may consider installing one of the earlier versions of the driver, based on your particular setup needs.
.. tab-set::
For instructions and recommendations on the installation of a specific GPU driver release, as well as the list of supported hardware platforms, refer to the `Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL™ Driver GitHub home page <https://github.com/intel/compute-runtime/>`__.
.. tab-item:: Ubuntu 22.04 LTS
:sync: ubuntu22
For instructions specific to discrete graphics platforms, refer to `the dgpu guide <https://dgpu-docs.intel.com/installation-guides/index.html>`__,
including installation guides for Intel® Arc™ A-Series Graphics, Intel® Data Center GPU Flex Series, Intel® Data Center GPU MAX Series, Intel® processor graphics Gen12, and Intel® Iris Xe MAX codename DG1.
Download and install the `deb` packages published `here <https://github.com/intel/compute-runtime/releases/latest>`__ and install the apt package `ocl-icd-libopencl1` with the OpenCL ICD loader.
Alternatively, you can add the apt repository by following the `installation guide <https://dgpu-docs.intel.com/driver/installation.html#ubuntu-install-steps>`__. Then install the `ocl-icd-libopencl1`, `intel-opencl-icd`, `intel-level-zero-gpu` and `level-zero` apt packages:
.. code-block:: sh
apt-get install -y ocl-icd-libopencl1 intel-opencl-icd intel-level-zero-gpu level-zero
.. tab-item:: Ubuntu 20.04 LTS
:sync: ubuntu20
Ubuntu 20.04 LTS is not updated with the latest driver versions, but you can install driver versions up to 22.43 from apt:
.. code-block:: sh
apt-get update && apt-get install -y --no-install-recommends curl gpg gpg-agent && \
curl https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal-legacy main' | tee /etc/apt/sources.list.d/intel.gpu.focal.list && \
apt-get update
apt-get update && apt-get install -y --no-install-recommends intel-opencl-icd intel-level-zero-gpu level-zero
Alternatively, download an older `deb` version from `here <https://github.com/intel/compute-runtime/releases>`__. Note that older driver versions might not include some of the bug fixes and might not be supported on some of the latest platforms. Check the supported hardware for the version you are installing.
.. tab-item:: RedHat UBI 8
:sync: redhat8
Follow the `guide <https://dgpu-docs.intel.com/driver/installation.html#rhel-install-steps>`__ to add Yum repository.
Install the following packages:
.. code-block:: sh
yum install intel-opencl level-zero intel-level-zero-gpu intel-igc-core intel-igc-cm intel-gmmlib intel-ocloc
Install the OpenCL ICD Loader via:
.. code-block:: sh
rpm -ivh http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm
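After installing the packages for your distribution, you can optionally verify that OpenVINO detects the GPU device. This is a minimal sketch using the Python API, assuming the OpenVINO Python package is installed:
.. code-block:: py
from openvino.runtime import Core
core = Core()
print(core.available_devices)  # "GPU" should appear in the list once the driver stack is configured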
.. _gpu guide windows:
Windows
#######
To install the Intel Graphics Driver for Windows on your system, follow the `driver installation guide <https://www.intel.com/content/www/us/en/support/articles/000005629/graphics.html>`_.
To install the Intel Graphics Driver for Windows, follow the `driver installation instructions <https://www.intel.com/content/www/us/en/support/articles/000005629/graphics.html>`_.
To check if you have this driver installed:
To check if the driver has been installed:
1. Type **device manager** in your **Search Windows** box and press Enter. The **Device Manager** opens.
2. Click the drop-down arrow to view the **Display adapters**. You can see the adapter that is installed in your computer:
1. Type **device manager** in the **Search Windows** field and press Enter. **Device Manager** will open.
2. Click the drop-down arrow to display **Display Adapters**. You can see the adapter that is installed in your computer:
.. image:: _static/images/DeviceManager.PNG
:width: 400
3. Right-click the adapter name and select **Properties**.
4. Click the **Driver** tab to see the driver version.
3. Right-click on the adapter name and select **Properties**.
4. Click the **Driver** tab to view the driver version.
.. image:: _static/images/DeviceDriverVersion.PNG
.. image:: _static/images/DeviceDriverVersion.svg
:width: 400
You are done updating your device driver and ready to use your GPU.
Your device driver has been updated and your GPU is now ready to use.
Additional info
###############
Windows Subsystem for Linux (WSL)
#################################
For your reference, the following versions of Intel® Graphics Driver were used in the OpenVINO internal validation:
WSL allows developers to run a GNU/Linux development environment on the Windows operating system. Using the GPU in WSL is very similar to using it in a native Linux environment.
.. note::
Make sure your Intel graphics driver is updated to version **30.0.100.9955** or later. You can download and install the latest GPU host driver `here <https://www.intel.com/content/www/us/en/download/19344/intel-graphics-windows-dch-drivers.html>`__.
Below are the required steps to make it work with OpenVINO:
- Install the GPU drivers as described :ref:`above <wsl-instal>`.
- Run the following commands in PowerShell to update to the latest version of WSL2:
.. code-block:: sh
wsl --update
wsl --shutdown
- When booting Ubuntu 20.04 or Ubuntu 22.04, install the same drivers as described above in the Linux section.
.. note::
In WSL, the GPU device is accessed via the character device `/dev/dxg`, while on a native Linux OS it is accessed via `/dev/dri`.
Additional Resources
####################
The following Intel® Graphics Driver versions were used during OpenVINO's internal validation:
+------------------+-------------------------------------------------------------------------------------------+
| Operating System | Driver version |
@ -80,24 +140,11 @@ For your reference, the following versions of Intel® Graphics Driver were used
What's Next?
############
You can try out the toolkit with:
* `Python Quick Start Example <notebooks/201-vision-monodepth-with-output.html>`_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser.
Visit the :ref:`Tutorials <notebook tutorials>` page for more Jupyter Notebooks to get you started with OpenVINO, such as:
* `OpenVINO Python API Tutorial <notebooks/002-openvino-api-with-output.html>`__
* `Basic image classification program with Hello Image Classification <notebooks/001-hello-world-with-output.html>`__
* `Convert a PyTorch model and use it for image background removal <notebooks/205-vision-background-removal-with-output.html>`__
* `C++ Quick Start Example <openvino_docs_get_started_get_started_demos.html>`__ for step-by-step instructions on building and running a basic image classification C++ application.
Visit the :ref:`Samples <code samples>` page for other C++ example applications to get you started with OpenVINO, such as:
* `Basic object detection with the Hello Reshape SSD C++ sample <openvino_inference_engine_samples_hello_reshape_ssd_README.html>`_
* `Automatic speech recognition C++ sample <openvino_inference_engine_samples_speech_sample_README.html>`_
* :doc:`GPU Device <openvino_docs_OV_UG_supported_plugins_GPU>`
* :doc:`Install Intel® Distribution of OpenVINO™ toolkit for Linux from a Docker Image <openvino_docs_install_guides_installing_openvino_docker_linux>`
* `Docker CI framework for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
* `Get Started with DockerHub CI for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__
* `Dockerfiles with Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/dockerfiles/README.md>`__
@endsphinxdirective

View File

@ -1,180 +1,32 @@
# Install Intel® Distribution of OpenVINO™ toolkit for Linux from a Docker Image {#openvino_docs_install_guides_installing_openvino_docker_linux}
@sphinxdirective
This guide provides steps on creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Linux and using the image on different devices.
Supported operating systems for the Docker Base image:
System Requirements
###################
- Ubuntu 22.04 LTS
- Ubuntu 20.04 LTS
- RedHat UBI 8
.. tab:: Target Operating Systems with Python Versions
The `Docker CI framework <https://github.com/openvinotoolkit/docker_ci/>`__ can generate a Dockerfile, build, test, and deploy an image using the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the OpenVINO™ image to your needs. You can get started easily with pre-built and published docker images. Details on how to get started can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__.
+----------------------------------------------+-------------------------+
| Operating System | Included Python Version |
+==============================================+=========================+
| Ubuntu 18.04 long-term support (LTS), 64-bit | 3.8 |
+----------------------------------------------+-------------------------+
| Ubuntu 20.04 long-term support (LTS), 64-bit | 3.8 |
+----------------------------------------------+-------------------------+
| Red Hat Enterprise Linux 8, 64-bit | 3.8 |
+----------------------------------------------+-------------------------+
To start using them, the following conditions must be met:
.. tab:: Host Operating Systems
- Linux OS or Windows Subsystem for Linux (WSL2)
- Installed docker engine or compatible container engine
- Permissions to run containers (sudo or docker group membership)
* Linux
* Windows Subsystem for Linux 2 (WSL2) on CPU or GPU
* macOS on CPU only
To launch a Linux image on WSL2 when trying to run inference on a GPU, make sure that the following requirements are met:
* Only Windows 10 with the 21H2 update or later, and Windows 11, are supported.
* Intel GPU driver for Windows, version 30.0.100.9684 or newer, needs to be installed. For more details, refer to
`this article at intel.com <https://www.intel.com/content/www/us/en/artificial-intelligence/harness-the-power-of-intel-igpu-on-your-machine.html#articleparagraph_983312434>`__.
* Currently, the Docker images contain a preinstalled recommended version of the OpenCL Runtime with WSL2 support.
Installation
#############
* Use a prebuilt image:
1. `Get a prebuilt image from provided sources <#getting-a-prebuilt-image-from-provided-sources>`__
2. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__
3. `Run samples in the Docker image <#running-samples-in-docker-image>`__
* If you want to customize your image, you can also build a Docker image manually:
1. `Prepare a Dockerfile <#preparing-a-dockerfile>`__
2. `Configure the Docker image <#configuring-the-image-for-different-devices>`__
3. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__
4. `Run samples in the Docker image <#running-samples-in-docker-image>`__
Getting a Prebuilt Image from Provided Sources
++++++++++++++++++++++++++++++++++++++++++++++
You can find prebuilt images on:
- `Docker Hub <https://hub.docker.com/u/openvino>`__
- `Red Hat Quay.io <https://quay.io/organization/openvino>`__
- `Red Hat Ecosystem Catalog (runtime image) <https://catalog.redhat.com/software/containers/intel/openvino-runtime/606ff4d7ecb5241699188fb3>`__
- `Red Hat Ecosystem Catalog (development image) <https://catalog.redhat.com/software/containers/intel/openvino-dev/613a450dc9bc35f21dc4a1f7>`__
- `Azure Marketplace <https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino>`__
Preparing a Dockerfile
++++++++++++++++++++++
You can use the `available Dockerfiles on GitHub <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__
or generate a Dockerfile with your settings via `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__
which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You can also try our `Tutorials <https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials>`__
which demonstrate the usage of Docker containers with OpenVINO.
Configuring the Image for Different Devices
+++++++++++++++++++++++++++++++++++++++++++
If you want to run inference on a CPU, no extra configuration is needed.
Go to `Run the image on different devices <#running-the-docker-image-on-different-devices>`__ for the next step.
If you want to run inference on a GPU, follow the instructions provided in the guide on
:doc:`Configuration for Intel GPU <openvino_docs_install_guides_configurations_for_intel_gpu>`
Running the Docker Image on Different Devices
+++++++++++++++++++++++++++++++++++++++++++++
Running the Image on CPU
-------------------------
Run the Docker image with the following command:
.. code-block:: sh
docker run -it --rm <image_name>
Note the following:
- The kernel reports the same information for all containers as for a native application, for example, CPU and memory information.
- All instructions available to the host process are also available to the process in the container, including, for example, AVX2 and AVX512. There are no restrictions.
- Docker does not use virtualization or emulation. The process in Docker is just a regular Linux process, isolated from the external world at the kernel level, so the performance loss is minor.
Running the Image on GPU
-------------------------
OpenVINO's `Docker <https://docs.docker.com/>`__ and `Bare Metal <https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html#doxid-ovms-docs-deploying-server>` distributions are identical, so the documentation applies to both.
.. note::
Only Intel® integrated graphics are supported.
The OpenVINO development environment in a docker container is also available in the `notebook repository <https://github.com/openvinotoolkit/openvino_notebooks>`__ . It can be implemented in `OpenShift RedHat OpenData Science (RHODS) <https://github.com/openvinotoolkit/operator/blob/main/docs/notebook_in_rhods.md>`__.
Note the following:
- GPU is not available in the container by default. You must attach it to the container.
- Kernel driver must be installed on the host.
- In the container, a non-root user must be in the ``video`` and ``render`` groups.
To add a user to the render group, follow the
`Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu 20.04 <https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md>`__.
To make GPU available in the container, attach the GPU to the container using ``--device /dev/dri`` option and run the container:
* Ubuntu 18 or RHEL 8:
.. code-block:: sh
docker run -it --rm --device /dev/dri <image_name>
.. note::
If your host system is Ubuntu 20, follow the
`Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04 <https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md>`__.
* WSL2:
.. code-block:: sh
docker run -it --rm --device /dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl <image_name>
.. note::
To launch a Linux image on WSL2, make sure that the additional `System Requirements <system-requirements>`__ are met.
Running Samples in Docker Image
###############################
To run the ``Hello Classification Sample`` on a specific inference device, run the following commands:
.. tab-set::
.. tab-item:: CPU
.. code-block:: sh
docker run -it --rm <image_name>
/bin/bash -c "cd ~ && omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 /opt/intel/openvino/samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp CPU"
.. tab-item:: GPU
.. code-block:: sh
docker run -itu root:root --rm --device /dev/dri:/dev/dri <image_name>
/bin/bash -c "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp GPU"
Additional Resources
###############################
- `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ for Intel® Distribution of OpenVINO™ toolkit.
The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
- `Intel® Distribution of OpenVINO™ toolkit home page <https://software.intel.com/en-us/openvino-toolkit>`__
- `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__
More information about Docker CI for Intel® Distribution of OpenVINO™ toolkit can be found `here <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__.
* `Docker CI framework for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/README.md>`__
* `Get Started with DockerHub CI for Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/get-started.md>`__
* `Dockerfiles with Intel® Distribution of OpenVINO™ toolkit <https://github.com/openvinotoolkit/docker_ci/blob/master/dockerfiles/README.md>`__
@endsphinxdirective

View File

@ -1,234 +0,0 @@
# Install Intel® Distribution of OpenVINO™ toolkit for Windows from Docker Image {#openvino_docs_install_guides_installing_openvino_docker_windows}
@sphinxdirective
This guide provides steps for creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Windows and using the Docker image on different devices.
.. _system-requirements-docker-windows:
System Requirements
####################
.. tab:: Target Operating System with Python Versions
+------------------------------------+--------------------------+
| Operating System | Supported Python Version |
+====================================+==========================+
| Windows Server Core base LTSC 2019 | 3.8 |
+------------------------------------+--------------------------+
| Windows 10, version 20H2 | 3.8 |
+------------------------------------+--------------------------+
.. tab:: Host Operating Systems
* Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions
* Windows Server 2016 or higher
Additional Requirements for GPU
+++++++++++++++++++++++++++++++
To use GPU Acceleration in Windows containers, make sure that the following requirements for Windows host, OpenVINO and Docker are met:
- `Windows requirements <https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration>`__:
- The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher.
- The container base image must be ``mcr.microsoft.com/windows:1809`` or higher. Windows Server Core and Nano Server container images are not currently supported.
- The container host must be running Docker Engine 19.03 or higher.
- The container host must have GPU running display drivers of version WDDM 2.5 or higher.
- GPU requirement for OpenVINO: Intel Graphics Driver for Windows of version 15.65 or higher.
- `Docker isolation mode requirements <https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container>`__:
- Windows host and container version tags must match.
- `Windows host and container isolation process support <https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility>`__.
Installation Flow
####################
There are two ways to install OpenVINO with Docker. You can choose either of them according to your needs:
* Use a prebuilt image. Do the following steps:
1. `Get a prebuilt image from provided sources <#getting-a-prebuilt-image-from-provided-sources>`__.
2. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__.
* If you want to customize your image, you can also build a Docker image manually by using the following steps:
1. `Prepare a Dockerfile <#preparing-a-dockerfile>`__.
2. `Configure the Docker image <#configuring-the-docker-image-for-different-devices>`__.
3. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__.
Getting a Prebuilt Image from Provided Sources
##############################################
You can find prebuilt images on:
- `Docker Hub <https://hub.docker.com/u/openvino>`__
- `Azure Marketplace <https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino>`__
Preparing a Dockerfile
######################
You can use the `available Dockerfiles on GitHub <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__ or generate a Dockerfile with your settings via `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.
Configuring the Docker Image for Different Devices
##################################################
Installing Additional Dependencies for CPU
++++++++++++++++++++++++++++++++++++++++++
Installing CMake
----------------
To add CMake to the image, add the following commands to the Dockerfile:
.. code-block:: bat
RUN powershell.exe -Command `
Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; `
Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; `
Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force
RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%"
In case of proxy issues, please add the ``ARG HTTPS_PROXY`` and ``-Proxy %%HTTPS_PROXY%`` settings to the ``powershell.exe`` command in the Dockerfile. Then build a Docker image:
.. code-block:: bat
docker build . -t <image_name> `
--build-arg HTTPS_PROXY=<https://your_proxy_server:port>
Installing Microsoft Visual Studio Build Tools
----------------------------------------------
You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the `offline <https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019>`__ or `online <https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019>`__ installers for Build Tools.
Microsoft Visual Studio Build Tools are licensed as a supplement to your existing Microsoft Visual Studio license.
Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses.
To add MSBuild 2019 to the image, add the following commands to the Dockerfile:
.. code-block:: bat
RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe
RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache `
--installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" `
--add Microsoft.VisualStudio.Workload.MSBuildTools `
--add Microsoft.VisualStudio.Workload.UniversalBuildTools `
--add Microsoft.VisualStudio.Workload.VCTools --includeRecommended `
--remove Microsoft.VisualStudio.Component.Windows10SDK.10240 `
--remove Microsoft.VisualStudio.Component.Windows10SDK.10586 `
--remove Microsoft.VisualStudio.Component.Windows10SDK.14393 `
--remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned
In case of proxy issues, please use the `offline installer for Build Tools <https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019>`__.
Configuring the Image for GPU
+++++++++++++++++++++++++++++
.. note::
Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and do the following steps to build the image manually.
1. Reuse one of `available Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__. You can also use your own Dockerfile.
2. Check your `Windows host and container isolation process compatibility <https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility>`__.
3. Find the appropriate Windows container base image on `DockerHub <https://hub.docker.com/_/microsoft-windows>`__ and set up your host/container version in the ``FROM`` Dockerfile instruction.
For example, in the ``openvino_c_dev_<version>.dockerfile``, change:
.. code-block:: bat
FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base
to:
.. code-block:: bat
FROM mcr.microsoft.com/windows:20H2
4. Build the Docker image by running the following command:
.. code-block:: bat
docker build --build-arg package_url=<OpenVINO pkg> -f <Dockerfile> -t <image_name> .
5. Copy ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder to any ``temp`` directory:
.. code-block:: bat
mkdir C:\tmp
copy C:\Windows\System32\OpenCL.dll C:\tmp
Running the Docker Image on Different Devices
#############################################
Running the Image on CPU
++++++++++++++++++++++++
To start the interactive session, run the following command:
.. code-block:: bat
docker run -it --rm <image_name>
If you want to try some samples, run the image with the following command:
.. code-block:: bat
docker run -it --rm <image_name>
cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU"
Running the Image on GPU
++++++++++++++++++++++++
.. note::
Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and `configure and build the image manually <#configuring-the-image-for-gpu>`__ before you can run inferences on a GPU.
1. To try inference on a GPU, run the image with the following command:
.. code-block:: bat
docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp <image_name>
where
- ``--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599`` is a reserved interface class GUID for a GPU device.
- ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409`` is the path to the OpenCL driver home directory. To find it on your PC, search for directories matching ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*``.
- ``C:\tmp`` is the folder with the copy of ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder.
2. Copy ``OpenCL.dll`` to the ``C:\Windows\System32`` folder inside the container and set the appropriate registry entry. Now you can run inference on a GPU device:
.. code-block:: bat
copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0
For example, run the ``Hello Classification Python`` sample with the following command:
.. code-block:: bat
omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU
Additional Resources
####################
- `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs.
- Intel® Distribution of OpenVINO™ toolkit home page: `https://software.intel.com/en-us/openvino-toolkit <https://software.intel.com/en-us/openvino-toolkit>`__
- `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__
@endsphinxdirective

View File

@ -22,11 +22,11 @@ Intel® Distribution of OpenVINO™ toolkit is a comprehensive toolkit for devel
Install OpenVINO
################
.. button-link:: https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html
:color: primary
:outline:
.. raw:: html
Check out the OpenVINO Download Page :fas:`fa-external-link-alt`
<script type="module" crossorigin src="_static/selector-tool/assets/index-89e3365b.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<iframe id="selector" src="_static/selector-tool/selector-0290a24.html" style="width: 100%; border: none" title="Download Intel® Distribution of OpenVINO™ Toolkit"></iframe>
OpenVINO installation package is distributed in two parts: OpenVINO Runtime and OpenVINO Development Tools.

View File

@ -9,7 +9,6 @@
Use Archive <openvino_docs_install_guides_installing_openvino_from_archive_windows>
Use PyPI <openvino_docs_install_guides_installing_openvino_pip>
Use Conda Forge <openvino_docs_install_guides_installing_openvino_conda>
Use Docker <openvino_docs_install_guides_installing_openvino_docker_windows>
If you want to install OpenVINO™ Runtime on Windows, you have the following options:
@ -17,7 +16,6 @@ If you want to install OpenVINO™ Runtime on Windows, you have the following op
* :doc:`Install OpenVINO Runtime from an Archive File <openvino_docs_install_guides_installing_openvino_from_archive_windows>`
* :doc:`Install OpenVINO Runtime using PyPI <openvino_docs_install_guides_installing_openvino_pip>`
* :doc:`Install OpenVINO Runtime using Conda Forge <openvino_docs_install_guides_installing_openvino_conda>`
* :doc:`Install OpenVINO using Docker <openvino_docs_install_guides_installing_openvino_docker_windows>`
For a full selection of distribution channels,
see the `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__

View File

@ -54,13 +54,14 @@ fi
# Selftest
if [ -n "$selftest" ] ; then
for image in centos7 centos8 rhel8 rhel9.1 \
almalinux8.7 amzn2 \
fedora34 fedora35 fedora36 fedora37 fedora38 \
raspbian9 debian9 ubuntu18.04 \
raspbian10 debian10 ubuntu20.04 ubuntu20.10 ubuntu21.04 \
raspbian11 debian11 ubuntu21.10 ubuntu22.04 \
raspbian12 debian12 ubuntu22.10 ubuntu23.04 ; do
for image in centos:7 centos:8 rhel:8 rhel:9.1 \
almalinux:8.7 amazonlinux:2 \
fedora:34 fedora:35 fedora:36 fedora:37 fedora:38 \
opensuse/leap:15.3 \
raspbian:9 debian:9 ubuntu:18.04 \
raspbian:10 debian:10 ubuntu:20.04 ubuntu:20.10 ubuntu:21.04 \
raspbian:11 debian:11 ubuntu:21.10 ubuntu:22.04 \
raspbian:12 debian:12 ubuntu:22.10 ubuntu:23.04 ; do
for opt in "-h" "-p" "-e -p" "-n" "-n -e" "-y" "-y -e" ; do
echo "||"
echo "|| Test $image / '$opt'"
@ -118,14 +119,14 @@ if [ "$os" == "raspbian9" ] || [ "$os" == "debian9" ] ; then
# which are not supported by OpenVINO
pkgs_core=(libpugixml1v5)
pkgs_gpu=()
pkgs_gpu=(ocl-icd-libopencl1)
pkgs_python=()
pkgs_dev=(pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo)
elif [ "$os" == "ubuntu18.04" ] ; then
pkgs_core=(libtbb2 libpugixml1v5)
pkgs_gpu=()
pkgs_gpu=(ocl-icd-libopencl1)
pkgs_python=(python3.8 libpython3.8 python3.8-venv python3-pip)
pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json-dev make curl sudo)
@ -134,7 +135,7 @@ elif [ "$os" == "ubuntu20.04" ] || [ "$os" == "debian10" ] || [ "$os" == "raspbi
[ "$os" == "ubuntu22.10" ] || [ "$os" == "ubuntu23.04" ] || [ "$os" == "debian12" ] || [ "$os" == "raspbian12" ]; then
pkgs_core=(libpugixml1v5)
pkgs_gpu=()
pkgs_gpu=(ocl-icd-libopencl1)
pkgs_python=(python3 python3-venv python3-pip)
pkgs_dev=(cmake pkg-config g++ gcc libc6-dev libgflags-dev zlib1g-dev nlohmann-json3-dev make curl sudo)
@ -195,6 +196,7 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
if [ "$os" == "centos7" ] || [ "$os" == "amzn2" ] ; then
pkgs_core=("tbb.$arch" "pugixml.$arch" "gflags.$arch")
pkgs_gpu+=("ocl-icd.$arch")
pkgs_dev+=("gflags-devel.$arch")
extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm")
elif [ "$os" == "centos8" ] || [ "$os" == "rhel8" ] || [ "$os" == "almalinux8.7" ] ; then
@ -203,9 +205,7 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
"https://download-ib01.fedoraproject.org/pub/epel/8/Everything/$arch/Packages/p/pugixml-1.13-1.el8.$arch.rpm"
"https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-2.1.2-6.el8.$arch.rpm"
)
pkgs_gpu+=(
"http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm"
)
pkgs_gpu+=("http://mirror.centos.org/centos/8-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.12-1.el8.$arch.rpm")
pkgs_python+=(python38 python38-pip)
pkgs_dev+=(
"https://vault.centos.org/centos/8/PowerTools/$arch/os/Packages/gflags-devel-2.1.2-6.el8.$arch.rpm"
@ -218,13 +218,14 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
"https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/p/pugixml-1.13-1.el9.$arch.rpm"
"https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-2.2.2-9.el9.$arch.rpm"
)
pkgs_gpu+=("https://mirror.stream.centos.org/9-stream/AppStream/$arch/os/Packages/ocl-icd-2.2.13-4.el9.$arch.rpm")
pkgs_python=(python3 python3-pip)
pkgs_dev+=("https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm")
extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm")
fi
elif [ "$os" == "opensuse-leap15.3" ] ; then
pkgs_core=(libtbb2 libtbbmalloc2 libpugixml1)
pkgs_gpu=()
pkgs_gpu=(libOpenCL1)
pkgs_python=(python39-base python39 python39-venv python39-pip)
pkgs_dev=(cmake pkg-config gcc-c++ gcc gflags-devel-static zlib-devel nlohmann_json-devel make curl sudo)
else

View File

@ -122,7 +122,7 @@
* @ingroup ov_c_api
* @brief The definitions & operations about tensor
*
* @defgroup ov_remote_context_c_api ov_remote_context
* @defgroup ov_remote_context_c_api Remote Context
* @ingroup ov_c_api
* @brief Set of functions representing of RemoteContext
*/

View File

@ -1,6 +1,6 @@
# used in multiple components
onnx==1.13.1 # Python bindings, ONNX Frontend
numpy>=1.16.6,<1.25 # Python bindings, frontends
numpy>=1.16.6,<1.26 # Python bindings, frontends
protobuf>=3.18.1,<4.0.0 # Python bindings, frontends
# pytest

View File

@ -17,8 +17,8 @@ from openvino._pyopenvino import get_version
__version__ = get_version()
# main classes
from openvino._pyopenvino import FrontEndManager
from openvino._pyopenvino import FrontEnd
from openvino.frontend.frontend import FrontEndManager
from openvino.frontend.frontend import FrontEnd
from openvino._pyopenvino import InputModel
from openvino._pyopenvino import NodeContext
from openvino._pyopenvino import Place

View File

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from typing import Union
from openvino._pyopenvino import FrontEnd as FrontEndBase
from openvino._pyopenvino import FrontEndManager as FrontEndManagerBase
from openvino._pyopenvino import InputModel
from openvino.runtime import Model
class FrontEnd(FrontEndBase):
def __init__(self, fe: FrontEndBase) -> None:
super().__init__(fe)
def convert(self, model: Union[Model, InputModel]) -> Model:
converted_model = super().convert(model)
if isinstance(model, InputModel):
return Model(converted_model)
return converted_model
def convert_partially(self, model: InputModel) -> Model:
return Model(super().convert_partially(model))
def decode(self, model: InputModel) -> Model:
return Model(super().decode(model))
def normalize(self, model: Model) -> Model:
return Model(super().normalize(model))
class FrontEndManager(FrontEndManagerBase):
def load_by_framework(self, framework: str) -> Union[FrontEnd, None]:
fe = super().load_by_framework(framework)
if fe is not None:
return FrontEnd(fe)
return fe
def load_by_model(self, model_path: str) -> Union[FrontEnd, None]:
fe = super().load_by_model(model_path)
if fe is not None:
return FrontEnd(fe)
return fe
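A minimal usage sketch of the wrapper above; the "model.onnx" path is a placeholder and the availability of the ONNX frontend is assumed:
from openvino.frontend import FrontEndManager
fem = FrontEndManager()
fe = fem.load_by_framework("onnx")      # returns the wrapped FrontEnd, or None if the frontend is unavailable
input_model = fe.load("model.onnx")     # "model.onnx" is a placeholder path
ov_model = fe.convert(input_model)      # with the wrapper, the result is an openvino.runtime.Model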

View File

@ -9,6 +9,7 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType
from openvino.runtime import op, PartialShape, Type as OVType, OVAny, Shape
import typing
from packaging.version import parse
import torch
import numpy as np
@ -133,24 +134,27 @@ class TorchScriptPythonDecoder (Decoder):
import inspect
def prepare_example_inputs(inputs, input_signature):
if inputs is not None:
is_torch_2 = parse(torch.__version__) >= parse("2.0.0")
if isinstance(inputs, dict):
if input_signature is not None:
ordered_inputs = []
if input_signature is not None:
used_sign = []
for key in input_signature:
if key not in inputs:
continue
ordered_inputs.append(inputs[key])
used_sign.append(key)
inputs = ordered_inputs
input_signature = used_sign
else:
inputs = list(inputs.values())
input_signature = input_signature[:len(inputs)]
ordered_inputs = list(inputs.values())
if is_torch_2:
return {"example_kwarg_inputs": inputs}, input_signature
else:
inputs = ordered_inputs
if isinstance(inputs, torch.Tensor):
inputs = [inputs]
return inputs, input_signature
return {"example_inputs": inputs}, input_signature
if isinstance(pt_module, torch.nn.Module):
pt_module.eval()
@ -160,14 +164,14 @@ class TorchScriptPythonDecoder (Decoder):
if example_inputs is None:
scripted = torch.jit.script(pt_module)
else:
inputs, input_signature = prepare_example_inputs(example_inputs, input_signature)
input_parameters, input_signature = prepare_example_inputs(example_inputs, input_signature)
try:
scripted = torch.jit.trace(pt_module, inputs)
scripted = torch.jit.trace(pt_module, **input_parameters)
except Exception:
try:
scripted = torch.jit.script(pt_module)
except Exception:
scripted = torch.jit.trace(pt_module, inputs, strict=False)
scripted = torch.jit.trace(pt_module, **input_parameters, strict=False)
skip_freeze = False
for n in scripted.inlined_graph.nodes():
# TODO: switch off freezing for all traced models
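For context, a minimal sketch of the two tracing call styles this change switches between; ToyModel and the example tensors are hypothetical, and PyTorch 2.x is assumed for the keyword form:
import torch
class ToyModel(torch.nn.Module):  # hypothetical toy module for illustration
    def forward(self, x, y):
        return x + y
model = ToyModel().eval()
example = {"x": torch.ones(2), "y": torch.zeros(2)}
traced = torch.jit.trace(model, example_kwarg_inputs=example)  # PyTorch >= 2.0: keyword example inputs
# traced = torch.jit.trace(model, example_inputs=(torch.ones(2), torch.zeros(2)))  # older PyTorch: positional only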

View File

@ -15,7 +15,6 @@ __version__ = get_version()
# Openvino pybind bindings and python extended classes
from openvino._pyopenvino import Dimension
from openvino._pyopenvino import Model
from openvino._pyopenvino import Input
from openvino._pyopenvino import Output
from openvino._pyopenvino import Node
@ -36,6 +35,7 @@ from openvino._pyopenvino import RTMap
from openvino.runtime.ie_api import Core
from openvino.runtime.ie_api import CompiledModel
from openvino.runtime.ie_api import InferRequest
from openvino.runtime.ie_api import Model
from openvino.runtime.ie_api import AsyncInferQueue
from openvino._pyopenvino import Version
from openvino._pyopenvino import Tensor

View File

@ -7,12 +7,12 @@ from pathlib import Path
import numpy as np
from openvino._pyopenvino import Model
from openvino._pyopenvino import Model as ModelBase
from openvino._pyopenvino import Core as CoreBase
from openvino._pyopenvino import CompiledModel as CompiledModelBase
from openvino._pyopenvino import AsyncInferQueue as AsyncInferQueueBase
from openvino._pyopenvino import ConstOutput
from openvino._pyopenvino import Tensor
from openvino._pyopenvino import Node
from openvino.runtime.utils.data_helpers import (
OVDict,
@ -22,6 +22,21 @@ from openvino.runtime.utils.data_helpers import (
)
class Model(ModelBase):
def __init__(self, *args: Any, **kwargs: Any) -> None:
if args and not kwargs:
if isinstance(args[0], ModelBase):
super().__init__(args[0])
elif isinstance(args[0], Node):
super().__init__(*args)
else:
super().__init__(*args)
if args and kwargs:
super().__init__(*args, **kwargs)
if kwargs and not args:
super().__init__(**kwargs)
class InferRequest(_InferRequestWrapper):
"""InferRequest class represents infer request which can be run in asynchronous or synchronous manners."""
@ -160,6 +175,9 @@ class CompiledModel(CompiledModelBase):
self._infer_request: Optional[InferRequest] = None
super().__init__(other)
def get_runtime_model(self) -> Model:
return Model(super().get_runtime_model())
def create_infer_request(self) -> InferRequest:
"""Creates an inference request object used to infer the compiled model.
@ -368,6 +386,11 @@ class Core(CoreBase):
between several Core instances. The recommended way is to have a single
Core instance per application.
"""
def read_model(self, model: Union[str, bytes, object], weights: Union[object, str, bytes, Tensor] = None) -> Model:
if weights is not None:
return Model(super().read_model(model, weights))
else:
return Model(super().read_model(model))
def compile_model(
self,

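A minimal usage sketch of the extended classes above; the ``model.xml`` path is a placeholder for an existing IR file:
from openvino.runtime import Core, Model
core = Core()
ov_model = core.read_model("model.xml")       # returns the Python-extended Model wrapper
assert isinstance(ov_model, Model)
compiled = core.compile_model(ov_model, "CPU")
runtime_model = compiled.get_runtime_model()  # also wrapped into the extended Model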
View File

@ -30,6 +30,11 @@ void regclass_frontend_FrontEnd(py::module m) {
py::class_<FrontEnd, std::shared_ptr<FrontEnd>> fem(m, "FrontEnd", py::dynamic_attr(), py::module_local());
fem.doc() = "openvino.frontend.FrontEnd wraps ov::frontend::FrontEnd";
fem.def(py::init([](const std::shared_ptr<FrontEnd>& other) {
return other;
}),
py::arg("other"));
fem.def(
"load",
[](FrontEnd& self, const py::object& py_obj) {

View File

@ -53,6 +53,11 @@ void regclass_graph_Model(py::module m) {
py::class_<ov::Model, std::shared_ptr<ov::Model>> model(m, "Model", py::module_local());
model.doc() = "openvino.runtime.Model wraps ov::Model";
model.def(py::init([](const std::shared_ptr<ov::Model>& other) {
return other;
}),
py::arg("other"));
model.def(py::init([](const ov::ResultVector& res,
const std::vector<std::shared_ptr<ov::Node>>& nodes,
const ov::ParameterVector& params,

View File

@ -376,7 +376,7 @@ def test_get_perf_counts(device):
request = exec_net.requests[0]
request.infer({'data': img})
pc = request.get_perf_counts()
assert pc['29']["status"] == "EXECUTED"
assert pc['29/WithoutBiases']["status"] == "EXECUTED"
del exec_net
del ie_core
del net

View File

@ -63,6 +63,9 @@ public:
size_t get_loop_count() const { return m_map.size(); }
const std::map<size_t, LoopInfoPtr>& get_map() const;
// Return outer Loop IDs
static std::vector<size_t> get_outer_expr_loops(const ExpressionPtr& expr, size_t loop_id);
void mark_loop(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
size_t loop_depth, size_t vector_size);
@ -74,6 +77,33 @@ public:
const std::vector<ExpressionPort>& entries,
const std::vector<ExpressionPort>& exits);
void fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper = true);
void fuse_loops(LinearIR::constExprIt loop_begin_target, LinearIR::constExprIt loop_end_target,
size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper = true);
// The following methods update the ports of LoopInfo. They preserve the order of ports!
// Reminder: the order is important to find Loop bounds (the first and last expressions)
// - Update LoopPort - insert new loop target ports in place of the existing ones.
// - Update ExpressionPort in the LoopPort - preserving the port parameters. This is a softer method since an ExpressionPort may not be a port of the Loop
template<typename T>
void update_loop_port(size_t loop_id, const T& actual_port, const std::vector<T>& target_ports, bool is_entry = true);
template<typename T>
void update_loops_port(const std::vector<size_t>& loop_ids, const T& actual_port,
const std::vector<T>& target_ports, bool is_entry = true) {
for (auto loop_id : loop_ids) {
update_loop_port(loop_id, actual_port, target_ports, is_entry);
}
}
// Sort Loop Ports by expression locations in Linear IR
void sort_loop_ports(LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos, size_t loop_id);
// When the previous expression has been replaced with new expressions (decomposition), this method updates the corresponding Loop.
// If ports of the decomposed expression were Loop ports, these Loop ports may be updated via the `entries` and `exits` parameters.
// Note: This method should be removed once Softmax decomposition is moved to the data flow pipeline, since
// all decompositions should be called in that pipeline
void expression_replacement(constExprIt new_expr_begin, constExprIt new_expr_end, const ExpressionPtr& decomposed_expr,
size_t loop_id, const std::vector<ExpressionPort>& new_entries, const std::vector<ExpressionPort>& exits);
void get_loop_bounds(const LinearIR& linear_ir,
size_t loop_id,
LinearIR::constExprIt& loop_begin_pos,
@ -85,11 +115,19 @@ public:
LinearIR::constExprIt& loop_end_pos,
size_t loop_id);
private:
static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
std::vector<ExpressionPort>& entries,
std::vector<ExpressionPort>& exits);
static void fuse_loop_ports(std::vector<LinearIR::LoopManager::LoopPort>& exit_points,
std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
size_t loop_id);
/* ===== Methods for working with the Loop IDs of an Expression ===== */
// Notes:
// - These methods don't update the corresponding LoopInfo
// - These methods should be private
// TODO [112195] : fix these notes
void replace_loop_id(const ExpressionPtr& expr, size_t prev_id, size_t new_id);
void remove_loop_id(const ExpressionPtr& expr, size_t id);
// Inserts the loop ID before (as an outer Loop) or after (as an inner Loop) the target ID in the vector of identifiers
@ -100,12 +138,6 @@ public:
void insert_loop_id(const ExpressionPtr& expr, size_t new_id, bool before = true, size_t target_id = SIZE_MAX);
void insert_loop_ids(const ExpressionPtr& expr, const std::vector<size_t>& new_ids, bool before = true, size_t target_id = SIZE_MAX);
private:
static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
std::vector<ExpressionPort>& entries,
std::vector<ExpressionPort>& exits);
std::map<size_t, LoopInfoPtr> m_map = {};
size_t next_id = 0;
};
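// Illustrative usage sketch (not part of the original sources) of update_loops_port():
// a pass that replaces an expression port can propagate the change to every Loop that
// referenced it. `loop_manager`, `old_port` and `new_expr` are hypothetical names here.
//
//     const auto loop_ids = old_port.get_expr()->get_loop_ids();
//     const auto new_port = new_expr->get_input_port(0);
//     loop_manager->update_loops_port(loop_ids, old_port, {new_port}, /*is_entry=*/true);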

View File

@ -53,8 +53,8 @@ private:
const std::shared_ptr<ExpressionPort>& current_entry_point,
size_t current_loop_id, size_t target_loop_id,
LinearIR::constExprIt& current_loop_begin_pos, LinearIR::constExprIt& current_loop_end_pos);
static void fuse_points(std::vector<LinearIR::LoopManager::LoopPort>& exit_points, std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos);
static void move(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id,
LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, LinearIR::constExprIt pos);
};
} // namespace pass

View File

@ -27,13 +27,9 @@ public:
bool run(LinearIR& linear_ir) override;
private:
size_t get_count(const PortDescriptorPtr& port_desc) const;
bool insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it);
bool insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it);
void update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector<size_t>& loop_ids,
const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry = true);
void update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info,
const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry = true);
size_t get_count(const PortDescriptorPtr& port_desc) const;
size_t m_vector_size;
};

View File

@ -62,6 +62,13 @@ LoopInfoPtr LinearIR::LoopManager::get_loop_info(size_t index) const {
return it->second;
}
std::vector<size_t> LinearIR::LoopManager::get_outer_expr_loops(const ExpressionPtr& expr, size_t loop_id) {
const auto loop_ids = expr->get_loop_ids();
const auto it = std::find(loop_ids.cbegin(), loop_ids.cend(), loop_id);
OPENVINO_ASSERT(it != loop_ids.cend(), "Loop ID hasn't been found");
return std::vector<size_t>(loop_ids.cbegin(), it);
}
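// Worked example (illustrative): if expr->get_loop_ids() returns {3, 1, 0}, ordered from the
// outermost to the innermost Loop, and loop_id == 1, the method returns {3}: the IDs of the
// Loops that are outer relative to Loop 1.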
void LinearIR::LoopManager::get_loop_bounds(const LinearIR &linear_ir,
size_t loop_id,
LinearIR::constExprIt &loop_begin_pos,
@ -207,7 +214,172 @@ void LinearIR::LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos,
insert_loop_id(*expr_it, loop_id);
}
}
void LinearIR::LoopManager::fuse_loops(const LinearIR& linear_ir, size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper) {
LinearIR::constExprIt loop_begin_target, loop_end_target;
get_loop_bounds(linear_ir, fuse_into_upper ? loop_id_lower : loop_id_upper, loop_begin_target, loop_end_target);
fuse_loops(loop_begin_target, loop_end_target, loop_id_upper, loop_id_lower, fuse_into_upper);
}
void LinearIR::LoopManager::fuse_loops(LinearIR::constExprIt loop_begin_target, LinearIR::constExprIt loop_end_target,
size_t loop_id_upper, size_t loop_id_lower, bool fuse_into_upper) {
OPENVINO_ASSERT(m_map.count(loop_id_upper) == 1 && m_map.count(loop_id_lower) == 1,
"Failed Loop Fusion: the Loop with the Loop ID isn't existed");
const auto& loop_info_upper = m_map[loop_id_upper];
const auto& loop_info_lower = m_map[loop_id_lower];
auto entry_points_upper = loop_info_upper->entry_points;
auto exit_points_upper = loop_info_upper->exit_points;
auto entry_points_lower = loop_info_lower->entry_points;
auto exit_points_lower = loop_info_lower->exit_points;
fuse_loop_ports(exit_points_upper, entry_points_lower, loop_id_upper);
std::vector<LoopManager::LoopPort> new_entries = entry_points_upper;
new_entries.insert(new_entries.end(), entry_points_lower.begin(), entry_points_lower.end());
std::vector<LoopManager::LoopPort> new_exits = exit_points_upper;
new_exits.insert(new_exits.end(), exit_points_lower.begin(), exit_points_lower.end());
auto& loop_info = fuse_into_upper ? loop_info_upper : loop_info_lower;
loop_info->entry_points = new_entries;
loop_info->exit_points = new_exits;
const auto& from = fuse_into_upper ? loop_id_lower : loop_id_upper;
const auto& to = fuse_into_upper ? loop_id_upper : loop_id_lower;
for (auto it = loop_begin_target; it != loop_end_target; ++it) {
const auto& expr = *it;
replace_loop_id(expr, from, to);
}
remove_loop_info(from);
}
void LinearIR::LoopManager::fuse_loop_ports(std::vector<LinearIR::LoopManager::LoopPort>& exit_points,
std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
size_t loop_id) {
auto is_loop_id_found = [](const std::vector<size_t>& ids, size_t id) {
return std::find(ids.cbegin(), ids.cend(), id) != ids.cend();
};
std::vector<LinearIR::LoopManager::LoopPort> new_exit_points;
for (const auto& exit_point : exit_points) {
const auto consumers_inputs = exit_point.expr_port->get_connected_ports();
std::set<LinearIR::LoopManager::LoopPort> mapped_entry_points;
std::set<ExpressionPtr> outside_consumers;
for (const auto& consumer_input : consumers_inputs) {
const auto entry_point_it = std::find_if(entry_points.begin(), entry_points.end(),
[&consumer_input](const LoopManager::LoopPort& point) {
return *point.expr_port.get() == consumer_input;
});
if (entry_point_it != entry_points.end()) {
mapped_entry_points.insert(*entry_point_it);
continue;
}
const auto& consumer = consumer_input.get_expr();
const auto loop_ids = consumer->get_loop_ids();
if (!is_loop_id_found(loop_ids, loop_id)) {
outside_consumers.insert(consumer);
}
}
// Remove entry points which are mapped
auto last_point = entry_points.end();
for (const auto& mapped_entry_point : mapped_entry_points) {
last_point = std::remove(entry_points.begin(), last_point, mapped_entry_point);
}
entry_points.resize(entry_points.size() - mapped_entry_points.size());
// Leave exit point if there are consumers outside after fusion
if (!outside_consumers.empty()) {
new_exit_points.push_back(exit_point);
}
}
exit_points = new_exit_points;
}
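// Illustrative note on the bookkeeping above (not from the original sources): if an exit port
// of the upper Loop is consumed only by an entry port of the lower Loop, both ports are dropped,
// since the connection becomes internal to the fused Loop. If that exit port also feeds an
// expression that is not marked with `loop_id`, it is kept as an exit port of the fused Loop.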
template<>
void LinearIR::LoopManager::update_loop_port(size_t loop_id, const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports,
bool is_entry) {
const auto& loop_info = get_loop_info(loop_id);
auto& ports = is_entry ? loop_info->entry_points : loop_info->exit_points;
auto port_it = std::find_if(ports.begin(), ports.end(),
[&actual_port](const LoopPort& point) { return *point.expr_port.get() == actual_port; });
// In some cases the actual ExpressionPort may not be a LoopPort. We shouldn't throw an exception here, since an ExpressionPort is a weaker condition than a LoopPort
// For example, not all inner loop ports are ports of outer loops
if (port_it == ports.end())
return;
// copy the existing loop port to preserve its parameters except the expression port
std::vector<LoopPort> target_loop_ports(target_ports.size(), *port_it);
std::transform(target_loop_ports.begin(), target_loop_ports.end(), target_ports.begin(), target_loop_ports.begin(),
[](LoopPort loop_port, const ExpressionPort& expr_port) {
LoopPort copy = std::move(loop_port); // to save loop port parameters
copy.expr_port = std::make_shared<ExpressionPort>(expr_port);
return copy;
});
port_it = ports.erase(port_it);
ports.insert(port_it, target_loop_ports.cbegin(), target_loop_ports.cend());
}
template<>
void LinearIR::LoopManager::update_loop_port(size_t loop_id, const LoopPort& actual_port, const std::vector<LoopPort>& target_ports,
bool is_entry) {
const auto& loop_info = get_loop_info(loop_id);
auto& ports = is_entry ? loop_info->entry_points : loop_info->exit_points;
auto port_it = std::find_if(ports.begin(), ports.end(),
[&actual_port](const LoopPort& point) { return point == actual_port; });
OPENVINO_ASSERT(port_it != ports.end(), "Failed update_loop_port: the existing loop port has not been found");
port_it = ports.erase(port_it);
ports.insert(port_it, target_ports.cbegin(), target_ports.cend());
}
void LinearIR::LoopManager::expression_replacement(constExprIt new_expr_begin, constExprIt new_expr_end, const ExpressionPtr& decomposed_expr,
size_t loop_id, const std::vector<ExpressionPort>& entries, const std::vector<ExpressionPort>& exits) {
for (auto it = new_expr_begin; it != new_expr_end; ++it) {
insert_loop_id(*it, loop_id, true);
}
remove_loop_id(decomposed_expr, loop_id);
auto new_entries = entries;
auto new_exits = exits;
if (new_entries.empty() || new_exits.empty()) {
const auto loop_info = get_loop_info(loop_id);
get_io_loop_ports(new_expr_begin, new_expr_end, new_entries, new_exits);
}
for (size_t i = 0; i < decomposed_expr->get_input_count(); ++i) {
update_loop_port(loop_id, decomposed_expr->get_input_port(i), new_entries);
}
for (size_t i = 0; i < decomposed_expr->get_output_count(); ++i) {
update_loop_port(loop_id, decomposed_expr->get_output_port(i), new_exits, false);
}
}
void LinearIR::LoopManager::sort_loop_ports(LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos, size_t loop_id) {
// The method re-sorts the Loop ports
// [113536] Update this logic when expression numeration is implemented
auto push = [](const std::vector<LoopPort>& ports, std::vector<LoopPort>& sorted_ports, const ExpressionPtr& expr) {
for (const auto& port : ports) {
if (port.expr_port->get_expr() == expr) {
sorted_ports.push_back(port);
}
}
};
auto loop_info = get_loop_info(loop_id);
const auto& loop_entries = loop_info->entry_points;
const auto& loop_exits = loop_info->exit_points;
std::vector<LoopPort> entries, exits;
entries.reserve(loop_entries.size());
exits.reserve(loop_exits.size());
for (auto it = loop_begin_pos; it != loop_end_pos; ++it) {
const auto& expr = *it;
push(loop_entries, entries, expr);
push(loop_exits, exits, expr);
}
loop_info->entry_points = entries;
loop_info->exit_points = exits;
}
void LinearIR::LoopManager::insert_loop_id(const ExpressionPtr& expr, size_t new_id, bool before, size_t target_id) {
OPENVINO_ASSERT(m_map.count(new_id) == 1, "Failed marking expression by Loop ID: the Loop with this ID hasn't been registered");

View File

@ -42,46 +42,35 @@ bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr&
return supported_work_amount && supported_increment && supported_dim_idxs;
}
void FuseLoops::fuse_points(std::vector<LinearIR::LoopManager::LoopPort>& exit_points,
std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos) {
std::vector<LinearIR::LoopManager::LoopPort> new_exit_points;
for (const auto& exit_point : exit_points) {
const auto consumers_inputs = exit_point.expr_port->get_connected_ports();
std::set<LinearIR::LoopManager::LoopPort> mapped_entry_points;
std::set<ExpressionPtr> outside_consumers;
for (const auto& consumer_input : consumers_inputs) {
const auto entry_point_it = std::find_if(entry_points.begin(), entry_points.end(),
[&consumer_input](const LoopManager::LoopPort& point) {
return *point.expr_port.get() == consumer_input;
});
if (entry_point_it != entry_points.end()) {
mapped_entry_points.insert(*entry_point_it);
continue;
void FuseLoops::move(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id,
LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, LinearIR::constExprIt pos) {
// Inner Loops can contain ports which are ports of outer Loops as well.
// When we move these inner loops, we can corrupt the order of the LoopPorts of the outer Loops.
// Firstly, we should find the correct outer loop bounds before moving them.
std::map<size_t, std::pair<LinearIR::constExprIt, LinearIR::constExprIt>> outer_loops; // The map: LoopID -> [ LoopBegin, LoopEnd ]
const auto outer_loop_ids = LinearIR::LoopManager::get_outer_expr_loops(*loop_begin_pos, loop_id);
for (const auto& loop_id : outer_loop_ids) {
LinearIR::constExprIt begin, end;
loop_manager->get_loop_bounds(linear_ir, loop_id, begin, end);
// save the previous iterator since the current iterator can be moved
outer_loops[loop_id] = {std::prev(begin), end};
}
const auto& consumer = consumer_input.get_expr();
const auto inside_it = std::find(loop_begin_pos, loop_end_pos, consumer);
if (inside_it == loop_end_pos) {
outside_consumers.insert(consumer);
// Secondly, move expressions
for (auto it = loop_begin_pos; it != loop_end_pos;) {
auto expr_it = it;
// After moving we will have `it` in a new place in the current Loop,
// but for the markup we need to have the expression from the target Loop.
// Because of that we manually increment the iterator before moving
it = std::next(it);
linear_ir.move(expr_it, pos);
}
// Thirdly, sort Loop Ports of outer Loops.
for (auto& loop : outer_loops) {
const auto loop_id = loop.first;
auto begin = std::next(loop.second.first);
auto end = loop.second.second;
loop_manager->sort_loop_ports(begin, end, loop_id);
}
// Remove entry points which are mapped
auto last_point = entry_points.end();
for (const auto& mapped_entry_point : mapped_entry_points) {
last_point = std::remove(entry_points.begin(), last_point, mapped_entry_point);
}
entry_points.resize(entry_points.size() - mapped_entry_points.size());
// Leave exit point if there are consumers outside after fusion
if (!outside_consumers.empty()) {
new_exit_points.push_back(exit_point);
}
}
exit_points = new_exit_points;
}
bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager,
@ -93,9 +82,6 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo
if (!can_be_fused(loop_current, loop_target))
return false;
LinearIR::constExprIt target_loop_begin_pos, target_loop_end_pos;
loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos);
// We can fuse Loop_up to Loop_down only in cases when other consumers of Loop_up are after Loop_down
// Because Loop_up should be explicitly moved before Loop_down in linear IR, and we must save control dependency
bool is_fusion_allowed = true;
@ -117,40 +103,19 @@ bool FuseLoops::fuse_upper_into_current(LinearIR& linear_ir, const LinearIR::Loo
if (!is_fusion_allowed)
return false;
// Update entry and exit points in current Loop information before moving till Loop iterators are valid
auto current_entry_points = loop_current->entry_points;
auto current_exit_points = loop_current->exit_points;
auto target_entry_points = loop_target->entry_points;
auto target_exit_points = loop_target->exit_points;
fuse_points(target_exit_points, current_entry_points, target_loop_begin_pos, target_loop_end_pos);
const auto insertion_place = current_loop_begin_pos;
const auto is_move_needed = target_loop_end_pos != current_loop_begin_pos;
for (auto it = target_loop_begin_pos; it != target_loop_end_pos;) {
auto expr_it = it;
const auto& expr = *expr_it;
// After moving we will have `it` in new place in the current Loop,
// but for markup we need have the expression from the target Loop.
// Because of that we manually increment iterator before moving
it = std::next(it);
loop_manager->replace_loop_id(expr, target_loop_id, current_loop_id);
if (is_move_needed)
linear_ir.move(expr_it, insertion_place);
}
// Update current Loop bounds:
current_loop_begin_pos = target_loop_begin_pos;
LinearIR::constExprIt target_loop_begin_pos, target_loop_end_pos;
loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos);
loop_manager->fuse_loops(target_loop_begin_pos, target_loop_end_pos, target_loop_id, current_loop_id, false);
// Update work_amount for the Loop (the increment is constant because increments must be identical for fusion):
loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount);
std::vector<LoopManager::LoopPort> new_entries = target_entry_points;
new_entries.insert(new_entries.end(), current_entry_points.begin(), current_entry_points.end());
std::vector<LoopManager::LoopPort> new_exits = target_exit_points;
new_exits.insert(new_exits.end(), current_exit_points.begin(), current_exit_points.end());
const auto insertion_place = current_loop_begin_pos;
const auto is_move_needed = target_loop_end_pos != current_loop_begin_pos;
if (is_move_needed)
move(linear_ir, loop_manager, current_loop_id, target_loop_begin_pos, target_loop_end_pos, insertion_place);
loop_current->entry_points = new_entries;
loop_current->exit_points = new_exits;
// Update current Loop bounds:
current_loop_begin_pos = target_loop_begin_pos;
return true;
}
@ -182,43 +147,17 @@ bool FuseLoops::fuse_lower_into_current(LinearIR& linear_ir, const LinearIR::Loo
LinearIR::constExprIt target_loop_begin_pos, target_loop_end_pos;
loop_manager->get_loop_bounds(linear_ir, target_loop_id, target_loop_begin_pos, target_loop_end_pos);
// Update entry and exit points in current Loop information before moving till Loop iterators are valid
auto current_entry_points = loop_current->entry_points;
auto current_exit_points = loop_current->exit_points;
auto target_entry_points = loop_target->entry_points;
auto target_exit_points = loop_target->exit_points;
fuse_points(current_exit_points, target_entry_points, current_loop_begin_pos, current_loop_end_pos);
const auto insertion_place = current_loop_end_pos;
const auto is_move_needed = insertion_place != target_loop_begin_pos;
for (auto it = target_loop_begin_pos; it != target_loop_end_pos;) {
auto expr_it = it;
const auto& expr = *expr_it;
// After moving we will have `it` in new place in the current Loop,
// but for markup we need have the expression from the target Loop.
// Because of that we manually increment iterator before moving
it = std::next(it);
loop_manager->replace_loop_id(expr, target_loop_id, current_loop_id);
if (is_move_needed)
linear_ir.move(expr_it, insertion_place);
}
// Update current Loop bounds:
if (!is_move_needed)
current_loop_end_pos = target_loop_end_pos;
loop_manager->fuse_loops(target_loop_begin_pos, target_loop_end_pos, current_loop_id, target_loop_id);
// Update work_amount for the Loop (the increment is constant because increments must be identical for fusion):
loop_current->work_amount = std::max(loop_current->work_amount, loop_target->work_amount);
std::vector<LoopManager::LoopPort>& new_entries = current_entry_points;
new_entries.insert(new_entries.end(), target_entry_points.begin(), target_entry_points.end());
std::vector<LoopManager::LoopPort>& new_exits = current_exit_points;
new_exits.insert(new_exits.end(), target_exit_points.begin(), target_exit_points.end());
loop_current->entry_points = new_entries;
loop_current->exit_points = new_exits;
const auto insertion_place = current_loop_end_pos;
const auto is_move_needed = insertion_place != target_loop_begin_pos;
if (is_move_needed) {
move(linear_ir, loop_manager, current_loop_id, target_loop_begin_pos, target_loop_end_pos, insertion_place);
} else {
current_loop_end_pos = target_loop_end_pos;
}
return true;
}
@ -292,7 +231,6 @@ bool FuseLoops::run(LinearIR& linear_ir) {
if (fuse_upper_into_current(linear_ir, loop_manager, entry_point.expr_port, current_loop_id, upper_loop_id,
current_loop_begin_pos, current_loop_end_pos)) {
was_fusion_up = true;
loop_manager->remove_loop_info(upper_loop_id);
prev_fused_loops.insert(current_loop_id);
}
}
@ -339,7 +277,6 @@ bool FuseLoops::run(LinearIR& linear_ir) {
if (fuse_lower_into_current(linear_ir, loop_manager, exit_point.expr_port, current_loop_id, lower_loop_id,
current_loop_begin_pos, current_loop_end_pos)) {
was_fusion_down = true;
loop_manager->remove_loop_info(lower_loop_id);
prev_fused_loops.insert(current_loop_id);
// Need to check for possible fusion again because of new input expressions for Loop
break;

View File

@ -19,24 +19,6 @@ using LoopInfoPtr = LoopManager::LoopInfoPtr;
InsertLoadStore::InsertLoadStore(size_t vector_size) : m_vector_size(vector_size) {}
void InsertLoadStore::update_loops(const LinearIR::LoopManagerPtr& loop_manager, const std::vector<size_t>& loop_ids,
const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry) {
for (auto loop_id : loop_ids) {
update_loop(loop_manager->get_loop_info(loop_id), actual_port, target_ports, is_entry);
}
}
void InsertLoadStore::update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info,
const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry) {
auto& ports = is_entry ? loop_info->entry_points : loop_info->exit_points;
auto port_it = std::find_if(ports.begin(), ports.end(),
[&actual_port](const LoopManager::LoopPort& point) { return *point.expr_port.get() == actual_port; });
if (port_it == ports.end())
return;
port_it = ports.erase(port_it);
ports.insert(port_it, target_ports.cbegin(), target_ports.cend());
}
size_t InsertLoadStore::get_count(const PortDescriptorPtr& port_desc) const {
const auto layout = port_desc->get_layout();
const auto shape = port_desc->get_shape();
@ -75,7 +57,7 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr
// Need to update all the corresponding Loops with the same Entry Point
const auto prev_entry_point = consumer_input;
const auto new_entry_point = load_expr->get_input_port(0);
update_loops(loop_manager, loop_ids, prev_entry_point, {new_entry_point}, true);
loop_manager->update_loops_port(loop_ids, prev_entry_point, {new_entry_point}, true);
was_inserted = true;
}
@ -122,7 +104,7 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp
const auto new_exit_point = store_expr->get_output_port(0);
const auto new_exit_points = should_be_saved ? std::vector<ExpressionPort>{prev_exit_point, new_exit_point}
: std::vector<ExpressionPort>{new_exit_point};
update_loops(loop_manager, loop_ids, prev_exit_point, new_exit_points, false);
loop_manager->update_loops_port(loop_ids, prev_exit_point, new_exit_points, false);
return true;
}

View File

@ -41,14 +41,9 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) {
const auto tensor_out = softmax_expr->get_output_port_descriptor(0)->get_shape();
const auto inner_work_amount = *(tensor_out.rbegin());
expr_it = linear_ir.erase(expr_it); // Remove Softmax
std::vector<ExpressionPtr> new_exprs;
// We need an iterator to the inserted element
auto push_node = [&linear_ir, &expr_it, &new_exprs](const std::shared_ptr<Node>& n) {
auto push_node = [&linear_ir, &expr_it](const std::shared_ptr<Node>& n) {
const auto expr = linear_ir.insert(expr_it, n);
new_exprs.push_back(*expr);
return std::make_pair(expr, n);
};
@ -102,35 +97,16 @@ bool SoftmaxDecomposition::run(LinearIR& linear_ir) {
(*mul.first)->get_input_port(1)},
std::vector<ExpressionPort>{(*mul.first)->get_output_port(0)});
// Moved other Loop IDs from Softmax
for (const auto& expr : new_exprs) {
if (expr->get_loop_ids().empty()) {
expr->set_loop_ids(softmax_loop_ids);
continue;
}
loop_manager->insert_loop_ids(expr, softmax_loop_ids, true, expr->get_loop_ids().back());
}
auto update_loop_bounds = [&softmax_expr](std::vector<LinearIR::LoopManager::LoopPort>& points,
const std::vector<ExpressionPort>& new_points,
const LinearIR::LoopManager::LoopInfoPtr& loop_info) {
auto entry_found = std::find_if(points.begin(), points.end(), [&softmax_expr](const LinearIR::LoopManager::LoopPort& point) {
return point.expr_port->get_expr() == softmax_expr;
});
if (entry_found != points.end()) {
entry_found = points.erase(entry_found);
points.insert(entry_found, new_points.begin(), new_points.end());
}
};
// Update Loop info for outer loops
const auto entry_points = std::vector<ExpressionPort>{(*max.first)->get_input_port(0),
(*sub.first)->get_input_port(0)};
const auto exit_points = std::vector<ExpressionPort>{(*mul.first)->get_output_port(0)};
for (auto loop_id : softmax_loop_ids) {
const auto loop_info = loop_manager->get_loop_info(loop_id);
update_loop_bounds(loop_info->entry_points, std::vector<ExpressionPort>{(*max.first)->get_input_port(0),
(*sub.first)->get_input_port(0)}, loop_info);
update_loop_bounds(loop_info->exit_points, std::vector<ExpressionPort>{(*mul.first)->get_output_port(0)}, loop_info);
loop_manager->expression_replacement(vector_buffer_max.first, expr_it, softmax_expr, loop_id, entry_points, exit_points);
}
expr_it = linear_ir.erase(expr_it); // Remove Softmax
/* =========================================== */
/* ============= Runtime Info ================ */

View File

@ -8,6 +8,7 @@
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <openvino/op/util/pad_base.hpp>
#include <openvino/opsets/opset6.hpp>
#include <vector>
@ -28,7 +29,7 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
auto pads_begin_pattern = pattern::wrap_type<opset6::Constant>();
auto pads_end_pattern = pattern::wrap_type<opset6::Constant>();
auto pad_value = pattern::wrap_type<opset6::Constant>();
auto pad_pattern = pattern::wrap_type<opset6::Pad>(
auto pad_pattern = pattern::wrap_type<op::util::PadBase>(
{reshape_or_transpose_before_pattern, pads_begin_pattern, pads_end_pattern, pad_value});
auto space_to_depth_pattern = pattern::wrap_type<opset6::SpaceToDepth>({pad_pattern}, pattern::has_static_shape());
auto reshape_after_pattern =
@ -60,6 +61,20 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
input_shape[2] == output_shape[2] && input_shape[3] == output_shape[3];
};
auto pads_are_negative = [](const std::shared_ptr<Node>& pads) -> bool {
auto constant = ov::as_type_ptr<opset6::Constant>(pads);
if (!constant)
return true;
for (auto pad : constant->cast_vector<int>()) {
if (pad < 0) {
return true;
}
}
return false;
};
std::shared_ptr<Node> reshape_or_trans_before =
get_reshape_or_transpose(reshape_before_pattern, trans_before_pattern);
if (!reshape_or_trans_before)
@ -73,7 +88,7 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
if (!check_input_output_shape(reshape_or_trans_after))
return false;
auto pad = std::dynamic_pointer_cast<opset6::Pad>(pattern_map.at(pad_pattern).get_node_shared_ptr());
auto pad = std::dynamic_pointer_cast<op::util::PadBase>(pattern_map.at(pad_pattern).get_node_shared_ptr());
if (!pad || pad->get_pad_mode() != op::PadMode::CONSTANT)
return false;
auto pad_value_const =
@ -84,6 +99,13 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
if (pad_value.size() != 1 || pad_value[0] != 0.0f)
return false;
const auto pads_begin = pattern_map.at(pads_begin_pattern).get_node_shared_ptr();
if (pads_are_negative(pads_begin))
return false;
const auto pads_end = pattern_map.at(pads_end_pattern).get_node_shared_ptr();
if (pads_are_negative(pads_end))
return false;
auto space_to_depth = std::dynamic_pointer_cast<opset6::SpaceToDepth>(
pattern_map.at(space_to_depth_pattern).get_node_shared_ptr());
if (!space_to_depth)
@ -93,10 +115,8 @@ ov::pass::SpaceToBatchFusion::SpaceToBatchFusion() {
auto block_size = static_cast<int64_t>(space_to_depth->get_block_size());
auto block_shape =
opset6::Constant::create(element::i64, Shape{4}, std::vector<int64_t>{1, 1, block_size, block_size});
auto space_to_batch = register_new_node<opset6::SpaceToBatch>(pattern_map.at(data_pattern),
block_shape,
pattern_map.at(pads_begin_pattern),
pattern_map.at(pads_end_pattern));
auto space_to_batch =
register_new_node<opset6::SpaceToBatch>(pattern_map.at(data_pattern), block_shape, pads_begin, pads_end);
space_to_batch->set_friendly_name(reshape_or_trans_after->get_friendly_name());
copy_runtime_info(

View File

@ -9,6 +9,7 @@
#include <ngraph/opsets/opset6.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <ngraph/pass/manager.hpp>
#include <openvino/op/pad.hpp>
#include <queue>
#include <string>
#include <transformations/common_optimizations/space_to_batch_fusion.hpp>
@ -52,6 +53,59 @@ TEST_F(TransformationTestsF, SpaceToBatchFusionTranspose) {
}
}
TEST_F(TransformationTestsF, SpaceToBatchFusionTransposePad12) {
{
auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});
auto trans_before =
std::make_shared<opset6::Transpose>(data, op::Constant::create(element::i64, Shape{4}, {1, 0, 2, 3}));
auto pad = std::make_shared<op::v12::Pad>(trans_before,
op::Constant::create(element::i64, Shape{4}, {1, 1, 1, 1}),
op::Constant::create(element::i64, Shape{4}, {2, 2, 3, 3}),
op::Constant::create(element::f32, Shape{}, {0}),
op::PadMode::CONSTANT);
auto space_to_depth =
std::make_shared<opset6::SpaceToDepth>(pad, opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, 2);
auto trans_after =
std::make_shared<opset6::Transpose>(space_to_depth,
op::Constant::create(element::i64, Shape{4}, {1, 0, 2, 3}));
function = std::make_shared<Function>(NodeVector{trans_after}, ParameterVector{data});
manager.register_pass<ov::pass::SpaceToBatchFusion>();
}
{
auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});
auto space_to_batch =
std::make_shared<opset6::SpaceToBatch>(data,
op::Constant::create(element::i64, Shape{4}, {1, 1, 2, 2}),
op::Constant::create(element::i64, Shape{4}, {1, 1, 1, 1}),
op::Constant::create(element::i64, Shape{4}, {2, 2, 3, 3}));
function_ref = std::make_shared<Function>(NodeVector{space_to_batch}, ParameterVector{data});
}
}
TEST_F(TransformationTestsF, SpaceToBatchFusionTransposeNegativePads) {
{
auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});
auto trans_before =
std::make_shared<opset6::Transpose>(data, op::Constant::create(element::i64, Shape{4}, {1, 0, 2, 3}));
auto pad = std::make_shared<op::v12::Pad>(trans_before,
op::Constant::create(element::i64, Shape{4}, {1, 1, -1, -1}),
op::Constant::create(element::i64, Shape{4}, {2, 2, -3, -3}),
op::Constant::create(element::f32, Shape{}, {0}),
op::PadMode::CONSTANT);
auto space_to_depth =
std::make_shared<opset6::SpaceToDepth>(pad, opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, 4);
auto trans_after =
std::make_shared<opset6::Transpose>(space_to_depth,
op::Constant::create(element::i64, Shape{4}, {1, 0, 2, 3}));
function = std::make_shared<Function>(NodeVector{trans_after}, ParameterVector{data});
manager.register_pass<ov::pass::SpaceToBatchFusion>();
}
}
TEST_F(TransformationTestsF, SpaceToBatchFusionReshape) {
{
auto data = std::make_shared<opset6::Parameter>(element::f32, Shape{12, 3, 4, 8});

View File

@ -163,6 +163,28 @@ inline int64_t file_size(const char* path) {
return in.tellg();
}
/**
 * @brief Checks whether a file exists
 * @param[in] path The file name
 * @return true if the file exists
 */
inline bool file_exists(const char* path) {
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
std::wstring widefilename = ov::util::string_to_wstring(path);
const wchar_t* file_name = widefilename.c_str();
#elif defined(__ANDROID__) || defined(ANDROID)
std::string file_name = path;
std::string::size_type pos = file_name.find('!');
if (pos != std::string::npos) {
file_name = file_name.substr(0, pos);
}
#else
const char* file_name = path;
#endif
std::ifstream in(file_name, std::ios_base::binary | std::ios_base::ate);
return in.good();
}
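// Minimal usage sketch (assuming this header is included and the ov::util namespace;
// the path is hypothetical):
//
//     if (ov::util::file_exists("model.xml")) {
//         const auto size = ov::util::file_size("model.xml");
//     }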
#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
/**
@ -174,6 +196,14 @@ inline int64_t file_size(const std::wstring& path) {
return file_size(wstring_to_string(path).c_str());
}
/**
* @brief Returns true if file exists
* @param[in] path The file name
* @return true if file exists
*/
inline bool file_exists(const std::wstring& path) {
return file_exists(wstring_to_string(path).c_str());
}
#endif // OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
/**
@ -187,13 +217,11 @@ inline int64_t file_size(const std::string& path) {
/**
* @brief Returns true if file exists
* @param[in] path The path to file
* @param[in] path The file name
* @return true if file exists
*/
template <typename C,
typename = typename std::enable_if<(std::is_same<C, char>::value || std::is_same<C, wchar_t>::value)>::type>
inline bool file_exists(const std::basic_string<C>& path) {
return file_size(path) > 0;
inline bool file_exists(const std::string& path) {
return file_exists(path.c_str());
}
std::string get_file_ext(const std::string& path);

View File

@ -4,7 +4,7 @@
#pragma once
#include "openvino/op/op.hpp"
#include "openvino/op/util/scatter_elements_update_base.hpp"
namespace ov {
namespace op {
@ -12,9 +12,9 @@ namespace v3 {
/// \brief ScatterElementsUpdate operation.
///
/// \ingroup ov_ops_cpp_api
class OPENVINO_API ScatterElementsUpdate : public Op {
class OPENVINO_API ScatterElementsUpdate : public util::ScatterElementsUpdateBase {
public:
OPENVINO_OP("ScatterElementsUpdate", "opset3", op::Op);
OPENVINO_OP("ScatterElementsUpdate", "opset3", util::ScatterElementsUpdateBase);
ScatterElementsUpdate() = default;
/// \brief Constructs a ScatterElementsUpdate node
@ -28,21 +28,74 @@ public:
const Output<Node>& updates,
const Output<Node>& axis);
void validate_and_infer_types() override;
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_END
bool has_evaluate() const override;
bool evaluate_lower(TensorVector& output_values) const override;
bool evaluate_upper(TensorVector& output_values) const override;
bool evaluate_label(TensorLabelVector& output_labels) const override;
private:
bool evaluate_scatter_element_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
};
} // namespace v3
namespace v12 {
class OPENVINO_API ScatterElementsUpdate : public op::util::ScatterElementsUpdateBase {
public:
OPENVINO_OP("ScatterElementsUpdate", "opset12", op::util::ScatterElementsUpdateBase);
/// \brief Lists the supported reduction types for this version of the operator.
/// See the specification for the description of how reduction works with ScatterElementsUpdate.
enum class Reduction { NONE, SUM, PROD, MIN, MAX, MEAN };
ScatterElementsUpdate() = default;
/// \brief Constructs a ScatterElementsUpdate node
/// \param data Input data
/// \param indices Data entry index that will be updated
/// \param updates Update values
/// \param axis Axis to scatter on
ScatterElementsUpdate(const Output<Node>& data,
const Output<Node>& indices,
const Output<Node>& updates,
const Output<Node>& axis,
const Reduction reduction = Reduction::NONE,
const bool use_init_val = true);
bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;
Reduction get_reduction() const {
return m_reduction;
}
void set_reduction(const Reduction reduction) {
m_reduction = reduction;
}
bool get_use_init_val() const {
return m_use_init_val;
}
void set_use_init_val(const bool use_init_val) {
m_use_init_val = use_init_val;
}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
bool has_evaluate() const override;
private:
Reduction m_reduction = Reduction::NONE;
bool m_use_init_val = true;
};
} // namespace v12
OPENVINO_API
std::ostream& operator<<(std::ostream& s, const v12::ScatterElementsUpdate::Reduction& reduction);
} // namespace op
template <>
class OPENVINO_API AttributeAdapter<op::v12::ScatterElementsUpdate::Reduction>
: public EnumAttributeAdapterBase<op::v12::ScatterElementsUpdate::Reduction> {
public:
AttributeAdapter(op::v12::ScatterElementsUpdate::Reduction& value)
: EnumAttributeAdapterBase<op::v12::ScatterElementsUpdate::Reduction>(value) {}
OPENVINO_RTTI("AttributeAdapter<v12::ScatterElementsUpdate::Reduction>");
};
} // namespace ov
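// Illustrative construction of the new v12 operator (a sketch, not from the original sources);
// `data`, `indices`, `updates` and `axis` are assumed to be ov::Output<ov::Node> values
// prepared by the caller:
//
//     auto scatter = std::make_shared<ov::op::v12::ScatterElementsUpdate>(
//         data, indices, updates, axis,
//         ov::op::v12::ScatterElementsUpdate::Reduction::SUM,
//         /*use_init_val=*/true);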

View File

@ -0,0 +1,45 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/op/op.hpp"
#include "openvino/op/util/attr_types.hpp"
namespace ov {
namespace op {
namespace util {
class OPENVINO_API ScatterElementsUpdateBase : public Op {
public:
OPENVINO_OP("ScatterElementsUpdateBase", "util");
ScatterElementsUpdateBase() = default;
/// \brief The common base class for all ScatterElementsUpdate operator versions
///
/// \param data Input data
/// \param indices Data entry index that will be updated
/// \param updates Update values
/// \param axis Axis to scatter on
ScatterElementsUpdateBase(const Output<Node>& data,
const Output<Node>& indices,
const Output<Node>& updates,
const Output<Node>& axis);
void validate_and_infer_types() override;
bool has_evaluate() const override;
bool evaluate_lower(TensorVector& output_values) const override;
bool evaluate_upper(TensorVector& output_values) const override;
bool evaluate_label(TensorLabelVector& output_labels) const override;
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_END
private:
bool evaluate_scatter_element_update(const HostTensorVector& outputs, const HostTensorVector& inputs) const;
};
} // namespace util
} // namespace op
} // namespace ov

View File

@ -120,7 +120,6 @@ _OPENVINO_OP_REG(EmbeddingBagOffsetsSum, ov::op::v3)
_OPENVINO_OP_REG(GRUCell, ov::op::v3)
_OPENVINO_OP_REG(NonZero, ov::op::v3)
_OPENVINO_OP_REG(RNNCell, ov::op::v0)
_OPENVINO_OP_REG(ScatterElementsUpdate, ov::op::v3)
_OPENVINO_OP_REG(ScatterUpdate, ov::op::v3)
_OPENVINO_OP_REG(ShuffleChannels, ov::op::v0)
_OPENVINO_OP_REG(ShapeOf, ov::op::v3)
@ -207,3 +206,4 @@ _OPENVINO_OP_REG(TopK, ov::op::v11)
// New operations added in opset12
_OPENVINO_OP_REG(GroupNormalization, ov::op::v12)
_OPENVINO_OP_REG(Pad, ov::op::v12)
_OPENVINO_OP_REG(ScatterElementsUpdate, ov::op::v12)

View File

@ -9,11 +9,11 @@
#include "utils.hpp"
namespace ov {
namespace op {
namespace v3 {
namespace op {
namespace util {
template <class TShape>
std::vector<TShape> shape_infer(const ScatterElementsUpdate* op,
std::vector<TShape> shape_infer(const util::ScatterElementsUpdateBase* op,
const std::vector<TShape>& input_shapes,
const std::map<size_t, HostTensorPtr>& constant_data = {}) {
NODE_VALIDATION_CHECK(op, input_shapes.size() == 4);
@ -59,15 +59,24 @@ std::vector<TShape> shape_infer(const ScatterElementsUpdate* op,
}
return {data_shape};
}
} // namespace util
namespace v3 {
template <class TShape>
void shape_infer(const ScatterElementsUpdate* op,
const std::vector<TShape>& input_shapes,
std::vector<TShape>& output_shapes,
const std::map<size_t, HostTensorPtr>& constant_data = {}) {
output_shapes = shape_infer(op, input_shapes, constant_data);
output_shapes = util::shape_infer(op, input_shapes, constant_data);
}
} // namespace v3
namespace v12 {
template <class TShape>
void shape_infer(const ScatterElementsUpdate* op,
const std::vector<TShape>& input_shapes,
std::vector<TShape>& output_shapes,
const std::map<size_t, HostTensorPtr>& constant_data = {}) {
output_shapes = util::shape_infer(op, input_shapes, constant_data);
}
} // namespace v12
} // namespace op
} // namespace ov

View File

@ -2,25 +2,21 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/op/scatter_elements_update.hpp"
#include "openvino/op/scatter_elements_update.hpp"
#include <scatter_elements_update_shape_inference.hpp>
#include "bound_evaluate.hpp"
#include "itt.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "ngraph/runtime/reference/scatter_elements_update.hpp"
#include "ngraph/validation_util.hpp"
#include "openvino/core/validation_util.hpp"
using namespace ngraph;
using namespace std;
namespace ov {
op::v3::ScatterElementsUpdate::ScatterElementsUpdate(const Output<Node>& data,
const Output<Node>& indices,
const Output<Node>& updates,
const Output<Node>& axis)
: Op({data, indices, updates, axis}) {
: ov::op::util::ScatterElementsUpdateBase(data, indices, updates, axis) {
constructor_validate_and_infer_types();
}
@ -29,37 +25,6 @@ bool op::v3::ScatterElementsUpdate::visit_attributes(AttributeVisitor& visitor)
return true;
}
void op::v3::ScatterElementsUpdate::validate_and_infer_types() {
OV_OP_SCOPE(v3_ScatterElementsUpdate_validate_and_infer_types);
element::Type data_et = get_input_element_type(0);
element::Type indices_et = get_input_element_type(1);
element::Type updates_et = get_input_element_type(2);
element::Type axis_et = get_input_element_type(3);
NODE_VALIDATION_CHECK(this,
indices_et.is_integral(),
"Indices element type must be integral_number, but is: ",
indices_et);
NODE_VALIDATION_CHECK(this, axis_et.is_integral(), "Axis element type must be integral_number, but is: ", axis_et);
element::Type merged_type;
NODE_VALIDATION_CHECK(this,
element::Type::merge(merged_type, data_et, updates_et),
"Data type and updates type are required to be the same. ",
"Got: ",
data_et,
" and: ",
updates_et);
OPENVINO_SUPPRESS_DEPRECATED_START
const auto output_shape = shape_infer(this, get_node_input_partial_shapes(*this)).front();
OPENVINO_SUPPRESS_DEPRECATED_END
set_output_type(0, data_et, output_shape);
if (output_shape.is_dynamic())
set_input_is_relevant_to_shape(0);
}
shared_ptr<Node> op::v3::ScatterElementsUpdate::clone_with_new_inputs(const OutputVector& inputs) const {
OV_OP_SCOPE(v3_ScatterElementsUpdate_clone_with_new_inputs);
NODE_VALIDATION_CHECK(this,
@ -72,204 +37,78 @@ shared_ptr<Node> op::v3::ScatterElementsUpdate::clone_with_new_inputs(const Outp
return make_shared<v3::ScatterElementsUpdate>(inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3));
}
namespace scatter_element_update {
namespace {
template <element::Type_t DT, element::Type_t IT, element::Type_t AT>
bool evaluate(const HostTensorPtr& data,
const HostTensorPtr& indices,
const HostTensorPtr& updates,
const HostTensorPtr& axis,
const HostTensorPtr& out,
const int64_t normalized_axis) {
using DataType = typename element_type_traits<DT>::value_type;
using IndicesType = typename element_type_traits<IT>::value_type;
out->set_shape(data->get_shape());
runtime::reference::scatter_elem_update<DataType, IndicesType>(data->get_data_ptr<DT>(),
indices->get_data_ptr<IT>(),
updates->get_data_ptr<DT>(),
normalized_axis,
out->get_data_ptr<DT>(),
data->get_shape(),
indices->get_shape());
op::v12::ScatterElementsUpdate::ScatterElementsUpdate(const Output<Node>& data,
const Output<Node>& indices,
const Output<Node>& updates,
const Output<Node>& axis,
const Reduction reduction,
const bool use_init_val)
: op::util::ScatterElementsUpdateBase(data, indices, updates, axis),
m_reduction{reduction},
m_use_init_val{use_init_val} {
constructor_validate_and_infer_types();
}
bool op::v12::ScatterElementsUpdate::visit_attributes(AttributeVisitor& visitor) {
OV_OP_SCOPE(v12_ScatterElementsUpdate_visit_attributes);
visitor.on_attribute("reduction", m_reduction);
visitor.on_attribute("use_init_val", m_use_init_val);
return true;
}
#define TYPE_AXS_CASE(a, ...) \
case element::Type_t::a: { \
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_axs, _, a)); \
rc = evaluate<DT, IT, element::Type_t::a>(__VA_ARGS__); \
} break;
void op::v12::ScatterElementsUpdate::validate_and_infer_types() {
OV_OP_SCOPE(v12_ScatterElementsUpdate_validate_and_infer_types);
template <element::Type_t DT, element::Type_t IT>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
auto axis_type = arg3->get_element_type();
// Dispatch specialization based on axis data type.
bool rc = true;
switch (axis_type) {
TYPE_AXS_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
if (m_reduction == Reduction::MEAN) {
NODE_VALIDATION_CHECK(this,
get_input_element_type(0) != element::boolean,
"The 'mean' reduction type is not supported for boolean tensors");
}
return rc;
ScatterElementsUpdateBase::validate_and_infer_types();
}
#define TYPE_IND_CASE(a, ...) \
case element::Type_t::a: { \
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_ind, _, a)); \
rc = evaluate<DT, element::Type_t::a>(__VA_ARGS__); \
} break;
shared_ptr<Node> op::v12::ScatterElementsUpdate::clone_with_new_inputs(const OutputVector& inputs) const {
OV_OP_SCOPE(v12_ScatterElementsUpdate_clone_with_new_inputs);
NODE_VALIDATION_CHECK(this,
inputs.size() == get_input_size(),
"clone_with_new_inputs() required inputs size: ",
get_input_size(),
"Got: ",
inputs.size());
template <element::Type_t DT>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
auto indices_type = arg1->get_element_type();
// Dispatch specialization based on indicies data type.
bool rc = true;
switch (indices_type) {
TYPE_IND_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
}
return rc;
return make_shared<v12::ScatterElementsUpdate>(inputs.at(0),
inputs.at(1),
inputs.at(2),
inputs.at(3),
m_reduction,
m_use_init_val);
}
bool evaluate_scatter_element_update(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
bool rc = true;
switch (out->get_element_type()) {
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i16, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i32, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i64, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u32, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u64, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f16, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f32, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
}
return rc;
}
} // namespace
} // namespace scatter_element_update
bool op::v3::ScatterElementsUpdate::evaluate_scatter_element_update(const HostTensorVector& outputs,
const HostTensorVector& inputs) const {
NGRAPH_CHECK(inputs[3]->get_element_type().is_integral_number(), "axis element type is not integral data type");
OPENVINO_SUPPRESS_DEPRECATED_START
int64_t axis = host_tensor_2_vector<int64_t>(inputs[3])[0];
OPENVINO_SUPPRESS_DEPRECATED_END
const auto& input_rank = get_input_partial_shape(0).rank();
int64_t normalized_axis = axis;
if (normalized_axis < 0) {
if (input_rank.is_static()) {
OPENVINO_SUPPRESS_DEPRECATED_START
normalized_axis = ngraph::normalize_axis(this, axis, input_rank);
OPENVINO_SUPPRESS_DEPRECATED_END
bool op::v12::ScatterElementsUpdate::has_evaluate() const {
if (m_reduction != Reduction::NONE) {
return false;
} else {
OPENVINO_SUPPRESS_DEPRECATED_START
normalized_axis = ngraph::normalize_axis(this, axis, static_cast<int64_t>(inputs[0]->get_shape().size()));
OPENVINO_SUPPRESS_DEPRECATED_END
return ScatterElementsUpdateBase::has_evaluate();
}
}
return scatter_element_update::evaluate_scatter_element_update(inputs[0],
inputs[1],
inputs[2],
inputs[3],
outputs[0],
normalized_axis);
}
bool op::v3::ScatterElementsUpdate::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
OV_OP_SCOPE(v3_ScatterElementsUpdate_evaluate);
return evaluate_scatter_element_update(outputs, inputs);
template <>
OPENVINO_API EnumNames<op::v12::ScatterElementsUpdate::Reduction>&
EnumNames<op::v12::ScatterElementsUpdate::Reduction>::get() {
static auto enum_names = EnumNames<op::v12::ScatterElementsUpdate::Reduction>(
"op::v12::ScatterElementsUpdate::Reduction",
{{"none", op::v12::ScatterElementsUpdate::Reduction::NONE},
{"sum", op::v12::ScatterElementsUpdate::Reduction::SUM},
{"prod", op::v12::ScatterElementsUpdate::Reduction::PROD},
{"min", op::v12::ScatterElementsUpdate::Reduction::MIN},
{"max", op::v12::ScatterElementsUpdate::Reduction::MAX},
{"mean", op::v12::ScatterElementsUpdate::Reduction::MEAN}});
return enum_names;
}
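// With the mapping above, the attribute is (de)serialized by name; e.g. calling
// as_string(op::v12::ScatterElementsUpdate::Reduction::SUM) is expected to yield "sum"
// (illustrative note, assuming the usual EnumNames/as_string machinery).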
bool op::v3::ScatterElementsUpdate::has_evaluate() const {
OV_OP_SCOPE(v3_ScatterElementsUpdate_has_evaluate);
switch (get_output_element_type(0)) {
case ngraph::element::i16:
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u32:
case ngraph::element::u64:
case ngraph::element::f16:
case ngraph::element::f32:
break;
default:
return false;
}
switch (get_input_element_type(1)) {
case ngraph::element::i8:
case ngraph::element::i16:
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u8:
case ngraph::element::u16:
case ngraph::element::u32:
case ngraph::element::u64:
break;
default:
return false;
}
return true;
}
bool op::v3::ScatterElementsUpdate::evaluate_lower(ov::TensorVector& output_values) const {
OV_OP_SCOPE(v3_ScatterNDUpdate_evaluate_lower);
return get_input_tensor(1).has_and_set_bound() && ov::default_lower_bound_evaluator(this, output_values);
}
bool op::v3::ScatterElementsUpdate::evaluate_upper(ov::TensorVector& output_values) const {
OV_OP_SCOPE(v3_ScatterNDUpdate_evaluate_upper);
return get_input_tensor(1).has_and_set_bound() && ov::default_upper_bound_evaluator(this, output_values);
}
bool op::v3::ScatterElementsUpdate::evaluate_label(TensorLabelVector& output_labels) const {
OV_OP_SCOPE(v3_ScatterNDUpdate_evaluate_label);
OPENVINO_SUPPRESS_DEPRECATED_START
return ov::default_label_evaluator(this, {0, 2}, output_labels);
OPENVINO_SUPPRESS_DEPRECATED_END
namespace op {
std::ostream& operator<<(std::ostream& s, const v12::ScatterElementsUpdate::Reduction& reduction) {
return s << as_string(reduction);
}
} // namespace op
} // namespace ov

View File

@ -0,0 +1,262 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/op/util/scatter_elements_update_base.hpp"
#include <scatter_elements_update_shape_inference.hpp>
#include "bound_evaluate.hpp"
#include "itt.hpp"
#include "ngraph/runtime/reference/scatter_elements_update.hpp"
#include "openvino/core/validation_util.hpp"
namespace ov {
namespace op {
ov::op::util::ScatterElementsUpdateBase::ScatterElementsUpdateBase(const Output<Node>& data,
const Output<Node>& indices,
const Output<Node>& updates,
const Output<Node>& axis)
: Op({data, indices, updates, axis}) {
constructor_validate_and_infer_types();
}
void ov::op::util::ScatterElementsUpdateBase::validate_and_infer_types() {
OV_OP_SCOPE(util_ScatterElementsUpdateBase_validate_and_infer_types);
OPENVINO_SUPPRESS_DEPRECATED_START
const element::Type& data_et = get_input_element_type(0);
const element::Type& indices_et = get_input_element_type(1);
const element::Type& updates_et = get_input_element_type(2);
const element::Type& axis_et = get_input_element_type(3);
NODE_VALIDATION_CHECK(this,
indices_et.is_integral(),
"Indices element type must be integral_number, but is: ",
indices_et);
NODE_VALIDATION_CHECK(this, axis_et.is_integral(), "Axis element type must be integral_number, but is: ", axis_et);
element::Type merged_type;
NODE_VALIDATION_CHECK(this,
element::Type::merge(merged_type, data_et, updates_et),
"Data type and updates type are required to be the same. ",
"Got: ",
data_et,
" and: ",
updates_et);
const auto output_shape = shape_infer(this, get_node_input_partial_shapes(*this)).front();
OPENVINO_SUPPRESS_DEPRECATED_END
element::Type out_et = get_input_element_type(0);
std::ignore = element::Type::merge(out_et, get_input_element_type(0), get_input_element_type(2));
set_output_type(0, out_et, output_shape);
if (output_shape.is_dynamic()) {
set_input_is_relevant_to_shape(0);
}
}
bool op::util::ScatterElementsUpdateBase::has_evaluate() const {
OV_OP_SCOPE(util_ScatterElementsUpdateBase_has_evaluate);
switch (get_output_element_type(0)) {
case ngraph::element::i16:
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u32:
case ngraph::element::u64:
case ngraph::element::f16:
case ngraph::element::f32:
break;
default:
return false;
}
switch (get_input_element_type(1)) {
case ngraph::element::i8:
case ngraph::element::i16:
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u8:
case ngraph::element::u16:
case ngraph::element::u32:
case ngraph::element::u64:
break;
default:
return false;
}
return true;
}
bool op::util::ScatterElementsUpdateBase::evaluate_lower(ov::TensorVector& output_values) const {
OV_OP_SCOPE(util_ScatterNDUpdate_evaluate_lower);
return get_input_tensor(1).has_and_set_bound() && ov::default_lower_bound_evaluator(this, output_values);
}
bool op::util::ScatterElementsUpdateBase::evaluate_upper(ov::TensorVector& output_values) const {
OV_OP_SCOPE(util_ScatterNDUpdate_evaluate_upper);
return get_input_tensor(1).has_and_set_bound() && ov::default_upper_bound_evaluator(this, output_values);
}
bool op::util::ScatterElementsUpdateBase::evaluate_label(TensorLabelVector& output_labels) const {
OV_OP_SCOPE(util_ScatterNDUpdate_evaluate_label);
OPENVINO_SUPPRESS_DEPRECATED_START
return ov::default_label_evaluator(this, {0, 2}, output_labels);
OPENVINO_SUPPRESS_DEPRECATED_END
}
namespace scatter_element_update {
namespace {
template <element::Type_t DT, element::Type_t IT, element::Type_t AT>
bool evaluate(const HostTensorPtr& data,
const HostTensorPtr& indices,
const HostTensorPtr& updates,
const HostTensorPtr& axis,
const HostTensorPtr& out,
const int64_t normalized_axis) {
using DataType = typename element_type_traits<DT>::value_type;
using IndicesType = typename element_type_traits<IT>::value_type;
out->set_shape(data->get_shape());
ngraph::runtime::reference::scatter_elem_update<DataType, IndicesType>(data->get_data_ptr<DT>(),
indices->get_data_ptr<IT>(),
updates->get_data_ptr<DT>(),
normalized_axis,
out->get_data_ptr<DT>(),
data->get_shape(),
indices->get_shape());
return true;
}
#define TYPE_AXS_CASE(a, ...) \
case element::Type_t::a: { \
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_axs, _, a)); \
rc = evaluate<DT, IT, element::Type_t::a>(__VA_ARGS__); \
} break;
template <element::Type_t DT, element::Type_t IT>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
auto axis_type = arg3->get_element_type();
// Dispatch specialization based on axis data type.
bool rc = true;
switch (axis_type) {
TYPE_AXS_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_AXS_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
}
return rc;
}
#define TYPE_IND_CASE(a, ...) \
case element::Type_t::a: { \
OV_OP_SCOPE(OV_PP_CAT3(scatter_element_update_ind, _, a)); \
rc = evaluate<DT, element::Type_t::a>(__VA_ARGS__); \
} break;
template <element::Type_t DT>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
auto indices_type = arg1->get_element_type();
// Dispatch specialization based on indices data type.
bool rc = true;
switch (indices_type) {
TYPE_IND_CASE(i8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(i64, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u8, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u16, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u32, arg0, arg1, arg2, arg3, out, normalized_axis);
TYPE_IND_CASE(u64, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
}
return rc;
}
bool evaluate_scatter_element_update(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& out,
const int64_t normalized_axis) {
bool rc = true;
switch (out->get_element_type()) {
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i16, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i32, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, i64, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u32, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, u64, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f16, arg0, arg1, arg2, arg3, out, normalized_axis);
NGRAPH_TYPE_CASE(evaluate_scatter_element_update, f32, arg0, arg1, arg2, arg3, out, normalized_axis);
default:
rc = false;
break;
}
return rc;
}
} // namespace
} // namespace scatter_element_update
bool op::util::ScatterElementsUpdateBase::evaluate_scatter_element_update(const HostTensorVector& outputs,
const HostTensorVector& inputs) const {
NGRAPH_CHECK(inputs[3]->get_element_type().is_integral_number(), "axis element type is not integral data type");
OPENVINO_SUPPRESS_DEPRECATED_START
int64_t axis = host_tensor_2_vector<int64_t>(inputs[3])[0];
OPENVINO_SUPPRESS_DEPRECATED_END
const auto& input_rank = get_input_partial_shape(0).rank();
int64_t normalized_axis = axis;
if (normalized_axis < 0) {
if (input_rank.is_static()) {
OPENVINO_SUPPRESS_DEPRECATED_START
normalized_axis = ngraph::normalize_axis(this, axis, input_rank);
OPENVINO_SUPPRESS_DEPRECATED_END
} else {
OPENVINO_SUPPRESS_DEPRECATED_START
normalized_axis = ngraph::normalize_axis(this, axis, static_cast<int64_t>(inputs[0]->get_shape().size()));
OPENVINO_SUPPRESS_DEPRECATED_END
}
}
return scatter_element_update::evaluate_scatter_element_update(inputs[0],
inputs[1],
inputs[2],
inputs[3],
outputs[0],
normalized_axis);
}
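// A minimal illustrative sketch (hypothetical helper, not used by the operator) of the axis
// normalization applied above: a negative axis is shifted by the data rank, so for a rank-4 input
// an axis of -4 maps to 0 and the reference kernel always receives a non-negative axis.
namespace {
inline int64_t normalize_negative_axis_sketch(const int64_t axis, const int64_t rank) {
    // e.g. normalize_negative_axis_sketch(-4, 4) == 0, normalize_negative_axis_sketch(2, 4) == 2
    return axis < 0 ? axis + rank : axis;
}
}  // namespace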
bool op::util::ScatterElementsUpdateBase::evaluate(const HostTensorVector& outputs,
const HostTensorVector& inputs) const {
OV_OP_SCOPE(util_ScatterElementsUpdate_evaluate);
return evaluate_scatter_element_update(outputs, inputs);
}
} // namespace op
} // namespace ov

View File

@ -8,85 +8,89 @@
#include "util/type_prop.hpp"
using namespace std;
using namespace ngraph;
using namespace ov;
using namespace testing;
TEST(type_prop, scatter_elements_update_output_shape) {
template <class T>
class ScatterElementsUpdateTest : public ::testing::Test {};
TYPED_TEST_SUITE_P(ScatterElementsUpdateTest);
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_output_shape) {
Shape data_shape{2, 4, 5, 7};
Shape indices_shape{2, 2, 2, 2};
Shape updates_shape{2, 2, 2, 2};
Shape axis_shape{};
Shape expected_output_shape{2, 4, 5, 7};
auto data = make_shared<op::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::Parameter>(element::i16, axis_shape);
auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::v0::Parameter>(element::i16, axis_shape);
auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
auto scatter = make_shared<TypeParam>(data, indices, updates, axis);
EXPECT_EQ(scatter->get_output_shape(0), expected_output_shape);
}
TEST(type_prop, scatter_elements_update_output_partial_dyn_shape) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_output_partial_dyn_shape) {
PartialShape data_shape{2, Dimension::dynamic(), 5};
set_shape_labels(data_shape, 10);
PartialShape indices_shape{Dimension::dynamic(), 2, 2};
PartialShape updates_shape{2, 2, Dimension::dynamic()};
PartialShape axis_shape = PartialShape::dynamic();
auto data = make_shared<op::Parameter>(element::f64, data_shape);
auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::Parameter>(element::f64, updates_shape);
auto axis = make_shared<op::Parameter>(element::i16, axis_shape);
auto data = make_shared<op::v0::Parameter>(element::f64, data_shape);
auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::v0::Parameter>(element::f64, updates_shape);
auto axis = make_shared<op::v0::Parameter>(element::i16, axis_shape);
auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
auto scatter = make_shared<TypeParam>(data, indices, updates, axis);
EXPECT_EQ(scatter->get_output_element_type(0), element::f64);
EXPECT_EQ(scatter->get_output_partial_shape(0), data_shape);
EXPECT_THAT(get_shape_labels(scatter->get_output_partial_shape(0)), ElementsAre(10, 11, 12));
}
TEST(type_prop, scatter_elements_update_data_has_interval_dimensions) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_data_has_interval_dimensions) {
PartialShape data_shape{{5, 10}, -1, {-1, 3}, {8, -1}};
set_shape_labels(data_shape, 10);
const auto data = make_shared<op::Parameter>(element::i64, data_shape);
const auto indices = make_shared<op::Parameter>(element::i16, PartialShape{1, 2, 2, {2, 3}});
const auto updates = make_shared<op::Parameter>(element::i64, PartialShape{{0, 2}, -1, 2, -1});
const auto axis = make_shared<op::Parameter>(element::i16, PartialShape::dynamic());
const auto data = make_shared<op::v0::Parameter>(element::i64, data_shape);
const auto indices = make_shared<op::v0::Parameter>(element::i16, PartialShape{1, 2, 2, {2, 3}});
const auto updates = make_shared<op::v0::Parameter>(element::i64, PartialShape{{0, 2}, -1, 2, -1});
const auto axis = make_shared<op::v0::Parameter>(element::i16, PartialShape::dynamic());
const auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
const auto scatter = make_shared<TypeParam>(data, indices, updates, axis);
EXPECT_EQ(scatter->get_output_element_type(0), element::i64);
EXPECT_EQ(scatter->get_output_partial_shape(0), data_shape);
EXPECT_THAT(get_shape_labels(scatter->get_output_partial_shape(0)), ElementsAre(10, 11, 12, 13));
}
TEST(type_prop, scatter_elements_update_output_full_dyn_shape) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_output_full_dyn_shape) {
PartialShape data_shape = PartialShape::dynamic();
PartialShape indices_shape = PartialShape::dynamic();
PartialShape updates_shape = PartialShape::dynamic();
PartialShape axis_shape = PartialShape::dynamic();
auto data = make_shared<op::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::Parameter>(element::i16, axis_shape);
auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::v0::Parameter>(element::i16, axis_shape);
auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
auto scatter = make_shared<TypeParam>(data, indices, updates, axis);
EXPECT_EQ(scatter->get_output_element_type(0), element::f32);
EXPECT_EQ(scatter->get_output_partial_shape(0), data_shape);
}
TEST(type_prop, scatter_elements_update_default_ctor) {
const auto data = make_shared<op::Parameter>(element::f32, PartialShape{2, 5, 5, 6});
const auto indices = make_shared<op::Parameter>(element::i16, PartialShape{1, 2, 1, 3});
const auto updates = make_shared<op::Parameter>(element::f32, PartialShape{1, 2, 1, 3});
const auto axis = make_shared<op::Constant>(element::i16, Shape{}, -4);
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_default_ctor) {
const auto data = make_shared<op::v0::Parameter>(element::f32, PartialShape{2, 5, 5, 6});
const auto indices = make_shared<op::v0::Parameter>(element::i16, PartialShape{1, 2, 1, 3});
const auto updates = make_shared<op::v0::Parameter>(element::f32, PartialShape{1, 2, 1, 3});
const auto axis = make_shared<op::v0::Constant>(element::i16, Shape{}, -4);
const auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
const auto scatter = make_shared<TypeParam>(data, indices, updates, axis);
scatter->set_arguments(OutputVector{data, indices, updates, axis});
scatter->validate_and_infer_types();
@ -97,83 +101,116 @@ TEST(type_prop, scatter_elements_update_default_ctor) {
EXPECT_THAT(get_shape_labels(scatter->get_output_partial_shape(0)), Each(ov::no_label));
}
TEST(type_prop, scatter_elements_update_preserve_partial_values_and_labels_via_evaluates_bounds) {
const auto data = op::Constant::create(element::i64, Shape{4}, {2, 3, 15, 4});
const auto indices = op::Constant::create(element::i64, Shape{2}, {3, 0});
TYPED_TEST_P(ScatterElementsUpdateTest,
scatter_elements_update_preserve_partial_values_and_labels_via_evaluates_bounds) {
const auto data = op::v0::Constant::create(element::i64, Shape{4}, {2, 3, 15, 4});
const auto indices = op::v0::Constant::create(element::i64, Shape{2}, {3, 0});
auto updates_shape = PartialShape{{10, 20}, {3, 4}};
set_shape_labels(updates_shape, 20);
const auto axis = make_shared<op::Constant>(element::i16, Shape{}, 0);
const auto axis = make_shared<op::v0::Constant>(element::i16, Shape{}, 0);
const auto shape_of_u = std::make_shared<op::ShapeOf>(std::make_shared<op::Parameter>(element::i64, updates_shape));
const auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, shape_of_u, axis);
const auto shape_of_u =
std::make_shared<op::v0::ShapeOf>(std::make_shared<op::v0::Parameter>(element::i64, updates_shape));
const auto scatter = make_shared<TypeParam>(data, indices, shape_of_u, axis);
auto param = std::make_shared<op::Parameter>(element::f32, PartialShape{1});
auto param = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{1});
auto bc = std::make_shared<op::v3::Broadcast>(param, scatter, op::BroadcastType::BIDIRECTIONAL);
EXPECT_EQ(bc->get_output_partial_shape(0), PartialShape({{3, 4}, 3, 15, {10, 20}}));
EXPECT_THAT(get_shape_labels(bc->get_output_partial_shape(0)), ElementsAre(21, ov::no_label, ov::no_label, 20));
}
TEST(type_prop, scatter_elements_update_axis_validation) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_update_axis_validation) {
Shape data_shape{2, 4, 5, 7};
Shape indices_shape{2, 2, 2, 2};
Shape updates_shape{2, 2, 2, 2};
Shape axis_shape{};
auto data = make_shared<op::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::Constant>(element::i16, axis_shape, std::vector<int>{8});
auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::v0::Constant>(element::i16, axis_shape, std::vector<int>{8});
OV_EXPECT_THROW(auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis),
OV_EXPECT_THROW(auto scatter = make_shared<TypeParam>(data, indices, updates, axis),
ov::AssertFailure,
HasSubstr("Parameter axis 8 out of the tensor rank range [-4, 3]"));
}
TEST(type_prop, scatter_elements_updates_indices_shape) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_updates_indices_shape) {
Shape data_shape{2, 4, 5, 7};
Shape indices_shape{3, 3, 3, 3};
Shape updates_shape{2, 2, 2, 2};
Shape axis_shape{};
auto data = make_shared<op::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::Constant>(element::i16, axis_shape, std::vector<int>{1});
auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::v0::Constant>(element::i16, axis_shape, std::vector<int>{1});
OV_EXPECT_THROW(auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis),
OV_EXPECT_THROW(auto scatter = make_shared<TypeParam>(data, indices, updates, axis),
NodeValidationFailure,
HasSubstr("Indices and updates input shapes are required to be equal"));
}
TEST(type_prop, scatter_elements_updates_indices_rank) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_updates_indices_rank) {
Shape data_shape{2, 4};
Shape indices_shape{2, 2};
Shape updates_shape{2, 2, 2, 2};
Shape axis_shape{};
auto data = make_shared<op::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::Constant>(element::i16, axis_shape, std::vector<int>{1});
auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::v0::Constant>(element::i16, axis_shape, std::vector<int>{1});
OV_EXPECT_THROW(auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis),
OV_EXPECT_THROW(auto scatter = make_shared<TypeParam>(data, indices, updates, axis),
NodeValidationFailure,
HasSubstr("Indices and updates input shapes are required to be equal"));
}
TEST(type_prop, scatter_elements_data_indices_rank) {
TYPED_TEST_P(ScatterElementsUpdateTest, scatter_elements_data_indices_rank) {
Shape data_shape{2, 4, 5, 7};
Shape indices_shape{2, 2};
Shape updates_shape{2, 2};
Shape axis_shape{};
auto data = make_shared<op::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::Constant>(element::i16, axis_shape, std::vector<int>{1});
auto data = make_shared<op::v0::Parameter>(element::f32, data_shape);
auto indices = make_shared<op::v0::Parameter>(element::i16, indices_shape);
auto updates = make_shared<op::v0::Parameter>(element::f32, updates_shape);
auto axis = make_shared<op::v0::Constant>(element::i16, axis_shape, std::vector<int>{1});
OV_EXPECT_THROW(auto scatter = make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis),
OV_EXPECT_THROW(auto scatter = make_shared<TypeParam>(data, indices, updates, axis),
NodeValidationFailure,
HasSubstr("Indices rank and data rank are required to be equal"));
}
TEST(type_prop, scatter_elements_update_mean_reduction_of_bool) {
const auto data = make_shared<op::v0::Parameter>(element::boolean, Shape{10});
const auto indices = make_shared<op::v0::Parameter>(element::i32, Shape{2});
const auto updates = make_shared<op::v0::Parameter>(element::boolean, Shape{2});
const auto axis = make_shared<op::v0::Constant>(element::i32, Shape{1}, std::vector<int>{0});
OV_EXPECT_THROW(
std::ignore = make_shared<op::v12::ScatterElementsUpdate>(data,
indices,
updates,
axis,
op::v12::ScatterElementsUpdate::Reduction::MEAN),
NodeValidationFailure,
HasSubstr("The 'mean' reduction type is not supported for boolean tensors"));
}
REGISTER_TYPED_TEST_SUITE_P(ScatterElementsUpdateTest,
scatter_elements_update_output_shape,
scatter_elements_update_output_partial_dyn_shape,
scatter_elements_update_data_has_interval_dimensions,
scatter_elements_update_output_full_dyn_shape,
scatter_elements_update_default_ctor,
scatter_elements_update_preserve_partial_values_and_labels_via_evaluates_bounds,
scatter_elements_update_axis_validation,
scatter_elements_updates_indices_shape,
scatter_elements_updates_indices_rank,
scatter_elements_data_indices_rank);
using OpVersions = ::testing::Types<op::v3::ScatterElementsUpdate, op::v12::ScatterElementsUpdate>;
INSTANTIATE_TYPED_TEST_SUITE_P(type_prop, ScatterElementsUpdateTest, OpVersions);

View File

@ -2,27 +2,51 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/op/scatter_elements_update.hpp"
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/util/attr_types.hpp"
#include "ngraph/opsets/opset3.hpp"
#include "util/visitor.hpp"
using namespace ngraph;
using namespace ov;
using ngraph::test::NodeBuilder;
using ngraph::test::ValueMap;
TEST(attributes, scatter_elements_update) {
NodeBuilder::get_ops().register_factory<opset3::ScatterElementsUpdate>();
NodeBuilder::get_ops().register_factory<op::v3::ScatterElementsUpdate>();
auto data = std::make_shared<op::Parameter>(element::f32, Shape{2, 4, 5, 7});
auto indices = std::make_shared<op::Parameter>(element::i16, Shape{2, 2, 2, 2});
auto updates = std::make_shared<op::Parameter>(element::f32, Shape{2, 2, 2, 2});
auto axis = std::make_shared<op::Parameter>(element::i16, Shape{});
auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 4, 5, 7});
auto indices = std::make_shared<op::v0::Parameter>(element::i16, Shape{2, 2, 2, 2});
auto updates = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 2, 2, 2});
auto axis = std::make_shared<op::v0::Parameter>(element::i16, Shape{});
auto scatter = std::make_shared<opset3::ScatterElementsUpdate>(data, indices, updates, axis);
auto scatter = std::make_shared<op::v3::ScatterElementsUpdate>(data, indices, updates, axis);
NodeBuilder builder(scatter, {data, indices, updates, axis});
const auto expected_attr_count = 0;
EXPECT_EQ(builder.get_value_map_size(), expected_attr_count);
}
TEST(attributes, scatter_elements_update_v12) {
NodeBuilder::get_ops().register_factory<op::v12::ScatterElementsUpdate>();
auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 4, 5, 7});
auto indices = std::make_shared<op::v0::Parameter>(element::i16, Shape{2, 2, 2, 2});
auto updates = std::make_shared<op::v0::Parameter>(element::f32, Shape{2, 2, 2, 2});
auto axis = std::make_shared<op::v0::Parameter>(element::i16, Shape{});
auto scatter = std::make_shared<op::v12::ScatterElementsUpdate>(data,
indices,
updates,
axis,
op::v12::ScatterElementsUpdate::Reduction::PROD,
false);
NodeBuilder builder(scatter, {data, indices, updates, axis});
const auto g_scatter = ov::as_type_ptr<op::v12::ScatterElementsUpdate>(builder.create());
const auto expected_attr_count = 2;
EXPECT_EQ(builder.get_value_map_size(), expected_attr_count);
EXPECT_EQ(g_scatter->get_reduction(), scatter->get_reduction());
EXPECT_EQ(g_scatter->get_use_init_val(), scatter->get_use_init_val());
}

View File

@ -150,7 +150,8 @@ INSTANTIATE_TEST_SUITE_P(ONNXOpExtensionViaCommonConstructor,
FrontEndOpExtensionTest::getTestCaseName);
TEST(ONNXOpExtensionViaCommonConstructor, onnx_op_extension_via_template_arg_with_custom_domain) {
const auto ext = std::make_shared<onnx::OpExtension<ov::op::v0::Relu>>("CustomRelu", "my_custom_domain");
const auto ext =
std::make_shared<ov::frontend::onnx::OpExtension<ov::op::v0::Relu>>("CustomRelu", "my_custom_domain");
auto fe = std::make_shared<ov::frontend::onnx::FrontEnd>();
fe->add_extension(ext);
@ -163,7 +164,8 @@ TEST(ONNXOpExtensionViaCommonConstructor, onnx_op_extension_via_template_arg_wit
}
TEST(ONNXOpExtensionViaCommonConstructor, onnx_op_extension_via_ov_type_name_with_custom_domain) {
const auto ext = std::make_shared<onnx::OpExtension<>>("opset1::Relu", "CustomRelu", "my_custom_domain");
const auto ext =
std::make_shared<ov::frontend::onnx::OpExtension<>>("opset1::Relu", "CustomRelu", "my_custom_domain");
auto fe = std::make_shared<ov::frontend::onnx::FrontEnd>();
fe->add_extension(ext);
@ -199,7 +201,8 @@ TEST(ONNXOpExtensionViaCommonConstructor, onnx_op_extension_mixed_legacy_and_new
ov::util::path_join({TEST_ONNX_MODELS_DIRNAME, "relu_custom_domain.onnx"}));
ov::Core core;
core.add_extension(std::make_shared<OldApiNode>());
const auto new_api_ext = std::make_shared<onnx::OpExtension<ov::op::v0::Relu>>("CustomRelu", "my_custom_domain");
const auto new_api_ext =
std::make_shared<ov::frontend::onnx::OpExtension<ov::op::v0::Relu>>("CustomRelu", "my_custom_domain");
core.add_extension(new_api_ext);
EXPECT_NO_THROW(core.read_model(input_model_path));
}

View File

@ -18,6 +18,7 @@ namespace tensorflow {
#define VARIABLES_INDEX_FOOTER_SIZE 48
#define BLOCK_TRAILER_SIZE 5
#define SAVED_TENSOR_SLICES_KEY ""
#define META_GRAPH_DEFAULT_TAG "serve"
template <typename T>
static T smUnpack(char*& ptr, const char* ptr_end) {

View File

@ -137,6 +137,9 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
} else if (GraphIteratorProtoTxt::is_supported(model_path)) {
// text protobuf format with checkpoints
return true;
} else if (GraphIteratorSavedModel::is_supported(model_path)) {
// saved model format with tagged metagraphs
return true;
}
}
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
@ -167,6 +170,9 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
} else if (GraphIteratorProtoTxt::is_supported(model_path)) {
// text protobuf format with checkpoints
return true;
} else if (GraphIteratorSavedModel::is_supported(model_path)) {
// saved model format with tagged metagraphs
return true;
}
}
#endif
@ -194,11 +200,7 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& va
return std::make_shared<InputModel>(std::make_shared<GraphIteratorProto>(model_path), m_telemetry);
} else if (GraphIteratorSavedModel::is_supported(model_path)) {
std::shared_ptr<GraphIteratorSavedModel> graph_iterator;
if (variants.size() > 1 && variants[1].is<std::string>()) {
graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, variants[1].as<std::string>());
} else {
graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, std::string("serve"));
}
return std::make_shared<InputModel>(graph_iterator,
m_telemetry,
graph_iterator->get_variables_index(),
@ -249,6 +251,18 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& va
graph_iterator->get_checkpoint_v1_reader(),
false);
}
auto saved_model_tags = paths[1];
if (GraphIteratorSavedModel::is_supported(model_path)) {
std::shared_ptr<GraphIteratorSavedModel> graph_iterator;
graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, saved_model_tags);
return std::make_shared<InputModel>(graph_iterator,
m_telemetry,
graph_iterator->get_variables_index(),
graph_iterator->get_saved_model_input_names(),
graph_iterator->get_saved_model_output_names(),
nullptr,
true);
}
}
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
else if (variants[0].is<std::wstring>()) {
@ -258,13 +272,7 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& va
return std::make_shared<InputModel>(std::make_shared<GraphIteratorProto>(model_path), m_telemetry);
} else if (GraphIteratorSavedModel::is_supported(model_path)) {
std::shared_ptr<GraphIteratorSavedModel> graph_iterator;
if (variants.size() > 1 && variants[1].is<std::string>()) {
graph_iterator = std::make_shared<GraphIteratorSavedModel>(
model_path,
ov::util::wstring_to_string(variants[1].as<std::wstring>()));
} else {
graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, std::string("serve"));
}
graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, std::string(META_GRAPH_DEFAULT_TAG));
return std::make_shared<InputModel>(graph_iterator,
m_telemetry,
graph_iterator->get_variables_index(),
@ -315,6 +323,18 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& va
graph_iterator->get_checkpoint_v1_reader(),
false);
}
auto saved_model_tags = ov::util::wstring_to_string(paths[1]);
if (GraphIteratorSavedModel::is_supported(model_path)) {
std::shared_ptr<GraphIteratorSavedModel> graph_iterator;
graph_iterator = std::make_shared<GraphIteratorSavedModel>(model_path, saved_model_tags);
return std::make_shared<InputModel>(graph_iterator,
m_telemetry,
graph_iterator->get_variables_index(),
graph_iterator->get_saved_model_input_names(),
graph_iterator->get_saved_model_output_names(),
nullptr,
true);
}
}
#endif
else if (variants[0].is<GraphIterator::Ptr>()) {
@ -362,7 +382,8 @@ std::shared_ptr<ov::Model> FrontEnd::convert(const ov::frontend::InputModel::Ptr
++counter;
}
exception_message
<< "\nTo facilitate the conversion of unsupported operations, refer to Frontend Extension documentation: "
<< "\nTo facilitate the conversion of unsupported operations, refer to Frontend Extension "
"documentation: "
"https://docs.openvino.ai/latest/openvino_docs_Extensibility_UG_Frontend_Extensions.html \n";
}

View File

@ -72,6 +72,31 @@ std::basic_string<wchar_t> get_variables_index_name<wchar_t>() {
}
#endif
std::vector<std::string> GraphIteratorSavedModel::split_tags(const std::string tags) const {
std::vector<std::string> tag_list = {};
std::size_t len = tags.length();
if (len == 0) {
return tag_list;
}
std::string tag = "";
std::size_t last_delimeter_pos = 0;
std::size_t delimeter_pos = std::string::npos;
while ((delimeter_pos = tags.find_first_of(",", last_delimeter_pos)) != std::string::npos) {
tag = tags.substr(last_delimeter_pos, delimeter_pos - last_delimeter_pos);
tag_list.push_back(tag);
last_delimeter_pos = delimeter_pos + 1;
}
if (last_delimeter_pos != std::string::npos) {
if (last_delimeter_pos < len) {
tag = tags.substr(last_delimeter_pos);
} else {
tag = "";
}
tag_list.push_back(tag);
}
return tag_list;
}
} // namespace tensorflow
} // namespace frontend
} // namespace ov

View File

@ -70,39 +70,76 @@ private:
bool read_saved_model(const std::basic_string<T>& path, const std::string& tags) {
std::basic_string<T> save_model_path = path + get_saved_model_name<T>();
std::ifstream sm_stream{save_model_path.c_str(), std::ifstream::in | std::ifstream::binary};
FRONT_END_GENERAL_CHECK(sm_stream && sm_stream.is_open(), "Model file does not exist");
FRONT_END_GENERAL_CHECK(sm_stream && sm_stream.is_open(), "[TensorFlow Frontend] Model file does not exist");
std::basic_string<T> varIndexPath = path + get_variables_index_name<T>();
if (ov::util::file_exists(varIndexPath)) {
m_variables_index = std::make_shared<VariablesIndex>();
std::ifstream vi_stream{varIndexPath.c_str(), std::ifstream::in | std::ifstream::binary};
FRONT_END_GENERAL_CHECK(vi_stream && vi_stream.is_open(),
"Saved Model's variable index file does not exist");
"[TensorFlow Frontend] Saved Model's variable index file does not exist");
FRONT_END_GENERAL_CHECK(m_variables_index->read_variables(vi_stream, path),
"Saved Model's variable index file cannot be parsed");
"[TensorFlow Frontend] Saved Model's variable index file cannot be parsed");
}
bool res = m_saved_model->ParseFromIstream(&sm_stream);
FRONT_END_GENERAL_CHECK(res && m_saved_model->meta_graphs_size(), "Saved Model cannot be parsed");
FRONT_END_GENERAL_CHECK(res && m_saved_model->meta_graphs_size(),
"[TensorFlow Frontend] Saved Model cannot be parsed");
auto tag_list = split_tags(tags);
// SavedModel can contain several MetaGraphs with different tags. Look for the MetaGraph with the required tags
for (const auto& meta_graph : m_saved_model->meta_graphs()) {
if (!meta_graph.has_graph_def()) {
continue;
}
if (m_saved_model->meta_graphs_size() > 1) {
bool tag_found = false;
for (const auto& tag : meta_graph.meta_info_def().tags()) {
if (tags.find(tag) != std::string::npos) {
tag_found = true;
break;
if (meta_graph.meta_info_def().tags_size() > 0) {
tag_found = std::all_of(meta_graph.meta_info_def().tags().begin(),
meta_graph.meta_info_def().tags().end(),
[&tag_list](const std::string& tag) {
return std::find(tag_list.begin(), tag_list.end(), tag) != tag_list.end();
});
}
}
if (!tag_found) {
continue;
if (tag_found) {
return load_meta_graph(meta_graph);
}
}
// Alternative behavior for working with the "default tag" to support additional cases for read_model
if (tags == META_GRAPH_DEFAULT_TAG) {
// If we have only one MetaGraph - try to use it
if (m_saved_model->meta_graphs_size() == 1 && m_saved_model->meta_graphs(0).has_graph_def()) {
return load_meta_graph(m_saved_model->meta_graphs(0));
}
// If a MetaGraph with tag == META_GRAPH_DEFAULT_TAG had been found, we wouldn't reach this point.
// Otherwise, try to find a MetaGraph with no tags as an alternative
for (const auto& meta_graph : m_saved_model->meta_graphs()) {
if (!meta_graph.has_graph_def()) {
continue;
}
if (meta_graph.meta_info_def().tags_size() == 0) {
return load_meta_graph(meta_graph);
}
}
FRONT_END_GENERAL_CHECK(false,
"[TensorFlow Frontend] Saved Model doesn't contain any applicable MetaGraph");
}
FRONT_END_GENERAL_CHECK(false,
"[TensorFlow Frontend] Saved Model doesn't contain MetaGraph with requested tag");
return false;
}
/// \brief Loads the specified meta-graph
bool load_meta_graph(const ::tensorflow::MetaGraphDef& meta_graph) {
std::map<std::string, const ::tensorflow::SignatureDef*> validSignatures = {};
for (const auto& sit : meta_graph.signature_def()) {
const std::string& key = sit.first;
@ -144,10 +181,11 @@ private:
return true;
}
FRONT_END_GENERAL_CHECK(false, "Saved Model doesn't contain MetaGraph with requested tag");
return false;
}
/// \brief Splits tags using the "," delimiter
/// \param[in] tags String with tags separated by ","
/// \return Returns a vector with the split tags; no trimming is applied. Passing "tag1, tag2"
/// yields the vector ["tag1", " tag2"], because TensorFlow saves tags without trimming
std::vector<std::string> split_tags(const std::string tags) const;
}; // GraphIteratorSavedModel
} // namespace tensorflow
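// A minimal standalone sketch of the splitting behaviour documented above (assumption: the same
// comma-based splitting, with no trimming, as GraphIteratorSavedModel::split_tags; relies on
// <string> and <vector> already being included by this header).
inline std::vector<std::string> split_tags_sketch(const std::string& tags) {
    std::vector<std::string> result;
    if (tags.empty()) {
        return result;
    }
    std::size_t start = 0;
    std::size_t comma = std::string::npos;
    while ((comma = tags.find(',', start)) != std::string::npos) {
        result.push_back(tags.substr(start, comma - start));  // surrounding spaces are kept as-is
        start = comma + 1;
    }
    result.push_back(tags.substr(start));  // trailing part, e.g. " tag2" for "tag1, tag2"
    return result;
}
// split_tags_sketch("serve,gpu") -> {"serve", "gpu"}; split_tags_sketch("tag1, tag2") -> {"tag1", " tag2"}.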

View File

@ -214,6 +214,14 @@ void InputModel::InputModelTFImpl::load_places() {
producer_op_name,
producer_output_port_name,
producer_output_port_idx);
if (is_conditional_edge(producer_op_name)) {
// exclude the "^" mark indicating an (execution) conditional dependency
// for example, "^sub_op" means a dependency on a producer node named "sub_op"
// if a node has dependent operation nodes but no data consumers,
// it is not terminating and will not be connected to a Result node
producer_op_name = producer_op_name.substr(1);
}
op_names_with_consumers.insert(producer_op_name);
} catch (const std::exception&) {
FRONT_END_THROW("[ ERROR ] Exception happened when preparing input " + std::to_string(input_port_idx) +

View File

@ -67,6 +67,10 @@ OutputVector translate_varhandle_op(const NodeContext& node) {
auto shape = node.get_attribute<::ov::PartialShape>("shape").get_shape();
bool result = var_index->get_mapped_variable(var_name, &entry_data, &entry_size);
if (!result) {
result = var_index->get_variable(var_name, &entry_data, &entry_size);
}
TENSORFLOW_OP_VALIDATION(node, result, "[TensorFlow Frontend] Internal error: Cannot find requested variable.");
::tensorflow::BundleEntryProto entry;

View File

@ -124,3 +124,17 @@ TEST_F(FrontEndConversionWithReferenceTestsF, SavedModelBroadcastIssue) {
model_ref = make_shared<Model>(OutputVector{x}, ParameterVector{});
}
}
TEST_F(FrontEndConversionWithReferenceTestsF, SavedModelMultiGraph) {
// The test verifies loading of a MetaGraph with empty tags as the default
// and loading of variables with no corresponding RestoreV2
{ model = convert_model("saved_model_multi-graph"); }
{
// create a reference graph
auto x = make_shared<Constant>(element::f32, Shape{2, 3}, vector<float>{1, 2, 3, 3, 2, 1});
auto y = make_shared<Parameter>(element::f32, Shape{1});
auto add = make_shared<Add>(x, y);
model_ref = make_shared<Model>(OutputVector{add}, ParameterVector{y});
}
}

View File

@ -700,3 +700,19 @@ TEST_F(FrontEndConversionWithReferenceTestsF, PartitionedCallsWithConvInBodyGrap
model_ref = make_shared<Model>(OutputVector{conv}, ParameterVector{input1, filter});
}
}
TEST_F(FrontEndConversionWithReferenceTestsF, ControlDependencyNumberOutputs) {
// The test aims to check the number of outputs of the resulting model
// If a node has dependent nodes connected by a conditional edge, it is not terminating
// and it should not be connected to a Result node
{ model = convert_model("control_dependency/control_dependency.pb"); }
{
auto input1 = make_shared<Parameter>(f32, Shape{2, 3});
auto input2 = make_shared<Parameter>(f32, Shape{2, 3});
// AddV2 node is excluded since it is not terminating
auto sub = make_shared<Subtract>(input1, input2);
model_ref = make_shared<Model>(OutputVector{sub}, ParameterVector{input1, input2});
}
}

View File

@ -0,0 +1,24 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import sys
import numpy as np
import tensorflow as tf
tf.compat.v1.reset_default_graph()
# Create the graph and model
with tf.compat.v1.Session() as sess:
input1 = tf.compat.v1.placeholder(tf.float32, [2, 3], 'input1')
input2 = tf.compat.v1.placeholder(tf.float32, [2, 3], 'input2')
add = tf.add(input1, input2, name="add")
with tf.control_dependencies([add]):
sub = tf.subtract(input1, input2, name="sub")
tf.compat.v1.global_variables_initializer()
tf_net = sess.graph_def
tf.io.write_graph(tf_net, os.path.join(sys.argv[1], "control_dependency"), 'control_dependency.pb', False)

View File

@ -0,0 +1,48 @@
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import sys
import tensorflow as tf
export_dir = os.path.join(sys.argv[1], "saved_model_multi-graph")
# Slash replacement is required because the path otherwise fails on Windows
builder = tf.compat.v1.saved_model.Builder(export_dir if os.name != 'nt' else export_dir.replace("/", "\\"))
# Create the graph and model
with tf.compat.v1.Session(graph=tf.Graph()) as sess:
x_value = [[1.,2.,3.],[3.,2.,1.]]
z_value = [[2.,2.,1.],[1.,1.,2.]]
tf_x = tf.compat.v1.Variable(x_value, name="custom_variable_name")
tf_y = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1], name='y')
tf_z = tf.constant(z_value)
tf_add = tf.add(tf_x, tf_y, name="AddOperation")
tf_identity = tf.identity(tf_add, name="AddIdentity")
tf.subtract(tf_identity, tf_z, name="SubOperation")
sess.run(tf.compat.v1.global_variables_initializer())
builder.add_meta_graph_and_variables(sess, ["train"])
with tf.compat.v1.Session(graph=tf.Graph()) as sess:
x_value = [[1.,2.,3.],[3.,2.,1.]]
tf_x = tf.compat.v1.Variable(x_value, name="custom_variable_name")
tf_y = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1], name='y')
tf_add = tf.add(tf_x, tf_y, name="AddOperation")
sess.run(tf.compat.v1.global_variables_initializer())
saver = tf.compat.v1.train.Saver(var_list=None, defer_build=True)
builder.add_meta_graph([], saver=saver)
with tf.compat.v1.Session(graph=tf.Graph()) as sess:
x_value = [[1.,2.,3.],[3.,2.,1.]]
tf_x = tf.compat.v1.Variable(x_value, name="custom_variable_name")
tf_y = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1], name='y')
tf_add = tf.subtract(tf_x, tf_y, name="SubOperation")
sess.run(tf.compat.v1.global_variables_initializer())
saver = tf.compat.v1.train.Saver(var_list=None, defer_build=True)
builder.add_meta_graph(["test","test2"], saver=saver)
builder.save()

View File

@ -168,24 +168,28 @@ OPENVINO_RUNTIME_API std::vector<std::vector<int>> get_proc_type_table();
* extend to support other CPU core types such as ARM.
*
* The following are two example of processor type table.
* 1. Processor table of two socket CPUs XEON server
* 1. Processor table of 4 numa nodes and 2 socket server
*
* ALL_PROC | MAIN_CORE_PROC | EFFICIENT_CORE_PROC | HYPER_THREADING_PROC
* 96 48 0 48 // Total number of two sockets
* 48 24 0 24 // Number of socket one
* 48 24 0 24 // Number of socket two
* ALL_PROC | MAIN_CORE_PROC | EFFICIENT_CORE_PROC | HYPER_THREADING_PROC | PROC_NUMA_NODE_ID | PROC_SOCKET_ID
* 96 48 0 48 -1 -1
* 24 12 0 12 0 0
* 24 12 0 12 1 0
* 24 12 0 12 2 1
* 24 12 0 12 3 1
*
* 2. Processor table of one socket CPU desktop
* 2. Processor table of 1 numa node desktop
*
* ALL_PROC | MAIN_CORE_PROC | EFFICIENT_CORE_PROC | HYPER_THREADING_PROC
* 32 8 16 8 // Total number of one socket
* ALL_PROC | MAIN_CORE_PROC | EFFICIENT_CORE_PROC | HYPER_THREADING_PROC | PROC_NUMA_NODE_ID | PROC_SOCKET_ID
* 32 8 16 8 -1 -1
*/
enum ColumnOfProcessorTypeTable {
ALL_PROC = 0, //!< All processors, regardless of backend cpu
MAIN_CORE_PROC = 1, //!< Processor based on physical core of Intel Performance-cores
EFFICIENT_CORE_PROC = 2, //!< Processor based on Intel Efficient-cores
HYPER_THREADING_PROC = 3, //!< Processor based on logical core of Intel Performance-cores
PROC_TYPE_TABLE_SIZE = 4 //!< Size of processor type table
PROC_NUMA_NODE_ID = 4, //!< Numa node id of processors in this row
PROC_SOCKET_ID = 5, //!< Socket id of processors in this row
PROC_TYPE_TABLE_SIZE = 6 //!< Size of processor type table
};
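// A minimal usage sketch (illustrative only) of reading the extended table through the
// get_proc_type_table() declaration above; -1 in the last two columns marks a row that is not
// tied to a single numa node or socket:
//
//   const std::vector<std::vector<int>> proc_type_table = ov::get_proc_type_table();
//   for (const auto& row : proc_type_table) {
//       const int all_procs = row[ALL_PROC];
//       const int p_cores   = row[MAIN_CORE_PROC];
//       const int numa_node = row[PROC_NUMA_NODE_ID];  // -1 for an aggregated summary row
//       const int socket    = row[PROC_SOCKET_ID];     // -1 for an aggregated summary row
//   }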
/**
@ -229,24 +233,25 @@ OPENVINO_RUNTIME_API void set_cpu_used(const std::vector<int>& cpu_ids, const in
* 1. Four processors of two Pcore
* 2. Four processors of four Ecores shared L2 cache
*
* PROCESSOR_ID | SOCKET_ID | CORE_ID | CORE_TYPE | GROUP_ID | Used
* 0 0 0 3 0 0
* 1 0 0 1 0 0
* 2 0 1 3 1 0
* 3 0 1 1 1 0
* 4 0 2 2 2 0
* 5 0 3 2 2 0
* 6 0 4 2 2 0
* 7 0 5 2 2 0
* PROCESSOR_ID | NUMA_NODE_ID | SOCKET_ID | CORE_ID | CORE_TYPE | GROUP_ID | Used
* 0 0 0 0 3 0 0
* 1 0 0 0 1 0 0
* 2 0 0 1 3 1 0
* 3 0 0 1 1 1 0
* 4 0 0 2 2 2 0
* 5 0 0 3 2 2 0
* 6 0 0 4 2 2 0
* 7 0 0 5 2 2 0
*/
enum ColumnOfCPUMappingTable {
CPU_MAP_PROCESSOR_ID = 0, //!< column for processor id of the processor
CPU_MAP_SOCKET_ID = 1, //!< column for socket id of the processor
CPU_MAP_CORE_ID = 2, //!< column for hardware core id of the processor
CPU_MAP_CORE_TYPE = 3, //!< column for CPU core type corresponding to the processor
CPU_MAP_GROUP_ID = 4, //!< column for group id to the processor. Processors in one group have dependency.
CPU_MAP_USED_FLAG = 5, //!< column for resource management of the processor
CPU_MAP_TABLE_SIZE = 6 //!< Size of CPU mapping table
CPU_MAP_NUMA_NODE_ID = 1, //!< column for node id of the processor
CPU_MAP_SOCKET_ID = 2, //!< column for socket id of the processor
CPU_MAP_CORE_ID = 3, //!< column for hardware core id of the processor
CPU_MAP_CORE_TYPE = 4, //!< column for CPU core type corresponding to the processor
CPU_MAP_GROUP_ID = 5, //!< column for group id to the processor. Processors in one group have dependency.
CPU_MAP_USED_FLAG = 6, //!< column for resource management of the processor
CPU_MAP_TABLE_SIZE = 7 //!< Size of CPU mapping table
};
} // namespace ov
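// A short illustrative sketch of reading one row of the CPU mapping table with the indices above
// (the table itself is filled in by the platform-specific CPU map parsers):
//
//   const std::vector<int>& row = cpu_mapping_table[n];
//   const int processor_id = row[CPU_MAP_PROCESSOR_ID];
//   const int numa_node_id = row[CPU_MAP_NUMA_NODE_ID];  // new column, placed before the socket id
//   const int socket_id    = row[CPU_MAP_SOCKET_ID];
//   const int core_type    = row[CPU_MAP_CORE_TYPE];     // MAIN_CORE_PROC, EFFICIENT_CORE_PROC, ...
//   const bool in_use      = row[CPU_MAP_USED_FLAG] != 0;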

View File

@ -0,0 +1,144 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <atomic>
#include <cstddef>
#include <mutex>
#include <queue>
#include <type_traits>
#include "openvino/core/parallel.hpp"
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
# include <tbb/concurrent_priority_queue.h>
# include <tbb/concurrent_queue.h>
#endif
namespace ov {
namespace threading {
template <typename T>
class ThreadSafeQueueWithSize {
public:
void push(T value) {
std::lock_guard<std::mutex> lock(_mutex);
_queue.push(std::move(value));
}
bool try_pop(T& value) {
std::lock_guard<std::mutex> lock(_mutex);
if (!_queue.empty()) {
value = std::move(_queue.front());
_queue.pop();
return true;
} else {
return false;
}
}
size_t size() {
std::lock_guard<std::mutex> lock(_mutex);
return _queue.size();
}
protected:
std::queue<T> _queue;
std::mutex _mutex;
};
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
template <typename T>
using ThreadSafeQueue = tbb::concurrent_queue<T>;
template <typename T>
using ThreadSafeBoundedQueue = tbb::concurrent_bounded_queue<T>;
template <typename T>
class ThreadSafeBoundedPriorityQueue {
public:
ThreadSafeBoundedPriorityQueue() = default;
bool try_push(T&& value) {
if (_capacity) {
_pqueue.push(std::move(value));
return true;
}
return false;
}
bool try_pop(T& value) {
return _capacity ? _pqueue.try_pop(value) : false;
}
void set_capacity(std::size_t newCapacity) {
_capacity = newCapacity;
}
protected:
tbb::concurrent_priority_queue<T, std::greater<T>> _pqueue;
std::atomic_bool _capacity{false};
};
#else
template <typename T>
using ThreadSafeQueue = ThreadSafeQueueWithSize<T>;
template <typename T>
class ThreadSafeBoundedQueue {
public:
ThreadSafeBoundedQueue() = default;
bool try_push(T value) {
std::lock_guard<std::mutex> lock(_mutex);
if (_capacity) {
_queue.push(std::move(value));
}
return _capacity;
}
bool try_pop(T& value) {
std::lock_guard<std::mutex> lock(_mutex);
if (_capacity && !_queue.empty()) {
value = std::move(_queue.front());
_queue.pop();
return true;
} else {
return false;
}
}
void set_capacity(std::size_t newCapacity) {
std::lock_guard<std::mutex> lock(_mutex);
_capacity = newCapacity;
}
protected:
std::queue<T> _queue;
std::mutex _mutex;
bool _capacity = false;
};
template <typename T>
class ThreadSafeBoundedPriorityQueue {
public:
ThreadSafeBoundedPriorityQueue() = default;
bool try_push(T value) {
std::lock_guard<std::mutex> lock(_mutex);
if (_capacity) {
_queue.push(std::move(value));
}
return _capacity;
}
bool try_pop(T& value) {
std::lock_guard<std::mutex> lock(_mutex);
if (_capacity && !_queue.empty()) {
value = std::move(_queue.top());
_queue.pop();
return true;
} else {
return false;
}
}
void set_capacity(std::size_t newCapacity) {
std::lock_guard<std::mutex> lock(_mutex);
_capacity = newCapacity;
}
protected:
std::priority_queue<T, std::vector<T>, std::greater<T>> _queue;
std::mutex _mutex;
bool _capacity = false;
};
#endif
} // namespace threading
} // namespace ov
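// A minimal usage sketch (illustrative only): both the TBB-backed and the fallback
// ThreadSafeBoundedQueue expose try_push/try_pop/set_capacity, so callers can be written against
// the alias without caring which backend was compiled in:
//
//   ov::threading::ThreadSafeBoundedQueue<int> tasks;
//   tasks.set_capacity(8);            // a zero capacity keeps the fallback queue disabled
//   if (tasks.try_push(42)) {
//       int task = 0;
//       while (tasks.try_pop(task)) {
//           // process task
//       }
//   }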

View File

@ -3,8 +3,8 @@
//
/**
* @brief A header file that provides a set minimal required Streams Executor API.
* @file streams_executor.hpp
* @brief A header file that provides a set of CPU map and parser functions.
* @file cpu_map_info.hpp
*/
#pragma once
@ -22,6 +22,7 @@ public:
~CPU(){};
int _processors = 0;
int _numa_nodes = 0;
int _sockets = 0;
int _cores = 0;
std::vector<std::vector<int>> _proc_type_table;
std::vector<std::vector<int>> _cpu_mapping_table;
@ -34,18 +35,37 @@ public:
CPU& cpu_info();
#ifdef __linux__
/**
* @brief Parse nodes information to update _sockets, proc_type_table and cpu_mapping_table on Linux
* @param[in] node_info_table nodes information for this platform.
* @param[in] _numa_nodes total number of numa nodes in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _proc_type_table summary table of number of processors per type
* @param[out] _cpu_mapping_table CPU mapping table for each processor
* @return
*/
void parse_node_info_linux(const std::vector<std::string> node_info_table,
const int& _numa_nodes,
int& _sockets,
std::vector<std::vector<int>>& _proc_type_table,
std::vector<std::vector<int>>& _cpu_mapping_table);
/**
* @brief Parse CPU cache information on Linux
* @param[in] _system_info_table system information for this platform.
* @param[in] system_info_table cpus information for this platform.
* @param[in] node_info_table nodes information for this platform.
* @param[out] _processors total number of processors in the system.
* @param[out] _numa_nodes total number of numa nodes in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _cores total number of physical CPU cores in the system
* @param[out] _proc_type_table summary table of number of processors per type
* @param[out] _cpu_mapping_table CPU mapping table for each processor
* @return
*/
void parse_cache_info_linux(const std::vector<std::vector<std::string>> _system_info_table,
void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_info_table,
const std::vector<std::string> node_info_table,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -53,16 +73,20 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> _system_
/**
* @brief Parse CPU frequency information on Linux
* @param[in] _system_info_table system information for this platform.
* @param[in] system_info_table cpus information for this platform.
* @param[in] node_info_table nodes information for this platform.
* @param[out] _processors total number of processors in the system.
* @param[out] _numa_nodes total number of numa nodes in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _cores total number of physical CPU cores in the system
* @param[out] _proc_type_table summary table of number of processors per type
* @param[out] _cpu_mapping_table CPU mapping table for each processor
* @return
*/
void parse_freq_info_linux(const std::vector<std::vector<std::string>> _system_info_table,
void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_info_table,
const std::vector<std::string> node_info_table,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -106,6 +130,7 @@ void get_cpu_mapping_from_cores(const int _processors,
* @param[in] base_ptr buffer object pointer to the Windows system information
* @param[in] len buffer object length of the Windows system information
* @param[out] _processors total number of processors in the system.
* @param[out] _numa_nodes total number of numa nodes in the system
* @param[out] _sockets total number of sockets in the system
* @param[out] _cores total number of physical CPU cores in the system
* @param[out] _proc_type_table summary table of number of processors per type
@ -115,6 +140,7 @@ void get_cpu_mapping_from_cores(const int _processors,
void parse_processor_info_win(const char* base_ptr,
const unsigned long len,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,

View File

@ -15,12 +15,13 @@
#include "ie_common.h"
#include "openvino/core/except.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"
namespace ov {
CPU::CPU() {
std::vector<std::vector<std::string>> system_info_table;
std::vector<std::string> node_info_table;
_num_threads = parallel_get_max_threads();
auto get_cache_info_linux = [&]() {
@ -99,6 +100,21 @@ CPU::CPU() {
return 0;
};
auto get_node_info_linux = [&]() {
int node_index = 0;
while (1) {
std::ifstream cache_file("/sys/devices/system/node/node" + std::to_string(node_index) + "/cpulist");
if (!cache_file.is_open()) {
break;
}
std::string cache_info;
std::getline(cache_file, cache_info);
node_info_table.push_back(cache_info);
node_index++;
}
};
auto check_valid_cpu = [&]() {
cpu_set_t mask;
CPU_ZERO(&mask);
@ -131,10 +147,14 @@ CPU::CPU() {
}
};
get_node_info_linux();
if (!get_cache_info_linux()) {
parse_cache_info_linux(system_info_table,
node_info_table,
_processors,
_numa_nodes,
_sockets,
_cores,
_proc_type_table,
_cpu_mapping_table);
@ -143,8 +163,10 @@ CPU::CPU() {
if ((_proc_type_table.size() == 0) || (_proc_type_table[0][MAIN_CORE_PROC] == 0)) {
if (!get_freq_info_linux()) {
parse_freq_info_linux(system_info_table,
node_info_table,
_processors,
_numa_nodes,
_sockets,
_cores,
_proc_type_table,
_cpu_mapping_table);
@ -177,7 +199,10 @@ CPU::CPU() {
}
}
_processors = processors.size();
_numa_nodes = sockets.size() == 0 ? 1 : sockets.size();
_sockets = _numa_nodes;
for (auto&& socket : sockets) {
_cores += socket.second;
}
@ -203,8 +228,77 @@ CPU::CPU() {
};
}
void parse_node_info_linux(const std::vector<std::string> node_info_table,
const int& _numa_nodes,
int& _sockets,
std::vector<std::vector<int>>& _proc_type_table,
std::vector<std::vector<int>>& _cpu_mapping_table) {
std::vector<std::vector<int>> nodes_table;
int node_index = 0;
for (auto& one_info : node_info_table) {
int core_1 = 0;
int core_2 = 0;
std::string::size_type pos = 0;
std::string::size_type endpos = 0;
std::string sub_str = "";
if (((endpos = one_info.find('-', pos)) == std::string::npos) &&
((endpos = one_info.find(',', pos)) != std::string::npos)) {
while (endpos != std::string::npos) {
sub_str = one_info.substr(pos);
core_1 = std::stoi(sub_str);
nodes_table.push_back({core_1, core_1, node_index});
endpos = one_info.find(',', pos);
pos = endpos + 1;
}
} else {
while (endpos != std::string::npos) {
if ((endpos = one_info.find('-', pos)) != std::string::npos) {
sub_str = one_info.substr(pos, endpos - pos);
core_1 = std::stoi(sub_str);
sub_str = one_info.substr(endpos + 1);
core_2 = std::stoi(sub_str);
nodes_table.push_back({core_1, core_2, node_index});
pos = one_info.find(',', endpos);
if (pos == std::string::npos) {
break;
} else {
pos = pos + 1;
}
}
}
}
node_index++;
}
_proc_type_table.assign((node_info_table.size() == 1) ? 1 : node_info_table.size() + 1,
std::vector<int>({0, 0, 0, 0, -1, -1}));
for (auto& row : nodes_table) {
for (int i = row[0]; i <= row[1]; i++) {
_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] = row[2];
if (_sockets > _numa_nodes) {
_cpu_mapping_table[i][CPU_MAP_SOCKET_ID] = row[2];
}
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]]++;
if (node_info_table.size() != 1) {
_proc_type_table[row[2] + 1][ALL_PROC]++;
_proc_type_table[row[2] + 1][_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]]++;
}
}
node_index = (node_info_table.size() != 1) ? row[2] + 1 : 0;
_proc_type_table[node_index][PROC_NUMA_NODE_ID] = _cpu_mapping_table[row[0]][CPU_MAP_NUMA_NODE_ID];
_proc_type_table[node_index][PROC_SOCKET_ID] = _cpu_mapping_table[row[0]][CPU_MAP_SOCKET_ID];
}
_sockets = (_sockets > _numa_nodes) ? _numa_nodes : _sockets;
}
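// Example of the /sys/devices/system/node/node<N>/cpulist strings consumed above: ranges use '-'
// and discontiguous sets use ',', e.g. node0 may report "0-11,24-35". A minimal standalone sketch
// (hypothetical helper, not used by the parser above) of expanding one "<first>-<last>" token:
static inline std::vector<int> expand_cpulist_range_sketch(const std::string& token) {
    const auto dash = token.find('-');
    const int first = std::stoi(token.substr(0, dash));
    const int last = (dash == std::string::npos) ? first : std::stoi(token.substr(dash + 1));
    std::vector<int> cpus;
    for (int cpu = first; cpu <= last; ++cpu) {
        cpus.push_back(cpu);  // e.g. "24-27" -> {24, 25, 26, 27}
    }
    return cpus;
}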
void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_info_table,
const std::vector<std::string> node_info_table,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -224,7 +318,7 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
if (((endpos = system_info_table[nproc][0].find(',', pos)) != std::string::npos) ||
((endpos = system_info_table[nproc][0].find('-', pos)) != std::string::npos)) {
sub_str = system_info_table[nproc][0].substr(pos, endpos);
sub_str = system_info_table[nproc][0].substr(pos, endpos - pos);
core_1 = std::stoi(sub_str);
sub_str = system_info_table[nproc][0].substr(endpos + 1);
core_2 = std::stoi(sub_str);
@ -246,13 +340,12 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
_cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = n_group;
_cores++;
n_group++;
_proc_type_table[0][ALL_PROC] += 2;
_proc_type_table[0][MAIN_CORE_PROC]++;
_proc_type_table[0][HYPER_THREADING_PROC]++;
} else if ((endpos = system_info_table[nproc][1].find('-', pos)) != std::string::npos) {
sub_str = system_info_table[nproc][1].substr(pos, endpos);
sub_str = system_info_table[nproc][1].substr(pos, endpos - pos);
core_1 = std::stoi(sub_str);
sub_str = system_info_table[nproc][1].substr(endpos + 1);
core_2 = std::stoi(sub_str);
@ -268,8 +361,6 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][EFFICIENT_CORE_PROC]++;
}
n_group++;
} else {
core_1 = std::stoi(system_info_table[nproc][0]);
@ -279,16 +370,23 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
_cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = n_group;
_cores++;
n_group++;
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][MAIN_CORE_PROC]++;
}
n_group++;
_proc_type_table[0][PROC_NUMA_NODE_ID] = (_proc_type_table[0][PROC_NUMA_NODE_ID] == -1)
? _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID]
: _proc_type_table[0][PROC_NUMA_NODE_ID];
_proc_type_table[0][PROC_SOCKET_ID] = (_proc_type_table[0][PROC_SOCKET_ID] == -1)
? _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]
: _proc_type_table[0][PROC_SOCKET_ID];
}
return;
};
std::vector<int> line_value_0(PROC_TYPE_TABLE_SIZE, 0);
std::vector<int> line_value_0({0, 0, 0, 0, -1, -1});
for (int n = 0; n < _processors; n++) {
if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) {
@ -308,19 +406,21 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
while (1) {
if ((endpos = system_info_table[n][2].find('-', pos)) != std::string::npos) {
sub_str = system_info_table[n][2].substr(pos, endpos);
sub_str = system_info_table[n][2].substr(pos, endpos - pos);
core_1 = std::stoi(sub_str);
sub_str = system_info_table[n][2].substr(endpos + 1);
core_2 = std::stoi(sub_str);
for (int m = core_1; m <= core_2; m++) {
_cpu_mapping_table[m][CPU_MAP_SOCKET_ID] = _sockets;
_cpu_mapping_table[m][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[m][CPU_MAP_SOCKET_ID];
update_proc_map_info(m);
}
} else if (pos != std::string::npos) {
sub_str = system_info_table[n][2].substr(pos);
core_1 = std::stoi(sub_str);
_cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets;
_cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
update_proc_map_info(core_1);
endpos = pos;
}
@ -334,16 +434,23 @@ void parse_cache_info_linux(const std::vector<std::vector<std::string>> system_i
_sockets++;
}
}
if ((node_info_table.size() == 0) || (node_info_table.size() == (unsigned)_sockets)) {
if (_sockets > 1) {
_proc_type_table.push_back(_proc_type_table[0]);
_proc_type_table[0] = line_value_0;
for (int m = 1; m <= _sockets; m++) {
for (int n = 0; n < PROC_TYPE_TABLE_SIZE; n++) {
for (int n = 0; n < PROC_NUMA_NODE_ID; n++) {
_proc_type_table[0][n] += _proc_type_table[m][n];
}
}
}
_numa_nodes = _sockets;
} else {
_numa_nodes = node_info_table.size();
parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table);
}
};
void get_cpu_mapping_from_cores(const int _processors,
@ -358,11 +465,10 @@ void get_cpu_mapping_from_cores(const int _processors,
const auto socket_offset = big_phys_cores / _numa_nodes;
const auto threads_per_core = hyper_thread ? 2 : 1;
const auto step = num_small_cores_phys > 0 ? 2 : 1;
std::vector<int> pro_all_table;
std::vector<int> pro_all_table = {0, 0, 0, 0, -1, -1};
_cpu_mapping_table.resize(_processors, std::vector<int>(CPU_MAP_TABLE_SIZE, -1));
_proc_type_table.assign(_numa_nodes, std::vector<int>(PROC_TYPE_TABLE_SIZE, 0));
pro_all_table.resize(PROC_TYPE_TABLE_SIZE, 0);
_proc_type_table.assign(_numa_nodes, std::vector<int>({0, 0, 0, 0, -1, -1}));
for (int t = 0; t < threads_per_core; t++) {
int start = t == 0 ? 0 : (num_small_cores_phys > 0 ? 1 : big_phys_cores);
@ -374,10 +480,17 @@ void get_cpu_mapping_from_cores(const int _processors,
_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE] =
hyper_thread ? (t == 0 ? HYPER_THREADING_PROC : MAIN_CORE_PROC) : MAIN_CORE_PROC;
_cpu_mapping_table[cur_id][CPU_MAP_GROUP_ID] = i;
_cpu_mapping_table[cur_id][CPU_MAP_NUMA_NODE_ID] = socket_id;
_cpu_mapping_table[cur_id][CPU_MAP_SOCKET_ID] = socket_id;
_proc_type_table[socket_id][_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE]]++;
_proc_type_table[socket_id][ALL_PROC]++;
_proc_type_table[socket_id][PROC_NUMA_NODE_ID] = (_proc_type_table[socket_id][PROC_NUMA_NODE_ID] == -1)
? socket_id
: _proc_type_table[socket_id][PROC_NUMA_NODE_ID];
_proc_type_table[socket_id][PROC_SOCKET_ID] = (_proc_type_table[socket_id][PROC_SOCKET_ID] == -1)
? socket_id
: _proc_type_table[socket_id][PROC_SOCKET_ID];
pro_all_table[_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE]]++;
pro_all_table[ALL_PROC]++;
}
@ -389,6 +502,7 @@ void get_cpu_mapping_from_cores(const int _processors,
_cpu_mapping_table[cur_id][CPU_MAP_CORE_ID] = big_phys_cores + j;
_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC;
_cpu_mapping_table[cur_id][CPU_MAP_GROUP_ID] = big_phys_cores + j / 4;
_cpu_mapping_table[cur_id][CPU_MAP_NUMA_NODE_ID] = 0;
_cpu_mapping_table[cur_id][CPU_MAP_SOCKET_ID] = 0;
_proc_type_table[0][_cpu_mapping_table[cur_id][CPU_MAP_CORE_TYPE]]++;
@ -403,7 +517,9 @@ void get_cpu_mapping_from_cores(const int _processors,
}
void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_info_table,
const std::vector<std::string> node_info_table,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@ -413,6 +529,7 @@ void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_in
bool ht_enabled = false;
_processors = system_info_table.size();
_numa_nodes = 0;
_sockets = 0;
_cores = 0;
_cpu_mapping_table.resize(_processors, std::vector<int>(CPU_MAP_TABLE_SIZE, -1));
@ -432,19 +549,21 @@ void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_in
if (((endpos1 = system_info_table[n][0].find(',', pos)) != std::string::npos) ||
((endpos2 = system_info_table[n][0].find('-', pos)) != std::string::npos)) {
endpos1 = (endpos1 != std::string::npos) ? endpos1 : endpos2;
sub_str = system_info_table[n][0].substr(pos, endpos1);
sub_str = system_info_table[n][0].substr(pos, endpos1 - pos);
core_1 = std::stoi(sub_str);
sub_str = system_info_table[n][0].substr(endpos1 + 1);
core_2 = std::stoi(sub_str);
_cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1;
_cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]);
_cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
_cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores;
_cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC;
_cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores;
_cpu_mapping_table[core_2][CPU_MAP_PROCESSOR_ID] = core_2;
_cpu_mapping_table[core_2][CPU_MAP_SOCKET_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
_cpu_mapping_table[core_2][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
_cpu_mapping_table[core_2][CPU_MAP_CORE_ID] = _cpu_mapping_table[core_1][CPU_MAP_CORE_ID];
_cpu_mapping_table[core_2][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC;
_cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID];
@@ -452,12 +571,12 @@ void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_in
ht_enabled = true;
int core_freq = std::stoi(system_info_table[core_1][2]);
freq_max = std::max(core_freq, freq_max);
} else if (system_info_table[n][0].size() > 0) {
core_1 = std::stoi(system_info_table[n][0]);
_cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1;
_cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]);
_cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID];
_cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores;
int core_freq = std::stoi(system_info_table[core_1][2]);
@@ -476,28 +595,40 @@ void parse_freq_info_linux(const std::vector<std::vector<std::string>> system_in
}
}
if ((_sockets >= 1) && (ecore_enabled)) {
_sockets = 0;
}
_sockets = (_sockets > 0) ? _sockets + 1 : 1;
if (_sockets >= 1) {
_proc_type_table.resize(_sockets + 2, std::vector<int>(PROC_TYPE_TABLE_SIZE, 0));
if (node_info_table.size() == 0) {
if ((_sockets > 1) && (ecore_enabled)) {
_sockets = 1; // Workaround for platforms under development that expose neither CPU cache nor NUMA node
              // information: the bogus socket data would otherwise create a separate socket ID per CPU core.
}
if (_sockets > 1) {
_proc_type_table.resize(_sockets + 1, std::vector<int>({0, 0, 0, 0, -1, -1}));
for (int n = 0; n < _processors; n++) {
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] + 1][ALL_PROC]++;
_proc_type_table[0][_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
_proc_type_table[_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] + 1][_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
_proc_type_table[_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] + 1]
[_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
}
_sockets++;
for (int n = 0; n < _sockets; n++) {
_proc_type_table[n + 1][PROC_NUMA_NODE_ID] = n;
_proc_type_table[n + 1][PROC_SOCKET_ID] = n;
};
} else {
_proc_type_table.resize(1, std::vector<int>(PROC_TYPE_TABLE_SIZE, 0));
_proc_type_table.resize(1, std::vector<int>({0, 0, 0, 0, 0, 0}));
for (int n = 0; n < _processors; n++) {
_proc_type_table[0][ALL_PROC]++;
_proc_type_table[0][_cpu_mapping_table[n][CPU_MAP_CORE_TYPE]]++;
_cpu_mapping_table[n][CPU_MAP_NUMA_NODE_ID] = 0;
_cpu_mapping_table[n][CPU_MAP_SOCKET_ID] = 0;
}
_sockets = 1;
}
_numa_nodes = _sockets;
} else {
_numa_nodes = node_info_table.size();
parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table);
}
};
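The change from substr(pos, endpos1) to substr(pos, endpos1 - pos) above matters because std::string::substr takes a character count as its second argument, not an end index. A minimal, self-contained illustration (the cpulist string below is made up purely for this example, not taken from the parser):

#include <iostream>
#include <string>

int main() {
    std::string cpulist = "12-15,44-47";           // hypothetical thread-siblings style string
    size_t pos = cpulist.find(',') + 1;            // start of the second token: index 6
    size_t endpos = cpulist.find('-', pos);        // '-' inside "44-47": index 8
    std::cout << cpulist.substr(pos, endpos) << "\n";        // prints "44-47": 8 characters requested from pos 6
    std::cout << cpulist.substr(pos, endpos - pos) << "\n";  // prints "44": the intended token
    return 0;
}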
@@ -507,7 +638,7 @@ void update_valid_processor_linux(const std::vector<int> phy_core_list,
std::vector<std::vector<int>>& _proc_type_table,
std::vector<std::vector<int>>& _cpu_mapping_table) {
for (auto& row : _proc_type_table) {
std::fill(row.begin(), row.end(), 0);
std::fill(row.begin(), row.begin() + PROC_NUMA_NODE_ID, 0);
}
_cores = 0;
for (auto& row : _cpu_mapping_table) {
@@ -540,7 +671,7 @@ void update_valid_processor_linux(const std::vector<int> phy_core_list,
}
if ((_proc_type_table.size() > 1) && (_proc_type_table[0][ALL_PROC] == _proc_type_table[1][ALL_PROC])) {
_proc_type_table.pop_back();
_proc_type_table.erase(_proc_type_table.begin());
}
}
_sockets = _proc_type_table.size() == 1 ? 1 : _proc_type_table.size() - 1;
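The Linux hunks above rely on the last two columns of each _proc_type_table row being identifiers (PROC_NUMA_NODE_ID, PROC_SOCKET_ID) rather than counters, which is why the summing loop stops at PROC_NUMA_NODE_ID and the reset uses a partial std::fill. A standalone sketch of that idea, assuming the column layout used by the tables in this diff ({ALL_PROC, MAIN_CORE_PROC, EFFICIENT_CORE_PROC, HYPER_THREADING_PROC, numa_id, socket_id}):

#include <algorithm>
#include <vector>

// Assumed column indices, matching the {count, count, count, count, numa_id, socket_id} rows in this diff.
enum ProcTypeCol { ALL_PROC = 0, MAIN_CORE_PROC, EFFICIENT_CORE_PROC, HYPER_THREADING_PROC, PROC_NUMA_NODE_ID, PROC_SOCKET_ID };

// Accumulate the per-socket rows into the summary row 0 without disturbing the trailing IDs.
void sum_into_summary_row(std::vector<std::vector<int>>& proc_type_table) {
    for (size_t m = 1; m < proc_type_table.size(); m++) {
        for (int n = ALL_PROC; n < PROC_NUMA_NODE_ID; n++) {
            proc_type_table[0][n] += proc_type_table[m][n];
        }
    }
}

// Reset only the counter columns; the NUMA-node and socket IDs at the tail survive.
void reset_counters(std::vector<std::vector<int>>& proc_type_table) {
    for (auto& row : proc_type_table) {
        std::fill(row.begin(), row.begin() + PROC_NUMA_NODE_ID, 0);
    }
}

int main() {
    // Summary row plus two sockets, as in the expected test data further below.
    std::vector<std::vector<int>> table = {{0, 0, 0, 0, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}};
    sum_into_summary_row(table);  // row 0 becomes {96, 48, 0, 48, -1, -1}
    reset_counters(table);        // counters back to 0, the ID columns keep {-1,-1}, {0,0}, {1,1}
    return 0;
}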

View File

@@ -8,7 +8,7 @@
#include "dev/threading/parallel_custom_arena.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"
namespace ov {

View File

@@ -13,7 +13,7 @@
#include "dev/threading/parallel_custom_arena.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"
namespace ov {
@@ -34,6 +34,7 @@ CPU::CPU() {
len,
_processors,
_numa_nodes,
_sockets,
_cores,
_proc_type_table,
_cpu_mapping_table);
@@ -42,6 +43,7 @@ CPU::CPU() {
void parse_processor_info_win(const char* base_ptr,
const unsigned long len,
int& _processors,
int& _numa_nodes,
int& _sockets,
int& _cores,
std::vector<std::vector<int>>& _proc_type_table,
@@ -49,7 +51,7 @@ void parse_processor_info_win(const char* base_ptr,
std::vector<int> list;
std::vector<int> proc_info;
std::vector<int> proc_init_line(PROC_TYPE_TABLE_SIZE, 0);
std::vector<int> proc_init_line({0, 0, 0, 0, -1, -1});
std::vector<int> cpu_init_line(CPU_MAP_TABLE_SIZE, -1);
char* info_ptr = (char*)base_ptr;
@@ -107,6 +109,7 @@ void parse_processor_info_win(const char* base_ptr,
if (2 == list_len) {
proc_info = cpu_init_line;
proc_info[CPU_MAP_PROCESSOR_ID] = list[0] + base_proc;
proc_info[CPU_MAP_NUMA_NODE_ID] = _sockets;
proc_info[CPU_MAP_SOCKET_ID] = _sockets;
proc_info[CPU_MAP_CORE_ID] = _cores;
proc_info[CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC;
@@ -115,6 +118,7 @@ void parse_processor_info_win(const char* base_ptr,
proc_info = cpu_init_line;
proc_info[CPU_MAP_PROCESSOR_ID] = list[1] + base_proc;
proc_info[CPU_MAP_NUMA_NODE_ID] = _sockets;
proc_info[CPU_MAP_SOCKET_ID] = _sockets;
proc_info[CPU_MAP_CORE_ID] = _cores;
proc_info[CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC;
@@ -128,6 +132,7 @@ void parse_processor_info_win(const char* base_ptr,
} else {
proc_info = cpu_init_line;
proc_info[CPU_MAP_PROCESSOR_ID] = list[0] + base_proc;
proc_info[CPU_MAP_NUMA_NODE_ID] = _sockets;
proc_info[CPU_MAP_SOCKET_ID] = _sockets;
proc_info[CPU_MAP_CORE_ID] = _cores;
if ((_processors > group_start) && (_processors <= group_end)) {
@@ -185,11 +190,17 @@ void parse_processor_info_win(const char* base_ptr,
_proc_type_table[0] = proc_init_line;
for (int m = 1; m <= _sockets; m++) {
for (int n = 0; n < PROC_TYPE_TABLE_SIZE; n++) {
for (int n = 0; n <= HYPER_THREADING_PROC; n++) {
_proc_type_table[0][n] += _proc_type_table[m][n];
}
_proc_type_table[m][PROC_SOCKET_ID] = m - 1;
_proc_type_table[m][PROC_NUMA_NODE_ID] = m - 1;
}
} else {
_proc_type_table[0][PROC_SOCKET_ID] = 0;
_proc_type_table[0][PROC_NUMA_NODE_ID] = 0;
}
_numa_nodes = _sockets;
}
int get_number_of_cpu_cores(bool bigCoresOnly) {

View File

@@ -16,7 +16,7 @@
#include "dev/threading/parallel_custom_arena.hpp"
#include "ie_common.h"
#include "openvino/core/visibility.hpp"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"
#include "threading/ie_cpu_streams_info.hpp"
#ifdef __APPLE__
@@ -341,11 +341,11 @@ void set_cpu_used(const std::vector<int>& cpu_ids, const int used) {
all_table.resize(PROC_TYPE_TABLE_SIZE, 0);
for (int i = 0; i < cpu._processors; i++) {
if (cpu._cpu_mapping_table[i][CPU_MAP_USED_FLAG] < PLUGIN_USED_START &&
cpu._cpu_mapping_table[i][CPU_MAP_SOCKET_ID] >= 0 &&
cpu._cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] >= 0 &&
cpu._cpu_mapping_table[i][CPU_MAP_CORE_TYPE] >= ALL_PROC) {
cpu._proc_type_table[cpu._cpu_mapping_table[i][CPU_MAP_SOCKET_ID] + start]
cpu._proc_type_table[cpu._cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] + start]
[cpu._cpu_mapping_table[i][CPU_MAP_CORE_TYPE]]++;
cpu._proc_type_table[cpu._cpu_mapping_table[i][CPU_MAP_SOCKET_ID] + start][ALL_PROC]++;
cpu._proc_type_table[cpu._cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID] + start][ALL_PROC]++;
all_table[cpu._cpu_mapping_table[i][CPU_MAP_CORE_TYPE]]++;
all_table[ALL_PROC]++;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,992 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <common_test_utils/test_common.hpp>
#include "ie_system_conf.h"
#include "os/cpu_map_info.hpp"
using namespace testing;
using namespace ov;
namespace {
#ifdef __linux__
struct LinuxCpuMapTestCase {
int _processors;
int _numa_nodes;
int _sockets;
int _cores;
std::vector<std::vector<int>> _proc_type_table;
std::vector<std::vector<int>> _cpu_mapping_table;
std::vector<std::vector<std::string>> system_info_table;
std::vector<std::string> node_info_table;
};
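// Column meanings of each _cpu_mapping_table row, as inferred from the parsers earlier in this diff
// (listed here only for convenience when reading the expected data below):
//   { CPU_MAP_PROCESSOR_ID, CPU_MAP_NUMA_NODE_ID, CPU_MAP_SOCKET_ID, CPU_MAP_CORE_ID,
//     CPU_MAP_CORE_TYPE,    CPU_MAP_GROUP_ID,     CPU_MAP_USED_FLAG }
// For example, {56, 1, 1, 56, HYPER_THREADING_PROC, 56, -1} describes logical processor 56 on
// NUMA node 1 / socket 1, physical core 56, tagged as the hyper-threading sibling of its core,
// sharing group 56 with that sibling, and not yet reserved by any plugin (-1).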
class LinuxCpuMapFreqParserTests : public CommonTestUtils::TestsCommon,
public testing::WithParamInterface<std::tuple<LinuxCpuMapTestCase>> {
public:
void SetUp() override {
const auto& test_data = std::get<0>(GetParam());
int test_processors = 0;
int test_numa_nodes = 0;
int test_sockets = 0;
int test_cores = 0;
std::vector<std::vector<int>> test_proc_type_table;
std::vector<std::vector<int>> test_cpu_mapping_table;
ov::parse_freq_info_linux(test_data.system_info_table,
test_data.node_info_table,
test_processors,
test_numa_nodes,
test_sockets,
test_cores,
test_proc_type_table,
test_cpu_mapping_table);
ASSERT_EQ(test_data._processors, test_processors);
ASSERT_EQ(test_data._numa_nodes, test_numa_nodes);
ASSERT_EQ(test_data._sockets, test_sockets);
ASSERT_EQ(test_data._cores, test_cores);
ASSERT_EQ(test_data._proc_type_table, test_proc_type_table);
ASSERT_EQ(test_data._cpu_mapping_table, test_cpu_mapping_table);
}
};
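The test-case constants below are fed to this fixture through gtest's value-parameterized API. The actual registration lives later in the file, beyond the portion shown here; a representative sketch, with the suite name and case list chosen only for illustration, would sit after the test-case definitions:

// Illustrative only - the real TEST_P/INSTANTIATE_TEST_SUITE_P wiring in the file may differ.
TEST_P(LinuxCpuMapFreqParserTests, LinuxFreq) {}

INSTANTIATE_TEST_SUITE_P(CPUMapParser,
                         LinuxCpuMapFreqParserTests,
                         testing::Values(freq_2sockets_112cores_hyperthreading,
                                         freq_2sockets_48cores_hyperthreading));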
LinuxCpuMapTestCase freq_2sockets_112cores_hyperthreading = {
224, // param[expected out]: total 224 logical processors on this simulated platform
2, // param[expected out]: total 2 numa nodes on this simulated platform
2, // param[expected out]: total 2 sockets on this simulated platform
112, // param[expected out]: total 112 CPU cores on this simulated platform
{{224, 112, 0, 112, -1, -1},
{112, 56, 0, 56, 0, 0},
{112, 56, 0, 56, 1, 1}}, // param[expected out]: The proc_type_table of this simulated platform
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 0, 0, 11, HYPER_THREADING_PROC, 11, -1},
{12, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 0, 0, 13, HYPER_THREADING_PROC, 13, -1},
{14, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 0, 0, 15, HYPER_THREADING_PROC, 15, -1},
{16, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 0, 0, 17, HYPER_THREADING_PROC, 17, -1},
{18, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 0, 0, 19, HYPER_THREADING_PROC, 19, -1},
{20, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 0, 0, 21, HYPER_THREADING_PROC, 21, -1},
{22, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 0, 0, 23, HYPER_THREADING_PROC, 23, -1},
{24, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {25, 0, 0, 25, HYPER_THREADING_PROC, 25, -1},
{26, 0, 0, 26, HYPER_THREADING_PROC, 26, -1}, {27, 0, 0, 27, HYPER_THREADING_PROC, 27, -1},
{28, 0, 0, 28, HYPER_THREADING_PROC, 28, -1}, {29, 0, 0, 29, HYPER_THREADING_PROC, 29, -1},
{30, 0, 0, 30, HYPER_THREADING_PROC, 30, -1}, {31, 0, 0, 31, HYPER_THREADING_PROC, 31, -1},
{32, 0, 0, 32, HYPER_THREADING_PROC, 32, -1}, {33, 0, 0, 33, HYPER_THREADING_PROC, 33, -1},
{34, 0, 0, 34, HYPER_THREADING_PROC, 34, -1}, {35, 0, 0, 35, HYPER_THREADING_PROC, 35, -1},
{36, 0, 0, 36, HYPER_THREADING_PROC, 36, -1}, {37, 0, 0, 37, HYPER_THREADING_PROC, 37, -1},
{38, 0, 0, 38, HYPER_THREADING_PROC, 38, -1}, {39, 0, 0, 39, HYPER_THREADING_PROC, 39, -1},
{40, 0, 0, 40, HYPER_THREADING_PROC, 40, -1}, {41, 0, 0, 41, HYPER_THREADING_PROC, 41, -1},
{42, 0, 0, 42, HYPER_THREADING_PROC, 42, -1}, {43, 0, 0, 43, HYPER_THREADING_PROC, 43, -1},
{44, 0, 0, 44, HYPER_THREADING_PROC, 44, -1}, {45, 0, 0, 45, HYPER_THREADING_PROC, 45, -1},
{46, 0, 0, 46, HYPER_THREADING_PROC, 46, -1}, {47, 0, 0, 47, HYPER_THREADING_PROC, 47, -1},
{48, 0, 0, 48, HYPER_THREADING_PROC, 48, -1}, {49, 0, 0, 49, HYPER_THREADING_PROC, 49, -1},
{50, 0, 0, 50, HYPER_THREADING_PROC, 50, -1}, {51, 0, 0, 51, HYPER_THREADING_PROC, 51, -1},
{52, 0, 0, 52, HYPER_THREADING_PROC, 52, -1}, {53, 0, 0, 53, HYPER_THREADING_PROC, 53, -1},
{54, 0, 0, 54, HYPER_THREADING_PROC, 54, -1}, {55, 0, 0, 55, HYPER_THREADING_PROC, 55, -1},
{56, 1, 1, 56, HYPER_THREADING_PROC, 56, -1}, {57, 1, 1, 57, HYPER_THREADING_PROC, 57, -1},
{58, 1, 1, 58, HYPER_THREADING_PROC, 58, -1}, {59, 1, 1, 59, HYPER_THREADING_PROC, 59, -1},
{60, 1, 1, 60, HYPER_THREADING_PROC, 60, -1}, {61, 1, 1, 61, HYPER_THREADING_PROC, 61, -1},
{62, 1, 1, 62, HYPER_THREADING_PROC, 62, -1}, {63, 1, 1, 63, HYPER_THREADING_PROC, 63, -1},
{64, 1, 1, 64, HYPER_THREADING_PROC, 64, -1}, {65, 1, 1, 65, HYPER_THREADING_PROC, 65, -1},
{66, 1, 1, 66, HYPER_THREADING_PROC, 66, -1}, {67, 1, 1, 67, HYPER_THREADING_PROC, 67, -1},
{68, 1, 1, 68, HYPER_THREADING_PROC, 68, -1}, {69, 1, 1, 69, HYPER_THREADING_PROC, 69, -1},
{70, 1, 1, 70, HYPER_THREADING_PROC, 70, -1}, {71, 1, 1, 71, HYPER_THREADING_PROC, 71, -1},
{72, 1, 1, 72, HYPER_THREADING_PROC, 72, -1}, {73, 1, 1, 73, HYPER_THREADING_PROC, 73, -1},
{74, 1, 1, 74, HYPER_THREADING_PROC, 74, -1}, {75, 1, 1, 75, HYPER_THREADING_PROC, 75, -1},
{76, 1, 1, 76, HYPER_THREADING_PROC, 76, -1}, {77, 1, 1, 77, HYPER_THREADING_PROC, 77, -1},
{78, 1, 1, 78, HYPER_THREADING_PROC, 78, -1}, {79, 1, 1, 79, HYPER_THREADING_PROC, 79, -1},
{80, 1, 1, 80, HYPER_THREADING_PROC, 80, -1}, {81, 1, 1, 81, HYPER_THREADING_PROC, 81, -1},
{82, 1, 1, 82, HYPER_THREADING_PROC, 82, -1}, {83, 1, 1, 83, HYPER_THREADING_PROC, 83, -1},
{84, 1, 1, 84, HYPER_THREADING_PROC, 84, -1}, {85, 1, 1, 85, HYPER_THREADING_PROC, 85, -1},
{86, 1, 1, 86, HYPER_THREADING_PROC, 86, -1}, {87, 1, 1, 87, HYPER_THREADING_PROC, 87, -1},
{88, 1, 1, 88, HYPER_THREADING_PROC, 88, -1}, {89, 1, 1, 89, HYPER_THREADING_PROC, 89, -1},
{90, 1, 1, 90, HYPER_THREADING_PROC, 90, -1}, {91, 1, 1, 91, HYPER_THREADING_PROC, 91, -1},
{92, 1, 1, 92, HYPER_THREADING_PROC, 92, -1}, {93, 1, 1, 93, HYPER_THREADING_PROC, 93, -1},
{94, 1, 1, 94, HYPER_THREADING_PROC, 94, -1}, {95, 1, 1, 95, HYPER_THREADING_PROC, 95, -1},
{96, 1, 1, 96, HYPER_THREADING_PROC, 96, -1}, {97, 1, 1, 97, HYPER_THREADING_PROC, 97, -1},
{98, 1, 1, 98, HYPER_THREADING_PROC, 98, -1}, {99, 1, 1, 99, HYPER_THREADING_PROC, 99, -1},
{100, 1, 1, 100, HYPER_THREADING_PROC, 100, -1}, {101, 1, 1, 101, HYPER_THREADING_PROC, 101, -1},
{102, 1, 1, 102, HYPER_THREADING_PROC, 102, -1}, {103, 1, 1, 103, HYPER_THREADING_PROC, 103, -1},
{104, 1, 1, 104, HYPER_THREADING_PROC, 104, -1}, {105, 1, 1, 105, HYPER_THREADING_PROC, 105, -1},
{106, 1, 1, 106, HYPER_THREADING_PROC, 106, -1}, {107, 1, 1, 107, HYPER_THREADING_PROC, 107, -1},
{108, 1, 1, 108, HYPER_THREADING_PROC, 108, -1}, {109, 1, 1, 109, HYPER_THREADING_PROC, 109, -1},
{110, 1, 1, 110, HYPER_THREADING_PROC, 110, -1}, {111, 1, 1, 111, HYPER_THREADING_PROC, 111, -1},
{112, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {113, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{114, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {115, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{116, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {117, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{118, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {119, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{120, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {121, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{122, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {123, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
{124, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {125, 0, 0, 13, MAIN_CORE_PROC, 13, -1},
{126, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {127, 0, 0, 15, MAIN_CORE_PROC, 15, -1},
{128, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {129, 0, 0, 17, MAIN_CORE_PROC, 17, -1},
{130, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, {131, 0, 0, 19, MAIN_CORE_PROC, 19, -1},
{132, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {133, 0, 0, 21, MAIN_CORE_PROC, 21, -1},
{134, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {135, 0, 0, 23, MAIN_CORE_PROC, 23, -1},
{136, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {137, 0, 0, 25, MAIN_CORE_PROC, 25, -1},
{138, 0, 0, 26, MAIN_CORE_PROC, 26, -1}, {139, 0, 0, 27, MAIN_CORE_PROC, 27, -1},
{140, 0, 0, 28, MAIN_CORE_PROC, 28, -1}, {141, 0, 0, 29, MAIN_CORE_PROC, 29, -1},
{142, 0, 0, 30, MAIN_CORE_PROC, 30, -1}, {143, 0, 0, 31, MAIN_CORE_PROC, 31, -1},
{144, 0, 0, 32, MAIN_CORE_PROC, 32, -1}, {145, 0, 0, 33, MAIN_CORE_PROC, 33, -1},
{146, 0, 0, 34, MAIN_CORE_PROC, 34, -1}, {147, 0, 0, 35, MAIN_CORE_PROC, 35, -1},
{148, 0, 0, 36, MAIN_CORE_PROC, 36, -1}, {149, 0, 0, 37, MAIN_CORE_PROC, 37, -1},
{150, 0, 0, 38, MAIN_CORE_PROC, 38, -1}, {151, 0, 0, 39, MAIN_CORE_PROC, 39, -1},
{152, 0, 0, 40, MAIN_CORE_PROC, 40, -1}, {153, 0, 0, 41, MAIN_CORE_PROC, 41, -1},
{154, 0, 0, 42, MAIN_CORE_PROC, 42, -1}, {155, 0, 0, 43, MAIN_CORE_PROC, 43, -1},
{156, 0, 0, 44, MAIN_CORE_PROC, 44, -1}, {157, 0, 0, 45, MAIN_CORE_PROC, 45, -1},
{158, 0, 0, 46, MAIN_CORE_PROC, 46, -1}, {159, 0, 0, 47, MAIN_CORE_PROC, 47, -1},
{160, 0, 0, 48, MAIN_CORE_PROC, 48, -1}, {161, 0, 0, 49, MAIN_CORE_PROC, 49, -1},
{162, 0, 0, 50, MAIN_CORE_PROC, 50, -1}, {163, 0, 0, 51, MAIN_CORE_PROC, 51, -1},
{164, 0, 0, 52, MAIN_CORE_PROC, 52, -1}, {165, 0, 0, 53, MAIN_CORE_PROC, 53, -1},
{166, 0, 0, 54, MAIN_CORE_PROC, 54, -1}, {167, 0, 0, 55, MAIN_CORE_PROC, 55, -1},
{168, 1, 1, 56, MAIN_CORE_PROC, 56, -1}, {169, 1, 1, 57, MAIN_CORE_PROC, 57, -1},
{170, 1, 1, 58, MAIN_CORE_PROC, 58, -1}, {171, 1, 1, 59, MAIN_CORE_PROC, 59, -1},
{172, 1, 1, 60, MAIN_CORE_PROC, 60, -1}, {173, 1, 1, 61, MAIN_CORE_PROC, 61, -1},
{174, 1, 1, 62, MAIN_CORE_PROC, 62, -1}, {175, 1, 1, 63, MAIN_CORE_PROC, 63, -1},
{176, 1, 1, 64, MAIN_CORE_PROC, 64, -1}, {177, 1, 1, 65, MAIN_CORE_PROC, 65, -1},
{178, 1, 1, 66, MAIN_CORE_PROC, 66, -1}, {179, 1, 1, 67, MAIN_CORE_PROC, 67, -1},
{180, 1, 1, 68, MAIN_CORE_PROC, 68, -1}, {181, 1, 1, 69, MAIN_CORE_PROC, 69, -1},
{182, 1, 1, 70, MAIN_CORE_PROC, 70, -1}, {183, 1, 1, 71, MAIN_CORE_PROC, 71, -1},
{184, 1, 1, 72, MAIN_CORE_PROC, 72, -1}, {185, 1, 1, 73, MAIN_CORE_PROC, 73, -1},
{186, 1, 1, 74, MAIN_CORE_PROC, 74, -1}, {187, 1, 1, 75, MAIN_CORE_PROC, 75, -1},
{188, 1, 1, 76, MAIN_CORE_PROC, 76, -1}, {189, 1, 1, 77, MAIN_CORE_PROC, 77, -1},
{190, 1, 1, 78, MAIN_CORE_PROC, 78, -1}, {191, 1, 1, 79, MAIN_CORE_PROC, 79, -1},
{192, 1, 1, 80, MAIN_CORE_PROC, 80, -1}, {193, 1, 1, 81, MAIN_CORE_PROC, 81, -1},
{194, 1, 1, 82, MAIN_CORE_PROC, 82, -1}, {195, 1, 1, 83, MAIN_CORE_PROC, 83, -1},
{196, 1, 1, 84, MAIN_CORE_PROC, 84, -1}, {197, 1, 1, 85, MAIN_CORE_PROC, 85, -1},
{198, 1, 1, 86, MAIN_CORE_PROC, 86, -1}, {199, 1, 1, 87, MAIN_CORE_PROC, 87, -1},
{200, 1, 1, 88, MAIN_CORE_PROC, 88, -1}, {201, 1, 1, 89, MAIN_CORE_PROC, 89, -1},
{202, 1, 1, 90, MAIN_CORE_PROC, 90, -1}, {203, 1, 1, 91, MAIN_CORE_PROC, 91, -1},
{204, 1, 1, 92, MAIN_CORE_PROC, 92, -1}, {205, 1, 1, 93, MAIN_CORE_PROC, 93, -1},
{206, 1, 1, 94, MAIN_CORE_PROC, 94, -1}, {207, 1, 1, 95, MAIN_CORE_PROC, 95, -1},
{208, 1, 1, 96, MAIN_CORE_PROC, 96, -1}, {209, 1, 1, 97, MAIN_CORE_PROC, 97, -1},
{210, 1, 1, 98, MAIN_CORE_PROC, 98, -1}, {211, 1, 1, 99, MAIN_CORE_PROC, 99, -1},
{212, 1, 1, 100, MAIN_CORE_PROC, 100, -1}, {213, 1, 1, 101, MAIN_CORE_PROC, 101, -1},
{214, 1, 1, 102, MAIN_CORE_PROC, 102, -1}, {215, 1, 1, 103, MAIN_CORE_PROC, 103, -1},
{216, 1, 1, 104, MAIN_CORE_PROC, 104, -1}, {217, 1, 1, 105, MAIN_CORE_PROC, 105, -1},
{218, 1, 1, 106, MAIN_CORE_PROC, 106, -1}, {219, 1, 1, 107, MAIN_CORE_PROC, 107, -1},
{220, 1, 1, 108, MAIN_CORE_PROC, 108, -1}, {221, 1, 1, 109, MAIN_CORE_PROC, 109, -1},
{222, 1, 1, 110, MAIN_CORE_PROC, 110, -1}, {223, 1, 1, 111, MAIN_CORE_PROC, 111, -1},
}, // param[expected out]: The cpu_mapping_table of this simulated platform
{
{"0,112", "0", "2001000"}, {"1,113", "0", "2001000"}, {"2,114", "0", "2001000"},
{"3,115", "0", "2001000"}, {"4,116", "0", "2001000"}, {"5,117", "0", "2001000"},
{"6,118", "0", "2001000"}, {"7,119", "0", "2001000"}, {"8,120", "0", "2001000"},
{"9,121", "0", "2001000"}, {"10,122", "0", "2001000"}, {"11,123", "0", "2001000"},
{"12,124", "0", "2001000"}, {"13,125", "0", "2001000"}, {"14,126", "0", "2001000"},
{"15,127", "0", "2001000"}, {"16,128", "0", "2001000"}, {"17,129", "0", "2001000"},
{"18,130", "0", "2001000"}, {"19,131", "0", "2001000"}, {"20,132", "0", "2001000"},
{"21,133", "0", "2001000"}, {"22,134", "0", "2001000"}, {"23,135", "0", "2001000"},
{"24,136", "0", "2001000"}, {"25,137", "0", "2001000"}, {"26,138", "0", "2001000"},
{"27,139", "0", "2001000"}, {"28,140", "0", "2001000"}, {"29,141", "0", "2001000"},
{"30,142", "0", "2001000"}, {"31,143", "0", "2001000"}, {"32,144", "0", "2001000"},
{"33,145", "0", "2001000"}, {"34,146", "0", "2001000"}, {"35,147", "0", "2001000"},
{"36,148", "0", "2001000"}, {"37,149", "0", "2001000"}, {"38,150", "0", "2001000"},
{"39,151", "0", "2001000"}, {"40,152", "0", "2001000"}, {"41,153", "0", "2001000"},
{"42,154", "0", "2001000"}, {"43,155", "0", "2001000"}, {"44,156", "0", "2001000"},
{"45,157", "0", "2001000"}, {"46,158", "0", "2001000"}, {"47,159", "0", "2001000"},
{"48,160", "0", "2001000"}, {"49,161", "0", "2001000"}, {"50,162", "0", "2001000"},
{"51,163", "0", "2001000"}, {"52,164", "0", "2001000"}, {"53,165", "0", "2001000"},
{"54,166", "0", "2001000"}, {"55,167", "0", "2001000"}, {"56,168", "1", "2001000"},
{"57,169", "1", "2001000"}, {"58,170", "1", "2001000"}, {"59,171", "1", "2001000"},
{"60,172", "1", "2001000"}, {"61,173", "1", "2001000"}, {"62,174", "1", "2001000"},
{"63,175", "1", "2001000"}, {"64,176", "1", "2001000"}, {"65,177", "1", "2001000"},
{"66,178", "1", "2001000"}, {"67,179", "1", "2001000"}, {"68,180", "1", "2001000"},
{"69,181", "1", "2001000"}, {"70,182", "1", "2001000"}, {"71,183", "1", "2001000"},
{"72,184", "1", "2001000"}, {"73,185", "1", "2001000"}, {"74,186", "1", "2001000"},
{"75,187", "1", "2001000"}, {"76,188", "1", "2001000"}, {"77,189", "1", "2001000"},
{"78,190", "1", "2001000"}, {"79,191", "1", "2001000"}, {"80,192", "1", "2001000"},
{"81,193", "1", "2001000"}, {"82,194", "1", "2001000"}, {"83,195", "1", "2001000"},
{"84,196", "1", "2001000"}, {"85,197", "1", "2001000"}, {"86,198", "1", "2001000"},
{"87,199", "1", "2001000"}, {"88,200", "1", "2001000"}, {"89,201", "1", "2001000"},
{"90,202", "1", "2001000"}, {"91,203", "1", "2001000"}, {"92,204", "1", "2001000"},
{"93,205", "1", "2001000"}, {"94,206", "1", "2001000"}, {"95,207", "1", "2001000"},
{"96,208", "1", "2001000"}, {"97,209", "1", "2001000"}, {"98,210", "1", "2001000"},
{"99,211", "1", "2001000"}, {"100,212", "1", "2001000"}, {"101,213", "1", "2001000"},
{"102,214", "1", "2001000"}, {"103,215", "1", "2001000"}, {"104,216", "1", "2001000"},
{"105,217", "1", "2001000"}, {"106,218", "1", "2001000"}, {"107,219", "1", "2001000"},
{"108,220", "1", "2001000"}, {"109,221", "1", "2001000"}, {"110,222", "1", "2001000"},
{"111,223", "1", "2001000"}, {"0,112", "0", "2001000"}, {"1,113", "0", "2001000"},
{"2,114", "0", "2001000"}, {"3,115", "0", "2001000"}, {"4,116", "0", "2001000"},
{"5,117", "0", "2001000"}, {"6,118", "0", "2001000"}, {"7,119", "0", "2001000"},
{"8,120", "0", "2001000"}, {"9,121", "0", "2001000"}, {"10,122", "0", "2001000"},
{"11,123", "0", "2001000"}, {"12,124", "0", "2001000"}, {"13,125", "0", "2001000"},
{"14,126", "0", "2001000"}, {"15,127", "0", "2001000"}, {"16,128", "0", "2001000"},
{"17,129", "0", "2001000"}, {"18,130", "0", "2001000"}, {"19,131", "0", "2001000"},
{"20,132", "0", "2001000"}, {"21,133", "0", "2001000"}, {"22,134", "0", "2001000"},
{"23,135", "0", "2001000"}, {"24,136", "0", "2001000"}, {"25,137", "0", "2001000"},
{"26,138", "0", "2001000"}, {"27,139", "0", "2001000"}, {"28,140", "0", "2001000"},
{"29,141", "0", "2001000"}, {"30,142", "0", "2001000"}, {"31,143", "0", "2001000"},
{"32,144", "0", "2001000"}, {"33,145", "0", "2001000"}, {"34,146", "0", "2001000"},
{"35,147", "0", "2001000"}, {"36,148", "0", "2001000"}, {"37,149", "0", "2001000"},
{"38,150", "0", "2001000"}, {"39,151", "0", "2001000"}, {"40,152", "0", "2001000"},
{"41,153", "0", "2001000"}, {"42,154", "0", "2001000"}, {"43,155", "0", "2001000"},
{"44,156", "0", "2001000"}, {"45,157", "0", "2001000"}, {"46,158", "0", "2001000"},
{"47,159", "0", "2001000"}, {"48,160", "0", "2001000"}, {"49,161", "0", "2001000"},
{"50,162", "0", "2001000"}, {"51,163", "0", "2001000"}, {"52,164", "0", "2001000"},
{"53,165", "0", "2001000"}, {"54,166", "0", "2001000"}, {"55,167", "0", "2001000"},
{"56,168", "1", "2001000"}, {"57,169", "1", "2001000"}, {"58,170", "1", "2001000"},
{"59,171", "1", "2001000"}, {"60,172", "1", "2001000"}, {"61,173", "1", "2001000"},
{"62,174", "1", "2001000"}, {"63,175", "1", "2001000"}, {"64,176", "1", "2001000"},
{"65,177", "1", "2001000"}, {"66,178", "1", "2001000"}, {"67,179", "1", "2001000"},
{"68,180", "1", "2001000"}, {"69,181", "1", "2001000"}, {"70,182", "1", "2001000"},
{"71,183", "1", "2001000"}, {"72,184", "1", "2001000"}, {"73,185", "1", "2001000"},
{"74,186", "1", "2001000"}, {"75,187", "1", "2001000"}, {"76,188", "1", "2001000"},
{"77,189", "1", "2001000"}, {"78,190", "1", "2001000"}, {"79,191", "1", "2001000"},
{"80,192", "1", "2001000"}, {"81,193", "1", "2001000"}, {"82,194", "1", "2001000"},
{"83,195", "1", "2001000"}, {"84,196", "1", "2001000"}, {"85,197", "1", "2001000"},
{"86,198", "1", "2001000"}, {"87,199", "1", "2001000"}, {"88,200", "1", "2001000"},
{"89,201", "1", "2001000"}, {"90,202", "1", "2001000"}, {"91,203", "1", "2001000"},
{"92,204", "1", "2001000"}, {"93,205", "1", "2001000"}, {"94,206", "1", "2001000"},
{"95,207", "1", "2001000"}, {"96,208", "1", "2001000"}, {"97,209", "1", "2001000"},
{"98,210", "1", "2001000"}, {"99,211", "1", "2001000"}, {"100,212", "1", "2001000"},
{"101,213", "1", "2001000"}, {"102,214", "1", "2001000"}, {"103,215", "1", "2001000"},
{"104,216", "1", "2001000"}, {"105,217", "1", "2001000"}, {"106,218", "1", "2001000"},
{"107,219", "1", "2001000"}, {"108,220", "1", "2001000"}, {"109,221", "1", "2001000"},
{"110,222", "1", "2001000"}, {"111,223", "1", "2001000"},
}, // param[in]: The CPU frequency information table of this simulated platform
{{"0-55,112-167"}, {"56-111,168-223"}}, // param[in]: The numa node information table of this simulated platform
};
LinuxCpuMapTestCase freq_2sockets_48cores_hyperthreading = {
96,
2,
2,
48,
{{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 0, 0, 11, HYPER_THREADING_PROC, 11, -1},
{12, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 0, 0, 13, HYPER_THREADING_PROC, 13, -1},
{14, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 0, 0, 15, HYPER_THREADING_PROC, 15, -1},
{16, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 0, 0, 17, HYPER_THREADING_PROC, 17, -1},
{18, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 0, 0, 19, HYPER_THREADING_PROC, 19, -1},
{20, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 0, 0, 21, HYPER_THREADING_PROC, 21, -1},
{22, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 0, 0, 23, HYPER_THREADING_PROC, 23, -1},
{24, 1, 1, 24, HYPER_THREADING_PROC, 24, -1}, {25, 1, 1, 25, HYPER_THREADING_PROC, 25, -1},
{26, 1, 1, 26, HYPER_THREADING_PROC, 26, -1}, {27, 1, 1, 27, HYPER_THREADING_PROC, 27, -1},
{28, 1, 1, 28, HYPER_THREADING_PROC, 28, -1}, {29, 1, 1, 29, HYPER_THREADING_PROC, 29, -1},
{30, 1, 1, 30, HYPER_THREADING_PROC, 30, -1}, {31, 1, 1, 31, HYPER_THREADING_PROC, 31, -1},
{32, 1, 1, 32, HYPER_THREADING_PROC, 32, -1}, {33, 1, 1, 33, HYPER_THREADING_PROC, 33, -1},
{34, 1, 1, 34, HYPER_THREADING_PROC, 34, -1}, {35, 1, 1, 35, HYPER_THREADING_PROC, 35, -1},
{36, 1, 1, 36, HYPER_THREADING_PROC, 36, -1}, {37, 1, 1, 37, HYPER_THREADING_PROC, 37, -1},
{38, 1, 1, 38, HYPER_THREADING_PROC, 38, -1}, {39, 1, 1, 39, HYPER_THREADING_PROC, 39, -1},
{40, 1, 1, 40, HYPER_THREADING_PROC, 40, -1}, {41, 1, 1, 41, HYPER_THREADING_PROC, 41, -1},
{42, 1, 1, 42, HYPER_THREADING_PROC, 42, -1}, {43, 1, 1, 43, HYPER_THREADING_PROC, 43, -1},
{44, 1, 1, 44, HYPER_THREADING_PROC, 44, -1}, {45, 1, 1, 45, HYPER_THREADING_PROC, 45, -1},
{46, 1, 1, 46, HYPER_THREADING_PROC, 46, -1}, {47, 1, 1, 47, HYPER_THREADING_PROC, 47, -1},
{48, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {49, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{50, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {51, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{52, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {53, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{54, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {55, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{56, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {57, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{58, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {59, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
{60, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {61, 0, 0, 13, MAIN_CORE_PROC, 13, -1},
{62, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {63, 0, 0, 15, MAIN_CORE_PROC, 15, -1},
{64, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {65, 0, 0, 17, MAIN_CORE_PROC, 17, -1},
{66, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, {67, 0, 0, 19, MAIN_CORE_PROC, 19, -1},
{68, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {69, 0, 0, 21, MAIN_CORE_PROC, 21, -1},
{70, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {71, 0, 0, 23, MAIN_CORE_PROC, 23, -1},
{72, 1, 1, 24, MAIN_CORE_PROC, 24, -1}, {73, 1, 1, 25, MAIN_CORE_PROC, 25, -1},
{74, 1, 1, 26, MAIN_CORE_PROC, 26, -1}, {75, 1, 1, 27, MAIN_CORE_PROC, 27, -1},
{76, 1, 1, 28, MAIN_CORE_PROC, 28, -1}, {77, 1, 1, 29, MAIN_CORE_PROC, 29, -1},
{78, 1, 1, 30, MAIN_CORE_PROC, 30, -1}, {79, 1, 1, 31, MAIN_CORE_PROC, 31, -1},
{80, 1, 1, 32, MAIN_CORE_PROC, 32, -1}, {81, 1, 1, 33, MAIN_CORE_PROC, 33, -1},
{82, 1, 1, 34, MAIN_CORE_PROC, 34, -1}, {83, 1, 1, 35, MAIN_CORE_PROC, 35, -1},
{84, 1, 1, 36, MAIN_CORE_PROC, 36, -1}, {85, 1, 1, 37, MAIN_CORE_PROC, 37, -1},
{86, 1, 1, 38, MAIN_CORE_PROC, 38, -1}, {87, 1, 1, 39, MAIN_CORE_PROC, 39, -1},
{88, 1, 1, 40, MAIN_CORE_PROC, 40, -1}, {89, 1, 1, 41, MAIN_CORE_PROC, 41, -1},
{90, 1, 1, 42, MAIN_CORE_PROC, 42, -1}, {91, 1, 1, 43, MAIN_CORE_PROC, 43, -1},
{92, 1, 1, 44, MAIN_CORE_PROC, 44, -1}, {93, 1, 1, 45, MAIN_CORE_PROC, 45, -1},
{94, 1, 1, 46, MAIN_CORE_PROC, 46, -1}, {95, 1, 1, 47, MAIN_CORE_PROC, 47, -1},
},
{
{"0,48", "0", "3600000"}, {"1,49", "0", "3600000"}, {"2,50", "0", "3600000"}, {"3,51", "0", "3600000"},
{"4,52", "0", "3600000"}, {"5,53", "0", "3600000"}, {"6,54", "0", "3600000"}, {"7,55", "0", "3600000"},
{"8,56", "0", "3600000"}, {"9,57", "0", "3600000"}, {"10,58", "0", "3600000"}, {"11,59", "0", "3600000"},
{"12,60", "0", "3600000"}, {"13,61", "0", "3600000"}, {"14,62", "0", "3600000"}, {"15,63", "0", "3600000"},
{"16,64", "0", "3600000"}, {"17,65", "0", "3600000"}, {"18,66", "0", "3600000"}, {"19,67", "0", "3600000"},
{"20,68", "0", "3600000"}, {"21,69", "0", "3600000"}, {"22,70", "0", "3600000"}, {"23,71", "0", "3600000"},
{"24,72", "1", "3600000"}, {"25,73", "1", "3600000"}, {"26,74", "1", "3600000"}, {"27,75", "1", "3600000"},
{"28,76", "1", "3600000"}, {"29,77", "1", "3600000"}, {"30,78", "1", "3600000"}, {"31,79", "1", "3600000"},
{"32,80", "1", "3600000"}, {"33,81", "1", "3600000"}, {"34,82", "1", "3600000"}, {"35,83", "1", "3600000"},
{"36,84", "1", "3600000"}, {"37,85", "1", "3600000"}, {"38,86", "1", "3600000"}, {"39,87", "1", "3600000"},
{"40,88", "1", "3600000"}, {"41,89", "1", "3600000"}, {"42,90", "1", "3600000"}, {"43,91", "1", "3600000"},
{"44,92", "1", "3600000"}, {"45,93", "1", "3600000"}, {"46,94", "1", "3600000"}, {"47,95", "1", "3600000"},
{"0,48", "0", "3600000"}, {"1,49", "0", "3600000"}, {"2,50", "0", "3600000"}, {"3,51", "0", "3600000"},
{"4,52", "0", "3600000"}, {"5,53", "0", "3600000"}, {"6,54", "0", "3600000"}, {"7,55", "0", "3600000"},
{"8,56", "0", "3600000"}, {"9,57", "0", "3600000"}, {"10,58", "0", "3600000"}, {"11,59", "0", "3600000"},
{"12,60", "0", "3600000"}, {"13,61", "0", "3600000"}, {"14,62", "0", "3600000"}, {"15,63", "0", "3600000"},
{"16,64", "0", "3600000"}, {"17,65", "0", "3600000"}, {"18,66", "0", "3600000"}, {"19,67", "0", "3600000"},
{"20,68", "0", "3600000"}, {"21,69", "0", "3600000"}, {"22,70", "0", "3600000"}, {"23,71", "0", "3600000"},
{"24,72", "1", "3600000"}, {"25,73", "1", "3600000"}, {"26,74", "1", "3600000"}, {"27,75", "1", "3600000"},
{"28,76", "1", "3600000"}, {"29,77", "1", "3600000"}, {"30,78", "1", "3600000"}, {"31,79", "1", "3600000"},
{"32,80", "1", "3600000"}, {"33,81", "1", "3600000"}, {"34,82", "1", "3600000"}, {"35,83", "1", "3600000"},
{"36,84", "1", "3600000"}, {"37,85", "1", "3600000"}, {"38,86", "1", "3600000"}, {"39,87", "1", "3600000"},
{"40,88", "1", "3600000"}, {"41,89", "1", "3600000"}, {"42,90", "1", "3600000"}, {"43,91", "1", "3600000"},
{"44,92", "1", "3600000"}, {"45,93", "1", "3600000"}, {"46,94", "1", "3600000"}, {"47,95", "1", "3600000"},
},
{
{"0-23,48-71"},
{"24-47,72-95"},
},
};
LinuxCpuMapTestCase freq_2sockets_48cores_hyperthreading_1 = {
96,
4,
2,
48,
{{96, 48, 0, 48, -1, -1},
{24, 12, 0, 12, 0, 0},
{24, 12, 0, 12, 1, 0},
{24, 12, 0, 12, 2, 1},
{24, 12, 0, 12, 3, 1}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 0, 0, 11, HYPER_THREADING_PROC, 11, -1},
{12, 1, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 1, 0, 13, HYPER_THREADING_PROC, 13, -1},
{14, 1, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 1, 0, 15, HYPER_THREADING_PROC, 15, -1},
{16, 1, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 1, 0, 17, HYPER_THREADING_PROC, 17, -1},
{18, 1, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 1, 0, 19, HYPER_THREADING_PROC, 19, -1},
{20, 1, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 1, 0, 21, HYPER_THREADING_PROC, 21, -1},
{22, 1, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 1, 0, 23, HYPER_THREADING_PROC, 23, -1},
{24, 2, 1, 24, HYPER_THREADING_PROC, 24, -1}, {25, 2, 1, 25, HYPER_THREADING_PROC, 25, -1},
{26, 2, 1, 26, HYPER_THREADING_PROC, 26, -1}, {27, 2, 1, 27, HYPER_THREADING_PROC, 27, -1},
{28, 2, 1, 28, HYPER_THREADING_PROC, 28, -1}, {29, 2, 1, 29, HYPER_THREADING_PROC, 29, -1},
{30, 2, 1, 30, HYPER_THREADING_PROC, 30, -1}, {31, 2, 1, 31, HYPER_THREADING_PROC, 31, -1},
{32, 2, 1, 32, HYPER_THREADING_PROC, 32, -1}, {33, 2, 1, 33, HYPER_THREADING_PROC, 33, -1},
{34, 2, 1, 34, HYPER_THREADING_PROC, 34, -1}, {35, 2, 1, 35, HYPER_THREADING_PROC, 35, -1},
{36, 3, 1, 36, HYPER_THREADING_PROC, 36, -1}, {37, 3, 1, 37, HYPER_THREADING_PROC, 37, -1},
{38, 3, 1, 38, HYPER_THREADING_PROC, 38, -1}, {39, 3, 1, 39, HYPER_THREADING_PROC, 39, -1},
{40, 3, 1, 40, HYPER_THREADING_PROC, 40, -1}, {41, 3, 1, 41, HYPER_THREADING_PROC, 41, -1},
{42, 3, 1, 42, HYPER_THREADING_PROC, 42, -1}, {43, 3, 1, 43, HYPER_THREADING_PROC, 43, -1},
{44, 3, 1, 44, HYPER_THREADING_PROC, 44, -1}, {45, 3, 1, 45, HYPER_THREADING_PROC, 45, -1},
{46, 3, 1, 46, HYPER_THREADING_PROC, 46, -1}, {47, 3, 1, 47, HYPER_THREADING_PROC, 47, -1},
{48, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {49, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{50, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {51, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{52, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {53, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{54, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {55, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{56, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {57, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{58, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {59, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
{60, 1, 0, 12, MAIN_CORE_PROC, 12, -1}, {61, 1, 0, 13, MAIN_CORE_PROC, 13, -1},
{62, 1, 0, 14, MAIN_CORE_PROC, 14, -1}, {63, 1, 0, 15, MAIN_CORE_PROC, 15, -1},
{64, 1, 0, 16, MAIN_CORE_PROC, 16, -1}, {65, 1, 0, 17, MAIN_CORE_PROC, 17, -1},
{66, 1, 0, 18, MAIN_CORE_PROC, 18, -1}, {67, 1, 0, 19, MAIN_CORE_PROC, 19, -1},
{68, 1, 0, 20, MAIN_CORE_PROC, 20, -1}, {69, 1, 0, 21, MAIN_CORE_PROC, 21, -1},
{70, 1, 0, 22, MAIN_CORE_PROC, 22, -1}, {71, 1, 0, 23, MAIN_CORE_PROC, 23, -1},
{72, 2, 1, 24, MAIN_CORE_PROC, 24, -1}, {73, 2, 1, 25, MAIN_CORE_PROC, 25, -1},
{74, 2, 1, 26, MAIN_CORE_PROC, 26, -1}, {75, 2, 1, 27, MAIN_CORE_PROC, 27, -1},
{76, 2, 1, 28, MAIN_CORE_PROC, 28, -1}, {77, 2, 1, 29, MAIN_CORE_PROC, 29, -1},
{78, 2, 1, 30, MAIN_CORE_PROC, 30, -1}, {79, 2, 1, 31, MAIN_CORE_PROC, 31, -1},
{80, 2, 1, 32, MAIN_CORE_PROC, 32, -1}, {81, 2, 1, 33, MAIN_CORE_PROC, 33, -1},
{82, 2, 1, 34, MAIN_CORE_PROC, 34, -1}, {83, 2, 1, 35, MAIN_CORE_PROC, 35, -1},
{84, 3, 1, 36, MAIN_CORE_PROC, 36, -1}, {85, 3, 1, 37, MAIN_CORE_PROC, 37, -1},
{86, 3, 1, 38, MAIN_CORE_PROC, 38, -1}, {87, 3, 1, 39, MAIN_CORE_PROC, 39, -1},
{88, 3, 1, 40, MAIN_CORE_PROC, 40, -1}, {89, 3, 1, 41, MAIN_CORE_PROC, 41, -1},
{90, 3, 1, 42, MAIN_CORE_PROC, 42, -1}, {91, 3, 1, 43, MAIN_CORE_PROC, 43, -1},
{92, 3, 1, 44, MAIN_CORE_PROC, 44, -1}, {93, 3, 1, 45, MAIN_CORE_PROC, 45, -1},
{94, 3, 1, 46, MAIN_CORE_PROC, 46, -1}, {95, 3, 1, 47, MAIN_CORE_PROC, 47, -1},
},
{
{"0,48", "0", "3600000"}, {"1,49", "0", "3600000"}, {"2,50", "0", "3600000"}, {"3,51", "0", "3600000"},
{"4,52", "0", "3600000"}, {"5,53", "0", "3600000"}, {"6,54", "0", "3600000"}, {"7,55", "0", "3600000"},
{"8,56", "0", "3600000"}, {"9,57", "0", "3600000"}, {"10,58", "0", "3600000"}, {"11,59", "0", "3600000"},
{"12,60", "0", "3600000"}, {"13,61", "0", "3600000"}, {"14,62", "0", "3600000"}, {"15,63", "0", "3600000"},
{"16,64", "0", "3600000"}, {"17,65", "0", "3600000"}, {"18,66", "0", "3600000"}, {"19,67", "0", "3600000"},
{"20,68", "0", "3600000"}, {"21,69", "0", "3600000"}, {"22,70", "0", "3600000"}, {"23,71", "0", "3600000"},
{"24,72", "1", "3600000"}, {"25,73", "1", "3600000"}, {"26,74", "1", "3600000"}, {"27,75", "1", "3600000"},
{"28,76", "1", "3600000"}, {"29,77", "1", "3600000"}, {"30,78", "1", "3600000"}, {"31,79", "1", "3600000"},
{"32,80", "1", "3600000"}, {"33,81", "1", "3600000"}, {"34,82", "1", "3600000"}, {"35,83", "1", "3600000"},
{"36,84", "1", "3600000"}, {"37,85", "1", "3600000"}, {"38,86", "1", "3600000"}, {"39,87", "1", "3600000"},
{"40,88", "1", "3600000"}, {"41,89", "1", "3600000"}, {"42,90", "1", "3600000"}, {"43,91", "1", "3600000"},
{"44,92", "1", "3600000"}, {"45,93", "1", "3600000"}, {"46,94", "1", "3600000"}, {"47,95", "1", "3600000"},
{"0,48", "0", "3600000"}, {"1,49", "0", "3600000"}, {"2,50", "0", "3600000"}, {"3,51", "0", "3600000"},
{"4,52", "0", "3600000"}, {"5,53", "0", "3600000"}, {"6,54", "0", "3600000"}, {"7,55", "0", "3600000"},
{"8,56", "0", "3600000"}, {"9,57", "0", "3600000"}, {"10,58", "0", "3600000"}, {"11,59", "0", "3600000"},
{"12,60", "0", "3600000"}, {"13,61", "0", "3600000"}, {"14,62", "0", "3600000"}, {"15,63", "0", "3600000"},
{"16,64", "0", "3600000"}, {"17,65", "0", "3600000"}, {"18,66", "0", "3600000"}, {"19,67", "0", "3600000"},
{"20,68", "0", "3600000"}, {"21,69", "0", "3600000"}, {"22,70", "0", "3600000"}, {"23,71", "0", "3600000"},
{"24,72", "1", "3600000"}, {"25,73", "1", "3600000"}, {"26,74", "1", "3600000"}, {"27,75", "1", "3600000"},
{"28,76", "1", "3600000"}, {"29,77", "1", "3600000"}, {"30,78", "1", "3600000"}, {"31,79", "1", "3600000"},
{"32,80", "1", "3600000"}, {"33,81", "1", "3600000"}, {"34,82", "1", "3600000"}, {"35,83", "1", "3600000"},
{"36,84", "1", "3600000"}, {"37,85", "1", "3600000"}, {"38,86", "1", "3600000"}, {"39,87", "1", "3600000"},
{"40,88", "1", "3600000"}, {"41,89", "1", "3600000"}, {"42,90", "1", "3600000"}, {"43,91", "1", "3600000"},
{"44,92", "1", "3600000"}, {"45,93", "1", "3600000"}, {"46,94", "1", "3600000"}, {"47,95", "1", "3600000"},
},
{
{"0-11,48-59"},
{"12-23,60-71"},
{"24-35,72-83"},
{"36-47,84-95"},
},
};
LinuxCpuMapTestCase freq_2sockets_24cores_hyperthreading = {
48,
2,
2,
24,
{{48, 24, 0, 24, -1, -1}, {24, 12, 0, 12, 0, 0}, {24, 12, 0, 12, 1, 1}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 1, 1, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 1, 1, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 1, 1, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 1, 1, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 1, 1, 9, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 1, 1, 11, HYPER_THREADING_PROC, 11, -1},
{12, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 1, 1, 13, HYPER_THREADING_PROC, 13, -1},
{14, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 1, 1, 15, HYPER_THREADING_PROC, 15, -1},
{16, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 1, 1, 17, HYPER_THREADING_PROC, 17, -1},
{18, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 1, 1, 19, HYPER_THREADING_PROC, 19, -1},
{20, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 1, 1, 21, HYPER_THREADING_PROC, 21, -1},
{22, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 1, 1, 23, HYPER_THREADING_PROC, 23, -1},
{24, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {25, 1, 1, 1, MAIN_CORE_PROC, 1, -1},
{26, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {27, 1, 1, 3, MAIN_CORE_PROC, 3, -1},
{28, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {29, 1, 1, 5, MAIN_CORE_PROC, 5, -1},
{30, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {31, 1, 1, 7, MAIN_CORE_PROC, 7, -1},
{32, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {33, 1, 1, 9, MAIN_CORE_PROC, 9, -1},
{34, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {35, 1, 1, 11, MAIN_CORE_PROC, 11, -1},
{36, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {37, 1, 1, 13, MAIN_CORE_PROC, 13, -1},
{38, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {39, 1, 1, 15, MAIN_CORE_PROC, 15, -1},
{40, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {41, 1, 1, 17, MAIN_CORE_PROC, 17, -1},
{42, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, {43, 1, 1, 19, MAIN_CORE_PROC, 19, -1},
{44, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {45, 1, 1, 21, MAIN_CORE_PROC, 21, -1},
{46, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {47, 1, 1, 23, MAIN_CORE_PROC, 23, -1},
},
{
{"0,24", "0", "3500000"}, {"1,25", "1", "3500000"}, {"2,26", "0", "3500000"}, {"3,27", "1", "3500000"},
{"4,28", "0", "3500000"}, {"5,29", "1", "3500000"}, {"6,30", "0", "3500000"}, {"7,31", "1", "3500000"},
{"8,32", "0", "3500000"}, {"9,33", "1", "3500000"}, {"10,34", "0", "3500000"}, {"11,35", "1", "3500000"},
{"12,36", "0", "3500000"}, {"13,37", "1", "3500000"}, {"14,38", "0", "3500000"}, {"15,39", "1", "3500000"},
{"16,40", "0", "3500000"}, {"17,41", "1", "3500000"}, {"18,42", "0", "3500000"}, {"19,43", "1", "3500000"},
{"20,44", "0", "3500000"}, {"21,45", "1", "3500000"}, {"22,46", "0", "3500000"}, {"23,47", "1", "3500000"},
{"0,24", "0", "3500000"}, {"1,25", "1", "3500000"}, {"2,26", "0", "3500000"}, {"3,27", "1", "3500000"},
{"4,28", "0", "3500000"}, {"5,29", "1", "3500000"}, {"6,30", "0", "3500000"}, {"7,31", "1", "3500000"},
{"8,32", "0", "3500000"}, {"9,33", "1", "3500000"}, {"10,34", "0", "3500000"}, {"11,35", "1", "3500000"},
{"12,36", "0", "3500000"}, {"13,37", "1", "3500000"}, {"14,38", "0", "3500000"}, {"15,39", "1", "3500000"},
{"16,40", "0", "3500000"}, {"17,41", "1", "3500000"}, {"18,42", "0", "3500000"}, {"19,43", "1", "3500000"},
{"20,44", "0", "3500000"}, {"21,45", "1", "3500000"}, {"22,46", "0", "3500000"}, {"23,47", "1", "3500000"},
},
{
{"0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46"},
{"1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47"},
},
};
LinuxCpuMapTestCase freq_2sockets_24cores_hyperthreading_1 = {
48,
4,
2,
24,
{{48, 24, 0, 24, -1, -1}, {12, 6, 0, 6, 0, 0}, {12, 6, 0, 6, 1, 0}, {12, 6, 0, 6, 2, 1}, {12, 6, 0, 6, 3, 1}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 2, 1, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 2, 1, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 2, 1, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 2, 1, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 2, 1, 9, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 2, 1, 11, HYPER_THREADING_PROC, 11, -1},
{12, 1, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 3, 1, 13, HYPER_THREADING_PROC, 13, -1},
{14, 1, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 3, 1, 15, HYPER_THREADING_PROC, 15, -1},
{16, 1, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 3, 1, 17, HYPER_THREADING_PROC, 17, -1},
{18, 1, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 3, 1, 19, HYPER_THREADING_PROC, 19, -1},
{20, 1, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 3, 1, 21, HYPER_THREADING_PROC, 21, -1},
{22, 1, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 3, 1, 23, HYPER_THREADING_PROC, 23, -1},
{24, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {25, 2, 1, 1, MAIN_CORE_PROC, 1, -1},
{26, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {27, 2, 1, 3, MAIN_CORE_PROC, 3, -1},
{28, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {29, 2, 1, 5, MAIN_CORE_PROC, 5, -1},
{30, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {31, 2, 1, 7, MAIN_CORE_PROC, 7, -1},
{32, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {33, 2, 1, 9, MAIN_CORE_PROC, 9, -1},
{34, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {35, 2, 1, 11, MAIN_CORE_PROC, 11, -1},
{36, 1, 0, 12, MAIN_CORE_PROC, 12, -1}, {37, 3, 1, 13, MAIN_CORE_PROC, 13, -1},
{38, 1, 0, 14, MAIN_CORE_PROC, 14, -1}, {39, 3, 1, 15, MAIN_CORE_PROC, 15, -1},
{40, 1, 0, 16, MAIN_CORE_PROC, 16, -1}, {41, 3, 1, 17, MAIN_CORE_PROC, 17, -1},
{42, 1, 0, 18, MAIN_CORE_PROC, 18, -1}, {43, 3, 1, 19, MAIN_CORE_PROC, 19, -1},
{44, 1, 0, 20, MAIN_CORE_PROC, 20, -1}, {45, 3, 1, 21, MAIN_CORE_PROC, 21, -1},
{46, 1, 0, 22, MAIN_CORE_PROC, 22, -1}, {47, 3, 1, 23, MAIN_CORE_PROC, 23, -1},
},
{
{"0,24", "0", "3500000"}, {"1,25", "1", "3500000"}, {"2,26", "0", "3500000"}, {"3,27", "1", "3500000"},
{"4,28", "0", "3500000"}, {"5,29", "1", "3500000"}, {"6,30", "0", "3500000"}, {"7,31", "1", "3500000"},
{"8,32", "0", "3500000"}, {"9,33", "1", "3500000"}, {"10,34", "0", "3500000"}, {"11,35", "1", "3500000"},
{"12,36", "0", "3500000"}, {"13,37", "1", "3500000"}, {"14,38", "0", "3500000"}, {"15,39", "1", "3500000"},
{"16,40", "0", "3500000"}, {"17,41", "1", "3500000"}, {"18,42", "0", "3500000"}, {"19,43", "1", "3500000"},
{"20,44", "0", "3500000"}, {"21,45", "1", "3500000"}, {"22,46", "0", "3500000"}, {"23,47", "1", "3500000"},
{"0,24", "0", "3500000"}, {"1,25", "1", "3500000"}, {"2,26", "0", "3500000"}, {"3,27", "1", "3500000"},
{"4,28", "0", "3500000"}, {"5,29", "1", "3500000"}, {"6,30", "0", "3500000"}, {"7,31", "1", "3500000"},
{"8,32", "0", "3500000"}, {"9,33", "1", "3500000"}, {"10,34", "0", "3500000"}, {"11,35", "1", "3500000"},
{"12,36", "0", "3500000"}, {"13,37", "1", "3500000"}, {"14,38", "0", "3500000"}, {"15,39", "1", "3500000"},
{"16,40", "0", "3500000"}, {"17,41", "1", "3500000"}, {"18,42", "0", "3500000"}, {"19,43", "1", "3500000"},
{"20,44", "0", "3500000"}, {"21,45", "1", "3500000"}, {"22,46", "0", "3500000"}, {"23,47", "1", "3500000"},
},
{
{"0,2,4,6,8,10,24,26,28,30,32,34"},
{"12,14,16,18,20,22,36,38,40,42,44,46"},
{"1,3,5,7,9,11,25,27,29,31,33,35"},
{"13,15,17,19,21,23,37,39,41,43,45,47"},
},
};
LinuxCpuMapTestCase freq_2sockets_20cores_hyperthreading = {
40,
2,
2,
20,
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1}, {11, 1, 1, 11, HYPER_THREADING_PROC, 11, -1},
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1}, {13, 1, 1, 13, HYPER_THREADING_PROC, 13, -1},
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1}, {15, 1, 1, 15, HYPER_THREADING_PROC, 15, -1},
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1}, {17, 1, 1, 17, HYPER_THREADING_PROC, 17, -1},
{18, 1, 1, 18, HYPER_THREADING_PROC, 18, -1}, {19, 1, 1, 19, HYPER_THREADING_PROC, 19, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{28, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {29, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1}, {31, 1, 1, 11, MAIN_CORE_PROC, 11, -1},
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1}, {33, 1, 1, 13, MAIN_CORE_PROC, 13, -1},
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1}, {35, 1, 1, 15, MAIN_CORE_PROC, 15, -1},
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1}, {37, 1, 1, 17, MAIN_CORE_PROC, 17, -1},
{38, 1, 1, 18, MAIN_CORE_PROC, 18, -1}, {39, 1, 1, 19, MAIN_CORE_PROC, 19, -1},
},
{
{"0,20", "0", "3000000"}, {"1,21", "0", "3000000"}, {"2,22", "0", "3000000"}, {"3,23", "0", "3000000"},
{"4,24", "0", "3000000"}, {"5,25", "0", "3000000"}, {"6,26", "0", "3000000"}, {"7,27", "0", "3000000"},
{"8,28", "0", "3000000"}, {"9,29", "0", "3000000"}, {"10,30", "1", "3000000"}, {"11,31", "1", "3000000"},
{"12,32", "1", "3000000"}, {"13,33", "1", "3000000"}, {"14,34", "1", "3000000"}, {"15,35", "1", "3000000"},
{"16,36", "1", "3000000"}, {"17,37", "1", "3000000"}, {"18,38", "1", "3000000"}, {"19,39", "1", "3000000"},
{"0,20", "0", "3000000"}, {"1,21", "0", "3000000"}, {"2,22", "0", "3000000"}, {"3,23", "0", "3000000"},
{"4,24", "0", "3000000"}, {"5,25", "0", "3000000"}, {"6,26", "0", "3000000"}, {"7,27", "0", "3000000"},
{"8,28", "0", "3000000"}, {"9,29", "0", "3000000"}, {"10,30", "1", "3000000"}, {"11,31", "1", "3000000"},
{"12,32", "1", "3000000"}, {"13,33", "1", "3000000"}, {"14,34", "1", "3000000"}, {"15,35", "1", "3000000"},
{"16,36", "1", "3000000"}, {"17,37", "1", "3000000"}, {"18,38", "1", "3000000"}, {"19,39", "1", "3000000"},
},
{{"0-9,20-29"}, {"10-19,30-39"}},
};
LinuxCpuMapTestCase freq_2sockets_20cores_hyperthreading_1 = {
40,
2,
2,
20,
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1}, {11, 1, 1, 11, HYPER_THREADING_PROC, 11, -1},
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1}, {13, 1, 1, 13, HYPER_THREADING_PROC, 13, -1},
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1}, {15, 1, 1, 15, HYPER_THREADING_PROC, 15, -1},
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1}, {17, 1, 1, 17, HYPER_THREADING_PROC, 17, -1},
{18, 1, 1, 18, HYPER_THREADING_PROC, 18, -1}, {19, 1, 1, 19, HYPER_THREADING_PROC, 19, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{28, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {29, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1}, {31, 1, 1, 11, MAIN_CORE_PROC, 11, -1},
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1}, {33, 1, 1, 13, MAIN_CORE_PROC, 13, -1},
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1}, {35, 1, 1, 15, MAIN_CORE_PROC, 15, -1},
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1}, {37, 1, 1, 17, MAIN_CORE_PROC, 17, -1},
{38, 1, 1, 18, MAIN_CORE_PROC, 18, -1}, {39, 1, 1, 19, MAIN_CORE_PROC, 19, -1},
},
{
{"0,20", "0", "3000000"}, {"1,21", "0", "3000000"}, {"2,22", "0", "3000000"}, {"3,23", "0", "3000000"},
{"4,24", "0", "3000000"}, {"5,25", "0", "3000000"}, {"6,26", "0", "3000000"}, {"7,27", "0", "3000000"},
{"8,28", "0", "3000000"}, {"9,29", "0", "3000000"}, {"10,30", "1", "3000000"}, {"11,31", "1", "3000000"},
{"12,32", "1", "3000000"}, {"13,33", "1", "3000000"}, {"14,34", "1", "3000000"}, {"15,35", "1", "3000000"},
{"16,36", "1", "3000000"}, {"17,37", "1", "3000000"}, {"18,38", "1", "3000000"}, {"19,39", "1", "3000000"},
{"0,20", "0", "3000000"}, {"1,21", "0", "3000000"}, {"2,22", "0", "3000000"}, {"3,23", "0", "3000000"},
{"4,24", "0", "3000000"}, {"5,25", "0", "3000000"}, {"6,26", "0", "3000000"}, {"7,27", "0", "3000000"},
{"8,28", "0", "3000000"}, {"9,29", "0", "3000000"}, {"10,30", "1", "3000000"}, {"11,31", "1", "3000000"},
{"12,32", "1", "3000000"}, {"13,33", "1", "3000000"}, {"14,34", "1", "3000000"}, {"15,35", "1", "3000000"},
{"16,36", "1", "3000000"}, {"17,37", "1", "3000000"}, {"18,38", "1", "3000000"}, {"19,39", "1", "3000000"},
},
{},
};
LinuxCpuMapTestCase freq_2sockets_20cores = {
20,
2,
2,
20,
{{20, 20, 0, 0, -1, -1}, {10, 10, 0, 0, 0, 0}, {10, 10, 0, 0, 1, 1}},
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {3, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{4, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {5, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{6, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {7, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{8, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {9, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{10, 1, 1, 10, MAIN_CORE_PROC, 10, -1}, {11, 1, 1, 11, MAIN_CORE_PROC, 11, -1},
{12, 1, 1, 12, MAIN_CORE_PROC, 12, -1}, {13, 1, 1, 13, MAIN_CORE_PROC, 13, -1},
{14, 1, 1, 14, MAIN_CORE_PROC, 14, -1}, {15, 1, 1, 15, MAIN_CORE_PROC, 15, -1},
{16, 1, 1, 16, MAIN_CORE_PROC, 16, -1}, {17, 1, 1, 17, MAIN_CORE_PROC, 17, -1},
{18, 1, 1, 18, MAIN_CORE_PROC, 18, -1}, {19, 1, 1, 19, MAIN_CORE_PROC, 19, -1},
},
{
{"0", "0", "3000000"}, {"1", "0", "3000000"}, {"2", "0", "3000000"}, {"3", "0", "3000000"},
{"4", "0", "3000000"}, {"5", "0", "3000000"}, {"6", "0", "3000000"}, {"7", "0", "3000000"},
{"8", "0", "3000000"}, {"9", "0", "3000000"}, {"10", "1", "3000000"}, {"11", "1", "3000000"},
{"12", "1", "3000000"}, {"13", "1", "3000000"}, {"14", "1", "3000000"}, {"15", "1", "3000000"},
{"16", "1", "3000000"}, {"17", "1", "3000000"}, {"18", "1", "3000000"}, {"19", "1", "3000000"},
},
{{"0-9"}, {"10-19"}},
};
LinuxCpuMapTestCase freq_1sockets_32cores_hyperthreading = {
64,
1,
1,
32,
{{64, 32, 0, 32, 0, 0}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1},
{10, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {11, 0, 0, 11, HYPER_THREADING_PROC, 11, -1},
{12, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {13, 0, 0, 13, HYPER_THREADING_PROC, 13, -1},
{14, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {15, 0, 0, 15, HYPER_THREADING_PROC, 15, -1},
{16, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {17, 0, 0, 17, HYPER_THREADING_PROC, 17, -1},
{18, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {19, 0, 0, 19, HYPER_THREADING_PROC, 19, -1},
{20, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {21, 0, 0, 21, HYPER_THREADING_PROC, 21, -1},
{22, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {23, 0, 0, 23, HYPER_THREADING_PROC, 23, -1},
{24, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {25, 0, 0, 25, HYPER_THREADING_PROC, 25, -1},
{26, 0, 0, 26, HYPER_THREADING_PROC, 26, -1}, {27, 0, 0, 27, HYPER_THREADING_PROC, 27, -1},
{28, 0, 0, 28, HYPER_THREADING_PROC, 28, -1}, {29, 0, 0, 29, HYPER_THREADING_PROC, 29, -1},
{30, 0, 0, 30, HYPER_THREADING_PROC, 30, -1}, {31, 0, 0, 31, HYPER_THREADING_PROC, 31, -1},
{32, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {33, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{34, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {35, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{36, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {37, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{38, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {39, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{40, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {41, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{42, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {43, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
{44, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {45, 0, 0, 13, MAIN_CORE_PROC, 13, -1},
{46, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {47, 0, 0, 15, MAIN_CORE_PROC, 15, -1},
{48, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {49, 0, 0, 17, MAIN_CORE_PROC, 17, -1},
{50, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, {51, 0, 0, 19, MAIN_CORE_PROC, 19, -1},
{52, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {53, 0, 0, 21, MAIN_CORE_PROC, 21, -1},
{54, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {55, 0, 0, 23, MAIN_CORE_PROC, 23, -1},
{56, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {57, 0, 0, 25, MAIN_CORE_PROC, 25, -1},
{58, 0, 0, 26, MAIN_CORE_PROC, 26, -1}, {59, 0, 0, 27, MAIN_CORE_PROC, 27, -1},
{60, 0, 0, 28, MAIN_CORE_PROC, 28, -1}, {61, 0, 0, 29, MAIN_CORE_PROC, 29, -1},
{62, 0, 0, 30, MAIN_CORE_PROC, 30, -1}, {63, 0, 0, 31, MAIN_CORE_PROC, 31, -1},
},
{
{"0,32", "0", "3400000"}, {"1,33", "0", "3400000"}, {"2,34", "0", "3400000"}, {"3,35", "0", "3400000"},
{"4,36", "0", "3400000"}, {"5,37", "0", "3400000"}, {"6,38", "0", "3400000"}, {"7,39", "0", "3400000"},
{"8,40", "0", "3400000"}, {"9,41", "0", "3400000"}, {"10,42", "0", "3400000"}, {"11,43", "0", "3400000"},
{"12,44", "0", "3400000"}, {"13,45", "0", "3400000"}, {"14,46", "0", "3400000"}, {"15,47", "0", "3400000"},
{"16,48", "0", "3400000"}, {"17,49", "0", "3400000"}, {"18,50", "0", "3400000"}, {"19,51", "0", "3400000"},
{"20,52", "0", "3400000"}, {"21,53", "0", "3400000"}, {"22,54", "0", "3400000"}, {"23,55", "0", "3400000"},
{"24,56", "0", "3400000"}, {"25,57", "0", "3400000"}, {"26,58", "0", "3400000"}, {"27,59", "0", "3400000"},
{"28,60", "0", "3400000"}, {"29,61", "0", "3400000"}, {"30,62", "0", "3400000"}, {"31,63", "0", "3400000"},
{"0,32", "0", "3400000"}, {"1,33", "0", "3400000"}, {"2,34", "0", "3400000"}, {"3,35", "0", "3400000"},
{"4,36", "0", "3400000"}, {"5,37", "0", "3400000"}, {"6,38", "0", "3400000"}, {"7,39", "0", "3400000"},
{"8,40", "0", "3400000"}, {"9,41", "0", "3400000"}, {"10,42", "0", "3400000"}, {"11,43", "0", "3400000"},
{"12,44", "0", "3400000"}, {"13,45", "0", "3400000"}, {"14,46", "0", "3400000"}, {"15,47", "0", "3400000"},
{"16,48", "0", "3400000"}, {"17,49", "0", "3400000"}, {"18,50", "0", "3400000"}, {"19,51", "0", "3400000"},
{"20,52", "0", "3400000"}, {"21,53", "0", "3400000"}, {"22,54", "0", "3400000"}, {"23,55", "0", "3400000"},
{"24,56", "0", "3400000"}, {"25,57", "0", "3400000"}, {"26,58", "0", "3400000"}, {"27,59", "0", "3400000"},
{"28,60", "0", "3400000"}, {"29,61", "0", "3400000"}, {"30,62", "0", "3400000"}, {"31,63", "0", "3400000"},
},
{{"0-63"}},
};
LinuxCpuMapTestCase freq_1sockets_16cores_hyperthreading = {
24,
1,
1,
16,
{{24, 8, 8, 8, 0, 0}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1}, {17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1}, {19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{20, 0, 0, 12, EFFICIENT_CORE_PROC, 12, -1}, {21, 0, 0, 13, EFFICIENT_CORE_PROC, 13, -1},
{22, 0, 0, 14, EFFICIENT_CORE_PROC, 14, -1}, {23, 0, 0, 15, EFFICIENT_CORE_PROC, 15, -1},
},
{
{"0-1", "0", "5376760"}, {"0-1", "0", "5376760"}, {"2-3", "0", "5376760"}, {"2-3", "0", "5376760"},
{"4-5", "0", "5376760"}, {"4-5", "0", "5376760"}, {"6-7", "0", "5376760"}, {"6-7", "0", "5376760"},
{"8-9", "0", "5400000"}, {"8-9", "0", "5400000"}, {"10-11", "0", "5400000"}, {"10-11", "0", "5400000"},
{"12-13", "0", "5376760"}, {"12-13", "0", "5376760"}, {"14-15", "0", "5376760"}, {"14-15", "0", "5376760"},
{"16", "0", "4200000"}, {"17", "0", "4200000"}, {"18", "0", "4200000"}, {"19", "0", "4200000"},
{"20", "0", "4200000"}, {"21", "0", "4200000"}, {"22", "0", "4200000"}, {"23", "0", "4200000"},
},
{},
};
LinuxCpuMapTestCase freq_1sockets_16cores = {
16,
1,
1,
16,
{{16, 8, 8, 0, 0, 0}},
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{1, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{3, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{5, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{7, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{8, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{9, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{10, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{11, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{12, 0, 0, 12, EFFICIENT_CORE_PROC, 12, -1},
{13, 0, 0, 13, EFFICIENT_CORE_PROC, 13, -1},
{14, 0, 0, 14, EFFICIENT_CORE_PROC, 14, -1},
{15, 0, 0, 15, EFFICIENT_CORE_PROC, 15, -1},
},
{
{"0", "0", "5376760"},
{"1", "0", "5376760"},
{"2", "0", "5376760"},
{"3", "0", "5376760"},
{"4", "0", "5400000"},
{"5", "0", "5400000"},
{"6", "0", "5376760"},
{"7", "0", "5376760"},
{"8", "0", "4200000"},
{"9", "0", "4200000"},
{"10", "0", "4200000"},
{"11", "0", "4200000"},
{"12", "0", "4200000"},
{"13", "0", "4200000"},
{"14", "0", "4200000"},
{"15", "0", "4200000"},
},
{{"0-15"}},
};
LinuxCpuMapTestCase freq_1sockets_16cores_1_hyperthreading = {
22,
1,
1,
16,
{{22, 6, 10, 6, 0, 0}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{12, 0, 0, 6, EFFICIENT_CORE_PROC, 6, -1}, {13, 0, 0, 7, EFFICIENT_CORE_PROC, 7, -1},
{14, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1}, {15, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{16, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1}, {17, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{18, 0, 0, 12, EFFICIENT_CORE_PROC, 12, -1}, {19, 0, 0, 13, EFFICIENT_CORE_PROC, 13, -1},
{20, 0, 0, 14, EFFICIENT_CORE_PROC, 14, -1}, {21, 0, 0, 15, EFFICIENT_CORE_PROC, 15, -1},
},
{
{"0-1", "2", "3200040"}, {"0-1", "2", "3200040"}, {"2-3", "3", "3200040"}, {"2-3", "3", "3200040"},
{"4-5", "4", "3200040"}, {"4-5", "4", "3200040"}, {"6-7", "5", "3200040"}, {"6-7", "5", "3200040"},
{"8-9", "6", "3200040"}, {"8-9", "6", "3200040"}, {"10-11", "7", "3200040"}, {"10-11", "7", "3200040"},
{"12", "0", "3100000"}, {"13", "0", "3100000"}, {"14", "0", "3100000"}, {"15", "0", "3100000"},
{"16", "1", "3100000"}, {"17", "1", "3100000"}, {"18", "1", "3100000"}, {"19", "1", "3100000"},
{"20", "8", "1600011"}, {"21", "8", "1600011"},
},
{},
};
LinuxCpuMapTestCase freq_1sockets_12cores_hyperthreading = {
14,
1,
1,
12,
{{14, 2, 10, 2, 0, 0}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{4, 0, 0, 2, EFFICIENT_CORE_PROC, 2, -1},
{5, 0, 0, 3, EFFICIENT_CORE_PROC, 3, -1},
{6, 0, 0, 4, EFFICIENT_CORE_PROC, 4, -1},
{7, 0, 0, 5, EFFICIENT_CORE_PROC, 5, -1},
{8, 0, 0, 6, EFFICIENT_CORE_PROC, 6, -1},
{9, 0, 0, 7, EFFICIENT_CORE_PROC, 7, -1},
{10, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{11, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{12, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{13, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
{
{"0-1", "2", "4100000"},
{"0-1", "2", "4100000"},
{"2-3", "3", "4100000"},
{"2-3", "3", "4100000"},
{"4", "0", "3100000"},
{"5", "0", "3100000"},
{"6", "0", "3100000"},
{"7", "0", "3100000"},
{"8", "1", "3100000"},
{"9", "1", "3100000"},
{"10", "1", "3100000"},
{"11", "1", "3100000"},
{"12", "8", "2100000"},
{"13", "8", "2100000"},
},
{{"0-13"}},
};
LinuxCpuMapTestCase freq_1sockets_8cores_hyperthreading = {
16,
1,
1,
8,
{{16, 8, 0, 8, 0, 0}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{9, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{11, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{13, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{15, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
},
{
{"0-1", "0", "6100000"},
{"0-1", "0", "6100000"},
{"2-3", "0", "6100000"},
{"2-3", "0", "6100000"},
{"4-5", "0", "6100000"},
{"4-5", "0", "6100000"},
{"6-7", "0", "6100000"},
{"6-7", "0", "6100000"},
{"8-9", "0", "6300000"},
{"8-9", "0", "6300000"},
{"10-11", "0", "6300000"},
{"10-11", "0", "6300000"},
{"12-13", "0", "6100000"},
{"12-13", "0", "6100000"},
{"14-15", "0", "6100000"},
{"14-15", "0", "6100000"},
},
{},
};
LinuxCpuMapTestCase freq_1sockets_8cores_hyperthreading_1 = {
16,
1,
1,
8,
{{16, 8, 0, 8, 0, 0}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{8, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{9, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{10, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{11, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{12, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{13, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{14, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{15, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
},
{
{"0,8", "0", "4700000"},
{"1,9", "0", "4800000"},
{"2,10", "0", "4800000"},
{"3,11", "0", "4700000"},
{"4,12", "0", "4700000"},
{"5,13", "0", "4700000"},
{"6,14", "0", "4700000"},
{"7,15", "0", "4700000"},
{"0,8", "0", "4700000"},
{"1,9", "0", "4800000"},
{"2,10", "0", "4800000"},
{"3,11", "0", "4700000"},
{"4,12", "0", "4700000"},
{"5,13", "0", "4700000"},
{"6,14", "0", "4700000"},
{"7,15", "0", "4700000"},
},
{{"0-15"}},
};
LinuxCpuMapTestCase freq_1sockets_4cores = {
4,
1,
1,
4,
{{4, 4, 0, 0, 0, 0}},
{
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{1, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{3, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
},
{
{"0", "0", "1800000"},
{"1", "0", "1800000"},
{"2", "0", "1800000"},
{"3", "0", "1800000"},
},
{},
};
TEST_P(LinuxCpuMapFreqParserTests, LinuxFreq) {}
INSTANTIATE_TEST_SUITE_P(CPUMap,
LinuxCpuMapFreqParserTests,
testing::Values(freq_2sockets_112cores_hyperthreading,
freq_2sockets_48cores_hyperthreading,
freq_2sockets_48cores_hyperthreading_1,
freq_2sockets_24cores_hyperthreading,
freq_2sockets_24cores_hyperthreading_1,
freq_2sockets_20cores_hyperthreading,
freq_2sockets_20cores_hyperthreading_1,
freq_2sockets_20cores,
freq_1sockets_32cores_hyperthreading,
freq_1sockets_16cores_hyperthreading,
freq_1sockets_16cores,
freq_1sockets_16cores_1_hyperthreading,
freq_1sockets_12cores_hyperthreading,
freq_1sockets_8cores_hyperthreading,
freq_1sockets_8cores_hyperthreading_1,
freq_1sockets_4cores));
#endif
} // namespace

View File

@ -7,7 +7,7 @@
#include <common_test_utils/test_common.hpp>
#include "ie_system_conf.h"
#include "streams_executor.hpp"
#include "os/cpu_map_info.hpp"
using namespace testing;
using namespace ov;
@ -53,366 +53,366 @@ public:
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_1 = {
{}, // param[in]: The logical processors selected in this simulation case do not include the physical cores of
// Pcores
{{40, 20, 0, 20},
{20, 10, 0, 10},
{20, 10, 0, 10}}, // param[in]: The proc_type_table of the simulated platform, which has 2 sockets, 20 Pcores
{{40, 20, 0, 20, -1, -1},
{20, 10, 0, 10, 0, 0},
{20, 10, 0, 10, 1, 1}}, // param[in]: The proc_type_table of the simulated platform, which has 2 sockets, 20 Pcores
// and 40 logical processors with hyper-threading enabled.
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
}, // param[in]: This simulation case selects logical processors 0, 2, 4 and 6, which are marked as logical cores of
// Pcores in the original cpu_mapping_table.
1, // param[expected out]: Since all selected logical processors are in one socket, the number of sockets changes
// to 1.
4, // param[expected out]: Since only 4 logical processors are selected, the number of cores changes to 4.
{{4, 4, 0, 0}}, // param[expected out]: The proc_type_table changes to 4 Pcores only
{{4, 4, 0, 0, 0, 0}}, // param[expected out]: The proc_type_table changes to 4 Pcores only
{
{0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 2, MAIN_CORE_PROC, 2, -1},
{4, 0, 4, MAIN_CORE_PROC, 4, -1},
{6, 0, 6, MAIN_CORE_PROC, 6, -1},
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
}, // param[expected out]: cpu_mapping_table changes to the physical cores of Pcores.
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_2 = {
{1, 3, 5, 7},
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{21, 0, 1, MAIN_CORE_PROC, 1, -1},
{23, 0, 3, MAIN_CORE_PROC, 3, -1},
{25, 0, 5, MAIN_CORE_PROC, 5, -1},
{27, 0, 7, MAIN_CORE_PROC, 7, -1},
{21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
},
1,
4,
{{4, 4, 0, 0}},
{{4, 4, 0, 0, 0, 0}},
{
{21, 0, 1, MAIN_CORE_PROC, 1, -1},
{23, 0, 3, MAIN_CORE_PROC, 3, -1},
{25, 0, 5, MAIN_CORE_PROC, 5, -1},
{27, 0, 7, MAIN_CORE_PROC, 7, -1},
{21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
},
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_3 = {
{1, 3, 5, 7},
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
{21, 0, 1, MAIN_CORE_PROC, 1, -1},
{23, 0, 3, MAIN_CORE_PROC, 3, -1},
{25, 0, 5, MAIN_CORE_PROC, 5, -1},
{27, 0, 7, MAIN_CORE_PROC, 7, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
},
1,
8,
{{8, 8, 0, 0}},
{{8, 8, 0, 0, 0, 0}},
{
{0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 2, MAIN_CORE_PROC, 2, -1},
{4, 0, 4, MAIN_CORE_PROC, 4, -1},
{6, 0, 6, MAIN_CORE_PROC, 6, -1},
{21, 0, 1, MAIN_CORE_PROC, 1, -1},
{23, 0, 3, MAIN_CORE_PROC, 3, -1},
{25, 0, 5, MAIN_CORE_PROC, 5, -1},
{27, 0, 7, MAIN_CORE_PROC, 7, -1},
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{21, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{23, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{25, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{27, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
},
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_4 = {
{0, 2, 4, 6},
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
},
1,
4,
{{8, 4, 0, 4}},
{{8, 4, 0, 4, 0, 0}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
},
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_5 = {
{},
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
{10, 1, 10, HYPER_THREADING_PROC, 10, -1},
{12, 1, 12, HYPER_THREADING_PROC, 12, -1},
{14, 1, 14, HYPER_THREADING_PROC, 14, -1},
{16, 1, 16, HYPER_THREADING_PROC, 16, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1},
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1},
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1},
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1},
},
2,
8,
{{8, 8, 0, 0}, {4, 4, 0, 0}, {4, 4, 0, 0}},
{{8, 8, 0, 0, -1, -1}, {4, 4, 0, 0, 0, 0}, {4, 4, 0, 0, 1, 1}},
{
{0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 2, MAIN_CORE_PROC, 2, -1},
{4, 0, 4, MAIN_CORE_PROC, 4, -1},
{6, 0, 6, MAIN_CORE_PROC, 6, -1},
{10, 1, 10, MAIN_CORE_PROC, 10, -1},
{12, 1, 12, MAIN_CORE_PROC, 12, -1},
{14, 1, 14, MAIN_CORE_PROC, 14, -1},
{16, 1, 16, MAIN_CORE_PROC, 16, -1},
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{10, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
{12, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
{14, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
{16, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
},
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_6 = {
{0, 2, 4, 6, 10, 12, 14, 16},
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
{30, 1, 10, MAIN_CORE_PROC, 10, -1},
{32, 1, 12, MAIN_CORE_PROC, 12, -1},
{34, 1, 14, MAIN_CORE_PROC, 14, -1},
{36, 1, 16, MAIN_CORE_PROC, 16, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
},
2,
8,
{{8, 8, 0, 0}, {4, 4, 0, 0}, {4, 4, 0, 0}},
{{8, 8, 0, 0, -1, -1}, {4, 4, 0, 0, 0, 0}, {4, 4, 0, 0, 1, 1}},
{
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
{30, 1, 10, MAIN_CORE_PROC, 10, -1},
{32, 1, 12, MAIN_CORE_PROC, 12, -1},
{34, 1, 14, MAIN_CORE_PROC, 14, -1},
{36, 1, 16, MAIN_CORE_PROC, 16, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
},
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_7 = {
{0, 2, 4, 6},
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{10, 1, 10, HYPER_THREADING_PROC, 10, -1},
{12, 1, 12, HYPER_THREADING_PROC, 12, -1},
{14, 1, 14, HYPER_THREADING_PROC, 14, -1},
{16, 1, 16, HYPER_THREADING_PROC, 16, -1},
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1},
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1},
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1},
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
},
2,
8,
{{8, 8, 0, 0}, {4, 4, 0, 0}, {4, 4, 0, 0}},
{{8, 8, 0, 0, -1, -1}, {4, 4, 0, 0, 0, 0}, {4, 4, 0, 0, 1, 1}},
{
{10, 1, 10, MAIN_CORE_PROC, 10, -1},
{12, 1, 12, MAIN_CORE_PROC, 12, -1},
{14, 1, 14, MAIN_CORE_PROC, 14, -1},
{16, 1, 16, MAIN_CORE_PROC, 16, -1},
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
{10, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
{12, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
{14, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
{16, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
},
};
LinuxNumactlTestCase numactl_2sockets_20cores_hyperthreading_8 = {
{0, 2, 4, 6, 10, 12, 14, 16},
{{40, 20, 0, 20}, {20, 10, 0, 10}, {20, 10, 0, 10}},
{{40, 20, 0, 20, -1, -1}, {20, 10, 0, 10, 0, 0}, {20, 10, 0, 10, 1, 1}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
{10, 1, 10, HYPER_THREADING_PROC, 10, -1},
{12, 1, 12, HYPER_THREADING_PROC, 12, -1},
{14, 1, 14, HYPER_THREADING_PROC, 14, -1},
{16, 1, 16, HYPER_THREADING_PROC, 16, -1},
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
{30, 1, 10, MAIN_CORE_PROC, 10, -1},
{32, 1, 12, MAIN_CORE_PROC, 12, -1},
{34, 1, 14, MAIN_CORE_PROC, 14, -1},
{36, 1, 16, MAIN_CORE_PROC, 16, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1},
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1},
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1},
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
},
2,
8,
{{16, 8, 0, 8}, {8, 4, 0, 4}, {8, 4, 0, 4}},
{{16, 8, 0, 8, -1, -1}, {8, 4, 0, 4, 0, 0}, {8, 4, 0, 4, 1, 1}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
{10, 1, 10, HYPER_THREADING_PROC, 10, -1},
{12, 1, 12, HYPER_THREADING_PROC, 12, -1},
{14, 1, 14, HYPER_THREADING_PROC, 14, -1},
{16, 1, 16, HYPER_THREADING_PROC, 16, -1},
{20, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 6, MAIN_CORE_PROC, 6, -1},
{30, 1, 10, MAIN_CORE_PROC, 10, -1},
{32, 1, 12, MAIN_CORE_PROC, 12, -1},
{34, 1, 14, MAIN_CORE_PROC, 14, -1},
{36, 1, 16, MAIN_CORE_PROC, 16, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{10, 1, 1, 10, HYPER_THREADING_PROC, 10, -1},
{12, 1, 1, 12, HYPER_THREADING_PROC, 12, -1},
{14, 1, 1, 14, HYPER_THREADING_PROC, 14, -1},
{16, 1, 1, 16, HYPER_THREADING_PROC, 16, -1},
{20, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{22, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{24, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{26, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{30, 1, 1, 10, MAIN_CORE_PROC, 10, -1},
{32, 1, 1, 12, MAIN_CORE_PROC, 12, -1},
{34, 1, 1, 14, MAIN_CORE_PROC, 14, -1},
{36, 1, 1, 16, MAIN_CORE_PROC, 16, -1},
},
};
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_1 = {
{},
{{24, 8, 8, 8}},
{{24, 8, 8, 8, 0, 0}},
{
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
1,
4,
{{4, 0, 4, 0}},
{{4, 0, 4, 0, 0, 0}},
{
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
};
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_2 = {
{},
{{24, 8, 8, 8}},
{{24, 8, 8, 8, 0, 0}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 6, HYPER_THREADING_PROC, 6, -1},
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
1,
8,
{{8, 4, 4, 0}},
{{8, 4, 4, 0, 0, 0}},
{
{0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 2, MAIN_CORE_PROC, 2, -1},
{4, 0, 4, MAIN_CORE_PROC, 4, -1},
{6, 0, 6, MAIN_CORE_PROC, 6, -1},
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{0, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{4, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{6, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
};
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_3 = {
{0, 1, 2, 3},
{{24, 8, 8, 8}},
{{24, 8, 8, 8, 0, 0}},
{
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
1,
8,
{{8, 4, 4, 0}},
{{8, 4, 4, 0, 0, 0}},
{
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
};
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_4 = {
{0, 1, 2, 3},
{{24, 8, 8, 8}},
{{24, 8, 8, 8, 0, 0}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 1, HYPER_THREADING_PROC, 1, -1},
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
{4, 0, 2, HYPER_THREADING_PROC, 2, -1},
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
{6, 0, 3, HYPER_THREADING_PROC, 3, -1},
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
1,
8,
{{12, 4, 4, 4}},
{{12, 4, 4, 4, 0, 0}},
{
{0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 1, HYPER_THREADING_PROC, 1, -1},
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
{4, 0, 2, HYPER_THREADING_PROC, 2, -1},
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
{6, 0, 3, HYPER_THREADING_PROC, 3, -1},
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
};
LinuxNumactlTestCase numactl_1sockets_16cores_hyperthreading_5 = {
{0, 1, 2, 3},
{{24, 8, 8, 8}},
{{24, 8, 8, 8, 0, 0}},
{
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
{8, 0, 4, HYPER_THREADING_PROC, 4, -1},
{10, 0, 5, HYPER_THREADING_PROC, 5, -1},
{12, 0, 6, HYPER_THREADING_PROC, 6, -1},
{14, 0, 7, HYPER_THREADING_PROC, 7, -1},
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
1,
12,
{{12, 8, 4, 0}},
{{12, 8, 4, 0, 0, 0}},
{
{1, 0, 0, MAIN_CORE_PROC, 0, -1},
{3, 0, 1, MAIN_CORE_PROC, 1, -1},
{5, 0, 2, MAIN_CORE_PROC, 2, -1},
{7, 0, 3, MAIN_CORE_PROC, 3, -1},
{8, 0, 4, MAIN_CORE_PROC, 4, -1},
{10, 0, 5, MAIN_CORE_PROC, 5, -1},
{12, 0, 6, MAIN_CORE_PROC, 6, -1},
{14, 0, 7, MAIN_CORE_PROC, 7, -1},
{16, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
{1, 0, 0, 0, MAIN_CORE_PROC, 0, -1},
{3, 0, 0, 1, MAIN_CORE_PROC, 1, -1},
{5, 0, 0, 2, MAIN_CORE_PROC, 2, -1},
{7, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{8, 0, 0, 4, MAIN_CORE_PROC, 4, -1},
{10, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{12, 0, 0, 6, MAIN_CORE_PROC, 6, -1},
{14, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{16, 0, 0, 8, EFFICIENT_CORE_PROC, 8, -1},
{17, 0, 0, 9, EFFICIENT_CORE_PROC, 9, -1},
{18, 0, 0, 10, EFFICIENT_CORE_PROC, 10, -1},
{19, 0, 0, 11, EFFICIENT_CORE_PROC, 11, -1},
},
};

View File

@ -3,9 +3,9 @@
The main responsibility of the AUTO plugin is to provide a unified device that enables developers to code deep learning applications once and deploy them anywhere.
Other capabilities of the AUTO plugin include:
* Static device selection, which intelligently loads a network to one device or multiple devices.
* CPU acceleration to start inferencing while the target device is still loading the network.
* Model priority support for loading multiple networks to multiple devices.
* Static device selection, which intelligently compiles a model to one device or multiple devices.
* CPU acceleration to start inferencing while the target device is still compiling the model.
* Model priority support for compiling multiple models to multiple devices.
The component is written in `C++`. If you want to contribute to the AUTO plugin, follow [the common coding style rules](../../../docs/dev/coding_style.md).
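For context, a minimal sketch of how an application targets the unified AUTO device through the OpenVINO 2.0 C++ API (the model path and the device priority list below are illustrative placeholders, not taken from this repository):
```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;

    // Compile against the virtual "AUTO" device. AUTO selects the actual target
    // device(s) and can serve the first inferences on the CPU while a slower
    // device (e.g. GPU) is still compiling the model.
    // "model.xml" and the priority list are placeholders for illustration only.
    ov::CompiledModel compiled = core.compile_model(
        "model.xml",
        "AUTO",
        ov::device::priorities("GPU,CPU"),
        ov::hint::model_priority(ov::hint::Priority::MEDIUM));

    ov::InferRequest request = compiled.create_infer_request();
    // ... set input tensors here before running inference ...
    request.infer();
    return 0;
}
```
Because compilation happens behind the single "AUTO" target, the same application code runs unchanged regardless of which physical devices are available.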

View File

@ -18,25 +18,25 @@ Auto unit test is a set of unit tests using gmock, each of which is for testing
2. Build
```bash
make ieMultiPluginUnitTests
make ov_auto_unit_tests
```
3. You can find `ieMultiPluginUnitTests` in the *bin* directory after the build
3. You can find `ov_auto_unit_tests` in the *bin* directory after the build
### Run unit test
You can run _`ieMultiPluginUnitTests`_ in the *bin* directory, which is the output of the OpenVINO build
You can run _`ov_auto_unit_tests`_ in the *bin* directory, which is the output of the OpenVINO build
If you want to run a specific unit test, you can use the `gtest_filter` option as follows:
```
./ieMultiPluginUnitTests --gtest_filter='*filter_name*'
./ov_auto_unit_tests --gtest_filter='*filter_name*'
```
Then, you will get a result similar to:
```bash
openvino/bin/intel64/Release$ ./ieMultiPluginUnitTests --gtest_filter=*AutoReleaseHelperTest*cpuLoadFailure_accelerateorLoadFailure*
openvino/bin/intel64/Release$ ./ov_auto_unit_tests --gtest_filter=*AutoReleaseHelperTest*cpuLoadFailure_accelerateorLoadFailure*
Running main() from /home/openvino/thirdparty/gtest/gtest/googletest/src/gtest_main.cc
Note: Google Test filter = *AutoReleaseHelperTest*cpuLoadFailure_accelerateorLoadFailure*
[==========] Running 1 test from 1 test suite.

View File

@ -4,33 +4,28 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "async_infer_request.hpp"
namespace MultiDevicePlugin {
AsyncInferRequest::AsyncInferRequest(const Schedule::Ptr& schedule,
const IInferPtr& inferRequest,
const IE::ITaskExecutor::Ptr& callbackExecutor):
AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor),
_schedule(schedule),
_inferRequest(inferRequest) {
auto pipeline = _schedule->GetPipeline(_inferRequest, &_workerInferRequest);
ov::auto_plugin::AsyncInferRequest::AsyncInferRequest(const Schedule::Ptr& schedule,
const std::shared_ptr<ov::auto_plugin::InferRequest>& request,
const std::shared_ptr<ov::threading::ITaskExecutor>& callback_executor) :
IAsyncInferRequest(request, nullptr, callback_executor),
m_schedule(schedule),
m_inferrequest(request) {
auto pipeline = m_schedule->get_async_pipeline(m_inferrequest, &m_worker_inferrequest);
if (pipeline.size() > 0) {
_pipeline = std::move(pipeline);
m_pipeline = std::move(pipeline);
}
}
void AsyncInferRequest::Infer_ThreadUnsafe() {
InferUsingAsync();
std::vector<ov::ProfilingInfo> ov::auto_plugin::AsyncInferRequest::get_profiling_info() const {
check_state();
auto scheduled_request = std::dynamic_pointer_cast<InferRequest>(m_inferrequest);
return scheduled_request->get_profiling_info();
}
std::map<std::string, IE::InferenceEngineProfileInfo>
AsyncInferRequest::GetPerformanceCounts() const {
CheckState();
auto multiDeviceInfer = std::dynamic_pointer_cast<MultiDeviceInferRequest>(_inferRequest);
return multiDeviceInfer->GetPerformanceCounts();
void ov::auto_plugin::AsyncInferRequest::infer_thread_unsafe() {
start_async_thread_unsafe();
}
AsyncInferRequest::~AsyncInferRequest() {
StopAndWait();
ov::auto_plugin::AsyncInferRequest::~AsyncInferRequest() {
stop_and_wait();
}
} // namespace MultiDevicePlugin

View File

@ -8,27 +8,23 @@
#include "schedule.hpp"
#include "infer_request.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
class AsyncInferRequest : public IE::AsyncInferRequestThreadSafeDefault {
namespace ov {
namespace auto_plugin {
// ! [async_infer_request:header]
class AsyncInferRequest : public ov::IAsyncInferRequest {
public:
using Ptr = std::shared_ptr<AsyncInferRequest>;
explicit AsyncInferRequest(const Schedule::Ptr& schedule, const IInferPtr& inferRequest,
const IE::ITaskExecutor::Ptr& callbackExecutor);
void Infer_ThreadUnsafe() override;
std::map<std::string, IE::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
~AsyncInferRequest();
AsyncInferRequest(const Schedule::Ptr& schedule,
const std::shared_ptr<ov::auto_plugin::InferRequest>& request,
const std::shared_ptr<ov::threading::ITaskExecutor>& callback_executor);
protected:
Schedule::Ptr _schedule;
WorkerInferRequest* _workerInferRequest = nullptr;
IInferPtr _inferRequest;
~AsyncInferRequest();
void infer_thread_unsafe() override;
std::vector<ov::ProfilingInfo> get_profiling_info() const override;
private:
Schedule::Ptr m_schedule;
WorkerInferRequest* m_worker_inferrequest = nullptr;
ISyncInferPtr m_inferrequest;
};
} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov

View File

@ -0,0 +1,252 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "auto_compiled_model.hpp"
#include "common.hpp"
#include <memory>
#include "async_infer_request.hpp"
#include "itt.hpp"
#include "openvino/runtime/exec_model_info.hpp"
#include "openvino/runtime/properties.hpp"
#include "plugin.hpp"
namespace ov {
namespace auto_plugin {
AutoCompiledModel::AutoCompiledModel(const std::shared_ptr<ov::Model>& model,
const std::shared_ptr<const ov::IPlugin>& plugin,
ScheduleContext::Ptr context,
Schedule::Ptr scheduler)
: CompiledModel(model, plugin, context, scheduler),
m_model(model),
m_context(context) {
m_scheduler = std::dynamic_pointer_cast<AutoSchedule>(scheduler);
}
void AutoCompiledModel::set_property(const ov::AnyMap& properties) {
OPENVINO_NOT_IMPLEMENTED;
}
std::shared_ptr<const ov::Model> AutoCompiledModel::get_runtime_model() const {
OPENVINO_ASSERT(m_context->m_hw_compiled_model);
return m_context->m_hw_compiled_model->get_runtime_model();
}
ov::Any AutoCompiledModel::get_property(const std::string& name) const {
const auto& add_ro_properties = [](const std::string& name, std::vector<ov::PropertyName>& properties) {
properties.emplace_back(ov::PropertyName{name, ov::PropertyMutability::RO});
};
const auto& default_ro_properties = []() {
std::vector<ov::PropertyName> ro_properties{ov::model_name,
ov::supported_properties,
ov::execution_devices,
ov::hint::performance_mode,
ov::optimal_number_of_infer_requests,
ov::device::priorities,
ov::device::properties,
ov::hint::model_priority,
ov::loaded_from_cache};
return ro_properties;
};
const auto& default_rw_properties = []() {
std::vector<ov::PropertyName> rw_properties{};
return rw_properties;
};
const auto& to_string_vector = [](const std::vector<ov::PropertyName>& properties) {
std::vector<std::string> ret;
for (const auto& property : properties) {
ret.emplace_back(property);
}
return ret;
};
if (name == ov::supported_properties) {
auto ro_properties = default_ro_properties();
auto rw_properties = default_rw_properties();
std::vector<ov::PropertyName> supported_properties;
supported_properties.reserve(ro_properties.size() + rw_properties.size());
supported_properties.insert(supported_properties.end(), ro_properties.begin(), ro_properties.end());
supported_properties.insert(supported_properties.end(), rw_properties.begin(), rw_properties.end());
return decltype(ov::supported_properties)::value_type(supported_properties);
} else if (name == ov::hint::performance_mode) {
return m_context->m_performance_hint;
} else if (name == ov::device::priorities) {
// device priority does not support change on-the-fly
return decltype(ov::device::priorities)::value_type(m_context->m_str_devices);
} else if (name == ov::device::properties) {
ov::AnyMap all_devices = {};
{
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
if (m_scheduler->m_compile_context[FALLBACKDEVICE].m_is_already) {
all_devices = get_device_supported_properties(m_scheduler->m_compile_context[FALLBACKDEVICE]);
}
}
std::lock_guard<std::mutex> lock(m_context->m_mutex);
if (m_scheduler->m_compile_context[ACTUALDEVICE].m_is_already) {
all_devices = get_device_supported_properties(m_scheduler->m_compile_context[ACTUALDEVICE]);
} else {
all_devices = get_device_supported_properties(m_scheduler->m_compile_context[CPU]);
}
return all_devices;
} else if (name == ov::hint::model_priority) {
auto value = m_context->m_model_priority;
if (m_context->m_ov_core->is_new_api()) {
return value ? ((value > 1) ? ov::hint::Priority::LOW :
ov::hint::Priority::MEDIUM) : ov::hint::Priority::HIGH;
} else {
OPENVINO_SUPPRESS_DEPRECATED_START
return value ? ((value > 1) ? CONFIG_VALUE(MODEL_PRIORITY_LOW) : CONFIG_VALUE(
MODEL_PRIORITY_MED)) : CONFIG_VALUE(MODEL_PRIORITY_HIGH);
OPENVINO_SUPPRESS_DEPRECATED_END
}
} else if (name == ov::optimal_number_of_infer_requests) {
const unsigned int default_num_for_tput = 4u;
const unsigned int default_num_for_latency = 1u;
unsigned int real = 0;
if (m_scheduler->m_compile_context[ACTUALDEVICE].m_is_already) {
real = m_scheduler->m_compile_context[ACTUALDEVICE].
m_compiled_model->get_property(name).as<unsigned int>();
} else {
OPENVINO_ASSERT(m_scheduler->m_compile_context[CPU].m_is_already == true);
std::unique_lock<std::mutex> lock(m_context->m_mutex);
auto device_info = m_scheduler->m_compile_context[ACTUALDEVICE].m_device_info;
lock.unlock();
unsigned int optimal_batch_size = 0;
unsigned int requests = 0;
bool tput_enabled_in_plugin = false;
auto actual_dev_supported_properties = m_context->m_ov_core->get_property(device_info.device_name, ov::supported_properties);
try {
// for benchmark through AUTO:CPU,GPU
// SetConfig directly set to CPU/GPU in this case
if (std::find(actual_dev_supported_properties.begin(), actual_dev_supported_properties.end(), ov::hint::performance_mode)
!= actual_dev_supported_properties.end())
tput_enabled_in_plugin =
m_context->m_ov_core->get_property(device_info.device_name,
ov::hint::performance_mode)== ov::hint::PerformanceMode::THROUGHPUT;
} catch (const ov::Exception&) {
LOG_DEBUG_TAG("get_property:%s for %s", "PERF_HINT config not supported",
device_info.device_name.c_str());
}
const auto& mode = device_info.config.find(ov::hint::performance_mode.name());
if (tput_enabled_in_plugin ||
(mode != device_info.config.end() && mode->second == ov::hint::PerformanceMode::THROUGHPUT)) {
unsigned int upper_bound_streams_num = 0;
if (std::find(actual_dev_supported_properties.begin(), actual_dev_supported_properties.end(), ov::range_for_streams)
!= actual_dev_supported_properties.end()) {
try {
auto range_of_streams = m_context->m_ov_core->get_property(device_info.device_name,
ov::range_for_streams);
upper_bound_streams_num = std::get<1>(range_of_streams);
} catch (const ov::Exception&) {
LOG_DEBUG_TAG("get_property range_for_streams failed");
}
}
if (!m_context->m_batching_disabled) {
if (std::find(actual_dev_supported_properties.begin(), actual_dev_supported_properties.end(), ov::optimal_batch_size)
!= actual_dev_supported_properties.end()) {
try {
optimal_batch_size = m_context->m_ov_core->get_property(device_info.device_name,
ov::optimal_batch_size, {ov::hint::model(m_model)});
LOG_DEBUG_TAG("BATCHING:%s:%ld", "optimal batch size",
optimal_batch_size);
} catch (const ov::Exception&) {
LOG_DEBUG_TAG("BATCHING:%s", "property optimal_batch_size not supported");
}
}
}
if (optimal_batch_size > 1) {
// batching is supported with the device
// go with auto-batching
try {
// check if app have set preferred value
requests =
m_context->m_ov_core->get_property(device_info.device_name, ov::hint::num_requests);
const auto& reqs = device_info.config.find(ov::hint::num_requests.name());
if (reqs != device_info.config.end()) {
requests = reqs->second.as<unsigned int>();
}
LOG_DEBUG_TAG("BATCHING:%s:%ld", "user requested size", requests);
if (!requests) { // no limitations from user
requests = optimal_batch_size * upper_bound_streams_num * 2;
LOG_DEBUG_TAG("BATCHING:%s:%ld", "deduced size:", requests);
}
} catch (const ov::Exception& iie) {
LOG_WARNING_TAG("deduce optimal infer requset num for auto-batch failed :%s",
iie.what());
}
real = (std::max)(requests, optimal_batch_size);
} else if (device_info.device_name.find("VPUX") != std::string::npos) {
real = 8u;
} else {
real = upper_bound_streams_num ? 2 * upper_bound_streams_num : default_num_for_tput;
}
} else {
real = default_num_for_latency;
}
}
return decltype(ov::optimal_number_of_infer_requests)::value_type {real};
} else if (name == ov::execution_devices) {
ov::Any execution_devices;
auto get_execution_devices = [&execution_devices](std::string exe_devices_string) {
std::vector<std::string> exe_devices = {};
if (exe_devices_string == "CPU_HELP")
exe_devices_string = "(CPU)";
exe_devices.push_back(exe_devices_string);
execution_devices = decltype(ov::execution_devices)::value_type {exe_devices};
};
{
std::lock_guard<std::mutex> lock(m_context->m_mutex);
for (int i = 0; i < CONTEXTNUM; i++) {
if (m_scheduler->m_compile_context[i].m_is_enabled && m_scheduler->m_compile_context[i].m_is_already) {
if (i == 0 && !m_scheduler->m_compile_context[CPU].m_compiled_model._ptr) {
continue;
} else {
get_execution_devices(m_scheduler->m_compile_context[i].m_worker_name);
break;
}
}
}
}
return execution_devices;
} else if (name == ov::model_name) {
std::lock_guard<std::mutex> lock(m_context->m_mutex);
{
if (m_scheduler->m_compile_context[CPU].m_is_enabled && m_scheduler->m_compile_context[CPU].m_is_already)
return m_scheduler->m_compile_context[CPU].m_compiled_model->get_property(name);
return m_scheduler->m_compile_context[ACTUALDEVICE].m_compiled_model->get_property(name);
}
OPENVINO_SUPPRESS_DEPRECATED_START
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
auto ro_properties = default_ro_properties();
add_ro_properties(METRIC_KEY(SUPPORTED_METRICS), ro_properties);
add_ro_properties(METRIC_KEY(SUPPORTED_CONFIG_KEYS), ro_properties);
return to_string_vector(ro_properties);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
auto rw_properties = default_rw_properties();
return to_string_vector(rw_properties);
OPENVINO_SUPPRESS_DEPRECATED_END
} else if (name == ov::loaded_from_cache) {
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
if (m_scheduler->m_compile_context[FALLBACKDEVICE].m_is_already) {
return m_scheduler->m_compile_context[FALLBACKDEVICE].m_compiled_model->get_property(name).as<bool>();
}
if (m_scheduler->m_compile_context[ACTUALDEVICE].m_is_already) {
return m_scheduler->m_compile_context[ACTUALDEVICE].
m_compiled_model->get_property(name).as<bool>();
} else {
OPENVINO_ASSERT(m_scheduler->m_compile_context[CPU].m_is_already == true);
std::lock_guard<std::mutex> lock(m_context->m_mutex);
return m_scheduler->m_compile_context[CPU].
m_compiled_model->get_property(name).as<bool>();
}
}
OPENVINO_THROW(get_log_tag(), ": not supported property ", name);
}
void AutoCompiledModel::export_model(std::ostream& model_stream) const {
OPENVINO_NOT_IMPLEMENTED;
}
} // namespace auto_plugin
} // namespace ov

View File

@ -0,0 +1,38 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "compiled_model.hpp"
#include "auto_schedule.hpp"
namespace ov {
namespace auto_plugin {
class AutoCompiledModel : public CompiledModel {
public:
AutoCompiledModel(const std::shared_ptr<ov::Model>& model,
const std::shared_ptr<const ov::IPlugin>& plugin,
ScheduleContext::Ptr context,
Schedule::Ptr scheduler);
// implement pure virtual methods from a base class ov::ICompiledModel
void export_model(std::ostream& model) const override;
std::shared_ptr<const ov::Model> get_runtime_model() const override;
void set_property(const ov::AnyMap& properties) override;
ov::Any get_property(const std::string& name) const override;
private:
friend class InferRequest;
friend class Plugin;
std::shared_ptr<ov::Model> m_model;
ScheduleContext::Ptr m_context;
AutoSchedule::Ptr m_scheduler;
};
} // namespace auto_plugin
} // namespace ov

View File

@ -1,283 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include <ie_metric_helpers.hpp>
#include "ie_performance_hints.hpp"
#include "auto_executable_network.hpp"
// ------------------------------AutoExecutableNetwork----------------------------
//
namespace MultiDevicePlugin {
AutoExecutableNetwork::AutoExecutableNetwork(AutoScheduleContext::Ptr& context, const AutoSchedule::Ptr& schedule)
:ExecutableNetwork(schedule, context),
_autoSContext(context),
_autoSchedule(schedule) {
}
std::shared_ptr<IE::RemoteContext> AutoExecutableNetwork::GetContext() const {
if (_autoSchedule->_pCTPUTLoadContext) {
for (size_t i = 0; i < _autoSchedule->_nCTputDeviceNums; i++) {
if (_autoSchedule->_pCTPUTLoadContext[i].isAlready) {
return _autoSchedule->_pCTPUTLoadContext[i].executableNetwork->GetContext();
}
}
return nullptr;
} else {
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) {
return _autoSchedule->_loadContext[FALLBACKDEVICE].executableNetwork->GetContext();
} else {
_autoSchedule->WaitActualNetworkReady();
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetContext();
}
}
}
void AutoExecutableNetwork::SetConfig(const std::map<std::string, IE::Parameter>
& config) {
IE_THROW(NotImplemented);
}
IE::Parameter AutoExecutableNetwork::GetConfig(const std::string& name) const {
IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config";
}
IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
if (name == ov::supported_properties) {
return decltype(ov::supported_properties)::value_type {
// Metrics
ov::PropertyName{ov::supported_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::hint::performance_mode.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::model_name.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::optimal_number_of_infer_requests.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::hint::model_priority.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::device::priorities.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::device::properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::execution_devices.name(), ov::PropertyMutability::RO}};
} else if (name == ov::hint::performance_mode) {
auto value = _autoSContext->_performanceHint;
if (!_autoSContext->_core->isNewAPI())
return value;
if (value == InferenceEngine::PluginConfigParams::THROUGHPUT) {
return ov::hint::PerformanceMode::THROUGHPUT;
} else if (value == InferenceEngine::PluginConfigParams::LATENCY) {
return ov::hint::PerformanceMode::LATENCY;
} else if (value == InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT) {
return ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT;
} else if (value == "UNDEFINED") {
OPENVINO_SUPPRESS_DEPRECATED_START
return ov::hint::PerformanceMode::UNDEFINED;
OPENVINO_SUPPRESS_DEPRECATED_END
} else {
OPENVINO_THROW("Unsupported value of ov::hint::PerformanceMode");
}
} else if (name == ov::device::priorities) {
auto value = _autoSContext->_config.find(ov::device::priorities.name());
return decltype(ov::device::priorities)::value_type {value->second.as<std::string>()};
} else if (name == ov::device::properties) {
ov::AnyMap all_devices = {};
auto get_device_supported_metrics = [&all_devices] (const AutoLoadContext& context) {
ov::AnyMap device_properties = {};
auto device_supported_metrics = context.executableNetwork->GetMetric(METRIC_KEY(SUPPORTED_METRICS));
for (auto&& property_name : device_supported_metrics.as<std::vector<std::string>>()) {
device_properties[property_name] = context.executableNetwork->GetMetric(property_name);
}
auto device_supported_configs = context.executableNetwork->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
for (auto&& property_name : device_supported_configs.as<std::vector<std::string>>()) {
device_properties[property_name] = context.executableNetwork->GetConfig(property_name);
}
all_devices[context.deviceInfo.deviceName] = device_properties;
};
if (_autoSchedule->_pCTPUTLoadContext) {
for (size_t i = 0; i < _autoSchedule->_nCTputDeviceNums; i++) {
if (_autoSchedule->_pCTPUTLoadContext[i].isAlready) {
get_device_supported_metrics(_autoSchedule->_pCTPUTLoadContext[i]);
}
}
} else {
{
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) {
get_device_supported_metrics(_autoSchedule->_loadContext[FALLBACKDEVICE]);
}
}
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
get_device_supported_metrics(_autoSchedule->_loadContext[ACTUALDEVICE]);
} else {
get_device_supported_metrics(_autoSchedule->_loadContext[CPU]);
}
}
return all_devices;
} else if (name == ov::hint::model_priority) {
auto value = _autoSContext->_modelPriority;
if (_autoSContext->_core->isNewAPI()) {
return value ? ((value > 1) ? ov::hint::Priority::LOW :
ov::hint::Priority::MEDIUM) : ov::hint::Priority::HIGH;
} else {
return value ? ((value > 1) ? CONFIG_VALUE(MODEL_PRIORITY_LOW) : CONFIG_VALUE(
MODEL_PRIORITY_MED)) : CONFIG_VALUE(MODEL_PRIORITY_HIGH);
}
} else if (name == ov::optimal_number_of_infer_requests) {
const unsigned int defaultNumForTPUT = 4u;
const unsigned int defaultNumForLatency = 1u;
unsigned int real = 0;
if (_autoSchedule->_pCTPUTLoadContext) {
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
unsigned int res = 0u;
for (size_t i = 0; i < _autoSchedule->_nCTputDeviceNums; i++) {
try {
if (_autoSchedule->_pCTPUTLoadContext[i].isAlready) {
res += (_autoSchedule->_pCTPUTLoadContext[i])
.executableNetwork->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))
.as<unsigned int>();
}
} catch (const IE::Exception& iie) {
IE_THROW()
<< "Every device used in cumulative mode should "
<< "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
<< "Failed to query the metric for with error:" <<
iie.what();
}
}
return decltype(ov::optimal_number_of_infer_requests)::value_type {res};
}
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
real = _autoSchedule->_loadContext[ACTUALDEVICE].
executableNetwork->GetMetric(name).as<unsigned int>();
} else {
IE_ASSERT(_autoSchedule->_loadContext[CPU].isAlready == true);
std::unique_lock<std::mutex> lock(_autoSContext->_confMutex);
auto deviceInfo = _autoSchedule->_loadContext[ACTUALDEVICE].deviceInfo;
lock.unlock();
unsigned int optimalBatchSize = 0;
unsigned int requests = 0;
bool bThroughputEnabledInPlugin = false;
try {
// for benchmark through AUTO:CPU,GPU
// SetConfig directly set to CPU/GPU in this case
bThroughputEnabledInPlugin =
_autoSContext->_core->GetConfig(deviceInfo.deviceName,
CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == CONFIG_VALUE(THROUGHPUT);
} catch (const IE::Exception&) {
LOG_DEBUG_TAG("GetMetric:%s for %s", "PERF_HINT config not supported",
deviceInfo.deviceName.c_str());
}
const auto& mode = deviceInfo.config.find(CONFIG_KEY(PERFORMANCE_HINT));
if (bThroughputEnabledInPlugin ||
(mode != deviceInfo.config.end() && mode->second == CONFIG_VALUE(THROUGHPUT))) {
unsigned int upperBoundStreamsNum = 0;
std::map<std::string, IE::Parameter> options;
options["MODEL_PTR"] = std::const_pointer_cast<ngraph::Function>
(_autoSContext->_network.getFunction());
try {
auto rangeOfStreams = _autoSContext->_core->GetMetric(deviceInfo.deviceName,
METRIC_KEY(RANGE_FOR_STREAMS),
options).as<std::tuple<unsigned int, unsigned int>>();
upperBoundStreamsNum = std::get<1>(rangeOfStreams);
} catch (const IE::Exception&) {
LOG_DEBUG_TAG("GetMetric RANGE_FOR_STREAMS failed");
}
if (!_autoSContext->_batchingDisabled) {
try {
optimalBatchSize = _autoSContext->_core->GetMetric(deviceInfo.deviceName,
METRIC_KEY(OPTIMAL_BATCH_SIZE), options).as<unsigned int>();
LOG_DEBUG_TAG("BATCHING:%s:%ld", "optimal batch size",
optimalBatchSize);
} catch (const IE::Exception&) {
LOG_DEBUG_TAG("BATCHING:%s", "metric OPTIMAL_BATCH_SIZE not supported");
}
}
if (optimalBatchSize > 1) {
// batching is supported by the device,
// so go with auto-batching
try {
// check if the app has set a preferred value
auto res =
_autoSContext->_core->GetConfig(deviceInfo.deviceName,
CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
requests = IE::PerfHintsConfig::CheckPerformanceHintRequestValue(res);
const auto& reqs = deviceInfo.config.find(CONFIG_KEY(
PERFORMANCE_HINT_NUM_REQUESTS));
if (reqs != deviceInfo.config.end()) {
requests = static_cast<unsigned int>
(IE::PerfHintsConfig::CheckPerformanceHintRequestValue(reqs->second));
}
LOG_DEBUG_TAG("BATCHING:%s:%ld", "user requested size", requests);
if (!requests) { // no limitations from user
requests = optimalBatchSize * upperBoundStreamsNum * 2;
LOG_DEBUG_TAG("BATCHING:%s:%ld", "deduced size:", requests);
}
} catch (const IE::Exception& iie) {
LOG_WARNING_TAG("deduce optimal infer requset num for auto-batch failed :%s",
iie.what());
}
real = (std::max)(requests, optimalBatchSize);
} else if (deviceInfo.deviceName.find("VPUX") != std::string::npos) {
real = 8u;
} else {
real = upperBoundStreamsNum ? 2 * upperBoundStreamsNum : defaultNumForTPUT;
}
} else {
real = defaultNumForLatency;
}
}
return decltype(ov::optimal_number_of_infer_requests)::value_type {real};
} else if (name == ov::execution_devices) {
ov::Any execution_devices;
auto GetExecutionDevices = [&execution_devices](std::string ExeDevicesString) {
std::vector<std::string> exeDevices = {};
if (ExeDevicesString == "CPU_HELP")
ExeDevicesString = "(CPU)";
exeDevices.push_back(ExeDevicesString);
execution_devices = decltype(ov::execution_devices)::value_type {exeDevices};
};
if (_autoSchedule->_pCTPUTLoadContext) {
std::vector<std::string> exeDevices = {};
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
for (auto const & n : _autoSContext->_devicePriorities) {
exeDevices.push_back(n.deviceName);
}
execution_devices = decltype(ov::execution_devices)::value_type {exeDevices};
} else {
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
for (int i = 0; i < CONTEXTNUM; i++) {
if (_autoSchedule->_loadContext[i].isEnabled && _autoSchedule->_loadContext[i].isAlready) {
if (i == 0 && !_autoSchedule->_loadContext[CPU].executableNetwork._ptr) {
continue;
} else {
GetExecutionDevices(_autoSchedule->_loadContext[i].workName);
break;
}
}
}
}
return execution_devices;
} else if (name == ov::model_name) {
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
if (_autoSchedule->_pCTPUTLoadContext) {
for (size_t i = 0; i < _autoSchedule->_nCTputDeviceNums; i++) {
if (_autoSchedule->_pCTPUTLoadContext[i].isAlready) {
return _autoSchedule->_pCTPUTLoadContext[i].executableNetwork->GetMetric(name);
}
}
IE_THROW() << "No valid executable network found to get" << name;
} else {
if (_autoSchedule->_loadContext[CPU].isEnabled && _autoSchedule->_loadContext[CPU].isAlready)
return _autoSchedule->_loadContext[CPU].executableNetwork->GetMetric(name);
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
}
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
{METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
METRIC_KEY(SUPPORTED_METRICS),
METRIC_KEY(NETWORK_NAME),
METRIC_KEY(SUPPORTED_CONFIG_KEYS)});
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, {});
}
IE_THROW() << "Unsupported metric key: " << name;
}
} // namespace MultiDevicePlugin
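For reference, a minimal usage sketch of how an application would read the values served by GetMetric() above through the OpenVINO 2.0 C++ API; the model path is an illustrative placeholder and is not part of this change:

#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "AUTO");  // hypothetical model path
    // served by the ov::optimal_number_of_infer_requests branch above
    auto n_req = compiled.get_property(ov::optimal_number_of_infer_requests);
    // served by the ov::execution_devices branch above
    auto exec_devices = compiled.get_property(ov::execution_devices);
    std::cout << "requests: " << n_req << ", devices: " << exec_devices.size() << std::endl;
    return 0;
}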

View File

@ -1,39 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include <string>
#include <map>
#include "auto_schedule.hpp"
#include "executable_network.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
class AutoExecutableNetwork : public ExecutableNetwork {
friend IInferPtr AutoSchedule::CreateInferRequest();
public:
using Ptr = std::shared_ptr<AutoExecutableNetwork>;
explicit AutoExecutableNetwork(AutoScheduleContext::Ptr& context, const AutoSchedule::Ptr& schedule);
void SetConfig(const std::map<std::string, IE::Parameter>& config) override;
IE::Parameter GetConfig(const std::string& name) const override;
IE::Parameter GetMetric(const std::string& name) const override;
std::shared_ptr<IE::RemoteContext> GetContext() const override;
virtual ~AutoExecutableNetwork() = default;
private:
AutoScheduleContext::Ptr _autoSContext;
AutoSchedule::Ptr _autoSchedule;
};
} // namespace MultiDevicePlugin

File diff suppressed because it is too large

View File

@ -6,109 +6,38 @@
#pragma once
#include "schedule.hpp"
#include "async_infer_request.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
struct ThisRequestExecutor : public IE::ITaskExecutor {
explicit ThisRequestExecutor(WorkerInferRequest** ptr, MultiImmediateExecutor::Ptr executor = nullptr): _workptrptr{ptr}, _fallbackExec(executor) {}
void run(IE::Task task) override {
(*_workptrptr)->_task = std::move(task);
(*_workptrptr)->_fallbackExec = _fallbackExec;
(*_workptrptr)->_inferRequest->StartAsync();
};
WorkerInferRequest** _workptrptr = nullptr;
MultiImmediateExecutor::Ptr _fallbackExec;
};
struct AutoLoadContext {
std::atomic<bool> isEnabled = {false};
std::atomic<bool> isAlready = {false};
std::atomic<bool> isLoadSuccess = {false};
std::atomic<bool> isReloadSuccess = {false};
std::future<void> future;
std::promise<void> promise;
SoExecNetwork executableNetwork;
DeviceInformation deviceInfo;
std::vector<DeviceInformation> metaDevices;
std::string networkPrecision;
std::string errMessage;
IE::Task task;
//ACTUALDEVICE's workName is the same as its deviceName,
//CPU_HELP's workName is "CPU_HELP", and its deviceName is "CPU"
//workName is added because ACTUALDEVICE and CPU may both be CPU,
//in which case they can't use the same workerQueue
std::string workName = "";
};
namespace ov {
namespace auto_plugin {
enum AutoLoadContextIndex {
CPU = 0,
ACTUALDEVICE = 1,
FALLBACKDEVICE = 2,
CONTEXTNUM = 3
};
class AutoSchedule : public Schedule, public IE::ITaskExecutor {
class AutoSchedule : public Schedule {
public:
using Ptr = std::shared_ptr<AutoSchedule>;
void init(const ScheduleContext::Ptr& sContext) override;
IInferPtr CreateInferRequest() override;
IInferPtr CreateInferRequestImpl(IE::InputsDataMap networkInputs, IE::OutputsDataMap networkOutputs) override;
IInferPtr CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) override;
void run(IE::Task inferTask) override;
Pipeline GetPipeline(const IInferPtr& syncRequestImpl, WorkerInferRequest** WorkerInferRequest) override;
void WaitActualNetworkReady() const;
virtual ~AutoSchedule();
public:
static thread_local WorkerInferRequest* _thisWorkerInferRequest;
// have to use the const char* ptr rather than std::string due to a bug in old gcc versions,
// the bug manifests e.g. on the old CentOS (and its 4.8.x gcc) used in our testing
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81880
static thread_local const char* _thisPreferredDeviceName;
AutoLoadContext _loadContext[CONTEXTNUM];
std::unique_ptr<AutoLoadContext[]> _pCTPUTLoadContext = nullptr;
size_t _nCTputDeviceNums = 0;
protected:
void GenerateWorkers(const std::string& device, const SoExecNetwork& executableNetwork);
bool ScheduleToWorkerInferRequest(IE::Task, DeviceName preferred_device = "");
static bool RunPipelineTask(IE::Task& inferPipelineTask, NotBusyPriorityWorkerRequests& idleWorkerRequests,
const DeviceName& preferred_device);
std::string GetLogTag() const noexcept;
DeviceMap<NotBusyPriorityWorkerRequests> _idleWorkerRequests;
AutoScheduleContext::Ptr _autoSContext;
std::atomic_size_t _numRequestsCreated = {0};
DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
AutoCompileContext m_compile_context[CONTEXTNUM];
private:
void init() override;
// release actual task
// ov::threading::Task release_actualdevice_task;
bool schedule_to_worker_infer_request(ov::threading::Task, DeviceName preferred_device = "") override;
void wait_actual_compiled_model_ready() const;
/**
* @brief wait for one of the executable network to finish loading.
* @return An SoPtr object hold an available executable network loaded to HW device.
* @note An exception will be thrown if all loading of network to hw device fails.
* @brief wait for one of the compiled models to finish loading.
* @return An SoPtr object holding an available compiled model loaded to a HW device.
* @note An exception will be thrown if loading the model fails on all HW devices.
*/
SoExecNetwork WaitFirstNetworkReady();
void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network, bool isCumulative);
bool selectOtherDevice(const std::string& currentDeviceName);
IE::Task releaseActualdeviceTask;
private:
IE::ThreadSafeQueue<IE::Task> _inferPipelineTasks;
DeviceMap<std::unique_ptr<IE::ThreadSafeQueue<IE::Task>>> _inferPipelineTasksDeviceSpecific;
SoExecNetwork _passthroughExeNet;
Time _cpuHelpReleaseTime;
size_t _cpuHelpInferCount = 0;
double _cpuHelpFps = 0.0;
std::string _LogTag;
IE::IStreamsExecutor::Ptr _executor;
mutable std::once_flag _oc;
std::once_flag _firstLoadOC;
std::future<void> _firstLoadFuture;
std::promise<void> _firstLoadPromise;
bool _exitFlag = {false};
SoCompiledModel wait_first_compiled_model_ready() override;
void try_to_compile_model(AutoCompileContext& context, const std::shared_ptr<ov::Model>& model) override;
bool select_other_device(const std::string& cur_dev_name) override;
size_t m_cpuhelp_infer_count = 0;
double m_cpuhelp_fps = 0.0;
mutable std::once_flag m_oc;
std::once_flag m_firstload_oc;
std::future<void> m_firstload_future;
std::promise<void> m_firstload_promise;
bool m_exitflag = {false};
};
} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov
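The m_firstload_promise / m_firstload_future pair declared above implements a "first device ready wins" hand-off. A minimal standalone sketch of that pattern, using illustrative names rather than the plugin's actual symbols:

#include <future>
#include <mutex>

static std::promise<void> first_ready_promise;                       // fulfilled exactly once
static std::future<void>  first_ready_future = first_ready_promise.get_future();
static std::once_flag     first_ready_once;

void on_device_compiled() {
    // whichever device finishes compiling first fulfils the promise, exactly once
    std::call_once(first_ready_once, [] { first_ready_promise.set_value(); });
}

void wait_first_ready() {
    first_ready_future.wait();   // unblocks as soon as any device has finished
}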

View File

@ -1,98 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "async_infer_request.hpp"
#include "plugin.hpp"
#include "bind_multi_schedule.hpp"
// ------------------------------MultiSchedule----------------------------
namespace MultiDevicePlugin {
void BinderMultiSchedule::init(const ScheduleContext::Ptr& sContext) {
AutoSchedule::init(sContext);
LOG_INFO_TAG("enable bind buffer for AUTO");
}
Pipeline BinderMultiSchedule::GetPipeline(const IInferPtr& syncInferRequest, WorkerInferRequest** workerInferRequest) {
Pipeline pipeline;
struct RequestExecutor : ITaskExecutor {
explicit RequestExecutor(InferenceEngine::SoIInferRequestInternal& inferRequest) : _inferRequest(inferRequest) {
_inferRequest->SetCallback([this](std::exception_ptr exceptionPtr) mutable {
_exceptionPtr = exceptionPtr;
auto capturedTask = std::move(_task);
capturedTask();
});
}
void run(InferenceEngine::Task task) override {
_task = std::move(task);
_inferRequest->StartAsync();
};
InferenceEngine::SoIInferRequestInternal& _inferRequest;
std::exception_ptr _exceptionPtr;
InferenceEngine::Task _task;
};
auto requestExecutor = std::make_shared<RequestExecutor>(
std::static_pointer_cast<MultiDeviceInferRequest>(syncInferRequest)->GetSharedRequest());
pipeline.emplace_back(requestExecutor, [requestExecutor] {
if (nullptr != requestExecutor->_exceptionPtr) {
std::rethrow_exception(requestExecutor->_exceptionPtr);
}
});
return pipeline;
}
BinderMultiSchedule::~BinderMultiSchedule() {
}
IInferPtr BinderMultiSchedule::CreateInferRequestImpl(
const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
auto num = _numRequestsCreated++;
size_t sum = 0;
SoInfer request_to_share_blobs_with;
// borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
// this potentially allows saving on the data copy later (if the requests are scheduled in the same order)
for (const auto& device : _autoSContext->_devicePrioritiesInitial) {
auto& dev_requests = _workerRequests[device.deviceName];
if ((num - sum) < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num - sum)._inferRequest;
break;
}
sum += dev_requests.size();
}
if (!request_to_share_blobs_with) {
IE_THROW() <<
"binder mode does not allow oversubsciption of infer requests"
" please use optimal infer request";
}
auto syncImpl = std::make_shared<MultiDeviceInferRequest>(inputs, outputs, request_to_share_blobs_with);
return syncImpl;
}
IInferPtr BinderMultiSchedule::CreateInferRequestImpl(IE::InputsDataMap networkInputs,
IE::OutputsDataMap networkOutputs) {
auto num = _numRequestsCreated++;
SoInfer request_to_share_blobs_with;
size_t sum = 0;
// borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
// this potentially allows saving on the data copy later (if the requests are scheduled in the same order)
for (const auto& device : _autoSContext->_devicePrioritiesInitial) {
auto& dev_requests = _workerRequests[device.deviceName];
if ((num - sum) < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num - sum)._inferRequest;
break;
}
sum += dev_requests.size();
}
if (!request_to_share_blobs_with) {
IE_THROW() <<
"binder mode does not allow oversubsciption of infer requests"
" please use optimal infer request";
}
auto syncImpl = std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with);
return syncImpl;
}
} // namespace MultiDevicePlugin
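Since binder mode refuses to oversubscribe, the application is expected to create at most the reported optimal number of requests. A hedged usage sketch with a placeholder model path:

ov::Core core;
auto compiled = core.compile_model("model.xml", "AUTO");             // hypothetical model path
auto n = compiled.get_property(ov::optimal_number_of_infer_requests);
std::vector<ov::InferRequest> requests;
for (uint32_t i = 0; i < n; ++i) {
    // staying within the optimum avoids the oversubscription error thrown above
    requests.push_back(compiled.create_infer_request());
}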

View File

@ -1,28 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "auto_schedule.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
class BinderMultiSchedule : public AutoSchedule {
public:
using Ptr = std::shared_ptr<BinderMultiSchedule>;
IInferPtr CreateInferRequestImpl(IE::InputsDataMap networkInputs, IE::OutputsDataMap networkOutputs) override;
IE::IInferRequestInternal::Ptr CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) override;
void init(const ScheduleContext::Ptr& sContext) override;
Pipeline GetPipeline(const IInferPtr& syncRequestImpl, WorkerInferRequest** WorkerInferRequest) override;
virtual ~BinderMultiSchedule();
};
} // namespace MultiDevicePlugin

View File

@ -10,14 +10,13 @@
#include "ie_icore.hpp"
#include "ie_metric_helpers.hpp"
#include <ie_plugin_config.hpp>
#include "cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp"
#include "threading/ie_executor_manager.hpp"
#include "threading/ie_immediate_executor.hpp"
#include "threading/ie_istreams_executor.hpp"
#include "threading/ie_itask_executor.hpp"
#include "threading/ie_thread_safe_containers.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/iasync_infer_request.hpp"
#include "openvino/runtime/threading/itask_executor.hpp"
#include "openvino/runtime/remote_tensor.hpp"
#include "openvino/runtime/threading/thread_safe_containers.hpp"
#include "utils/log_util.hpp"
#include <ie_performance_hints.hpp>
#include "openvino/runtime/auto/properties.hpp"
#include "ngraph/opsets/opset1.hpp"
#include "transformations/utils/utils.hpp"
@ -26,191 +25,226 @@
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#define auto_plugin mock_auto_plugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
namespace IE = InferenceEngine;
namespace ov {
namespace auto_plugin {
using DeviceName = std::string;
using IInferPtr = IE::IInferRequestInternal::Ptr;
using IExecNetwork = IE::IExecutableNetworkInternal;
using SoInfer = IE::SoIInferRequestInternal;
using SoExecNetwork = IE::SoExecutableNetworkInternal;
using IASyncInferPtr = std::shared_ptr<ov::IAsyncInferRequest>;
using ISyncInferPtr = std::shared_ptr<ov::ISyncInferRequest>;
using SoAsyncInferRequest = ov::SoPtr<ov::IAsyncInferRequest>;
using SoCompiledModel = ov::SoPtr<ov::ICompiledModel>;
using Time = std::chrono::time_point<std::chrono::steady_clock>;
using Stage = std::pair<std::shared_ptr<ov::threading::ITaskExecutor>, ov::threading::Task>;
using Pipeline = std::vector<Stage>;
template<typename T>
using DeviceMap = std::unordered_map<DeviceName, T>;
struct MultiImmediateExecutor : public IE::ITaskExecutor {
// TODO(Bell): check if this is needed, or whether the immediate executor is enough
struct AutoImmediateExecutor : public ov::threading::ITaskExecutor {
public:
/**
* @brief A shared pointer to a ImmediateExecutor object
*/
using Ptr = std::shared_ptr<MultiImmediateExecutor>;
using Ptr = std::shared_ptr<AutoImmediateExecutor>;
/**
* @brief Destroys the object.
*/
~MultiImmediateExecutor() override = default;
~AutoImmediateExecutor() override = default;
void run(IE::Task task) override {
_task = std::move(task);
_task();
void run(ov::threading::Task task) override {
immediate_task = std::move(task);
immediate_task();
}
IE::Task _task;
};
struct DeviceInformation {
DeviceName deviceName;
std::map<std::string, std::string> config;
int numRequestsPerDevices;
std::string defaultDeviceID;
DeviceName uniqueName;
unsigned int devicePriority;
DeviceInformation(DeviceName dn = {}, std::map<std::string, std::string> conf = {},
int nReq = -1, std::string defaultID = {}, DeviceName uName = {}, unsigned int priority = 0)
: deviceName(dn), config(conf),
numRequestsPerDevices(nReq), defaultDeviceID(defaultID), uniqueName(uName), devicePriority(priority)
{}
ov::threading::Task immediate_task;
};
struct WorkerInferRequest {
SoInfer _inferRequest;
IE::Task _task;
std::exception_ptr _exceptionPtr = nullptr;
std::list<Time> _startTimes;
std::list<Time> _endTimes;
int _index = 0;
MultiImmediateExecutor::Ptr _fallbackExec;
SoAsyncInferRequest m_inferrequest;
ov::threading::Task m_task;
std::exception_ptr m_exception_ptr = nullptr;
std::list<Time> m_start_times;
std::list<Time> m_end_times;
int m_index = 0;
AutoImmediateExecutor::Ptr m_fallback_exec;
};
struct ThisRequestExecutor : public ov::threading::ITaskExecutor {
explicit ThisRequestExecutor(WorkerInferRequest** ptr, AutoImmediateExecutor::Ptr executor = nullptr): m_workptrptr{ptr}, m_fallback_exec(executor) {}
void run(ov::threading::Task task) override {
(*m_workptrptr)->m_task = std::move(task);
(*m_workptrptr)->m_fallback_exec = m_fallback_exec;
(*m_workptrptr)->m_inferrequest->start_async();
};
WorkerInferRequest** m_workptrptr = nullptr;
AutoImmediateExecutor::Ptr m_fallback_exec;
};
struct DeviceInformation {
DeviceName device_name;
ov::AnyMap config;
int num_requests_per_devices;
std::string default_device_id;
DeviceName unique_name;
unsigned int device_priority;
DeviceInformation(DeviceName dn = {}, ov::AnyMap conf = {},
int nReq = -1, std::string defaultID = {}, DeviceName uName = {}, unsigned int priority = 0)
: device_name(dn), config(conf),
num_requests_per_devices(nReq), default_device_id(defaultID), unique_name(uName), device_priority(priority)
{}
};
struct deviceChecker {
template <typename T,
typename std::enable_if<std::is_same<typename std::decay<T>::type, std::string>::value, bool>::type = true,
typename U = typename std::vector<T>::const_iterator>
U checkAndReturnIfDeviceInList(const std::string& target, const std::vector<T>& deviceList, bool exactMatch = false) {
if (exactMatch) {
return std::find_if(deviceList.begin(), deviceList.end(),
U check_and_return_if_device_in_list(const std::string& target, const std::vector<T>& device_list, bool exact_match = false) {
if (exact_match) {
return std::find_if(device_list.begin(), device_list.end(),
[&target](const T& d) { return d == target; });
}
return std::find_if(deviceList.begin(), deviceList.end(),
return std::find_if(device_list.begin(), device_list.end(),
[&target](const T & d) {
return d.find(target) != std::string::npos;
});
}
template <typename T,
typename std::enable_if<std::is_same<typename std::decay<T>::type, std::string>::value, bool>::type = true>
bool checkIfDeviceInList(const std::string& target, const std::vector<T>& deviceList, bool exactMatch = false) {
if (exactMatch) {
return std::find_if(deviceList.begin(), deviceList.end(),
[&target](const T& d) { return d == target; }) != deviceList.cend();
bool check_if_device_in_list(const std::string& target, const std::vector<T>& device_list, bool exact_match = false) {
if (exact_match) {
return std::find_if(device_list.begin(), device_list.end(),
[&target](const T& d) { return d == target; }) != device_list.cend();
}
return std::find_if(deviceList.begin(), deviceList.end(),
return std::find_if(device_list.begin(), device_list.end(),
[&target](const T& d) {
return d.find(target) != std::string::npos;
}) != deviceList.end();
}) != device_list.end();
}
template <typename T,
typename std::enable_if<std::is_same<typename std::decay<T>::type, DeviceInformation>::value, bool>::type = true,
typename U = typename std::vector<T>::const_iterator>
U checkAndReturnIfDeviceInList(const std::string& target, const std::vector<T>& deviceList, bool exactMatch = false) {
if (exactMatch) {
return std::find_if(deviceList.begin(), deviceList.end(),
[&target](const T& d) { return d.deviceName == target; });
U check_and_return_if_device_in_list(const std::string& target, const std::vector<T>& device_list, bool exact_match = false) {
if (exact_match) {
return std::find_if(device_list.begin(), device_list.end(),
[&target](const T& d) { return d.device_name == target; });
}
return std::find_if(deviceList.begin(), deviceList.end(),
return std::find_if(device_list.begin(), device_list.end(),
[&target](const T& d) {
return d.deviceName.find(target) != std::string::npos;
return d.device_name.find(target) != std::string::npos;
});
}
template <typename T,
typename std::enable_if<std::is_same<typename std::decay<T>::type, DeviceInformation>::value, bool>::type = true>
bool checkIfDeviceInList(const std::string& target, const std::vector<T>& deviceList, bool exactMatch = false) {
if (exactMatch) {
return std::find_if(deviceList.begin(), deviceList.end(),
[&target](const T& d) { return d.deviceName == target; }) != deviceList.end();
bool check_if_device_in_list(const std::string& target, const std::vector<T>& device_list, bool exact_match = false) {
if (exact_match) {
return std::find_if(device_list.begin(), device_list.end(),
[&target](const T& d) { return d.device_name == target; }) != device_list.end();
}
return std::find_if(deviceList.begin(), deviceList.end(),
return std::find_if(device_list.begin(), device_list.end(),
[&target](const T& d) {
return d.deviceName.find(target) != std::string::npos;
}) != deviceList.end();
return d.device_name.find(target) != std::string::npos;
}) != device_list.end();
}
};
using NotBusyPriorityWorkerRequests = IE::ThreadSafeBoundedPriorityQueue<std::pair<int, WorkerInferRequest*>>;
using NotBusyWorkerRequests = IE::ThreadSafeBoundedQueue<WorkerInferRequest*>;
using NotBusyPriorityWorkerRequests = ov::threading::ThreadSafeBoundedPriorityQueue<std::pair<int, WorkerInferRequest*>>;
using NotBusyWorkerRequests = ov::threading::ThreadSafeBoundedQueue<WorkerInferRequest*>;
using TaskQueue = ov::threading::ThreadSafeQueue<ov::threading::Task>;
template <typename T>
struct IdleGuard {};
template<>
struct IdleGuard<NotBusyWorkerRequests> {
explicit IdleGuard(WorkerInferRequest* workerInferRequestPtr, NotBusyWorkerRequests& notBusyWorkerRequests) :
_workerInferRequestPtr{workerInferRequestPtr},
_notBusyWorkerRequests{&notBusyWorkerRequests} {
explicit IdleGuard(WorkerInferRequest* worker_inferrequest_ptr, NotBusyWorkerRequests& not_busy_worker_requests) :
m_worker_inferrequest_ptr{worker_inferrequest_ptr},
m_not_busy_worker_requests{&not_busy_worker_requests} {
}
~IdleGuard() {
if (nullptr != _notBusyWorkerRequests) {
_notBusyWorkerRequests->try_push(_workerInferRequestPtr);
if (nullptr != m_not_busy_worker_requests) {
m_not_busy_worker_requests->try_push(m_worker_inferrequest_ptr);
}
}
NotBusyWorkerRequests* Release() {
auto notBusyWorkerRequests = _notBusyWorkerRequests;
_notBusyWorkerRequests = nullptr;
return notBusyWorkerRequests;
NotBusyWorkerRequests* release() {
auto not_busy_worker_requests = m_not_busy_worker_requests;
m_not_busy_worker_requests = nullptr;
return not_busy_worker_requests;
}
WorkerInferRequest* _workerInferRequestPtr = nullptr;
NotBusyWorkerRequests* _notBusyWorkerRequests = nullptr;
WorkerInferRequest* m_worker_inferrequest_ptr = nullptr;
NotBusyWorkerRequests* m_not_busy_worker_requests = nullptr;
};
template<>
struct IdleGuard<NotBusyPriorityWorkerRequests> {
explicit IdleGuard(WorkerInferRequest* workerInferRequestPtr, NotBusyPriorityWorkerRequests& notBusyWorkerRequests) :
_workerInferRequestPtr{workerInferRequestPtr},
_notBusyWorkerRequests{&notBusyWorkerRequests} {
explicit IdleGuard(WorkerInferRequest* worker_inferrequest_ptr, NotBusyPriorityWorkerRequests& not_busy_worker_requests) :
m_worker_inferrequest_ptr{worker_inferrequest_ptr},
m_not_busy_worker_requests{&not_busy_worker_requests} {
}
~IdleGuard() {
if (nullptr != _notBusyWorkerRequests) {
_notBusyWorkerRequests->try_push(std::make_pair(_workerInferRequestPtr->_index, _workerInferRequestPtr));
if (nullptr != m_not_busy_worker_requests) {
m_not_busy_worker_requests->try_push(std::make_pair(m_worker_inferrequest_ptr->m_index, m_worker_inferrequest_ptr));
}
}
NotBusyPriorityWorkerRequests* Release() {
auto notBusyWorkerRequests = _notBusyWorkerRequests;
_notBusyWorkerRequests = nullptr;
return notBusyWorkerRequests;
NotBusyPriorityWorkerRequests* release() {
auto not_busy_worker_requests_queue = m_not_busy_worker_requests;
m_not_busy_worker_requests = nullptr;
return not_busy_worker_requests_queue;
}
WorkerInferRequest* _workerInferRequestPtr = nullptr;
NotBusyPriorityWorkerRequests* _notBusyWorkerRequests = nullptr;
WorkerInferRequest* m_worker_inferrequest_ptr = nullptr;
NotBusyPriorityWorkerRequests* m_not_busy_worker_requests = nullptr;
};
class Plugin;
class ScheduleContext : public std::enable_shared_from_this<ScheduleContext> {
public:
using Ptr = std::shared_ptr<ScheduleContext>;
std::shared_ptr<IE::ICore> _core;
std::weak_ptr<IExecNetwork> _executableNetwork;
std::string _LogTag;
std::shared_ptr<ov::ICore> m_ov_core;
std::weak_ptr<ov::ICompiledModel> m_compiled_model;
std::string m_log_tag;
std::vector<DeviceInformation> m_device_priorities;
std::vector<DeviceInformation> m_device_priorities_initial;
bool m_need_perf_counters;
bool m_batching_disabled = false;
bool m_startup_fallback = true;
bool m_runtime_fallback = true;
bool m_bind_buffer = false;
std::shared_ptr<ov::Model> m_model;
std::string m_model_path;
std::shared_ptr<const ov::IPlugin> m_plugin;
std::string m_str_devices;
unsigned int m_model_priority = 0;
ov::Any m_performance_hint;
std::mutex m_mutex;
std::mutex m_fallback_mutex;
SoCompiledModel m_hw_compiled_model;
std::string m_model_precision;
virtual ~ScheduleContext() = default;
};
class MultiDeviceInferencePlugin;
class AutoScheduleContext : public ScheduleContext {
public:
using Ptr = std::shared_ptr<AutoScheduleContext>;
std::vector<DeviceInformation> _devicePriorities;
std::vector<DeviceInformation> _devicePrioritiesInitial;
std::unordered_map<std::string, IE::Parameter> _config;
bool _needPerfCounters;
bool _batchingDisabled = {false};
bool _startupfallback = true;
bool _runtimeFallback = true;
std::string _modelPath;
IE::CNNNetwork _network;
std::string _strDevices;
unsigned int _modelPriority = 0;
std::string _performanceHint;
std::mutex _confMutex;
std::mutex _fallbackMutex;
MultiDeviceInferencePlugin* _plugin;
SoExecNetwork _hwExecutableNetwork;
virtual ~AutoScheduleContext() = default;
struct AutoCompileContext {
std::atomic<bool> m_is_enabled = {false};
std::atomic<bool> m_is_already = {false};
std::atomic<bool> m_is_load_success = {false};
std::atomic<bool> m_is_reload_success = {false};
std::future<void> m_future;
std::promise<void> m_promise;
SoCompiledModel m_compiled_model;
DeviceInformation m_device_info;
std::vector<DeviceInformation> m_meta_devices;
std::string m_model_precision;
std::string m_err_message;
ov::threading::Task m_task;
std::string m_worker_name = "";
};
} // namespace MultiDevicePlugin
enum AutoCompileContextIndex {
CPU = 0,
ACTUALDEVICE = 1,
FALLBACKDEVICE = 2,
CONTEXTNUM = 3
};
} // namespace auto_plugin
} // namespace ov
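A small illustration, with hypothetical device names, of how the deviceChecker helpers above differ between substring and exact matching:

std::vector<std::string> device_list = {"CPU", "GPU.0"};
deviceChecker checker;
// substring match (default): "GPU" is found inside "GPU.0" -> true
bool found = checker.check_if_device_in_list<std::string>("GPU", device_list);
// exact match: no entry equals "GPU" exactly -> false
bool found_exact = checker.check_if_device_in_list<std::string>("GPU", device_list, true);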

View File

@ -0,0 +1,87 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "compiled_model.hpp"
#include "common.hpp"
#include <memory>
#include "async_infer_request.hpp"
#include "itt.hpp"
#include "openvino/op/util/op_types.hpp"
#include "openvino/runtime/exec_model_info.hpp"
#include "openvino/runtime/properties.hpp"
#include "plugin.hpp"
#include "transformations/rt_info/fused_names_attribute.hpp"
#include "transformations/utils/utils.hpp"
ov::auto_plugin::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
const std::shared_ptr<const ov::IPlugin>& plugin,
ScheduleContext::Ptr context,
Schedule::Ptr scheduler)
: ov::ICompiledModel(model, plugin),
m_context(context),
m_scheduler(scheduler) {
scheduler->launch(context);
m_inputs_outputs_from_hardware = (model == nullptr);
}
const std::vector<ov::Output<const ov::Node>>& ov::auto_plugin::CompiledModel::outputs() const {
if (m_inputs_outputs_from_hardware && m_context->m_hw_compiled_model)
return m_context->m_hw_compiled_model->outputs();
return ov::ICompiledModel::outputs();
}
const std::vector<ov::Output<const ov::Node>>& ov::auto_plugin::CompiledModel::inputs() const {
if (m_inputs_outputs_from_hardware && m_context->m_hw_compiled_model)
return m_context->m_hw_compiled_model->inputs();
return ov::ICompiledModel::inputs();
}
ov::auto_plugin::ISyncInferPtr ov::auto_plugin::CompiledModel::create_sync_infer_request() const {
return m_scheduler->create_sync_infer_request();
}
ov::auto_plugin::IASyncInferPtr ov::auto_plugin::CompiledModel::create_infer_request() const {
const_cast<CompiledModel*>(this)->set_compile_model_for_context();
auto internal_request = create_sync_infer_request();
auto async_infer_request = std::make_shared<AsyncInferRequest>(
m_scheduler,
std::static_pointer_cast<InferRequest>(internal_request),
get_callback_executor());
return async_infer_request;
}
std::string ov::auto_plugin::CompiledModel::get_log_tag() const noexcept {
return m_context->m_log_tag;
}
ov::AnyMap ov::auto_plugin::CompiledModel::get_device_supported_properties(AutoCompileContext& context) {
ov::AnyMap all_devices;
ov::AnyMap device_properties = {};
OPENVINO_ASSERT(context.m_compiled_model);
auto device_supported_properties = context.m_compiled_model->get_property(ov::supported_properties.name());
for (auto&& property_name : device_supported_properties.as<std::vector<ov::PropertyName>>()) {
// due to an LTO issue, explicitly do the conversion here
std::string query_name = property_name;
device_properties[property_name] = context.m_compiled_model->get_property(query_name);
}
all_devices[context.m_device_info.device_name] = device_properties;
return all_devices;
}
void ov::auto_plugin::CompiledModel::set_compile_model_for_context() {
std::call_once(m_oc, [this]() {
m_context->m_compiled_model = shared_from_this();
});
}
std::shared_ptr<const ov::auto_plugin::Plugin> ov::auto_plugin::CompiledModel::get_auto_plugin() {
auto plugin = get_plugin();
OPENVINO_ASSERT(plugin);
auto auto_plugin = std::static_pointer_cast<const ov::auto_plugin::Plugin>(plugin);
OPENVINO_ASSERT(auto_plugin);
return auto_plugin;
}
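get_device_supported_properties() above is what backs the ov::device::properties query on an AUTO compiled model. A hedged sketch of reading it back from the application side; the model path is a placeholder, and the AnyMap cast mirrors how the plugin assembles the value above:

ov::Core core;
auto compiled = core.compile_model("model.xml", "AUTO");             // hypothetical model path
// maps each backing device name (e.g. "CPU") to that device's own property map
auto per_device = compiled.get_property(ov::device::properties.name()).as<ov::AnyMap>();
for (const auto& item : per_device) {
    const auto& device_name = item.first;                            // e.g. "CPU"
    const auto& props = item.second.as<ov::AnyMap>();                // that device's properties
    (void)device_name; (void)props;
}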

View File

@ -0,0 +1,37 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "schedule.hpp"
namespace ov {
namespace auto_plugin {
class Schedule;
class CompiledModel : public ov::ICompiledModel {
public:
CompiledModel(const std::shared_ptr<ov::Model>& model,
const std::shared_ptr<const ov::IPlugin>& plugin,
ScheduleContext::Ptr context,
Schedule::Ptr scheduler);
std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;
std::shared_ptr<const Plugin> get_auto_plugin();
const std::vector<ov::Output<const ov::Node>>& outputs() const override;
const std::vector<ov::Output<const ov::Node>>& inputs() const override;
protected:
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
std::string get_log_tag() const noexcept;
static ov::AnyMap get_device_supported_properties(AutoCompileContext& context);
private:
ScheduleContext::Ptr m_context;
Schedule::Ptr m_scheduler;
std::once_flag m_oc;
bool m_inputs_outputs_from_hardware;
void set_compile_model_for_context();
};
} // namespace auto_plugin
} // namespace ov

View File

@ -0,0 +1,157 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cumulative_compiled_model.hpp"
#include "common.hpp"
#include <memory>
#include "async_infer_request.hpp"
#include "itt.hpp"
#include "openvino/runtime/exec_model_info.hpp"
#include "openvino/runtime/properties.hpp"
#include "plugin.hpp"
#include "ie_plugin_config.hpp"
namespace ov {
namespace auto_plugin {
AutoCumuCompiledModel::AutoCumuCompiledModel(const std::shared_ptr<ov::Model>& model,
const std::shared_ptr<const ov::IPlugin>& plugin,
ScheduleContext::Ptr context,
Schedule::Ptr scheduler)
: CompiledModel(model, plugin, context, scheduler),
m_model(model),
m_context(context) {
m_scheduler = std::dynamic_pointer_cast<CumuSchedule>(scheduler);
}
void AutoCumuCompiledModel::set_property(const ov::AnyMap& properties) {
OPENVINO_NOT_IMPLEMENTED;
}
std::shared_ptr<const ov::Model> AutoCumuCompiledModel::get_runtime_model() const {
if (m_context->m_hw_compiled_model)
return m_context->m_hw_compiled_model->get_runtime_model();
OPENVINO_NOT_IMPLEMENTED;
}
ov::Any AutoCumuCompiledModel::get_property(const std::string& name) const {
const auto& add_ro_properties = [](const std::string& name, std::vector<ov::PropertyName>& properties) {
properties.emplace_back(ov::PropertyName{name, ov::PropertyMutability::RO});
};
const auto& default_ro_properties = []() {
std::vector<ov::PropertyName> ro_properties{ov::model_name,
ov::supported_properties,
ov::execution_devices,
ov::hint::performance_mode,
ov::optimal_number_of_infer_requests,
ov::device::properties,
ov::hint::model_priority,
ov::loaded_from_cache};
return ro_properties;
};
const auto& default_rw_properties = []() {
std::vector<ov::PropertyName> rw_properties{ov::device::priorities};
return rw_properties;
};
const auto& to_string_vector = [](const std::vector<ov::PropertyName>& properties) {
std::vector<std::string> ret;
for (const auto& property : properties) {
ret.emplace_back(property);
}
return ret;
};
if (name == ov::supported_properties) {
auto ro_properties = default_ro_properties();
auto rw_properties = default_rw_properties();
std::vector<ov::PropertyName> supported_properties;
supported_properties.reserve(ro_properties.size() + rw_properties.size());
supported_properties.insert(supported_properties.end(), ro_properties.begin(), ro_properties.end());
supported_properties.insert(supported_properties.end(), rw_properties.begin(), rw_properties.end());
return decltype(ov::supported_properties)::value_type(supported_properties);
} else if (name == ov::hint::performance_mode) {
return m_context->m_performance_hint;
} else if (name == ov::device::priorities) {
// device priority does not support changing on the fly
return decltype(ov::device::priorities)::value_type(m_context->m_str_devices);
} else if (name == ov::device::properties) {
ov::AnyMap all_devices = {};
for (size_t i = 0; i < m_scheduler->m_n_ctput_devicenums; i++) {
if (m_scheduler->m_p_ctput_loadcontext[i].m_is_already) {
auto temp = get_device_supported_properties(m_scheduler->m_p_ctput_loadcontext[i]);
all_devices.insert(temp.begin(), temp.end());
}
}
return all_devices;
} else if (name == ov::hint::model_priority) {
auto value = m_context->m_model_priority;
if (m_context->m_ov_core->is_new_api()) {
return value ? ((value > 1) ? ov::hint::Priority::LOW :
ov::hint::Priority::MEDIUM) : ov::hint::Priority::HIGH;
} else {
OPENVINO_SUPPRESS_DEPRECATED_START
return value ? ((value > 1) ? CONFIG_VALUE(MODEL_PRIORITY_LOW) : CONFIG_VALUE(
MODEL_PRIORITY_MED)) : CONFIG_VALUE(MODEL_PRIORITY_HIGH);
OPENVINO_SUPPRESS_DEPRECATED_END
}
} else if (name == ov::optimal_number_of_infer_requests) {
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
unsigned int res = 0u;
for (size_t i = 0; i < m_scheduler->m_n_ctput_devicenums; i++) {
try {
if (m_scheduler->m_p_ctput_loadcontext[i].m_is_already) {
res += (m_scheduler->m_p_ctput_loadcontext[i])
.m_compiled_model->get_property(ov::optimal_number_of_infer_requests.name())
.as<unsigned int>();
}
} catch (const ov::Exception& err) {
OPENVINO_THROW("Every device used in cumulative mode should support OPTIMAL_NUMBER_OF_INFER_REQUESTS property from compiled model",
"Failed to query the property with error:", err.what());
}
}
return decltype(ov::optimal_number_of_infer_requests)::value_type {res};
} else if (name == ov::execution_devices) {
std::vector<std::string> exeDevices = {};
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
for (auto const & n : m_context->m_device_priorities) {
exeDevices.push_back(n.device_name);
}
return decltype(ov::execution_devices)::value_type {exeDevices};
} else if (name == ov::model_name) {
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
for (size_t i = 0; i < m_scheduler->m_n_ctput_devicenums; i++) {
if (m_scheduler->m_p_ctput_loadcontext[i].m_is_already) {
return m_scheduler->m_p_ctput_loadcontext[i].m_compiled_model->get_property(name);
}
}
OPENVINO_THROW("No valid compiled model found to get", name);
OPENVINO_SUPPRESS_DEPRECATED_START
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
auto ro_properties = default_ro_properties();
add_ro_properties(METRIC_KEY(SUPPORTED_METRICS), ro_properties);
add_ro_properties(METRIC_KEY(SUPPORTED_CONFIG_KEYS), ro_properties);
return to_string_vector(ro_properties);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
auto rw_properties = default_rw_properties();
return to_string_vector(rw_properties);
OPENVINO_SUPPRESS_DEPRECATED_END
} else if (name == ov::loaded_from_cache) {
bool loaded_from_cache = true;
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
for (size_t i = 0; i < m_scheduler->m_n_ctput_devicenums; i++) {
if (m_scheduler->m_p_ctput_loadcontext[i].m_is_already) {
loaded_from_cache &= (m_scheduler->m_p_ctput_loadcontext[i].m_compiled_model->get_property(name).as<bool>());
}
}
return loaded_from_cache;
}
OPENVINO_THROW(get_log_tag(), ": unsupported property ", name);
}
void AutoCumuCompiledModel::export_model(std::ostream& model_stream) const {
OPENVINO_NOT_IMPLEMENTED;
}
} // namespace auto_plugin
} // namespace ov
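A hedged sketch of compiling for cumulative throughput so the branches above become observable: the reported optimal number of infer requests is the sum of each loaded device's own optimum. The device list and model path are illustrative:

ov::Core core;
auto compiled = core.compile_model("model.xml", "AUTO",
    ov::device::priorities("GPU,CPU"),
    ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
// e.g. if GPU reports 4 and CPU reports 5, cumulative mode reports 9
auto total = compiled.get_property(ov::optimal_number_of_infer_requests);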

View File

@ -0,0 +1,38 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "compiled_model.hpp"
#include "cumulative_schedule.hpp"
namespace ov {
namespace auto_plugin {
class AutoCumuCompiledModel : public CompiledModel {
public:
AutoCumuCompiledModel(const std::shared_ptr<ov::Model>& model,
const std::shared_ptr<const ov::IPlugin>& plugin,
ScheduleContext::Ptr context,
Schedule::Ptr scheduler);
// implement pure virtual methods from a base class ov::ICompiledModel
void export_model(std::ostream& model) const override;
std::shared_ptr<const ov::Model> get_runtime_model() const override;
void set_property(const ov::AnyMap& properties) override;
ov::Any get_property(const std::string& name) const override;
private:
friend class InferRequest;
friend class Plugin;
std::shared_ptr<ov::Model> m_model;
ScheduleContext::Ptr m_context;
CumuSchedule::Ptr m_scheduler;
};
} // namespace auto_plugin
} // namespace ov

View File

@ -0,0 +1,254 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cumulative_schedule.hpp"
#include "async_infer_request.hpp"
#include "plugin.hpp"
// ------------------------------CumuSchedule----------------------------
namespace ov {
namespace auto_plugin {
bool CumuSchedule::select_other_device(const std::string& cur_dev_name) {
{
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
auto remove_inferfail_device = [&](const std::string& device_name) {
if (m_context->m_device_priorities.size() > 1) {
const auto current_device_iter =
deviceChecker().check_and_return_if_device_in_list<DeviceInformation>(device_name, m_context->m_device_priorities);
if (current_device_iter != m_context->m_device_priorities.end()) {
m_context->m_device_priorities.erase(current_device_iter);
return true;
}
}
return false;
};
if (m_p_ctput_loadcontext) {
return remove_inferfail_device(cur_dev_name);
}
return false;
}
}
void CumuSchedule::init() {
if (m_context->m_bind_buffer) {
// disable runtime fallback, as it is not applicable in bind mode
m_context->m_runtime_fallback = false;
LOG_INFO_TAG("disable runtime fallback in bind mode");
}
std::string profilingTask = "CumuSchedule::CumuSchedule:compile_model";
const auto& valid_devices = m_context->m_device_priorities;
{
// Total number of devices in CTPUT
m_n_ctput_devicenums = valid_devices.size();
// Generate contexts for loading each device
m_p_ctput_loadcontext.reset(new AutoCompileContext[m_n_ctput_devicenums]);
int idx = 0;
DeviceInformation cpu_device_information;
for (auto& device : valid_devices) {
if (device.device_name.find("CPU") == std::string::npos) {
m_p_ctput_loadcontext[idx].m_device_info = device;
m_p_ctput_loadcontext[idx].m_device_info.config[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT;
idx++;
} else {
cpu_device_information = device;
cpu_device_information.config.insert(
{ov::affinity.name(), ov::Any(ov::Affinity::CORE).as<std::string>()});
}
}
if (!cpu_device_information.device_name.empty()) {
m_p_ctput_loadcontext[idx].m_device_info = cpu_device_information;
m_p_ctput_loadcontext[idx].m_device_info.config[ov::hint::performance_mode.name()] = ov::hint::PerformanceMode::THROUGHPUT;
}
}
if (m_context->m_log_tag == "MULTI") {
// MULTI's performance hint is always tput
m_context->m_performance_hint = ov::hint::PerformanceMode::THROUGHPUT;
}
auto load_device_task = [&](AutoCompileContext* context_ptr,
const std::shared_ptr<ov::Model>& model) {
try_to_compile_model(*context_ptr, model);
if (context_ptr->m_is_load_success) {
if (context_ptr->m_worker_name.empty()) {
context_ptr->m_worker_name = context_ptr->m_device_info.device_name;
}
generate_workers(context_ptr->m_worker_name, context_ptr->m_compiled_model);
context_ptr->m_is_already = true;
// reloadsuccess flag only for m_compile_context[FALLBACKDEVICE]
context_ptr->m_is_reload_success = true;
auto& device_name = context_ptr->m_device_info.device_name;
LOG_INFO_TAG("device:%s compiling model finished", device_name.c_str());
DEBUG_RUN([this, &context_ptr, &device_name] {
auto supported_config_keys = context_ptr->m_compiled_model->get_property(ov::supported_properties.name()).as<std::vector<ov::PropertyName>>();
std::lock_guard<std::mutex> lock(m_context->m_mutex);
for (const auto& cfg : supported_config_keys) {
try {
LOG_DEBUG_TAG("device:%s, GetConfig:%s=%s",
device_name.c_str(),
cfg.c_str(),
context_ptr->m_compiled_model->get_property(cfg).as<std::string>().c_str());
} catch (const ov::Exception&) {
}
}
});
}
// Handle device load failure in case of ctput
if (!context_ptr->m_is_load_success) {
std::string failedDeviceName = context_ptr->m_device_info.device_name;
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
const auto DeviceIter = deviceChecker().check_and_return_if_device_in_list(failedDeviceName, m_context->m_device_priorities);
// Remove failed device from m_device_priorities
if (DeviceIter != m_context->m_device_priorities.end()) {
m_context->m_device_priorities.erase(DeviceIter);
}
}
};
m_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
ov::threading::IStreamsExecutor::Config{"CTPUTDeviceAsyncLoad",
static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
0 /*default threads per stream, workaround for ticket 62376*/,
ov::threading::IStreamsExecutor::ThreadBindingType::NONE});
std::vector<ov::threading::Task> other_devices_loads;
std::vector<ov::threading::Task> cpu_loads;
for (size_t i = 0; i < m_n_ctput_devicenums; i++) {
auto* context_ptr = &m_p_ctput_loadcontext[i];
auto model = m_context->m_model;
m_p_ctput_loadcontext[i].m_task = std::bind(load_device_task, context_ptr, model);
if (i == m_n_ctput_devicenums - 1 &&
m_p_ctput_loadcontext[i].m_device_info.device_name.find("CPU") != std::string::npos) {
cpu_loads.push_back(m_p_ctput_loadcontext[i].m_task);
} else {
other_devices_loads.push_back(m_p_ctput_loadcontext[i].m_task);
}
}
OV_ITT_SCOPED_TASK(itt::domains::AutoPlugin, openvino::itt::handle(profilingTask));
for (auto&& device : m_context->m_device_priorities) {
// initialize containers before running the async task; if not initialized, it will hang during inference
m_idle_worker_requests[device.device_name];
m_worker_requests[device.device_name];
m_infer_pipeline_tasks_device_specific[device.device_name] = nullptr;
}
// load devices other than CPU first
if (other_devices_loads.size() > 0) {
// Wait for the devices other than CPU to compile the model
m_executor->run_and_wait(other_devices_loads);
}
// Finally load the CPU
if (cpu_loads.size() > 0) {
// Wait for CPU to compile the model
m_executor->run_and_wait(cpu_loads);
}
if (m_n_ctput_devicenums == 1 && m_p_ctput_loadcontext[0].m_is_already) {
m_passthrough_compiled_model = m_p_ctput_loadcontext[0].m_compiled_model;
m_context->m_hw_compiled_model = m_passthrough_compiled_model;
}
m_context->m_hw_compiled_model = wait_first_compiled_model_ready();
}
void CumuSchedule::try_to_compile_model(AutoCompileContext& context, const std::shared_ptr<ov::Model>& model) {
auto& device = context.m_device_info.device_name;
auto& device_config = context.m_device_info.config;
bool cur_dev_is_gpu = (device.find("GPU") != std::string::npos);
{
std::lock_guard<std::mutex> lock(m_context->m_mutex);
if (cur_dev_is_gpu) {
// if the user did not set the number of compiling threads,
// limit the thread num used for compiling
int max_threads = 0;
try {
max_threads = m_context->m_ov_core->get_property(device, ov::compilation_num_threads);
} catch (const ov::Exception&) {
LOG_DEBUG_TAG("cannot get MAX_NUM_THREADS from GPU");
}
if (max_threads == static_cast<int>(std::thread::hardware_concurrency())) {
int thread_num = max_threads / 2;
device_config.insert(ov::compilation_num_threads(thread_num));
LOG_DEBUG_TAG("gpu streams number for compiling: %d", thread_num);
} else {
// the user set the compiling thread num,
// so use the user's value anyway
LOG_DEBUG_TAG("user defined compiling threads: %d", max_threads);
}
}
}
try {
if (!(m_context->m_model_path.empty())) {
context.m_compiled_model = m_context->m_ov_core->compile_model(m_context->m_model_path, device, device_config);
} else {
context.m_compiled_model = m_context->m_ov_core->compile_model(model, device, device_config);
}
context.m_is_load_success = true;
} catch (const ov::Exception& e) {
context.m_err_message += device + ":" + e.what();
context.m_is_load_success = false;
} catch (const std::exception& e) {
context.m_err_message += device + ":" + e.what();
context.m_is_load_success = false;
}
}
SoCompiledModel CumuSchedule::wait_first_compiled_model_ready() {
std::ostringstream result;
result << "compile model failed, ";
for (size_t i = 0; i < m_n_ctput_devicenums; i++) {
// check if device loaded successfully
if (m_p_ctput_loadcontext[i].m_is_already) {
return m_p_ctput_loadcontext[i].m_compiled_model;
} else {
result << m_p_ctput_loadcontext[i].m_err_message.c_str();
result << "; ";
}
}
OPENVINO_THROW("[", get_log_tag(), "] ", result.str());
}
bool CumuSchedule::schedule_to_worker_infer_request(ov::threading::Task pipeline_task, DeviceName preferred_device) {
std::vector<DeviceInformation> devices;
// AUTO work mode
// Devices that fail inference will be removed from the priority list in the callback, so a lock is needed here
std::unique_lock<std::mutex> lock(m_context->m_fallback_mutex);
if (!preferred_device.empty()) {
devices = m_context->m_device_priorities;
if (!deviceChecker().check_if_device_in_list<DeviceInformation>(preferred_device, devices)) {
lock.unlock();
OPENVINO_THROW("The preferred device should be the selected device");
}
} else {
devices = m_context->m_device_priorities;
}
lock.unlock();
for (auto&& device : devices) {
if (!preferred_device.empty() && (device.device_name != preferred_device)) {
continue;
}
if (run_pipeline_task(pipeline_task, m_idle_worker_requests[device.device_name], preferred_device)) {
return true;
}
}
// no vacant requests this time, so store the task in the respective queue
if (!preferred_device.empty()) {
m_infer_pipeline_tasks_device_specific[preferred_device]->push(std::move(pipeline_task));
} else {
m_infer_pipeline_tasks.push(std::move(pipeline_task));
}
return false;
}
CumuSchedule::~CumuSchedule() {
if (m_context) {
std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
m_context->m_device_priorities.clear();
}
/* NOTE: The only threads that use `MultiSchedule` are the worker infer requests' threads.
* However, the AsyncInferRequest destructor should wait for all asynchronous tasks issued by the request.
*/
for (auto&& idleWorker : m_idle_worker_requests) {
// stop accepting any idle requests back (for re-scheduling)
idleWorker.second.set_capacity(0);
}
}
} // namespace auto_plugin
} // namespace ov
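try_to_compile_model() above halves the GPU compilation threads only when ov::compilation_num_threads still equals the hardware concurrency (i.e. the default). A hedged sketch of keeping a user-chosen value instead; the value 8 and the model path are illustrative:

ov::Core core;
// a user-defined value is detected by the check above and kept as-is
core.set_property("GPU", ov::compilation_num_threads(8));
auto compiled = core.compile_model("model.xml", "AUTO",
                                   ov::device::priorities("GPU,CPU"));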

View File

@ -0,0 +1,29 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "schedule.hpp"
#include "async_infer_request.hpp"
namespace ov {
namespace auto_plugin {
class CumuSchedule : public Schedule {
public:
using Ptr = std::shared_ptr<CumuSchedule>;
virtual ~CumuSchedule();
std::unique_ptr<AutoCompileContext[]> m_p_ctput_loadcontext = nullptr;
size_t m_n_ctput_devicenums = 0;
private:
void init() override;
SoCompiledModel wait_first_compiled_model_ready() override;
bool schedule_to_worker_infer_request(ov::threading::Task, DeviceName preferred_device = "") override;
void try_to_compile_model(AutoCompileContext& context, const std::shared_ptr<ov::Model>& model) override;
bool select_other_device(const std::string& cur_dev_name) override;
};
} // namespace auto_plugin
} // namespace ov

View File

@ -1,44 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "executable_network.hpp"
// ------------------------------ExecutableNetwork----------------------------
namespace MultiDevicePlugin {
using namespace InferenceEngine;
ExecutableNetwork::ExecutableNetwork(const Schedule::Ptr& schedule,
const ScheduleContext::Ptr& sContext):
_schedule(schedule),
_sContext(sContext) {
_schedule->init(_sContext);
}
ExecutableNetwork::~ExecutableNetwork() {
}
IInferRequestInternal::Ptr ExecutableNetwork::CreateInferRequest() {
SetExeNetworkForContext();
return _schedule->CreateInferRequest();
}
void ExecutableNetwork::SetExeNetworkForContext() {
// Different APIs may call this function, so call_once is used here
// for every AutoSchedule instance
std::call_once(_oc, [this]() {
_sContext->_executableNetwork = shared_from_this();
});
}
std::string ExecutableNetwork::GetLogTag() const noexcept {
return _sContext->_LogTag;
}
std::shared_ptr<ngraph::Function> ExecutableNetwork::GetExecGraphInfo() {
// TODO: When there are more than 2 executable networks, such as MULTI:GPU,CPU
auto autoSContext = std::dynamic_pointer_cast<AutoScheduleContext>(_sContext);
return autoSContext->_hwExecutableNetwork->GetExecGraphInfo();
}
} // namespace MultiDevicePlugin

View File

@ -1,34 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "schedule.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
class ExecutableNetwork : public
InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
using Ptr = std::shared_ptr<ExecutableNetwork>;
ExecutableNetwork(const Schedule::Ptr& schedule, const ScheduleContext::Ptr& sContext);
IInferPtr CreateInferRequest() override;
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
~ExecutableNetwork() override;
protected:
std::string GetLogTag() const noexcept;
private:
Schedule::Ptr _schedule;
ScheduleContext::Ptr _sContext;
std::once_flag _oc;
void SetExeNetworkForContext();
};
} // namespace MultiDevicePlugin

View File

@ -2,138 +2,117 @@
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "infer_request.hpp"
#include <ngraph/node.hpp>
#include <transformations/utils/utils.hpp>
#include <ie_input_info.hpp>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include <blob_factory.hpp>
#include <debug.h>
namespace MultiDevicePlugin {
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <utility>
using namespace InferenceEngine;
#include "itt.hpp"
#include "openvino/core/except.hpp"
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/tensor.hpp"
#include "plugin.hpp"
// ------------------------------MultiDeviceInferRequest----------------------------
MultiDeviceInferRequest::MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx)
: IInferRequestInternal(inputs, outputs),
_sharedRequest(request_to_share_blobs_with) {
for (const std::shared_ptr<const ov::Node>& in : inputs) {
modelInputsMap[ov::op::util::get_ie_output_name(ngraph::Output<const ngraph::Node>(in))] = in;
}
for (const std::shared_ptr<const ov::Node>& out : outputs) {
modelOutputsMap[ov::op::util::get_ie_output_name(out->input_value(0))] = out;
}
CreateInferRequest(request_to_share_blobs_with, ctx);
}
using Time = std::chrono::high_resolution_clock;
MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
const SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx)
: IInferRequestInternal(networkInputs, networkOutputs),
_sharedRequest(request_to_share_blobs_with) {
CreateInferRequest(request_to_share_blobs_with, ctx);
}
namespace {
void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx) {
if (request_to_share_blobs_with) {
// do not need to touch multi memory blobs
return;
}
// Allocate all input blobs
for (const auto &it : _networkInputs) {
auto l = it.second->getLayout();
auto p = it.second->getPrecision();
auto dims = it.second->getTensorDesc().getDims();
TensorDesc desc = TensorDesc(p, dims, l);
if (ctx) {
_inputs[it.first] = ctx->CreateHostBlob(desc);
void allocate_tensor_impl(ov::Tensor& tensor, const ov::element::Type& element_type, const ov::Shape& shape) {
if (!tensor || tensor.get_element_type() != element_type) {
tensor = ov::Tensor(element_type, shape);
} else {
_inputs[it.first] = make_blob_with_precision(desc);
}
_inputs[it.first]->allocate();
}
// Allocate all output blobs
for (const auto &it : _networkOutputs) {
auto l = it.second->getLayout();
auto p = it.second->getPrecision();
auto dims = it.second->getTensorDesc().getDims();
// for 1.0 API, dims is not dynamic anyway
if (InferenceEngine::details::product(dims) == 0 && !modelOutputsMap.empty()) {
// replace the dims with one from dynamic shape
const auto outputNodeItr = modelOutputsMap.find(it.first);
if (outputNodeItr != modelOutputsMap.end()) {
const auto shape = outputNodeItr->second->get_input_partial_shape(0);
// update dims
dims = shape.get_max_shape();
}
}
TensorDesc desc = TensorDesc(p, dims, l);
if (ctx) {
_outputs[it.first] = ctx->CreateHostBlob(desc);
} else {
_outputs[it.first] = make_blob_with_precision(desc);
}
_outputs[it.first]->allocate();
tensor.set_shape(shape);
}
}
void MultiDeviceInferRequest::SetBlobsToAnotherRequest(const SoIInferRequestInternal& req) {
for (const auto &it : _networkInputs) {
auto &name = it.first;
} // namespace
ov::auto_plugin::InferRequest::InferRequest(const std::shared_ptr<const ov::auto_plugin::CompiledModel>& model,
const SoAsyncInferRequest& request_to_share_tensors_with)
: ov::ISyncInferRequest(model),
m_shared_request(request_to_share_tensors_with) {
if (!m_shared_request) {
// Allocate input/output tensors
for (const auto& input : get_inputs()) {
allocate_tensor(input, [input](ov::Tensor& tensor) {
// Can add a check to avoid double work in case of shared tensors
allocate_tensor_impl(tensor,
input.get_element_type(),
input.get_partial_shape().is_dynamic() ? ov::Shape{0} : input.get_shape());
});
}
for (const auto& output : get_outputs()) {
allocate_tensor(output, [output](ov::Tensor& tensor) {
// Can add a check to avoid double work in case of shared tensors
allocate_tensor_impl(tensor,
output.get_element_type(),
output.get_partial_shape().is_dynamic() ? ov::Shape{0} : output.get_shape());
});
}
} else {
for (const auto& input : get_inputs()) {
ov::ISyncInferRequest::set_tensor(input, ov::Tensor(m_shared_request->get_tensor(input), m_shared_request._so));
}
for (const auto& output : get_outputs()) {
ov::ISyncInferRequest::set_tensor(output, ov::Tensor(m_shared_request->get_tensor(output), m_shared_request._so));
}
}
}
const ov::auto_plugin::SoAsyncInferRequest& ov::auto_plugin::InferRequest::get_shared_request() {
return m_shared_request;
}
void ov::auto_plugin::InferRequest::set_scheduled_request(SoAsyncInferRequest request) {
m_scheduled_request = request;
}
void ov::auto_plugin::InferRequest::set_tensors_to_another_request(const SoAsyncInferRequest& req) {
for (const auto &it : get_inputs()) {
// this request is already in the BUSY state, so the internal functions can be used safely
auto blob = GetBlob(name);
if (req->GetBlob(name) != blob)
req->SetBlob(name, blob);
auto tensor = get_tensor(it);
auto type = tensor.get_element_type();
bool is_remote = tensor.is<ov::RemoteTensor>() || req->get_tensor(it).is<ov::RemoteTensor>();
if (is_remote || req->get_tensor(it).data(type) != tensor.data(type))
req->set_tensor(it, tensor);
}
for (const auto &it : _networkOutputs) {
auto &name = it.first;
for (const auto &it : get_outputs()) {
// this request is already in the BUSY state, so the internal functions can be used safely
auto blob = GetBlob(name);
if (req->GetBlob(name) != blob)
req->SetBlob(name, blob);
auto tensor = get_tensor(it);
auto type = tensor.get_element_type();
bool is_remote = tensor.is<ov::RemoteTensor>() || req->get_tensor(it).is<ov::RemoteTensor>();
if (is_remote || req->get_tensor(it).data(type) != tensor.data(type))
req->set_tensor(it, tensor);
}
}
void MultiDeviceInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& blob) {
if (_sharedRequest)
_sharedRequest->SetBlob(name, blob);
else
IInferRequestInternal::SetBlob(name, blob);
void ov::auto_plugin::InferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) {
if (m_shared_request)
m_shared_request->set_tensor(port, tensor);
ov::ISyncInferRequest::set_tensor(port, tensor);
}
InferenceEngine::Blob::Ptr MultiDeviceInferRequest::GetBlob(const std::string& name) {
if (_sharedRequest)
return _sharedRequest->GetBlob(name);
else
return IInferRequestInternal::GetBlob(name);
void ov::auto_plugin::InferRequest::infer() {
OPENVINO_NOT_IMPLEMENTED;
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> MultiDeviceInferRequest::GetPerformanceCounts() const {
if (_sharedRequest) {
return _sharedRequest->GetPerformanceCounts();
} else {
// get the profiling info directly from target infer request
// not thread-safe for plugin like GPU, see CVS-86034
if (_scheduledRequest)
return _scheduledRequest->GetPerformanceCounts();
else
IE_THROW() << "Performance counters were not enabled";
}
std::vector<ov::ProfilingInfo> ov::auto_plugin::InferRequest::get_profiling_info() const {
if (m_shared_request)
return m_shared_request->get_profiling_info();
if (m_scheduled_request)
return m_scheduled_request->get_profiling_info();
OPENVINO_NOT_IMPLEMENTED;
}
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> MultiDeviceInferRequest::QueryState() {
if (_sharedRequest)
return _sharedRequest->QueryState();
IE_THROW(NotImplemented);
}
ov::auto_plugin::InferRequest::~InferRequest() = default;
} // namespace MultiDevicePlugin
std::vector<std::shared_ptr<ov::IVariableState>> ov::auto_plugin::InferRequest::query_state() const {
if (m_shared_request)
return m_shared_request->query_state();
OPENVINO_NOT_IMPLEMENTED;
}
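As a side note, the allocate_tensor_impl() helper above only re-creates a tensor when it is missing or its element type has changed, and otherwise just reshapes it in place. A minimal standalone sketch of that pattern; the shapes below are illustrative assumptions, not the diff's actual call sites:

#include <openvino/runtime/tensor.hpp>

// Re-create the tensor only when it is empty or its element type differs;
// otherwise keep the existing allocation and just update the shape.
static void lazy_allocate(ov::Tensor& tensor, const ov::element::Type& type, const ov::Shape& shape) {
    if (!tensor || tensor.get_element_type() != type) {
        tensor = ov::Tensor(type, shape);
    } else {
        tensor.set_shape(shape);
    }
}

int main() {
    ov::Tensor t;                                           // empty tensor
    lazy_allocate(t, ov::element::f32, {1, 3, 224, 224});   // allocates
    lazy_allocate(t, ov::element::f32, {1, 3, 112, 112});   // reuses the tensor, only reshapes
    return 0;
}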

View File

@ -14,44 +14,30 @@
#include <utility>
#include <memory>
#include <string>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include "ie_remote_context.hpp"
#include "plugin.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
class MultiDeviceInferRequest : public InferenceEngine::IInferRequestInternal {
namespace ov {
namespace auto_plugin {
class CompiledModel;
class InferRequest : public ov::ISyncInferRequest {
public:
using Ptr = std::shared_ptr<MultiDeviceInferRequest>;
explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx = nullptr);
explicit MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx = nullptr);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& blob) override;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState() override;
// Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request)
void SetBlobsToAnotherRequest(const InferenceEngine::SoIInferRequestInternal& req);
InferenceEngine::SoIInferRequestInternal& GetSharedRequest() { return _sharedRequest; }
InferenceEngine::SoIInferRequestInternal _scheduledRequest;
explicit InferRequest(const std::shared_ptr<const ov::auto_plugin::CompiledModel>& compiled_model,
const SoAsyncInferRequest& request_to_share_tensors_with);
~InferRequest();
void infer() override;
std::vector<std::shared_ptr<ov::IVariableState>> query_state() const override;
std::vector<ov::ProfilingInfo> get_profiling_info() const override;
const SoAsyncInferRequest& get_shared_request();
void set_scheduled_request(SoAsyncInferRequest request);
// Auto-Device impl specific: sets the data (tensors) from the device-agnostic request to the specific device request
void set_tensors_to_another_request(const SoAsyncInferRequest& req);
void set_tensor(const ov::Output<const ov::Node>& port, const ov::Tensor& tensor) override;
private:
void CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx);
InferenceEngine::SoIInferRequestInternal _sharedRequest;
std::unordered_map<std::string, std::shared_ptr<const ov::Node>> modelInputsMap;
std::unordered_map<std::string, std::shared_ptr<const ov::Node>> modelOutputsMap;
SoAsyncInferRequest m_shared_request;
SoAsyncInferRequest m_scheduled_request;
};
} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov

View File

@ -4,17 +4,18 @@
/**
* @brief Defines OpenVINO domains for tracing
* @file multi_itt.h
* @file itt.h
*/
#pragma once
#include <openvino/itt.hpp>
namespace MultiDevicePlugin {
namespace ov {
namespace auto_plugin {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(MULTIPlugin);
}
}
OV_ITT_DOMAIN(AutoPlugin);
}
} // namespace itt
} // namespace auto_plugin
} // namespace ov
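A hedged sketch of how such an ITT domain is typically consumed for profiling scopes elsewhere in the plugin; the function and task names here are illustrative assumptions:

#include <openvino/itt.hpp>
#include "itt.hpp"  // brings in ov::auto_plugin::itt::domains::AutoPlugin

void example_compile_step() {
    // Emits an ITT task on the AutoPlugin domain for the duration of this scope.
    OV_ITT_SCOPED_TASK(ov::auto_plugin::itt::domains::AutoPlugin, "example_compile_step");
    // ... work to be profiled ...
}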

File diff suppressed because it is too large

View File

@ -10,70 +10,81 @@
#include <string>
#include <list>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include "openvino/runtime/iplugin.hpp"
#include "utils/log_util.hpp"
#include "common.hpp"
#include "plugin_config.hpp"
#include "compiled_model.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace ov {
namespace auto_plugin {
namespace MultiDevicePlugin {
class MultiDeviceInferencePlugin : public InferenceEngine::IInferencePlugin {
class Plugin : public ov::IPlugin {
public:
MultiDeviceInferencePlugin();
~MultiDeviceInferencePlugin() = default;
Plugin();
~Plugin() = default;
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) override;
void set_property(const ov::AnyMap& properties) override;
ov::SoPtr<InferenceEngine::IExecutableNetworkInternal> LoadNetwork(const std::string& modelPath,
const std::map<std::string, std::string>& config) override;
ov::Any get_property(const std::string& name, const ov::AnyMap& arguments) const override;
void SetConfig(const std::map<std::string, std::string>& config) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter> & options) const override;
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const override;
InferenceEngine::Parameter GetMetric(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties) const override;
MOCKTESTMACRO std::vector<MultiDevicePlugin::DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
const std::map<std::string, std::string> & config) const;
std::shared_ptr<ov::ICompiledModel> compile_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties) const override;
MOCKTESTMACRO std::string GetDeviceList(const std::map<std::string, std::string>& config) const;
std::shared_ptr<ov::ICompiledModel> compile_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties,
const ov::RemoteContext& context) const override;
MOCKTESTMACRO std::list<DeviceInformation> GetValidDevice(const std::vector<DeviceInformation>& metaDevices,
const std::string& networkPrecision = METRIC_VALUE(FP32));
std::shared_ptr<ov::ICompiledModel> compile_model(const std::string& model_path,
const ov::AnyMap& properties) const override;
MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices,
const std::string& networkPrecision = METRIC_VALUE(FP32),
MOCKTESTMACRO std::vector<auto_plugin::DeviceInformation> parse_meta_devices(const std::string & devices_requests_cfg,
const ov::AnyMap& properties) const;
MOCKTESTMACRO std::string get_device_list(const ov::AnyMap& properties) const;
MOCKTESTMACRO std::list<DeviceInformation> get_valid_device(const std::vector<DeviceInformation>& meta_devices,
const std::string& model_precision = "FP32") const;
MOCKTESTMACRO DeviceInformation select_device(const std::vector<DeviceInformation>& meta_devices,
const std::string& model_precision = "FP32",
unsigned int priority = 0);
void UnregisterPriority(const unsigned int& priority, const std::string& deviceName);
void RegisterPriority(const unsigned int& priority, const std::string& deviceName);
void unregister_priority(const unsigned int& priority, const std::string& device_name);
void register_priority(const unsigned int& priority, const std::string& device_name);
std::shared_ptr<ov::IRemoteContext> create_context(const ov::AnyMap& remote_properties) const override;
std::shared_ptr<ov::IRemoteContext> get_default_context(const ov::AnyMap& remote_properties) const override;
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
const ov::AnyMap& properties) const override;
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
const ov::RemoteContext& context,
const ov::AnyMap& properties) const override;
protected:
ov::AnyMap PreProcessConfig(const std::map<std::string, std::string>& orig_config) const;
ov::AnyMap pre_process_config(const ov::AnyMap& orig_config) const;
private:
InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetworkImpl(const std::string& modelPath,
InferenceEngine::CNNNetwork network,
const std::map<std::string, std::string>& config,
const std::string &networkPrecision = METRIC_VALUE(FP32));
std::vector<DeviceInformation> FilterDevice(const std::vector<DeviceInformation>& metaDevices,
const std::map<std::string, std::string>& config);
std::vector<DeviceInformation> FilterDeviceByNetwork(const std::vector<DeviceInformation>& metaDevices,
InferenceEngine::CNNNetwork network);
std::string GetLogTag() const noexcept;
static std::mutex _mtx;
static std::map<unsigned int, std::list<std::string>> _priorityMap;
std::string _LogTag;
PluginConfig _pluginConfig;
std::shared_ptr<ov::ICompiledModel> compile_model_impl(const std::string& model_path,
const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties,
const std::string& model_precision = "FP32") const;
std::vector<DeviceInformation> filter_device(const std::vector<DeviceInformation>& meta_devices,
const ov::AnyMap& properties) const;
std::vector<DeviceInformation> filter_device_by_model(const std::vector<DeviceInformation>& meta_devices,
const std::shared_ptr<const ov::Model>& model) const;
std::string get_log_tag() const noexcept;
static std::mutex m_mtx;
static std::map<unsigned int, std::list<std::string>> m_priority_map;
PluginConfig m_plugin_config;
mutable SoCompiledModel m_hw_compiledmodel;
};
} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov
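For context, a minimal user-side sketch of the request this Plugin class serves: compile_model() receives the device priority string that parse_meta_devices()/select_device() above work from. The model path, candidate devices, and performance hint are illustrative assumptions:

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");
    // "AUTO:GPU,CPU" asks the AUTO plugin to choose among the listed candidates;
    // the performance hint is forwarded to the selected device.
    auto compiled = core.compile_model(model, "AUTO:GPU,CPU",
                                       ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
    auto request = compiled.create_infer_request();
    return 0;
}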

View File

@ -3,11 +3,12 @@
//
#include "plugin_config.hpp"
namespace MultiDevicePlugin {
namespace ov {
namespace auto_plugin {
// AUTO will enable the blocklist if
// 1. No device priority is passed to AUTO/MULTI (e.g. core.compile_model(model, "AUTO", configs);)
// 2. No valid device can be parsed out of the device priority (e.g. core.compile_model(model, "AUTO:-CPU,-GPU", configs);).
const std::set<std::string> PluginConfig::_deviceBlocklist = {"VPUX", "GNA", "notIntelGPU"};
const std::set<std::string> PluginConfig::device_block_list = {"VPUX", "GNA", "notIntelGPU"};
PluginConfig::PluginConfig() {
set_default();
@ -52,9 +53,8 @@ void PluginConfig::set_property(const ov::AnyMap& properties) {
// when the user calls set_property to set some config on the plugin, we also respect it and pass the config through in this case
user_properties[name] = val;
if (kv.first == ov::log::level.name()) {
auto log_level = kv.second.as<std::string>();
if (!setLogLevel(log_level)) {
IE_THROW() << "Unsupported log level: " << log_level;
if (!set_log_level(kv.second)) {
OPENVINO_THROW("Unsupported log level: ", kv.second.as<std::string>());
}
}
} else {
@ -108,9 +108,8 @@ void PluginConfig::apply_user_properties() {
for (auto& kv : user_properties) {
full_properties[kv.first] = kv.second;
if (kv.first == ov::log::level.name()) {
auto log_level = kv.second.as<std::string>();
if (!setLogLevel(log_level)) {
IE_THROW() << "Unsupported log level: " << log_level;
if (!set_log_level(kv.second)) {
OPENVINO_THROW("Unsupported log level: ", kv.second.as<std::string>());
}
}
}
@ -120,4 +119,5 @@ ov::AnyMap PluginConfig::get_full_properties() {
return full_properties;
}
} // namespace MultiDevicePlugin
} // namespace auto_plugin
} // namespace ov
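A small sketch of the user-side property that the ov::log::level handling above validates; the device name and chosen level are illustrative assumptions:

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Forwarded to PluginConfig::set_property(); an unsupported value would hit
    // the OPENVINO_THROW("Unsupported log level: ...") path shown above.
    core.set_property("AUTO", ov::log::level(ov::log::Level::DEBUG));
    return 0;
}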

Some files were not shown because too many files have changed in this diff.