From 263e51a1be82d9244abac42e47656ff9938f0dda Mon Sep 17 00:00:00 2001 From: Anton Voronov Date: Wed, 31 May 2023 10:37:32 +0400 Subject: [PATCH] [DOC][CPU] Documentation update (#17784) --- docs/OV_Runtime_UG/precision_control.md | 14 +++++++++ docs/OV_Runtime_UG/supported_plugins/CPU.md | 30 +++++++++++++------ .../supported_plugins/Supported_Devices.md | 5 ++-- docs/snippets/cpu/ov_execution_mode.cpp | 17 +++++++++++ docs/snippets/cpu/ov_execution_mode.py | 12 ++++++++ 5 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 docs/snippets/cpu/ov_execution_mode.cpp create mode 100644 docs/snippets/cpu/ov_execution_mode.py diff --git a/docs/OV_Runtime_UG/precision_control.md b/docs/OV_Runtime_UG/precision_control.md index 8fc09a7c856..79b36eaba33 100644 --- a/docs/OV_Runtime_UG/precision_control.md +++ b/docs/OV_Runtime_UG/precision_control.md @@ -26,6 +26,20 @@ Execution Mode If the model has been quantized using :doc:`OpenVINO optimization tools ` or any other method, the quantized operators will be executed with the target integer precision if the device has hardware acceleration for that type. For example, quantized ``int8`` primitives are executed with ``int8`` precision for both **ACCURACY** and **PERFORMANCE modes** if the device provides higher compute bandwidth for 8-bit data types compared to any available floating-point type. On the other hand, devices without hardware acceleration for the ``int8`` data type can keep such operators in floating point precision, and the exact floating point type will be affected by ``execution_mode`` and ``inference_precision`` properties. +Code examples: + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/cpu/ov_execution_mode.cpp + :language: cpp + :fragment: [ov:execution_mode:part0] + +.. tab:: Python + + .. 
doxygensnippet:: docs/snippets/cpu/ov_execution_mode.py + :language: python + :fragment: [ov:execution_mode:part0] + Inference Precision ################### diff --git a/docs/OV_Runtime_UG/supported_plugins/CPU.md b/docs/OV_Runtime_UG/supported_plugins/CPU.md index 45f4428ee79..34eb92261b9 100644 --- a/docs/OV_Runtime_UG/supported_plugins/CPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/CPU.md @@ -87,13 +87,15 @@ CPU plugin supports the following floating-point data types as inference precisi The default floating-point precision of a CPU primitive is ``f32``. To support the ``f16`` OpenVINO IR the plugin internally converts all the ``f16`` values to ``f32`` and all the calculations are performed using the native precision of ``f32``. -On platforms that natively support ``bfloat16`` calculations (have the ``AVX512_BF16`` extension), the ``bf16`` type is automatically used instead -of ``f32`` to achieve better performance. Thus, no special steps are required to run a ``bf16`` model. For more details about the ``bfloat16`` format, see +On platforms that natively support ``bfloat16`` calculations (have the ``AVX512_BF16`` or ``AMX`` extension), the ``bf16`` type is automatically used instead +of ``f32`` to achieve better performance (see the `Execution Mode Hint <#execution-mode-hint>`__). +Thus, no special steps are required to run a ``bf16`` model. For more details about the ``bfloat16`` format, see the `BFLOAT16 – Hardware Numerics Definition white paper `__. Using the ``bf16`` precision provides the following performance benefits: -- Faster multiplication of two ``bfloat16`` numbers because of shorter mantissa of the ``bfloat16`` data. +- ``bfloat16`` data type allows using Intel® Advanced Matrix Extensions (Intel® AMX), which provides dramatically faster computations on corresponding hardware in +comparison with AVX512 or AVX2 instructions in many DL operation implementations. - Reduced memory consumption since ``bfloat16`` data half the size of 32-bit float. 
To check if the CPU device can support the ``bfloat16`` data type, use the :doc:`query device properties interface ` @@ -117,6 +119,9 @@ to query ``ov::device::capabilities`` property, which should contain ``BF16`` in :fragment: [part0] +Inference Precision Hint +----------------------------------------------------------- + If the model has been converted to ``bf16``, the ``ov::hint::inference_precision`` is set to ``ov::element::bf16`` and can be checked via the ``ov::CompiledModel::get_property`` call. The code below demonstrates how to get the element type: @@ -156,7 +161,18 @@ To enable the simulation, the ``ov::hint::inference_precision`` has to be explic Due to the reduced mantissa size of the ``bfloat16`` data type, the resulting ``bf16`` inference accuracy may differ from the ``f32`` inference, especially for models that were not trained using the ``bfloat16`` data type. If the ``bf16`` inference accuracy is not acceptable, - it is recommended to switch to the ``f32`` precision. + it is recommended to switch to the ``f32`` precision. Also, the performance/accuracy balance can be managed using the ``ov::hint::execution_mode`` hint, + see the `Execution Mode Hint <#execution-mode-hint>`__. + +Execution Mode Hint +----------------------------------------------------------- +In case ``ov::hint::inference_precision`` is not explicitly set, one can use ``ov::hint::execution_mode`` hint to direct the run-time optimizations toward either better accuracy or better performance. +If ``ov::hint::execution_mode`` is set to ``ov::hint::ExecutionMode::PERFORMANCE`` (default behavior) and the platform natively supports ``bfloat16`` +calculations (has the ``AVX512_BF16`` or ``AMX`` extension) then ``bf16`` type is automatically used instead of ``f32`` to achieve better performance. 
+If the accuracy in this mode is not good enough, then set ``ov::hint::execution_mode`` to ``ov::hint::ExecutionMode::ACCURACY`` to force the plugin to +use the ``f32`` precision in floating-point calculations. + +For more details and code examples, see the :doc:`Precision Control `. Supported Features ########################################################### @@ -285,11 +301,6 @@ That means that :doc:`OpenVINO™ Extensibility Mechanism ` for details). -.. note:: - - At the moment, custom operations with internal dynamism (when the output tensor shape can only be determined - as a result of performing the operation) are not supported by the plugin. - Stateful Models +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -310,6 +321,7 @@ All parameters must be set before calling ``ov::Core::compile_model()`` in order - ``ov::enable_profiling`` - ``ov::hint::inference_precision`` - ``ov::hint::performance_mode`` +- ``ov::hint::execution_mode`` - ``ov::hint::num_request`` - ``ov::num_streams`` - ``ov::affinity`` diff --git a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md index 30319533bb0..fdad1107f17 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md +++ b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md @@ -19,8 +19,9 @@ The OpenVINO Runtime provides unique capabilities to infer deep learning models || :doc:`GPU ` | Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | +--------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+ || :doc:`CPU (x86) ` | Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector | -|| | Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® | -|| | AVX2, Intel® Atom® Processors with Intel® Streaming SIMD 
Extensions (Intel® SSE) | +|| | Extensions 512 (Intel® AVX-512), Intel® Advanced Matrix Extensions (Intel® AMX), | +|| | Intel® Core™ Processors with Intel® AVX2, | +|| | Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) | +--------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------+ || :doc:`CPU (Arm®) ` | Raspberry Pi™ 4 Model B, Apple® Mac with Apple silicon | || | | diff --git a/docs/snippets/cpu/ov_execution_mode.cpp b/docs/snippets/cpu/ov_execution_mode.cpp new file mode 100644 index 00000000000..877c90b70d1 --- /dev/null +++ b/docs/snippets/cpu/ov_execution_mode.cpp @@ -0,0 +1,17 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include <openvino/runtime/core.hpp> + +int main() { + //! [ov:execution_mode:part0] + ov::Core core; + // in case of Accuracy + core.set_property("CPU", ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); + // in case of Performance + core.set_property("CPU", ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE)); + //! [ov:execution_mode:part0] + + return 0; +} diff --git a/docs/snippets/cpu/ov_execution_mode.py b/docs/snippets/cpu/ov_execution_mode.py new file mode 100644 index 00000000000..4abd0ccf375 --- /dev/null +++ b/docs/snippets/cpu/ov_execution_mode.py @@ -0,0 +1,12 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.runtime import Core + +#! [ov:execution_mode:part0] +core = Core() +# in case of Accuracy +core.set_property("CPU", {"EXECUTION_MODE_HINT": "ACCURACY"}) +# in case of Performance +core.set_property("CPU", {"EXECUTION_MODE_HINT": "PERFORMANCE"}) +#! [ov:execution_mode:part0]