Merge remote-tracking branch 'upstream/master' into mbecer/PythonSegFaultCppFix
This commit is contained in:
commit
658748f83d
@ -30,7 +30,7 @@ jobs:
|
||||
maxParallel: 2
|
||||
|
||||
# About 150% of total time
|
||||
timeoutInMinutes: 150
|
||||
timeoutInMinutes: 180
|
||||
|
||||
pool:
|
||||
name: WIN_VMSS_VENV_D8S_WU2
|
||||
@ -133,7 +133,7 @@ jobs:
|
||||
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && $(CMAKE_CMD) -G "Ninja Multi-Config" -DENABLE_WHEEL=ON -DENABLE_ONEDNN_FOR_GPU=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.7.6\x64\include" -DPYTHON_LIBRARY="C:\hostedtoolcache\windows\Python\3.7.6\x64\libs\python37.lib" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||
call "$(MSVS_VARS_PATH)" && $(CMAKE_CMD) -G "Ninja Multi-Config" -DENABLE_WHEEL=ON -DENABLE_ONEDNN_FOR_GPU=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.7.6\x64\include" -DPYTHON_LIBRARY="C:\hostedtoolcache\windows\Python\3.7.6\x64\libs\python37.lib" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'CMake'
|
||||
|
||||
@ -167,13 +167,6 @@ jobs:
|
||||
workingDirectory: $(BUILD_SAMPLES_TESTS_DIR)
|
||||
displayName: 'Install Samples Tests'
|
||||
|
||||
- script: $(CMAKE_CMD) -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake && xcopy $(REPO_DIR)\temp\opencv_4.5.2\opencv\* $(INSTALL_DIR)\opencv\ /e /h /y
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Install tests'
|
||||
|
||||
- script: dir $(INSTALL_DIR) /s
|
||||
displayName: 'List install files'
|
||||
|
||||
- script: $(INSTALL_DIR)\samples\cpp\build_samples_msvc.bat -i $(INSTALL_DIR)
|
||||
workingDirectory: $(BUILD_SAMPLES_DIR)
|
||||
displayName: 'Build cpp samples'
|
||||
@ -198,9 +191,15 @@ jobs:
|
||||
python -m pytest $(INSTALL_DIR)\tests\smoke_tests\ --env_conf $(INSTALL_DIR)\tests\smoke_tests\env_config.yml -s --junitxml=TEST-SamplesSmokeTests.xml
|
||||
workingDirectory: $(INSTALL_DIR)
|
||||
displayName: 'Samples Smoke Tests'
|
||||
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
|
||||
continueOnError: false
|
||||
|
||||
- script: $(CMAKE_CMD) -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake && xcopy $(REPO_DIR)\temp\opencv_4.5.2\opencv\* $(INSTALL_DIR)\opencv\ /e /h /y
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Install tests'
|
||||
|
||||
- script: dir $(INSTALL_DIR) /s
|
||||
displayName: 'List install files'
|
||||
|
||||
- script: rd /Q /S $(BUILD_DIR)
|
||||
displayName: 'Clean build dir'
|
||||
continueOnError: false
|
||||
|
9
.github/github_org_control/config.json
vendored
9
.github/github_org_control/config.json
vendored
@ -5,11 +5,10 @@
|
||||
"IGNORE_LOGINS": [
|
||||
"openvino-ci",
|
||||
"openvino-pushbot",
|
||||
"lab-nerval",
|
||||
"lab-nerval-onnx-ci",
|
||||
"onnx-watchdog-agent",
|
||||
"workbench-ci-bot",
|
||||
"openvino-pot-ci"
|
||||
"openvino-pot-ci",
|
||||
"sysicvvpux",
|
||||
"ote-ci-bot"
|
||||
],
|
||||
"MAX_MEMBERS_TO_REMOVE": 15,
|
||||
"EMAILS_FILE_PATH": "dev_emails-test.txt",
|
||||
@ -28,7 +27,7 @@
|
||||
"openvino-ie-gna-maintainers": "category: GNA",
|
||||
"openvino-ie-gpu-maintainers": "category: GPU",
|
||||
"openvino-ie-lpt-maintainers": "category: LP transformations",
|
||||
"openvino-ie-multi-maintainers": "category: MULTI",
|
||||
"openvino-ie-auto-multi-maintainers": "category: MULTI",
|
||||
"openvino-ie-python-api-maintainers": "category: python api",
|
||||
"openvino-ie-template-maintainers": "category: TEMPLATE",
|
||||
"openvino-ie-tests-maintainers": "category: IE Tests",
|
||||
|
2
.github/github_org_control/github_api.py
vendored
2
.github/github_org_control/github_api.py
vendored
@ -157,7 +157,7 @@ class GithubOrgApi:
|
||||
self.github_users_by_email[email] = org_member
|
||||
if not is_valid_name(org_member.name):
|
||||
self.members_to_fix_name.add(org_member)
|
||||
elif not is_user_ignored(org_member):
|
||||
else:
|
||||
self.members_to_remove.add(org_member)
|
||||
|
||||
print("\nOrg members - no Intel emails:")
|
||||
|
13
CODEOWNERS
13
CODEOWNERS
@ -39,14 +39,19 @@ Jenkinsfile @openvinotoolkit/openvino-admins
|
||||
|
||||
# IE CPU:
|
||||
/src/plugins/intel_cpu/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
|
||||
/src/common/low_precision_transformations/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
|
||||
/src/plugins/intel_cpu/thirdparty/mkl-dnn/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
|
||||
|
||||
#IE LPT
|
||||
/src/common/low_precision_transformations/ @openvinotoolkit/openvino-ie-lpt-maintainers
|
||||
|
||||
# IE GPU:
|
||||
/src/inference/include/ie/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
|
||||
/src/inference/include/ie/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
|
||||
/src/inference/include/openvino/runtime/intel_gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
|
||||
/src/plugins/intel_gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
|
||||
/docs/snippets/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
|
||||
/docs/OV_Runtime_UG/supported_plugins/GPU.md @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
|
||||
/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
|
||||
|
||||
# IE VPU:
|
||||
/src/plugins/intel_myriad @openvinotoolkit/openvino-ie-vpu-maintainers
|
||||
@ -82,6 +87,6 @@ Jenkinsfile @openvinotoolkit/openvino-admins
|
||||
*.md @openvinotoolkit/openvino-docs-maintainers
|
||||
|
||||
# Control 3d party dependencies
|
||||
**/*requirements*.* @openvino-configuration-mgmt
|
||||
**/setup.py @openvino-configuration-mgmt
|
||||
/scripts/install_dependencies/ @openvino-configuration-mgmt
|
||||
**/*requirements*.* @openvinotoolkit/openvino-configuration-mgmt
|
||||
**/setup.py @openvinotoolkit/openvino-configuration-mgmt
|
||||
/scripts/install_dependencies/ @openvinotoolkit/openvino-configuration-mgmt
|
||||
|
@ -107,8 +107,10 @@ function(ov_ncc_naming_style)
|
||||
|
||||
list(APPEND NCC_STYLE_ADDITIONAL_INCLUDE_DIRECTORIES "${NCC_STYLE_SOURCE_DIRECTORY}")
|
||||
|
||||
# without it sources with same name from different directories will map to same .ncc_style target
|
||||
file(RELATIVE_PATH source_dir_rel ${CMAKE_SOURCE_DIR} ${NCC_STYLE_SOURCE_DIRECTORY})
|
||||
foreach(source IN LISTS sources)
|
||||
set(output_file "${ncc_style_bin_dir}/${source}.ncc_style")
|
||||
set(output_file "${ncc_style_bin_dir}/${source_dir_rel}/${source}.ncc_style")
|
||||
set(full_source_path "${NCC_STYLE_SOURCE_DIRECTORY}/${source}")
|
||||
|
||||
add_custom_command(
|
||||
|
229
docs/Extensibility_UG/GPU_Extensibility.md
Normal file
229
docs/Extensibility_UG/GPU_Extensibility.md
Normal file
@ -0,0 +1,229 @@
|
||||
# How to Implement Custom GPU Operations {#openvino_docs_Extensibility_UG_GPU}
|
||||
|
||||
To enable operations not supported by OpenVINO out of the box, you may need an extension for OpenVINO operation set, and a custom kernel for the device you will target. This page describes custom kernel support for the GPU device.
|
||||
|
||||
The GPU codepath abstracts many details about OpenCL\*. You need to provide the kernel code in OpenCL C and an XML configuration file that connects the kernel and its parameters to the parameters of the operation.
|
||||
|
||||
There are two options for using the custom operation configuration file:
|
||||
|
||||
* Include a section with your kernels into the automatically-loaded `<lib_path>/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml` file.
|
||||
* Call the `ov::Core::set_property()` method from your application with the `"CONFIG_FILE"` key and the configuration file name as a value before loading the network that uses custom operations to the plugin:
|
||||
|
||||
@snippet snippets/gpu/custom_kernels_api.cpp part0
|
||||
|
||||
All OpenVINO samples, except the trivial `hello_classification`, and most Open Model Zoo demos
|
||||
feature a dedicated command-line option `-c` to load custom kernels. For example, to load custom operations for the classification sample, run the command below:
|
||||
```sh
|
||||
$ ./classification_sample -m <path_to_model>/bvlc_alexnet_fp16.xml -i ./validation_set/daily/227x227/apron.bmp -d GPU
|
||||
-c <absolute_path_to_config>/custom_layer_example.xml
|
||||
```
|
||||
|
||||
## Configuration File Format <a name="config-file-format"></a>
|
||||
|
||||
The configuration file is expected to follow the `.xml` file structure
|
||||
with a node of the type `CustomLayer` for every custom operation you provide.
|
||||
|
||||
The definitions described in the sections below use the following notations:
|
||||
|
||||
Notation | Description
|
||||
---|---
|
||||
(0/1) | Can have zero or one instance of this node or attribute
|
||||
(1) | Must have only one instance of this node or attribute
|
||||
(0+) | Can have any number of instances of this node or attribute
|
||||
(1+) | Can have one or more instances of this node or attribute
|
||||
|
||||
### CustomLayer Node and Sub-Node Structure
|
||||
|
||||
`CustomLayer` node contains the entire configuration for a single custom operation.
|
||||
|
||||
| Attribute Name |\# | Description |
|
||||
|-----|-----|-----|
|
||||
| `name` | (1) | The name of the operation type to be used. This name should be identical to the type used in the IR.|
|
||||
| `type` | (1) | Must be `SimpleGPU`. |
|
||||
| `version` | (1) | Must be `1`. |
|
||||
|
||||
**Sub-nodes**: `Kernel` (1), `Buffers` (1), `CompilerOptions` (0+),
|
||||
`WorkSizes` (0/1)
|
||||
|
||||
### Kernel Node and Sub-Node Structure
|
||||
|
||||
`Kernel` node contains all kernel source code configuration.
|
||||
|
||||
**Sub-nodes**: `Source` (1+), `Define` (0+)
|
||||
|
||||
### Source Node and Sub-Node Structure
|
||||
|
||||
`Source` node points to a single OpenCL source file.
|
||||
|
||||
| Attribute Name | \# |Description|
|
||||
|-----|-----|-----|
|
||||
| `filename` | (1) | Name of the file containing OpenCL source code. Note that the path is relative to your executable. Multiple source nodes will have their sources concatenated in order. |
|
||||
|
||||
**Sub-nodes**: None
|
||||
|
||||
### Define Node and Sub-Node Structure
|
||||
|
||||
`Define` node configures a single `#‍define` instruction to be added to
|
||||
the sources during compilation (JIT).
|
||||
|
||||
| Attribute Name | \# | Description |
|
||||
|------|-------|------|
|
||||
| `name` | (1) | The name of the defined JIT. For static constants, this can include the value as well, which is taken as a string. |
|
||||
| `param` | (0/1) | This parameter value is used as the value of this JIT definition. |
|
||||
| `type` | (0/1) | The parameter type. Accepted values: `int`, `float`, and `int[]`, `float[]` for arrays. |
|
||||
| `default` | (0/1) | The default value to be used if the specified parameters are missing from the operation in the IR. |
|
||||
|
||||
**Sub-nodes:** None
|
||||
|
||||
The resulting JIT has the following form:
|
||||
`#‍define [name] [type] [value/default]`.
|
||||
|
||||
### Buffers Node and Sub-Node Structure
|
||||
|
||||
`Buffers` node configures all input/output buffers for the OpenCL entry
|
||||
function. No buffers node structure exists.
|
||||
|
||||
**Sub-nodes:** `Data` (0+), `Tensor` (1+)
|
||||
|
||||
### Data Node and Sub-Node Structure
|
||||
|
||||
`Data` node configures a single input with static data, for example,
|
||||
weights or biases.
|
||||
|
||||
| Attribute Name | \# | Description |
|
||||
|----|-----|------|
|
||||
| `name` | (1) | Name of a blob attached to an operation in the IR |
|
||||
| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to |
|
||||
|
||||
**Sub-nodes**: None
|
||||
|
||||
### Tensor Node and Sub-Node Structure
|
||||
|
||||
`Tensor` node configures a single input or output tensor.
|
||||
|
||||
| Attribute Name | \# | Description |
|
||||
|------|-------|-------|
|
||||
| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to. |
|
||||
| `type` | (1) | `input` or `output` |
|
||||
| `port-index` | (1) | 0-based index in the operation input/output ports in the IR |
|
||||
| `format` | (0/1) | Data layout declaration for the tensor. Accepted values: `BFYX`, `BYXF`, `YXFB`, `FYXB`, and same values in all lowercase. Default value: `BFYX` |
|
||||
|
||||
### CompilerOptions Node and Sub-Node Structure
|
||||
|
||||
`CompilerOptions` node configures the compilation flags for the OpenCL
|
||||
sources.
|
||||
|
||||
| Attribute Name | \# | Description |
|
||||
|--------|-----|------|
|
||||
| `options` | (1) | Options string to be passed to the OpenCL compiler |
|
||||
|
||||
**Sub-nodes**: None
|
||||
|
||||
### WorkSizes Node and Sub-Node Structure
|
||||
|
||||
`WorkSizes` node configures the global/local work sizes to be used when
|
||||
queuing an OpenCL program for execution.
|
||||
|
||||
| Attribute Name | \# | Description |
|
||||
|-----|------|-----|
|
||||
| `global`<br>`local` | (0/1)<br>(0/1) | An array of up to three integers or formulas for defining OpenCL work-sizes to be used during execution.<br> The formulas can use the values of the B,F,Y,X dimensions and contain the operators: +,-,/,\*,%. All operators are evaluated in integer arithmetic. <br>Default value: `global=”B*F*Y*X” local=””` |
|
||||
| `dim` | (0/1) | A tensor to take the work-size from. Accepted values: `input N`, `output`, where `N` is an index of input tensor starting with 0. Default value: `output` |
|
||||
|
||||
**Sub-nodes**: None
|
||||
|
||||
## Example Configuration File
|
||||
|
||||
The following code sample provides an example configuration file in XML
|
||||
format. For information on the configuration file structure, see
|
||||
[Configuration File Format](#config-file-format).
|
||||
```xml
|
||||
<CustomLayer name="ReLU" type="SimpleGPU" version="1">
|
||||
<Kernel entry="example_relu_kernel">
|
||||
<Source filename="custom_layer_kernel.cl"/>
|
||||
<Define name="neg_slope" type="float" param="negative_slope" default="0.0"/>
|
||||
</Kernel>
|
||||
<Buffers>
|
||||
<Tensor arg-index="0" type="input" port-index="0" format="BFYX"/>
|
||||
<Tensor arg-index="1" type="output" port-index="0" format="BFYX"/>
|
||||
</Buffers>
|
||||
<CompilerOptions options="-cl-mad-enable"/>
|
||||
<WorkSizes global="X,Y,B*F"/>
|
||||
</CustomLayer>
|
||||
```
|
||||
|
||||
## Built-In Definitions for Custom Layers
|
||||
|
||||
The following table includes definitions that are attached before
|
||||
user sources.
|
||||
|
||||
For an example, see [Example Kernel](#example-kernel).
|
||||
|
||||
| Name | Value |
|
||||
|---|---|
|
||||
| `NUM_INPUTS` | Number of the input tensors bound to this kernel |
|
||||
| `GLOBAL_WORKSIZE` | An array of global work sizes used to execute this kernel |
|
||||
| `GLOBAL_WORKSIZE_SIZE` | The size of the `GLOBAL_WORKSIZE` array |
|
||||
| `LOCAL_WORKSIZE` | An array of local work sizes used to execute this kernel |
|
||||
| `LOCAL_WORKSIZE_SIZE` | The size of the `LOCAL_WORKSIZE` array |
|
||||
| `<TENSOR>_DIMS`| An array of the tensor dimension sizes. Always ordered as `BFYX` |
|
||||
| `<TENSOR>_DIMS_SIZE`| The size of the `<TENSOR>_DIMS` array.|
|
||||
| `<TENSOR>_TYPE`| The datatype of the tensor: `float`, `half`, or `char`|
|
||||
| `<TENSOR>_FORMAT_<TENSOR_FORMAT>` | The format of the tensor, BFYX, BYXF, YXFB , FYXB, or ANY. The format is concatenated to the defined name. You can use the tensor format to define codepaths in your code with `#‍ifdef/#‍endif`. |
|
||||
| `<TENSOR>_LOWER_PADDING` | An array of padding elements used for the tensor dimensions before they start. Always ordered as BFYX.|
|
||||
| `<TENSOR>_LOWER_PADDING_SIZE` | The size of the `<TENSOR>_LOWER_PADDING` array |
|
||||
| `<TENSOR>_UPPER_PADDING` | An array of padding elements used for the tensor dimensions after they end. Always ordered as BFYX. |
|
||||
| `<TENSOR>_UPPER_PADDING_SIZE` | The size of the `<TENSOR>_UPPER_PADDING` array |
|
||||
| `<TENSOR>_PITCHES` | The offset (in elements) between adjacent elements in each dimension. Always ordered as BFYX.|
|
||||
| `<TENSOR>_PITCHES_SIZE`| The size of the `<TENSOR>_PITCHES` array |
|
||||
| `<TENSOR>_OFFSET`| The number of elements from the start of the tensor to the first valid element, bypassing the lower padding. |
|
||||
|
||||
All `<TENSOR>` values are automatically defined for every tensor
|
||||
bound to this operation, such as `INPUT0`, `INPUT1`, and `OUTPUT0`, as shown
|
||||
in the following example:
|
||||
|
||||
```c
|
||||
#define INPUT0_DIMS_SIZE 4
|
||||
#define INPUT0_DIMS (int []){ 1,96,55,55, }
|
||||
```
|
||||
|
||||
## Example Kernel<a name="example-kernel"></a>
|
||||
|
||||
```c
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
__kernel void example_relu_kernel(
|
||||
const __global INPUT0_TYPE* input0,
|
||||
__global OUTPUT0_TYPE* output)
|
||||
{
|
||||
const uint idx = get_global_id(0);
|
||||
const uint idy = get_global_id(1);
|
||||
const uint idbf = get_global_id(2); // batches*features, as OpenCL supports 3D nd-ranges only
|
||||
const uint feature = idbf % OUTPUT0_DIMS[1];
|
||||
const uint batch = idbf / OUTPUT0_DIMS[1];
|
||||
//notice that pitches are in elements, not in bytes!
|
||||
const uint in_id = batch*INPUT0_PITCHES[0] + feature*INPUT0_PITCHES[1] + idy*INPUT0_PITCHES[2] + idx*INPUT0_PITCHES[3] + INPUT0_OFFSET;
|
||||
const uint out_id = batch*OUTPUT0_PITCHES[0] + feature*OUTPUT0_PITCHES[1] + idy*OUTPUT0_PITCHES[2] + idx*OUTPUT0_PITCHES[3] + OUTPUT0_OFFSET;
|
||||
|
||||
INPUT0_TYPE value = input0[in_id];
|
||||
// neg_slope (which is non-zero for leaky ReLU) is put automatically as #define, refer to the config xml
|
||||
output[out_id] = value < 0 ? value * neg_slope : value;
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
> **NOTE**: As described in the previous section, all items like
|
||||
> `INPUT0_TYPE` are actually defined as OpenCL (pre-)compiler inputs by
|
||||
> the OpenVINO for efficiency reasons. See [Debugging
|
||||
> Tips](#debugging-tips) for information on debugging the results.
|
||||
|
||||
## Debugging Tips<a name="debugging-tips"></a>
|
||||
|
||||
* **Using `printf` in the OpenCL™ Kernels**.
|
||||
To debug the specific values, you can use `printf` in your kernels.
|
||||
However, be careful not to output excessively, which
|
||||
could generate too much data. The `printf` output is typical, so
|
||||
your output can be truncated to fit the buffer. Also, because of
|
||||
buffering, you actually get an entire buffer of output when the
|
||||
execution ends.<br>
|
||||
|
||||
For more information, refer to the [printf
|
||||
Function](https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/printfFunction.html).
|
@ -7,11 +7,13 @@
|
||||
:hidden:
|
||||
|
||||
openvino_docs_Extensibility_UG_add_openvino_ops
|
||||
openvino_docs_Extensibility_UG_GPU
|
||||
openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Customize_Model_Optimizer
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with multiple frameworks including
|
||||
TensorFlow, Caffe, MXNet, Kaldi, PaddlePaddle, and ONNX. The list of supported operations (layers) is different for
|
||||
The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with various frameworks, including
|
||||
TensorFlow, PyTorch, ONNX, PaddlePaddle, MXNet, Caffe, and Kaldi. The list of supported operations (layers) is different for
|
||||
each of the supported frameworks. To see the operations supported by your framework, refer to
|
||||
[Supported Framework Operations](../MO_DG/prepare_model/Supported_Frameworks_Layers.md).
|
||||
|
||||
@ -26,13 +28,16 @@ the Model Optimizer can generate the IR with the operation.
|
||||
|
||||
## OpenVINO™ Extensions
|
||||
|
||||
An OpenVINO™ provides extensions for:
|
||||
OpenVINO™ provides extensions for:
|
||||
|
||||
* [Custom OpenVINO™ Operation](add_openvino_ops.md):
|
||||
- Enables the creation of unsupported operations
|
||||
- Enables the use of `ov::Core::read_model` to read models with unsupported operations
|
||||
- Provides a shape inference mechanism for custom operations
|
||||
- Provides an evaluate method which allow to support the operation on CPU or perform constant folding
|
||||
- Provides an evaluate method that allows you to support the operation on CPU or perform constant folding
|
||||
* [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md):
|
||||
- Enables support of new operations to generate IR
|
||||
- Enables support of custom transformations to replace sub-graphs for performance optimization
|
||||
|
||||
> **NOTE**: This documentation is written based on the [Template extension](https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension/new), which demonstrates extension development details. You can review the complete code, which is fully compilable and up-to-date, to see how it works.
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
OpenVINO Transformation mechanism allows to develop transformation passes to modify `ov::Model`. You can use this mechanism to apply additional optimizations to the original Model or transform unsupported subgraphs and operations to new operations which are supported by the plugin.
|
||||
This guide contains all necessary information that you need to start implementing OpenVINO™ transformations.
|
||||
|
||||
## Working with Model
|
||||
|
@ -38,7 +38,7 @@ The implementation `CompileNetwork` is fully device-specific.
|
||||
The function accepts a const shared pointer to `ngraph::Function` object and performs the following steps:
|
||||
|
||||
1. Applies ngraph passes using `TransformNetwork` function, which defines plugin-specific conversion pipeline. To support low precision inference, the pipeline can include Low Precision Transformations. These transformations are usually hardware specific. You can find how to use and configure Low Precisions Transformations in [Low Precision Transformations](@ref openvino_docs_IE_DG_lpt) guide.
|
||||
2. Maps the transformed graph to a backend specific graph representation (for example, to MKLDNN graph for Intel CPU).
|
||||
2. Maps the transformed graph to a backend specific graph representation (for example, to CPU plugin internal graph representation).
|
||||
3. Allocates and fills memory for graph weights, backend specific memory handles and so on.
|
||||
|
||||
@snippet src/template_executable_network.cpp executable_network:map_graph
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
Inference Engine Plugin usually represents a wrapper around a backend. Backends can be:
|
||||
- OpenCL-like backend (e.g. clDNN library) for GPU devices.
|
||||
- MKLDNN backend for Intel CPU devices.
|
||||
- oneDNN backend for Intel CPU devices.
|
||||
- NVIDIA cuDNN for NVIDIA GPUs.
|
||||
|
||||
The responsibility of Inference Engine Plugin:
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Model Optimizer Developer Guide {#openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide}
|
||||
# Model Optimizer User Guide {#openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide}
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
@ -7,678 +7,128 @@
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
||||
openvino_docs_MO_DG_IR_and_opsets
|
||||
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Cutting_Model
|
||||
openvino_docs_MO_DG_Additional_Optimization_Use_Cases
|
||||
openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Customize_Model_Optimizer
|
||||
openvino_docs_MO_DG_FP16_Compression
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_ONNX
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Caffe
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Kaldi
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_tutorials
|
||||
openvino_docs_MO_DG_prepare_model_Model_Optimizer_FAQ
|
||||
openvino_docs_MO_DG_Known_Issues_Limitations
|
||||
openvino_docs_MO_DG_Default_Model_Optimizer_Optimizations
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Introduction
|
||||
## Introduction
|
||||
|
||||
Model Optimizer is a cross-platform command-line tool that facilitates the transition between the training and deployment environment, performs static model analysis, and adjusts deep learning models for optimal execution on end-point target devices.
|
||||
Model Optimizer is a cross-platform command-line tool that facilitates the transition between training and deployment environments, performs static model analysis, and adjusts deep learning models for optimal execution on end-point target devices.
|
||||
|
||||
Model Optimizer process assumes you have a network model trained using supported deep learning frameworks: Caffe*, TensorFlow*, Kaldi*, MXNet* or converted to the ONNX* format. Model Optimizer produces an Intermediate Representation (IR) of the network, which can be inferred with the [OpenVINO™ Runtime](../OV_Runtime_UG/openvino_intro.md).
|
||||
Using Model Optimizer tool assumes you already have a deep learning model trained using one of the supported frameworks: TensorFlow, PyTorch, PaddlePaddle, MXNet, Caffe, Kaldi, or represented in ONNX* format. Model Optimizer produces an Intermediate Representation (IR) of the model, which can be inferred with [OpenVINO™ Runtime](../OV_Runtime_UG/openvino_intro.md).
|
||||
|
||||
> **NOTE**: Model Optimizer does not infer models. Model Optimizer is an offline tool that runs before the inference takes place.
|
||||
> **NOTE**: Model Optimizer does not infer models. Model Optimizer is an offline tool that converts a model into IR and optimizes before the inference takes place.
|
||||
|
||||
The scheme below illustrates the typical workflow for deploying a trained deep learning model:
|
||||
The scheme below illustrates the typical workflow for deploying a trained deep learning model:
|
||||
|
||||

|
||||
|
||||
The IR is a pair of files describing the model:
|
||||
The IR is a pair of files describing the model:
|
||||
|
||||
* <code>.xml</code> - Describes the network topology
|
||||
|
||||
* <code>.bin</code> - Contains the weights and biases binary data.
|
||||
|
||||
> **NOTE**: The generated IR can be additionally optimized for inference by [Post-training Optimization tool](../../tools/pot/README.md)
|
||||
> that applies post-training quantization methods.
|
||||
|
||||
> **TIP**: You also can work with the Model Optimizer inside the OpenVINO™ [Deep Learning Workbench](https://docs.openvino.ai/latest/workbench_docs_Workbench_DG_Introduction.html) (DL Workbench).
|
||||
> [DL Workbench](https://docs.openvino.ai/latest/workbench_docs_Workbench_DG_Introduction.html) is a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models.
|
||||
|
||||
## Install Model Optimizer Pre-Requisites
|
||||
|
||||
Before running the Model Optimizer, you must install the Model Optimizer pre-requisites for the framework that was used to train the model.
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: Using configuration scripts
|
||||
|
||||
.. tab:: Linux
|
||||
|
||||
.. tab:: All frameworks
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
./install_prerequisites.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
./install_prerequisites.shs
|
||||
|
||||
.. tab:: Caffe
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisitess
|
||||
install_prerequisites_caffe.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_caffe.shs
|
||||
|
||||
.. tab:: Tensorflow 1.x
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_tf.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_tf.sh
|
||||
|
||||
.. tab:: Tensorflow 2.x
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_tf2.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_tf2.sh
|
||||
|
||||
.. tab:: MXNet
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_mxnet.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_mxnet.sh
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_onnx.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_onnx.sh
|
||||
|
||||
.. tab:: Kaldi
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_kaldi.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_kaldi.sh
|
||||
|
||||
.. tab:: Windows
|
||||
|
||||
.. tab:: All frameworks
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites\
|
||||
install_prerequisites.bat
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
install_prerequisites.bat
|
||||
|
||||
.. tab:: Caffe
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites\
|
||||
install_prerequisites_caffe.bat
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
install_prerequisites_caffe.bat
|
||||
|
||||
.. tab:: Tensorflow 1.x
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites\
|
||||
install_prerequisites_tf.bat
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
install_prerequisites_tf.bat
|
||||
|
||||
.. tab:: Tensorflow 2.x
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites\
|
||||
install_prerequisites_tf2.bat
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
install_prerequisites_tf2.bat
|
||||
|
||||
.. tab:: MXNet
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites\
|
||||
install_prerequisites_mxnet.bat
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
install_prerequisites_mxnet.bat
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites\
|
||||
install_prerequisites_onnx.bat
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
install_prerequisites_onnx.bat
|
||||
|
||||
.. tab:: Kaldi
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites\
|
||||
install_prerequisites_kaldi.bat
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer\install_prerequisites
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
install_prerequisites_kaldi.bat
|
||||
|
||||
.. tab:: macOS
|
||||
|
||||
.. tab:: All frameworks
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
./install_prerequisites.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
./install_prerequisites.shs
|
||||
|
||||
.. tab:: Caffe
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisitess
|
||||
install_prerequisites_caffe.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_caffe.shs
|
||||
|
||||
.. tab:: Tensorflow 1.x
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_tf.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_tf.sh
|
||||
|
||||
.. tab:: Tensorflow 2.x
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_tf2.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_tf2.sh
|
||||
|
||||
.. tab:: MXNet
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_mxnet.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_mxnet.sh
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_onnx.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_onnx.sh
|
||||
|
||||
.. tab:: Kaldi
|
||||
|
||||
.. tab:: Install globally
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
install_prerequisites_kaldi.sh
|
||||
|
||||
.. tab:: Install to virtualenv
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate # sh, bash, ksh, or zsh
|
||||
install_prerequisites_kaldi.sh
|
||||
|
||||
.. tab:: Using manual configuration process
|
||||
|
||||
.. tab:: Linux
|
||||
|
||||
.. tab:: All frameworks
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
.. tab:: Caffe
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_caffe.txt
|
||||
|
||||
.. tab:: Tensorflow 1.x
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_tf.txt
|
||||
|
||||
.. tab:: Tensorflow 2.x
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_tf2.txt
|
||||
|
||||
.. tab:: MXNet
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_mxnet.txt
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_onnx.txt
|
||||
|
||||
.. tab:: Kaldi
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_kaldi.txt
|
||||
|
||||
.. tab:: Windows
|
||||
|
||||
.. tab:: All frameworks
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
pip install -r requirements.txt
|
||||
|
||||
.. tab:: Caffe
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
pip install -r requirements_caffe.txt
|
||||
|
||||
.. tab:: Tensorflow 1.x
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
pip install -r requirements_tf.txt
|
||||
|
||||
.. tab:: Tensorflow 2.x
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
pip install -r requirements_tf2.txt
|
||||
|
||||
.. tab:: MXNet
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
pip install -r requirements_mxnet.txt
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
pip install -r requirements_onnx.txt
|
||||
|
||||
.. tab:: Kaldi
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>\deployment_tools\model_optimizer
|
||||
virtualenv --system-site-packages -p python .\env
|
||||
env\Scripts\activate.bat
|
||||
pip install -r requirements_kaldi.txt
|
||||
|
||||
.. tab:: macOS
|
||||
|
||||
.. tab:: All frameworks
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
.. tab:: Caffe
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_caffe.txt
|
||||
|
||||
.. tab:: Tensorflow 1.x
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_tf.txt
|
||||
|
||||
.. tab:: Tensorflow 2.x
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_tf2.txt
|
||||
|
||||
.. tab:: MXNet
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_mxnet.txt
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_onnx.txt
|
||||
|
||||
.. tab:: Kaldi
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <INSTALL_DIR>/deployment_tools/model_optimizer/
|
||||
virtualenv --system-site-packages -p python3 ./venv
|
||||
source ./venv/bin/activate
|
||||
pip3 install -r requirements_kaldi.txt
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Run Model Optimizer
|
||||
|
||||
To convert the model to the Intermediate Representation (IR), run Model Optimizer:
|
||||
To convert the model to IR, run Model Optimizer:
|
||||
|
||||
```sh
|
||||
mo --input_model INPUT_MODEL --output_dir <OUTPUT_MODEL_DIR>
|
||||
mo --input_model INPUT_MODEL
|
||||
```
|
||||
|
||||
You need to have write permissions for an output directory.
|
||||
If out-of-the-box conversion (only the `--input_model` parameter is specified) is not succeed,
|
||||
try to use parameters for overriding input shapes and cutting the model, mentioned below.
|
||||
|
||||
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model to Intermediate Representation (IR)](prepare_model/convert_model/Converting_Model.md).
|
||||
To override original input shapes for model conversion, Model Optimizer provides two parameters: `--input` and `--input_shape`.
|
||||
For more information about these parameters, refer to [Setting Input Shapes](prepare_model/convert_model/Converting_Model.md).
|
||||
|
||||
To adjust the conversion process, you may use general parameters defined in the [Converting a Model to Intermediate Representation (IR)](prepare_model/convert_model/Converting_Model.md) and
|
||||
framework-specific parameters for:
|
||||
* [Caffe](prepare_model/convert_model/Convert_Model_From_Caffe.md)
|
||||
* [TensorFlow](prepare_model/convert_model/Convert_Model_From_TensorFlow.md)
|
||||
* [MXNet](prepare_model/convert_model/Convert_Model_From_MxNet.md)
|
||||
* [ONNX](prepare_model/convert_model/Convert_Model_From_ONNX.md)
|
||||
* [Kaldi](prepare_model/convert_model/Convert_Model_From_Kaldi.md)
|
||||
To cut off unwanted parts of a model, such as unsupported operations and training sub-graphs,
|
||||
the `--input` and `--output` parameters can be used, defining new inputs and outputs of the converted model.
|
||||
For a more detailed description, refer to [Cutting Off Parts of a Model](prepare_model/convert_model/Cutting_Model.md).
|
||||
|
||||
## Videos
|
||||
Also, you can insert additional input pre-processing sub-graphs into the converted model using
|
||||
the `--mean_values`, `scales_values`, `--layout`, and other parameters described
|
||||
in [Embedding Preprocessing Computation](prepare_model/Additional_Optimizations.md).
|
||||
|
||||
@sphinxdirective
|
||||
Model Optimizer's compression parameter `--data_type` allows to generate IR of the `FP16` data type. For more details,
|
||||
please refer to [Compression of a Model to FP16](prepare_model/FP16_Compression.md).
|
||||
|
||||
.. list-table::
|
||||
To get the full list of conversion parameters available in Model Optimizer, run the following command:
|
||||
|
||||
* - .. raw:: html
|
||||
```sh
|
||||
mo --help
|
||||
```
|
||||
|
||||
<iframe allowfullscreen mozallowfullscreen msallowfullscreen oallowfullscreen webkitallowfullscreen width="220"
|
||||
src="https://www.youtube.com/embed/Kl1ptVb7aI8">
|
||||
</iframe>
|
||||
|
||||
- .. raw:: html
|
||||
## Examples of CLI Commands
|
||||
|
||||
<iframe allowfullscreen mozallowfullscreen msallowfullscreen oallowfullscreen webkitallowfullscreen width="220"
|
||||
src="https://www.youtube.com/embed/BBt1rseDcy0">
|
||||
</iframe>
|
||||
Below is a list of separate examples for different frameworks and Model Optimizer parameters.
|
||||
|
||||
- .. raw:: html
|
||||
1. Launch Model Optimizer for a TensorFlow MobileNet model in the binary protobuf format.
|
||||
```sh
|
||||
mo --input_model MobileNet.pb
|
||||
```
|
||||
Launch Model Optimizer for a TensorFlow BERT model in the SavedModel format, with three inputs. Explicitly specify input shapes
|
||||
where the batch size and the sequence length equal 2 and 30 respectively.
|
||||
```sh
|
||||
mo --saved_model_dir BERT --input mask,word_ids,type_ids --input_shape [2,30],[2,30],[2,30]
|
||||
```
|
||||
For more information on TensorFlow model conversion,
|
||||
refer to [Converting a TensorFlow Model](prepare_model/convert_model/Convert_Model_From_TensorFlow.md).
|
||||
|
||||
<iframe allowfullscreen mozallowfullscreen msallowfullscreen oallowfullscreen webkitallowfullscreen width="220"
|
||||
src="https://www.youtube.com/embed/RF8ypHyiKrY">
|
||||
</iframe>
|
||||
2. Launch Model Optimizer for an ONNX OCR model and explicitly specify new output.
|
||||
```sh
|
||||
mo --input_model ocr.onnx --output probabilities
|
||||
```
|
||||
For more information on ONNX model conversion,
|
||||
please refer to [Converting an ONNX Model](prepare_model/convert_model/Convert_Model_From_ONNX.md).
|
||||
Note that PyTorch models must be exported to the ONNX format before its conversion into IR.
|
||||
More details can be found in [Converting a PyTorch Model](prepare_model/convert_model/Convert_Model_From_PyTorch.md).
|
||||
|
||||
* - **Model Optimizer Concept.**
|
||||
- **Model Optimizer Basic Operation.**
|
||||
- **Choosing the Right Precision.**
|
||||
3. Launch Model Optimizer for a PaddlePaddle UNet model and apply mean-scale normalization to the input.
|
||||
```sh
|
||||
mo --input_model unet.pdmodel --mean_values [123,117,104] --scale 255
|
||||
```
|
||||
For more information on PaddlePaddle model conversion, please refer to
|
||||
[Converting a PaddlePaddle Model](prepare_model/convert_model/Convert_Model_From_Paddle.md).
|
||||
|
||||
* - Duration: 3:56
|
||||
- Duration: 2:57
|
||||
- Duration: 4:18
|
||||
4. Launch Model Optimizer for an MXNet SSD Inception V3 model and specify first-channel layout for the input.
|
||||
```sh
|
||||
mo --input_model ssd_inception_v3-0000.params --layout NCHW
|
||||
```
|
||||
For more information on MXNet models conversion, please refer to [Converting an MXNet Model](prepare_model/convert_model/Convert_Model_From_MxNet.md).
|
||||
|
||||
@endsphinxdirective
|
||||
5. Launch Model Optimizer for a Caffe AlexNet model with input channels in the RGB format, which needs to be reversed.
|
||||
```sh
|
||||
mo --input_model alexnet.caffemodel --reverse_input_channels
|
||||
```
|
||||
For more information on Caffe model conversion, please refer to [Converting a Caffe Model](prepare_model/convert_model/Convert_Model_From_Caffe.md).
|
||||
|
||||
6. Launch Model Optimizer for a Kaldi LibriSpeech nnet2 model.
|
||||
```sh
|
||||
mo --input_model librispeech_nnet2.mdl --input_shape [1,140]
|
||||
```
|
||||
For more information on Kaldi model conversion,
|
||||
refer to [Converting a Kaldi Model](prepare_model/convert_model/Convert_Model_From_Kaldi.md).
|
||||
|
||||
To get conversion recipes for specific TensorFlow, ONNX, PyTorch, MXNet, and Kaldi models,
|
||||
refer to [Model Conversion Tutorials](prepare_model/convert_model/Convert_Model_Tutorials.md).
|
||||
|
@ -1,9 +0,0 @@
|
||||
# Known Issues and Limitations in the Model Optimizer {#openvino_docs_MO_DG_Known_Issues_Limitations}
|
||||
|
||||
## Model Optimizer for TensorFlow* should be run on Intel® hardware that supports the AVX instruction set
|
||||
|
||||
TensorFlow* provides only prebuilt binaries with AVX instructions enabled. When you're configuring the Model Optimizer by running the `install_prerequisites` or `install_prerequisites_tf` scripts, they download only those ones, which are not supported on hardware such as Intel® Pentium® processor N4200/5, N3350/5, N3450/5 (formerly known as Apollo Lake).
|
||||
|
||||
To run the Model Optimizer on this hardware, you should compile TensorFlow binaries from source as described at the [TensorFlow website](https://www.tensorflow.org/install/source).
|
||||
|
||||
Another option is to run the Model Optimizer to generate an IR on hardware that supports AVX to and then perform inference on hardware without AVX.
|
File diff suppressed because one or more lines are too long
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 42 KiB |
@ -1,15 +1,99 @@
|
||||
# Optimize Preprocessing Computation{#openvino_docs_MO_DG_Additional_Optimization_Use_Cases}
|
||||
# Embedding Preprocessing Computation {#openvino_docs_MO_DG_Additional_Optimization_Use_Cases}
|
||||
|
||||
Model Optimizer performs preprocessing to a model. It is possible to optimize this step and improve first inference time, to do that, follow the tips bellow:
|
||||
Input data for inference can be different from the training dataset and requires additional preprocessing before inference.
|
||||
To accelerate the whole pipeline including preprocessing and inference, Model Optimizer provides special parameters such as `--mean_values`,
|
||||
`--scale_values`, `--reverse_input_channels`, and `--layout`. Based on these parameters, Model Optimizer generates IR with additionally
|
||||
inserted sub-graph that performs the defined preprocessing. This preprocessing block can perform mean-scale normalization of input data,
|
||||
reverting data along channel dimension, and changing the data layout. For more details about these parameters, refer to the paragraphs below.
|
||||
|
||||
- **Image mean/scale parameters**<br>
|
||||
Make sure to use the input image mean/scale parameters (`--scale` and `–mean_values`) with the Model Optimizer when you need pre-processing. It allows the tool to bake the pre-processing into the IR to get accelerated by the OpenVINO Runtime.
|
||||
## When to Specify Layout
|
||||
|
||||
- **RGB vs. BGR inputs**<br>
|
||||
If, for example, your network assumes the RGB inputs, the Model Optimizer can swap the channels in the first convolution using the `--reverse_input_channels` command line option, so you do not need to convert your inputs to RGB every time you get the BGR image, for example, from OpenCV*.
|
||||
You may need to set input layouts, as it is required by some preprocessing, for example, setting a batch,
|
||||
applying mean or scales, and reversing input channels (BGR<->RGB).
|
||||
|
||||
- **Larger batch size**<br>
|
||||
Notice that the devices like GPU are doing better with larger batch size. While it is possible to set the batch size in the runtime using the OpenVINO Runtime API [ShapeInference feature](../../OV_Runtime_UG/ShapeInference.md).
|
||||
Layout defines the meaning of dimensions in shape and can be specified for both inputs and outputs.
|
||||
For the layout syntax, check the [Layout API overview](../../OV_Runtime_UG/layout_overview.md).
|
||||
To specify the layout, you can use `--layout` option followed by the layout value.
|
||||
|
||||
- **Resulting IR precision**<br>
|
||||
The resulting IR precision, for instance, `FP16` or `FP32`, directly affects performance. As CPU now supports `FP16` (while internally upscaling to `FP32` anyway) and because this is the best precision for a GPU target, you may want to always convert models to `FP16`. Notice that this is the only precision that Intel® Movidius™ Myriad™ 2 and Intel® Myriad™ X VPUs support.
|
||||
For example, for the TensorFlow\* `nasnet_large` model that was exported to ONNX format and thus has an input with `NHWC` layout:
|
||||
|
||||
```
|
||||
mo --input_model tf_nasnet_large.onnx --layout nhwc
|
||||
```
|
||||
|
||||
Additionally, if a model has more than one input or needs both input and output layouts specified,
|
||||
you need to provide the name of each input or output to which you apply the layout.
|
||||
|
||||
For example, for the ONNX\* `Yolo v3 Tiny` model that has the first input `input_1` in `NCHW` layout and the second input `image_shape`
|
||||
with 2 dimensions, batch and image size, which can be expressed as the `N?` layout:
|
||||
|
||||
```
|
||||
mo --input_model yolov3-tiny.onnx --layout input_1(nchw),image_shape(n?)
|
||||
```
|
||||
|
||||
## How to Change Layout of a Model Inputs and Outputs
|
||||
|
||||
Changing the model layout may be necessary if it differs from the layout of the data you provide for inference.
|
||||
To change the layout, you can use either `--layout` or `--source_layout` with `--target_layout`.
|
||||
|
||||
For example, for the same `nasnet_large` model mentioned previously, we may want to provide data in `NCHW` layout:
|
||||
|
||||
```
|
||||
mo --input_model tf_nasnet_large.onnx --source_layout nhwc --target_layout nchw
|
||||
mo --input_model tf_nasnet_large.onnx --layout "nhwc->nchw"
|
||||
```
|
||||
|
||||
Again, if a model has more than one input or needs both input and output layouts specified, you need to provide the name of each input or output to which you apply the layout.
|
||||
|
||||
For example, to provide data in the `NHWC` layout for the `Yolo v3 Tiny` model mentioned earlier:
|
||||
|
||||
```
|
||||
mo --input_model yolov3-tiny.onnx --source_layout "input_1(nchw),image_shape(n?)" --target_layout "input_1(nhwc)"
|
||||
mo --input_model yolov3-tiny.onnx --layout "input_1(nchw->nhwc),image_shape(n?)"
|
||||
```
|
||||
|
||||
## When to Specify Mean and Scale Values
|
||||
Neural network models are usually trained with normalized input data. This means that the input data values are converted to be in a specific range,
|
||||
for example, `[0, 1]` or `[-1, 1]`. Sometimes the mean values (mean images) are subtracted from the input data values as part of the pre-processing.
|
||||
There are two cases of how the input data pre-processing is implemented.
|
||||
* The input pre-processing operations are a part of a model. In this case, the application does not pre-process the input data as a separate step: everything is embedded into the model itself.
|
||||
* The input pre-processing operations are not a part of a model and the pre-processing is performed within the application which feeds the model with input data.
|
||||
|
||||
In the first case, the Model Optimizer generates the IR with required pre-processing operations and no `mean` and `scale` parameters are required.
|
||||
|
||||
In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it to the generated IR.
|
||||
Model Optimizer provides command-line parameters to specify the values: `--mean_values`, `--scale_values`, `--scale`.
|
||||
Using these parameters, Model Optimizer embeds the corresponding preprocessing block for mean-value normalization of the input data
|
||||
and optimizes this block so that the preprocessing takes negligible time for inference.
|
||||
|
||||
For example, run the Model Optimizer for the PaddlePaddle* UNet model and apply mean-scale normalization to the input data.
|
||||
|
||||
```sh
|
||||
mo --input_model unet.pdmodel --mean_values [123,117,104] --scale 255
|
||||
```
|
||||
|
||||
## When to Reverse Input Channels <a name="when_to_reverse_input_channels"></a>
|
||||
Sometimes input images for your application can be of the RGB (BGR) format and the model is trained on images of the BGR (RGB) format,
|
||||
the opposite color channel order. In this case, it is important to preprocess the input images by reverting the color channels before inference.
|
||||
To embed this preprocessing step into IR, Model Optimizer provides the `--reverse_input_channels` command-line parameter to shuffle the color channels.
|
||||
|
||||
The `--reverse_input_channels` parameter applies to an input of the model in two cases.
|
||||
* Only one dimension in the input shape has a size equal to 3.
|
||||
* One dimension has an undefined size and is marked as the `C` (channel) dimension with the `--layout` parameter, as shown in the example after this list.
|
||||
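For instance, if the channel dimension cannot be deduced from the input shape, you can mark it explicitly with `--layout` so that `--reverse_input_channels` knows which dimension to revert (the model file name here is a placeholder):

```sh
mo --input_model model.onnx --layout nchw --reverse_input_channels
```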
|
||||
Using the `--reverse_input_channels` parameter, Model Optimizer embeds the corresponding preprocessing block for reverting
|
||||
the input data along channel dimension and optimizes this block so that the preprocessing takes negligible time for inference.
|
||||
|
||||
For example, launch the Model Optimizer for the TensorFlow* AlexNet model and embed the `reverse_input_channels` preprocessing block into the IR.
|
||||
|
||||
```sh
|
||||
mo --input_model alexnet.pb --reverse_input_channels
|
||||
```
|
||||
|
||||
> **NOTE**: If both mean and scale values are specified, the mean is subtracted first and then the scale is applied regardless of the order of options
|
||||
in the command line. Input values are *divided* by the scale value(s). If the `--reverse_input_channels` option is also used, `reverse_input_channels`
|
||||
is applied first, then `mean`, and after that `scale`. The data flow in the model looks as follows:
|
||||
`Parameter -> ReverseInputChannels -> Mean apply -> Scale apply -> the original body of the model`.
|
||||
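In other words, assuming per-channel `mean` and `scale` values are given, the embedded preprocessing block computes the following (a sketch of the order stated in the note above, not an exact list of inserted operations):

\f$ y = \frac{\mathrm{ReverseInputChannels}(x) - \mathrm{mean}}{\mathrm{scale}} \f$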
|
||||
## See Also
|
||||
* [Overview of Preprocessing API](../../OV_Runtime_UG/preprocessing_overview.md)
|
||||
|
@ -1,11 +0,0 @@
|
||||
# Default Model Optimizer Optimizations {#openvino_docs_MO_DG_Default_Model_Optimizer_Optimizations}
|
||||
|
||||
Model Optimizer not only converts a model to IR format, but also performs a number of optimizations. For example, certain primitives, such as linear operations (BatchNorm and ScaleShift), are automatically fused into convolutions. Generally, these layers should not appear in the resulting IR:
|
||||
|
||||

|
||||
|
||||
The picture above shows Caffe\* Resnet269\* topology. The left model is the original model, and the one on the right (after conversion) is the resulting model that the Model Optimizer produces, with BatchNorm and ScaleShift layers fused into the convolution weights rather than constituting separate layers.
|
||||
|
||||
If you still see these operations, inspect the Model Optimizer output carefully, searching for warnings such as the tool being unable to fuse. For example, non-linear operations (like activations) between convolutions and linear operations might prevent the fusing. If performance is a concern, try to change (and potentially re-train) the topology. Refer to the [Model Optimizer Guide](Model_Optimization_Techniques.md) for more optimizations.
|
||||
|
||||
Notice that the activation (`_relu`) is not touched by the Model Optimizer, and while it can be merged into the convolution as well, this is rather a device-specific optimization covered by the OpenVINO Runtime at model load time. You are encouraged to inspect the performance counters from plugins, which should indicate that these particular layers are not executed (“Optimized out”). For more information, refer to <a href="#performance-counters">Internal Inference Performance Counters</a>.
|
20
docs/MO_DG/prepare_model/FP16_Compression.md
Normal file
@ -0,0 +1,20 @@
|
||||
# Compression of a Model to FP16 {#openvino_docs_MO_DG_FP16_Compression}
|
||||
|
||||
Model Optimizer can convert all floating-point weights to `FP16` data type. The resulting IR is called
|
||||
compressed `FP16` model.
|
||||
|
||||
To compress the model, use the `--data_type` option:
|
||||
|
||||
```
|
||||
mo --input_model INPUT_MODEL --data_type FP16
|
||||
```
|
||||
|
||||
> **NOTE**: Using `--data_type FP32` will give no result and will not force `FP32`
|
||||
> precision in the model. If the model was `FP16`, it will have `FP16` precision in the IR as well.
|
||||
|
||||
The resulting model occupies about half the space in the file system, but it may have some accuracy drop,
|
||||
although for the majority of models accuracy degradation is negligible. For details on how plugins handle
|
||||
compressed `FP16` models refer to [Working with devices](../../OV_Runtime_UG/supported_plugins/Device_Plugins.md) page.
|
||||
|
||||
> **NOTE**: `FP16` compression is sometimes used as an initial step for `INT8` quantization. Refer to the
|
||||
> [Post-Training Optimization tool](../../../tools/pot/README.md) for more information about that.
|
@ -9,7 +9,7 @@ When evaluating performance of your model with the OpenVINO Runtime, you must me
|
||||
|
||||
- Track separately the operations that happen outside the OpenVINO Runtime, like video decoding.
|
||||
|
||||
> **NOTE**: Some image pre-processing can be baked into the IR and accelerated. For more information, refer to [Model Optimizer Knobs Related to Performance](Additional_Optimizations.md)
|
||||
> **NOTE**: Some image pre-processing can be baked into the IR and accelerated. For more information, refer to [Embedding Preprocessing Computation](Additional_Optimizations.md)
|
||||
|
||||
## Tip 2. Getting Credible Performance Numbers
|
||||
|
||||
|
@ -1,65 +0,0 @@
|
||||
# Model Optimization Techniques {#openvino_docs_MO_DG_prepare_model_Model_Optimization_Techniques}
|
||||
|
||||
Optimization offers methods to accelerate inference with convolutional neural networks (CNNs) that do not require model retraining.
|
||||
|
||||
* * *
|
||||
|
||||
## Linear Operations Fusing
|
||||
|
||||
Many convolutional neural networks include `BatchNormalization` and `ScaleShift` layers (for example, ResNet\*, Inception\*) that can be presented as a sequence of linear operations: additions and multiplications. For example, a `ScaleShift` layer can be presented as a Mul → Add sequence. These layers can be fused into preceding `Convolution` or `FullyConnected` layers, except when a Convolution comes after an Add operation (due to Convolution paddings).
|
||||
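As a sketch of why this fusing is valid (standard algebra, not tied to any particular model): if a convolution with weights \f$W\f$ and bias \f$b_0\f$ is followed by a per-channel multiplication by \f$s\f$ and an addition of \f$t\f$ (the decomposed form of `BatchNormalization`/`ScaleShift`), the sequence is equivalent to a single convolution with rescaled weights and an adjusted bias:

\f$ s \cdot (W \ast x + b_0) + t = (s \cdot W) \ast x + (s \cdot b_0 + t) \f$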
|
||||
### Usage
|
||||
|
||||
In the Model Optimizer, this optimization is turned on by default. To disable it, pass the `--disable_fusing` parameter to the Model Optimizer.
|
||||
|
||||
### Optimization Description
|
||||
|
||||
This optimization method consists of three stages:
|
||||
|
||||
1. <strong>`BatchNormalization` and `ScaleShift` decomposition</strong>: in this stage, `BatchNormalization` layer is decomposed to `Mul → Add → Mul → Add` sequence, and `ScaleShift` layer is decomposed to `Mul → Add` layers sequence.
|
||||
|
||||
2. **Linear operations merge**: in this stage, the `Mul` and `Add` operations are merged into a single `Mul → Add` instance.
|
||||
For example, if there is a `BatchNormalization → ScaleShift` sequence in the topology, it is replaced with `Mul → Add` in the first stage. In the next stage, this sequence is replaced with a `ScaleShift` layer if there is no available `Convolution` or `FullyConnected` layer to fuse into.
|
||||
3. **Linear operations fusion**: in this stage, the tool fuses `Mul` and `Add` operations into `Convolution` or `FullyConnected` layers. Notice that it searches for `Convolution` and `FullyConnected` layers both backward and forward in the graph (except for the `Add` operation, which cannot be fused into a `Convolution` layer in the forward direction).
|
||||
|
||||
### Usage Examples
|
||||
|
||||
The picture below shows the depicted part of Caffe\* Resnet269 topology where `BatchNorm` and `ScaleShift` layers will be fused to `Convolution` layers.
|
||||
|
||||

|
||||
|
||||
* * *
|
||||
|
||||
## ResNet optimization (stride optimization)
|
||||
|
||||
ResNet optimization is a specific optimization that applies to Caffe ResNet topologies such as ResNet50, ResNet101, ResNet152 and to ResNet-based topologies. This optimization is turned on by default, and can be disabled with the `--disable_resnet_optimization` key.
|
||||
|
||||
### Optimization Description
|
||||
|
||||
In the picture below, you can see the original and optimized parts of a Caffe ResNet50 model. The main idea of this optimization is to move a stride that is greater than 1 from Convolution layers with kernel size = 1 to upper Convolution layers. In addition, the Model Optimizer adds a Pooling layer to align the input shape for an Eltwise layer, if it was changed during the optimization.
|
||||
|
||||

|
||||
|
||||
In this example, the stride from the `res3a_branch1` and `res3a_branch2a` Convolution layers moves to the `res2c_branch2b` Convolution layer. In addition, to align the input shape for `res2c` Eltwise, the optimization inserts the Pooling layer with kernel size = 1 and stride = 2.
|
||||
|
||||
* * *
|
||||
|
||||
## Grouped Convolution Fusing
|
||||
|
||||
Grouped convolution fusing is a specific optimization that applies to TensorFlow\* topologies. The main idea of this optimization is to combine the convolution results for the `Split` outputs and then recombine them using a `Concat` operation in the same order as they came out of the `Split`.
|
||||
|
||||

|
||||
|
||||
* * *
|
||||
|
||||
## Disable Fusing
|
||||
|
||||
Model Optimizer allows you to disable optimizations for specified nodes via `--finegrain_fusing <node_name1>,<node_name2>,...` (regex is also supported). Using this key, you mark nodes that will not be touched by any optimizations.
|
||||
|
||||
### Examples of usage
|
||||
|
||||
The picture below shows two visualized Intermediate Representations (IR) of the TensorFlow InceptionV4 topology.
|
||||
The first one is the original IR produced by the Model Optimizer.
|
||||
The second one is produced by the Model Optimizer with the key `--finegrain_fusing InceptionV4/InceptionV4/Conv2d_1a_3x3/Conv2D`, where you can see that the `Convolution` was not fused with the `Mul1_3752` and `Mul1_4061/Fused_Mul_5096/FusedScaleShift_5987` operations.
|
||||
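A corresponding command might look as follows (the frozen model file name is a placeholder; the node name is the one shown above):

```sh
mo --input_model inception_v4.pb --finegrain_fusing InceptionV4/InceptionV4/Conv2d_1a_3x3/Conv2D
```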
|
||||

|
@ -6,7 +6,7 @@ If your question is not covered by the topics below, use the [OpenVINO™ Su
|
||||
|
||||
Internally, the Model Optimizer uses a protobuf library to parse and load Caffe\* models. This library requires a file grammar and a generated parser. For a Caffe fallback, the Model Optimizer uses a Caffe-generated parser for a Caffe-specific `.proto` file (which is usually located in the `src/caffe/proto` directory). So, if you have Caffe installed on your machine with Python* interface available, make sure that this is exactly the version of Caffe that was used to create the model.
|
||||
|
||||
If you just want to experiment with the Model Optimizer and test a Python extension for working with your custom
|
||||
If you just want to experiment with the Model Optimizer and test a Python extension for working with your custom
|
||||
layers without building Caffe, add the layer description to the `caffe.proto` file and generate a parser for it.
|
||||
|
||||
For example, to add the description of the `CustomReshape` layer, which is an artificial layer not present in any `caffe.proto` files:
|
||||
@ -25,17 +25,17 @@ For example, to add the description of the `CustomReshape` layer, which is an ar
|
||||
optional BlobShape shape = 1; // we just use the same parameter type as some other Caffe layers
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
2. Generate a new parser:
|
||||
```shell
|
||||
cd <SITE_PACKAGES_WITH_INSTALLED_OPENVINO>/openvino/tools/mo/front/caffe/proto
|
||||
python3 generate_caffe_pb2.py --input_proto <PATH_TO_CUSTOM_CAFFE>/src/caffe/proto/caffe.proto
|
||||
```
|
||||
where `PATH_TO_CUSTOM_CAFFE` is the path to the root directory of custom Caffe\*.
|
||||
|
||||
|
||||
3. Now, the Model Optimizer is able to load the model into memory and start working with your extensions if there are any.
|
||||
|
||||
However, because your model has custom layers, you must register your custom layers as custom. To learn more about it, refer to the section [Custom Layers in Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md).
|
||||
However, because your model has custom layers, you must register your custom layers as custom. To learn more about it, refer to the section [Custom Layers in Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md).
|
||||
|
||||
#### 2. How do I create a bare caffemodel, if I have only prototxt? <a name="question-2"></a>
|
||||
|
||||
@ -48,8 +48,8 @@ net.save('<PATH_TO_PROTOTXT>/my_net.caffemodel')
|
||||
```
|
||||
#### 3. What does the message "[ ERROR ]: Unable to create ports for node with id" mean? <a name="question-3"></a>
|
||||
|
||||
Most likely, the Model Optimizer does not know how to infer output shapes of some layers in the given topology.
|
||||
To lessen the scope, compile the list of layers that are custom for the Model Optimizer: present in the topology,
|
||||
Most likely, the Model Optimizer does not know how to infer output shapes of some layers in the given topology.
|
||||
To lessen the scope, compile the list of layers that are custom for the Model Optimizer: present in the topology,
|
||||
absent in [list of supported layers](Supported_Frameworks_Layers.md) for the target framework. Then refer to available options in the corresponding section in [Custom Layers in Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md).
|
||||
|
||||
#### 4. What does the message "Input image of shape is larger than mean image from file" mean? <a name="question-4"></a>
|
||||
@ -100,7 +100,7 @@ message NetParameter {
|
||||
```
|
||||
So, the input layer of the provided model must be specified in one of the following styles:
|
||||
|
||||
*
|
||||
*
|
||||
```sh
|
||||
input: "data"
|
||||
input_shape
|
||||
@ -111,8 +111,8 @@ input_shape
|
||||
dim: 227
|
||||
}
|
||||
```
|
||||
|
||||
*
|
||||
|
||||
*
|
||||
```sh
|
||||
input: "data"
|
||||
input_shape
|
||||
@ -129,7 +129,7 @@ input_shape
|
||||
dim: 3
|
||||
}
|
||||
```
|
||||
*
|
||||
*
|
||||
```sh
|
||||
layer
|
||||
{
|
||||
@ -146,7 +146,7 @@ layer
|
||||
input_param {shape: {dim: 1 dim: 3}}
|
||||
}
|
||||
```
|
||||
*
|
||||
*
|
||||
```sh
|
||||
input: "data"
|
||||
input_dim: 1
|
||||
@ -252,7 +252,7 @@ Looks like you have provided only one shape for the placeholder, however there a
|
||||
|
||||
#### 33. What does the message "The amount of input nodes for port is not equal to 1" mean? <a name="question-33"></a>
|
||||
|
||||
This error occurs when the `SubgraphMatch.single_input_node` function is used for an input port that supplies more than one node in a sub-graph. The `single_input_node` function can be used only for ports that has a single consumer inside the matching sub-graph. When multiple nodes are connected to the port, use the `input_nodes` function or `node_by_pattern` function instead of `single_input_node`. Please, refer to [Sub-Graph Replacement in the Model Optimizer](customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md) for more details.
|
||||
This error occurs when the `SubgraphMatch.single_input_node` function is used for an input port that supplies more than one node in a sub-graph. The `single_input_node` function can be used only for ports that has a single consumer inside the matching sub-graph. When multiple nodes are connected to the port, use the `input_nodes` function or `node_by_pattern` function instead of `single_input_node`. Please, refer to **Graph Transformation Extensions** section in the [Model Optimizer Extensibility](customize_model_optimizer/Customize_Model_Optimizer.md) documentation for more details.
|
||||
|
||||
#### 34. What does the message "Output node for port has already been specified" mean? <a name="question-34"></a>
|
||||
|
||||
@ -350,7 +350,7 @@ You cannot specify the batch and the input shape at the same time. You should sp
|
||||
|
||||
The specified input shape cannot be parsed. Please, define it in one of the following ways:
|
||||
|
||||
*
|
||||
*
|
||||
```shell
|
||||
mo --input_model <INPUT_MODEL>.caffemodel --input_shape (1,3,227,227)
|
||||
```
|
||||
@ -447,7 +447,7 @@ This message may appear when the `--data_type=FP16` command line option is used.
|
||||
|
||||
#### 78. What does the message "The amount of nodes matched pattern ... is not equal to 1" mean? <a name="question-78"></a>
|
||||
|
||||
This error occurs when the `SubgraphMatch.node_by_pattern` function is used with a pattern that does not uniquely identify a single node in a sub-graph. Try to extend the pattern string to make unambiguous match to a single sub-graph node. For more details, refer to [Sub-graph Replacement in the Model Optimizer](customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md).
|
||||
This error occurs when the `SubgraphMatch.node_by_pattern` function is used with a pattern that does not uniquely identify a single node in a sub-graph. Try to extend the pattern string to make unambiguous match to a single sub-graph node. For more details, refer to **Graph Transformation Extensions** section in the [Model Optimizer Extensibility](customize_model_optimizer/Customize_Model_Optimizer.md) documentation.
|
||||
|
||||
#### 79. What does the message "The topology contains no "input" layers" mean? <a name="question-79"></a>
|
||||
|
||||
@ -459,18 +459,18 @@ You are using an unsupported Python\* version. Use only versions 3.4 - 3.6 for t
|
||||
|
||||
#### 81. What does the message "Arguments --nd_prefix_name, --pretrained_model_name and --input_symbol should be provided. Please provide all or do not use any." mean? <a name="question-81"></a>
|
||||
|
||||
This error occurs if you do not provide `--nd_prefix_name`, `--pretrained_model_name` and `--input_symbol` parameters.
|
||||
Model Optimizer requires both `.params` and `.nd` model files to merge into the result file (`.params`). Topology
|
||||
This error occurs if you do not provide `--nd_prefix_name`, `--pretrained_model_name` and `--input_symbol` parameters.
|
||||
Model Optimizer requires both `.params` and `.nd` model files to merge into the result file (`.params`). Topology
|
||||
description (`.json` file) should be prepared (merged) in advance and provided with `--input_symbol` parameter.
|
||||
|
||||
If you add to your model additional layers and weights that are in `.nd` files, the Model Optimizer can build a model
|
||||
If you add to your model additional layers and weights that are in `.nd` files, the Model Optimizer can build a model
|
||||
from one `.params` file and two additional `.nd` files (`*_args.nd`, `*_auxs.nd`).
|
||||
To do that, provide both CLI options or do not pass them if you want to convert an MXNet model without additional weights.
|
||||
For more information, refer to [Converting a MXNet* Model](convert_model/Convert_Model_From_MxNet.md).
|
||||
|
||||
#### 82. What does the message "You should specify input for mean/scale values" mean? <a name="question-82"></a>
|
||||
|
||||
In case when the model has multiple inputs and you want to provide mean/scale values, you need to pass those values for each input. More specifically, a number of passed values should be the same as the number of inputs of the model.
|
||||
In case when the model has multiple inputs and you want to provide mean/scale values, you need to pass those values for each input. More specifically, a number of passed values should be the same as the number of inputs of the model.
|
||||
For more information, refer to [Converting a Model to Intermediate Representation](convert_model/Converting_Model.md).
|
||||
|
||||
#### 83. What does the message "Input with name ... not found!" mean? <a name="question-83"></a>
|
||||
@ -490,7 +490,7 @@ For more information, refer to [Converting a MXNet* Model](convert_model/Convert
|
||||
|
||||
#### 86. What does the message "Operation ... not supported. Please register it as custom op" mean? <a name="question-86"></a>
|
||||
|
||||
Model Optimizer tried to load the model that contains some unsupported operations.
|
||||
Model Optimizer tried to load the model that contains some unsupported operations.
|
||||
If you want to convert model that contains unsupported operations you need to prepare extension for all such operations.
|
||||
For more information, refer to [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md).
|
||||
|
||||
@ -499,7 +499,7 @@ For more information, refer to [OpenVINO™ Extensibility Mechanism](../../Exten
|
||||
This error appears if the class of implementation of op for Python Caffe layer could not be used by Model Optimizer. Python layers should be handled differently compared to ordinary Caffe layers.
|
||||
|
||||
In particular, you need to call the function `register_caffe_python_extractor` and pass `name` as the second argument of the function.
|
||||
The name should be the compilation of the layer name and the module name separated by a dot.
|
||||
The name should be the compilation of the layer name and the module name separated by a dot.
|
||||
|
||||
For example, your topology contains this layer with type `Python`:
|
||||
|
||||
@ -520,7 +520,7 @@ What you do first is implementing an extension for this layer in the Model Optim
|
||||
```
|
||||
class ProposalPythonExampleOp(Op):
|
||||
op = 'Proposal'
|
||||
|
||||
|
||||
def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
|
||||
...
|
||||
```
|
||||
@ -536,25 +536,25 @@ Op.excluded_classes.append(ProposalPythonExampleOp)
|
||||
|
||||
Note that the first call <code>register_caffe_python_extractor(ProposalPythonExampleOp, 'rpn.proposal_layer.ProposalLayer')</code> registers extension of the layer in the Model Optimizer that will be found by the specific name (mandatory to join module name and layer name): <code>rpn.proposal_layer.ProposalLayer</code>.
|
||||
|
||||
The second call prevents Model Optimizer from using this extension as if it is an extension for
|
||||
The second call prevents Model Optimizer from using this extension as if it is an extension for
|
||||
a layer with type `Proposal`. Otherwise, this layer can be chosen as an implementation of extension that can lead to potential issues.
|
||||
For more information, refer to the [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md).
|
||||
|
||||
#### 88. What does the message "Model Optimizer is unable to calculate output shape of Memory node .." mean? <a name="question-88"></a>
|
||||
|
||||
Model Optimizer supports only `Memory` layers, in which `input_memory` goes before `ScaleShift` or `FullyConnected` layer.
|
||||
Model Optimizer supports only `Memory` layers, in which `input_memory` goes before `ScaleShift` or `FullyConnected` layer.
|
||||
This error message means that in your model the layer after input memory is not of type `ScaleShift` or `FullyConnected`.
|
||||
This is a known limitation.
|
||||
|
||||
#### 89. What do the messages "File ... does not appear to be a Kaldi file (magic number does not match)", "Kaldi model should start with <Nnet> tag" mean? <a name="question-89"></a>
|
||||
|
||||
These error messages mean that the Model Optimizer does not support your Kaldi\* model, because check sum of the model is not
|
||||
These error messages mean that the Model Optimizer does not support your Kaldi\* model, because check sum of the model is not
|
||||
16896 (the model should start with this number) or the model file does not start with the `<Nnet>` tag.
|
||||
Double check that you provide a path to a true Kaldi model and try again.
|
||||
|
||||
#### 90. What do the messages "Expect counts file to be one-line file." or "Expect counts file to contain list of integers" mean? <a name="question-90"></a>
|
||||
|
||||
These messages mean that you passed the file counts containing not one line. The count file should start with
|
||||
These messages mean that you passed the file counts containing not one line. The count file should start with
|
||||
`[` and end with `]`, and integer values should be separated by space between those signs.
|
||||
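For illustration, a valid counts file for a model with four output units is a single line such as the following (the numbers are placeholders):

```sh
[ 15 20 30 40 ]
```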
|
||||
#### 91. What does the message "Model Optimizer is not able to read Kaldi model .." mean? <a name="question-91"></a>
|
||||
@ -570,8 +570,8 @@ file is not available or does not exist. Also refer to FAQ [#90](#question-90).
|
||||
#### 93. What does the message "For legacy MXNet models Model Optimizer does not support conversion of old MXNet models (trained with 1.0.0 version of MXNet and lower) with custom layers." mean? <a name="question-93"></a>
|
||||
|
||||
This message means that if you have model with custom layers and its json file has been generated with MXNet version
|
||||
lower than 1.0.0, Model Optimizer does not support such topologies. If you want to convert it you have to rebuild
|
||||
MXNet with unsupported layers or generate new json with MXNet version 1.0.0 and higher. Also you need to implement
|
||||
lower than 1.0.0, Model Optimizer does not support such topologies. If you want to convert it you have to rebuild
|
||||
MXNet with unsupported layers or generate new json with MXNet version 1.0.0 and higher. Also you need to implement
|
||||
OpenVINO extension for used custom layers.
|
||||
For more information, refer to the [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md).
|
||||
|
||||
@ -581,22 +581,22 @@ Model Optimizer supports only straightforward models without cycles.
|
||||
|
||||
There are multiple ways to avoid cycles:
|
||||
|
||||
For Tensorflow:
|
||||
For Tensorflow:
|
||||
* [Convert models, created with TensorFlow Object Detection API](convert_model/tf_specific/Convert_Object_Detection_API_Models.md)
|
||||
|
||||
For all frameworks:
|
||||
1. [Replace cycle containing Sub-graph in Model Optimizer](customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md)
|
||||
For all frameworks:
|
||||
1. [Replace cycle containing Sub-graph in Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
2. [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md)
|
||||
|
||||
or
|
||||
* Edit network in original framework to exclude cycle.
|
||||
* Edit model in original framework to exclude cycle.
|
||||
|
||||
#### 98. What does the message "Can not transpose attribute '..' with value .. for node '..' .." mean? <a name="question-98"></a>
|
||||
|
||||
This message means that model is not supported. It may be caused by using shapes larger than 4-D.
|
||||
There are two ways to avoid such message:
|
||||
|
||||
1. [Cut model part containing such layers in Model Optimizer](convert_model/Cutting_Model.md)
|
||||
1. [Cutting Off Parts of a Model](convert_model/Cutting_Model.md)
|
||||
2. Edit network in original framework to exclude such layers.
|
||||
|
||||
#### 99. What does the message "Expected token `</ParallelComponent>`, has `...`" mean? <a name="question-99"></a>
|
||||
@ -611,7 +611,7 @@ But there are exceptions. For example, output value of layer Interp is calculate
|
||||
|
||||
#### 101. What does the message "Mean/scale values should ..." mean? <a name="question-101"></a>
|
||||
|
||||
It means that your mean/scale values have wrong format. Specify mean/scale values using the form `layer_name(val1,val2,val3)`.
|
||||
It means that your mean/scale values have wrong format. Specify mean/scale values using the form `layer_name(val1,val2,val3)`.
|
||||
You need to specify values for each input of the model. For more information, refer to [Converting a Model to Intermediate Representation](convert_model/Converting_Model.md).
|
||||
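For illustration, for a hypothetical model with two inputs named `data` and `info`, the values could be passed as follows (the input names and numbers are placeholders; the actual values must match the preprocessing your model was trained with):

```sh
mo --input_model model.caffemodel --mean_values data(123,117,104),info(0) --scale_values data(255,255,255),info(1)
```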
|
||||
#### 102. What does the message "Operation _contrib_box_nms is not supported ..." mean? <a name="question-102"></a>
|
||||
@ -632,10 +632,10 @@ Note that you might have conflicts between previously installed PyPI dependencie
|
||||
|
||||
#### 105. What does the message "The IR preparation was executed by the legacy MO path. ..." mean? <a name="question-105"></a>
|
||||
|
||||
For the models in ONNX* format, there are two available paths of IR conversion.
|
||||
The old one is handled by the old Python* implementation, while the new one uses new C++ frontends.
|
||||
Starting from the 2022.1 version, the default IR conversion path for ONNX models is processed using the new ONNX frontend.
|
||||
Certain features, such as `--extensions` and `--transformations_config`, are not yet fully supported on the new frontends.
|
||||
For `--extensions`, the new frontends support only paths to shared libraries (.dll and .so). For `--transformations_config`, they support JSON configurations with defined library fields.
|
||||
Inputs freezing (enabled by `--freeze_placeholder_with_value` or `--input` arguments) is not supported on the new frontends.
|
||||
For the models in ONNX* format, there are two available paths of IR conversion.
|
||||
The old one is handled by the old Python* implementation, while the new one uses new C++ frontends.
|
||||
Starting from the 2022.1 version, the default IR conversion path for ONNX models is processed using the new ONNX frontend.
|
||||
Certain features, such as `--extensions` and `--transformations_config`, are not yet fully supported on the new frontends.
|
||||
For `--extensions`, the new frontends support only paths to shared libraries (.dll and .so). For `--transformations_config`, they support JSON configurations with defined library fields.
|
||||
Inputs freezing (enabled by `--freeze_placeholder_with_value` or `--input` arguments) is not supported on the new frontends.
|
||||
The IR conversion falls back to the old path if a user does not select any expected path of conversion explicitly (by `--use_new_frontend` or `--use_legacy_frontend` MO arguments) and unsupported pre-defined scenario is detected on the new frontend path.
|
||||
|
@ -1,60 +1,12 @@
|
||||
# Converting a Caffe* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Caffe}
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. _convert model caffe:
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
A summary of the steps for optimizing and deploying a model that was trained with Caffe\*:
|
||||
|
||||
1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for Caffe\*.
|
||||
2. [Convert a Caffe\* Model](#Convert_From_Caffe) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values
|
||||
3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [OpenVINO samples](../../../OV_Runtime_UG/Samples_Overview.md)
|
||||
4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment
|
||||
|
||||
## Supported Topologies
|
||||
|
||||
* **Classification models:**
|
||||
* AlexNet
|
||||
* VGG-16, VGG-19
|
||||
* SqueezeNet v1.0, SqueezeNet v1.1
|
||||
* ResNet-50, ResNet-101, Res-Net-152
|
||||
* Inception v1, Inception v2, Inception v3, Inception v4
|
||||
* CaffeNet
|
||||
* MobileNet
|
||||
* Squeeze-and-Excitation Networks: SE-BN-Inception, SE-Resnet-101, SE-ResNet-152, SE-ResNet-50, SE-ResNeXt-101, SE-ResNeXt-50
|
||||
* ShuffleNet v2
|
||||
|
||||
* **Object detection models:**
|
||||
* SSD300-VGG16, SSD500-VGG16
|
||||
* Faster-RCNN
|
||||
* RefineDet (MYRIAD plugin only)
|
||||
|
||||
* **Face detection models:**
|
||||
* VGG Face
|
||||
* SSH: Single Stage Headless Face Detector
|
||||
|
||||
* **Semantic segmentation models:**
|
||||
* FCN8
|
||||
|
||||
> **NOTE**: It is necessary to specify mean and scale values for most of the Caffe\* models to convert them with the Model Optimizer. The exact values should be determined separately for each model. For example, for Caffe\* models trained on ImageNet, the mean values usually are `123.68`, `116.779`, `103.939` for blue, green and red channels respectively. The scale value is usually `127.5`. Refer to the General Conversion Parameters section in [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) for the information on how to specify mean and scale values.
|
||||
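For illustration, using the typical ImageNet values quoted in the note above, the conversion command could look as follows (the model file name is a placeholder, and you should verify the channel order your model expects):

```sh
mo --input_model alexnet.caffemodel --mean_values [123.68,116.779,103.939] --scale 127.5
```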
|
||||
## Convert a Caffe* Model <a name="Convert_From_Caffe"></a>
|
||||
|
||||
To convert a Caffe\* model, run Model Optimizer with the path to the input model `.caffemodel` file and the path to an output directory with write permissions:
|
||||
To convert a Caffe\* model, run Model Optimizer with the path to the input model `.caffemodel` file:
|
||||
|
||||
```sh
|
||||
mo --input_model <INPUT_MODEL>.caffemodel --output_dir <OUTPUT_MODEL_DIR>
|
||||
mo --input_model <INPUT_MODEL>.caffemodel
|
||||
```
|
||||
|
||||
Two groups of parameters are available to convert your model:
|
||||
|
||||
* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page.
|
||||
* [Caffe-specific parameters](#caffe_specific_conversion_params) are used to convert only Caffe\* models.
|
||||
|
||||
### Using Caffe\*-Specific Conversion Parameters <a name="caffe_specific_conversion_params"></a>
|
||||
|
||||
The following list provides the Caffe\*-specific parameters.
|
||||
|
||||
```
|
||||
@ -93,16 +45,16 @@ Caffe*-specific parameters:
|
||||
attributes without flattening nested parameters.
|
||||
```
|
||||
|
||||
#### Command-Line Interface (CLI) Examples Using Caffe\*-Specific Parameters
|
||||
### Command-Line Interface (CLI) Examples Using Caffe\*-Specific Parameters
|
||||
|
||||
* Launching the Model Optimizer for the [bvlc_alexnet.caffemodel](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet) with a specified `prototxt` file. This is needed when the name of the Caffe\* model and the `.prototxt` file are different or are placed in different directories. Otherwise, it is enough to provide only the path to the input `model.caffemodel` file. You must have write permissions for the output directory.
|
||||
```sh
|
||||
mo --input_model bvlc_alexnet.caffemodel --input_proto bvlc_alexnet.prototxt --output_dir <OUTPUT_MODEL_DIR>
|
||||
* Launching the Model Optimizer for the [bvlc_alexnet.caffemodel](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet) with a specified `prototxt` file. This is needed when the name of the Caffe\* model and the `.prototxt` file are different or are placed in different directories. Otherwise, it is enough to provide only the path to the input `model.caffemodel` file.
|
||||
```sh
|
||||
mo --input_model bvlc_alexnet.caffemodel --input_proto bvlc_alexnet.prototxt
|
||||
```
|
||||
* Launching the Model Optimizer for the [bvlc_alexnet.caffemodel](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet) with a specified `CustomLayersMapping` file. This is the legacy method of quickly enabling model conversion if your model has custom layers. This requires the Caffe\* system on the computer.
|
||||
Optional parameters without default values and not specified by the user in the `.prototxt` file are removed from the Intermediate Representation, and nested parameters are flattened:
|
||||
```sh
|
||||
mo --input_model bvlc_alexnet.caffemodel -k CustomLayersMapping.xml --disable_omitting_optional --enable_flattening_nested_params --output_dir <OUTPUT_MODEL_DIR>
|
||||
mo --input_model bvlc_alexnet.caffemodel -k CustomLayersMapping.xml --disable_omitting_optional --enable_flattening_nested_params
|
||||
```
|
||||
This example shows a multi-input model with input layers: `data`, `rois`
|
||||
```
|
||||
@ -124,9 +76,9 @@ Optional parameters without default values and not specified by the user in the
|
||||
}
|
||||
```
|
||||
|
||||
* Launching the Model Optimizer for a multi-input model with two inputs and providing a new shape for each input in the order they are passed to the Model Optimizer along with a writable output directory. In particular, for data, set the shape to `1,3,227,227`. For rois, set the shape to `1,6,1,1`:
|
||||
* Launching the Model Optimizer for a multi-input model with two inputs and providing a new shape for each input in the order they are passed to the Model Optimizer. In particular, for data, set the shape to `1,3,227,227`. For rois, set the shape to `1,6,1,1`:
|
||||
```sh
|
||||
mo --input_model /path-to/your-model.caffemodel --input data,rois --input_shape (1,3,227,227),[1,6,1,1] --output_dir <OUTPUT_MODEL_DIR>
|
||||
mo --input_model /path-to/your-model.caffemodel --input data,rois --input_shape (1,3,227,227),[1,6,1,1]
|
||||
```
|
||||
## Custom Layer Definition
|
||||
|
||||
@ -146,3 +98,6 @@ In this document, you learned:
|
||||
* Basic information about how the Model Optimizer works with Caffe\* models
|
||||
* Which Caffe\* models are supported
|
||||
* How to convert a trained Caffe\* model using the Model Optimizer with both framework-agnostic and Caffe-specific command-line options
|
||||
|
||||
## See Also
|
||||
[Model Conversion Tutorials](Convert_Model_Tutorials.md)
|
||||
|
@ -1,58 +1,14 @@
|
||||
# Converting a Kaldi* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Kaldi}
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. _convert model kaldi:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_kaldi_specific_Aspire_Tdnn_Model
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
A summary of the steps for optimizing and deploying a model that was trained with Kaldi\*:
|
||||
|
||||
1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for Kaldi\*.
|
||||
2. [Convert a Kaldi\* Model](#Convert_From_Kaldi) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values.
|
||||
3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [OpenVINO Samples](../../../OV_Runtime_UG/Samples_Overview.md).
|
||||
4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment.
|
||||
|
||||
> **NOTE**: The Model Optimizer supports the [nnet1](http://kaldi-asr.org/doc/dnn1.html) and [nnet2](http://kaldi-asr.org/doc/dnn2.html) formats of Kaldi models. Support of the [nnet3](http://kaldi-asr.org/doc/dnn3.html) format is limited.
|
||||
|
||||
## Supported Topologies
|
||||
* Convolutional Neural Networks (CNN):
|
||||
* Wall Street Journal CNN (wsj_cnn4b)
|
||||
* Resource Management CNN (rm_cnn4a_smbr)
|
||||
|
||||
* Long Short Term Memory (LSTM) Networks:
|
||||
* Resource Management LSTM (rm_lstm4f)
|
||||
* TED-LIUM LSTM (ted_lstm4f)
|
||||
|
||||
* Deep Neural Networks (DNN):
|
||||
* Wall Street Journal DNN (wsj_dnn5b_smbr);
|
||||
* TED-LIUM DNN (ted_dnn_smbr)
|
||||
|
||||
* Time delay neural network (TDNN)
|
||||
* [ASpIRE Chain TDNN](kaldi_specific/Aspire_Tdnn_Model.md);
|
||||
* [Librispeech nnet3](https://github.com/ryanleary/kaldi-test/releases/download/v0.0/LibriSpeech-trained.tgz).
|
||||
|
||||
* TDNN-LSTM model
|
||||
|
||||
|
||||
## Convert a Kaldi* Model <a name="Convert_From_Kaldi"></a>
|
||||
To convert a Kaldi\* model, run Model Optimizer with the path to the input model `.nnet` or `.mdl` file:
|
||||
|
||||
To convert a Kaldi\* model, run Model Optimizer with the path to the input model `.nnet` or `.mdl` file and to an output directory where you have write permissions:
|
||||
```sh
|
||||
mo --input_model <INPUT_MODEL>.nnet --output_dir <OUTPUT_MODEL_DIR>
|
||||
mo --input_model <INPUT_MODEL>.nnet
|
||||
```
|
||||
|
||||
Two groups of parameters are available to convert your model:
|
||||
|
||||
* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page.
|
||||
* [Kaldi-specific parameters](#kaldi_specific_conversion_params) are used to convert only Kaldi\* models.
|
||||
|
||||
### Using Kaldi\*-Specific Conversion Parameters <a name="kaldi_specific_conversion_params"></a>
|
||||
|
||||
The following list provides the Kaldi\*-specific parameters.
|
||||
@ -67,14 +23,14 @@ Kaldi-specific parameters:
|
||||
|
||||
### Examples of CLI Commands
|
||||
|
||||
* To launch the Model Optimizer for the wsj_dnn5b_smbr model with the specified `.nnet` file and an output directory where you have write permissions:
|
||||
* To launch the Model Optimizer for the wsj_dnn5b_smbr model with the specified `.nnet` file:
|
||||
```sh
|
||||
mo --input_model wsj_dnn5b_smbr.nnet --output_dir <OUTPUT_MODEL_DIR>
|
||||
mo --input_model wsj_dnn5b_smbr.nnet
|
||||
```
|
||||
|
||||
* To launch the Model Optimizer for the wsj_dnn5b_smbr model with existing file that contains counts for the last layer with biases and a writable output directory:
|
||||
* To launch the Model Optimizer for the wsj_dnn5b_smbr model with existing file that contains counts for the last layer with biases:
|
||||
```sh
|
||||
mo --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts --output_dir <OUTPUT_MODEL_DIR>
|
||||
mo --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts
|
||||
```
|
||||
|
||||
* The Model Optimizer normalizes counts in the following way:
|
||||
@ -88,34 +44,36 @@ Kaldi-specific parameters:
|
||||
\f$|C|\f$ - number of elements in the counts array;
|
||||
* The normalized counts are subtracted from biases of the last or next to last layer (if last layer is SoftMax).
|
||||
|
||||
> **NOTE:** Model Optimizer will show warning if model contains counts values inside model and `--counts` option is not used.
|
||||
> **NOTE**: Model Optimizer will show warning if model contains counts values inside model and `--counts` option is not used.
|
||||
|
||||
* If you want to remove the last SoftMax layer in the topology, launch the Model Optimizer with the
|
||||
`--remove_output_softmax` flag:
|
||||
```sh
|
||||
mo --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts --remove_output_softmax --output_dir <OUTPUT_MODEL_DIR>_
|
||||
mo --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts --remove_output_softmax
|
||||
```
|
||||
|
||||
The Model Optimizer finds the last layer of the topology and removes this layer only if it is a SoftMax layer.
|
||||
|
||||
> **NOTE**: Model Optimizer can remove SoftMax layer only if the topology has one output.
|
||||
|
||||
> **NOTE**: For sample inference of Kaldi models, you can use the OpenVINO Speech Recognition sample application. The sample supports models with one output. If your model has several outputs, specify the desired one with the `--output` option.
|
||||
|
||||
If you want to convert a model for inference on Intel® Movidius™ Myriad™, use the `--remove_memory` option.
|
||||
It removes Memory layers from the IR. Instead of it, additional inputs and outputs appear in the IR.
|
||||
|
||||
> **NOTE**: For sample inference of Kaldi models, you can use the OpenVINO Speech Recognition sample application. The sample supports models with one output. If your model has several outputs, specify the desired one with the `--output` option.
|
||||
|
||||
If you want to convert a model for inference on Intel® Movidius™ Myriad™, use the `--remove_memory` option.
|
||||
It removes Memory layers from the IR. Instead, additional inputs and outputs appear in the IR.
|
||||
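A conversion command with this option might look as follows, reusing the model files from the examples above:

```sh
mo --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts --remove_memory
```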
The Model Optimizer outputs the mapping between inputs and outputs. For example:
|
||||
```sh
|
||||
[ WARNING ] Add input/output mapped Parameter_0_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out -> Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out
|
||||
[ WARNING ] Add input/output mapped Parameter_1_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out -> Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out
|
||||
[ WARNING ] Add input/output mapped Parameter_0_for_iteration_Offset_fastlstm3.c_trunc__3390 -> Result_for_iteration_Offset_fastlstm3.c_trunc__3390
|
||||
[ WARNING ] Add input/output mapped Parameter_0_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out -> Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out
|
||||
[ WARNING ] Add input/output mapped Parameter_1_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out -> Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out
|
||||
[ WARNING ] Add input/output mapped Parameter_0_for_iteration_Offset_fastlstm3.c_trunc__3390 -> Result_for_iteration_Offset_fastlstm3.c_trunc__3390
|
||||
```
|
||||
Based on this mapping, link inputs and outputs in your application manually as follows:
|
||||
|
||||
1. Initialize inputs from the mapping as zeros in the first frame of an utterance.
|
||||
2. Copy output blobs from the mapping to the corresponding inputs. For example, data from `Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out`
|
||||
must be copied to `Parameter_0_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out`.
|
||||
|
||||
1. Initialize inputs from the mapping as zeros in the first frame of an utterance.
|
||||
2. Copy output blobs from the mapping to the corresponding inputs. For example, data from `Result_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out`
|
||||
must be copied to `Parameter_0_for_Offset_fastlstm2.r_trunc__2Offset_fastlstm2.r_trunc__2_out`.
|
||||
|
||||
## Supported Kaldi\* Layers
|
||||
Refer to [Supported Framework Layers ](../Supported_Frameworks_Layers.md) for the list of supported standard layers.
|
||||
|
||||
## See Also
|
||||
[Model Conversion Tutorials](Convert_Model_Tutorials.md)
|
||||
|
@ -1,73 +1,12 @@
|
||||
# Converting an MXNet* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet}
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. _convert model mxnet:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet
|
||||
openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_GluonCV_Models
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
A summary of the steps for optimizing and deploying a model that was trained with the MXNet\* framework:
|
||||
|
||||
1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for MXNet* (MXNet was used to train your model)
|
||||
2. [Convert a MXNet model](#ConvertMxNet) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values
|
||||
3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [OpenVINO Samples](../../../OV_Runtime_UG/Samples_Overview.md)
|
||||
4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment
|
||||
|
||||
## Supported Topologies
|
||||
|
||||
> **NOTE**: SSD models from the table require converting to the deploy mode. For details, see the [Conversion Instructions](https://github.com/zhreshold/mxnet-ssd/#convert-model-to-deploy-mode) in the GitHub MXNet-SSD repository.
|
||||
|
||||
| Model Name| Model File |
|
||||
| ------------- |:-------------:|
|
||||
|VGG-16| [Repo](https://github.com/dmlc/mxnet-model-gallery/tree/master), [Symbol](http://data.mxnet.io/models/imagenet/vgg/vgg16-symbol.json), [Params](http://data.mxnet.io/models/imagenet/vgg/vgg16-0000.params)|
|
||||
|VGG-19| [Repo](https://github.com/dmlc/mxnet-model-gallery/tree/master), [Symbol](http://data.mxnet.io/models/imagenet/vgg/vgg19-symbol.json), [Params](http://data.mxnet.io/models/imagenet/vgg/vgg19-0000.params)|
|
||||
|ResNet-152 v1| [Repo](https://github.com/dmlc/mxnet-model-gallery/tree/master), [Symbol](http://data.mxnet.io/models/imagenet/resnet/152-layers/resnet-152-symbol.json), [Params](http://data.mxnet.io/models/imagenet/resnet/152-layers/resnet-152-0000.params)|
|
||||
|SqueezeNet_v1.1| [Repo](https://github.com/dmlc/mxnet-model-gallery/tree/master), [Symbol](http://data.mxnet.io/models/imagenet/squeezenet/squeezenet_v1.1-symbol.json), [Params](http://data.mxnet.io/models/imagenet/squeezenet/squeezenet_v1.1-0000.params)|
|
||||
|Inception BN| [Repo](https://github.com/dmlc/mxnet-model-gallery/tree/master), [Symbol](http://data.mxnet.io/models/imagenet/inception-bn/Inception-BN-symbol.json), [Params](http://data.mxnet.io/models/imagenet/inception-bn/Inception-BN-0126.params)|
|
||||
|CaffeNet| [Repo](https://github.com/dmlc/mxnet-model-gallery/tree/master), [Symbol](http://data.mxnet.io/mxnet/models/imagenet/caffenet/caffenet-symbol.json), [Params](http://data.mxnet.io/models/imagenet/caffenet/caffenet-0000.params)|
|
||||
|DenseNet-121| [Repo](https://github.com/miraclewkf/DenseNet), [Symbol](https://raw.githubusercontent.com/miraclewkf/DenseNet/master/model/densenet-121-symbol.json), [Params](https://drive.google.com/file/d/0ByXcv9gLjrVcb3NGb1JPa3ZFQUk/view?usp=drive_web)|
|DenseNet-161| [Repo](https://github.com/miraclewkf/DenseNet), [Symbol](https://raw.githubusercontent.com/miraclewkf/DenseNet/master/model/densenet-161-symbol.json), [Params](https://drive.google.com/file/d/0ByXcv9gLjrVcS0FwZ082SEtiUjQ/view)|
|DenseNet-169| [Repo](https://github.com/miraclewkf/DenseNet), [Symbol](https://raw.githubusercontent.com/miraclewkf/DenseNet/master/model/densenet-169-symbol.json), [Params](https://drive.google.com/file/d/0ByXcv9gLjrVcOWZJejlMOWZvZmc/view)|
|DenseNet-201| [Repo](https://github.com/miraclewkf/DenseNet), [Symbol](https://raw.githubusercontent.com/miraclewkf/DenseNet/master/model/densenet-201-symbol.json), [Params](https://drive.google.com/file/d/0ByXcv9gLjrVcUjF4MDBwZ3FQbkU/view)|
|MobileNet| [Repo](https://github.com/KeyKy/mobilenet-mxnet), [Symbol](https://github.com/KeyKy/mobilenet-mxnet/blob/master/mobilenet.py), [Params](https://github.com/KeyKy/mobilenet-mxnet/blob/master/mobilenet-0000.params)|
|SSD-ResNet-50| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/resnet50_ssd_512_voc0712_trainval.zip)|
|SSD-VGG-16-300| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.5-beta/vgg16_ssd_300_voc0712_trainval.zip)|
|SSD-Inception v3| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.7-alpha/ssd_inceptionv3_512_voc0712trainval.zip)|
|FCN8 (Semantic Segmentation)| [Repo](https://github.com/apache/incubator-mxnet/tree/master/example/fcn-xs), [Symbol](https://www.dropbox.com/sh/578n5cxej7ofd6m/AAA9SFCBN8R_uL2CnAd3WQ5ia/FCN8s_VGG16-symbol.json?dl=0), [Params](https://www.dropbox.com/sh/578n5cxej7ofd6m/AABHWZHCtA2P6iR6LUflkxb_a/FCN8s_VGG16-0019-cpu.params?dl=0)|
|MTCNN part 1 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det1-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det1-0001.params)|
|MTCNN part 2 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det2-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det2-0001.params)|
|MTCNN part 3 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det3-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det3-0001.params)|
|MTCNN part 4 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det4-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det4-0001.params)|
|Lightened_moon| [Repo](https://github.com/tornadomeet/mxnet-face/tree/master/model/lightened_moon), [Symbol](https://github.com/tornadomeet/mxnet-face/blob/master/model/lightened_moon/lightened_moon_fuse-symbol.json), [Params](https://github.com/tornadomeet/mxnet-face/blob/master/model/lightened_moon/lightened_moon_fuse-0082.params)|
|RNN-Transducer| [Repo](https://github.com/HawkAaron/mxnet-transducer) |
|word_lm| [Repo](https://github.com/apache/incubator-mxnet/tree/master/example/rnn/word_lm) |

**Other supported topologies**

* [GluonCV SSD and YOLO-v3 models](https://gluon-cv.mxnet.io/model_zoo/detection.html) can be converted using the following [instructions](mxnet_specific/Convert_GluonCV_Models.md).
* [Style transfer model](https://github.com/zhaw/neural_style) can be converted using the following [instructions](mxnet_specific/Convert_Style_Transfer_From_MXNet.md).

## Convert an MXNet* Model <a name="ConvertMxNet"></a>

To convert an MXNet\* model, run Model Optimizer with a path to the input model `.params` file and to an output directory where you have write permissions:
To convert an MXNet\* model, run Model Optimizer with a path to the input model `.params` file:

```sh
mo --input_model model-file-0000.params --output_dir <OUTPUT_MODEL_DIR>
mo --input_model model-file-0000.params
```

Two groups of parameters are available to convert your model:

* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page.
* [MXNet-specific parameters](#mxnet_specific_conversion_params) are used to convert only MXNet models. An example that combines both groups follows this list.

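As an illustration only (not from the original guide), the sketch below combines a framework-agnostic option (`--input_shape`) with the MXNet-specific `--input_symbol` option; all file names are placeholders.

```sh
# Sketch: convert an MXNet model whose symbol file name does not follow the
# default <prefix>-symbol.json convention (file names are placeholders).
mo --input_model model-file-0000.params \
   --input_symbol model-file-symbol.json \
   --input_shape [1,3,224,224]
```
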
### Using MXNet\*-Specific Conversion Parameters <a name="mxnet_specific_conversion_params"></a>
The following list provides the MXNet\*-specific parameters.

@ -101,7 +40,7 @@ MXNet-specific parameters:
Internally, when you run the Model Optimizer, it loads the model, goes through the topology, and tries to find each layer type in a list of known layers. Custom layers are layers that are not included in the list of known layers. If your topology contains any layers that are not in this list of known layers, the Model Optimizer classifies them as custom.

## Supported MXNet\* Layers
Refer to [Supported Framework Layers ](../Supported_Frameworks_Layers.md) for the list of supported standard layers.
Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers.

## Frequently Asked Questions (FAQ)

@ -114,3 +53,6 @@ In this document, you learned:

* Basic information about how the Model Optimizer works with MXNet\* models
* Which MXNet\* models are supported
* How to convert a trained MXNet\* model using the Model Optimizer with both framework-agnostic and MXNet-specific command-line options

## See Also
[Model Conversion Tutorials](Convert_Model_Tutorials.md)
@ -1,83 +1,21 @@
# Converting an ONNX* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_ONNX}

@sphinxdirective

.. _convert model onnx:

.. toctree::
   :maxdepth: 1
   :hidden:

   openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Faster_RCNN
   openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Mask_RCNN
   openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_GPT2
   openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM
   openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch

@endsphinxdirective

## Introduction to ONNX

[ONNX*](https://github.com/onnx/onnx) is a representation format for deep learning models. ONNX allows AI developers to easily transfer models between different frameworks, which helps them choose the best combination of tools for their task. Today, PyTorch\*, Caffe2\*, Apache MXNet\*, Microsoft Cognitive Toolkit\*, and other tools are developing ONNX support.

## Supported Public ONNX Topologies
| Model Name | Path to <a href="https://github.com/onnx/models">Public Models</a> master branch|
|:----|:----|
| bert_large | [model archive](https://github.com/mlperf/inference/tree/master/v0.7/language/bert) |
| bvlc_alexnet | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/bvlc_alexnet.tar.gz) |
| bvlc_googlenet | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/bvlc_googlenet.tar.gz) |
| bvlc_reference_caffenet | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/bvlc_reference_caffenet.tar.gz) |
| bvlc_reference_rcnn_ilsvrc13 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/bvlc_reference_rcnn_ilsvrc13.tar.gz) |
| inception_v1 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/inception_v1.tar.gz) |
| inception_v2 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/inception_v2.tar.gz) |
| resnet50 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/resnet50.tar.gz) |
| squeezenet | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/squeezenet.tar.gz) |
| densenet121 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/densenet121.tar.gz) |
| emotion_ferplus | [model archive](https://www.cntk.ai/OnnxModels/emotion_ferplus/opset_2/emotion_ferplus.tar.gz) |
| mnist | [model archive](https://www.cntk.ai/OnnxModels/mnist/opset_1/mnist.tar.gz) |
| shufflenet | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/shufflenet.tar.gz) |
| VGG19 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/vgg19.tar.gz) |
| zfnet512 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/zfnet512.tar.gz) |
| GPT-2 | [model archive](https://github.com/onnx/models/blob/master/text/machine_comprehension/gpt-2/model/gpt2-10.tar.gz) |
| YOLOv3 | [model archive](https://github.com/onnx/models/blob/master/vision/object_detection_segmentation/yolov3/model/yolov3-10.tar.gz) |

Listed models are built with the operation set version 8 except the GPT-2 model (which uses version 10). Models that are upgraded to higher operation set versions may not be supported.
## Supported PaddlePaddle* Models via ONNX Conversion
Starting from the R5 release, the OpenVINO™ toolkit officially supports public PaddlePaddle* models via ONNX conversion.
The list of supported topologies downloadable from PaddleHub is presented below:

| Model Name | Command to download the model from PaddleHub |
|:----|:----|
| [MobileNetV2](https://www.paddlepaddle.org.cn/hubdetail?name=mobilenet_v2_imagenet) | `hub install mobilenet_v2_imagenet==1.0.1` |
| [ResNet18](https://www.paddlepaddle.org.cn/hubdetail?name=resnet_v2_18_imagenet) | `hub install resnet_v2_18_imagenet==1.0.0` |
| [ResNet34](https://www.paddlepaddle.org.cn/hubdetail?name=resnet_v2_34_imagenet) | `hub install resnet_v2_34_imagenet==1.0.0` |
| [ResNet50](https://www.paddlepaddle.org.cn/hubdetail?name=resnet_v2_50_imagenet) | `hub install resnet_v2_50_imagenet==1.0.1` |
| [ResNet101](https://www.paddlepaddle.org.cn/hubdetail?name=resnet_v2_101_imagenet) | `hub install resnet_v2_101_imagenet==1.0.1` |
| [ResNet152](https://www.paddlepaddle.org.cn/hubdetail?name=resnet_v2_152_imagenet) | `hub install resnet_v2_152_imagenet==1.0.1` |
> **NOTE**: To convert a model downloaded from PaddleHub, use the [paddle2onnx](https://github.com/PaddlePaddle/paddle2onnx) converter.

The list of supported topologies from the [models v1.5](https://github.com/PaddlePaddle/models/tree/release/1.5) package:
* [MobileNetV1](https://github.com/PaddlePaddle/models/blob/release/1.5/PaddleCV/image_classification/models/mobilenet.py)
* [MobileNetV2](https://github.com/PaddlePaddle/models/blob/release/1.5/PaddleCV/image_classification/models/mobilenet_v2.py)
* [ResNet](https://github.com/PaddlePaddle/models/blob/release/1.5/PaddleCV/image_classification/models/resnet.py)
* [ResNet_vc](https://github.com/PaddlePaddle/models/blob/release/1.5/PaddleCV/image_classification/models/resnet_vc.py)
* [ResNet_vd](https://github.com/PaddlePaddle/models/blob/release/1.5/PaddleCV/image_classification/models/resnet_vd.py)
* [ResNeXt](https://github.com/PaddlePaddle/models/blob/release/1.5/PaddleCV/image_classification/models/resnext.py)
* [ResNeXt_vd](https://github.com/PaddlePaddle/models/blob/release/1.5/PaddleCV/image_classification/models/resnext_vd.py)

> **NOTE**: To convert these topologies, first serialize the model by calling `paddle.fluid.io.save_inference_model`
([description](https://www.paddlepaddle.org.cn/documentation/docs/en/1.3/api/io.html#save-inference-model)) and then
use the [paddle2onnx](https://github.com/PaddlePaddle/paddle2onnx) converter. An illustrative sketch of this two-step flow is shown below.

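The sketch below is not part of the original guide; the directory layout and the paddle2onnx flag spellings are assumptions, so check the paddle2onnx documentation for the exact interface.

```sh
# Sketch (assumed paths and flags): the PaddlePaddle model has already been
# serialized with paddle.fluid.io.save_inference_model into ./inference_model.
paddle2onnx --model_dir ./inference_model --save_file model.onnx --opset_version 11
# The resulting ONNX file can then be converted by the Model Optimizer.
mo --input_model model.onnx
```
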
## Convert an ONNX* Model <a name="Convert_From_ONNX"></a>
The Model Optimizer process assumes you have an ONNX model that was directly downloaded from a public repository or converted from any framework that supports exporting to the ONNX format.

To convert an ONNX\* model, run Model Optimizer with the path to the input model `.nnet` file and an output directory where you have write permissions:
To convert an ONNX\* model, run Model Optimizer with the path to the input model `.onnx` file:

```sh
mo --input_model <INPUT_MODEL>.onnx --output_dir <OUTPUT_MODEL_DIR>
mo --input_model <INPUT_MODEL>.onnx
```

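For instance (an illustrative sketch rather than part of the original document, and the archive layout is an assumption), converting the `resnet50` archive from the table above might look like this:

```sh
# Sketch: extract the downloaded ONNX model archive and convert it
# (the path to the .onnx file inside the archive is a placeholder).
tar -xzf resnet50.tar.gz
mo --input_model resnet50/model.onnx
```
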
There are no ONNX\* specific parameters, so only framework-agnostic parameters are available to convert your model. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page.

## Supported ONNX\* Layers
Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers.

## See Also
[Model Conversion Tutorials](Convert_Model_Tutorials.md)
@ -1,53 +1,17 @@
# Converting a PaddlePaddle Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle}

A summary of the steps for optimizing and deploying a model trained with PaddlePaddle:

1. [Configure Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for PaddlePaddle.
2. [Convert a PaddlePaddle Model](#Convert_From_Paddle) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases.
3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [OpenVINO Samples](../../../OV_Runtime_UG/Samples_Overview.md).
4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment.

## Supported Topologies

| Model Name| Model Type| Description|
| ------------- | ------------ | ------------- |
|ppocr-det| optical character recognition| Models are exported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/). Refer to [READ.md](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/#pp-ocr-20-series-model-listupdate-on-dec-15).|
|ppocr-rec| optical character recognition| Models are exported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/). Refer to [READ.md](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/#pp-ocr-20-series-model-listupdate-on-dec-15).|
|ResNet-50| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|MobileNet v2| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|MobileNet v3| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|BiSeNet v2| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|DeepLab v3 plus| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|Fast-SCNN| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|OCRNET| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|Yolo v3| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md#).|
|ppyolo| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md#).|
|MobileNetv3-SSD| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.2). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.2/deploy/EXPORT_MODEL.md#).|
|U-Net| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.3/docs/model_export.md#)|
|BERT| language representation| Models are exported from [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP/tree/v2.1.1). Refer to [README.md](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/bert#readme)|
|ernie| language representation| Models are exported from [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP/tree/v2.1.1). Refer to [README.md](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/bert#readme)|

> **NOTE:** The verified models are exported from the repository of branch release/2.1.
# Converting a PaddlePaddle* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle}

## Convert a PaddlePaddle Model <a name="Convert_From_Paddle"></a>
To convert a PaddlePaddle model, use the `mo` script and specify the path to the input model `.pdmodel` file:

To convert a PaddlePaddle model:

1. Activate environment with installed OpenVINO™ if needed
2. Use the `mo` script to simply convert a model, specifying the framework, the path to the input model `.pdmodel` file and the path to an output directory with write permissions:
```sh
mo --input_model <INPUT_MODEL>.pdmodel --output_dir <OUTPUT_MODEL_DIR> --framework=paddle
mo --input_model <INPUT_MODEL>.pdmodel
```

Parameters to convert your model:

* [Framework-agnostic parameters](Converting_Model.md): These parameters are used to convert a model trained with any supported framework.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values` are not supported in the current version of mo_paddle.

### Example of Converting a PaddlePaddle Model
Below is an example command that converts a YOLOv3 PaddlePaddle network to an OpenVINO IR network with Model Optimizer.

```sh
mo --model_name yolov3_darknet53_270e_coco --output_dir <OUTPUT_MODEL_DIR> --framework=paddle --data_type=FP32 --reverse_input_channels --input_shape=[1,3,608,608],[1,2],[1,2] --input=image,im_shape,scale_factor --output=save_infer_model/scale_0.tmp_1,save_infer_model/scale_1.tmp_1 --input_model=yolov3.pdmodel
mo --input_model=yolov3.pdmodel --input=image,im_shape,scale_factor --input_shape=[1,3,608,608],[1,2],[1,2] --reverse_input_channels --output=save_infer_model/scale_0.tmp_1,save_infer_model/scale_1.tmp_1
```

## Supported PaddlePaddle Layers
@ -56,3 +20,6 @@ Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the
## Frequently Asked Questions (FAQ)

When Model Optimizer is unable to run to completion due to issues such as typographical errors or incorrectly used options, it provides explanatory messages. They describe the potential cause of the problem and give a link to the [Model Optimizer FAQ](../Model_Optimizer_FAQ.md), which provides instructions on how to resolve most issues. The FAQ also includes links to relevant sections in the Model Optimizer Developer Guide to help you understand what went wrong.

## See Also
[Model Conversion Tutorials](Convert_Model_Tutorials.md)
@ -1,66 +1,19 @@
# Converting a PyTorch* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch}

@sphinxdirective

.. toctree::
   :maxdepth: 1
   :hidden:

   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN

@endsphinxdirective

## Supported Topologies

Here is the list of models that are tested and guaranteed to be supported. However, you can also use these instructions to convert PyTorch\* models that are not presented in the list.

* [Torchvision Models](https://pytorch.org/docs/stable/torchvision/index.html): alexnet, densenet121, densenet161, densenet169, densenet201, resnet101, resnet152, resnet18, resnet34, resnet50, vgg11, vgg13, vgg16, vgg19. The models can be converted using [regular instructions](#typical-pytorch).
* [Cadene Pretrained Models](https://github.com/Cadene/pretrained-models.pytorch): alexnet, fbresnet152, resnet101, resnet152, resnet18, resnet34, resnet50, resnext101_32x4d, resnext101_64x4d, vgg11. The models can be converted using [regular instructions](#typical-pytorch).
* [ESPNet Models](https://github.com/sacmehta/ESPNet/tree/master/pretrained) can be converted using [regular instructions](#typical-pytorch).
* [MobileNetV3](https://github.com/d-li14/mobilenetv3.pytorch) can be converted using [regular instructions](#typical-pytorch).
* [iSeeBetter](https://github.com/amanchadha/iSeeBetter) can be converted using [regular instructions](#typical-pytorch). Refer to the [`iSeeBetterTest.py`](https://github.com/amanchadha/iSeeBetter/blob/master/iSeeBetterTest.py) script for code to initialize the model.
* F3Net topology can be converted using the steps described in [Convert PyTorch\* F3Net to the IR](pytorch_specific/Convert_F3Net.md), which replace steps 2 and 3 of the [regular instructions](#typical-pytorch).
* QuartzNet topologies from the [NeMo project](https://github.com/NVIDIA/NeMo) can be converted using the steps described in [Convert PyTorch\* QuartzNet to the IR](pytorch_specific/Convert_QuartzNet.md), which replace steps 2 and 3 of the [regular instructions](#typical-pytorch).
* YOLACT topology can be converted using the steps described in [Convert PyTorch\* YOLACT to the IR](pytorch_specific/Convert_YOLACT.md), which replace steps 2 and 3 of the [regular instructions](#typical-pytorch).
* [RCAN](https://github.com/yulunzhang/RCAN) topology can be converted using the steps described in [Convert PyTorch\* RCAN to the IR](pytorch_specific/Convert_RCAN.md), which replace steps 2 and 3 of the [regular instructions](#typical-pytorch).
* [BERT_NER](https://github.com/kamalkraj/BERT-NER) topology can be converted using the steps described in [Convert PyTorch* BERT-NER to the IR](pytorch_specific/Convert_Bert_ner.md), which replace steps 2 and 3 of the [regular instructions](#typical-pytorch).
* ResNeXt-101 from [facebookresearch/semi-supervised-ImageNet1K-models](https://github.com/facebookresearch/semi-supervised-ImageNet1K-models) can be converted using [regular instructions](#typical-pytorch).

## Typical steps to convert PyTorch\* model <a name="typical-pytorch"></a>

## Typical Steps to Convert PyTorch Model <a name="typical-pytorch"></a>
PyTorch* framework is supported through export to ONNX\* format. A summary of the steps for optimizing and deploying a model that was trained with the PyTorch\* framework:

1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for ONNX\*.
2. [Export PyTorch model to ONNX\*](#export-to-onnx).
3. [Convert an ONNX\* model](Convert_Model_From_ONNX.md) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values.
4. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [sample applications](../../../OV_Runtime_UG/Samples_Overview.md).
5. [Integrate OpenVINO Runtime](../../../OV_Runtime_UG/Samples_Overview.md) in your application to deploy the model in the target environment.
1. [Export PyTorch model to ONNX\*](#export-to-onnx).
2. [Convert an ONNX\* model](Convert_Model_From_ONNX.md) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values.

## Export PyTorch\* Model to ONNX\* Format <a name="export-to-onnx"></a>

PyTorch models are defined in Python\* code; to export such models, use the `torch.onnx.export()` method. Usually, the code used to evaluate or test the model is provided with the model code and can be used to initialize and export the model.
Only the basics are covered here; the step of exporting to ONNX\* is crucial, but it is covered by the PyTorch\* framework.
For more information, please refer to [PyTorch\* documentation](https://pytorch.org/docs/stable/onnx.html).
For more information, please refer to [Exporting PyTorch models to ONNX format](https://pytorch.org/docs/stable/onnx.html).

To export a PyTorch\* model, you need to obtain the model as an instance of the `torch.nn.Module` class and call the `export` function.

```python
import torch

@ -68,7 +21,7 @@ import torch
model = SomeModel()
# Evaluate the model to switch some operations from training mode to inference.
model.eval()
# Create dummy input for the model. It will be used to run the model inside export function.
dummy_input = torch.randn(1, 3, 224, 224)
# Call the export function
torch.onnx.export(model, (dummy_input, ), 'model.onnx')
@ -79,3 +32,6 @@ torch.onnx.export(model, (dummy_input, ), 'model.onnx')
* Not all PyTorch\* operations can be exported to ONNX\* opset 9, which is used by default as of version 1.8.1.
It is recommended to export models to opset 11 or higher when export to the default opset 9 is not working. In that case, use the `opset_version`
option of `torch.onnx.export`. For more information about ONNX* opsets, refer to the [Operator Schemas](https://github.com/onnx/onnx/blob/master/docs/Operators.md). A conversion sketch for the exported file is shown below.

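A minimal follow-up sketch, not taken from the original document: once `model.onnx` has been exported as shown above, it can be converted with the Model Optimizer. The input shape below simply mirrors the dummy input used during export and is an assumption; adjust it to your own model.

```sh
# Sketch: convert the ONNX file produced by torch.onnx.export above.
mo --input_model model.onnx --input_shape [1,3,224,224]
```
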
## See Also
[Model Conversion Tutorials](Convert_Model_Tutorials.md)
@ -1,247 +1,40 @@
# Converting a TensorFlow* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow}

@sphinxdirective
## Convert TensorFlow 1 Models <a name="Convert_From_TF2X"></a>

.. _convert model tf:
### Convert Frozen Model Format <a name="Convert_From_TF"></a>
To convert a TensorFlow model, use the `mo` script to simply convert a model with the path to the input model `.pb` file:

.. toctree::
   :maxdepth: 1
   :hidden:

   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_RetinaNet_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_AttentionOCR_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_YOLO_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_FaceNet_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_NCF_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_DeepSpeech_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_lm_1b_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Slim_Library_Models
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_CRNN_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_GNMT_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models

@endsphinxdirective

A summary of the steps for optimizing and deploying a model that was trained with the TensorFlow\* framework:

1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for TensorFlow\* (TensorFlow was used to train your model).
2. [Freeze the TensorFlow model](#freeze-the-tensorflow-model) if your model is not already frozen, or skip this step and use the [instruction](#loading-nonfrozen-models) to convert a non-frozen model.
3. [Convert a TensorFlow\* model](#Convert_From_TF) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values.
4. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [sample applications](../../../OV_Runtime_UG/Samples_Overview.md).
5. [Integrate OpenVINO Runtime](../../../OV_Runtime_UG/Samples_Overview.md) in your application to deploy the model in the target environment.

## Supported Topologies

**Supported Non-Frozen Topologies with Links to the Associated Slim Model Classification Download Files**

Detailed information on how to convert models from the <a href="https://github.com/tensorflow/models/tree/master/research/slim/README.md">TensorFlow\*-Slim Image Classification Model Library</a> is available in the [Converting TensorFlow*-Slim Image Classification Model Library Models](tf_specific/Convert_Slim_Library_Models.md) chapter. The table below contains the list of supported TensorFlow\*-Slim Image Classification Model Library models and the required mean/scale values. The mean values are specified as if the input image is read in BGR channel order, as the OpenVINO classification sample does. An example command using these values is shown after the table.

| Model Name| Slim Model Checkpoint File| \-\-mean_values | \-\-scale|
| ------------- | ------------ | ------------- | -----:|
|Inception v1| [inception_v1_2016_08_28.tar.gz](http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz)| [127.5,127.5,127.5]| 127.5|
|Inception v2| [inception_v1_2016_08_28.tar.gz](http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz)| [127.5,127.5,127.5]| 127.5|
|Inception v3| [inception_v3_2016_08_28.tar.gz](http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz)| [127.5,127.5,127.5]| 127.5|
|Inception V4| [inception_v4_2016_09_09.tar.gz](http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz)| [127.5,127.5,127.5]| 127.5|
|Inception ResNet v2| [inception_resnet_v2_2016_08_30.tar.gz](http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz)| [127.5,127.5,127.5]| 127.5|
|MobileNet v1 128| [mobilenet_v1_0.25_128.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.25_128.tgz)| [127.5,127.5,127.5]| 127.5|
|MobileNet v1 160| [mobilenet_v1_0.5_160.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_0.5_160.tgz)| [127.5,127.5,127.5]| 127.5|
|MobileNet v1 224| [mobilenet_v1_1.0_224.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz)| [127.5,127.5,127.5]| 127.5|
|NasNet Large| [nasnet-a_large_04_10_2017.tar.gz](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_large_04_10_2017.tar.gz)| [127.5,127.5,127.5]| 127.5|
|NasNet Mobile| [nasnet-a_mobile_04_10_2017.tar.gz](https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_mobile_04_10_2017.tar.gz)| [127.5,127.5,127.5]| 127.5|
|ResidualNet-50 v1| [resnet_v1_50_2016_08_28.tar.gz](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)| [103.94,116.78,123.68] | 1 |
|ResidualNet-50 v2| [resnet_v2_50_2017_04_14.tar.gz](http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz)| [103.94,116.78,123.68] | 1 |
|ResidualNet-101 v1| [resnet_v1_101_2016_08_28.tar.gz](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz)| [103.94,116.78,123.68] | 1 |
|ResidualNet-101 v2| [resnet_v2_101_2017_04_14.tar.gz](http://download.tensorflow.org/models/resnet_v2_101_2017_04_14.tar.gz)| [103.94,116.78,123.68] | 1 |
|ResidualNet-152 v1| [resnet_v1_152_2016_08_28.tar.gz](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz)| [103.94,116.78,123.68] | 1 |
|ResidualNet-152 v2| [resnet_v2_152_2017_04_14.tar.gz](http://download.tensorflow.org/models/resnet_v2_152_2017_04_14.tar.gz)| [103.94,116.78,123.68] | 1 |
|VGG-16| [vgg_16_2016_08_28.tar.gz](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz)| [103.94,116.78,123.68] | 1 |
|VGG-19| [vgg_19_2016_08_28.tar.gz](http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz)| [103.94,116.78,123.68] | 1 |

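For illustration only (this command is not part of the original table; the frozen graph file name and input shape are placeholders), a conversion that applies the Inception V1 mean/scale values from the table could look like this:

```sh
# Sketch: mean/scale values are taken from the Inception V1 row above;
# the frozen graph name and input shape are placeholders.
mo --input_model inception_v1_frozen.pb --input_shape [1,224,224,3] --mean_values [127.5,127.5,127.5] --scale 127.5
```
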
**Supported Pre-Trained Topologies from TensorFlow 1 Detection Model Zoo**

Detailed information on how to convert models from the <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md">TensorFlow 1 Detection Model Zoo</a> is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models zoo that are supported.

| Model Name| TensorFlow 1 Object Detection API Models|
| :------------- | -----:|
|SSD MobileNet V1 COCO\*| [ssd_mobilenet_v1_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz)|
|SSD MobileNet V1 0.75 Depth COCO| [ssd_mobilenet_v1_0.75_depth_300x300_coco14_sync_2018_07_03.tar.gz](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_0.75_depth_300x300_coco14_sync_2018_07_03.tar.gz)|
|SSD MobileNet V1 PPN COCO| [ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync_2018_07_03.tar.gz](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync_2018_07_03.tar.gz)|
|SSD MobileNet V1 FPN COCO| [ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz)|
|SSD ResNet50 FPN COCO| [ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz](http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz)|
|SSD MobileNet V2 COCO| [ssd_mobilenet_v2_coco_2018_03_29.tar.gz](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz)|
|SSD Lite MobileNet V2 COCO| [ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz](http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz)|
|SSD Inception V2 COCO| [ssd_inception_v2_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2018_01_28.tar.gz)|
|RFCN ResNet 101 COCO| [rfcn_resnet101_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/rfcn_resnet101_coco_2018_01_28.tar.gz)|
|Faster R-CNN Inception V2 COCO| [faster_rcnn_inception_v2_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz)|
|Faster R-CNN ResNet 50 COCO| [faster_rcnn_resnet50_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_coco_2018_01_28.tar.gz)|
|Faster R-CNN ResNet 50 Low Proposals COCO| [faster_rcnn_resnet50_lowproposals_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2018_01_28.tar.gz)|
|Faster R-CNN ResNet 101 COCO| [faster_rcnn_resnet101_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_coco_2018_01_28.tar.gz)|
|Faster R-CNN ResNet 101 Low Proposals COCO| [faster_rcnn_resnet101_lowproposals_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2018_01_28.tar.gz)|
|Faster R-CNN Inception ResNet V2 COCO| [faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz)|
|Faster R-CNN Inception ResNet V2 Low Proposals COCO| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2018_01_28.tar.gz)|
|Faster R-CNN NasNet COCO| [faster_rcnn_nas_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_coco_2018_01_28.tar.gz)|
|Faster R-CNN NasNet Low Proposals COCO| [faster_rcnn_nas_lowproposals_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2018_01_28.tar.gz)|
|Mask R-CNN Inception ResNet V2 COCO| [mask_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_resnet_v2_atrous_coco_2018_01_28.tar.gz)|
|Mask R-CNN Inception V2 COCO| [mask_rcnn_inception_v2_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz)|
|Mask R-CNN ResNet 101 COCO| [mask_rcnn_resnet101_atrous_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet101_atrous_coco_2018_01_28.tar.gz)|
|Mask R-CNN ResNet 50 COCO| [mask_rcnn_resnet50_atrous_coco_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/mask_rcnn_resnet50_atrous_coco_2018_01_28.tar.gz)|
|Faster R-CNN ResNet 101 Kitti\*| [faster_rcnn_resnet101_kitti_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_kitti_2018_01_28.tar.gz)|
|Faster R-CNN Inception ResNet V2 Open Images\*| [faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_oid_2018_01_28.tar.gz)|
|Faster R-CNN Inception ResNet V2 Low Proposals Open Images\*| [faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_oid_2018_01_28.tar.gz)|
|Faster R-CNN ResNet 101 AVA v2.1\*| [faster_rcnn_resnet101_ava_v2.1_2018_04_30.tar.gz](http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_ava_v2.1_2018_04_30.tar.gz)|

**Supported Pre-Trained Topologies from TensorFlow 2 Detection Model Zoo**

Detailed information on how to convert models from the <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md">TensorFlow 2 Detection Model Zoo</a> is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models zoo that are supported.

| Model Name| TensorFlow 2 Object Detection API Models|
| :------------- | -----:|
| EfficientDet D0 512x512 | [efficientdet_d0_coco17_tpu-32.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz)|
| EfficientDet D1 640x640 | [efficientdet_d1_coco17_tpu-32.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d1_coco17_tpu-32.tar.gz)|
| EfficientDet D2 768x768 | [efficientdet_d2_coco17_tpu-32.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d2_coco17_tpu-32.tar.gz)|
| EfficientDet D3 896x896 | [efficientdet_d3_coco17_tpu-32.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d3_coco17_tpu-32.tar.gz)|
| EfficientDet D4 1024x1024 | [efficientdet_d4_coco17_tpu-32.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d4_coco17_tpu-32.tar.gz)|
| EfficientDet D5 1280x1280 | [efficientdet_d5_coco17_tpu-32.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d5_coco17_tpu-32.tar.gz)|
| EfficientDet D6 1280x1280 | [efficientdet_d6_coco17_tpu-32.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d6_coco17_tpu-32.tar.gz)|
| EfficientDet D7 1536x1536 | [efficientdet_d7_coco17_tpu-32.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d7_coco17_tpu-32.tar.gz)|
| SSD MobileNet v2 320x320 | [ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz)|
| SSD MobileNet V1 FPN 640x640 | [ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.tar.gz)|
| SSD MobileNet V2 FPNLite 320x320 | [ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz)|
| SSD MobileNet V2 FPNLite 640x640 | [ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz)|
| SSD ResNet50 V1 FPN 640x640 (RetinaNet50) | [ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz)|
| SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50) | [ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_1024x1024_coco17_tpu-8.tar.gz)|
| SSD ResNet101 V1 FPN 640x640 (RetinaNet101) | [ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz)|
| SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101) | [ssd_resnet101_v1_fpn_1024x1024_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet101_v1_fpn_1024x1024_coco17_tpu-8.tar.gz)|
| SSD ResNet152 V1 FPN 640x640 (RetinaNet152) | [ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.tar.gz)|
| SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152) | [ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.tar.gz)|
| Faster R-CNN ResNet50 V1 640x640 | [faster_rcnn_resnet50_v1_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet50_v1_640x640_coco17_tpu-8.tar.gz)|
| Faster R-CNN ResNet50 V1 1024x1024 | [faster_rcnn_resnet50_v1_1024x1024_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet50_v1_1024x1024_coco17_tpu-8.tar.gz)|
| Faster R-CNN ResNet50 V1 800x1333 | [faster_rcnn_resnet50_v1_800x1333_coco17_gpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet50_v1_800x1333_coco17_gpu-8.tar.gz)|
| Faster R-CNN ResNet101 V1 640x640 | [faster_rcnn_resnet101_v1_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet101_v1_640x640_coco17_tpu-8.tar.gz)|
| Faster R-CNN ResNet101 V1 1024x1024 | [faster_rcnn_resnet101_v1_1024x1024_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet101_v1_1024x1024_coco17_tpu-8.tar.gz)|
| Faster R-CNN ResNet101 V1 800x1333 | [faster_rcnn_resnet101_v1_800x1333_coco17_gpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet101_v1_800x1333_coco17_gpu-8.tar.gz)|
| Faster R-CNN ResNet152 V1 640x640 | [faster_rcnn_resnet152_v1_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet152_v1_640x640_coco17_tpu-8.tar.gz)|
| Faster R-CNN ResNet152 V1 1024x1024 | [faster_rcnn_resnet152_v1_1024x1024_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet152_v1_1024x1024_coco17_tpu-8.tar.gz)|
| Faster R-CNN ResNet152 V1 800x1333 | [faster_rcnn_resnet152_v1_800x1333_coco17_gpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_resnet152_v1_800x1333_coco17_gpu-8.tar.gz)|
| Faster R-CNN Inception ResNet V2 640x640 | [faster_rcnn_inception_resnet_v2_640x640_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_inception_resnet_v2_640x640_coco17_tpu-8.tar.gz)|
| Faster R-CNN Inception ResNet V2 1024x1024 | [faster_rcnn_inception_resnet_v2_1024x1024_coco17_tpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/faster_rcnn_inception_resnet_v2_1024x1024_coco17_tpu-8.tar.gz)|
| Mask R-CNN Inception ResNet V2 1024x1024 | [mask_rcnn_inception_resnet_v2_1024x1024_coco17_gpu-8.tar.gz](http://download.tensorflow.org/models/object_detection/tf2/20200711/mask_rcnn_inception_resnet_v2_1024x1024_coco17_gpu-8.tar.gz)|

**Supported Frozen Quantized Topologies**

The following topologies are hosted on the TensorFlow\* Lite [site](https://www.tensorflow.org/lite/guide/hosted_models). The frozen model file (`.pb` file) should be fed to the Model Optimizer.

| Model Name | Frozen Model File |
|:----------------------|----------------------------------------------------------------------------------------------------------------------------------:|
| Mobilenet V1 0.25 128 | [mobilenet_v1_0.25_128_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.25_128_quant.tgz) |
| Mobilenet V1 0.25 160 | [mobilenet_v1_0.25_160_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.25_160_quant.tgz) |
| Mobilenet V1 0.25 192 | [mobilenet_v1_0.25_192_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.25_192_quant.tgz) |
| Mobilenet V1 0.25 224 | [mobilenet_v1_0.25_224_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.25_224_quant.tgz) |
| Mobilenet V1 0.50 128 | [mobilenet_v1_0.5_128_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.5_128_quant.tgz) |
| Mobilenet V1 0.50 160 | [mobilenet_v1_0.5_160_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.5_160_quant.tgz) |
| Mobilenet V1 0.50 192 | [mobilenet_v1_0.5_192_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.5_192_quant.tgz) |
| Mobilenet V1 0.50 224 | [mobilenet_v1_0.5_224_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.5_224_quant.tgz) |
| Mobilenet V1 0.75 128 | [mobilenet_v1_0.75_128_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.75_128_quant.tgz) |
| Mobilenet V1 0.75 160 | [mobilenet_v1_0.75_160_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.75_160_quant.tgz) |
| Mobilenet V1 0.75 192 | [mobilenet_v1_0.75_192_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.75_192_quant.tgz) |
| Mobilenet V1 0.75 224 | [mobilenet_v1_0.75_224_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_0.75_224_quant.tgz) |
| Mobilenet V1 1.0 128 | [mobilenet_v1_1.0_128_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_128_quant.tgz) |
| Mobilenet V1 1.0 160 | [mobilenet_v1_1.0_160_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_160_quant.tgz) |
| Mobilenet V1 1.0 192 | [mobilenet_v1_1.0_192_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_192_quant.tgz) |
| Mobilenet V1 1.0 224 | [mobilenet_v1_1.0_224_quant.tgz](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz) |
| Mobilenet V2 1.0 224 | [mobilenet_v2_1.0_224_quant.tgz](http://download.tensorflow.org/models/tflite_11_05_08/mobilenet_v2_1.0_224_quant.tgz) |
| Inception V1 | [inception_v1_224_quant_20181026.tgz](http://download.tensorflow.org/models/inception_v1_224_quant_20181026.tgz) |
| Inception V2 | [inception_v2_224_quant_20181026.tgz](http://download.tensorflow.org/models/inception_v2_224_quant_20181026.tgz) |
| Inception V3 | [inception_v3_quant.tgz](http://download.tensorflow.org/models/tflite_11_05_08/inception_v3_quant.tgz) |
| Inception V4 | [inception_v4_299_quant_20181026.tgz](http://download.tensorflow.org/models/inception_v4_299_quant_20181026.tgz) |

It is necessary to specify the following command-line parameters for the Model Optimizer to convert some of the models from the list above: `--input input --input_shape [1,HEIGHT,WIDTH,3]`.
Here, `HEIGHT` and `WIDTH` are the input image height and width for which the model was trained.

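As an illustration (not from the original document; the name of the frozen `.pb` file inside the archive is an assumption), converting the Mobilenet V1 1.0 224 archive with those parameters could look like this:

```sh
# Sketch: the frozen graph file name extracted from the .tgz archive is a placeholder.
mo --input_model mobilenet_v1_1.0_224_quant_frozen.pb --input input --input_shape [1,224,224,3]
```
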
**Other supported topologies**

| Model Name| Repository |
| :------------- | -----:|
| ResNext | [Repo](https://github.com/taki0112/ResNeXt-Tensorflow)|
| DenseNet | [Repo](https://github.com/taki0112/Densenet-Tensorflow)|
| CRNN | [Repo](https://github.com/MaybeShewill-CV/CRNN_Tensorflow) |
| NCF | [Repo](https://github.com/tensorflow/models/tree/master/official/recommendation) |
| lm_1b | [Repo](https://github.com/tensorflow/models/tree/master/research/lm_1b) |
| DeepSpeech | [Repo](https://github.com/mozilla/DeepSpeech) |
| A3C | [Repo](https://github.com/miyosuda/async_deep_reinforce) |
| VDCNN | [Repo](https://github.com/WenchenLi/VDCNN) |
| Unet | [Repo](https://github.com/kkweon/UNet-in-Tensorflow) |
| Keras-TCN | [Repo](https://github.com/philipperemy/keras-tcn) |
| PRNet | [Repo](https://github.com/YadiraF/PRNet) |
| YOLOv4 | [Repo](https://github.com/Ma-Dan/keras-yolo4) |
| STN | [Repo](https://github.com/oarriaga/STN.keras) |

* YOLO topologies from DarkNet* can be converted using [these instructions](tf_specific/Convert_YOLO_From_Tensorflow.md).
* FaceNet topologies can be converted using [these instructions](tf_specific/Convert_FaceNet_From_Tensorflow.md).
* CRNN topologies can be converted using [these instructions](tf_specific/Convert_CRNN_From_Tensorflow.md).
* NCF topologies can be converted using [these instructions](tf_specific/Convert_NCF_From_Tensorflow.md).
* [GNMT](https://github.com/tensorflow/nmt) topology can be converted using [these instructions](tf_specific/Convert_GNMT_From_Tensorflow.md).
* [BERT](https://github.com/google-research/bert) topology can be converted using [these instructions](tf_specific/Convert_BERT_From_Tensorflow.md).
* [XLNet](https://github.com/zihangdai/xlnet) topology can be converted using [these instructions](tf_specific/Convert_XLNet_From_Tensorflow.md).
* [Attention OCR](https://github.com/emedvedev/attention-ocr) topology can be converted using [these instructions](tf_specific/Convert_AttentionOCR_From_Tensorflow.md).

## Loading Non-Frozen Models to the Model Optimizer <a name="loading-nonfrozen-models"></a>

There are three ways to store non-frozen TensorFlow models and load them to the Model Optimizer:

1. Checkpoint:

   In this case, a model consists of two files:
   - `inference_graph.pb` or `inference_graph.pbtxt`
   - `checkpoint_file.ckpt`

   If you do not have an inference graph file, refer to [Freezing Custom Models in Python](#freeze-the-tensorflow-model).

   To convert such a TensorFlow model:

   1. Go to the `<INSTALL_DIR>/tools/model_optimizer` directory
   2. Run the `mo` script with the path to the checkpoint file and an output directory where you have write permissions:

      * If the input model is in `.pb` format:<br>
```sh
mo --input_model <INFERENCE_GRAPH>.pb --input_checkpoint <INPUT_CHECKPOINT> --output_dir <OUTPUT_MODEL_DIR>
```
      * If the input model is in `.pbtxt` format:<br>
```sh
mo --input_model <INFERENCE_GRAPH>.pbtxt --input_checkpoint <INPUT_CHECKPOINT> --input_model_is_text --output_dir <OUTPUT_MODEL_DIR>
mo --input_model <INPUT_MODEL>.pb
```

2. MetaGraph:
### Convert Non-Frozen Model Formats <a name="loading-nonfrozen-models"></a>
There are three ways to store non-frozen TensorFlow models and convert them with the Model Optimizer:

   In this case, a model consists of three or four files stored in the same directory:
   - `model_name.meta`
   - `model_name.index`
   - `model_name.data-00000-of-00001` (digit part may vary)
   - `checkpoint` (optional)

   To convert such a TensorFlow model:

   1. Go to the `<INSTALL_DIR>/tools/model_optimizer` directory
   2. Run the `mo` script with a path to the MetaGraph `.meta` file and a writable output directory:<br>
1. **Checkpoint**. In this case, a model consists of two files: `inference_graph.pb` (or `inference_graph.pbtxt`) and `checkpoint_file.ckpt`.
If you do not have an inference graph file, refer to [Freezing Custom Models in Python](#freeze-the-tensorflow-model).
To convert the model with the inference graph in `.pb` format, run the `mo` script with the path to the checkpoint file:
```sh
mo --input_meta_graph <INPUT_META_GRAPH>.meta --output_dir <OUTPUT_MODEL_DIR>
mo --input_model <INFERENCE_GRAPH>.pb --input_checkpoint <INPUT_CHECKPOINT>
```
To convert the model with the inference graph in `.pbtxt` format, run the `mo` script with the path to the checkpoint file:
```sh
mo --input_model <INFERENCE_GRAPH>.pbtxt --input_checkpoint <INPUT_CHECKPOINT> --input_model_is_text
```

3. SavedModel format of TensorFlow 1.x and 2.x versions:

   In this case, a model consists of a special directory with a `.pb` file and several subfolders: `variables`, `assets`, and `assets.extra`. For more information about the SavedModel directory, refer to the [README](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/saved_model#components) file in the TensorFlow repository.

   To convert such a TensorFlow model:

   1. Go to the `<INSTALL_DIR>/tools/model_optimizer` directory
   2. Run the `mo` script with a path to the SavedModel directory and a writable output directory:<br>
2. **MetaGraph**. In this case, a model consists of three or four files stored in the same directory: `model_name.meta`, `model_name.index`,
`model_name.data-00000-of-00001` (digit part may vary), and `checkpoint` (optional).
To convert such a TensorFlow model, run the `mo` script with a path to the MetaGraph `.meta` file:
```sh
mo --saved_model_dir <SAVED_MODEL_DIRECTORY> --output_dir <OUTPUT_MODEL_DIR>
mo --input_meta_graph <INPUT_META_GRAPH>.meta
```

3. **SavedModel format**. In this case, a model consists of a special directory with a `.pb` file
and several subfolders: `variables`, `assets`, and `assets.extra`. For more information about the SavedModel directory, refer to the [README](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/saved_model#components) file in the TensorFlow repository.
To convert such a TensorFlow model, run the `mo` script with a path to the SavedModel directory:
```sh
mo --saved_model_dir <SAVED_MODEL_DIRECTORY>
```
A sketch that additionally sets SavedModel tags is shown after this list.

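For illustration (the directory name and tag are placeholders, not from the original document), a SavedModel exported with an explicit tag set could be converted with the `--saved_model_tags` option that is described later in this document:

```sh
# Sketch: convert a SavedModel, selecting the MetaGraphDef tagged "serve".
mo --saved_model_dir ./my_saved_model --saved_model_tags serve
```
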
You can convert the TensorFlow 1.x SavedModel format in an environment that has either the 1.x or 2.x version of TensorFlow. However, the TensorFlow 2.x SavedModel format strictly requires the 2.x version of TensorFlow.
@ -249,12 +42,12 @@ If a model contains operations currently unsupported by OpenVINO, prune these op
To determine custom input nodes, display a graph of the model in TensorBoard. To generate TensorBoard logs of the graph, use the `--tensorboard_logs` option.
The TensorFlow 2.x SavedModel format has a specific graph structure due to eager execution. In case of pruning, find custom input nodes in the `StatefulPartitionedCall/*` subgraph of the TensorFlow 2.x SavedModel format.

## Freezing Custom Models in Python\* <a name="freeze-the-tensorflow-model"></a>
|
||||
|
||||
### Freezing Custom Models in Python\* <a name="freeze-the-tensorflow-model"></a>
|
||||
When a network is defined in Python\* code, you have to create an inference graph file. Usually graphs are built in a form
|
||||
that allows model training. That means that all trainable parameters are represented as variables in the graph.
|
||||
To be able to use such graph with Model Optimizer such graph should be frozen.
|
||||
The graph is frozen and dumped to a file with the following code:
|
||||
|
||||
```python
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.framework import graph_io
|
||||
@ -273,126 +66,36 @@ Where:
|
||||
* `inference_graph.pb` is the name of the generated inference graph file.
|
||||
* `as_text` specifies whether the generated file should be in human readable text format or binary.
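
For reference, a minimal, self-contained freezing sketch for a TensorFlow 1.x-style graph could look like the snippet below. The toy graph, the output node name `output_node`, and the file name `inference_graph.pb` are placeholders, not part of the original instructions.

```python
import tensorflow.compat.v1 as tf
from tensorflow.python.framework import graph_io

tf.disable_eager_execution()

# A toy graph standing in for a user-defined network: one variable and one named output node.
x = tf.placeholder(tf.float32, shape=[1, 3], name="input")
w = tf.Variable(tf.ones([3, 2]), name="weights")
y = tf.matmul(x, w, name="output_node")

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Replace trainable variables with constants so the Model Optimizer can consume the graph.
    frozen = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, ["output_node"])
    graph_io.write_graph(frozen, "./", "inference_graph.pb", as_text=False)
```

The resulting `inference_graph.pb` can then be passed to `mo --input_model inference_graph.pb`.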

## Convert a TensorFlow* Model <a name="Convert_From_TF"></a>

To convert a TensorFlow model:

1. Go to the `<INSTALL_DIR>/tools/model_optimizer` directory
2. Use the `mo` script to simply convert a model with the path to the input model `.pb` file and a writable output directory:
```sh
mo --input_model <INPUT_MODEL>.pb --output_dir <OUTPUT_MODEL_DIR>
```

Two groups of parameters are available to convert your model:

* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page.
* [TensorFlow-specific parameters](#tensorflow_specific_conversion_params): Parameters used to convert only TensorFlow models.

> **NOTE**: The color channel order (RGB or BGR) of the input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to the **When to Reverse Input Channels** section of [Converting a Model to Intermediate Representation (IR)](Converting_Model.md).
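
For illustration, the channel reversal can be requested at conversion time as follows (the model file name is a placeholder):
```sh
mo --input_model <INPUT_MODEL>.pb --reverse_input_channels --output_dir <OUTPUT_MODEL_DIR>
```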

### Using TensorFlow\*-Specific Conversion Parameters <a name="tensorflow_specific_conversion_params"></a>
The following list provides the TensorFlow\*-specific parameters.

```
TensorFlow*-specific parameters:
  --input_model_is_text
                        TensorFlow*: treat the input model file as a text
                        protobuf format. If not specified, the Model Optimizer
                        treats it as a binary file by default.
  --input_checkpoint INPUT_CHECKPOINT
                        TensorFlow*: variables file to load.
  --input_meta_graph INPUT_META_GRAPH
                        Tensorflow*: a file with a meta-graph of the model
                        before freezing
  --saved_model_dir SAVED_MODEL_DIR
                        TensorFlow*: directory with a model in SavedModel format
                        of TensorFlow 1.x or 2.x version
  --saved_model_tags SAVED_MODEL_TAGS
                        Group of tag(s) of the MetaGraphDef to load, in string
                        format, separated by ','. If a tag-set contains
                        multiple tags, all tags must be passed in.
  --tensorflow_custom_operations_config_update TENSORFLOW_CUSTOM_OPERATIONS_CONFIG_UPDATE
                        TensorFlow*: update the configuration file with node
                        name patterns with input/output nodes information.
  --tensorflow_object_detection_api_pipeline_config TENSORFLOW_OBJECT_DETECTION_API_PIPELINE_CONFIG
                        TensorFlow*: path to the pipeline configuration file
                        used to generate model created with help of Object
                        Detection API.
  --tensorboard_logdir TENSORBOARD_LOGDIR
                        TensorFlow*: dump the input graph to a given directory
                        that should be used with TensorBoard.
  --tensorflow_custom_layer_libraries TENSORFLOW_CUSTOM_LAYER_LIBRARIES
                        TensorFlow*: comma separated list of shared libraries
                        with TensorFlow* custom operations implementation.
  --disable_nhwc_to_nchw
                        [DEPRECATED] Disables default translation from NHWC to NCHW. Since 2022.1
                        this option is deprecated and used only to maintain backward compatibility
                        with previous releases.
```

> **NOTE**: Models produced with TensorFlow\* usually do not have fully defined shapes (they contain `-1` in some dimensions). It is necessary to pass an explicit shape for the input using the command-line parameter `--input_shape`, or `-b` to override just the batch dimension. If the shape is fully defined, there is no need to specify either the `-b` or the `--input_shape` option.
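
For example, a partially defined input shape could be made fully static at conversion time like this (the shape values are placeholders):
```sh
mo --input_model <INPUT_MODEL>.pb --input_shape [1,224,224,3] --output_dir <OUTPUT_MODEL_DIR>
```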

#### Command-Line Interface (CLI) Examples Using TensorFlow\*-Specific Parameters

* Launching the Model Optimizer for the Inception V1 frozen model when the model file is a plain text protobuf, specifying a writable output directory:
```sh
mo --input_model inception_v1.pbtxt --input_model_is_text -b 1 --output_dir <OUTPUT_MODEL_DIR>
```

* Launching the Model Optimizer for the Inception V1 frozen model and updating the custom sub-graph replacement file `transform.json` with information about input and output nodes of the matched sub-graph, specifying a writable output directory. For more information about this feature, refer to [Sub-Graph Replacement in the Model Optimizer](../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md).
```sh
mo --input_model inception_v1.pb -b 1 --tensorflow_custom_operations_config_update transform.json --output_dir <OUTPUT_MODEL_DIR>
```

* Launching the Model Optimizer for the Inception V1 frozen model and using the custom sub-graph replacement file `transform.json` for model conversion. For more information about this feature, refer to [Sub-Graph Replacement in the Model Optimizer](../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md).
```sh
mo --input_model inception_v1.pb -b 1 --transformations_config transform.json --output_dir <OUTPUT_MODEL_DIR>
```

* Launching the Model Optimizer for the Inception V1 frozen model and dumping information about the graph to the TensorBoard log dir `/tmp/log_dir`:
```sh
mo --input_model inception_v1.pb -b 1 --tensorboard_logdir /tmp/log_dir --output_dir <OUTPUT_MODEL_DIR>
```

* Launching the Model Optimizer for a model with custom TensorFlow operations (refer to the [TensorFlow* documentation](https://www.tensorflow.org/extend/adding_an_op)) implemented in C++ and compiled into the shared library `my_custom_op.so`. Model Optimizer falls back to TensorFlow to infer the output shape of operations implemented in the library if a custom TensorFlow operation library is provided. If it is not provided, a custom operation with an inference function is needed. For more information about custom operations, refer to the [OpenVINO™ Extensibility Mechanism](../../../Extensibility_UG/Intro.md).
```sh
mo --input_model custom_model.pb --tensorflow_custom_layer_libraries ./my_custom_op.so --output_dir <OUTPUT_MODEL_DIR>
```

## Convert TensorFlow* 2 Models <a name="Convert_From_TF2X"></a>

In order to convert TensorFlow* 2 models, installation of dependencies from `requirements_tf.txt` is required.

## Convert TensorFlow 2 Models <a name="Convert_From_TF2X"></a>
To convert TensorFlow* 2 models, ensure that `openvino-dev[tensorflow2]` is installed via `pip`.
TensorFlow* 2.X officially supports two model formats: SavedModel and Keras H5 (or HDF5).
Below are the instructions on how to convert each of them.

### SavedModel Format

A model in the SavedModel format consists of a directory with a `saved_model.pb` file and two subfolders: `variables` and `assets`.
To convert such a model, run the `mo` script with a path to the SavedModel directory:

To convert such a model:
1. Go to the `<INSTALL_DIR>/tools/model_optimizer` directory.
2. Run the `mo` script with a path to the SavedModel directory and a writable output directory:
```sh
mo --saved_model_dir <SAVED_MODEL_DIRECTORY> --output_dir <OUTPUT_MODEL_DIR>
mo --saved_model_dir <SAVED_MODEL_DIRECTORY>
```

TensorFlow* 2 SavedModel format strictly requires the 2.x version of TensorFlow installed in the
environment for conversion to the Intermediate Representation (IR).

If a model contains operations currently unsupported by OpenVINO™,
prune these operations by explicit specification of input nodes using the `--input` or `--output`
options. To determine custom input nodes, visualize a model graph in the TensorBoard.

To generate TensorBoard logs of the graph, use the Model Optimizer `--tensorboard_logs` command-line
option.

TensorFlow* 2 SavedModel format has a specific graph structure due to eager execution. In case of
pruning, find custom input nodes in the `StatefulPartitionedCall/*` subgraph.
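
As an illustration, pruning at a custom input inside that subgraph could look as follows (the node name is hypothetical and must be taken from your own model graph):
```sh
mo --saved_model_dir <SAVED_MODEL_DIRECTORY> --input StatefulPartitionedCall/<CUSTOM_INPUT_NODE>
```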

### Keras H5

If you have a model in the HDF5 format, load the model using TensorFlow* 2 and serialize it in the
SavedModel format. Here is an example of how to do it:

```python
import tensorflow as tf
model = tf.keras.models.load_model('model.h5')
@ -401,6 +104,7 @@ tf.saved_model.save(model,'model')

The Keras H5 model with a custom layer requires special handling when converted into the SavedModel format.
For example, the model with a custom layer `CustomLayer` from `custom_layer.py` is converted as follows:

```python
import tensorflow as tf
from custom_layer import CustomLayer
@ -412,42 +116,39 @@ Then follow the above instructions for the SavedModel format.

> **NOTE**: Do not use other hacks to resave TensorFlow* 2 models into TensorFlow* 1 formats.
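
A minimal sketch of the custom-layer case (the module `custom_layer` and class `CustomLayer` follow the example above and stand in for your own code):

```python
import tensorflow as tf
from custom_layer import CustomLayer  # placeholder: your own custom layer implementation

# Register the custom layer so Keras can deserialize it, then re-save as SavedModel.
model = tf.keras.models.load_model('model.h5', custom_objects={'CustomLayer': CustomLayer})
tf.saved_model.save(model, 'model')
```

After that, convert the resulting SavedModel directory with `mo --saved_model_dir model` as described above.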

## Command-Line Interface (CLI) Examples Using TensorFlow\*-Specific Parameters

* Launching the Model Optimizer for the Inception V1 frozen model when the model file is a plain text protobuf:
```sh
mo --input_model inception_v1.pbtxt --input_model_is_text -b 1
```

* Launching the Model Optimizer for the Inception V1 frozen model and dumping information about the graph to the TensorBoard log dir `/tmp/log_dir`:
```sh
mo --input_model inception_v1.pb -b 1 --tensorboard_logdir /tmp/log_dir
```

* Launching the Model Optimizer for the BERT model in the SavedModel format, with three inputs. Explicitly specify the input shapes
where the batch size and the sequence length equal 2 and 30 respectively.
```sh
mo --saved_model_dir BERT --input mask,word_ids,type_ids --input_shape [2,30],[2,30],[2,30]
```

## Custom Layer Definition

Internally, when you run the Model Optimizer, it loads the model, goes through the topology, and tries to find each layer type in a list of known layers. Custom layers are layers that are not included in the list of known layers. If your topology contains any layers that are not in this list of known layers, the Model Optimizer classifies them as custom.

See [Custom Layers in the Model Optimizer](../customize_model_optimizer/Customize_Model_Optimizer.md) for information about:

* Model Optimizer internal procedure for working with custom layers
* How to convert a TensorFlow model that has custom layers
* Custom layer implementation details

## Supported TensorFlow\* and TensorFlow 2 Keras\* Layers
Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers.

## Frequently Asked Questions (FAQ)

The Model Optimizer provides explanatory messages if it is unable to run to completion due to issues like typographical errors, incorrectly used options, or other issues. The message describes the potential cause of the problem and gives a link to the [Model Optimizer FAQ](../Model_Optimizer_FAQ.md). The FAQ has instructions on how to resolve most issues. The FAQ also includes links to relevant sections in the Model Optimizer Developer Guide to help you understand what went wrong.

## Video: Converting a TensorFlow Model

@sphinxdirective

.. raw:: html

   <iframe allowfullscreen mozallowfullscreen msallowfullscreen oallowfullscreen webkitallowfullscreen width="560" height="315"
   src="https://www.youtube.com/embed/QW6532LtiTc">
   </iframe>

@endsphinxdirective

## Summary
In this document, you learned:

* Basic information about how the Model Optimizer works with TensorFlow models
* Which TensorFlow models are supported
* How to freeze a TensorFlow model
* How to convert a trained TensorFlow model using the Model Optimizer with both framework-agnostic and TensorFlow-specific command-line options

## See Also
[Model Conversion Tutorials](Convert_Model_Tutorials.md)
@ -0,0 +1,44 @@
# Model Conversion Tutorials {#openvino_docs_MO_DG_prepare_model_convert_model_tutorials}

@sphinxdirective

.. toctree::
   :maxdepth: 1
   :hidden:

   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_AttentionOCR_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_CRNN_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_DeepSpeech_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_FaceNet_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_GNMT_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_lm_1b_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_NCF_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_RetinaNet_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Slim_Library_Models
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_YOLO_From_Tensorflow
   openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Faster_RCNN
   openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_GPT2
   openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Mask_RCNN
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Cascade_RCNN_res101
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT
   openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT
   openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_GluonCV_Models
   openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet
   openvino_docs_MO_DG_prepare_model_convert_model_kaldi_specific_Aspire_Tdnn_Model

@endsphinxdirective

This section provides you with a set of tutorials that demonstrate conversion steps for specific TensorFlow, ONNX, PyTorch, MXNet, and Kaldi models.
It contains conversion recipes for concrete models that do not necessarily cover your case.
Try to convert the model out-of-the-box, meaning only the `--input_model` parameter is specified in the command line, before studying the tutorials.
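
For instance, an out-of-the-box conversion is just:
```sh
mo --input_model <INPUT_MODEL>
```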

You can also find a collection of [Python tutorials](../../../tutorials.md) written for running on Jupyter* notebooks that provide an introduction to the OpenVINO™ toolkit and explain how to use the Python API and tools for optimized deep learning inference.
@ -1,296 +1,74 @@
# Converting a Model to Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model}
# Setting Input Shapes {#openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model}

@sphinxdirective
Paragraphs below provide details about specifying input shapes for model conversion.

.. toctree::
   :maxdepth: 1
   :hidden:
## When to Specify --input_shape Command-line Parameter <a name="when_to_specify_input_shapes"></a>
Model Optimizer supports conversion of models with dynamic input shapes that contain undefined dimensions.
However, if the shape of inference data is not going to change from one inference request to another,
it is recommended to set up static shapes (when all dimensions are fully defined) for the inputs.
It can be beneficial from a performance and memory-consumption perspective.
To set up static shapes, Model Optimizer provides the `--input_shape` parameter. This is an offline approach to set static shapes and
can save time on calling the `reshape` method.
For more information about the `reshape` method and dynamic shapes, refer to [Dynamic Shapes](../../../OV_Runtime_UG/ov_dynamic_shapes.md).

   openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow
   openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Caffe
   openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet
   openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Kaldi
   openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_ONNX
   openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle
   openvino_docs_MO_DG_prepare_model_Model_Optimization_Techniques
   openvino_docs_MO_DG_prepare_model_convert_model_Cutting_Model
   openvino_docs_MO_DG_prepare_model_Supported_Frameworks_Layers
   openvino_docs_MO_DG_prepare_model_convert_model_IR_suitable_for_INT8_inference
   openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Subgraph_Replacement_Model_Optimizer
   openvino_docs_MO_DG_prepare_model_convert_model_Legacy_IR_Layers_Catalog_Spec
OpenVINO Runtime API can have limitations to infer models with undefined dimensions on some hardware.
In this case, the `--input_shape` parameter and the `reshape` method can help to resolve undefined dimensions.
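
When a converted IR still has undefined dimensions, they can also be made static at runtime. A minimal sketch with the OpenVINO Python API (the IR file name and the shape are placeholders, and a single-input model is assumed):

```python
from openvino.runtime import Core

core = Core()
model = core.read_model("model.xml")   # IR produced by Model Optimizer (placeholder name)
model.reshape([1, 3, 224, 224])        # set a fully static shape for the single input
compiled_model = core.compile_model(model, "CPU")
```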

@endsphinxdirective
Sometimes Model Optimizer is unable to convert models out-of-the-box (only the `--input_model` parameter is specified).
Such problems can relate to models with inputs of undefined rank or to cases of cutting off parts of a model.
In this case, the user has to specify input shapes explicitly using the `--input_shape` parameter.

To convert the model to the Intermediate Representation (IR), run Model Optimizer using the following command:
For example, run the Model Optimizer for the TensorFlow* MobileNet model with a single input
and specify the input shape `[2,300,300,3]`.

```sh
mo --input_model INPUT_MODEL
mo --input_model MobileNet.pb --input_shape [2,300,300,3]
```

The output directory must have write permissions, so you can run Model Optimizer from the output directory or specify an output path with the `--output_dir` option.

> **NOTE**: The color channel order (RGB or BGR) of the input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For details, refer to [When to Reverse Input Channels](#when_to_reverse_input_channels).

To adjust the conversion process, you may use the general parameters defined in the [General Conversion Parameters](#general_conversion_parameters) and
framework-specific parameters for:
* [Caffe](Convert_Model_From_Caffe.md)
* [TensorFlow](Convert_Model_From_TensorFlow.md)
* [MXNet](Convert_Model_From_MxNet.md)
* [ONNX](Convert_Model_From_ONNX.md)
* [PaddlePaddle](Convert_Model_From_Paddle.md)
* [Kaldi](Convert_Model_From_Kaldi.md)

## General Conversion Parameters

To adjust the conversion process, you can also use the general (framework-agnostic) parameters:
If a model has multiple inputs, `--input_shape` must be used in conjunction with the `--input` parameter.
The `--input` parameter contains a list of input names for which shapes in the same order are defined via `--input_shape`.
For example, launch the Model Optimizer for the ONNX* OCR model with a pair of inputs `data` and `seq_len`
and specify shapes `[3,150,200,1]` and `[3]` for them.

```sh
optional arguments:
  -h, --help            show this help message and exit
  --framework {tf,caffe,mxnet,kaldi,onnx}
                        Name of the framework used to train the input model.

Framework-agnostic parameters:
  --input_model INPUT_MODEL, -w INPUT_MODEL, -m INPUT_MODEL
                        Tensorflow*: a file with a pre-trained model (binary
                        or text .pb file after freezing). Caffe*: a model
                        proto file with model weights
  --model_name MODEL_NAME, -n MODEL_NAME
                        Model_name parameter passed to the final create_ir
                        transform. This parameter is used to name a network in
                        a generated IR and output .xml/.bin files.
  --output_dir OUTPUT_DIR, -o OUTPUT_DIR
                        Directory that stores the generated IR. By default, it
                        is the directory from where the Model Optimizer is
                        launched.
  --input_shape INPUT_SHAPE
                        Input shape(s) that should be fed to an input node(s)
                        of the model. Shape is defined as a comma-separated
                        list of integer numbers enclosed in parentheses or
                        square brackets, for example [1,3,227,227] or
                        (1,227,227,3), where the order of dimensions depends
                        on the framework input layout of the model. For
                        example, [N,C,H,W] is used for ONNX* models and
                        [N,H,W,C] for TensorFlow* models. The shape can contain
                        undefined dimensions (? or -1) and should fit the dimensions
                        defined in the input operation of the graph. Boundaries
                        of undefined dimension can be specified with ellipsis,
                        for example [1,1..10,128,128]. One boundary can be undefined,
                        for example [1,..100] or [1,3,1..,1..]. If there
                        are multiple inputs in the model, --input_shape should
                        contain definition of shape for each input separated
                        by a comma, for example: [1,3,227,227],[2,4] for a
                        model with two inputs with 4D and 2D shapes.
                        Alternatively, specify shapes with the --input
                        option.
  --scale SCALE, -s SCALE
                        All input values coming from original network inputs
                        will be divided by this value. When a list of inputs
                        is overridden by the --input parameter, this scale is
                        not applied for any input that does not match with the
                        original input of the model.
                        If both --mean and --scale are specified,
                        the mean is subtracted first and then scale is applied
                        regardless of the order of options in command line.
  --reverse_input_channels
                        Switch the input channels order from RGB to BGR (or
                        vice versa). Applied to original inputs of the model
                        if and only if a number of channels equals 3.
                        When --mean_values/--scale_values are also specified,
                        reversing of channels will be applied to user's input
                        data first, so that numbers in --mean_values and
                        --scale_values go in the order of channels used in
                        the original model. In other words, if both options are
                        specified then the data flow in the model looks as following:
                        Parameter -> ReverseInputChannels -> Mean/Scale apply -> the original body of the model.
  --log_level {CRITICAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}
                        Logger level
  --input INPUT         Quoted list of comma-separated input nodes names with shapes,
                        data types, and values for freezing. The order of inputs in converted
                        model is the same as order of specified operation names. The shape and value are
                        specified as space-separated lists. The data type of input
                        node is specified in braces and can have one of the values:
                        f64 (float64), f32 (float32), f16 (float16), i64 (int64),
                        i32 (int32), u8 (uint8), boolean (bool). Data type is optional.
                        If it's not specified explicitly then there are two options:
                        if input node is a parameter, data type is taken from the
                        original node dtype, if input node is not a parameter, data type
                        is set to f32. Example, to set `input_1` with shape [1 100],
                        and Parameter node `sequence_len` with scalar input with value `150`,
                        and boolean input `is_training` with `False` value use the
                        following format: "input_1[1 10],sequence_len->150,is_training->False".
                        Another example, use the following format to set input port 0
                        of the node `node_name1` with the shape [3 4] as an input node
                        and freeze output port 1 of the node `node_name2` with the
                        value [20 15] of the int32 type and shape [2]:
                        "0:node_name1[3 4],node_name2:1[2]{i32}->[20 15]".
  --output OUTPUT       The name of the output operation of the model. For
                        TensorFlow*, do not add :0 to this name.
                        The order of outputs in converted model is the same as order of
                        specified operation names.
  --mean_values MEAN_VALUES, -ms MEAN_VALUES
                        Mean values to be used for the input image per
                        channel. Values to be provided in the (R,G,B) or
                        [R,G,B] format. Can be defined for desired input of
                        the model, for example: "--mean_values
                        data[255,255,255],info[255,255,255]". The exact
                        meaning and order of channels depend on how the
                        original model was trained.
  --scale_values SCALE_VALUES
                        Scale values to be used for the input image per
                        channel. Values are provided in the (R,G,B) or [R,G,B]
                        format. Can be defined for desired input of the model,
                        for example: "--scale_values
                        data[255,255,255],info[255,255,255]". The exact
                        meaning and order of channels depend on how the
                        original model was trained.
                        If both --mean_values and --scale_values are specified,
                        the mean is subtracted first and then scale is applied
                        regardless of the order of options in command line.
  --data_type {FP16,FP32,half,float}
                        Data type for all intermediate tensors and weights. If
                        original model is in FP32 and --data_type=FP16 is
                        specified, all model weights and biases are compressed
                        to FP16.
  --disable_fusing      [DEPRECATED] Turn off fusing of linear operations to Convolution.
  --disable_resnet_optimization
                        [DEPRECATED] Turn off ResNet optimization.
  --finegrain_fusing FINEGRAIN_FUSING
                        [DEPRECATED] Regex for layers/operations that won't be fused.
                        Example: --finegrain_fusing Convolution1,.*Scale.*
  --enable_concat_optimization
                        Turn on Concat optimization.
  --extensions EXTENSIONS
                        Directory or a comma separated list of directories
                        with extensions. To disable all extensions including
                        those that are placed at the default location, pass an
                        empty string.
  --batch BATCH, -b BATCH
                        Input batch size
  --version             Version of Model Optimizer
  --silent              Prevent any output messages except those that
                        correspond to log level equals ERROR, that can be set
                        with the following option: --log_level. By default,
                        log level is already ERROR.
  --freeze_placeholder_with_value FREEZE_PLACEHOLDER_WITH_VALUE
                        Replaces input layer with constant node with provided
                        value, for example: "node_name->True". It will be
                        DEPRECATED in future releases. Use --input option to
                        specify a value for freezing.
  --static_shape        Enables IR generation for fixed input shape (folding
                        `ShapeOf` operations and shape-calculating sub-graphs
                        to `Constant`). Changing model input shape using
                        the OpenVINO Runtime API in runtime may fail for such an IR.
  --disable_weights_compression
                        [DEPRECATED] Disable compression and store weights with original
                        precision.
  --progress            Enable model conversion progress display.
  --stream_output       Switch model conversion progress display to a
                        multiline mode.
  --transformations_config TRANSFORMATIONS_CONFIG
                        Use the configuration file with transformations
                        description.
  --use_new_frontend    Force the usage of new Frontend of Model Optimizer for model conversion into IR.
                        The new Frontend is C++ based and is available for ONNX* and PaddlePaddle* models.
                        Model optimizer uses new Frontend for ONNX* and PaddlePaddle* by default that means
                        `--use_new_frontend` and `--use_legacy_frontend` options are not specified.
  --use_legacy_frontend Force the usage of legacy Frontend of Model Optimizer for model conversion into IR.
                        The legacy Frontend is Python based and is available for TensorFlow*, ONNX*, MXNet*,
                        Caffe*, and Kaldi* models.
mo --input_model ocr.onnx --input data,seq_len --input_shape [3,150,200,1],[3]
```
The sections below provide details on using particular parameters and examples of CLI commands.
The alternative way to specify input shapes is to use the `--input` parameter as follows:

## When to Specify Mean and Scale Values
Usually, neural network models are trained with normalized input data. This means that the input data values are converted to be in a specific range, for example, `[0, 1]` or `[-1, 1]`. Sometimes the mean values (mean images) are subtracted from the input data values as part of the pre-processing. There are two cases of how the input data pre-processing is implemented.
* The input pre-processing operations are a part of a topology. In this case, the application that uses the framework to infer the topology does not pre-process the input.
* The input pre-processing operations are not a part of a topology and the pre-processing is performed within the application which feeds the model with input data.

In the first case, the Model Optimizer generates the IR with required pre-processing operations and OpenVINO Samples may be used to infer the model.

In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it into the generated IR. Model Optimizer provides a number of command line parameters to specify them: `--mean`, `--scale`, `--scale_values`, `--mean_values`.

> **NOTE:** If both mean and scale values are specified, the mean is subtracted first and then the scale is applied regardless of the order of options in the command line. Input values are *divided* by the scale value(s). If the `--reverse_input_channels` option is also used, reverse_input_channels will be applied first, then mean and after that scale.

There is no universal recipe for determining the mean/scale values for a particular model. The steps below could help to determine them:
* Read the model documentation. Usually the documentation describes mean/scale values if the pre-processing is required.
* Open the example script/application executing the model and track how the input data is read and passed to the framework.
* Open the model in a visualization tool and check for layers performing subtraction or multiplication (like `Sub`, `Mul`, `ScaleShift`, `Eltwise`, etc.) of the input data. If such layers exist, pre-processing is probably part of the model.
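
To illustrate the note above, a conversion that combines channel reversal with mean and scale values could look like this (the model name and the numeric values are placeholders):
```sh
mo --input_model <INPUT_MODEL> --reverse_input_channels --mean_values [123.68,116.78,103.94] --scale_values [58.82,58.82,58.82]
```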

## When to Specify Input Shapes <a name="when_to_specify_input_shapes"></a>
There are situations when Model Optimizer is unable to deduce input shapes of the model, for example, in case of model cutting due to unsupported operations.
The solution is to provide input shapes of a static rank explicitly.

## When to Reverse Input Channels <a name="when_to_reverse_input_channels"></a>
Input data for your application can be of RGB or BGR color input order. For example, OpenVINO Samples load input images in the BGR channel order. However, the model may be trained on images loaded with the opposite order (for example, most TensorFlow\* models are trained with images in RGB order). In this case, inference results using the OpenVINO samples may be incorrect. The solution is to provide the `--reverse_input_channels` command-line parameter. With this parameter, the Model Optimizer modifies the weights of the first convolution or other channel-dependent operation so that the output of these operations is as if the image were passed with the RGB channel order.

## When to Specify `--static_shape` Command Line Parameter
If the `--static_shape` command line parameter is specified, the Model Optimizer evaluates shapes of all operations in the model (shape propagation) for a fixed input shape (or shapes). During the shape propagation, the Model Optimizer evaluates *Shape* operations and removes them from the computation graph. With that approach, the initial model, which can consume inputs of different shapes, may be converted to an IR working with an input of one fixed shape only. For example, consider the case when some blob is reshaped from 4D of a shape *[N, C, H, W]* to a shape *[N, C, H \* W]*. During the model conversion, the Model Optimizer calculates the output shape as a constant 1D blob with values *[N, C, H \* W]*. So if the input shape changes to some other value *[N, C, H1, W1]* (a possible scenario for a fully convolutional model), then the reshape layer becomes invalid.
The resulting Intermediate Representation will not be resizable with the help of the OpenVINO Runtime API.
## Examples of CLI Commands

Launch the Model Optimizer for the Caffe bvlc_alexnet model with debug log level:
```sh
mo --input_model bvlc_alexnet.caffemodel --log_level DEBUG
mo --input_model ocr.onnx --input data[3 150 200 1],seq_len[3]
```

Launch the Model Optimizer for the Caffe bvlc_alexnet model with the output IR called `result.*` in the specified `output_dir`:
The parameter `--input_shape` allows overriding original input shapes with shapes compatible with a given model.
Dynamic shapes, i.e. shapes with dynamic dimensions, in the original model can be replaced with static shapes for the converted model, and vice versa.
The dynamic dimension can be marked in the Model Optimizer command line as `-1` or `?`.
For example, launch the Model Optimizer for the ONNX* OCR model and specify a dynamic batch dimension for the inputs.

```sh
mo --input_model bvlc_alexnet.caffemodel --model_name result --output_dir <OUTPUT_MODEL_DIR>
mo --input_model ocr.onnx --input data,seq_len --input_shape [-1,150,200,1],[-1]
```

Launch the Model Optimizer for the Caffe bvlc_alexnet model with one input with scale values:
To optimize memory consumption for models with undefined dimensions in run-time, Model Optimizer provides the capability to define boundaries of dimensions.
The boundaries of an undefined dimension can be specified with an ellipsis.
For example, launch the Model Optimizer for the ONNX* OCR model and specify a boundary for the batch dimension.

```sh
mo --input_model bvlc_alexnet.caffemodel --scale_values [59,59,59]
mo --input_model ocr.onnx --input data,seq_len --input_shape [1..3,150,200,1],[1..3]
```

Launch the Model Optimizer for the Caffe bvlc_alexnet model with multiple inputs with scale values:
## When to Specify --static_shape Command-line Parameter
Model Optimizer provides the `--static_shape` parameter that allows evaluating shapes of all operations in the model for fixed input shapes
and folding shape-computing sub-graphs into constants. The resulting IR can be more compact in size and the loading time for such an IR can be decreased.
However, the resulting IR will not be reshape-able with the help of the `reshape` method from the OpenVINO Runtime API.
It is worth noting that the `--input_shape` parameter does not affect reshape-ability of the model.

For example, launch the Model Optimizer for the ONNX* OCR model using `--static_shape`.

```sh
mo --input_model bvlc_alexnet.caffemodel --input data,rois --scale_values [59,59,59],[5,5,5]
mo --input_model ocr.onnx --input data[3 150 200 1],seq_len[3] --static_shape
```

Launch the Model Optimizer for the Caffe bvlc_alexnet model with multiple inputs with scale and mean values specified for the particular nodes:
```sh
mo --input_model bvlc_alexnet.caffemodel --input data,rois --mean_values data[59,59,59] --scale_values rois[5,5,5]
```

Launch the Model Optimizer for the Caffe bvlc_alexnet model with a specified input layer, overridden input shape, scale 5, batch 8 and a specified name of an output operation:
```sh
mo --input_model bvlc_alexnet.caffemodel --input data --output pool5 -s 5 -b 8
```

Launch the Model Optimizer for the Caffe bvlc_alexnet model with reversed input channels order between RGB and BGR, specified mean values to be used for the input image per channel and a specified data type for input tensor values:
```sh
mo --input_model bvlc_alexnet.caffemodel --reverse_input_channels --mean_values [255,255,255] --data_type FP16
```

Launch the Model Optimizer for the Caffe bvlc_alexnet model with extensions listed in the specified directories and the specified mean_images binaryproto
file. For more information about extensions, please refer to the [OpenVINO™ Extensibility Mechanism](../../../Extensibility_UG/Intro.md).
```sh
mo --input_model bvlc_alexnet.caffemodel --extensions /home/,/some/other/path/ --mean_file /path/to/binaryproto
```

Launch the Model Optimizer for the TensorFlow* FaceNet* model with a placeholder freezing value.
It replaces the placeholder with a constant layer that contains the passed value.
For more information about FaceNet conversion, please refer to [this](tf_specific/Convert_FaceNet_From_Tensorflow.md) page.
```sh
mo --input_model FaceNet.pb --input "phase_train->False"
```
Launch the Model Optimizer for any model with a placeholder freezing tensor of values.
It replaces the placeholder with a constant layer that contains the passed values.

The tensor here is represented in square brackets with each value separated from the next by whitespace.
If the data type is set in the model, this tensor will be reshaped to the placeholder shape and cast to the placeholder data type.
Otherwise, it will be cast to the data type passed to the `--data_type` parameter (by default, it is FP32).
```sh
mo --input_model FaceNet.pb --input "placeholder_layer_name->[0.1 1.2 2.3]"
```

## See Also
* [Configuring the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md)
* [IR Notation Reference](../../IR_and_opsets.md)
* [Model Optimizer Extensibility](../customize_model_optimizer/Customize_Model_Optimizer.md)
* [Model Cutting](Cutting_Model.md)
* [Introduction](../../Deep_Learning_Model_Optimizer_DevGuide.md)
* [Cutting Off Parts of a Model](Cutting_Model.md)

File diff suppressed because it is too large
@ -1,6 +1,6 @@
# Convert Kaldi* ASpIRE Chain Time Delay Neural Network (TDNN) Model to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_kaldi_specific_Aspire_Tdnn_Model}
# Convert Kaldi* ASpIRE Chain Time Delay Neural Network (TDNN) Model {#openvino_docs_MO_DG_prepare_model_convert_model_kaldi_specific_Aspire_Tdnn_Model}

You can [download a pre-trained model](https://kaldi-asr.org/models/1/0001_aspire_chain_model.tar.gz)
for the ASpIRE Chain Time Delay Neural Network (TDNN) from the Kaldi* project official website.

## Convert ASpIRE Chain TDNN Model to IR
@ -10,15 +10,15 @@ To generate the Intermediate Representation (IR) of the model, run the Model Opt
mo --input_model exp/chain/tdnn_7b/final.mdl --output output
```

The IR will have two inputs: `input` for data and `ivector` for ivectors.

## Example: Run ASpIRE Chain TDNN Model with the Speech Recognition Sample

These instructions show how to run the converted model with the [Speech Recognition sample](../../../../../samples/cpp/speech_sample/README.md).
In this example, the input data contains one utterance from one speaker.

To follow the steps described below, you must first do the following:
1. Download a [Kaldi repository](https://github.com/kaldi-asr/kaldi).
2. Build it using instructions in `README.md` in the repository.
3. Download the [model archive](https://kaldi-asr.org/models/1/0001_aspire_chain_model.tar.gz) from the Kaldi website.
4. Extract the downloaded model archive to the `egs/aspire/s5` folder of the Kaldi repository.
@ -49,10 +49,10 @@ cd <path_to_kaldi_repo>/egs/aspire/s5/

2. Extract ivectors from the data:
```sh
./steps/online/nnet2/extract_ivectors_online.sh --nj 1 --ivector_period <max_frame_count_in_utterance> <data folder> exp/tdnn_7b_chain_online/ivector_extractor <ivector folder>
```
To simplify the preparation of ivectors for the Speech Recognition sample,
specify the maximum number of frames in utterances as a parameter for `--ivector_period`
to get only one ivector per utterance.

To get the maximum number of frames in utterances, you can use the following command line:
@ -71,7 +71,7 @@ cd <ivector folder>
<path_to_kaldi_repo>/src/featbin/copy-feats --binary=False ark:ivector_online.1.ark ark,t:ivector_online.1.ark.txt
```

5. For the Speech Recognition sample, the `.ark` file must contain an ivector
for each frame. You must copy the ivector `frame_count` times.
To do this, you can run the following script in the Python* command prompt:
```python
@ -108,5 +108,5 @@ Run the Speech Recognition sample with the created ivector `.ark` file as follow
speech_sample -i feats.ark,ivector_online_ie.ark -m final.xml -d CPU -o prediction.ark -cw_l 17 -cw_r 12
```

Results can be decoded as described in the "Use of Sample in Kaldi* Speech Recognition Pipeline" chapter
in [the Speech Recognition Sample description](../../../../../samples/cpp/speech_sample/README.md).
||||
|
@ -1,15 +1,15 @@
|
||||
# Converting GluonCV* Models {#openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_GluonCV_Models}
# Convert MXNet GluonCV* Models {#openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_GluonCV_Models}

This document provides the instructions and examples on how to use Model Optimizer to convert [GluonCV SSD and YOLO-v3 models](https://gluon-cv.mxnet.io/model_zoo/detection.html) to IR.

1. Choose the topology available from the [GluonCV Model Zoo](https://gluon-cv.mxnet.io/model_zoo/detection.html) and export to the MXNet format using the GluonCV API. For example, for the `ssd_512_mobilenet1.0` topology:
```python
from gluoncv import model_zoo, data, utils
from gluoncv.utils import export_block
net = model_zoo.get_model('ssd_512_mobilenet1.0_voc', pretrained=True)
export_block('ssd_512_mobilenet1.0_voc', net, preprocess=True, layout='HWC')
```
As a result, you will get an MXNet model representation in `ssd_512_mobilenet1.0.params` and `ssd_512_mobilenet1.0.json` files generated in the current directory.
2. Run the Model Optimizer tool specifying the `--enable_ssd_gluoncv` option. Make sure the `--input_shape` parameter is set to the input shape layout of your model (NHWC or NCHW). The examples below illustrate running the Model Optimizer for the SSD and YOLO-v3 models trained with the NHWC layout and located in the `<model_directory>`:
* **For GluonCV SSD topologies:**
```sh
||||
|
@ -1,4 +1,4 @@
# Converting a Style Transfer Model from MXNet* {#openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet}
# Convert MXNet Style Transfer Model {#openvino_docs_MO_DG_prepare_model_convert_model_mxnet_specific_Convert_Style_Transfer_From_MXNet}

The tutorial explains how to generate a model for style transfer using the public MXNet\* neural style transfer sample.
To use the style transfer sample from OpenVINO™, follow the steps below, as no public pre-trained style transfer model is provided with the OpenVINO toolkit.
@ -86,8 +86,8 @@ import make_image
maker = make_image.Maker('models/13', (1024, 768))
maker.generate('output.jpg', '../images/tubingen.jpg')
```
Where the `models/13` string is composed of the following substrings:
* `models/`: path to the folder that contains .nd files with pre-trained styles weights
* `13`: prefix pointing to 13_decoder, which is the default decoder for the repository.

> **NOTE**: If you get an error saying "No module named 'cPickle'", try running the script from this step in Python 2. Then return to Python 3 for the remaining steps.
@ -114,4 +114,4 @@ cp models/13_decoder_auxs.nd nst_model
```sh
mo --input_symbol <path/to/nst_model>/nst_vgg19-symbol.json --framework mxnet --output_dir <path/to/output_dir> --input_shape [1,3,224,224] --nd_prefix_name 13_decoder --pretrained_model <path/to/nst_model>/vgg19-0000.params
```
4. The IR is generated (`.bin`, `.xml` and `.mapping` files) in the specified output directory and is ready to be consumed by the OpenVINO Runtime.
|
||||
|
@ -1,32 +0,0 @@
|
||||
[DEPRECATED] Convert ONNX* DLRM to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM}
===============================

> **NOTE**: These instructions are currently deprecated. Since OpenVINO™ 2020.4 version, no specific steps are needed to convert ONNX\* DLRM models. For general instructions on converting ONNX models, please refer to the [Converting a ONNX* Model](../Convert_Model_From_ONNX.md) topic.

These instructions are applicable only to the DLRM converted to the ONNX* file format from the [facebookresearch/dlrm model](https://github.com/facebookresearch/dlrm).

**Step 1**. Save the trained PyTorch* model to ONNX* format or download a pretrained ONNX* model from the
[MLCommons/inference/recommendation/dlrm](https://github.com/mlcommons/inference/tree/r1.0/recommendation/dlrm/pytorch#supported-models) repository.
If you train the model using the [script provided in the model repository](https://github.com/facebookresearch/dlrm/blob/master/dlrm_s_pytorch.py), just add the `--save-onnx` flag to the command line parameters and you'll get the `dlrm_s_pytorch.onnx` file containing the model serialized in ONNX* format.

**Step 2**. To generate the Intermediate Representation (IR) of the model, change your current working directory to the Model Optimizer installation directory and run the Model Optimizer with the following parameters:
```sh
mo --input_model dlrm_s_pytorch.onnx
```

Note that the PyTorch model uses the `torch.nn.EmbeddingBag` operation. This operation is converted to ONNX as a custom `ATen` layer that is not directly supported by OpenVINO*, but it is possible to convert this operation to:
* `Gather` if each "bag" consists of exactly one index. In this case, the `offsets` input becomes obsolete and is not needed. It will be removed during conversion.
* `ExperimentalSparseWeightedSum` if "bags" contain more than one index. In this case, Model Optimizer will print a warning that pre-processing of offsets is needed, because `ExperimentalSparseWeightedSum` and `torch.nn.EmbeddingBag` have different input formats.
For example, if you have an `indices` input of shape [indices_shape] and an `offsets` input of shape [num_bags], you need to get offsets of shape [indices_shape, 2]. To do that, you may use the following code snippet:
```python
import numpy as np

new_offsets = np.zeros((indices.shape[-1], 2), dtype=np.int32)
new_offsets[:, 1] = np.arange(indices.shape[-1])
bag_index = 0
for i in range(offsets.shape[-1] - 1):
    new_offsets[offsets[i]:offsets[i + 1], 0] = bag_index
    bag_index += 1
new_offsets[offsets[-1]:, 0] = bag_index
```
If you have more than one `torch.nn.EmbeddingBag` operation, you'll need to do that for every offsets input. If your offsets have the same shape, they will be merged into one input of shape [num_embedding_bags, indices_shape, 2].
|
@ -1,4 +1,4 @@
|
||||
# Convert ONNX* Faster R-CNN Model to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Faster_RCNN}
# Convert ONNX* Faster R-CNN Model {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Faster_RCNN}

These instructions are applicable only to the Faster R-CNN model converted to the ONNX* file format from the [facebookresearch/maskrcnn-benchmark model](https://github.com/facebookresearch/maskrcnn-benchmark).

@ -11,7 +11,7 @@ These instructions are applicable only to the Faster R-CNN model converted to th
    --input_shape [1,3,800,800] \
    --input 0:2 \
    --mean_values [102.9801,115.9465,122.7717] \
    --transformations_config front/onnx/faster_rcnn.json
```

Note that the height and width specified with the `input_shape` command line parameter could be different. Refer to the [documentation](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/faster-rcnn) for more information about supported input image dimensions and required pre- and post-processing steps.
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Convert ONNX* GPT-2 Model to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_GPT2}
# Convert ONNX* GPT-2 Model {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_GPT2}

[Public pre-trained GPT-2 model](https://github.com/onnx/models/tree/master/text/machine_comprehension/gpt-2) is a large
transformer-based language model with a simple objective: predict the next word, given all of the previous words within some text.
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Convert ONNX* Mask R-CNN Model to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Mask_RCNN}
# Convert ONNX* Mask R-CNN Model {#openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Mask_RCNN}

These instructions are applicable only to the Mask R-CNN model converted to the ONNX* file format from the [facebookresearch/maskrcnn-benchmark model](https://github.com/facebookresearch/maskrcnn-benchmark).

@ -11,7 +11,7 @@ These instructions are applicable only to the Mask R-CNN model converted to the
    --input "0:2" \
    --input_shape [1,3,800,800] \
    --mean_values [102.9801,115.9465,122.7717] \
    --transformations_config front/onnx/mask_rcnn.json
```

Note that the height and width specified with the `input_shape` command line parameter could be different. Refer to the [documentation](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/mask-rcnn) for more information about supported input image dimensions and required pre- and post-processing steps.
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Convert PyTorch* BERT-NER to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner}
# Convert PyTorch* BERT-NER Model {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner}

## Download and Convert the Model to ONNX*
|
||||
|
||||
|
@ -0,0 +1,28 @@
|
||||
# Convert PyTorch Cascade RCNN R-101 Model {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Cascade_RCNN_res101}
|
||||
|
||||
## Download and Convert Model to ONNX
|
||||
|
||||
* Clone the [repository](https://github.com/open-mmlab/mmdetection):
|
||||
|
||||
```bash
|
||||
git clone https://github.com/open-mmlab/mmdetection
|
||||
cd mmdetection
|
||||
```
|
||||
|
||||
> **NOTE**: To set up an environment, refer to this [instruction](https://github.com/open-mmlab/mmdetection/blob/master/docs/en/get_started.md#installation).
|
||||
|
||||
* Download the pre-trained [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth). You can also find the link to the model [here](https://github.com/open-mmlab/mmdetection/blob/master/configs/cascade_rcnn/README.md).
|
||||
|
||||
* To convert the model to ONNX format, use this [script](https://github.com/open-mmlab/mmdetection/blob/master/tools/deployment/pytorch2onnx.py).
|
||||
|
||||
```bash
|
||||
python3 tools/deployment/pytorch2onnx.py configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth --output-file cascade_rcnn_r101_fpn_1x_coco.onnx
|
||||
```
|
||||
|
||||
The script generates ONNX model file `cascade_rcnn_r101_fpn_1x_coco.onnx` in the directory `tools/deployment/`. If required, you can specify the model name or output directory using `--output-file <path-to-dir>/<model-name>.onnx`
|
||||
|
||||
## Convert ONNX Cascade RCNN R-101 Model to IR
|
||||
|
||||
```bash
|
||||
mo --input_model cascade_rcnn_r101_fpn_1x_coco.onnx --mean_values [123.675,116.28,103.53] --scale_values [58.395,57.12,57.375]
|
||||
```
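
The resulting IR can then be compiled and exercised with the OpenVINO Runtime Python API. The sketch below is illustrative only: it assumes the IR file name produced by the `mo` call above, a CPU target, and a static input shape declared by the exported graph:

```python
import numpy as np
from openvino.runtime import Core

core = Core()
model = core.read_model("cascade_rcnn_r101_fpn_1x_coco.xml")
compiled = core.compile_model(model, "CPU")

# Build a dummy NCHW input matching whatever static shape the exported graph declares
# (a real application would feed a preprocessed image instead).
input_shape = list(compiled.input(0).shape)
dummy = np.zeros(input_shape, dtype=np.float32)

request = compiled.create_infer_request()
results = request.infer({compiled.input(0): dummy})
for output, value in results.items():
    print(output.get_any_name(), value.shape)
```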

@ -1,4 +1,4 @@
# Convert PyTorch* F3Net to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net}
# Convert PyTorch* F3Net Model {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net}

[F3Net](https://github.com/weijun88/F3Net): Fusion, Feedback and Focus for Salient Object Detection

@ -7,12 +7,12 @@
To clone the repository, run the following command:

```sh
git clone http://github.com/weijun88/F3Net.git
```

## Download and Convert the Model to ONNX*

To download the pre-trained model or train the model yourself, refer to the
[instruction](https://github.com/weijun88/F3Net/blob/master/README.md) in the F3Net model repository. First, convert the model to ONNX\* format. Create and run the following Python script in the `src` directory of the model repository:
```python
import torch

@ -1,4 +1,4 @@
# Convert PyTorch* QuartzNet to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet}
# Convert PyTorch* QuartzNet Model {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet}

[NeMo project](https://github.com/NVIDIA/NeMo) provides the QuartzNet model.

@ -1,4 +1,4 @@
# Convert PyTorch* RCAN to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN}
# Convert PyTorch* RCAN Model {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN}

[RCAN](https://github.com/yulunzhang/RCAN): Image Super-Resolution Using Very Deep Residual Channel Attention Networks

@ -1,22 +1,22 @@
# Convert PyTorch\* RNN-T Model to the Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT}
# Convert PyTorch* RNN-T Model {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT}

This instruction covers conversion of RNN-T model from [MLCommons](https://github.com/mlcommons) repository. Follow
the steps below to export a PyTorch* model into ONNX* before converting it to IR:

**Step 1**. Clone RNN-T PyTorch implementation from MLCommons repository (revision r1.0). Make a shallow clone to pull
only RNN-T model without full repository. If you already have a full repository, skip this and go to **Step 2**:
```bash
git clone -b r1.0 -n https://github.com/mlcommons/inference rnnt_for_openvino --depth 1
cd rnnt_for_openvino
git checkout HEAD speech_recognition/rnnt
```

**Step 2**. If you already have a full clone of MLCommons inference repository, create a folder for
pretrained PyTorch model, where conversion into IR will take place. You will also need to specify the path to
your full clone at **Step 5**. Skip this step if you have a shallow clone.

```bash
mkdir rnnt_for_openvino
cd rnnt_for_openvino
```

@ -25,7 +25,7 @@ For UNIX*-like systems you can use `wget`:
```bash
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt
```
The link was taken from `setup.sh` in the `speech_recognition/rnnt` subfolder. You will get exactly the same weights as
if you were following the steps from [https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt](https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt).

**Step 4**. Install required Python packages:
@ -33,7 +33,7 @@ if you were following the steps from [https://github.com/mlcommons/inference/tre
pip3 install torch toml
```

**Step 5**. Export RNN-T model into ONNX with the script below. Copy the code below into a file named
`export_rnnt_to_onnx.py` and run it in the current directory `rnnt_for_openvino`:

> **NOTE**: If you already have a full clone of MLCommons inference repository, you need to
@ -94,7 +94,7 @@ torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12,
python3 export_rnnt_to_onnx.py
```

After completing this step, the files `rnnt_encoder.onnx`, `rnnt_prediction.onnx`, and `rnnt_joint.onnx` will be saved in the current directory.

**Step 6**. Run the conversion commands:

@ -103,6 +103,6 @@ mo --input_model rnnt_encoder.onnx --input "input[157 1 240],feature_length->157
mo --input_model rnnt_prediction.onnx --input "symbol[1 1],hidden_in_1[2 1 320],hidden_in_2[2 1 320]"
mo --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]"
```
Please note that the hardcoded value of 157 for the sequence length was taken from MLCommons, but conversion to IR preserves
the network's [reshapeability](../../../../OV_Runtime_UG/ShapeInference.md), which means you can change input shapes manually to any value either during conversion or
inference.
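
Because reshapeability is preserved, the sequence length can also be changed after conversion. The sketch below only illustrates that point: it assumes the encoder IR produced above, uses the `input` name from the conversion command, and picks 300 time steps arbitrarily:

```python
from openvino.runtime import Core

core = Core()
model = core.read_model("rnnt_encoder.xml")
# Layout [T, N, C] as in the conversion command; switch from 157 to 300 time steps.
model.reshape({"input": [300, 1, 240]})
compiled = core.compile_model(model, "CPU")
print(compiled.input("input").shape)
```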

@ -1,4 +1,4 @@
# Convert PyTorch* YOLACT to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT}
# Convert PyTorch* YOLACT Model {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT}

You Only Look At CoefficienTs (YOLACT) is a simple, fully convolutional model for real-time instance segmentation.
The PyTorch\* implementation is publicly available in [this GitHub* repository](https://github.com/dbolya/yolact).
@ -29,7 +29,7 @@ index 547bc0a..bde0680 100644
+++ b/eval.py
@@ -593,9 +593,12 @@ def badhash(x):
    return x

def evalimage(net:Yolact, path:str, save_path:str=None):
-    frame = torch.from_numpy(cv2.imread(path)).cuda().float()
+    frame = torch.from_numpy(cv2.imread(path)).float()
@ -38,9 +38,9 @@ index 547bc0a..bde0680 100644
    batch = FastBaseTransform()(frame.unsqueeze(0))
    preds = net(batch)
+    torch.onnx.export(net, batch, "yolact.onnx", opset_version=11)

    img_numpy = prep_display(preds, frame, None, None, undo_transform=False)

diff --git a/utils/augmentations.py b/utils/augmentations.py
index cc7a73a..2420603 100644
--- a/utils/augmentations.py
@ -48,7 +48,7 @@ index cc7a73a..2420603 100644
@@ -623,8 +623,11 @@ class FastBaseTransform(torch.nn.Module):
    def __init__(self):
        super().__init__()

-        self.mean = torch.Tensor(MEANS).float().cuda()[None, :, None, None]
-        self.std = torch.Tensor( STD ).float().cuda()[None, :, None, None]
+        self.mean = torch.Tensor(MEANS).float()[None, :, None, None]
@ -57,7 +57,7 @@ index cc7a73a..2420603 100644
+            self.mean.cuda()
+            self.std.cuda()
        self.transform = cfg.backbone.transform

    def forward(self, img):
diff --git a/yolact.py b/yolact.py
index d83703b..f8c787c 100644
@ -66,7 +66,7 @@ index d83703b..f8c787c 100644
@@ -17,19 +17,22 @@ import torch.backends.cudnn as cudnn
from utils import timer
from utils.functions import MovingAverage, make_net

-# This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions.
-# See the bug report here: https://github.com/pytorch/pytorch/issues/17108
-torch.cuda.current_device()
@ -76,26 +76,26 @@ index d83703b..f8c787c 100644
-if not use_jit:
-    print('Multiple GPUs detected! Turning off JIT.')
+use_jit = False

ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module
script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn

+def decode(loc, priors):
+    variances = [0.1, 0.2]
+    boxes = torch.cat((priors[:, :2] + loc[:, :, :2] * variances[0] * priors[:, 2:], priors[:, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2)
+
+    boxes_result1 = boxes[:, :, :2] - boxes[:, :, 2:] / 2
+    boxes_result2 = boxes[:, :, 2:] + boxes[:, :, :2]
+    boxes_result2 = boxes[:, :, 2:] + boxes_result1
+    boxes_result = torch.cat((boxes_result1, boxes_result2), 2)
+
+    return boxes_result
+

class Concat(nn.Module):
    def __init__(self, nets, extra_params):
@@ -476,7 +479,10 @@ class Yolact(nn.Module):

    def load_weights(self, path):
        """ Loads weights from a compressed save file. """
-        state_dict = torch.load(path)
@ -103,23 +103,23 @@ index d83703b..f8c787c 100644
+            state_dict = torch.load(path)
+        else:
+            state_dict = torch.load(path, map_location=torch.device('cpu'))

        # For backward compatability, remove these (the new variable is called layers)
        for key in list(state_dict.keys()):
@@ -673,8 +679,11 @@ class Yolact(nn.Module):
        else:
            pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

-        return self.detect(pred_outs, self)
+        pred_outs['boxes'] = decode(pred_outs['loc'], pred_outs['priors']) # decode output boxes

+        pred_outs.pop('priors') # remove unused in postprocessing layers
+        pred_outs.pop('loc') # remove unused in postprocessing layers
+        return pred_outs

--
```
3. Save and close the file.

@ -1,4 +1,4 @@
# Convert TensorFlow* Attention OCR Model to Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_AttentionOCR_From_Tensorflow}
# Convert TensorFlow Attention OCR Model {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_AttentionOCR_From_Tensorflow}

This tutorial explains how to convert the Attention OCR (AOCR) model from the [TensorFlow* Attention OCR repository](https://github.com/emedvedev/attention-ocr) to the Intermediate Representation (IR).

@ -20,7 +20,7 @@ The original AOCR model contains data preprocessing which consists of the follow
* Decoding input data to binary format where input data is an image represented as a string.
* Resizing binary image to working resolution.

After that, the resized image is sent to the convolutional neural network (CNN). The Model Optimizer does not support image decoding, so you should cut off the preprocessing part of the model using the `--input` command line parameter.
```sh
mo \
--input_model=model/path/frozen_graph.pb \

@ -1,4 +1,4 @@
# Convert TensorFlow* BERT Model to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow}
# Convert TensorFlow BERT Model {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_BERT_From_Tensorflow}

Pre-trained models for BERT (Bidirectional Encoder Representations from Transformers) are
[publicly available](https://github.com/google-research/bert).
@ -112,7 +112,7 @@ Run the Model Optimizer with the following command line parameters to generate r
```sh
mo \
--input_model inference_graph.pb \
--input "IteratorGetNext:0{i32}[1 128],IteratorGetNext:1{i32}[1 128],IteratorGetNext:4{i32}[1 128]"
```
For other applicable parameters, refer to [Convert Model from TensorFlow](../Convert_Model_From_TensorFlow.md).
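
At inference time the resulting IR expects three `i32` tensors of shape `[1 128]`. The sketch below is only an illustration: the IR file name is an assumption, and the order in which token IDs, input mask, and segment IDs map to the `IteratorGetNext` ports must be verified against the graph you exported:

```python
import numpy as np
from openvino.runtime import Core

core = Core()
compiled = core.compile_model("inference_graph.xml", "CPU")  # IR file name is an assumption

token_ids = np.zeros((1, 128), dtype=np.int32)     # real IDs come from your tokenizer
input_mask = np.ones((1, 128), dtype=np.int32)
segment_ids = np.zeros((1, 128), dtype=np.int32)

# Print the input names once and adjust the order below if it does not match your graph.
for port in compiled.inputs:
    print(port.get_any_name(), port.shape)

results = compiled.create_infer_request().infer(
    dict(zip(compiled.inputs, [token_ids, input_mask, segment_ids]))
)
```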

@ -1,4 +1,4 @@
# Convert CRNN* Models to the Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_CRNN_From_Tensorflow}
# Convert TensorFlow CRNN Model {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_CRNN_From_Tensorflow}

This tutorial explains how to convert a CRNN model to Intermediate Representation (IR).

@ -1,4 +1,4 @@
# Convert TensorFlow* DeepSpeech Model to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_DeepSpeech_From_Tensorflow}
# Convert TensorFlow DeepSpeech Model {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_DeepSpeech_From_Tensorflow}

[DeepSpeech project](https://github.com/mozilla/DeepSpeech) provides an engine to train speech-to-text models.

@ -9,7 +9,7 @@ Create a directory where model and metagraph with pretrained weights will be sto
mkdir deepspeech
cd deepspeech
```
[Pretrained English speech-to-text model](https://github.com/mozilla/DeepSpeech/releases/tag/v0.8.2) is publicly available.
To download the model, follow the instruction below:

* For UNIX*-like systems, run the following command:
@ -24,7 +24,7 @@ wget -O - https://github.com/mozilla/DeepSpeech/releases/download/v0.8.2/deepspe

## Freeze the Model into a *.pb File

After unpacking the archives above, you have to freeze the model. Note that this requires
TensorFlow* version 1, which is not available under Python 3.8, so you need Python 3.7 or lower.
Before freezing, deploy a virtual environment and install the required packages:
```
@ -37,29 +37,29 @@ Freeze the model with the following command:
```
python3 DeepSpeech.py --checkpoint_dir ../deepspeech-0.8.2-checkpoint --export_dir ../
```
After that, you will get the pretrained frozen model file `output_graph.pb` in the `deepspeech` directory created at
the beginning. The model contains the preprocessing and main parts. The first (preprocessing) part performs conversion of the input
spectrogram into a form useful for speech recognition (mel). This part of the model is not convertible into
IR because it contains unsupported operations `AudioSpectrogram` and `Mfcc`.

The main and most computationally expensive part of the model converts the preprocessed audio into text.
There are two specificities with the supported part of the model.

The first is that the model contains an input with sequence length, so the model can be converted only with
a fixed input length shape; thus, the model is not reshapeable.
Refer to [Using Shape Inference](../../../../OV_Runtime_UG/ShapeInference.md).

The second is that the frozen model still has two variables, `previous_state_c` and `previous_state_h`; the figure
with the frozen *.pb model is shown below. It means that the model keeps training these variables at each inference.

At the first inference, the variables are initialized with zero tensors. After executing, the results of the `BlockLSTM`
are assigned to cell state and hidden state, which are these two variables.

## Convert the Main Part of DeepSpeech Model into IR

Model Optimizer assumes that the output model is for inference only. That is why you should cut the `previous_state_c`
and `previous_state_h` variables off and resolve keeping cell and hidden states on the application level.

There are certain limitations for the model conversion:
@ -75,7 +75,7 @@ mo \
```

Where:
* `input_lengths->[16]` replaces the input node with name "input_lengths" with a constant tensor of shape [1] with a
single integer value 16. This means that the model now can consume input sequences of length 16 only.
* `input_node[1 16 19 26],previous_state_h[1 2048],previous_state_c[1 2048]` replaces the variables with a placeholder.
* `--output ".../GatherNd_1,.../GatherNd,logits"` specifies the output node names.
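
Keeping the cell and hidden states at the application level then amounts to feeding the state tensors back in on every call. The sketch below is illustrative only: the IR file name is an assumption, the input names come from the conversion command above, and the outputs that carry the updated states have to be identified by inspecting the converted model:

```python
import numpy as np
from openvino.runtime import Core

core = Core()
compiled = core.compile_model("output_graph.xml", "CPU")  # IR file name is an assumption
request = compiled.create_infer_request()

state_h = np.zeros((1, 2048), dtype=np.float32)
state_c = np.zeros((1, 2048), dtype=np.float32)
chunks = [np.zeros((1, 16, 19, 26), dtype=np.float32)]  # stand-in for your MFCC front end

for chunk in chunks:
    results = request.infer({
        "input_node": chunk,
        "previous_state_h": state_h,
        "previous_state_c": state_c,
    })
    # Print the output names once, then assign the tensors that correspond to the
    # new cell and hidden states back to state_c / state_h for the next iteration.
    for port, value in results.items():
        print(port.get_any_name(), value.shape)
```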

@ -1,11 +1,11 @@
# Converting EfficientDet Models from TensorFlow {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models}
# Convert TensorFlow EfficientDet Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models}

This tutorial explains how to convert EfficientDet\* public object detection models to the Intermediate Representation (IR).

## <a name="efficientdet-to-ir"></a>Convert EfficientDet Model to IR

On GitHub*, you can find several public versions of EfficientDet model implementation. This tutorial explains how to
convert models from the [https://github.com/google/automl/tree/master/efficientdet](https://github.com/google/automl/tree/master/efficientdet)
repository (commit 96e1fee) to IR.

### Get Frozen TensorFlow\* Model
@ -60,15 +60,15 @@ dictionary in the [hparams_config.py](https://github.com/google/automl/blob/96e1
The attribute `image_size` specifies the shape to be used for the model conversion.

The `transformations_config` command line parameter specifies the configuration json file containing hints
to the Model Optimizer on how to convert the model and trigger transformations implemented in the
`<PYTHON_SITE_PACKAGES>/openvino/tools/mo/front/tf/AutomlEfficientDet.py`. The json file contains some parameters which must be changed if you
train the model yourself and modified the `hparams_config` file or the parameters are different from the ones used for EfficientDet-D4.
The attribute names are self-explanatory or match the name in the `hparams_config` file.

> **NOTE**: The color channel order (RGB or BGR) of the input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to **When to Reverse Input Channels** section of [Converting a Model to Intermediate Representation (IR)](../Converting_Model.md).

OpenVINO™ toolkit provides samples that can be used to infer EfficientDet model. For more information, refer to
[Open Model Zoo Demos](@ref omz_demos) and

## <a name="efficientdet-ir-results-interpretation"></a>Interpreting Results of the TensorFlow Model and the IR

@ -90,9 +90,4 @@ The output of the IR is a list of 7-element tuples: `[image_id, class_id, confid
* `x_max` -- normalized `x` coordinate of the upper right corner of the detected object.
* `y_max` -- normalized `y` coordinate of the upper right corner of the detected object.

The first element with `image_id = -1` means end of data.

---
## See Also

* [Sub-Graph Replacement in Model Optimizer](../../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md)
The first element with `image_id = -1` means end of data.
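
A small, self-contained sketch of how such output could be decoded (the threshold and image size are arbitrary examples):

```python
import numpy as np

def parse_detections(detections, image_width, image_height, threshold=0.5):
    """Turn [image_id, class_id, confidence, x_min, y_min, x_max, y_max] rows into pixel boxes."""
    boxes = []
    for image_id, class_id, confidence, x_min, y_min, x_max, y_max in detections.reshape(-1, 7):
        if image_id == -1:        # end-of-data marker described above
            break
        if confidence < threshold:
            continue
        boxes.append((int(class_id), float(confidence),
                      int(x_min * image_width), int(y_min * image_height),
                      int(x_max * image_width), int(y_max * image_height)))
    return boxes

# One fake detection covering the left half of a 640x480 image, followed by the end marker.
fake = np.array([[0, 1, 0.9, 0.0, 0.0, 0.5, 1.0],
                 [-1, 0, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32)
print(parse_detections(fake, 640, 480))
```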

@ -1,4 +1,4 @@
# Convert TensorFlow* FaceNet Models to Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_FaceNet_From_Tensorflow}
# Convert TensorFlow FaceNet Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_FaceNet_From_Tensorflow}

[Public pre-trained FaceNet models](https://github.com/davidsandberg/facenet#pre-trained-models) contain both the training
and inference parts of the graph. Switching between these two states is managed with a placeholder value.

@ -1,4 +1,4 @@
# Convert GNMT* Model to the Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_GNMT_From_Tensorflow}
# Convert TensorFlow GNMT Model {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_GNMT_From_Tensorflow}

This tutorial explains how to convert Google\* Neural Machine Translation (GNMT) model to the Intermediate Representation (IR).

@ -17,20 +17,20 @@ index 2cbef07..e185490 100644
+++ b/nmt/inference.py
@@ -17,9 +17,11 @@
from __future__ import print_function

import codecs
+import os
import time

import tensorflow as tf
+from tensorflow.python.framework import graph_io

from . import attention_model
from . import gnmt_model
@@ -105,6 +107,29 @@ def start_sess_and_load_model(infer_model, ckpt_path):
  return sess, loaded_infer_model

+def inference_dump_graph(ckpt_path, path_to_dump, hparams, scope=None):
+  model_creator = get_model_creator(hparams)
+  infer_model = model_helper.create_infer_model(model_creator, hparams, scope)
@ -64,7 +64,7 @@ index f5823d8..a733748 100644
@@ -310,6 +310,13 @@ def add_arguments(parser):
  parser.add_argument("--num_intra_threads", type=int, default=0,
                      help="number of intra_op_parallelism_threads")

+  # Special argument for inference model dumping without inference
+  parser.add_argument("--dump_inference_model", type="bool", nargs="?",
+                      const=True, default=False,
@ -72,7 +72,7 @@ index f5823d8..a733748 100644
+
+  parser.add_argument("--path_to_dump", type=str, default="",
+                      help="Path to dump inference graph.")

def create_hparams(flags):
  """Create training hparams."""
@@ -396,6 +403,9 @@ def create_hparams(flags):
@ -83,12 +83,12 @@ index f5823d8..a733748 100644
+      dump_inference_model=flags.dump_inference_model,
+      path_to_dump=flags.path_to_dump,
  )

@@ -613,7 +623,7 @@ def create_or_load_hparams(
  return hparams

-def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""):
+def run_main(flags, default_hparams, train_fn, inference_fn, inference_dump, target_session=""):
  """Run main."""
@ -97,7 +97,7 @@ index f5823d8..a733748 100644
@@ -653,8 +663,26 @@ def run_main(flags, default_hparams, train_fn, inference_fn, target_session=""):
      out_dir, default_hparams, flags.hparams_path,
      save_hparams=(jobid == 0))

-  ## Train / Decode
-  if flags.inference_input_file:
+  # Dumping inference model
@ -130,8 +130,8 @@ index f5823d8..a733748 100644
-  run_main(FLAGS, default_hparams, train_fn, inference_fn)
+  inference_dump = inference.inference_dump_graph
+  run_main(FLAGS, default_hparams, train_fn, inference_fn, inference_dump)

if __name__ == "__main__":

```
@ -224,7 +224,7 @@ For more information about model cutting, refer to [Cutting Off Parts of a Model
Inputs of the model:
* `IteratorGetNext/placeholder_out_port_0` input with shape `[batch_size, max_sequence_length]` contains `batch_size` decoded input sentences.
Every sentence is decoded the same way as indices of sentence elements in vocabulary and padded with index of `eos` (end of sentence symbol). If the length of the sentence is less than `max_sequence_length`, remaining elements are filled with index of `eos` token.

* `IteratorGetNext/placeholder_out_port_1` input with shape `[batch_size]` contains sequence lengths for every sentence from the first input. \
For example, if `max_sequence_length = 50`, `batch_size = 1` and the sentence has only 30 elements, then the input tensor for `IteratorGetNext/placeholder_out_port_1` should be `[30]`.
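
For illustration, packing these two inputs with NumPy could look like the hedged sketch below (the `eos` index and the toy sentence are placeholders; real values come from your vocabulary):

```python
import numpy as np

max_sequence_length = 50
eos_index = 2                       # assumption: index of the eos token in the vocabulary
sentence = [17, 43, 8, 901, 5]      # a sentence already mapped to vocabulary indices

tokens = np.full((1, max_sequence_length), eos_index, dtype=np.int32)
tokens[0, :len(sentence)] = sentence                  # IteratorGetNext/placeholder_out_port_0
lengths = np.array([len(sentence)], dtype=np.int32)   # IteratorGetNext/placeholder_out_port_1
print(tokens.shape, lengths)
```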

@ -1,12 +1,12 @@
# Convert Neural Collaborative Filtering Model from TensorFlow* to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_NCF_From_Tensorflow}
# Convert TensorFlow Neural Collaborative Filtering Model {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_NCF_From_Tensorflow}

This tutorial explains how to convert Neural Collaborative Filtering (NCF) model to Intermediate Representation (IR).

[Public TensorFlow NCF model](https://github.com/tensorflow/models/tree/master/official/recommendation) does not contain pre-trained weights. To convert this model to the IR:
1. Use [the instructions](https://github.com/tensorflow/models/tree/master/official/recommendation#train-and-evaluate-model) from this repository to train the model.
2. Freeze the inference graph you get in the previous step in `model_dir` following
the instructions from the Freezing Custom Models in Python* section of
[Converting a TensorFlow* Model](../Convert_Model_From_TensorFlow.md).
Run the following commands:
```python
import tensorflow as tf
@ -22,12 +22,12 @@ graph_io.write_graph(frozen, './', 'inference_graph.pb', as_text=False)
```
where `rating/BiasAdd` is an output node.

3. Convert the model to the IR. If you look at your frozen model, you can see that
it has one input that is split into four `ResourceGather` layers. (Click image to zoom in.)

But as the Model Optimizer does not support such data feeding, you should skip it. Cut
the edges coming into port 1 of the `ResourceGather` layers:
```sh
mo --input_model inference_graph.pb \

@ -1,4 +1,4 @@
# Converting TensorFlow* Object Detection API Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models}
# Convert TensorFlow Object Detection API Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models}

> **NOTES**:
> * Starting with the 2022.1 release, the Model Optimizer can convert the TensorFlow\* Object Detection API Faster and Mask RCNNs topologies differently. By default, the Model Optimizer adds operation "Proposal" to the generated IR. This operation needs an additional input to the model with name "image_info" which should be fed with several values describing the pre-processing applied to the input image (refer to the [Proposal](../../../../ops/detection/Proposal_4.md) operation specification for more information). However, this input is redundant for the models trained and inferred with equal size images. Model Optimizer can generate IR for such models and insert operation [DetectionOutput](../../../../ops/detection/DetectionOutput_1.md) instead of `Proposal`. The `DetectionOutput` operation does not require additional model input "image_info" and moreover, for some models the produced inference results are closer to the original TensorFlow\* model. In order to trigger new behaviour the attribute "operation_to_add" in the corresponding JSON transformation configuration file should be set to value "DetectionOutput" instead of default one "Proposal".
@ -128,7 +128,7 @@ Models with `keep_aspect_ratio_resizer` were trained to recognize object in real

## Detailed Explanations of Model Conversion Process

This section is intended for users who want to understand how the Model Optimizer performs Object Detection API models conversion in details. The knowledge given in this section is also useful for users having complex models that are not converted with the Model Optimizer out of the box. It is highly recommended to read [Sub-Graph Replacement in Model Optimizer](../../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md) chapter first to understand sub-graph replacement concepts which are used here.
This section is intended for users who want to understand how the Model Optimizer performs Object Detection API models conversion in details. The knowledge given in this section is also useful for users having complex models that are not converted with the Model Optimizer out of the box. It is highly recommended to read the **Graph Transformation Extensions** section in the [Model Optimizer Extensibility](../../customize_model_optimizer/Customize_Model_Optimizer.md) documentation first to understand sub-graph replacement concepts which are used here.

It is also important to open the model in the [TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard) to see the topology structure. Model Optimizer can create an event file that can be then fed to the TensorBoard* tool. Run the Model Optimizer providing two command line parameters:
* `--input_model <path_to_frozen.pb>` --- Path to the frozen model

@ -1,8 +1,8 @@
# Converting RetinaNet Model from TensorFlow* to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_RetinaNet_From_Tensorflow}
# Converting TensorFlow RetinaNet Model {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_RetinaNet_From_Tensorflow}

This tutorial explains how to convert RetinaNet model to the Intermediate Representation (IR).

[Public RetinaNet model](https://github.com/fizyr/keras-retinanet) does not contain pretrained TensorFlow\* weights.
To convert this model to the TensorFlow\* format, you can use [Reproduce Keras* to TensorFlow* Conversion tutorial](https://docs.openvino.ai/latest/omz_models_model_retinanet_tf.html).

After you convert the model to TensorFlow* format, run the Model Optimizer command below:

@ -1,13 +1,13 @@
# Converting TensorFlow*-Slim Image Classification Model Library Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Slim_Library_Models}
# Convert TensorFlow Slim Image Classification Model Library Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Slim_Library_Models}

<a href="https://github.com/tensorflow/models/tree/master/research/slim/README.md">TensorFlow\*-Slim Image Classification Model Library</a> is a library to define, train and evaluate classification models in TensorFlow\*. The library contains Python scripts defining the classification topologies together with checkpoint files for several pre-trained classification topologies. To convert a TensorFlow\*-Slim library model, complete the following steps:

1. Download the TensorFlow\*-Slim models [git repository](https://github.com/tensorflow/models).
2. Download the pre-trained model [checkpoint](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
3. Export the inference graph.
4. Convert the model using the Model Optimizer.

The [Example of an Inception V1 Model Conversion](#example_of_an_inception_v1_model_conversion) section below illustrates the process of converting an Inception V1 Model.

## Example of an Inception V1 Model Conversion <a name="example_of_an_inception_v1_model_conversion"></a>
This example demonstrates how to convert the model on Linux\* OSes, but it could be easily adopted for the Windows\* OSes.
@ -39,7 +39,7 @@ python3 tf_models/research/slim/export_inference_graph.py \
```

Model Optimizer comes with the summarize graph utility, which identifies graph input and output nodes. Run the utility to determine input/output nodes of the Inception V1 model:

```sh
python3 <PYTHON_SITE_PACKAGES>/openvino/tools/mo/utils/summarize_graph.py --input_model ./inception_v1_inference_graph.pb
```
@ -66,7 +66,7 @@ Refer to the [Mean and Scale Values for TensorFlow\*-Slim Models](#tf_slim_mean_
## Mean and Scale Values for TensorFlow\*-Slim Models <a name="tf_slim_mean_scale_values"></a>
The TensorFlow\*-Slim Models were trained with normalized input data. There are several different normalization algorithms used in the Slim library. OpenVINO classification sample does not perform image pre-processing except resizing to the input layer size. It is necessary to pass mean and scale values to the Model Optimizer so they are embedded into the generated IR in order to get correct classification results.

The file [preprocessing_factory.py](https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/preprocessing_factory.py) contains a dictionary variable `preprocessing_fn_map` defining mapping between the model type and pre-processing function to be used. The function code should be analyzed to figure out the mean/scale values.
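
As a worked illustration (not the actual Slim code): if a pre-processing function computes `2.0 * (pixel / 255.0) - 1.0`, that is algebraically identical to `(pixel - 127.5) / 127.5`, so it maps to `--mean_values [127.5,127.5,127.5]` and `--scale_values [127.5,127.5,127.5]`:

```python
# Check the equivalence numerically for one pixel value.
pixel = 200.0
scaled = 2.0 * (pixel / 255.0) - 1.0
mean_scale_form = (pixel - 127.5) / 127.5
assert abs(scaled - mean_scale_form) < 1e-9
print(scaled, mean_scale_form)
```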

The [inception_preprocessing.py](https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/inception_preprocessing.py) file defines the pre-processing function for the Inception models. The `preprocess_for_eval` function contains the following code:

@ -1,4 +1,4 @@
# Converting TensorFlow* Wide and Deep Family Models to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models}
# Convert TensorFlow Wide and Deep Family Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_WideAndDeep_Family_Models}

The Wide and Deep model is a combination of wide and deep parts for memorization and generalization of object features, respectively.
These models can contain different types of object features such as numerical, categorical, sparse and sequential features. These feature types are specified
@ -24,7 +24,7 @@ The Wide and Deep model is no longer in the master branch of the repository but

**Step 2**. Train the model

As the OpenVINO™ toolkit does not support categorical-with-hash and crossed features, such feature types must be switched off in the model
by changing the `build_model_columns()` function in `census_dataset.py` as follows:

```python
@ -61,7 +61,7 @@ def build_model_columns():
      age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
  # Wide columns and deep columns.
  base_columns = [
      education, marital_status, relationship, workclass,
      age_buckets,
  ]
  crossed_columns = []
@ -92,7 +92,7 @@ python census_main.py
Use the following command line to convert the saved model file with the checkpoint:

```sh
mo
--input_checkpoint checkpoint --input_meta_graph model.ckpt.meta
--input "IteratorGetNext:0[2],
IteratorGetNext:1[2],
@ -122,7 +122,7 @@ Use the following command line to convert the saved model file with the checkpoi
dnn/input_from_feature_columns/input_layer/relationship_indicator/to_sparse_input/dense_shape:0[2]{i64}->[2 50],
dnn/input_from_feature_columns/input_layer/workclass_indicator/to_sparse_input/indices:0[10 2]{i64},
dnn/input_from_feature_columns/input_layer/workclass_indicator/hash_table_Lookup/LookupTableFindV2:0[10]{i64},
dnn/input_from_feature_columns/input_layer/workclass_indicator/to_sparse_input/dense_shape:0[2]{i64}->[2 50]"
--output head/predictions/probabilities
```

@ -1,4 +1,4 @@
# Convert TensorFlow* XLNet Model to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow}
# Convert TensorFlow XLNet Model {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_XLNet_From_Tensorflow}

Pre-trained models for XLNet are
[publicly available](https://github.com/zihangdai/xlnet).
@ -16,8 +16,8 @@ Download and unzip an archive with the [XLNet-Base, Cased](https://storage.googl

After the archive is unzipped, the directory `cased_L-12_H-768_A-12` is created and contains the following files:
* TensorFlow checkpoint (`xlnet_model.ckpt`) containing the pre-trained weights (which is actually 3 files)
* sentence piece model (`spiece.model`) used for (de)tokenization
* config file (`xlnet_config.json`) which specifies the hyperparameters of the model

To get pb-file from the archive contents, you need to do the following.

@ -33,7 +33,7 @@ To get pb-file from the archive contents, you need to do the following.
mkdir try_save
```

2. Save and run the following Python script in `~/XLNet-Base/xlnet`:

@ -102,8 +102,8 @@ Download and unzip an archive with the [XLNet-Large, Cased](https://storage.goog
After the archive is unzipped, the directory `cased_L-12_H-1024_A-16` is created and contains the following files:

* TensorFlow checkpoint (`xlnet_model.ckpt`) containing the pre-trained weights (which is actually 3 files)
* sentence piece model (`spiece.model`) used for (de)tokenization
* config file (`xlnet_config.json`) which specifies the hyperparameters of the model

To get pb-file from the archive contents, you need to do the following.

@ -1,7 +1,7 @@
# Converting YOLO* Models to the Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_YOLO_From_Tensorflow}
# Convert TensorFlow YOLO Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_YOLO_From_Tensorflow}

This document explains how to convert real-time object detection YOLOv1\*, YOLOv2\*, YOLOv3\* and YOLOv4\* public models to the Intermediate Representation (IR). All YOLO\* models are originally implemented in the DarkNet\* framework and consist of two files:
* `.cfg` file with model configurations
* `.weights` file with model weights

Depending on a YOLO model version, the Model Optimizer converts it differently:
@ -35,9 +35,9 @@ python keras-YOLOv3-model-set/tools/model_converter/convert.py <path_to_cfg_file

4. Run Model Optimizer to convert the model from the TensorFlow 2 format to an IR:

> **NOTE:** Before you run the conversion, make sure you have installed all the Model Optimizer dependencies for TensorFlow 2.
> **NOTE**: Before you run the conversion, make sure you have installed all the Model Optimizer dependencies for TensorFlow 2.
```sh
mo --saved_model_dir yolov4 --output_dir models/IRs --input_shape [1,608,608,3] --model_name yolov4
```

## <a name="yolov3-to-ir"></a>Convert YOLOv3 Model to IR
@ -115,7 +115,7 @@ It consists of several attributes:<br>
where:
- `id` and `match_kind` are parameters that you cannot change.
- `custom_attributes` is a parameter that stores all the YOLOv3 specific attributes:
    - `classes`, `coords`, `num`, and `masks` are attributes that you should copy from the configuration
file that was used for model training. If you used DarkNet officially shared weights,
you can use `yolov3.cfg` or `yolov3-tiny.cfg` configuration file from https://github.com/david8862/keras-YOLOv3-model-set/tree/master/cfg. Replace the default values in `custom_attributes` with the parameters that
follow the `[yolo]` titles in the configuration file.
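
For orientation only, the `custom_attributes` block described above might look like the hypothetical sketch below for the official 80-class COCO `yolov3.cfg`; every value must be copied from the cfg you actually trained with:

```python
# Hypothetical illustration of the structure; the numbers correspond to the stock
# COCO yolov3.cfg (80 classes, 9 anchors) and are NOT a substitute for your own cfg.
custom_attributes = {
    "classes": 80,   # from the [yolo] sections
    "coords": 4,
    "num": 9,        # total number of anchors
    "masks": [],     # copy the mask= list of each [yolo] section, in the order they appear
}
print(custom_attributes)
```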

@ -184,8 +184,8 @@ To convert YOLOv1 or YOLOv2 model to TensorFlow, go to the root directory of the
python3 flow --model yolov1.cfg --load yolov1.weights --savepb
```

- For YOLOv2 with VOC dataset `--labels` argument should be specified and additional changes in the original exporting script are required.
In the file [https://github.com/thtrieu/darkflow/blob/b187c65/darkflow/utils/loader.py#L121](https://github.com/thtrieu/darkflow/blob/b187c65630f9aa1bb8b809c33ec67c8cc5d60124/darkflow/utils/loader.py#L121)
change line 121 from `self.offset = 16` to `self.offset = 20`. Then run:
```sh
python3 flow --model yolov2-voc.cfg --load yolov2-voc.weights --labels voc-labels.txt --savepb
@ -204,7 +204,7 @@ File `<model_name>.pb` is a TensorFlow representation of the YOLO model.
#### <a name="yolov1-v2-to-ir"></a>Convert TensorFlow YOLOv1 or YOLOv2 Model to the IR

Converted TensorFlow YOLO model is missing `Region` layer and its parameters. Original YOLO `Region` layer parameters are stored in the configuration `<path_to_model>/<model_name>.cfg`
file under the `[region]` title.

To recreate the original model structure, use the corresponding yolo `.json` configuration file with custom operations and `Region` layer
parameters when converting the model to the IR. This file is located in the `<OPENVINO_INSTALL_DIR>/tools/model_optimizer/extensions/front/tf` directory.
@ -223,7 +223,7 @@ To generate the IR of the YOLOv1 model, provide TensorFlow YOLOv1 or YOLOv2 mode
where:

* `--batch` defines shape of model input. In the example, `--batch` is equal to 1, but you can also specify other integers larger than 1.
* `--scale` specifies scale factor that input values will be divided by.
The model was trained with input values in the range `[0,1]`. OpenVINO™ toolkit samples read input images as values in `[0,255]` range, so the scale 255 must be applied.
* `--transformations_config` adds missing `Region` layers to the model. In the IR, the `Region` layer has name `RegionYolo`.
For other applicable parameters, refer to [Convert Model from TensorFlow](../Convert_Model_From_TensorFlow.md).

@ -1,4 +1,4 @@
# Converting TensorFlow* Language Model on One Billion Word Benchmark to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_lm_1b_From_Tensorflow}
# Convert TensorFlow Language Model on One Billion Word Benchmark {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_lm_1b_From_Tensorflow}

## Download the Pre-trained Language Model on One Billion Word Benchmark

@ -51,14 +51,14 @@ lm_1b/
    ckpt-char-embedding
    ckpt-lstm
    ckpt-softmax0
    ckpt-softmax1
    ckpt-softmax2
    ckpt-softmax3
    ckpt-softmax4
    ckpt-softmax5
    ckpt-softmax6
    ckpt-softmax7
    ckpt-softmax8
```
@ -10,7 +10,7 @@

@endsphinxdirective

<a name="model-optimizer-extensibility"></a>Model Optimizer extensibility mechanism enables support of new operations and custom transformations to generate the optimized intermediate representation (IR) as described in the
[Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../../IR_and_opsets.md). This
mechanism is a core part of the Model Optimizer. The Model Optimizer itself uses it under the hood, being a huge set of examples on how to add custom logic to support your model.

@ -144,7 +144,7 @@ OpenVINO™ [TopK](../../../ops/sort/TopK_3.md) operation semantic, which re

It is important to mention that sometimes it seems like transformation cannot be implemented during the front phase
because the actual values of inputs or shapes are needed. But in fact shapes or values manipulations can be implemented
using operations that are added to the graph. Consider the
`extensions/front/onnx/flattenONNX_to_reshape.py` transformation, which replaces an ONNX\* operation
[Flatten](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Flatten) with a sub-graph of operations performing
the following (for the case when `axis` is not equal to 0 and 1):
@ -260,7 +260,7 @@ More information on how to develop middle transformations and dedicated API desc

There are several middle transformations responsible for changing model layout from NHWC to NCHW. These transformations are triggered by default for TensorFlow models as TensorFlow supports Convolution operations in the NHWC layout.

This layout change is disabled automatically if the model does not have operations that OpenVINO™ needs to execute in the NCHW layout, for example, Convolutions in NHWC layout.

It is still possible to force Model Optimizer to do layout change, using `--disable_nhwc_to_nchw` command-line parameter, although it is not advised.

@ -287,7 +287,7 @@ The back phase starts after the layout change to NCHW. This phase contains mostl

1. Transformations that should work with a graph in the NCHW layout and thus cannot be implemented in the middle
phase.
2. Transformations that replace nodes corresponding to internal Model Optimizer operations with nodes corresponding to the
[opset](@ref openvino_docs_ops_opset) operations.
3. Transformations that normalize operations inputs according to the specification.
4. Final optimization transformations.
@ -703,7 +703,7 @@ to enable or disable execution of the transformation during a model conversion.
2. Attribute `id` specifies a unique transformation string identifier. This transformation identifier can be used to
enable (disable) the transformation by setting environment variable `MO_ENABLED_TRANSFORMS` (`MO_DISABLED_TRANSFORMS`)
with a comma separated list of `id`s. The environment variables override the value of the `enabled` attribute of the
transformation. Instead of using `id` attribute value you can add fully defined class name to `MO_ENABLED_TRANSFORMS`
(`MO_DISABLED_TRANSFORMS`) variable, `extensions.back.NonmalizeToNormalizeL2.NormalizeToNormalizeL2` for example. Optional attribute.
3. Attribute `run_not_recursively` specifies whether the transformation should be executed in the sub-graphs, for
example, body of the [TensorIterator](../../../ops/infrastructure/TensorIterator_1.md) and

@ -1,4 +0,0 @@
# [DEPRECATED] Sub-Graph Replacement in the Model Optimizer {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Subgraph_Replacement_Model_Optimizer}

The document has been deprecated. Refer to the [Model Optimizer Extensibility](Customize_Model_Optimizer.md)
for the up-to-date documentation.
|
@ -1,59 +1,95 @@
|
||||
# Model Caching Overview {#openvino_docs_IE_DG_Model_caching_overview}
|
||||
|
||||
## Introduction (C++)
|
||||
## Introduction
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
As described in the [Integrate OpenVINO™ with Your Application](integrate_with_your_application.md), a common application flow consists of the following steps:
|
||||
|
||||
<div id="switcher-cpp" class="switcher-anchor">C++</div>
|
||||
@endsphinxdirective
|
||||
1. **Create a Core object**: First step to manage available devices and read model objects
|
||||
|
||||
As described in the [OpenVINO™ Runtime User Guide](openvino_intro.md), a common application flow consists of the following steps:
|
||||
|
||||
1. **Create a Core object**: First step to manage available devices and read network objects
|
||||
|
||||
2. **Read the Intermediate Representation**: Read an Intermediate Representation file into an object of the `InferenceEngine::CNNNetwork`
|
||||
2. **Read the Intermediate Representation**: Read an Intermediate Representation file into an object of the `ov::Model`
|
||||
|
||||
3. **Prepare inputs and outputs**: If needed, manipulate precision, memory layout, size or color format
|
||||
|
||||
4. **Set configuration**: Pass device-specific loading configurations to the device
|
||||
|
||||
5. **Compile and Load Network to device**: Use the `InferenceEngine::Core::LoadNetwork()` method with a specific device
|
||||
5. **Compile and Load Network to device**: Use the `ov::Core::compile_model()` method with a specific device
|
||||
|
||||
6. **Set input data**: Specify input blob
|
||||
6. **Set input data**: Specify input tensor
|
||||
|
||||
7. **Execute**: Carry out inference and process results
|
||||
|
||||
Step 5 can potentially perform several time-consuming device-specific optimizations and network compilations,
|
||||
and such delays can lead to a bad user experience on application startup. To avoid this, some devices offer
|
||||
import/export network capability, and it is possible to either use the [Compile tool](../../tools/compile_tool/README.md)
|
||||
or enable model caching to export compiled network automatically. Reusing cached networks can significantly reduce load network time.
|
||||
or enable model caching to export the compiled model automatically. Reusing a cached model can significantly reduce model compile time.
|
||||
|
||||
### Set "CACHE_DIR" config option to enable model caching
|
||||
### Set "cache_dir" config option to enable model caching
|
||||
|
||||
To enable model caching, the application must specify a folder to store cached blobs, which is done like this:
|
||||
|
||||
@snippet snippets/InferenceEngine_Caching0.cpp part0
|
||||
@sphinxdirective
|
||||
|
||||
With this code, if the device specified by `LoadNetwork` supports import/export network capability, a cached blob is automatically created inside the `myCacheFolder` folder.
|
||||
CACHE_DIR config is set to the Core object. If the device does not support import/export capability, cache is not created and no error is thrown.
|
||||
.. tab:: C++
|
||||
|
||||
Depending on your device, total time for loading network on application startup can be significantly reduced.
|
||||
Also note that the very first LoadNetwork (when cache is not yet created) takes slightly longer time to "export" the compiled blob into a cache file:
|
||||
.. doxygensnippet:: docs/snippets/ov_caching.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov:caching:part0]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_caching.py
|
||||
:language: python
|
||||
:fragment: [ov:caching:part0]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
With this code, if the device specified by `device_name` supports import/export model capability, a cached blob is automatically created inside the `/path/to/cache/dir` folder.
|
||||
If the device does not support import/export capability, cache is not created and no error is thrown.
|
||||
|
||||
Depending on your device, total time for compiling model on application startup can be significantly reduced.
|
||||
Also note that the very first `compile_model` (when cache is not yet created) takes slightly longer time to "export" the compiled blob into a cache file:
|
||||
|
||||
![caching_enabled]
|
||||
|
||||
### Even faster: use LoadNetwork(modelPath)
|
||||
### Even faster: use compile_model(modelPath)
|
||||
|
||||
In some cases, applications do not need to customize inputs and outputs every time. Such an application always
|
||||
call `cnnNet = ie.ReadNetwork(...)`, then `ie.LoadNetwork(cnnNet, ..)` and it can be further optimized.
|
||||
For these cases, the 2021.4 release introduces a more convenient API to load the network in a single call, skipping the export step:
|
||||
In some cases, applications do not need to customize inputs and outputs every time. Such applications always
|
||||
call `model = core.read_model(...)`, then `core.compile_model(model, ..)` and it can be further optimized.
|
||||
For these cases, there is a more convenient API to compile the model in a single call, skipping the read step:
|
||||
|
||||
@snippet snippets/InferenceEngine_Caching1.cpp part1
|
||||
@sphinxdirective
|
||||
|
||||
With model caching enabled, total load time is even smaller, if ReadNetwork is optimized as well.
|
||||
.. tab:: C++
|
||||
|
||||
@snippet snippets/InferenceEngine_Caching2.cpp part2
|
||||
.. doxygensnippet:: docs/snippets/ov_caching.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov:caching:part1]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_caching.py
|
||||
:language: python
|
||||
:fragment: [ov:caching:part1]
|
||||
|
||||
@endsphinxdirective
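As an illustration only, a hedged Python sketch of the single-call flow might look as follows; the file name, cache path, and device are assumptions.

```python
from openvino.runtime import Core

core = Core()
core.set_property({"CACHE_DIR": "/path/to/cache/dir"})
# Compiling directly from the model path skips the explicit read_model step
compiled_model = core.compile_model("model.xml", "GPU")
```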
|
||||
|
||||
With model caching enabled, total load time is even smaller, if `read_model` is optimized as well.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_caching.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov:caching:part2]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_caching.py
|
||||
:language: python
|
||||
:fragment: [ov:caching:part2]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
![caching_times]
|
||||
|
||||
@ -62,74 +98,23 @@ With model caching enabled, total load time is even smaller, if ReadNetwork is o
|
||||
Not every device supports network import/export capability. For those that don't, enabling caching has no effect.
|
||||
To check in advance if a particular device supports model caching, your application can use the following code:
|
||||
|
||||
@snippet snippets/InferenceEngine_Caching3.cpp part3
|
||||
|
||||
## Introduction (Python)
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
<div id="switcher-python" class="switcher-anchor">Python</div>
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_caching.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov:caching:part3]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_caching.py
|
||||
:language: python
|
||||
:fragment: [ov:caching:part3]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
As described in OpenVINO User Guide, a common application flow consists of the following steps:
|
||||
|
||||
1. **Create a Core Object**
|
||||
2. **Read the Intermediate Representation** - Read an Intermediate Representation file into an object of the [ie_api.IENetwork](api/ie_python_api/_autosummary/openvino.inference_engine.IENetwork.html)
|
||||
3. **Prepare inputs and outputs**
|
||||
4. **Set configuration** - Pass device-specific loading configurations to the device
|
||||
5. **Compile and Load Network to device** - Use the `IECore.load_network()` method and specify the target device
|
||||
6. **Set input data**
|
||||
7. **Execute the model** - Run inference
|
||||
|
||||
Step #5 can potentially perform several time-consuming device-specific optimizations and network compilations, and such delays can lead to bad user experience on application startup. To avoid this, some devices offer Import/Export network capability, and it is possible to either use the [Compile tool](../../tools/compile_tool/README.md) or enable model caching to export the compiled network automatically. Reusing cached networks can significantly reduce load network time.
|
||||
|
||||
### Set the “CACHE_DIR” config option to enable model caching
|
||||
|
||||
To enable model caching, the application must specify the folder where to store cached blobs. It can be done using [IECore.set_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.set_config).
|
||||
|
||||
``` python
|
||||
from openvino.inference_engine import IECore
|
||||
|
||||
ie = IECore()
|
||||
ie.set_config(config={"CACHE_DIR": path_to_cache}, device_name=device)
|
||||
net = ie.read_network(model=path_to_xml_file)
|
||||
exec_net = ie.load_network(network=net, device_name=device)
|
||||
```
|
||||
|
||||
With this code, if a device supports the Import/Export network capability, a cached blob is automatically created inside the `path_to_cache` directory once the `CACHE_DIR` config is set on the Core object. If the device does not support the Import/Export capability, the cache is simply not created and no error is thrown.
|
||||
|
||||
Depending on your device, the total time for loading a network on application startup can be significantly reduced. Also note that the very first [IECore.load_network](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network) call (when the cache is not yet created) takes a slightly longer time to ‘export’ the compiled blob into a cache file.
|
||||
|
||||
![caching_enabled]
|
||||
|
||||
|
||||
### Even Faster: Use IECore.load_network(path_to_xml_file)
|
||||
|
||||
In some cases, applications do not need to customize inputs and outputs every time. These applications always call [IECore.read_network](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.read_network), then `IECore.load_network(model=path_to_xml_file)`, and may be further optimized. For such cases, it is more convenient to load the network in a single call to `ie.load_network()`.
|
||||
A model can be loaded directly to the device, with model caching enabled:
|
||||
|
||||
``` python
|
||||
from openvino.inference_engine import IECore
|
||||
|
||||
ie = IECore()
|
||||
ie.set_config(config={"CACHE_DIR" : path_to_cache}, device_name=device)
|
||||
ie.load_network(network=path_to_xml_file, device_name=device)
|
||||
```
|
||||
|
||||
![caching_times]
|
||||
|
||||
### Advanced Examples
|
||||
|
||||
Not every device supports the network import/export capability; enabling caching for such devices has no effect. To check in advance if a particular device supports model caching, your application can use the following code:
|
||||
|
||||
```python
|
||||
all_metrics = ie.get_metric(device_name=device, metric_name="SUPPORTED_METRICS")
|
||||
# Find the 'IMPORT_EXPORT_SUPPORT' metric in supported metrics
|
||||
allows_caching = "IMPORT_EXPORT_SUPPORT" in all_metrics
|
||||
```
|
||||
|
||||
> **NOTE**: The GPU plugin does not have the IMPORT_EXPORT_SUPPORT capability, and does not support model caching yet. However, the GPU plugin supports caching kernels (see the [GPU plugin documentation](supported_plugins/GPU.md)). Kernel caching for the GPU plugin can be accessed the same way as model caching: by setting the `CACHE_DIR` configuration key to a folder where the cache should be stored.
|
||||
> **NOTE**: The GPU plugin does not have the EXPORT_IMPORT capability, and does not support model caching yet. However, the GPU plugin supports caching kernels (see the [GPU plugin documentation](supported_plugins/GPU.md)). Kernel caching for the GPU plugin can be accessed the same way as model caching: by setting the `CACHE_DIR` configuration key to a folder where the cache should be stored.
|
||||
|
||||
|
||||
[caching_enabled]: ../img/caching_enabled.png
|
||||
|
@ -177,6 +177,7 @@
|
||||
openvino_docs_ops_activation_SoftMax_1
|
||||
openvino_docs_ops_activation_SoftMax_8
|
||||
openvino_docs_ops_activation_SoftPlus_4
|
||||
openvino_docs_ops_activation_SoftSign_9
|
||||
openvino_docs_ops_movement_SpaceToBatch_2
|
||||
openvino_docs_ops_movement_SpaceToDepth_1
|
||||
openvino_docs_ops_movement_Split_1
|
||||
|
143
docs/OV_Runtime_UG/Python_API_exclusives.md
Normal file
@ -0,0 +1,143 @@
|
||||
# OpenVINO™ Python API exclusives {#openvino_docs_OV_Runtime_UG_Python_API_exclusives}
|
||||
|
||||
The OpenVINO™ Runtime Python API exposes additional features and helpers to elevate the user experience. The main goal of the Python API is to provide a user-friendly and simple, yet powerful, tool for Python users.
|
||||
|
||||
## Easier model compilation
|
||||
|
||||
A `CompiledModel` can be created easily with the helper method. It hides the `Core` creation and applies the `AUTO` device by default.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [auto_compilation]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Model/CompiledModel inputs and outputs
|
||||
|
||||
Besides functions aligned to the C++ API, some of them have Pythonic counterparts or extensions. For example, `Model` and `CompiledModel` inputs/outputs can be accessed via properties.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [properties_example]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
Refer to the Python API documentation to see which helper functions or properties are available for different classes.
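A short hedged sketch of these properties; the model file name and the CPU device are assumptions.

```python
from openvino.runtime import Core

core = Core()
model = core.read_model("model.xml")
print(model.inputs)    # list of model inputs
print(model.outputs)   # list of model outputs

compiled_model = core.compile_model(model, "CPU")
print(compiled_model.inputs, compiled_model.outputs)
```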
|
||||
|
||||
## Working with Tensor
|
||||
|
||||
The Python API allows passing data as tensors. A `Tensor` object holds a copy of the data from the given array. The `dtype` of numpy arrays is converted to OpenVINO™ types automatically.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [tensor_basics]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Shared memory mode
|
||||
|
||||
`Tensor` objects can share the memory with numpy arrays. By specifying the `shared_memory` argument, a `Tensor` object does not copy the data and instead accesses the memory of the numpy array.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [tensor_shared_mode]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Slices of array's memory
|
||||
|
||||
One of the `Tensor` class constructors allows sharing a slice of the array's memory. When `shape` is specified in the constructor that takes the numpy array as its first argument, the special shared memory mode is triggered.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [tensor_slice_mode]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Running inference
|
||||
|
||||
The Python API supports additional calling methods for running inference in synchronous and asynchronous modes.
|
||||
|
||||
All infer methods allow users to pass data as popular numpy arrays, gathered in either Python dicts or lists.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [passing_numpy_array]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
Results from inference can be obtained in various ways:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [getting_results]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Synchronous mode - extended
|
||||
|
||||
The Python API provides different synchronous calls to infer a model, which block the application execution. Additionally, these calls return the results of inference:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [sync_infer]
|
||||
|
||||
@endsphinxdirective
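A hedged end-to-end sketch of a blocking call that also returns the outputs; the model file, CPU device, and zero-filled input are assumptions.

```python
import numpy as np
from openvino.runtime import Core

core = Core()
compiled_model = core.compile_model("model.xml", "CPU")
request = compiled_model.create_infer_request()

data = np.zeros(list(compiled_model.input(0).shape), dtype=np.float32)
results = request.infer({0: data})               # blocks and returns the results
first_output = results[compiled_model.output(0)]
```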
|
||||
|
||||
### AsyncInferQueue
|
||||
|
||||
Asynchronous mode pipelines can be supported with a wrapper class called `AsyncInferQueue`. This class automatically spawns a pool of `InferRequest` objects (also called "jobs") and provides synchronization mechanisms to control the flow of the pipeline.
|
||||
|
||||
Each job is distinguishable by a unique `id`, which is in the range from 0 up to the number of jobs specified in the `AsyncInferQueue` constructor.
|
||||
|
||||
The `start_async` function call is not required to be synchronized; it waits for any available job if the queue is busy/overloaded. Every `AsyncInferQueue` code block should end with the `wait_all` function, which provides "global" synchronization of all jobs in the pool and ensures that access to them is safe.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [asyncinferqueue]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
#### Acquire results from requests
|
||||
|
||||
After the call to `wait_all`, jobs and their data can be safely accessed. Acquiring a specific job with `[id]` returns an `InferRequest` object, which allows seamless retrieval of the output data.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [asyncinferqueue_access]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
#### Setting callbacks
|
||||
|
||||
Another feature of `AsyncInferQueue` is the ability to set callbacks. When a callback is set, any job that finishes inference calls the given Python function. The callback function must have two arguments: the first is the request that triggered the callback and provides the `InferRequest` API; the second, called "userdata", allows passing runtime values, which can be of any Python type and are later used inside the callback function.
|
||||
|
||||
The callback of `AsyncInferQueue` is uniform for every job. When executed, the GIL is acquired to ensure safe data manipulation inside the function.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_python_exclusives.py
|
||||
:language: python
|
||||
:fragment: [asyncinferqueue_set_callback]
|
||||
|
||||
@endsphinxdirective
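Putting the pieces together, a hedged sketch of an `AsyncInferQueue` with a callback might look like this; the model file, the pool size of 4, and the random input data are assumptions.

```python
import numpy as np
from openvino.runtime import Core, AsyncInferQueue

core = Core()
compiled_model = core.compile_model("model.xml", "CPU")
infer_queue = AsyncInferQueue(compiled_model, 4)   # pool of 4 jobs

results = {}

def callback(request, frame_id):
    # request exposes the InferRequest API; frame_id is the userdata
    results[frame_id] = request.get_output_tensor(0).data.copy()

infer_queue.set_callback(callback)

shape = list(compiled_model.input(0).shape)
for i in range(8):
    data = np.random.rand(*shape).astype(np.float32)
    infer_queue.start_async({0: data}, userdata=i)

infer_queue.wait_all()   # all jobs are finished; results is fully populated
```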
|
@ -1,332 +1,410 @@
|
||||
# Automatic device selection {#openvino_docs_IE_DG_supported_plugins_AUTO}
|
||||
|
||||
## Auto-Device Plugin Execution (C++)
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
<div id="switcher-cpp" class="switcher-anchor">C++</div>
|
||||
@endsphinxdirective
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
||||
The AUTO device is a new, special "virtual" or "proxy" device in the OpenVINO™ toolkit.
|
||||
|
||||
Use "AUTO" as the device name to delegate selection of an actual accelerator to OpenVINO. The Auto-device plugin internally recognizes and selects devices from among CPU, integrated GPU and discrete Intel GPUs (when available) depending on the device capabilities and the characteristics of CNN models (for example, precision). Then the Auto-device assigns inference requests to the selected device.
|
||||
|
||||
From the application's point of view, this is just another device that handles all accelerators in the full system.
|
||||
|
||||
With the 2021.4 release, Auto-device setup is done in three major steps:
|
||||
1. Configure each device as usual (for example, via the conventional `SetConfig()` method)
|
||||
2. Load a network to the Auto-device plugin. This is the only change needed in your application.
|
||||
3. As with any other executable network resulting from `LoadNetwork()`, create as many requests as needed to saturate the devices.
|
||||
|
||||
These steps are covered below in detail.
|
||||
|
||||
### Defining and Configuring the Auto-Device Plugin
|
||||
Following the OpenVINO convention for device names, the Auto-device uses the label "AUTO". The only configuration option for the Auto-device is a limited device list:
|
||||
|
||||
| Parameter name | Parameter values | Default | Description |
|
||||
| :--- | :--- | :--- |:-----------------------------------------------------------------------------|
|
||||
| "MULTI_DEVICE_PRIORITIES" | comma-separated device names <span style="color:red">with no spaces</span>| N/A | Device candidate list to be selected |
|
||||
|
||||
You can use the configuration name directly as a string or use `InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES` from `multi-device/multi_device_config.hpp`, which defines the same string.
|
||||
|
||||
There are two ways to use Auto-device:
|
||||
1. Directly indicate device by "AUTO" or an empty string:
|
||||
@snippet snippets/AUTO0.cpp part0
|
||||
|
||||
2. Use the Auto-device configuration:
|
||||
@snippet snippets/AUTO1.cpp part1
|
||||
|
||||
Both methods allow limiting the list of device candidates for the AUTO plugin.
|
||||
|
||||
> **NOTE**: The OpenVINO Runtime lets you use "GPU" as an alias for "GPU.0" in function calls.
|
||||
|
||||
The Auto-device plugin supports querying device optimization capabilities as a metric.
|
||||
|
||||
| Parameter name | Parameter values |
|
||||
| :--- | :--- |
|
||||
| "OPTIMIZATION_CAPABILITIES" | Auto-Device capabilities |
|
||||
|
||||
### Enumerating Devices and Selection Logic
|
||||
|
||||
The OpenVINO Runtime API now features dedicated methods to enumerate devices and their capabilities.
|
||||
See [Hello Query Device C++ Sample](../../samples/cpp/hello_query_device/README.md).
|
||||
This is the example output from the sample (truncated to device names only):
|
||||
|
||||
```sh
|
||||
./hello_query_device
|
||||
Available devices:
|
||||
Device: CPU
|
||||
...
|
||||
Device: GPU.0
|
||||
...
|
||||
Device: GPU.1
|
||||
```
|
||||
|
||||
### Default Auto-Device Selection Logic
|
||||
|
||||
With the 2021.4 release, the Auto-Device selects the most suitable device using the following default logic:
|
||||
|
||||
1. Check if dGPU (discrete), iGPU (integrated) and CPU devices are available
|
||||
2. Get the precision of the input model, such as FP32
|
||||
3. According to the priority of dGPU, iGPU, and CPU (in this order), if the device supports the precision of the input network, select it as the most suitable device
|
||||
|
||||
For example, CPU, dGPU and iGPU can support the following precision and optimization capabilities:
|
||||
|
||||
| Device | OPTIMIZATION_CAPABILITIES |
|
||||
| :--- | :--- |
|
||||
| CPU | WINOGRAD FP32 FP16 INT8 BIN |
|
||||
| dGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
|
||||
| iGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
|
||||
|
||||
* When the application uses the Auto-device to run FP16 IR on a system with CPU, dGPU and iGPU, Auto-device will offload this workload to dGPU.
|
||||
* When the application uses the Auto-device to run FP16 IR on a system with CPU and iGPU, Auto-device will offload this workload to iGPU.
|
||||
* When the application uses the Auto-device to run WINOGRAD-enabled IR on a system with CPU, dGPU and iGPU, Auto-device will offload this workload to CPU.
|
||||
|
||||
In cases when loading the network to dGPU or iGPU fails, CPU is the fall-back choice.
|
||||
|
||||
According to the Auto-device selection logic from the previous section, tell the OpenVINO Runtime
|
||||
to use the most suitable device from available devices as follows:
|
||||
|
||||
@snippet snippets/AUTO2.cpp part2
|
||||
|
||||
You can also use the Auto-device plugin to choose a device from a limited choice of devices, in this example CPU and GPU:
|
||||
|
||||
@snippet snippets/AUTO3.cpp part3
|
||||
|
||||
### Configuring the Individual Devices and Creating the Auto-Device on Top
|
||||
|
||||
It is possible to configure each individual device as usual and create the "AUTO" device on top:
|
||||
|
||||
@snippet snippets/AUTO4.cpp part4
|
||||
|
||||
Alternatively, you can combine all the individual device settings into single config file and load it, allowing the Auto-device plugin to parse and apply it to the right devices. See the code example here:
|
||||
|
||||
@snippet snippets/AUTO5.cpp part5
|
||||
|
||||
### Using the Auto-Device with OpenVINO Samples and Benchmark App
|
||||
|
||||
Note that every OpenVINO sample or application that supports the "-d" (which stands for "device") command-line option transparently accepts the Auto-device. The Benchmark Application is the best example of the optimal usage of the Auto-device. You do not need to set the number of requests and CPU threads, as the application provides optimal out-of-the-box performance. Below is the example command-line to evaluate AUTO performance with that:
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: Package, Docker, open-source installation
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
./benchmark_app.py –d AUTO –m <model>
|
||||
|
||||
.. tab:: pip installation
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
benchmark_app –d AUTO –m <model>
|
||||
Debugging Auto-Device Plugin <openvino_docs_IE_DG_supported_plugins_AUTO_debugging>
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
The Auto-Device plugin, or AUTO, is a virtual device which automatically selects the processing unit to use for inference with OpenVINO™. It chooses from a list of available devices defined by the user and aims at finding the most suitable hardware for the given model. The best device is chosen using the following logic:
|
||||
|
||||
You can also use the auto-device with limit device choice:
|
||||
1. Check which supported devices are available.
|
||||
2. Check the precision of the input model (for detailed information on precisions read more on the [OPTIMIZATION_CAPABILITIES metric](../IE_PLUGIN_DG/Plugin.md))
|
||||
3. From the priority list, select the first device capable of supporting the given precision.
|
||||
4. If the network’s precision is FP32 but there is no device capable of supporting it, offload the network to a device supporting FP16.
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: Package, Docker, open-source installation
|
||||
+----------+-------------------------------------------------+-------------------------------------+
|
||||
| Choice | | Supported | | Supported |
|
||||
| Priority | | Device | | model precision |
|
||||
+==========+=================================================+=====================================+
|
||||
| 1 | | dGPU | FP32, FP16, INT8, BIN |
|
||||
| | | (e.g. Intel® Iris® Xe MAX) | |
|
||||
+----------+-------------------------------------------------+-------------------------------------+
|
||||
| 2 | | VPUX | INT8 |
|
||||
| | | (e.g. Intel® Movidius® VPU 3700VE) | |
|
||||
+----------+-------------------------------------------------+-------------------------------------+
|
||||
| 3 | | iGPU | FP32, FP16, BIN, |
|
||||
| | | (e.g. Intel® UHD Graphics 620 (iGPU)) | |
|
||||
+----------+-------------------------------------------------+-------------------------------------+
|
||||
| 4 | | Intel® Neural Compute Stick 2 (Intel® NCS2) | FP16 |
|
||||
| | | |
|
||||
+----------+-------------------------------------------------+-------------------------------------+
|
||||
| 5 | | Intel® CPU | FP32, FP16, INT8, BIN |
|
||||
| | | (e.g. Intel® Core™ i7-1165G7) | |
|
||||
+----------+-------------------------------------------------+-------------------------------------+
|
||||
@endsphinxdirective
|
||||
|
||||
.. code-block:: sh
|
||||
To put it simply, when loading the network to the first device on the list fails, AUTO will try to load it to the next device in line, until one of them succeeds. For example:
|
||||
If you have dGPU in your system, it will be selected for most jobs (first on the priority list and supports multiple precisions). But if you want to run a WINOGRAD-enabled IR, your CPU will be selected (WINOGRAD optimization is not supported by dGPU). If you have Myriad and IA CPU in your system, Myriad will be selected for FP16 models, but IA CPU will be chosen for FP32 ones.
|
||||
|
||||
./benchmark_app.py –d AUTO:CPU,GPU –m <model>
|
||||
Importantly, **AUTO always starts inference with the CPU**. The CPU provides very low latency and can start inference with no additional delays. While it performs inference, the Auto-Device plugin continues to load the model to the device best suited for the purpose and transfers the task to it when ready. This way, the devices that are much slower in loading the network, GPU being the prime example, do not impede inference at its initial stages.
|
||||
|
||||
.. tab:: pip installation
|
||||
This mechanism can be easily observed in our Benchmark Application sample ([see here](#Benchmark App Info)), showing how the first-inference latency (the time it takes to load the network and perform the first inference) is reduced when using AUTO. For example:
|
||||
|
||||
.. code-block:: sh
|
||||
@sphinxdirective
|
||||
.. code-block:: sh
|
||||
|
||||
benchmark_app –d AUTO:CPU,GPU –m <model>
|
||||
./benchmark_app -m ../public/alexnet/FP32/alexnet.xml -d GPU -niter 128
|
||||
@endsphinxdirective
|
||||
|
||||
first-inference latency: **2594.29 ms + 9.21 ms**
|
||||
|
||||
@sphinxdirective
|
||||
.. code-block:: sh
|
||||
|
||||
./benchmark_app -m ../public/alexnet/FP32/alexnet.xml -d AUTO:CPU,GPU -niter 128
|
||||
@endsphinxdirective
|
||||
|
||||
first-inference latency: **173.13 ms + 13.20 ms**
|
||||
|
||||
@sphinxdirective
|
||||
.. note::
|
||||
The longer the process runs, the closer the real-time performance gets to that of the best-suited device.
|
||||
@endsphinxdirective
|
||||
|
||||
## Using the Auto-Device Plugin
|
||||
|
||||
Inference with AUTO is configured similarly to other plugins: first you configure devices, then load a network to the plugin, and finally, execute inference.
|
||||
|
||||
Following the OpenVINO™ naming convention, the Auto-Device plugin is assigned the label of “AUTO.” It may be defined with no additional parameters, resulting in defaults being used, or configured further with the following setup options:
|
||||
|
||||
@sphinxdirective
|
||||
+-------------------------+-----------------------------------------------+-----------------------------------------------------------+
|
||||
| Property | Property values | Description |
|
||||
+=========================+===============================================+===========================================================+
|
||||
| <device candidate list> | | AUTO: <device names> | | Lists the devices available for selection. |
|
||||
| | | comma-separated, no spaces | | The device sequence will be taken as priority |
|
||||
| | | | | from high to low. |
|
||||
| | | | | If not specified, “AUTO” will be used as default |
|
||||
| | | | | and all devices will be included. |
|
||||
+-------------------------+-----------------------------------------------+-----------------------------------------------------------+
|
||||
| ov::device:priorities | | device names | | Specifies the devices for Auto-Device plugin to select. |
|
||||
| | | comma-separated, no spaces | | The device sequence will be taken as priority |
|
||||
| | | | | from high to low. |
|
||||
| | | | | This configuration is optional. |
|
||||
+-------------------------+-----------------------------------------------+-----------------------------------------------------------+
|
||||
| ov::hint | | THROUGHPUT | | Specifies the performance mode preferred |
|
||||
| | | LATENCY | | by the application. |
|
||||
+-------------------------+-----------------------------------------------+-----------------------------------------------------------+
|
||||
| ov::hint:model_priority | | MODEL_PRIORITY_HIGH | | Indicates the priority for a network. |
|
||||
| | | MODEL_PRIORITY_MED | | Importantly! |
|
||||
| | | MODEL_PRIORITY_LOW | | This property is still not fully supported |
|
||||
+-------------------------+-----------------------------------------------+-----------------------------------------------------------+
|
||||
@endsphinxdirective
|
||||
|
||||
@sphinxdirective
|
||||
.. dropdown:: Click for information on Legacy APIs
|
||||
|
||||
For legacy APIs like LoadNetwork/SetConfig/GetConfig/GetMetric:
|
||||
|
||||
- replace {ov::device:priorities, "GPU,CPU"} with {"MULTI_DEVICE_PRIORITIES", "GPU,CPU"}
|
||||
- replace {ov::hint:model_priority, "LOW"} with {"MODEL_PRIORITY", "LOW"}
|
||||
- InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES is defined as same string "MULTI_DEVICE_PRIORITIES"
|
||||
- CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU is equal to "GPU,CPU"
|
||||
- InferenceEngine::PluginConfigParams::KEY_MODEL_PRIORITY is defined as same string "MODEL_PRIORITY"
|
||||
- InferenceEngine::PluginConfigParams::MODEL_PRIORITY_LOW is defined as same string "LOW"
|
||||
@endsphinxdirective
|
||||
|
||||
### Device candidate list
|
||||
The device candidate list allows users to customize the priority and limit the choice of devices available to the AUTO plugin. If not specified, the plugin assumes all the devices present in the system can be used. Note that OpenVINO™ Runtime lets you use “GPU” as an alias for “GPU.0” in function calls.
|
||||
The following commands are accepted by the API:
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++ API
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
/*** With Inference Engine 2.0 API ***/
|
||||
ov::Core core;
|
||||
|
||||
// Read a network in IR, PaddlePaddle, or ONNX format:
|
||||
std::shared_ptr<ov::Model> model = core.read_model("sample.xml");
|
||||
|
||||
// Load a network to AUTO using the default list of device candidates.
|
||||
// The following lines are equivalent:
|
||||
ov::CompiledModel model0 = core.compile_model(model);
|
||||
ov::CompiledModel model1 = core.compile_model(model, "AUTO");
|
||||
ov::CompiledModel model2 = core.compile_model(model, "AUTO", {});
|
||||
|
||||
// You can also specify the devices to be used by AUTO in its selection process.
|
||||
// The following lines are equivalent:
|
||||
ov::CompiledModel model3 = core.compile_model(model, "AUTO:GPU,CPU");
|
||||
ov::CompiledModel model4 = core.compile_model(model, "AUTO", {{ov::device::priorities.name(), "GPU,CPU"}});
|
||||
|
||||
// the AUTO plugin is pre-configured (globally) with the explicit option:
|
||||
core.set_property("AUTO", ov::device::priorities("GPU,CPU"));
|
||||
|
||||
.. tab:: C++ legacy API
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
/*** With API Prior to 2022.1 Release ***/
|
||||
InferenceEngine::Core ie;
|
||||
|
||||
// Read a network in IR, PaddlePaddle, or ONNX format:
|
||||
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
|
||||
|
||||
// Load a network to AUTO using the default list of device candidates.
|
||||
// The following lines are equivalent:
|
||||
InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network);
|
||||
InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "AUTO");
|
||||
InferenceEngine::ExecutableNetwork exec2 = ie.LoadNetwork(network, "AUTO", {});
|
||||
|
||||
// You can also specify the devices to be used by AUTO in its selection process.
|
||||
// The following lines are equivalent:
|
||||
InferenceEngine::ExecutableNetwork exec3 = ie.LoadNetwork(network, "AUTO:GPU,CPU");
|
||||
InferenceEngine::ExecutableNetwork exec4 = ie.LoadNetwork(network, "AUTO", {{"MULTI_DEVICE_PRIORITIES", "GPU,CPU"}});
|
||||
|
||||
// the AUTO plugin is pre-configured (globally) with the explicit option:
|
||||
ie.SetConfig({{"MULTI_DEVICE_PRIORITIES", "GPU,CPU"}}, "AUTO");
|
||||
|
||||
.. tab:: Python API
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
### New IE 2.0 API ###
|
||||
|
||||
from openvino.runtime import Core
|
||||
core = Core()
|
||||
|
||||
# Read a network in IR, PaddlePaddle, or ONNX format:
|
||||
model = core.read_model(model_path)
|
||||
|
||||
# Load a network to AUTO using the default list of device candidates.
|
||||
# The following lines are equivalent:
|
||||
model = core.compile_model(model=model)
|
||||
compiled_model = core.compile_model(model=model, device_name="AUTO")
|
||||
compiled_model = core.compile_model(model=model, device_name="AUTO", config={})
|
||||
|
||||
# You can also specify the devices to be used by AUTO in its selection process.
|
||||
# The following lines are equivalent:
|
||||
compiled_model = core.compile_model(model=model, device_name="AUTO:CPU,GPU")
|
||||
compiled_model = core.compile_model(model=model, device_name="AUTO", config={"MULTI_DEVICE_PRIORITIES": "CPU,GPU"})
|
||||
|
||||
# the AUTO plugin is pre-configured (globally) with the explicit option:
|
||||
core.set_config(config={"MULTI_DEVICE_PRIORITIES":"CPU,GPU"}, device_name="AUTO")
|
||||
|
||||
.. tab:: Python legacy API
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
### API before 2022.1 ###
|
||||
from openvino.inference_engine import IECore
|
||||
ie = IECore()
|
||||
|
||||
# Read a network in IR, PaddlePaddle, or ONNX format:
|
||||
net = ie.read_network(model=path_to_model)
|
||||
|
||||
# Load a network to AUTO using the default list of device candidates.
|
||||
# The following lines are equivalent:
|
||||
exec_net = ie.load_network(network=net)
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO")
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO", config={})
|
||||
|
||||
# You can also specify the devices to be used by AUTO in its selection process.
|
||||
# The following lines are equivalent:
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO:CPU,GPU")
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO", config={"MULTI_DEVICE_PRIORITIES": "CPU,GPU"})
|
||||
|
||||
# the AUTO plugin is pre-configured (globally) with the explicit option:
|
||||
ie.SetConfig(config={"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}, device_name="AUTO");
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
**NOTES:**
|
||||
* The default CPU stream is 1 if using `-d AUTO`.
|
||||
* You can use the FP16 IR to work with Auto-device.
|
||||
* No demos are fully optimized for Auto-device yet to select the most suitable device,
|
||||
use GPU streams/throttling, and so on.
|
||||
|
||||
## Auto-Device Plugin Execution (Python)
|
||||
To check what devices are present in the system, you can use Device API:
|
||||
|
||||
For C++ API
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
.. code-block:: sh
|
||||
|
||||
<div id="switcher-python" class="switcher-anchor">Python</div>
|
||||
ov::runtime::Core::get_available_devices() (see Hello Query Device C++ Sample)
|
||||
@endsphinxdirective
|
||||
|
||||
The AUTO device is a new, special "virtual" or "proxy" device in the OpenVINO™ toolkit.
|
||||
|
||||
Use "AUTO" as the device name to delegate selection of an actual accelerator to OpenVINO. The Auto-device plugin internally recognizes and selects devices from among CPU, integrated GPU and discrete Intel GPUs (when available) depending on the device capabilities and the characteristics of CNN models (for example, precision). Then the Auto-device assigns inference requests to the selected device.
|
||||
|
||||
From the application's point of view, this is just another device that handles all accelerators in the full system.
|
||||
|
||||
With the 2021.4 release, Auto-device setup is done in three major steps:
|
||||
|
||||
1. Configure each device as usual (for example, via the conventional [IECore.set_config](https://docs.openvino.ai/latest/ie_python_api/classie__api_1_1IECore.html#a2c738cee90fca27146e629825c039a05) method).
|
||||
2. Load a network to the Auto-device plugin. This is the only change needed in your application.
|
||||
3. As with any other executable network resulting from [IECore.load_network](https://docs.openvino.ai/latest/ie_python_api/classie__api_1_1IECore.html#ac9a2e043d14ccfa9c6bbf626cfd69fcc), create as many requests as needed to saturate the devices.
|
||||
|
||||
These steps are covered below in detail.
|
||||
|
||||
### Defining and Configuring the Auto-Device Plugin
|
||||
Following the OpenVINO convention for device names, the Auto-device uses the label "AUTO". The only configuration option for the Auto-device is a limited device list:
|
||||
|
||||
| Parameter name | Parameter values | Default | Description |
|
||||
| -------------- | ---------------- | ------- | ----------- |
|
||||
| "AUTO_DEVICE_LIST" | comma-separated device names with no spaces | N/A | Device candidate list to be selected
|
||||
|
||||
There are two ways to use the Auto-device plugin:
|
||||
|
||||
1. Directly indicate device by "AUTO" or an empty string.
|
||||
2. Use the Auto-device configuration
|
||||
|
||||
Both methods allow limiting the list of device candidates for the AUTO plugin.
|
||||
|
||||
```python
|
||||
from openvino.inference_engine import IECore
|
||||
|
||||
ie = IECore()
|
||||
# Read a network in IR or ONNX format
|
||||
net = ie.read_network(model=path_to_model)
|
||||
|
||||
# Load a network on the "AUTO" device
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO")
|
||||
|
||||
# Optionally specify the list of device candidates for the AUTO plugin
|
||||
# The following two lines are equivalent
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO:CPU,GPU")
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO",
|
||||
config={"AUTO_DEVICE_LIST": "CPU,GPU"})
|
||||
```
|
||||
|
||||
The Auto-device plugin supports querying device optimization capabilities as a metric.
|
||||
|
||||
| Parameter name | Parameter values |
|
||||
| --- | --- |
|
||||
| "OPTIMIZATION_CAPABILITIES" | Auto-Device capabilities |
|
||||
|
||||
### Enumerating Devices and Selection Logic
|
||||
|
||||
The OpenVINO Runtime API now features dedicated methods to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md) for code.
|
||||
|
||||
This is the example output from the sample (truncated to device names only):
|
||||
|
||||
```sh
|
||||
./hello_query_device
|
||||
|
||||
Available devices:
|
||||
Device: CPU
|
||||
...
|
||||
Device: GPU.0
|
||||
...
|
||||
Device: GPU.1
|
||||
```
|
||||
|
||||
### Default Auto-Device Selection Logic
|
||||
|
||||
With the 2021.4 release, the Auto-Device selects the most suitable device using the following default logic:
|
||||
|
||||
1. Check if dGPU (discrete), iGPU (integrated) and CPU devices are available
|
||||
2. Get the precision of the input model, such as FP32
|
||||
3. According to the priority of dGPU, iGPU, and CPU (in this order), if the device supports the precision of the input network, select it as the most suitable device
|
||||
|
||||
For example, CPU, dGPU and iGPU can support the following precision and optimization capabilities:
|
||||
|
||||
| Device | OPTIMIZATION_CAPABILITIES |
|
||||
| --- | --- |
|
||||
| CPU | WINOGRAD FP32 FP16 INT8 BIN |
|
||||
| dGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
|
||||
| iGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
|
||||
|
||||
* When the application uses the Auto-device to run FP16 IR on a system with CPU, dGPU and iGPU, Auto-device will offload this workload to dGPU.
|
||||
* When the application uses the Auto-device to run FP16 IR on a system with CPU and iGPU, Auto-device will offload this workload to iGPU.
|
||||
* When the application uses the Auto-device to run WINOGRAD-enabled IR on a system with CPU, dGPU and iGPU, Auto-device will offload this workload to CPU.
|
||||
|
||||
In cases when loading the network to dGPU or iGPU fails, CPU is the fall-back choice.
|
||||
|
||||
To show the capabilities for a specific device, query the OPTIMIZATION_CAPABILITIES metric:
|
||||
|
||||
|
||||
```python
|
||||
from openvino.inference_engine import IECore
|
||||
|
||||
ie = IECore()
|
||||
ie.get_metric(device_name=device,
|
||||
metric_name="OPTIMIZATION_CAPABILITIES")
|
||||
```
|
||||
|
||||
### Configuring the Individual Devices and Creating the Auto-Device on Top
|
||||
|
||||
It is possible to configure each individual device as usual and create the "AUTO" device on top:
|
||||
|
||||
```python
|
||||
from openvino.inference_engine import IECore
|
||||
|
||||
ie = IECore()
|
||||
net = ie.read_network(model=path_to_model)
|
||||
|
||||
cpu_config = {}
|
||||
gpu_config = {}
|
||||
|
||||
ie.set_config(config=cpu_config, device_name="CPU")
|
||||
ie.set_config(config=gpu_config, device_name="GPU")
|
||||
|
||||
# Load the network to the AUTO device
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO")
|
||||
```
|
||||
|
||||
Alternatively, you can combine all the individual device settings into single config file and load it, allowing the Auto-device plugin to parse and apply it to the right devices. See the code example here:
|
||||
|
||||
```python
|
||||
from openvino.inference_engine import IECore
|
||||
|
||||
# Init the Inference Engine Core
|
||||
ie = IECore()
|
||||
|
||||
# Read a network in IR or ONNX format
|
||||
net = ie.read_network(model=path_to_model)
|
||||
|
||||
full_config = {}
|
||||
|
||||
# Load the network to the AUTO device
|
||||
exec_net = ie.load_network(network=net, device_name="AUTO", config=full_config)
|
||||
```
|
||||
|
||||
### Using the Auto-Device with OpenVINO Samples and Benchmark App
|
||||
|
||||
Note that every OpenVINO sample or application that supports the "-d" (which stands for "device") command-line option transparently accepts the Auto-device. The Benchmark Application is the best example of the optimal usage of the Auto-device. You do not need to set the number of requests and CPU threads, as the application provides optimal out-of-the-box performance. Below is the example command-line to evaluate AUTO performance with that:
|
||||
|
||||
For Python API
|
||||
@sphinxdirective
|
||||
.. tab:: Package, Docker, open-source installation
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
./benchmark_app.py –d AUTO –m <model>
|
||||
|
||||
.. tab:: pip installation
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
benchmark_app –d AUTO –m <model>
|
||||
.. code-block:: sh
|
||||
|
||||
openvino.runtime.Core.available_devices (see Hello Query Device Python Sample)
|
||||
@endsphinxdirective
|
||||
|
||||
You can also use the auto-device with limit device choice:
|
||||
|
||||
### Performance Hints
|
||||
The `ov::hint` property enables you to specify a performance mode for the plugin to be more efficient for particular use cases.
|
||||
|
||||
#### ov::hint::PerformanceMode::THROUGHPUT
|
||||
This mode prioritizes high throughput, balancing between latency and power. It is best suited for tasks involving multiple jobs, like inference of video feeds or large numbers of images.
|
||||
|
||||
#### ov::hint::PerformanceMode::LATENCY
|
||||
This mode prioritizes low latency, providing short response time for each inference job. It performs best for tasks where inference is required for a single input image, like a medical analysis of an ultrasound scan image. It also fits the tasks of real-time or nearly real-time applications, such as an industrial robot's response to actions in its environment or obstacle avoidance for autonomous vehicles.
|
||||
Note that currently the `ov::hint` property is supported by CPU and GPU devices only.
|
||||
|
||||
To enable Performance Hints for your application, use the following code:
|
||||
@sphinxdirective
|
||||
.. tab:: Package, Docker, open-source installation
|
||||
.. tab:: C++ API
|
||||
|
||||
.. code-block:: sh
|
||||
.. code-block:: cpp
|
||||
|
||||
./benchmark_app.py –d AUTO:CPU,GPU –m <model>
|
||||
ov::Core core;
|
||||
|
||||
.. tab:: pip installation
|
||||
// Read a network in IR, PaddlePaddle, or ONNX format:
|
||||
std::shared_ptr<ov::Model> model = core.read_model("sample.xml");
|
||||
|
||||
// Load a network to AUTO with Performance Hints enabled:
|
||||
// To use the “throughput” mode:
|
||||
ov::CompiledModel compiled_model = core.compile_model(model, "AUTO:GPU,CPU", {{ov::hint::performance_mode.name(), "THROUGHPUT"}});
|
||||
|
||||
// or the “latency” mode:
|
||||
ov::CompiledModel compiledModel1 = core.compile_model(model, "AUTO:GPU,CPU", {{ov::hint::performance_mode.name(), "LATENCY"}});
|
||||
|
||||
.. tab:: Python API
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
benchmark_app –d AUTO:CPU,GPU –m <model>
|
||||
.. code-block:: python
|
||||
|
||||
from openvino.runtime import Core
|
||||
|
||||
core = Core()
|
||||
|
||||
# Read a network in IR, PaddlePaddle, or ONNX format:
|
||||
model = core.read_model(model_path)
|
||||
|
||||
# Load a network to AUTO with Performance Hints enabled:
|
||||
# To use the “throughput” mode:
|
||||
compiled_model = core.compile_model(model=model, device_name="AUTO:CPU,GPU", config={"PERFORMANCE_HINT":"THROUGHPUT"})
|
||||
|
||||
# or the “latency” mode:
|
||||
compiled_model = core.compile_model(model=model, device_name="AUTO:CPU,GPU", config={"PERFORMANCE_HINT":"LATENCY"})
|
||||
@endsphinxdirective
|
||||
|
||||
> **NOTE**: If you installed OpenVINO with pip, use `benchmark_app -d AUTO:CPU,GPU -m <model>`
|
||||
### ov::hint::model_priority
|
||||
The property enables you to control the priorities of networks in the Auto-Device plugin. A high-priority network will be loaded to a supported high-priority device. A lower-priority network will not be loaded to a device that is occupied by a higher-priority network.
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++ API
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
// Example 1
|
||||
// Compile and load networks:
|
||||
ov::CompiledModel compiled_model0 = core.compile_model(model, "AUTO:GPU,MYRIAD,CPU", {{ov::hint::model_priority.name(), "HIGH"}});
|
||||
ov::CompiledModel compiled_model1 = core.compile_model(model, "AUTO:GPU,MYRIAD,CPU", {{ov::hint::model_priority.name(), "MEDIUM"}});
|
||||
ov::CompiledModel compiled_model2 = core.compile_model(model, "AUTO:GPU,MYRIAD,CPU", {{ov::hint::model_priority.name(), "LOW"}});
|
||||
|
||||
/************
|
||||
Assume that all the devices (CPU, GPU, and MYRIAD) can support all the networks.
|
||||
Result: compiled_model0 will use GPU, compiled_model1 will use MYRIAD, compiled_model2 will use CPU.
|
||||
************/
|
||||
|
||||
// Example 2
|
||||
// Compile and load networks:
|
||||
ov::CompiledModel compiled_model3 = core.compile_model(model, "AUTO:GPU,MYRIAD,CPU", {{ov::hint::model_priority.name(), "LOW"}});
|
||||
ov::CompiledModel compiled_model4 = core.compile_model(model, "AUTO:GPU,MYRIAD,CPU", {{ov::hint::model_priority.name(), "MEDIUM"}});
|
||||
ov::CompiledModel compiled_model5 = core.compile_model(model, "AUTO:GPU,MYRIAD,CPU", {{ov::hint::model_priority.name(), "LOW"}});
|
||||
|
||||
/************
|
||||
Assume that all the devices (CPU, GPU, and MYRIAD) can support all the networks.
|
||||
Result: compiled_model3 will use GPU, compiled_model4 will use GPU, compiled_model5 will use MYRIAD.
|
||||
************/
|
||||
|
||||
.. tab:: Python API
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
# Example 1
|
||||
# Compile and load networks:
|
||||
compiled_model0 = core.compile_model(model=model, device_name="AUTO:CPU,GPU,MYRIAD", config={"AUTO_NETWORK_PRIORITY":"0"})
|
||||
compiled_model1 = core.compile_model(model=model, device_name="AUTO:CPU,GPU,MYRIAD", config={"AUTO_NETWORK_PRIORITY":"1"})
|
||||
compiled_model2 = core.compile_model(model=model, device_name="AUTO:CPU,GPU,MYRIAD", config={"AUTO_NETWORK_PRIORITY":"2"})
|
||||
|
||||
# Assume that all the devices (CPU, GPU, and MYRIAD) can support all the networks.
|
||||
# Result: compiled_model0 will use GPU, compiled_model1 will use MYRIAD, compiled_model2 will use CPU.
|
||||
|
||||
# Example 2
|
||||
# Compile and load networks:
|
||||
compiled_model0 = core.compile_model(model=model, device_name="AUTO:CPU,GPU,MYRIAD", config={"AUTO_NETWORK_PRIORITY":"2"})
|
||||
compiled_model1 = core.compile_model(model=model, device_name="AUTO:CPU,GPU,MYRIAD", config={"AUTO_NETWORK_PRIORITY":"1"})
|
||||
compiled_model2 = core.compile_model(model=model, device_name="AUTO:CPU,GPU,MYRIAD", config={"AUTO_NETWORK_PRIORITY":"2"})
|
||||
|
||||
# Assume that all the devices (CPU, GPU, and MYRIAD) can support all the networks.
|
||||
# Result: compiled_model0 will use GPU, compiled_model1 will use GPU, compiled_model2 will use MYRIAD.
|
||||
@endsphinxdirective
|
||||
|
||||
## Configuring Individual Devices and Creating the Auto-Device plugin on Top
|
||||
Although the methods described above are currently the preferred way to execute inference with AUTO, the following steps can also be used as an alternative. It is currently available as a legacy feature and is used if the device candidate list includes VPUX or Myriad (devices incapable of utilizing the Performance Hints option).
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++ API
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
ov::Core core;
|
||||
|
||||
// Read a network in IR, PaddlePaddle, or ONNX format
|
||||
std::shared_ptr<ov::Model> model = core.read_model("sample.xml");
|
||||
|
||||
// Configure the VPUX and the Myriad devices separately and load the network to the Auto-Device plugin
|
||||
// set VPUX config
|
||||
core.set_property("VPUX", {});
|
||||
|
||||
// set MYRIAD config
|
||||
core.set_property("MYRIAD", {});
|
||||
ov::CompiledModel compiled_model = core.compile_model(model, "AUTO");
|
||||
|
||||
.. tab:: Python API
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from openvino.runtime import Core
|
||||
|
||||
core = Core()
|
||||
|
||||
# Read a network in IR, PaddlePaddle, or ONNX format:
|
||||
model = core.read_model(model_path)
|
||||
|
||||
# Configure the VPUX and the Myriad devices separately and load the network to the Auto-Device plugin:
|
||||
core.set_config(config=vpux_config, device_name="VPUX")
|
||||
core.set_config(config=myriad_config, device_name="MYRIAD")
|
||||
compiled_model = core.compile_model(model=model)
|
||||
|
||||
# Alternatively, you can combine the individual device settings into one configuration and load the network.
|
||||
# The AUTO plugin will parse and apply the settings to the right devices.
|
||||
# A 'device_name' of "AUTO:VPUX,MYRIAD" will configure the auto-device plugin to use these devices.
|
||||
compiled_model = core.compile_model(model=model, device_name=device_name, config=full_config)
|
||||
|
||||
# To query the optimization capabilities:
|
||||
device_cap = core.get_metric("CPU", "OPTIMIZATION_CAPABILITIES")
|
||||
@endsphinxdirective
|
||||
|
||||
<a name="Benchmark App Info"></a>
|
||||
## Using AUTO with OpenVINO™ Samples and the Benchmark App
|
||||
To see how the Auto-Device plugin is used in practice and test its performance, take a look at OpenVINO™ samples. All samples supporting the "-d" command-line option (which stands for "device") will accept the plugin out-of-the-box. The Benchmark Application will be a perfect place to start – it presents the optimal performance of the plugin without the need for additional settings, like the number of requests or CPU threads. To evaluate the AUTO performance, you can use the following commands:
|
||||
|
||||
For unlimited device choice:
|
||||
@sphinxdirective
|
||||
.. code-block:: sh
|
||||
|
||||
./benchmark_app –d AUTO –m <model> -i <input> -niter 1000
|
||||
@endsphinxdirective
|
||||
|
||||
For limited device choice:
|
||||
@sphinxdirective
|
||||
.. code-block:: sh
|
||||
|
||||
./benchmark_app –d AUTO:CPU,GPU,MYRIAD –m <model> -i <input> -niter 1000
|
||||
@endsphinxdirective
|
||||
|
||||
For more information, refer to the [C++](../../samples/cpp/benchmark_app/README.md) or [Python](../../tools/benchmark_tool/README.md) version instructions.
|
||||
|
||||
@sphinxdirective
|
||||
.. note::
|
||||
|
||||
The default CPU stream is 1 if using “-d AUTO”.
|
||||
|
||||
You can use the FP16 IR to work with auto-device.
|
||||
|
||||
No demos are yet fully optimized for AUTO, by means of selecting the most suitable device, using the GPU streams/throttling, and so on.
|
||||
@endsphinxdirective
|
||||
|
@ -76,7 +76,7 @@ For example, the application processes only 4 video streams, so there is no need
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_auto_batching.py
|
||||
:language: python
|
||||
:fragment: hint_num_requests]
|
||||
:fragment: [hint_num_requests]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
@ -1,52 +1,35 @@
|
||||
# Heterogeneous execution {#openvino_docs_OV_UG_Hetero_execution}
|
||||
|
||||
## Introducing the Heterogeneous execution
|
||||
Heterogeneous execution enables executing inference of one model on several devices. Its purpose is to:
|
||||
|
||||
The heterogeneous execution enables computing the inference of one model on several devices. The purposes of executing models in heterogeneous mode are to:
|
||||
* Utilize the power of accelerators to process the heaviest parts of the model and to execute unsupported operations on fallback devices, like the CPU.
|
||||
* Utilize all available hardware more efficiently during one inference.
|
||||
|
||||
* Utilize the power of accelerators to process the heaviest parts of the model and to execute unsupported operations on fallback devices like the CPU
|
||||
* Utilize all available hardware more efficiently during one inference
|
||||
Execution via the heterogeneous mode can be divided into two independent steps:
|
||||
|
||||
The execution through heterogeneous mode can be divided into two independent steps:
|
||||
1. Setting hardware affinity to operations (`ov::Core::query_model` is used internally by the Hetero device)
|
||||
2. Compiling a model to the Heterogeneous device assumes splitting the model into parts, compiling them on the specified devices (via `ov::device::priorities`), and executing them in the Heterogeneous mode. The model is split into subgraphs in accordance with the affinities, where a set of connected operations with the same affinity forms a dedicated subgraph. Each subgraph is compiled on a dedicated device, and multiple `ov::CompiledModel` objects are created, which are connected via automatically allocated intermediate tensors.
|
||||
|
||||
1. Setting of hardware affinity to operations (ov::Core::query_model is used internally by the Hetero device)
|
||||
2. Compiling a model to the Heterogeneous device assuming splitting the model to parts and compiling on the specified devices (via ov::device::priorities), and executing them through the Heterogeneous mode. The model is split to the subgraphs in according to the affinities where a set of conntected operations with the same affinity are supposed to be a dedicated subgraph. Each subgraph is compiled on a dedicated device and we have multiple ov::CompiledModel objects, which are connected via automatically allocated intermediate tensors.
|
||||
|
||||
These steps are decoupled. The setting of affinities can be done automatically using the `automatic fallback` policy or in `manual` mode:
|
||||
|
||||
- The fallback automatic policy causes "greedy" behavior and assigns all operations that can be executed on certain device according to the priorities you specify (for example, `ov::device::priorities("GPU,CPU")`).
|
||||
Automatic policy does not take into account device peculiarities such as the inability to infer some operations without other special operations placed before or after that layer. The plugin is responsible for solving such cases. If the device plugin does not support the subgraph topology constructed by the HETERO device, then you should set affinity manually.
|
||||
- Manual policy assumes explicit setting of affinities for all operations in the model using the runtime information ov::Node::get_rt_info.
|
||||
These two steps are not interconnected and affinities can be set in one of two ways, used separately or in combination (as described below): in the `manual` or the `automatic` mode.
|
||||
|
||||
### Defining and Configuring the Hetero Device
|
||||
|
||||
Following the OpenVINO™ convention of labeling devices, the Hetero execution uses the name `"HETERO"`. Configuration options for the Hetero device:
|
||||
|
||||
| Parameter name | C++ property | Parameter values | Default | Description |
|
||||
| -------------- | ---------------- | ---------------- | --- | --- |
|
||||
| "MULTI_DEVICE_PRIORITIES" | `ov::device::priorities` | comma-separated device names with no spaces | N/A | Prioritized list of devices |
|
||||
|
||||
### Automatic and manual policies for assigning affinities
|
||||
|
||||
`Automatic fallback` policy decides which operation goes to which device automatically according to the support in dedicated devices (`GPU`, `CPU`, `MYRIAD`, etc) and query model step is called implicitly by Hetero device during model compilation:
|
||||
Following the OpenVINO™ naming convention, the Hetero execution plugin is assigned the label of `"HETERO"`. It may be defined with no additional parameters, resulting in defaults being used, or configured further with the following setup options:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_hetero.cpp
|
||||
:language: cpp
|
||||
:fragment: [compile_model]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_hetero.py
|
||||
:language: python
|
||||
:fragment: [compile_model]
|
||||
|
||||
+-------------------------------+--------------------------------------------+-----------------------------------------------------------+
|
||||
| Parameter Name & C++ property | Property values | Description |
|
||||
+===============================+============================================+===========================================================+
|
||||
| | "MULTI_DEVICE_PRIORITIES" | | HETERO: <device names> | | Lists the devices available for selection. |
|
||||
| | `ov::device::priorities` | | comma-separated, no spaces | | The device sequence will be taken as priority |
|
||||
| | | | | | from high to low. |
|
||||
+-------------------------------+--------------------------------------------+-----------------------------------------------------------+
|
||||
@endsphinxdirective
|
||||
|
||||
Another way to annotate a model is to set all affinities `manually` using ov::Node::get_rt_info with key `"affinity"`:
|
||||
### Manual and Automatic modes for assigning affinities
|
||||
|
||||
#### The Manual Mode
|
||||
It assumes setting affinities explicitly for all operations in the model using `ov::Node::get_rt_info` with the `"affinity"` key.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
@ -64,7 +47,32 @@ Another way to annotate a model is to set all affinities `manually` using ov::No
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
The fallback policy does not work if at least one operation has an initialized `"affinity"`. If you want to adjust automatically set affinities, then get automatic affinities first, then fix them (usually, to minimize a number of total subgraphs to optimize memory transfers):
|
||||
|
||||
|
||||
#### The Automatic Mode
|
||||
It decides automatically which operation is assigned to which device according to the support of dedicated devices (`GPU`, `CPU`, `MYRIAD`, etc.), and the query model step is called implicitly by the Hetero device during model compilation.
|
||||
|
||||
The automatic mode causes "greedy" behavior and assigns all operations that can be executed on a given device to it, according to the priorities you specify (for example, `ov::device::priorities("GPU,CPU")`).
|
||||
It does not take into account device peculiarities such as the inability to infer certain operations without other special operations placed before or after that layer. If the device plugin does not support the subgraph topology constructed by the HETERO device, then you should set affinity manually.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_hetero.cpp
|
||||
:language: cpp
|
||||
:fragment: [compile_model]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_hetero.py
|
||||
:language: python
|
||||
:fragment: [compile_model]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
#### Using Manual and Automatic Modes in Combination
|
||||
In some cases you may need to consider manually adjusting affinities which were set automatically. This is usually done to minimize the total number of subgraphs and optimize memory transfers. To do it, you need to "fix" the automatically assigned affinities like so:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
@ -82,10 +90,12 @@ The fallback policy does not work if at least one operation has an initialized `
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
> **NOTE**: ov::Core::query_model does not depend on affinities set by a user. Instead, it queries for an operation support based on device capabilities.
|
||||
Importantly, the automatic mode will not work if any operation in a model has its `"affinity"` already initialized.
|
||||
|
||||
> **NOTE**: `ov::Core::query_model` does not depend on affinities set by a user. Instead, it queries for an operation support based on device capabilities.
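To make the note above more concrete, here is a minimal Python sketch (not taken from the official snippets) that queries operation support for a device; the model path and the choice of `GPU` are assumptions for illustration only:

```python
from openvino.runtime import Core

core = Core()
model = core.read_model("model.xml")  # hypothetical model path

# Ask a device plugin which operations of the model it supports.
# The result does not depend on any "affinity" runtime info already set.
supported_ops = core.query_model(model=model, device_name="GPU")
for op_name, device in supported_ops.items():
    print(f"{op_name} -> {device}")
```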
|
||||
|
||||
### Configure fallback devices
|
||||
If you want different devices in Hetero execution to have different device-specific configuration options, you can use the special helper property ov::device::properties:
|
||||
If you want different devices in Hetero execution to have different device-specific configuration options, you can use the special helper property `ov::device::properties`:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
@ -103,24 +113,24 @@ If you want different devices in Hetero execution to have different device-speci
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
In the example above, `CPU` device is configured to enable profiling data, while only `GPU` device has configuration property to perform inference in `f16` precision, while CPU has default execution precision.
|
||||
In the example above, the `GPU` device is configured to enable profiling data and uses the default execution precision, while `CPU` has the configuration property to perform inference in `fp32`.
|
||||
|
||||
### Handling Difficult Topologies
|
||||
### Handling of Difficult Topologies
|
||||
|
||||
Some topologies are not friendly to heterogeneous execution on some devices or cannot be executed at all with this device.
|
||||
For example, models having activation operations that are not supported on the primary device are split by Hetero device into multiple set of subgraphs which leads to unoptimal execution.
|
||||
If transmitting data from one subgraph of a whole model to another part in heterogeneous mode takes more time than in normal execution, it may not make sense to execute them heterogeneously.
|
||||
In this case, you can define the heaviest part manually and set the affinity to avoid sending data back and forth many times during one inference.
|
||||
Some topologies are not friendly to heterogeneous execution on some devices, even to the point of being unable to execute.
|
||||
For example, models having activation operations that are not supported on the primary device are split by Hetero into multiple sets of subgraphs which leads to suboptimal execution.
|
||||
If transmitting data from one subgraph to another part of the model in the heterogeneous mode takes more time than under normal execution, heterogeneous execution may not be justified.
|
||||
In such cases, you can define the heaviest part manually and set the affinity to avoid sending data back and forth many times during one inference.
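As a rough illustration of setting such affinities by hand, the Python sketch below keeps a few operations (identified by hypothetical friendly names) on the GPU and falls back to the CPU for everything else; it assumes the `"affinity"` runtime info key is honored by the HETERO device as described above, and the model path is an assumption:

```python
from openvino.runtime import Core

core = Core()
model = core.read_model("model.xml")              # hypothetical model path
heavy_ops = {"conv_block_1", "conv_block_2"}      # hypothetical operation names

# Keep the heavy part on the GPU and everything else on the CPU to avoid
# transferring intermediate data back and forth during one inference.
for op in model.get_ops():
    affinity = "GPU" if op.get_friendly_name() in heavy_ops else "CPU"
    op.get_rt_info()["affinity"] = affinity

compiled_model = core.compile_model(model, "HETERO:GPU,CPU")
```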
|
||||
|
||||
### Analyzing Performance Heterogeneous Execution
|
||||
After enabling the <code>OPENVINO_HETERO_VISUALIZE</code> environment variable, you can dump GraphViz* `.dot` files with annotations of operations per devices.
|
||||
### Analyzing Performance of Heterogeneous Execution
|
||||
After enabling the <code>OPENVINO_HETERO_VISUALIZE</code> environment variable, you can dump GraphViz `.dot` files with annotations of operations per device.
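For example, a minimal Python sketch of enabling the dumps could look as follows; it assumes that setting the variable to any non-empty value (such as `1`) before model compilation is sufficient, and the model path is hypothetical. The generated files are described below.

```python
import os
from openvino.runtime import Core

# Assumption: a non-empty value enables the .dot dumps; set it before compiling.
os.environ["OPENVINO_HETERO_VISUALIZE"] = "1"

core = Core()
model = core.read_model("model.xml")                          # hypothetical path
compiled_model = core.compile_model(model, "HETERO:GPU,CPU")
# hetero_affinity_<model name>.dot and hetero_subgraphs_<model name>.dot
# should appear in the working directory after compilation.
```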
|
||||
|
||||
The Heterogeneous device can generate two files:
|
||||
The Heterogeneous execution mode can generate two files:
|
||||
|
||||
* `hetero_affinity_<model name>.dot` - annotation of affinities per operation.
|
||||
* `hetero_subgraphs_<model name>.dot` - annotation of affinities per graph.
|
||||
|
||||
You can use the GraphViz* utility or a file converter to view the images. On the Ubuntu* operating system, you can use xdot:
|
||||
You can use the GraphViz utility or a file converter to view the images. On the Ubuntu operating system, you can use xdot:
|
||||
|
||||
* `sudo apt-get install xdot`
|
||||
* `xdot hetero_subgraphs.dot`
|
||||
@ -149,9 +159,9 @@ OpenVINO™ sample programs can use the Heterogeneous execution used with the `-
|
||||
```
|
||||
where:
|
||||
- `HETERO` stands for the Heterogeneous execution
|
||||
- `GPU,CPU` points to fallback policy with priority on GPU and fallback to CPU
|
||||
- `GPU,CPU` points to a fallback policy with the priority on GPU and fallback to CPU
|
||||
|
||||
You can point more than two devices: `-d HETERO:MYRIAD,GPU,CPU`
|
||||
You can also point to more than two devices: `-d HETERO:MYRIAD,GPU,CPU`
|
||||
|
||||
### See Also
|
||||
[Supported Devices](supported_plugins/Supported_Devices.md)
|
||||
|
@ -12,7 +12,7 @@ Reasons when you may want to care about input/output layout:
|
||||
- Perform model modification:
|
||||
- Apply [preprocessing](./preprocessing_overview.md) steps, like subtracting means, dividing by scales, resizing images, or converting RGB<->BGR
|
||||
- Set/get batch for a model
|
||||
- Same operations, used during model conversion phase, see [Model Optimizer model conversion](../MO_DG/prepare_model/convert_model/Converting_Model.md)
|
||||
- Perform the same operations as used during the model conversion phase; see [Model Optimizer Embedding Preprocessing Computation](../MO_DG/prepare_model/Additional_Optimizations.md)
|
||||
- Improve readability of a model's input and output (a short example follows below)
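As a quick, hedged illustration of what a layout object is (the layout strings below are arbitrary examples, not tied to a particular model):

```python
from openvino.runtime import Layout

# A layout names what each dimension of a tensor means.
nchw = Layout("NCHW")    # batch, channels, height, width
nhwc = Layout("NHWC")    # batch, height, width, channels
print(nchw, nhwc)        # prints something like [N,C,H,W] [N,H,W,C]
```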
|
||||
|
||||
## Layout syntax
|
||||
|
@ -80,13 +80,13 @@ Inference Engine API fills inputs as `I32` precision (**not** aligned with the o
|
||||
.. doxygensnippet:: docs/snippets/ie_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ie:get_input_tensor]
|
||||
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ie_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ie:get_input_tensor]
|
||||
|
||||
|
||||
.. tab:: Model created in code
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ie_common.cpp
|
||||
@ -110,13 +110,13 @@ OpenVINO™ Runtime API 2.0 fills inputs as `I64` precision (aligned with the or
|
||||
.. doxygensnippet:: docs/snippets/ov_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_api_2_0:get_input_tensor_aligned]
|
||||
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_api_2_0:get_input_tensor_aligned]
|
||||
|
||||
|
||||
.. tab:: Model created in code
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_common.cpp
|
||||
@ -129,11 +129,39 @@ OpenVINO™ Runtime API 2.0 fills inputs as `I64` precision (aligned with the or
|
||||
|
||||
Inference Engine API:
|
||||
|
||||
@snippet docs/snippets/ie_common.cpp ie:inference
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: sync
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ie_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ie:inference]
|
||||
|
||||
.. tab:: async
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ie_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ie:start_async_and_wait]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
OpenVINO™ Runtime API 2.0:
|
||||
|
||||
@snippet docs/snippets/ov_common.cpp ov_api_2_0:inference
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: sync
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_api_2_0:inference]
|
||||
|
||||
.. tab:: async
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_api_2_0:start_async_and_wait]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## 7. Process the Inference Results
|
||||
|
||||
@ -152,13 +180,13 @@ Inference Engine API processes outputs as `I32` precision (**not** aligned with
|
||||
.. doxygensnippet:: docs/snippets/ie_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ie:get_output_tensor]
|
||||
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ie_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ie:get_output_tensor]
|
||||
|
||||
|
||||
.. tab:: Model created in code
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ie_common.cpp
|
||||
@ -184,17 +212,17 @@ OpenVINO™ Runtime API 2.0 processes outputs:
|
||||
.. doxygensnippet:: docs/snippets/ov_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_api_2_0:get_output_tensor_aligned]
|
||||
|
||||
|
||||
.. tab:: ONNX
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_api_2_0:get_output_tensor_aligned]
|
||||
|
||||
|
||||
.. tab:: Model created in code
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_common.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_api_2_0:get_output_tensor_aligned]
|
||||
|
||||
@endsphinxdirective
|
||||
@endsphinxdirective
|
@ -46,23 +46,45 @@ OpenVINO Runtime API 2.0:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: Devices
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [core_set_property]
|
||||
.. tab:: Devices
|
||||
|
||||
.. tab:: Model Loading
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [core_set_property]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [core_compile_model]
|
||||
.. tab:: Model Loading
|
||||
|
||||
.. tab:: Execution
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [core_compile_model]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [compiled_model_set_property]
|
||||
.. tab:: Execution
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [compiled_model_set_property]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. tab:: Devices
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.py
|
||||
:language: python
|
||||
:fragment: [core_set_property]
|
||||
|
||||
.. tab:: Model Loading
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.py
|
||||
:language: python
|
||||
:fragment: [core_compile_model]
|
||||
|
||||
.. tab:: Execution
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.py
|
||||
:language: python
|
||||
:fragment: [compiled_model_set_property]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
@ -102,28 +124,56 @@ OpenVINO Runtime API 2.0:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: Device configuration
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [core_get_rw_property]
|
||||
.. tab:: Device configuration
|
||||
|
||||
.. tab:: Device metrics
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [core_get_rw_property]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [core_get_ro_property]
|
||||
.. tab:: Device metrics
|
||||
|
||||
.. tab:: Execution config
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [core_get_ro_property]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [compiled_model_get_rw_property]
|
||||
.. tab:: Execution config
|
||||
|
||||
.. tab:: Execution metrics
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [compiled_model_get_rw_property]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [compiled_model_get_ro_property]
|
||||
.. tab:: Execution metrics
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [compiled_model_get_ro_property]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. tab:: Device configuration
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.py
|
||||
:language: python
|
||||
:fragment: [core_get_rw_property]
|
||||
|
||||
.. tab:: Device metrics
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.py
|
||||
:language: python
|
||||
:fragment: [core_get_ro_property]
|
||||
|
||||
.. tab:: Execution config
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.py
|
||||
:language: python
|
||||
:fragment: [compiled_model_get_rw_property]
|
||||
|
||||
.. tab:: Execution metrics
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_properties_migration.py
|
||||
:language: python
|
||||
:fragment: [compiled_model_get_ro_property]
|
||||
|
||||
@endsphinxdirective
|
||||
|
@ -5,53 +5,55 @@
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
||||
|
||||
openvino_2_0_inference_pipeline
|
||||
openvino_2_0_configure_devices
|
||||
openvino_2_0_preprocessing
|
||||
openvino_2_0_model_creation
|
||||
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Introduction
|
||||
|
||||
Older versions of OpenVINO (prior to 2022.1) required changing the application logic when a user migrates from frameworks like TensorFlow, ONNX Runtime, PyTorch, PaddlePaddle, etc. The change of application logic is connected with the following:
|
||||
|
||||
- Model Optimizer changed input precisions for some inputs. For example, neural langauge processing models with `I64` input are becoming to have `I32` input element type.
|
||||
- Model Optimizer changed input precisions for some inputs. For example, neural language processing models with `I64` inputs were changed to have an `I32` input element type.
|
||||
- Model Optimizer changed layouts for TensorFlow models (see [Layouts in OpenVINO](../layout_overview.md)). As a result, a user needs to use a different layout for the input data compared to the framework:
|
||||
![tf_openvino]
|
||||
- Inference Engine API (`InferenceEngine::CNNNetwork`) also applied some conversion rules for input and output precisions because of device plugin limitations.
|
||||
- Users needed to specify input shapes during model conversion in Model Optimizer and work with static shapes in the application.
|
||||
|
||||
OpenVINO Runtime API 2.0 is introduced to align logic of working with model as it is done in the frameworks - no layout and precision changes, operates with tensor names and indeces to address inputs and outputs. OpenVINO Runtime is composed of Inference Engine API used for inference and ngraph API targeted to work with models, operations. The OpenVINO API 2.0 has common structure, naming convention styles, namespaces, removes duplicated structures. See [How to migrate to OpenVINO 2.0 API](./common_inference_pipeline.md) for details.
|
||||
OpenVINO™ introduces API 2.0 to align the logic of working with models with how it is done in the frameworks: no layout or precision changes are applied, and tensor names and indices are used to address inputs and outputs. OpenVINO Runtime combines the Inference Engine API used for inference and the nGraph API targeted at working with models and operations. API 2.0 has a common structure, naming convention style, and namespaces, and removes duplicated structures. See [How to migrate to OpenVINO API v2](common_inference_pipeline.md) for details.
|
||||
|
||||
> **NOTE**: Most importantly, your existing application can continue working with OpenVINO Runtime 2.0 as it used to, but we recommend migrating to the new API to unlock additional features like [Preprocessing](../preprocessing_overview.md) and [Dynamic shapes support](../ov_dynamic_shapes.md).
|
||||
|
||||
### Introduce IR v11
|
||||
### Introducing IR v11
|
||||
|
||||
To support these features, OpenVINO introduced IR v11, which is generated by Model Optimizer by default since 2022.1. The model represented in IR v11 fully matches the original model in the original framework format in terms of inputs and outputs. Also, a user does not have to specify input shapes during the conversion, so the resulting IR v11 contains `-1` to denote undefined dimensions (see [Working with dynamic shapes](../ov_dynamic_shapes.md) to fully utilize this feature; or [Changing input shapes](../ShapeInference.md) to reshape to static shapes in the application).
|
||||
|
||||
What is also important to mention - the IR v11 is fully compatible with old applications written with Inference Engine API from older versions of OpenVINO. This is achieved by adding additional runtime information to the IR v11 which is responsible for backwark compatible behavior. So, once the IR v11 is read by the old Inference Engine based application, it's internally converted to IR v10 to provide backward-compatible behavior.
|
||||
It is also important to mention that IR v11 is fully compatible with old applications written with the Inference Engine API from older versions of OpenVINO. This is achieved by adding additional runtime information to the IR v11, which is responsible for the backward-compatible behavior. So, once the IR v11 is read by an old Inference Engine based application, it is internally converted to IR v10 to provide backward-compatible behavior.
|
||||
|
||||
The IR v11 is supported by all OpenVINO Development Tools, including the Post-Training Optimization tool, Benchmark app, etc.
|
||||
|
||||
### IR v10 compatibility
|
||||
|
||||
OpenVINO Runtime API 2.0 also supports model in IR v10 for backward compatibility. So, if a user has an IR v10, such IR v10 can be fed to OpenVINO Runtime as well (see [migration steps](./common_inference_pipeline.md)).
|
||||
OpenVINO API 2.0 also supports models in IR v10 for backward compatibility. So, if a user has an IR v10, it can be fed to OpenVINO Runtime as well (see [migration steps](common_inference_pipeline.md)).
|
||||
|
||||
Some OpenVINO Development Tools also support both IR v10 and IR v11 as an input:
|
||||
- Accuracy checker also supports IR v10, but requires an additional option to denote which API is used underneath.
|
||||
- [Compile tool](../../../tools/compile_tool/README.md) compiles the model to be used in OpenVINO 2.0 API by default. If a user wants to use the resulting compiled blob in Inference Engine API, the additional `ov_api_1_0` option should be passed.
|
||||
|
||||
But the following OpenVINO tools don't support IR v10 as an input, they require to regenerate an IR v11 from the original model with latest Model Optimizer:
|
||||
- Post Training Optimization tool
|
||||
- Deep Learning WorkBench
|
||||
The following OpenVINO tools don't support IR v10 as an input and require generating an IR v11 from the original model with the latest version of Model Optimizer:
|
||||
- Post-Training Optimization tool
|
||||
- Deep Learning Workbench
|
||||
|
||||
> **NOTE**: If you need to quantize your IR v10 models to run with OpenVINO 2022.1, it's recommended to download and use Post-Training Optimization tool from OpenVINO 2021.4 release.
|
||||
|
||||
### Differences between Inference Engine and OpenVINO Runtime 2.0
|
||||
|
||||
Inference Engine and ngraph APIs are not deprecated, they are fully functional and can be used in applications. But OpenVINO recommends users to migrate to new OpenVINO Runtime API 2.0, because it already has additional features and this list will be extended later. The following list of additional features is supported by new API:
|
||||
- [Working with dynamic shapes](../ov_dynamic_shapes.md). The feature is quite usefull for best performance for NLP (Neural Language Processing) models, super resolution models and other which accepts dynamic input shapes.
|
||||
- [Preprocessing of the model](../preprocessing_overview.md) to add preprocessing operations to the inference models and fully ocupay the accelerator and free CPU resources.
|
||||
Inference Engine and nGraph APIs are not deprecated; they are fully functional and can be used in applications. However, it's highly recommended to migrate to API 2.0, because it already offers additional features, and this list will be extended over time. The following additional features are supported by API 2.0:
|
||||
- [Working with dynamic shapes](../ov_dynamic_shapes.md). The feature is quite useful for getting the best performance for NLP (Neural Language Processing) models, super-resolution models, and others that accept dynamic input shapes.
|
||||
- [Preprocessing of the model](../preprocessing_overview.md) to add preprocessing operations to inference models, fully occupy the accelerator, and free CPU resources.
|
||||
|
||||
To define the difference at the API level between the Inference Engine and OpenVINO Runtime API 2.0, let's define two types of behavior:
|
||||
- **Old behavior** of OpenVINO supposes:
|
||||
@ -59,18 +61,18 @@ To define a difference on the API level between Inference Engine and OpenVINO RU
|
||||
- Inference Engine can override input and output element types.
|
||||
- Inference Engine API operates with operation names to address inputs and outputs (e.g. InferenceEngine::InferRequest::GetBlob).
|
||||
- Does not support compiling of models with dynamic input shapes.
|
||||
- **New behavior** assumes full model aligment with the framework and is implemented in OpenVINO 2.0:
|
||||
- **New behavior** assumes full model alignment with the framework and is implemented in OpenVINO 2022.1:
|
||||
- Model Optimizer preserves the input element types, order of dimensions (layouts) and stores tensor names from the original models.
|
||||
- OpenVINO Runtime 2.0 reads models in any formats (IR v10, IR v11, ONNX, PaddlePaddle, etc) as is.
|
||||
- OpenVINO Runtime API 2.0 operates with tensor names. Note, the difference between tensor names and operations names is that in case if a single operation has several output tensors, such tensors cannot identified in a unique manner, so tensor names are used for addressing as it's usually done in the frameworks.
|
||||
- OpenVINO Runtime API 2.0 can address input and outputs tensors also by its index. Some model formats like ONNX are sensitive to order of inputs, outputs and its preserved by OpenVINO Runtime 2.0.
|
||||
- OpenVINO Runtime 2022.1 reads models in any formats (IR v10, IR v11, ONNX, PaddlePaddle, etc) as is.
|
||||
- API 2.0 operates with tensor names. Note that the difference between tensor names and operation names is that if a single operation has several output tensors, such tensors cannot be identified in a unique manner, so tensor names are used for addressing, as is usually done in the frameworks.
|
||||
- API 2.0 can also address input and output tensors by index. Some model formats like ONNX are sensitive to the order of inputs and outputs, and this order is preserved by OpenVINO 2022.1 (see the sketch below).
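A minimal Python sketch of both addressing styles, assuming a hypothetical ONNX model path:

```python
from openvino.runtime import Core

core = Core()
model = core.read_model("model.onnx")      # hypothetical model path

# Address an input by index (the order is preserved for formats like ONNX) ...
first_input = model.input(0)

# ... or by one of its tensor names, as it is done in the original framework.
print(first_input.get_any_name())          # one of the tensor names of this input
same_input = model.input(first_input.get_any_name())
```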
|
||||
|
||||
The table below demonstrates which behavior, **old** or **new**, is used depending on the model source and the API used.
|
||||
|
||||
| API | IR v10 | IR v11 | ONNX file | Model created in code |
|
||||
|-------------------------------|---------|---------|-----------|-----------------------|
|
||||
|Inference Engine / ngraph APIs | Old | Old | Old | Old |
|
||||
|OpenVINO Runtime API 2.0 | Old | New | New | New |
|
||||
|Inference Engine / nGraph APIs | Old | Old | Old | Old |
|
||||
|API 2.0 | Old | New | New | New |
|
||||
|
||||
Refer to the following transition guides to understand how to migrate an Inference Engine-based application to OpenVINO™ Runtime API 2.0:
|
||||
- [OpenVINO™ Common Inference pipeline](common_inference_pipeline.md)
|
||||
|
@ -19,45 +19,184 @@ It's also important to mention that since OpenVINO 2.0, the Runtime API does not
|
||||
The steps below demonstrate how to migrate preprocessing scenarios from the Inference Engine API to OpenVINO Runtime API 2.0.
|
||||
The snippets assume that we need to preprocess a model input with the tensor name `tensor_name`; in the Inference Engine API, which uses operation names to address the data, it is called `operation_name`.
|
||||
|
||||
#### Importing preprocessing in Python
|
||||
|
||||
In order to utilize preprocessing, the following imports must be added.
|
||||
|
||||
Inference Engine API:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [imports]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
OpenVINO Runtime API 2.0:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [ov_imports]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
There are two different namespaces: `runtime`, which contains the OpenVINO Runtime API classes, and `preprocess`, which provides the Preprocessing API.
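For illustration, a couple of typical imports from each namespace might look like this (which classes you actually need depends on your pipeline):

```python
from openvino.runtime import Core, Layout, Type     # runtime classes
from openvino.preprocess import PrePostProcessor    # preprocessing API
```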
|
||||
|
||||
|
||||
### Mean and scale values
|
||||
|
||||
Inference Engine API:
|
||||
|
||||
@snippet docs/snippets/ov_preprocessing_migration.cpp mean_scale
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [mean_scale]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [mean_scale]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
OpenVINO Runtime API 2.0:
|
||||
|
||||
@snippet docs/snippets/ov_preprocessing_migration.cpp ov_mean_scale
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_mean_scale]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [ov_mean_scale]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Precision and layout conversions
|
||||
|
||||
Inference Engine API:
|
||||
|
||||
@snippet docs/snippets/ov_preprocessing_migration.cpp conversions
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [conversions]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [conversions]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
OpenVINO Runtime API 2.0:
|
||||
|
||||
@snippet docs/snippets/ov_preprocessing_migration.cpp ov_conversions
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_conversions]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [ov_conversions]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Image scaling
|
||||
|
||||
Inference Engine API:
|
||||
|
||||
@snippet docs/snippets/ov_preprocessing_migration.cpp image_scale
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [image_scale]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [image_scale]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
OpenVINO Runtime API 2.0:
|
||||
|
||||
@snippet docs/snippets/ov_preprocessing_migration.cpp ov_image_scale
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_image_scale]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [ov_image_scale]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Color space conversions
|
||||
|
||||
Inference Engine API:
|
||||
|
||||
@snippet docs/snippets/ov_preprocessing_migration.cpp color_space
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [color_space]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [color_space]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
OpenVINO Runtime API 2.0:
|
||||
|
||||
@snippet docs/snippets/ov_preprocessing_migration.cpp ov_color_space
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_color_space]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing_migration.py
|
||||
:language: python
|
||||
:fragment: [ov_color_space]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
**See also:**
|
||||
- [Preprocessing details](../preprocessing_details.md)
|
||||
|
@ -64,7 +64,7 @@ The `ov::Op` class represents any abstract operation in the model representation
|
||||
|
||||
Operation set (opset) is a collection of operations that can be used to construct a model. The `ov::OpSet` class provides a functionality to work with operation sets.
|
||||
For each operation set, OpenVINO™ Runtime provides a separate namespace, for example `opset8`.
|
||||
Each OpenVINO™ Release release introduces new operations and add these operations to a new operation set. New operation sets help to introduce a new version of operations that change behavior of previous operations. Using operation sets allows you to avoid changes in your application if new operations have been introduced.
|
||||
Each OpenVINO™ release introduces new operations and adds them to a new operation set. New operation sets help to introduce new versions of operations that change the behavior of previous operations. Using operation sets allows you to avoid changes in your application if new operations have been introduced.
|
||||
For a complete list of operation sets supported in OpenVINO™ toolkit, see [Available Operations Sets](../ops/opset.md).
|
||||
To add support of custom operations, see the [Add Custom OpenVINO Operations](../Extensibility_UG/Intro.md) document.
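As a small, hedged sketch of how an opset namespace is typically used from Python to build a model (the shape and names below are arbitrary):

```python
from openvino.runtime import Model, opset8 as ops

# Build a tiny model from operations of a specific operation set (opset8),
# so behavior does not silently change when newer opsets are introduced.
data = ops.parameter([1, 3, 224, 224], name="data")
relu = ops.relu(data)
model = Model([relu], [data], "tiny_relu_model")
```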
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
openvino_docs_OV_UG_Hetero_execution
|
||||
openvino_docs_OV_UG_Automatic_Batching
|
||||
openvino_docs_IE_DG_network_state_intro
|
||||
openvino_docs_OV_Runtime_UG_Python_API_exclusives
|
||||
openvino_2_0_transition_guide
|
||||
openvino_docs_OV_Should_be_in_performance
|
||||
|
||||
|
@ -2,8 +2,6 @@
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. _deep learning inference engine:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
@ -42,7 +42,21 @@ To avoid the tricks mentioned in the previous section there is a way to directly
|
||||
This is achieved with the same reshape method that is used for altering the static shapes of inputs.
|
||||
Dynamic dimensions are specified as `-1` or `ov::Dimension()` instead of a positive number used for static dimensions:
|
||||
|
||||
@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:reshape_undefined
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_dynamic_shapes:reshape_undefined]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.py
|
||||
:language: python
|
||||
:fragment: [reshape_undefined]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
To simplify the code, the examples assume that the model has a single input and single output.
|
||||
However, there are no limitations on the number of inputs and outputs to apply dynamic shapes.
|
||||
@ -59,14 +73,29 @@ If the input model has undefined dimensions that you are not going to change dur
|
||||
From the API perspective any combination of dynamic and static dimensions can be configured.
|
||||
|
||||
Model Optimizer provides capability to reshape the model during the conversion, including specifying dynamic dimensions.
|
||||
Use this capability to save time on calling `reshape` method in the end application. <TODO: Link to MO setting shape doc>
|
||||
Use this capability to save time on calling the `reshape` method in the application.
|
||||
To get information about setting input shapes using Model Optimizer, refer to [Setting Input Shapes](../MO_DG/prepare_model/convert_model/Converting_Model.md)
|
||||
|
||||
### Dimension Bounds
|
||||
|
||||
Besides marking a dimension as just dynamic, you can also specify lower and/or upper bounds that define a range of allowed values for the dimension.
|
||||
Bounds are coded as arguments for `ov::Dimension`:
|
||||
|
||||
@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:reshape_bounds
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_dynamic_shapes:reshape_bounds]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.py
|
||||
:language: python
|
||||
:fragment: [reshape_bounds]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
Information about bounds gives the inference plugin an opportunity to apply additional optimizations.
|
||||
Using dynamic shapes assumes that the plugins apply looser optimization techniques during model compilation
|
||||
@ -86,7 +115,21 @@ Preparing model with the reshape method was the first step.
|
||||
The second step is passing a tensor with an appropriate shape to infer request.
|
||||
This is similar to [regular steps](integrate_with_your_application.md), but now we can pass tensors with different shapes for the same executable model and even for the same inference request:
|
||||
|
||||
@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:set_input_tensor
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_dynamic_shapes:set_input_tensor]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.py
|
||||
:language: python
|
||||
:fragment: [set_input_tensor]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
In the example above `set_input_tensor` is used to specify input tensors.
|
||||
The real dimensions of the tensor are always static, because it is a concrete tensor and, in contrast to model inputs, it doesn't have any dimension variations.
|
||||
@ -97,7 +140,21 @@ Without doing that, the tensor returned by `get_input_tensor` is an empty tensor
|
||||
Setting the shape of the input tensor is required when the corresponding input has at least one dynamic dimension, regardless of the bounds information.
|
||||
The following example makes the same sequence of two infer requests as the previous example, but uses `get_input_tensor` instead of `set_input_tensor`:
|
||||
|
||||
@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:get_input_tensor
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_dynamic_shapes:get_input_tensor]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.py
|
||||
:language: python
|
||||
:fragment: [get_input_tensor]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Dynamic Shapes in Outputs
|
||||
|
||||
@ -108,13 +165,41 @@ The same is true for other dimensions, like sequence length for NLP models or sp
|
||||
Whether or not an output has dynamic dimensions can be examined by querying its partial shape after the model is read or reshaped.
|
||||
The same is applicable for inputs. For example:
|
||||
|
||||
@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:print_dynamic
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_dynamic_shapes:print_dynamic]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.py
|
||||
:language: python
|
||||
:fragment: [print_dynamic]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
The appearance of `?` or ranges like `1..10` means there are dynamic dimensions in the corresponding inputs or outputs.
|
||||
|
||||
Or more programmatically:
|
||||
|
||||
@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:detect_dynamic
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_dynamic_shapes:detect_dynamic]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_dynamic_shapes.py
|
||||
:language: python
|
||||
:fragment: [detect_dynamic]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
If at least one dynamic dimension exists in an output of the model, the shape of the corresponding output tensor will be set as a result of the inference call.
|
||||
Before the first inference, memory for such a tensor is not allocated and its shape is `[0]`.
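A minimal Python sketch of observing this behavior, assuming a hypothetical model with a dynamic output and a hypothetical input shape:

```python
import numpy as np
from openvino.runtime import Core

core = Core()
model = core.read_model("model.xml")                 # hypothetical model path
compiled_model = core.compile_model(model, "CPU")
infer_request = compiled_model.create_infer_request()

# Before the first inference, no memory is allocated for the dynamic output yet.
print(infer_request.get_output_tensor().shape)       # expected to be [0]

infer_request.infer({0: np.zeros((1, 3, 224, 224), dtype=np.float32)})

# After the inference call, the shape is set to the actual result shape.
print(infer_request.get_output_tensor().shape)
```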
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
openvino_docs_OV_Runtime_UG_Preprocessing_Details
|
||||
openvino_docs_OV_Runtime_UG_Layout_Overview
|
||||
openvino_docs_OV_Runtime_UG_Preprocess_Usecase_save
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
83
docs/OV_Runtime_UG/preprocessing_usecase_save.md
Normal file
@ -0,0 +1,83 @@
|
||||
# Use Case - Integrate and Save Preprocessing Steps Into IR {#openvino_docs_OV_Runtime_UG_Preprocess_Usecase_save}
|
||||
|
||||
## Introduction
|
||||
|
||||
In previous sections, we've covered how to add [preprocessing steps](./preprocessing_details.md) and have gotten an overview of the [Layout](./layout_overview.md) API.
|
||||
|
||||
For many applications it is also important to minimize the model's read/load time, so integrating preprocessing steps every time on application startup after `ov::runtime::Core::read_model` may be inconvenient. In such cases, after adding the pre- and post-processing steps, it can be useful to store the new execution model to Intermediate Representation (IR, .xml format).
|
||||
|
||||
Most of the existing preprocessing steps can also be performed via command-line options of the Model Optimizer tool. Refer to [Model Optimizer - Optimize Preprocessing Computation](../MO_DG/prepare_model/Additional_Optimizations.md) for details on such command-line options.
|
||||
|
||||
## Code example - saving model with preprocessing to IR
|
||||
|
||||
If you have some preprocessing steps which can't be integrated into the execution graph using Model Optimizer command-line options (e.g. `YUV->RGB` color space conversion, resize, etc.), it is possible to write simple code which:
|
||||
- Reads original model (IR, ONNX, Paddle)
|
||||
- Adds preprocessing/postprocessing steps
|
||||
- Saves resulting model as IR (.xml/.bin)
|
||||
|
||||
Let's consider an example: there is an original `ONNX` model which takes one `float32` input with shape `{1, 3, 224, 224}` in `RGB` channel order, with mean/scale values applied. The user's application can provide a `BGR` image buffer with a non-fixed size. Additionally, we'll also imagine that our application provides input images as batches, where each batch contains 2 images. Here is how the model conversion code may look in your model preparation script:
|
||||
|
||||
- Includes / Imports
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov:preprocess:save_headers]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing.py
|
||||
:language: python
|
||||
:fragment: [ov:preprocess:save_headers]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
- Preprocessing & Saving to IR code
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov:preprocess:save]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing.py
|
||||
:language: python
|
||||
:fragment: [ov:preprocess:save]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
## Application code - load model to target device
|
||||
|
||||
After this, your application's code can load the saved file and no longer needs to perform preprocessing. In this example, we'll also enable [model caching](./Model_caching_overview.md) to minimize load time when a cached model is available.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: C++
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov:preprocess:save_load]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. doxygensnippet:: docs/snippets/ov_preprocessing.py
|
||||
:language: python
|
||||
:fragment: [ov:preprocess:save_load]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
## See Also
|
||||
* [Preprocessing Details](./preprocessing_details.md)
|
||||
* [Layout API overview](./layout_overview.md)
|
||||
* [Model Optimizer - Optimize Preprocessing Computation](../MO_DG/prepare_model/Additional_Optimizations.md)
|
||||
* [Model Caching Overview](./Model_caching_overview.md)
|
||||
* <code>ov::preprocess::PrePostProcessor</code> C++ class documentation
|
||||
* <code>ov::pass::Serialize</code> - pass to serialize model to XML/BIN
|
||||
* <code>ov::set_batch</code> - update batch dimension for a given model
|
136
docs/OV_Runtime_UG/supported_plugins/AutoPlugin_Debugging.md
Normal file
@ -0,0 +1,136 @@
|
||||
# Debugging Auto-Device Plugin {#openvino_docs_IE_DG_supported_plugins_AUTO_debugging}
|
||||
|
||||
## Using Debug Log
|
||||
In case of execution problems, just like all other plugins, Auto-Device provides the user with information on exceptions and error values. If the returned data is not enough for debugging purposes, more information may be acquired by means of `ov::log::Level`.
|
||||
|
||||
There are six levels of logs, which can be called explicitly or set via the `OPENVINO_LOG_LEVEL` environment variable (can be overwritten by `compile_model()` or `set_property()`):
|
||||
|
||||
0 - ov::log::Level::NO
|
||||
1 - ov::log::Level::ERR
|
||||
2 - ov::log::Level::WARNING
|
||||
3 - ov::log::Level::INFO
|
||||
4 - ov::log::Level::DEBUG
|
||||
5 - ov::log::Level::TRACE
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++ API
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
ov::Core core;
|
||||
|
||||
// read a network in IR, PaddlePaddle, or ONNX format
|
||||
std::shared_ptr<ov::Model> model = core.read_model("sample.xml");
|
||||
|
||||
// load a network to AUTO and set log level to debug
|
||||
ov::CompiledModel compiled_model = core.compile_model(model, "AUTO", {{ov::log::level.name(), "LOG_DEBUG"}});
|
||||
|
||||
// or set log level with set_property and load network
|
||||
core.set_property("AUTO", {{ov::log::level.name(), "LOG_DEBUG"}});
|
||||
ov::CompiledModel compiled_model2 = core.compile_model(model, "AUTO");
|
||||
|
||||
.. tab:: Python API
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from openvino.runtime import Core
|
||||
core = Core()
|
||||
|
||||
# read a network in IR, PaddlePaddle, or ONNX format
|
||||
model = core.read_model(model_path)
|
||||
|
||||
# load a network to AUTO and set log level to debug
|
||||
compiled_model = core.compile_model(model=model, device_name="AUTO", config={"LOG_LEVEL":"LOG_DEBUG"});
|
||||
|
||||
# or set the log level with set_property and load the model
|
||||
core.set_property("AUTO", {"LOG_LEVEL": "LOG_DEBUG"})
|
||||
compiled_model = core.compile_model(model=model, device_name="AUTO");
|
||||
|
||||
.. tab:: OS environment variable
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
When defining it via the variable,
|
||||
a number needs to be used instead of a log level name, e.g.:
|
||||
|
||||
Linux
|
||||
export OPENVINO_LOG_LEVEL=0
|
||||
|
||||
Windows
|
||||
set OPENVINO_LOG_LEVEL=0
|
||||
@endsphinxdirective
|
||||
|
||||
The property returns information in the following format:
|
||||
|
||||
@sphinxdirective
|
||||
.. code-block:: sh
|
||||
|
||||
[time]LOG_LEVEL[file] [PLUGIN]: message
|
||||
@endsphinxdirective
|
||||
|
||||
in which the `LOG_LEVEL` is represented by the first letter of its name (ERROR being an exception and using its full name). For example:
|
||||
|
||||
@sphinxdirective
|
||||
.. code-block:: sh
|
||||
|
||||
[17:09:36.6188]D[plugin.cpp:167] deviceName:MYRIAD, defaultDeviceID:, uniqueName:MYRIAD_
|
||||
[17:09:36.6242]I[executable_network.cpp:181] [AUTOPLUGIN]:select device:MYRIAD
|
||||
[17:09:36.6809]ERROR[executable_network.cpp:384] [AUTOPLUGIN] load failed, MYRIAD:[ GENERAL_ERROR ]
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
## Instrumentation and Tracing Technology
|
||||
|
||||
All major performance calls of both OpenVINO™ Runtime and the AUTO plugin are instrumented with Instrumentation and Tracing Technology (ITT) APIs. To enable ITT in OpenVINO™ Runtime, compile it with the following option:
|
||||
@sphinxdirective
|
||||
.. code-block:: sh
|
||||
|
||||
-DENABLE_PROFILING_ITT=ON
|
||||
@endsphinxdirective
|
||||
|
||||
For more information, you can refer to:
|
||||
* [OpenVINO profiling](https://docs.openvino.ai/latest/groupie_dev_profiling.html)
|
||||
* [Intel® VTune™ Profiler User Guide](https://www.intel.com/content/www/us/en/develop/documentation/vtune-help/top/api-support/instrumentation-and-tracing-technology-apis.html)
|
||||
|
||||
### Analyze Code Performance on Linux
|
||||
|
||||
You can analyze code performance using Intel® VTune™ Profiler. For more information and installation instructions refer to the [installation guide (PDF)](https://software.intel.com/content/www/us/en/develop/download/intel-vtune-install-guide-linux-os.html)
|
||||
With Intel® VTune™ Profiler installed you can configure your analysis with the following steps:
|
||||
|
||||
1. Open Intel® VTune™ Profiler GUI on the host machine with the following command:
|
||||
@sphinxdirective
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
cd <vtune_install_dir>/intel/oneapi/vtune/2021.6.0/env
|
||||
source vars.sh
|
||||
vtune-gui
|
||||
@endsphinxdirective
|
||||
|
||||
2. Select **Configure Analysis**.
|
||||
3. In the **where** pane, select **Local Host**
|
||||
@sphinxdirective
|
||||
.. image:: _static/images/IE_DG_supported_plugins_AUTO_debugging-img01-localhost.png
|
||||
:align: center
|
||||
@endsphinxdirective
|
||||
4. In the **what** pane, specify your target application/script on the local system.
|
||||
@sphinxdirective
|
||||
.. image:: _static/images/IE_DG_supported_plugins_AUTO_debugging-img02-launch.png
|
||||
:align: center
|
||||
@endsphinxdirective
|
||||
5. In the **how** pane, choose and configure the analysis type you want to perform, for example, **Hotspots Analysis**:
|
||||
identify the most time-consuming functions and drill down to see time spent on each line of source code. Focus optimization efforts on hot code for the greatest performance impact.
|
||||
@sphinxdirective
|
||||
.. image:: _static/images/IE_DG_supported_plugins_AUTO_debugging-img03-hotspots.png
|
||||
:align: center
|
||||
@endsphinxdirective
|
||||
6. Start the analysis by clicking the start button. When it is done, you will get a summary of the run, including top hotspots and top tasks in your application:
|
||||
@sphinxdirective
|
||||
.. image:: _static/images/IE_DG_supported_plugins_AUTO_debugging-img04-vtunesummary.png
|
||||
:align: center
|
||||
@endsphinxdirective
|
||||
7. To analyze ITT info related to the Auto plugin, click on the **Bottom-up** tab, choose the **Task Domain/Task Type/Function/Call Stack** from the dropdown list - Auto plugin-related ITT info is under the MULTIPlugin task domain:
|
||||
@sphinxdirective
|
||||
.. image:: _static/images/IE_DG_supported_plugins_AUTO_debugging-img05-vtunebottomup.png
|
||||
:align: center
|
||||
@endsphinxdirective
|
@ -16,16 +16,16 @@
|
||||
|
||||
The OpenVINO Runtime provides capabilities to infer deep learning models on the following device types with corresponding plugins:
|
||||
|
||||
| Plugin | Device types |
|
||||
|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
|[CPU plugin](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) |
|
||||
|[GPU plugin](GPU.md) |Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics |
|
||||
|[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs |
|
||||
|[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor|
|
||||
| Plugin | Device types |
|
||||
|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
|[CPU](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) |
|
||||
|[GPU](GPU.md) |Intel® Graphics, including Intel® HD Graphics, Intel® UHD Graphics, Intel® Iris® Graphics, Intel® Xe Graphics, Intel® Xe MAX Graphics |
|
||||
|[VPUs](VPU.md) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs |
|
||||
|[GNA](GNA.md) |[Intel® Speech Enabling Developer Kit](https://www.intel.com/content/www/us/en/support/articles/000026156/boards-and-kits/smart-home.html); [Amazon Alexa\* Premium Far-Field Developer Kit](https://developer.amazon.com/en-US/alexa/alexa-voice-service/dev-kits/amazon-premium-voice); [Intel® Pentium® Silver Processors N5xxx, J5xxx and Intel® Celeron® Processors N4xxx, J4xxx (formerly codenamed Gemini Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/83915/gemini-lake.html): [Intel® Pentium® Silver J5005 Processor](https://ark.intel.com/content/www/us/en/ark/products/128984/intel-pentium-silver-j5005-processor-4m-cache-up-to-2-80-ghz.html), [Intel® Pentium® Silver N5000 Processor](https://ark.intel.com/content/www/us/en/ark/products/128990/intel-pentium-silver-n5000-processor-4m-cache-up-to-2-70-ghz.html), [Intel® Celeron® J4005 Processor](https://ark.intel.com/content/www/us/en/ark/products/128992/intel-celeron-j4005-processor-4m-cache-up-to-2-70-ghz.html), [Intel® Celeron® J4105 Processor](https://ark.intel.com/content/www/us/en/ark/products/128989/intel-celeron-j4105-processor-4m-cache-up-to-2-50-ghz.html), [Intel® Celeron® J4125 Processor](https://ark.intel.com/content/www/us/en/ark/products/197305/intel-celeron-processor-j4125-4m-cache-up-to-2-70-ghz.html), [Intel® Celeron® Processor N4100](https://ark.intel.com/content/www/us/en/ark/products/128983/intel-celeron-processor-n4100-4m-cache-up-to-2-40-ghz.html), [Intel® Celeron® Processor N4000](https://ark.intel.com/content/www/us/en/ark/products/128988/intel-celeron-processor-n4000-4m-cache-up-to-2-60-ghz.html); [Intel® Pentium® Processors N6xxx, J6xxx, Intel® Celeron® Processors N6xxx, J6xxx and Intel Atom® x6xxxxx (formerly codenamed Elkhart Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/128825/products-formerly-elkhart-lake.html); [Intel® Core™ Processors (formerly codenamed Cannon Lake)](https://ark.intel.com/content/www/us/en/ark/products/136863/intel-core-i3-8121u-processor-4m-cache-up-to-3-20-ghz.html); [10th Generation Intel® Core™ Processors (formerly codenamed Ice Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/74979/ice-lake.html): [Intel® Core™ i7-1065G7 Processor](https://ark.intel.com/content/www/us/en/ark/products/196597/intel-core-i71065g7-processor-8m-cache-up-to-3-90-ghz.html), [Intel® Core™ i7-1060G7 Processor](https://ark.intel.com/content/www/us/en/ark/products/197120/intel-core-i71060g7-processor-8m-cache-up-to-3-80-ghz.html), [Intel® Core™ i5-1035G4 Processor](https://ark.intel.com/content/www/us/en/ark/products/196591/intel-core-i51035g4-processor-6m-cache-up-to-3-70-ghz.html), [Intel® Core™ i5-1035G7 Processor](https://ark.intel.com/content/www/us/en/ark/products/196592/intel-core-i51035g7-processor-6m-cache-up-to-3-70-ghz.html), [Intel® Core™ i5-1035G1 Processor](https://ark.intel.com/content/www/us/en/ark/products/196603/intel-core-i51035g1-processor-6m-cache-up-to-3-60-ghz.html), [Intel® Core™ i5-1030G7 Processor](https://ark.intel.com/content/www/us/en/ark/products/197119/intel-core-i51030g7-processor-6m-cache-up-to-3-50-ghz.html), [Intel® Core™ i5-1030G4 Processor](https://ark.intel.com/content/www/us/en/ark/products/197121/intel-core-i51030g4-processor-6m-cache-up-to-3-50-ghz.html), [Intel® Core™ i3-1005G1 Processor](https://ark.intel.com/content/www/us/en/ark/products/196588/intel-core-i31005g1-processor-4m-cache-up-to-3-40-ghz.html), [Intel® Core™ i3-1000G1 
Processor](https://ark.intel.com/content/www/us/en/ark/products/197122/intel-core-i31000g1-processor-4m-cache-up-to-3-20-ghz.html), [Intel® Core™ i3-1000G4 Processor](https://ark.intel.com/content/www/us/en/ark/products/197123/intel-core-i31000g4-processor-4m-cache-up-to-3-20-ghz.html); [11th Generation Intel® Core™ Processors (formerly codenamed Tiger Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/88759/tiger-lake.html); [12th Generation Intel® Core™ Processors (formerly codenamed Alder Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/147470/products-formerly-alder-lake.html)|
|
||||
|
||||
OpenVINO runtime also has several execution capabilities which work on top of other devices:
|
||||
|
||||
| Capability | Description |
|
||||
| Capability | Description |
|
||||
|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
|[Multi-Device execution](../multi_device.md) |Multi-Device enables simultaneous inference of the same model on several devices in parallel |
|
||||
|[Auto-Device selection](../auto_device_selection.md) |Auto-Device selection enables selecting Intel® device for inference automatically |
|
||||
@ -34,3 +34,21 @@ OpenVINO runtime also has several execution capabilities which work on top of ot
|
||||
|
||||
Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).
|
||||
|
||||
|
||||
## Features support matrix
|
||||
The table below demonstrates support of key features by OpenVINO device plugins.
|
||||
|
||||
| Capability | [CPU](CPU.md) | [GPU](GPU.md) | [GNA](GNA.md) | [VPU](VPU.md) |
|
||||
| ---------- | --- | --- | --- | --- |
|
||||
| [Heterogeneous execution](../hetero_execution.md)| Yes | Yes | No | ? |
|
||||
| [Multi-device execution](../multi_device.md) | Yes | Yes | Partial | ? |
|
||||
| [Automatic batching](../automatic_batching.md) | No | Yes | No | ? |
|
||||
| [Multi-stream execution](@ref openvino_docs_optimization_guide_dldt_optimization_guide) | Yes | Yes | No | ? |
|
||||
| [Models caching](../Model_caching_overview.md) | Yes | Partial | Yes | ? |
|
||||
| [Dynamic shapes](../ov_dynamic_shapes.md) | Yes | Partial | No | ? |
|
||||
| Import/Export | Yes | No | Yes | ? |
|
||||
| [Preprocessing acceleration](../preprocessing_overview.md) | Yes | Yes | No | ? |
|
||||
| [Stateful models](../network_state_intro.md) | Yes | No | Yes | ? |
|
||||
| [Extensibility](@ref openvino_docs_Extensibility_UG_Intro) | Yes | Yes | No | ? |
|
||||
|
||||
For more details on plugin-specific feature limitations, see the corresponding plugin pages.
|
||||
|
@ -1,7 +1,6 @@
|
||||
# GNA device {#openvino_docs_OV_UG_supported_plugins_GNA}
|
||||
## Introducing the GNA Plugin
|
||||
|
||||
The Intel® Gaussian & Neural Accelerator is a low-power neural coprocessor for continuous inference at the edge.
|
||||
The Intel® Gaussian & Neural Accelerator (GNA) is a low-power neural coprocessor for continuous inference at the edge.
|
||||
|
||||
Intel® GNA is not intended to replace typical inference devices such as the
|
||||
CPU, graphics processing unit (GPU), or vision processing unit (VPU). It is designed for offloading
|
||||
@ -10,371 +9,175 @@ to save power and free CPU resources.
|
||||
|
||||
The GNA plugin provides a way to run inference on Intel® GNA, as well as in the software execution mode on CPU.
|
||||
|
||||
## Devices with Intel® GNA
|
||||
|
||||
Devices with Intel® GNA support:
|
||||
|
||||
* [Intel® Speech Enabling Developer Kit](https://www.intel.com/content/www/us/en/support/articles/000026156/boards-and-kits/smart-home.html)
|
||||
|
||||
* [Amazon Alexa\* Premium Far-Field Developer Kit](https://developer.amazon.com/en-US/alexa/alexa-voice-service/dev-kits/amazon-premium-voice)
|
||||
|
||||
* [Intel® Pentium® Silver Processors N5xxx, J5xxx and Intel® Celeron® Processors N4xxx, J4xxx (formerly codenamed Gemini Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/83915/gemini-lake.html):
|
||||
- Intel® Pentium® Silver J5005 Processor
|
||||
- Intel® Pentium® Silver N5000 Processor
|
||||
- Intel® Celeron® J4005 Processor
|
||||
- Intel® Celeron® J4105 Processor
|
||||
- Intel® Celeron® J4125 Processor
|
||||
- Intel® Celeron® Processor N4100
|
||||
- Intel® Celeron® Processor N4000
|
||||
|
||||
* [Intel® Pentium® Processors N6xxx, J6xxx, Intel® Celeron® Processors N6xxx, J6xxx and Intel Atom® x6xxxxx (formerly codenamed Elkhart Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/128825/products-formerly-elkhart-lake.html)
|
||||
|
||||
* [Intel® Core™ Processors (formerly codenamed Cannon Lake)](https://ark.intel.com/content/www/us/en/ark/products/136863/intel-core-i3-8121u-processor-4m-cache-up-to-3-20-ghz.html)
|
||||
|
||||
* [10th Generation Intel® Core™ Processors (formerly codenamed Ice Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/74979/ice-lake.html):
|
||||
|
||||
* [11th Generation Intel® Core™ Processors (formerly codenamed Tiger Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/88759/tiger-lake.html).
|
||||
|
||||
* [12th Generation Intel® Core™ Processors (formerly codenamed Alder Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/147470/products-formerly-alder-lake.html).
|
||||
|
||||
> **NOTE**: On platforms where Intel® GNA is not enabled in the BIOS, the driver cannot be installed, so the GNA plugin uses the software emulation mode only.
|
||||
For more details on how to configure a machine to use GNA plugin, see [GNA configuration page](@ref openvino_docs_install_guides_configurations_for_intel_gna).
|
||||
|
||||
## Intel® GNA Generational Differences
|
||||
|
||||
The first and second versions of Intel® GNA found in 10th and 11th generation Intel® Core™ Processors may be considered to be functionally equivalent. Intel® GNA 2.0 provided performance improvement with respect to Intel® GNA 1.0. Starting with 12th Generation Intel® Core™ Processors (formerly codenamed Alder Lake), support for Intel® GNA 3.0 features is being added.
|
||||
The first (1.0) and second (2.0) versions of Intel® GNA found in 10th and 11th generation Intel® Core™ Processors may be considered to be functionally equivalent. Intel® GNA 2.0 provided performance improvement with respect to Intel® GNA 1.0. Starting with 12th Generation Intel® Core™ Processors (formerly codenamed Alder Lake), support for Intel® GNA 3.0 features is being added.
|
||||
|
||||
In the rest of this documentation, "GNA 2.0" refers to Intel® GNA hardware delivered on 10th and 11th generation Intel® Core™ processors, and the term "GNA 3.0" will be used to refer to GNA hardware delivered on 12th generation Intel® Core™ processors.
|
||||
In the rest of this documentation, "GNA 2.0" refers to Intel® GNA hardware delivered on 10th and 11th generation Intel® Core™ processors, and the term "GNA 3.0" refers to GNA hardware delivered on 12th generation Intel® Core™ processors.
|
||||
|
||||
Initially, a limited subset of Intel® GNA 3.0 features is added to the previous feature set, including the following:
|
||||
### Intel® GNA Forward and Backward Compatibility
|
||||
|
||||
* **2D VALID Convolution With Small 2D Kernels:** Two-dimensional convolutions with the following kernel dimensions [H,W] are supported: [1,1], [2,2], [3,3], [2,1], [3,1], [4,1], [5,1], [6,1], [7,1], [1,2], or [1,3]. Input tensor dimensions are limited to [1,8,16,16] <= [N,C,H,W] <= [1,120,384,240]. Up to 384 channels C may be used with a subset of kernel sizes (see table below). Up to 256 kernels (output channels) are supported. Pooling is limited to pool shapes of [1,1], [2,2], or [3,3]. Not all combinations of kernel shape and input tensor shape are supported (see the tables below for exact limitations).
|
||||
|
||||
The tables below show that the exact limitation on the input tensor width W depends on the number of input channels C (indicated as Ci below) and the kernel shape. There is much more freedom to choose the input tensor height and number of output channels.
|
||||
|
||||
## Initially Supported Subset of Intel® GNA 2D Convolutions
|
||||
|
||||
The following tables provide a more explicit representation of the Intel® GNA 3.0 2D convolution operations initially supported. The limits depend strongly on the number of input tensor channels (Ci) and the input tensor width (W). Other factors are kernel height (KH), kernel width (KW), pool height (PH), pool width (PW), horizontal pool step (SH), and vertical pool step (SW). For example, the first table shows that for a 3x3 kernel with max pooling, only square pools are supported, and W is limited to 87 when there are 64 input channels.
|
||||
|
||||
**Table of Maximum Input Tensor Widths (W) vs. Rest of Parameters** (Input and Kernel Precision: 2 bytes)
|
||||
|
||||
|KH|KW|PH|PW|SH|SW|H|W<br>Ci=8<br>Co=256|W<br>Ci=16<br>Co=256|W<br>Ci=32<br>Co=256|W<br>Ci=64<br>Co=256|W<br>Ci=128<br>Co=256|W<br>Ci=256<br>Co=256|W<br>Ci=384<br>Co=256|
|
||||
|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|
|
||||
|1|1|1|1|1|1|128|240|240|240|240|240|240|170|
|
||||
|1|1|1|1|1|1|256|240|240|240|240|240|128|85|
|
||||
|1|1|1|1|1|1|384|240|240|240|240|170|85|56|
|
||||
|1|2|1|1|1|1|128|240|240|240|240| | | |
|
||||
|1|2|1|1|1|1|256|240|240|240|240| | | |
|
||||
|1|2|1|1|1|1|384|240|240|240|240| | | |
|
||||
|1|3|1|1|1|1|128|240|240|240|240| | | |
|
||||
|1|3|1|1|1|1|256|240|240|240|240| | | |
|
||||
|1|3|1|1|1|1|384|240|240|240|240| | | |
|
||||
|2|1|1|1|1|1|128|192|192|192|192|192|192|128|
|
||||
|2|1|1|1|1|1|256|192|192|192|192|192|128|85|
|
||||
|2|1|1|1|1|1|384|192|192|192|192|170|85|56|
|
||||
|2|2|1|1|1|1|128|193|193|193|193| | | |
|
||||
|2|2|1|1|1|1|256|193|193|193|193| | | |
|
||||
|2|2|1|1|1|1|384|193|193|193|193| | | |
|
||||
|2|2|2|2|1|1|128|193|193|192|179| | | |
|
||||
|2|2|2|2|1|1|256|193|193|192|179| | | |
|
||||
|2|2|2|2|1|1|384|193|193|192|179| | | |
|
||||
|2|2|2|2|1|2|128|193|193|192|179| | | |
|
||||
|2|2|2|2|1|2|256|193|193|192|179| | | |
|
||||
|2|2|2|2|1|2|384|193|193|192|179| | | |
|
||||
|2|2|2|2|2|1|128|193|193|192|179| | | |
|
||||
|2|2|2|2|2|1|256|193|193|192|179| | | |
|
||||
|2|2|2|2|2|1|384|193|193|192|179| | | |
|
||||
|2|2|2|2|2|2|128|193|193|192|179| | | |
|
||||
|2|2|2|2|2|2|256|193|193|192|179| | | |
|
||||
|2|2|2|2|2|2|384|193|193|192|179| | | |
|
||||
|3|1|1|1|1|1|128|128|128|128|128|128|85|42|
|
||||
|3|1|1|1|1|1|256|128|128|128|128|128|85|42|
|
||||
|3|1|1|1|1|1|384|128|128|128|128|128|85|42|
|
||||
|3|3|1|1|1|1|128|130|130|130|87| | | |
|
||||
|3|3|1|1|1|1|256|130|130|130|87| | | |
|
||||
|3|3|1|1|1|1|384|130|130|130|87| | | |
|
||||
|3|3|2|2|1|1|128|130|130|126|87| | | |
|
||||
|3|3|2|2|1|1|256|130|130|126|87| | | |
|
||||
|3|3|2|2|1|1|384|130|130|126|87| | | |
|
||||
|3|3|2|2|1|2|128|130|130|126|87| | | |
|
||||
|3|3|2|2|1|2|256|130|130|126|87| | | |
|
||||
|3|3|2|2|1|2|384|130|130|126|87| | | |
|
||||
|3|3|2|2|2|1|128|130|130|126|87| | | |
|
||||
|3|3|2|2|2|1|256|130|130|126|87| | | |
|
||||
|3|3|2|2|2|1|384|130|130|126|87| | | |
|
||||
|3|3|2|2|2|2|128|130|130|126|87| | | |
|
||||
|3|3|2|2|2|2|256|130|130|126|87| | | |
|
||||
|3|3|2|2|2|2|384|130|130|126|87| | | |
|
||||
|3|3|3|3|1|1|128|130|128|118|87| | | |
|
||||
|3|3|3|3|1|1|256|130|128|118|87| | | |
|
||||
|3|3|3|3|1|1|384|130|128|118|87| | | |
|
||||
|3|3|3|3|1|2|128|130|128|118|87| | | |
|
||||
|3|3|3|3|1|2|256|130|128|118|87| | | |
|
||||
|3|3|3|3|1|2|384|130|128|118|87| | | |
|
||||
|3|3|3|3|1|3|128|130|128|118|87| | | |
|
||||
|3|3|3|3|1|3|256|130|128|118|87| | | |
|
||||
|3|3|3|3|1|3|384|130|128|118|87| | | |
|
||||
|3|3|3|3|2|1|128|130|128|118|87| | | |
|
||||
|3|3|3|3|2|1|256|130|128|118|87| | | |
|
||||
|3|3|3|3|2|1|384|130|128|118|87| | | |
|
||||
|3|3|3|3|2|2|128|130|128|118|87| | | |
|
||||
|3|3|3|3|2|2|256|130|128|118|87| | | |
|
||||
|3|3|3|3|2|2|384|130|128|118|87| | | |
|
||||
|3|3|3|3|2|3|128|130|128|118|87| | | |
|
||||
|3|3|3|3|2|3|256|130|128|118|87| | | |
|
||||
|3|3|3|3|2|3|384|130|128|118|87| | | |
|
||||
|3|3|3|3|3|1|128|130|128|118|87| | | |
|
||||
|3|3|3|3|3|1|256|130|128|118|87| | | |
|
||||
|3|3|3|3|3|1|384|130|128|118|87| | | |
|
||||
|3|3|3|3|3|2|128|130|128|118|87| | | |
|
||||
|3|3|3|3|3|2|256|130|128|118|87| | | |
|
||||
|3|3|3|3|3|2|384|130|128|118|87| | | |
|
||||
|3|3|3|3|3|3|128|130|128|118|87| | | |
|
||||
|3|3|3|3|3|3|256|130|128|118|87| | | |
|
||||
|3|3|3|3|3|3|384|130|128|118|87| | | |
|
||||
|4|1|1|1|1|1|128|96|96|96|96|96|64|32|
|
||||
|4|1|1|1|1|1|256|96|96|96|96|96|64|32|
|
||||
|4|1|1|1|1|1|384|96|96|96|96|96|64|32|
|
||||
|5|1|1|1|1|1|128|76|76|76|76|51|25| |
|
||||
|5|1|1|1|1|1|256|76|76|76|76|51|25| |
|
||||
|5|1|1|1|1|1|384|76|76|76|76|51|25| |
|
||||
|6|1|1|1|1|1|128|64|64|64|64|42|21| |
|
||||
|6|1|1|1|1|1|256|64|64|64|64|42|21| |
|
||||
|6|1|1|1|1|1|384|64|64|64|64|42|21| |
|
||||
|7|1|1|1|1|1|128|54|54|54|54|36| | |
|
||||
|7|1|1|1|1|1|256|54|54|54|54|36| | |
|
||||
|7|1|1|1|1|1|384|54|54|54|54|36| | |
|
||||
|
||||
**Table of Maximum Input Tensor Widths (W) vs. Rest of Parameters** (Input and Kernel Precision: 1 byte)
|
||||
|
||||
|KH|KW|PH|PW|SH|SW|H|W<br>Ci=8<br>Co=256|W<br>Ci=16<br>Co=256|W<br>Ci=32<br>Co=256|W<br>Ci=64<br>Co=256|W<br>Ci=128<br>Co=256|W<br>Ci=256<br>Co=256|W<br>Ci=384<br>Co=256|
|
||||
|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|
|
||||
|1|1|1|1|1|1|128|240|240|240|240|240|240|240|
|
||||
|1|1|1|1|1|1|256|240|240|240|240|240|240|170|
|
||||
|1|1|1|1|1|1|384|240|240|240|240|240|170|113|
|
||||
|1|2|1|1|1|1|128|240|240|240|240|240|240|240|
|
||||
|1|2|1|1|1|1|256|240|240|240|240|240|240|170|
|
||||
|1|2|1|1|1|1|384|240|240|240|240|240|170|113|
|
||||
|1|3|1|1|1|1|128|240|240|240|240|240| | |
|
||||
|1|3|1|1|1|1|256|240|240|240|240|240| | |
|
||||
|1|3|1|1|1|1|384|240|240|240|240|240| | |
|
||||
|2|1|1|1|1|1|128|192|192|192|192|192|192|192|
|
||||
|2|1|1|1|1|1|256|192|192|192|192|192|192|170|
|
||||
|2|1|1|1|1|1|384|192|192|192|192|192|170|113|
|
||||
|2|2|1|1|1|1|128|193|193|193|193|193|193|129|
|
||||
|2|2|1|1|1|1|256|193|193|193|193|193|193|129|
|
||||
|2|2|1|1|1|1|384|193|193|193|193|193|170|113|
|
||||
|3|1|1|1|1|1|128|128|128|128|128|128|128|85|
|
||||
|3|1|1|1|1|1|256|128|128|128|128|128|128|85|
|
||||
|3|1|1|1|1|1|384|128|128|128|128|128|128|85|
|
||||
|3|3|1|1|1|1|128|130|130|130|130|87 | | |
|
||||
|3|3|1|1|1|1|256|130|130|130|130|87 | | |
|
||||
|3|3|1|1|1|1|384|130|130|130|130|87 | | |
|
||||
|4|1|1|1|1|1|128|96|96|96|96|96|96|64|
|
||||
|4|1|1|1|1|1|256|96|96|96|96|96|96|64|
|
||||
|4|1|1|1|1|1|384|96|96|96|96|96|96|64|
|
||||
|5|1|1|1|1|1|128|76|76|76|76|76|51|51|
|
||||
|5|1|1|1|1|1|256|76|76|76|76|76|51|51|
|
||||
|5|1|1|1|1|1|384|76|76|76|76|76|51|51|
|
||||
|6|1|1|1|1|1|128|64|64|64|64|64|42|21|
|
||||
|6|1|1|1|1|1|256|64|64|64|64|64|42|21|
|
||||
|6|1|1|1|1|1|384|64|64|64|64|64|42|21|
|
||||
|7|1|1|1|1|1|128|54|54|54|54|54|36|18|
|
||||
|7|1|1|1|1|1|256|54|54|54|54|54|36|18|
|
||||
|7|1|1|1|1|1|384|54|54|54|54|54|36|18|
|
||||
|
||||
|
||||
> **NOTE**: The above limitations only apply to the new hardware 2D convolution operation. When possible, the Intel® GNA plugin graph compiler flattens 2D convolutions so that the second generation Intel® GNA 1D convolution operations (without these limitations) may be used. The plugin will also flatten 2D convolutions regardless of the sizes if GNA 2.0 compilation target is selected (see below).
|
||||
|
||||
## Intel® GNA Forward and Backward Compatibility
|
||||
|
||||
In the general case, there is no guarantee that a model compiled for GNA 2.0 will run on GNA 3.0, or vice versa.
|
||||
|
||||
However, in most cases, networks compiled for GNA 2.0 will run as expected on GNA 3.0, although the performance may be worse compared to the case when a network is compiled specifically for the latter. The exception is networks with convolutions with the number of filters greater than 8192 (see the <a href="#models-and-layers-limitations">Models and Layers Limitations</a> section).
|
||||
|
||||
Networks compiled for GNA 3.0 should run on GNA 2.0 with incompatible layers emulated on CPU.
|
||||
|
||||
You can use the `KEY_GNA_EXEC_TARGET` and `KEY_GNA_COMPILE_TARGET` options to check interoperability (see the <a href="#supported-configuration-parameters">Supported Configuration Parameters</a> section below):
|
||||
When you run a model using the GNA plugin, it is compiled internally for the specific hardware target. It is possible to export the compiled model using the <a href="#import-export">Import/Export</a> functionality to use it later, but in the general case, there is no guarantee that a model compiled and exported for GNA 2.0 runs on GNA 3.0, or vice versa.
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
|
||||
``KEY_GNA_EXEC_TARGET``, ``KEY_GNA_COMPILE_TARGET``
|
||||
.. csv-table:: Interoperability of compile target and hardware target
|
||||
:header: "Hardware", "Compile target 2.0", "Compile target 3.0"
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
``GNA_EXEC_TARGET``, ``GNA_COMPILE_TARGET``
|
||||
"GNA 2.0", "Supported", "Not supported (incompatible layers emulated on CPU)"
|
||||
"GNA 3.0", "Partially supported", "Supported"
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Drivers and Dependencies
|
||||
> **NOTE**: In most cases, networks compiled for GNA 2.0 run as expected on GNA 3.0, although the performance may be worse compared to the case when a network is compiled specifically for the latter. The exception is networks with convolutions with the number of filters greater than 8192 (see the <a href="#models-and-operations-limitations">Models and Operations Limitations</a> section).
|
||||
|
||||
Intel® GNA hardware requires a driver to be installed on the system.
|
||||
For optimal work with POT quantized models that include 2D convolutions on GNA 3.0 hardware, the <a href="#support-for-2d-convolutions-using-pot">following requirements</a> should be satisfied.
|
||||
|
||||
* Linux\* OS:
|
||||
[Download Intel® GNA driver for Ubuntu Linux 18.04.3 LTS (with HWE Kernel version 5.4+)](https://storage.openvinotoolkit.org/drivers/gna/)
|
||||
Choose a compile target depending on the priority: cross-platform execution, performance, memory, or power optimization.
|
||||
|
||||
* Windows\* OS:
|
||||
Intel® GNA driver for Windows is available through Windows Update\*
|
||||
Use the following properties to check interoperability in your application: `ov::intel_gna::execution_target` and `ov::intel_gna::compile_target`.
|
||||
|
||||
## <a name="models-and-layers-limitations">Models and Layers Limitations</a>
|
||||
[Speech C++ Sample](@ref openvino_inference_engine_samples_speech_sample_README) can be used for experiments (see `-exec_target` and `-compile_target` command line options).
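
Below is a minimal sketch of how the two properties could be combined, assuming the `ov::intel_gna::HWGeneration` enum values and the `openvino/runtime/intel_gna/properties.hpp` header shown here; `model.xml` is a placeholder path.

```cpp
#include <openvino/runtime/core.hpp>
#include <openvino/runtime/intel_gna/properties.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    // Emulate GNA 2.0 execution while compiling the model for GNA 3.0,
    // to check how a model built for one generation behaves on the other.
    auto compiled_model = core.compile_model(model, "GNA",
        ov::intel_gna::execution_target(ov::intel_gna::HWGeneration::GNA_2_0),
        ov::intel_gna::compile_target(ov::intel_gna::HWGeneration::GNA_3_0));
    return 0;
}
```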
|
||||
|
||||
Because of the specifics of the hardware architecture, Intel® GNA supports a limited set of layer types and their combinations.
|
||||
For example, you should not expect the GNA Plugin to be able to run computer vision models, except those specifically adapted for the GNA Plugin, because the plugin does not fully support 2D convolutions.
|
||||
## Software emulation mode
|
||||
|
||||
For the list of supported layers, see the **GNA** column of the **Supported Layers** section in [Supported Devices](Supported_Devices.md).
|
||||
On platforms without GNA hardware support, the plugin chooses software emulation mode by default. This means that a model runs even if there is no GNA hardware in your platform.
|
||||
The GNA plugin enables you to switch the execution between software emulation mode and hardware execution mode after the model is loaded.
|
||||
For details, see the description of the `ov::intel_gna::execution_mode` property.
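
As a rough sketch of that switch (assuming the `ov::intel_gna::ExecutionMode` enum values used below and `model.xml` as a placeholder path):

```cpp
#include <openvino/runtime/core.hpp>
#include <openvino/runtime/intel_gna/properties.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    // Compile with automatic selection: GNA hardware if present,
    // software emulation on CPU otherwise.
    auto compiled_model = core.compile_model(model, "GNA",
        ov::intel_gna::execution_mode(ov::intel_gna::ExecutionMode::AUTO));

    // After the model is loaded, switch explicitly to bit-exact software emulation.
    compiled_model.set_property(
        {ov::intel_gna::execution_mode(ov::intel_gna::ExecutionMode::SW_EXACT)});
    return 0;
}
```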
|
||||
|
||||
Limitations include:
|
||||
## Recovery from Interruption by High-Priority Windows Audio Processes\*
|
||||
|
||||
- Only 1D convolutions are natively supported on the HW prior to GNA 3.0; 2D convolutions have specific limitations (see the table above).
|
||||
- The number of output channels for convolutions must be a multiple of 4.
|
||||
- The maximum number of filters is 65532 for GNA 2.0 and 8192 for GNA 3.0.
|
||||
- Transpose layer support is limited to the cases where no data reordering is needed or when reordering is happening for two dimensions, at least one of which is not greater than 8.
|
||||
- Splits and concatenations are supported for continuous portions of memory (e.g., split of 1,2,3,4 to 1,1,3,4 and 1,1,3,4 or concats of 1,2,3,4 and 1,2,3,5 to 2,2,3,4).
|
||||
- For Multiply, Add and Subtract layers, auto broadcasting is only supported for constant inputs.
|
||||
GNA is designed for real-time workloads such as noise reduction.
|
||||
For such workloads, processing should be time constrained, otherwise extra delays may cause undesired effects such as
|
||||
*audio glitches*. To make sure that processing can satisfy real-time requirements, the GNA driver provides a Quality of Service
|
||||
(QoS) mechanism, which interrupts requests that might cause high-priority Windows audio processes to miss
|
||||
the schedule, thereby causing long running GNA tasks to terminate early.
|
||||
|
||||
### Support for 2D Convolutions in Previous Generations of GNA Hardware
|
||||
To prepare the applications correctly, use the Automatic QoS feature described below.
|
||||
|
||||
The Intel® GNA 1.0 and 2.0 hardware natively supports only 1D convolutions.
|
||||
### Automatic QoS Feature on Windows*
|
||||
|
||||
However, 2D convolutions can be mapped to 1D when a convolution kernel moves in a single direction. GNA Plugin performs such a transformation for Kaldi `nnet1` convolution. From this perspective, the Intel® GNA hardware convolution operation accepts an `NHWC` input and produces an `NHWC` output. Because OpenVINO™ only supports the `NCHW` layout, you may need to insert `Transpose` layers before or after convolutions.
|
||||
|
||||
For example, the Kaldi model optimizer inserts such a transpose after convolution for the [rm_cnn4a network](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/rm_cnn4a_smbr/). This `Transpose` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result.
|
||||
|
||||
## Operation Precision
|
||||
|
||||
Intel® GNA essentially operates in the low-precision mode, which represents a mix of 8-bit (`I8`), 16-bit (`I16`), and 32-bit (`I32`) integer computations. Outputs calculated using a reduced integer precision are different from the scores calculated using the floating point format, for example, `FP32` outputs calculated on CPU using the OpenVINO [CPU device](CPU.md).
|
||||
|
||||
Unlike other plugins supporting low-precision execution, the GNA plugin can calculate quantization factors at the model loading time, so you can run a model without calibration using the [Post-Training Optimization Tool](@ref pot_README).
|
||||
However, this mode may not provide satisfactory accuracy because the internal quantization algorithm is based on heuristics which may or may not be efficient, depending on the model and dynamic range of input data.
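
If the heuristic scale factor is not suitable for your data, one option is to supply it explicitly through `ov::intel_gna::scale_factors_per_input` (listed among the read-write properties below). This sketch assumes the property accepts a map from input tensor names to `float` factors; the input name `"input"`, the value `8.0f`, and `model.xml` are hypothetical.

```cpp
#include <map>
#include <string>
#include <openvino/runtime/core.hpp>
#include <openvino/runtime/intel_gna/properties.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    // Hypothetical input name and scale factor; pick values that match the
    // dynamic range of your actual input data.
    std::map<std::string, float> scale_factors = {{"input", 8.0f}};
    auto compiled_model = core.compile_model(model, "GNA",
        ov::intel_gna::scale_factors_per_input(scale_factors));
    return 0;
}
```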
|
||||
|
||||
Starting with 2021.4 release of OpenVINO, GNA plugin users are encouraged to use the [POT API Usage sample for GNA](@ref pot_sample_speech_README) to get a model with quantization hints based on statistics for the provided dataset.
|
||||
|
||||
## <a name="execution-modes">Execution Modes</a>
|
||||
Starting with 2021.4.1 release of OpenVINO and 03.00.00.1363 version of Windows* GNA driver, a new execution mode `ov::intel_gna::ExecutionMode::HW_WITH_SW_FBACK` is introduced
|
||||
to ensure that workloads satisfy real-time execution. In this mode, the GNA driver automatically falls back on CPU for a particular infer request
|
||||
if the HW queue is not empty, so there is no need for explicitly switching between GNA and CPU.
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
|
||||
============================ ==============================================================================================================================================
|
||||
Mode Description
|
||||
============================ ==============================================================================================================================================
|
||||
``KEY_GNA_AUTO`` Uses Intel® GNA if available, otherwise uses software execution mode on CPU.
|
||||
``KEY_GNA_HW`` Uses Intel® GNA if available, otherwise raises an error.
|
||||
``KEY_GNA_SW`` *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode.
|
||||
``KEY_GNA_SW_EXACT`` Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode.
|
||||
``KEY_GNA_HW_WITH_SW_FBACK`` Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode.
|
||||
``KEY_GNA_SW_FP32`` Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``).
|
||||
============================ ==============================================================================================================================================
|
||||
.. doxygensnippet:: docs/snippets/gna/configure.cpp
|
||||
:language: cpp
|
||||
:fragment: [include]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gna/configure.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_gna_exec_mode_hw_with_sw_fback]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
======================== ==============================================================================================================================================
|
||||
Mode Description
|
||||
======================== ==============================================================================================================================================
|
||||
``GNA_AUTO`` Uses Intel® GNA if available, otherwise uses software execution mode on CPU.
|
||||
``GNA_HW`` Uses Intel® GNA if available, otherwise raises an error.
|
||||
``GNA_SW`` *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode.
|
||||
``GNA_SW_EXACT`` Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode.
|
||||
``GNA_HW_WITH_SW_FBACK`` Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode.
|
||||
``GNA_SW_FP32`` Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``).
|
||||
======================== ==============================================================================================================================================
|
||||
.. doxygensnippet:: docs/snippets/gna/configure.py
|
||||
:language: python
|
||||
:fragment: [import]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gna/configure.py
|
||||
:language: python
|
||||
:fragment: [ov_gna_exec_mode_hw_with_sw_fback]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## <a name="supported-configuration-parameters">Supported Configuration Parameters</a>
|
||||
> **NOTE**: Due to the "first come - first served" nature of GNA driver and the QoS feature, this mode may lead to increased CPU consumption
|
||||
if there are several clients using GNA simultaneously.
|
||||
Even a lightweight competing infer request which has not been cleared at the time when the user's GNA client process makes its request,
|
||||
can cause the user's request to be executed on CPU, thereby unnecessarily increasing CPU utilization and power.
|
||||
|
||||
The plugin supports the configuration parameters listed below. The parameter names correspond to their usage through API keys, such as ``GNAConfigParams::KEY_GNA_DEVICE_MODE`` or ``PluginConfigParams::KEY_PERF_COUNT`` in C++ and ``GNA_DEVICE_MODE`` or ``PERF_COUNT`` in Python.
|
||||
## Supported inference data types
|
||||
|
||||
Intel® GNA essentially operates in the low-precision mode which represents a mix of 8-bit (`i8`), 16-bit (`i16`), and 32-bit (`i32`) integer computations.
|
||||
|
||||
GNA plugin users are encouraged to use the [Post-Training Optimization Tool](@ref pot_README) to get a model with quantization hints based on statistics for the provided dataset.
|
||||
|
||||
Unlike other plugins supporting low-precision execution, the GNA plugin can calculate quantization factors at the model loading time, so you can run a model without calibration. However, this mode may not provide satisfactory accuracy because the internal quantization algorithm is based on heuristics which may or may not be efficient, depending on the model and the dynamic range of input data. This mode is going to be deprecated soon.
|
||||
|
||||
The GNA plugin supports the following data types as inference precision of internal primitives:
|
||||
* Quantized data types:
|
||||
- i16
|
||||
- i8
|
||||
|
||||
[Hello Query Device C++ Sample](@ref openvino_inference_engine_samples_hello_query_device_README) can be used to print out supported data types for all detected devices.
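
For a programmatic equivalent, here is a small sketch querying the GNA device with the standard `ov::device::capabilities` read-only property:

```cpp
#include <iostream>
#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    // Print the capabilities the GNA device reports, similar to what the
    // Hello Query Device sample prints for every detected device.
    for (const auto& capability : core.get_property("GNA", ov::device::capabilities))
        std::cout << capability << std::endl;
    return 0;
}
```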
|
||||
|
||||
[POT API Usage sample for GNA](@ref pot_sample_speech_README) demonstrates how a model can be quantized for GNA using the POT API in two modes:
|
||||
* Accuracy (i16 weights)
|
||||
* Performance (i8 weights)
|
||||
|
||||
For a POT quantized model, the `ov::hint::inference_precision` property has no effect except for the cases described in <a href="#support-for-2d-convolutions-using-pot">Support for 2D Convolutions using POT</a>.
|
||||
|
||||
## Supported features
|
||||
|
||||
### Models caching
|
||||
The cache for the GNA plugin may be enabled via the common OpenVINO `ov::cache_dir` property, thanks to the import/export functionality support (see below).
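
A minimal sketch of enabling the cache (the directory name `gna_cache` and `model.xml` are placeholders):

```cpp
#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    // The first compilation exports the compiled blob into this directory;
    // subsequent runs import it, which reduces the first inference time.
    core.set_property(ov::cache_dir("gna_cache"));

    auto model = core.read_model("model.xml");  // placeholder model path
    auto compiled_model = core.compile_model(model, "GNA");
    return 0;
}
```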
|
||||
|
||||
See [Model caching overview page](@ref openvino_docs_IE_DG_Model_caching_overview) for more details.
|
||||
|
||||
### Import/Export
|
||||
|
||||
The GNA plugin supports import/export capability which helps to significantly decrease first inference time. The model compile target is the same as the execution target by default. The default value for the execution target corresponds to available hardware, or latest hardware version supported by the plugin (i.e., GNA 3.0) if there is no GNA HW in the system.
|
||||
|
||||
If you want to export a model for a specific version of GNA hardware, use the `ov::intel_gna::compile_target` property and then export the model:
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| Parameter Name | Values | Default Value | Description |
|
||||
+==================================+=========================+===============+=================================================================+
|
||||
| ``KEY_GNA_EXEC_TARGET`` | ``TARGET_2_0``, | *see below* | Defines the execution target. |
|
||||
| | ``TARGET_3_0`` | | |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_GNA_COMPILE_TARGET`` | ``TARGET_2_0``, | *see below* | Defines the compilation target. |
|
||||
| | ``TARGET_3_0`` | | |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_GNA_COMPACT_MODE`` | ``YES``, ``NO`` | ``NO`` | Enables I/O buffers reuse to save space. |
|
||||
| | | | Makes debugging harder. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_GNA_SCALE_FACTOR`` | FP32 number | 1.0 | Sets the scale factor to use for input quantization. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_GNA_DEVICE_MODE`` | ``GNA_AUTO``, | ``GNA_AUTO`` | One of the modes described |
|
||||
| | ``GNA_HW``, | | in `Execution Modes <#execution-modes>`_. |
|
||||
| | ``GNA_HW_WITH_SW_FBACK``| | |
|
||||
| | ``GNA_SW_EXACT``, | | |
|
||||
| | ``GNA_SW_FP32`` | | |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_GNA_FIRMWARE_MODEL_IMAGE`` | ``std::string`` | ``""`` | Sets the name for the embedded model binary dump file. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_GNA_PRECISION`` | ``I16``, ``I8`` | ``I16`` | Sets the preferred integer weight resolution for quantization |
|
||||
| | | | (ignored for models produced using POT). |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_PERF_COUNT`` | ``YES``, ``NO`` | ``NO`` | Turns on performance counters reporting. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
|
||||
The parameters are passed as ``std::map<std::string, std::string>`` on ``InferenceEngine::Core::LoadNetwork`` or ``InferenceEngine::SetConfig``.
|
||||
|
||||
Normally, you do not need to select the execution target (``KEY_GNA_EXEC_TARGET``) and compilation target (``KEY_GNA_COMPILE_TARGET``). The default value for the execution target corresponds to available hardware, or latest hardware version supported by the plugin (i.e., GNA 3.0) if there is no GNA HW in the system. The compilation target is the same as the execution target by default. However, you may want to change the targets, for example, if you want to check how a model compiled for one generation would behave on the other generation (using the software emulation mode), or if you are willing to export a model for a specific version of GNA HW.
|
||||
|
||||
You can change the ``KEY_GNA_DEVICE_MODE`` parameter at run time using ``InferenceEngine::ExecutableNetwork::SetConfig``, which works for any value excluding ``GNA_SW_FP32``. This enables you to switch the execution between software emulation mode and hardware execution mode after the model is loaded.
|
||||
.. doxygensnippet:: docs/snippets/gna/import_export.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_gna_export]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| Parameter Name | Values | Default Value | Description |
|
||||
+==================================+=========================+===============+=================================================================+
|
||||
| ``GNA_EXEC_TARGET`` | ``TARGET_2_0``, | _see below_ | Defines the execution target. |
|
||||
| | ``TARGET_3_0`` | | |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``GNA_COMPILE_TARGET`` | ``TARGET_2_0``, | _see below_ | Defines the compilation target. |
|
||||
| | ``TARGET_3_0`` | | |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``GNA_COMPACT_MODE`` | ``YES``, ``NO`` | ``NO`` | Enables I/O buffers reuse to save space. |
|
||||
| | | | Makes debugging harder. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``GNA_SCALE_FACTOR`` | FP32 number | 1.0 | Sets the scale factor to use for input quantization. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``GNA_DEVICE_MODE`` | ``GNA_AUTO``, | ``GNA_AUTO`` | One of the modes described |
|
||||
| | ``GNA_HW``, | | in `Execution Modes <#execution-modes>`_. |
|
||||
| | ``GNA_HW_WITH_SW_FBACK``| | |
|
||||
| | ``GNA_SW_EXACT``, | | |
|
||||
| | ``GNA_SW_FP32`` | | |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``GNA_FIRMWARE_MODEL_IMAGE`` | ``string`` | ``""`` | Sets the name for the embedded model binary dump file. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``GNA_PRECISION`` | ``I16``, ``I8`` | ``I16`` | Sets the preferred integer weight resolution for quantization |
|
||||
| | | | (ignored for models produced using POT). |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``PERF_COUNT`` | ``YES``, ``NO`` | ``NO`` | Turns on performance counters reporting. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
|
||||
The parameters are passed as strings to `IECore.load_network <api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network>`_.
|
||||
|
||||
Normally, you do not need to select the execution target (``GNA_EXEC_TARGET``) and compilation target (``GNA_COMPILE_TARGET``). The default value for the execution target corresponds to available hardware, or latest hardware version supported by the plugin (i.e., GNA 3.0) if there is no GNA HW in the system. The compilation target is the same as the execution target by default. However, you may want to change the targets, for example, if you want to check how a model compiled for one generation would behave on the other generation (using the SW emulation mode), or if you are willing to export a model for a specific version of GNA HW.
|
||||
|
||||
You can change the ``GNA_DEVICE_MODE`` parameter at run time by sending a configuration dict to the `IECore.load_network <api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network>`_ call, which works for any value excluding ``GNA_SW_FP32``. This enables you to switch the execution between software emulation mode and hardware execution mode after the model is loaded.
|
||||
.. doxygensnippet:: docs/snippets/gna/import_export.py
|
||||
:language: python
|
||||
:fragment: [ov_gna_export]
|
||||
|
||||
@endsphinxdirective
|
||||
## How to Interpret Performance Counters
|
||||
|
||||
With the following methods, you can collect performance counters that provide various performance data about execution on GNA:
|
||||
Import model:
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
|
||||
``InferenceEngine::InferRequest::GetPerformanceCounts``
|
||||
|
||||
The returned map stores a counter description as a key, and a counter value in the ``realTime_uSec`` field of the ``InferenceEngineProfileInfo`` structure.
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gna/import_export.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_gna_import]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
``openvino.inference_engine.InferRequest.get_perf_counts``
|
||||
.. doxygensnippet:: docs/snippets/gna/import_export.py
|
||||
:language: python
|
||||
:fragment: [ov_gna_import]
|
||||
|
||||
The returned map stores a counter description as a key, and a counter value in the ``real_time`` field.
|
||||
@endsphinxdirective
|
||||
|
||||
[Compile Tool](@ref openvino_inference_engine_tools_compile_tool_README) or [Speech C++ Sample](@ref openvino_inference_engine_samples_speech_sample_README) can be used to compile a model.
|
||||
|
||||
### Stateful models
|
||||
The GNA plugin natively supports stateful models.
|
||||
|
||||
Please refer to [Stateful models](@ref openvino_docs_IE_DG_network_state_intro) for more details about such models.
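
As a sketch, states can be inspected and reset through the regular infer request API (`model.xml` is a placeholder path):

```cpp
#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    auto compiled_model = core.compile_model("model.xml", "GNA");  // placeholder path
    auto infer_request = compiled_model.create_infer_request();

    // Reset all internal states, e.g. between two independent audio streams,
    // before starting a new sequence of inferences.
    for (auto& state : infer_request.query_state())
        state.reset();
    return 0;
}
```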
|
||||
|
||||
> **NOTE**: Typically, GNA is used in streaming scenarios, when minimizing the latency is important. Taking into account that POT does not support the `TensorIterator` operation, the recommendation is to use the `--transform` option of the Model Optimizer to apply `LowLatency2` transformation when converting an original model.
|
||||
|
||||
### Profiling
|
||||
The GNA plugin allows you to turn on profiling using the `ov::enable_profiling` property.
|
||||
With the following methods, you can collect profiling information that provides various performance data about execution on GNA:
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
|
||||
``ov::InferRequest::get_profiling_info``
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
``openvino.runtime.InferRequest.get_profiling_info``
|
||||
|
||||
@endsphinxdirective
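
A short sketch of that flow with the C++ API (the model path is a placeholder; for GNA, the reported value is a raw counter, see the conversion note further below):

```cpp
#include <iostream>
#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    // Profiling has to be enabled at compilation time.
    auto compiled_model = core.compile_model("model.xml", "GNA",  // placeholder path
                                             ov::enable_profiling(true));
    auto infer_request = compiled_model.create_infer_request();
    infer_request.infer();

    // Each entry describes one executed primitive and its raw timing counter.
    for (const auto& info : infer_request.get_profiling_info())
        std::cout << info.node_name << ": " << info.real_time.count() << std::endl;
    return 0;
}
```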
|
||||
|
||||
@ -385,109 +188,154 @@ seconds = cycles / frequency
|
||||
```
|
||||
|
||||
Refer to the table below to learn about the frequency of Intel® GNA inside a particular processor:
|
||||
Processor | Frequency of Intel® GNA
|
||||
---|---
|
||||
Intel® Core™ processors| 400MHz
|
||||
Intel® processors formerly codenamed Elkhart Lake | 200MHz
|
||||
Intel® processors formerly codenamed Gemini Lake | 200MHz
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. csv-table:: Frequency of Intel® GNA inside a particular processor
|
||||
:header: "Processor", "Frequency of Intel® GNA, MHz"
|
||||
|
||||
"Intel® Core™ processors", 400
|
||||
"Intel® processors formerly codenamed Elkhart Lake", 200
|
||||
"Intel® processors formerly codenamed Gemini Lake", 200
|
||||
|
||||
@endsphinxdirective
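
For illustration, converting a raw cycle counter with the frequencies above (the counter value here is made up):

```cpp
#include <cstdint>
#include <iostream>

int main() {
    const std::uint64_t cycles = 1200000;  // hypothetical value read from a GNA counter
    const double frequency_hz = 400e6;     // 400 MHz on Intel® Core™ processors,
                                           // use 200e6 for Elkhart Lake / Gemini Lake
    const double seconds = static_cast<double>(cycles) / frequency_hz;
    std::cout << "GNA scoring time: " << seconds * 1e3 << " ms" << std::endl;
    return 0;
}
```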
|
||||
|
||||
Currently, the following performance counters are provided:
|
||||
|
||||
* Scoring request performance results
|
||||
* Inference request performance results
|
||||
* Number of total cycles spent on scoring in hardware including compute and memory stall cycles
|
||||
* Number of stall cycles spent in hardware
|
||||
|
||||
## Network Batch Size
|
||||
## Supported properties
|
||||
The plugin supports the properties listed below.
|
||||
|
||||
Intel® GNA plugin supports the processing of context-windowed speech frames in batches of 1-8 frames in one
|
||||
input blob using the following methods:
|
||||
### Read-write properties
|
||||
The following parameters must be set before model compilation in order to take effect, or passed as an additional argument to `ov::Core::compile_model()`:
|
||||
|
||||
- ov::cache_dir
|
||||
- ov::enable_profiling
|
||||
- ov::hint::inference_precision
|
||||
- ov::hint::num_requests
|
||||
- ov::intel_gna::compile_target
|
||||
- ov::intel_gna::firmware_model_image_path
|
||||
- ov::intel_gna::execution_target
|
||||
- ov::intel_gna::pwl_design_algorithm
|
||||
- ov::intel_gna::pwl_max_error_percent
|
||||
- ov::intel_gna::scale_factors_per_input
|
||||
|
||||
These parameters can be changed after model compilation using `ov::CompiledModel::set_property` (see the sketch after this list):
|
||||
- ov::hint::performance_mode
|
||||
- ov::intel_gna::execution_mode
|
||||
- ov::log::level
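
A sketch of how the two groups differ in practice (property choices and the model path are illustrative only):

```cpp
#include <openvino/runtime/core.hpp>
#include <openvino/runtime/intel_gna/properties.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    // Properties from the first group take effect only when passed to compile_model().
    auto compiled_model = core.compile_model(model, "GNA",
        ov::hint::num_requests(4),
        ov::enable_profiling(true));

    // Properties from the second group may still be adjusted on the compiled model.
    compiled_model.set_property(
        {ov::intel_gna::execution_mode(ov::intel_gna::ExecutionMode::SW_EXACT)});
    return 0;
}
```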
|
||||
|
||||
### Read-only properties
|
||||
- ov::available_devices
|
||||
- ov::device::capabilities
|
||||
- ov::device::full_name
|
||||
- ov::intel_gna::library_full_version
|
||||
- ov::optimal_number_of_infer_requests
|
||||
- ov::range_for_async_infer_requests
|
||||
- ov::supported_properties
|
||||
|
||||
## Limitations
|
||||
|
||||
### Models and Operations Limitations
|
||||
|
||||
Because of the specifics of the hardware architecture, Intel® GNA supports a limited set of operation types and their combinations.
|
||||
For example, you should not expect the GNA Plugin to be able to run computer vision models, except those specifically adapted for the GNA Plugin, because the plugin does not fully support 2D convolutions.
|
||||
|
||||
Limitations include:
|
||||
|
||||
- Only 1D convolutions are natively supported on the HW prior to GNA 3.0; 2D convolutions have specific limitations (see the table below).
|
||||
- The number of output channels for convolutions must be a multiple of 4.
|
||||
- The maximum number of filters is 65532 for GNA 2.0 and 8192 for GNA 3.0.
|
||||
- Transpose layer support is limited to the cases where no data reordering is needed or when reordering is happening for two dimensions, at least one of which is not greater than 8.
|
||||
- Splits and concatenations are supported for continuous portions of memory (e.g., split of 1,2,3,4 to 1,1,3,4 and 1,1,3,4 or concats of 1,2,3,4 and 1,2,3,5 to 2,2,3,4).
|
||||
- For Multiply, Add and Subtract layers, auto broadcasting is only supported for constant inputs.
|
||||
|
||||
#### Support for 2D Convolutions
|
||||
|
||||
The Intel® GNA 1.0 and 2.0 hardware natively supports only 1D convolutions. However, 2D convolutions can be mapped to 1D when a convolution kernel moves in a single direction.
|
||||
|
||||
Initially, a limited subset of Intel® GNA 3.0 features is added to the previous feature set, including the following:
|
||||
|
||||
* **2D VALID Convolution With Small 2D Kernels:** Two-dimensional convolutions with the following kernel dimensions [H,W] are supported: [1,1], [2,2], [3,3], [2,1], [3,1], [4,1], [5,1], [6,1], [7,1], [1,2], or [1,3]. Input tensor dimensions are limited to [1,8,16,16] <= [N,C,H,W] <= [1,120,384,240]. Up to 384 channels C may be used with a subset of kernel sizes (see table below). Up to 256 kernels (output channels) are supported. Pooling is limited to pool shapes of [1,1], [2,2], or [3,3]. Not all combinations of kernel shape and input tensor shape are supported (see the tables below for exact limitations).
|
||||
|
||||
The tables below show that the exact limitation on the input tensor width W depends on the number of input channels C (indicated as Ci below) and the kernel shape. There is much more freedom to choose the input tensor height and number of output channels.
|
||||
|
||||
The following tables provide a more explicit representation of the Intel® GNA 3.0 2D convolution operations initially supported. The limits depend strongly on the number of input tensor channels (Ci) and the input tensor width (W). Other factors are kernel height (KH), kernel width (KW), pool height (PH), pool width (PW), horizontal pool step (SH), and vertical pool step (SW). For example, the first table shows that for a 3x3 kernel with max pooling, only square pools are supported, and W is limited to 87 when there are 64 input channels.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
:download:`Table of Maximum Input Tensor Widths (W) vs. Rest of Parameters (Input and Kernel Precision: i16) <../../../docs/OV_Runtime_UG/supported_plugins/files/GNA_Maximum_Input_Tensor_Widths_i16.csv>`
|
||||
|
||||
:download:`Table of Maximum Input Tensor Widths (W) vs. Rest of Parameters (Input and Kernel Precision: i8) <../../../docs/OV_Runtime_UG/supported_plugins/files/GNA_Maximum_Input_Tensor_Widths_i8.csv>`
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
> **NOTE**: The above limitations only apply to the new hardware 2D convolution operation. When possible, the Intel® GNA plugin graph compiler flattens 2D convolutions so that the second generation Intel® GNA 1D convolution operations (without these limitations) may be used. The plugin will also flatten 2D convolutions regardless of the sizes if GNA 2.0 compilation target is selected (see below).
|
||||
|
||||
#### Support for 2D Convolutions using POT
|
||||
|
||||
For POT to successfully work with models that include GNA 3.0 2D convolutions, the following requirements must be met:
|
||||
* All convolution parameters are natively supported by HW (see tables above)
|
||||
* The runtime precision is explicitly set by the `ov::hint::inference_precision` property: `i8` for models produced by the `performance` mode of POT, and `i16` for models produced by the `accuracy` mode of POT, as shown in the sketch below.
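
A sketch of setting the precision for each case (model file names are placeholders):

```cpp
#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;

    // Model quantized with the POT "performance" preset (i8 weights).
    auto perf_model = core.read_model("model_perf.xml");  // placeholder path
    auto perf_compiled = core.compile_model(perf_model, "GNA",
        ov::hint::inference_precision(ov::element::i8));

    // Model quantized with the POT "accuracy" preset (i16 weights).
    auto acc_model = core.read_model("model_acc.xml");    // placeholder path
    auto acc_compiled = core.compile_model(acc_model, "GNA",
        ov::hint::inference_precision(ov::element::i16));
    return 0;
}
```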
|
||||
|
||||
### Batch Size Limitation
|
||||
|
||||
Intel® GNA plugin supports the processing of context-windowed speech frames in batches of 1-8 frames.
|
||||
|
||||
Please refer to the [Layout API overview](@ref openvino_docs_OV_Runtime_UG_Layout_Overview) to determine the batch dimension.
|
||||
|
||||
To set the layout of model inputs at runtime, use the [Preprocessing API](@ref openvino_docs_OV_Runtime_UG_Preprocessing_Overview):
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
|
||||
``InferenceEngine::ICNNNetwork::setBatchSize``
|
||||
.. doxygensnippet:: docs/snippets/gna/set_batch.cpp
|
||||
:language: cpp
|
||||
:fragment: [include]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gna/set_batch.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_gna_set_nc_layout]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
`IENetwork.batch_size <api/ie_python_api/_autosummary/openvino.inference_engine.IENetwork.html#openvino.inference_engine.IENetwork.batch_size>`_
|
||||
.. doxygensnippet:: docs/snippets/gna/set_batch.py
|
||||
:language: python
|
||||
:fragment: [import]
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gna/set_batch.py
|
||||
:language: python
|
||||
:fragment: [ov_gna_set_nc_layout]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
Increasing batch size only improves efficiency of `Fully Connected` layers.
|
||||
|
||||
> **NOTE**: For networks with `Convolutional`, `LSTM`, or `Memory` layers, the only supported batch size is 1.
|
||||
|
||||
## Compatibility with Heterogeneous Plugin
|
||||
|
||||
Heterogeneous plugin was tested with the Intel® GNA as a primary device and CPU as a secondary device. To run inference of networks with layers unsupported by the GNA plugin, such as Softmax, use the Heterogeneous plugin with the `HETERO:GNA,CPU` configuration.
|
||||
|
||||
> **NOTE**: Due to a limitation of the Intel® GNA backend library, heterogeneous support is limited to cases where, in the resulting sliced graph, only one subgraph is scheduled to run on GNA\_HW or GNA\_SW devices.
|
||||
|
||||
## Recovery from Interruption by High-Priority Windows Audio Processes\*
|
||||
|
||||
GNA is designed for real-time workloads such as noise reduction.
|
||||
For such workloads, processing should be time constrained, otherwise extra delays may cause undesired effects such as
|
||||
*audio glitches*. To make sure that processing can satisfy real-time requirements, the GNA driver provides a Quality of Service
|
||||
(QoS) mechanism, which interrupts requests that might cause high-priority Windows audio processes to miss
|
||||
the schedule, thereby causing long running GNA tasks to terminate early.
|
||||
|
||||
Applications should be prepared for this situation.
|
||||
|
||||
If an inference in the `GNA_HW` mode cannot be executed because of such an interruption, then the `wait` method returns the following status code:
|
||||
then set batch size:
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
|
||||
``InferRequest::Wait()`` returns status code ``StatusCode::INFER_NOT_STARTED``.
|
||||
.. doxygensnippet:: docs/snippets/gna/set_batch.cpp
|
||||
:language: cpp
|
||||
:fragment: [ov_gna_set_batch_size]
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
`InferRequest.wait <api/ie_python_api/_autosummary/openvino.inference_engine.InferRequest.html#openvino.inference_engine.InferRequest.wait>`_ returns status code `INFER_NOT_STARTED`.
|
||||
.. doxygensnippet:: docs/snippets/gna/set_batch.py
|
||||
:language: python
|
||||
:fragment: [ov_gna_set_batch_size]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
In future releases, it will be changed to a more meaningful status code.
|
||||
Increasing batch size only improves efficiency of `MatMul` layers.
|
||||
|
||||
Any application working with GNA must properly react to this code.
|
||||
One of the strategies to adapt an application:
|
||||
> **NOTE**: For models with `Convolution`, `LSTMCell`, or `ReadValue`/`Assign` operations, the only supported batch size is 1.
|
||||
|
||||
1. Immediately switch to the GNA_SW_EXACT emulation mode:
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
### Compatibility with Heterogeneous mode
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
std::map<std::string, Parameter> newConfig;
|
||||
newConfig[GNAConfigParams::KEY_GNA_DEVICE_MODE] = Parameter("GNA_SW_EXACT");
|
||||
executableNet.SetConfig(newConfig);
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from openvino.inference_engine import IECore
|
||||
|
||||
ie = IECore()
|
||||
new_cfg = {'GNA_DEVICE_MODE' : 'GNA_SW_EXACT'}
|
||||
net = ie.read_network(model=path_to_model)
|
||||
exec_net = ie.load_network(network=net, device_name="GNA", config=new_cfg)
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
2. Resubmit and switch back to GNA_HW expecting that the competing application has finished.
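
A minimal sketch combining both steps is shown below; it is not part of the original guide, and it assumes an `ExecutableNetwork` and `InferRequest` that were created earlier for the GNA device.

```cpp
#include <map>
#include <string>
#include <ie_core.hpp>
#include <gna/gna_config.hpp>

// Hypothetical helper: if the request was preempted by the QoS mechanism,
// fall back to software emulation and resubmit the request.
void recoverFromQosInterruption(InferenceEngine::ExecutableNetwork& executableNet,
                                InferenceEngine::InferRequest& inferRequest) {
    InferenceEngine::StatusCode status =
        inferRequest.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
    if (status == InferenceEngine::StatusCode::INFER_NOT_STARTED) {
        // Step 1: switch to bit-exact software emulation.
        std::map<std::string, InferenceEngine::Parameter> newConfig;
        newConfig[InferenceEngine::GNAConfigParams::KEY_GNA_DEVICE_MODE] =
            InferenceEngine::Parameter("GNA_SW_EXACT");
        executableNet.SetConfig(newConfig);
        // Step 2: resubmit the request and wait for it again.
        inferRequest.StartAsync();
        inferRequest.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
    }
}
```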
|
||||
|
||||
> **NOTE**: This method is deprecated: a new automatic QoS mode has been introduced in the 2021.4.1 release of OpenVINO™ (see below).
|
||||
|
||||
## GNA3 Automatic QoS Feature on Windows*
|
||||
|
||||
Starting with the 2021.4.1 release of OpenVINO and version 03.00.00.1363 of the Windows* GNA driver, a new execution mode, `GNA_HW_WITH_SW_FBACK`, is introduced
to ensure that workloads satisfy real-time execution requirements. In this mode, the GNA driver automatically falls back on the CPU for a particular infer request
if the HW queue is not empty, so there is no need to explicitly switch between GNA and CPU.
|
||||
|
||||
> **NOTE**: Due to the "first come, first served" nature of the GNA driver and the QoS feature, this mode may lead to increased CPU consumption
if there are several clients using GNA simultaneously.
Even a lightweight competing infer request which has not been cleared at the time when the user's GNA client process makes its request
can cause the user's request to be executed on the CPU, thereby unnecessarily increasing CPU utilization and power consumption.
|
||||
[Heterogeneous execution](@ref openvino_docs_OV_UG_Hetero_execution) is currently not supported by the GNA plugin.
|
||||
|
||||
## See Also
|
||||
|
||||
|
@ -6,21 +6,27 @@
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
||||
openvino_docs_OV_UG_supported_plugins_GPU_RemoteBlob_API
|
||||
|
||||
openvino_docs_OV_UG_supported_plugins_GPU_RemoteTensor_API
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
The GPU plugin uses the Intel® Compute Library for Deep Neural Networks (clDNN) to infer deep neural networks.
|
||||
clDNN is an open source performance library for Deep Learning (DL) applications intended for acceleration of Deep Learning Inference on Intel® Processor Graphics including Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics.
|
||||
For an in-depth description of clDNN, see [OpenVINO Runtime GPU plugin source files](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
|
||||
The GPU plugin is an OpenCL-based plugin for inference of deep neural networks on Intel GPUs, both integrated and discrete.
|
||||
For an in-depth description of the GPU plugin, see:
|
||||
- [GPU plugin developers documentation](https://github.com/openvinotoolkit/openvino/wiki/GPUPluginDevelopersDocs)
|
||||
- [OpenVINO Runtime GPU plugin source files](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/)
|
||||
- [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
|
||||
|
||||
The GPU plugin is a part of the Intel® Distribution of OpenVINO™ toolkit.
|
||||
|
||||
See the [GPU configuration page](@ref openvino_docs_install_guides_configurations_for_intel_gpu) for more details on how to configure a machine to use the GPU plugin.
|
||||
|
||||
## Device Naming Convention
|
||||
* Devices are enumerated as "GPU.X" where `X={0, 1, 2,...}`. Only Intel® GPU devices are considered.
|
||||
* If the system has an integrated GPU, it always has id=0 ("GPU.0").
|
||||
* Devices are enumerated as `"GPU.X"` where `X={0, 1, 2,...}`. Only Intel® GPU devices are considered.
|
||||
* If the system has an integrated GPU, it always has id=0 (`"GPU.0"`).
|
||||
* Other GPUs have undefined order that depends on the GPU driver.
|
||||
* "GPU" is an alias for "GPU.0"
|
||||
* `"GPU"` is an alias for `"GPU.0"`
|
||||
* If the system doesn't have an integrated GPU, then devices are enumerated starting from 0.
|
||||
* For GPUs with a multi-tile architecture (multiple sub-devices in OpenCL terms), a specific tile may be addressed as `"GPU.X.Y"`, where `X,Y={0, 1, 2,...}`, `X` is the id of the GPU device, and `Y` is the id of the tile within device `X`
|
||||
|
||||
For demonstration purposes, see the [Hello Query Device C++ Sample](../../../samples/cpp/hello_query_device/README.md) that can print out the list of available devices with associated indices. Below is an example output (truncated to the device names only):
|
||||
|
||||
@ -36,122 +42,180 @@ Available devices:
|
||||
Device: HDDL
|
||||
```
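
For illustration, a small sketch (not the sample itself) that prints the enumerated GPU devices together with their full names:

```cpp
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Enumerate all devices visible to OpenVINO and print the GPU entries with their full names.
    for (const std::string& device : core.get_available_devices()) {
        if (device.find("GPU") == 0) {
            std::cout << device << ": "
                      << core.get_property(device, ov::device::full_name) << std::endl;
        }
    }
    return 0;
}
```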
|
||||
|
||||
## Optimizations
|
||||
The device name can then be passed to the `ov::Core::compile_model()` method:
|
||||
|
||||
The plugin supports algorithms that fuse several operations into one optimized operation. Refer to the sections below for details.
|
||||
@sphinxdirective
|
||||
|
||||
> **NOTE**: For operation descriptions, see the [IR Notation Reference](../../ops/opset.md).
|
||||
.. tab:: Running on default device
|
||||
|
||||
### Fusing Convolution and Simple Layers
|
||||
.. doxygensnippet:: docs/snippets/gpu/compile_model.cpp
|
||||
:language: cpp
|
||||
:fragment: [compile_model_default_gpu]
|
||||
|
||||
Merge of a Convolution layer and any of the simple layers listed below:
|
||||
- Activation: ReLU, ELU, Sigmoid, Clamp, and others
|
||||
- Depthwise: ScaleShift, PReLU
|
||||
- FakeQuantize
|
||||
.. tab:: Running on specific GPU
|
||||
|
||||
> **NOTE**: You can have any number and order of simple layers.
|
||||
.. doxygensnippet:: docs/snippets/gpu/compile_model.cpp
|
||||
:language: cpp
|
||||
:fragment: [compile_model_gpu_with_id]
|
||||
|
||||
A combination of a Convolution layer and simple layers results in a single fused layer called
|
||||
*Convolution*:
|
||||
![conv_simple_01]
|
||||
.. tab:: Running on specific tile
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/compile_model.cpp
|
||||
:language: cpp
|
||||
:fragment: [compile_model_gpu_with_id_and_tile]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Supported inference data types
|
||||
GPU plugin supports the following data types as inference precision of internal primitives:
|
||||
|
||||
- Floating-point data types:
|
||||
- f32
|
||||
- f16
|
||||
- Quantized data types:
|
||||
- u8
|
||||
- i8
|
||||
- u1
|
||||
|
||||
Selected precision of each primitive depends on the operation precision in IR, quantization primitives, and available hardware capabilities.
The u1/u8/i8 data types are used for quantized operations only, i.e. they are not selected automatically for non-quantized operations.
See the [low-precision optimization guide](@ref pot_docs_LowPrecisionOptimizationGuide) for more details on how to get a quantized model.
|
||||
|
||||
Floating-point precision of a GPU primitive is selected based on operation precision in IR, except for the [compressed f16 IR form](../../MO_DG/prepare_model/FP16_Compression.md), which is executed in f16 precision.
|
||||
|
||||
> **NOTE**: Hardware acceleration for i8/u8 precision may be unavailable on some platforms. In that case, the model is executed in the floating-point precision taken from IR. Hardware support of u8/i8 acceleration can be queried via the `ov::device::capabilities` property.
|
||||
|
||||
[Hello Query Device C++ Sample](../../../samples/cpp/hello_query_device/README.md) can be used to print out supported data types for all detected devices.
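
As an illustration, a minimal sketch (assuming the device name `"GPU"`) of checking u8/i8 support via the `ov::device::capabilities` property:

```cpp
#include <algorithm>
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Query the capability strings reported by the device; "INT8" indicates u8/i8 acceleration support.
    auto capabilities = core.get_property("GPU", ov::device::capabilities);
    bool int8_supported =
        std::find(capabilities.begin(), capabilities.end(), ov::device::capability::INT8) != capabilities.end();
    std::cout << "INT8 acceleration supported: " << std::boolalpha << int8_supported << std::endl;
    return 0;
}
```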
|
||||
|
||||
## Supported features
|
||||
|
||||
### Multi-device execution
|
||||
If a machine has multiple GPUs (for example integrated GPU and discrete Intel GPU), then any supported model can be executed on all GPUs simultaneously.
|
||||
This can be achieved by specifying `"MULTI:GPU.1,GPU.0"` as a target device.
|
||||
|
||||
@snippet snippets/gpu/compile_model.cpp compile_model_multi
|
||||
|
||||
See [Multi-device execution page](../multi_device.md) for more details.
|
||||
|
||||
### Automatic batching
|
||||
The GPU plugin is capable of reporting the `ov::max_batch_size` and `ov::optimal_batch_size` metrics with respect to the current hardware platform and model;
thus, automatic batching can be applied in cases when `ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)` is set
or the device is specified as `"BATCH:GPU"`.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: Batching via BATCH plugin
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/compile_model.cpp
|
||||
:language: cpp
|
||||
:fragment: [compile_model_batch_plugin]
|
||||
|
||||
.. tab:: Batching via throughput hint
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/compile_model.cpp
|
||||
:language: cpp
|
||||
:fragment: [compile_model_auto_batch]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
See [Automatic batching page](../automatic_batching.md) for more details.
|
||||
|
||||
### Multi-stream execution
|
||||
If either the `ov::num_streams(n_streams)` property with `n_streams > 1` or the `ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)` property is set for the GPU plugin,
then multiple streams are created for the model. In the case of the GPU plugin, each stream has its own host thread and an associated OpenCL queue,
which means that incoming infer requests can be processed simultaneously.
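
As a rough illustration (not the referenced snippet; the model path and stream count are placeholders), multiple streams can be requested at compile time as follows:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path

    // Either request an explicit number of streams ...
    auto compiled_streams = core.compile_model(model, "GPU", ov::num_streams(2));

    // ... or let the plugin pick a stream count via the throughput hint.
    auto compiled_tput = core.compile_model(
        model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
    return 0;
}
```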
|
||||
|
||||
> **NOTE**: Simultaneous scheduling of kernels to different queues doesn't mean that the kernels are actually executed in parallel on GPU device. The actual behavior depends on the hardware architecture, and in some cases the execution may be serialized inside the GPU driver.
|
||||
|
||||
When multiple inferences of the same model need to be executed in parallel, the multi-stream feature is preferable over multiple instances of the model or application,
since the streams implementation in the GPU plugin supports weight memory sharing across streams, thus memory consumption may be lower compared to the other approaches.
|
||||
|
||||
See [optimization guide](@ref openvino_docs_deployment_optimization_guide_dldt_optimization_guide) for more details.
|
||||
|
||||
### Dynamic shapes
|
||||
The GPU plugin supports dynamic shapes for the batch dimension only (specified as 'N' in the [layout terms](../layout_overview.md)) with a fixed upper bound. Any other dynamic dimensions are unsupported. Internally, the GPU plugin creates
`log2(N)` low-level execution graphs (where `N` is the upper bound for the batch dimension) for batch sizes equal to powers of 2 to emulate dynamic behavior, so that an incoming infer request with a specific batch size is executed via a minimal combination of internal networks.
For example, batch size 33 may be executed via 2 internal networks with batch sizes 32 and 1.
|
||||
|
||||
> **NOTE**: Such an approach requires much more memory, and the overall model compilation time is significantly longer compared to the static batch scenario.
|
||||
|
||||
The code snippet below demonstrates how to use dynamic batch in simple scenarios:
|
||||
|
||||
@snippet snippets/gpu/dynamic_batch.cpp dynamic_batch
|
||||
|
||||
See [dynamic shapes guide](../ov_dynamic_shapes.md) for more details.
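
For illustration, a minimal sketch of bounding only the batch dimension (assuming a single-input model with the batch as the first dimension; the upper bound of 32 and the model path are placeholders):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path

    // Make only the batch dimension dynamic, with a fixed upper bound of 32.
    ov::PartialShape shape = model->input().get_partial_shape();
    shape[0] = ov::Dimension(1, 32);
    model->reshape(shape);

    auto compiled_model = core.compile_model(model, "GPU");
    return 0;
}
```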
|
||||
|
||||
### Preprocessing acceleration
|
||||
GPU plugin has the following additional preprocessing options:
|
||||
- `ov::intel_gpu::memory_type::surface` and `ov::intel_gpu::memory_type::buffer` values for the `ov::preprocess::InputTensorInfo::set_memory_type()` preprocessing method. These values are intended to provide a hint to the plugin on the type of input tensors that will be set at runtime, so that proper kernels are generated.
|
||||
|
||||
@snippet snippets/gpu/preprocessing.cpp init_preproc
|
||||
|
||||
With such preprocessing, the GPU plugin expects `ov::intel_gpu::ocl::ClImage2DTensor` (or derived) to be passed for each NV12 plane via the `ov::InferRequest::set_tensor()` or `ov::InferRequest::set_tensors()` methods.
|
||||
|
||||
Refer to [RemoteTensor API](./GPU_RemoteTensor_API.md) for usage examples.
|
||||
|
||||
See [preprocessing API guide](../preprocessing_overview.md) for more details.
|
||||
|
||||
### Models caching
|
||||
The cache for the GPU plugin may be enabled via the common OpenVINO `ov::cache_dir` property. The GPU plugin implementation supports only compiled kernels caching,
thus all plugin-specific model transformations are executed on each `ov::Core::compile_model()` call regardless of the `cache_dir` option. However, since
kernel compilation is a bottleneck in the model loading process, a significant load time reduction can be achieved with the `ov::cache_dir` property enabled.
|
||||
|
||||
See [Model caching overview page](../Model_caching_overview.md) for more details.
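
A minimal sketch of enabling the cache (the cache directory and model path are placeholders):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Point the runtime at a cache directory; compiled GPU kernels are stored there
    // on the first compile_model() call and reused on subsequent calls.
    core.set_property(ov::cache_dir("/tmp/ov_cache"));

    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled_model = core.compile_model(model, "GPU");
    return 0;
}
```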
|
||||
|
||||
### Extensibility
|
||||
See [GPU Extensibility](@ref openvino_docs_Extensibility_UG_GPU) page.
|
||||
|
||||
### GPU context and memory sharing via RemoteTensor API
|
||||
See [RemoteTensor API of GPU Plugin](GPU_RemoteTensor_API.md).
|
||||
|
||||
|
||||
### Fusing Pooling and FakeQuantize Layers
|
||||
## Supported properties
|
||||
The plugin supports the properties listed below.
|
||||
|
||||
A combination of Pooling and FakeQuantize layers results in a single fused layer called *Pooling*:
|
||||
![pooling_fakequant_01]
|
||||
### Read-write properties
|
||||
All parameters must be set before calling `ov::Core::compile_model()` in order to take effect, or passed as an additional argument to `ov::Core::compile_model()` (a short sketch follows the list below):
|
||||
|
||||
### Fusing Activation Layers
|
||||
- ov::cache_dir
|
||||
- ov::enable_profiling
|
||||
- ov::hint::model_priority
|
||||
- ov::hint::performance_mode
|
||||
- ov::hint::num_requests
|
||||
- ov::num_streams
|
||||
- ov::compilation_num_threads
|
||||
- ov::device::id
|
||||
- ov::intel_gpu::hint::host_task_priority
|
||||
- ov::intel_gpu::hint::queue_priority
|
||||
- ov::intel_gpu::hint::queue_throttle
|
||||
- ov::intel_gpu::enable_loop_unrolling
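
A short sketch of both approaches mentioned above (device name, property values, and model path are placeholders):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path

    // Set a property globally for the GPU device before compilation ...
    core.set_property("GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));

    // ... or pass properties directly to compile_model() so they apply to this model only.
    auto compiled_model = core.compile_model(model, "GPU",
                                             ov::enable_profiling(true),
                                             ov::hint::num_requests(4));
    return 0;
}
```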
|
||||
|
||||
Given the linear pattern, an Activation layer can be fused into other layers:
|
||||
### Read-only properties
|
||||
- ov::supported_properties
|
||||
- ov::available_devices
|
||||
- ov::range_for_async_infer_requests
|
||||
- ov::range_for_streams
|
||||
- ov::optimal_batch_size
|
||||
- ov::max_batch_size
|
||||
- ov::device::full_name
|
||||
- ov::device::type
|
||||
- ov::device::gops
|
||||
- ov::device::capabilities
|
||||
- ov::intel_gpu::device_total_mem_size
|
||||
- ov::intel_gpu::uarch_version
|
||||
- ov::intel_gpu::execution_units_count
|
||||
- ov::intel_gpu::memory_statistics
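
For the read-only properties above, values are queried rather than set; a minimal sketch (assuming a `"GPU"` device is present):

```cpp
#include <iostream>
#include <openvino/openvino.hpp>
#include <openvino/runtime/intel_gpu/properties.hpp>

int main() {
    ov::Core core;
    // Read-only properties are retrieved via get_property().
    std::cout << core.get_property("GPU", ov::device::full_name) << std::endl;
    std::cout << core.get_property("GPU", ov::intel_gpu::device_total_mem_size) << " bytes" << std::endl;
    return 0;
}
```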
|
||||
|
||||
![fullyconnected_activation_01]
|
||||
## Limitations
|
||||
In some cases, the GPU plugin may implicitly execute several primitives on the CPU using internal implementations, which may lead to increased CPU utilization.
Below is the list of such operations:
|
||||
- Proposal
|
||||
- NonMaxSuppression
|
||||
- DetectionOutput
|
||||
|
||||
|
||||
### Fusing Convolution and Sum Layers
|
||||
|
||||
A combination of Convolution, Simple, and Eltwise layers with the sum operation results in a single layer called *Convolution*:
|
||||
![conv_sum_relu_01]
|
||||
|
||||
### Fusing a Group of Convolutions
|
||||
|
||||
If a topology contains the following pipeline, a GPU plugin merges Split, Convolution, and Concatenation layers into a single Convolution layer with the group parameter:
|
||||
> **NOTE**: Parameters of the Convolution layers must coincide.
|
||||
|
||||
![group_convolutions_01]
|
||||
|
||||
### Optimizing Layers Out
|
||||
|
||||
The following layers are optimized out under certain conditions:
|
||||
* Crop
|
||||
* Concatenate
|
||||
* Reshape
|
||||
* Flatten
|
||||
* Split
|
||||
* Copy
|
||||
|
||||
### Load-Time Execution
|
||||
|
||||
Some layers are executed during the load time, not during the inference. One of such layers is PriorBox.
|
||||
|
||||
|
||||
## CPU Executed Layers
|
||||
|
||||
The following layers are not accelerated on the GPU and executed on the host CPU instead:
|
||||
* Proposal
|
||||
* NonMaxSuppression
|
||||
* PriorBox
|
||||
* DetectionOutput
|
||||
|
||||
## Supported Configuration Parameters
|
||||
|
||||
The plugin supports the configuration parameters listed below.
|
||||
All parameters must be set before calling <code>InferenceEngine::Core::LoadNetwork()</code> in order to take effect.
|
||||
When specifying key values as raw strings (that is, when using Python API), omit the `KEY_` prefix.
|
||||
|
||||
| Parameter Name | Parameter Values | Default | Description |
|
||||
|---------------------|-----------------------------|-----------------|-----------------------------------------------------------|
|
||||
| `KEY_CACHE_DIR` | `"<cache_dir>"` | `""` | Specifies a directory where compiled OCL binaries can be cached. First model loading generates the cache, and all subsequent LoadNetwork calls use precompiled kernels which significantly improves load time. If empty - caching is disabled |
|
||||
| `KEY_PERF_COUNT` | `YES` / `NO` | `NO` | Collect performance counters during inference |
|
||||
| `KEY_CONFIG_FILE` | `"<file1> [<file2> ...]"` | `""` | Load custom layer configuration files |
|
||||
| `KEY_GPU_HOST_`<br>`TASK_PRIORITY` | `GPU_HOST_TASK_PRIORITY_<HIGH\|MEDIUM\|LOW>` | `GPU_HOST_TASK_PRIORITY_MEDIUM` | This key instructs the GPU plugin which cpu core type of TBB affinity used in load network. <br> This option has 3 types of levels: HIGH, LOW, and ANY. It is only affected on Hybrid CPUs. <br>- LOW - instructs the GPU Plugin to use LITTLE cores if they are available <br>- MEDIUM (DEFAULT) - instructs the GPU Plugin to use any available cores (BIG or LITTLE cores) <br>- HIGH - instructs the GPU Plugin to use BIG cores if they are available |
|
||||
| `KEY_GPU_PLUGIN_`<br>`PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)<br> Higher value means higher priority for OpenCL queue. 0 disables the setting. **Deprecated**. Please use KEY_GPU_MODEL_PRIORITY |
|
||||
| `KEY_GPU_PLUGIN_`<br>`THROTTLE` | `<0-3>` | `2` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)<br> Lower value means lower driver thread priority and longer sleep time for it. Has no effect if the driver does not support the required hint. |
|
||||
| `KEY_CLDNN_ENABLE_`<br>`FP16_FOR_QUANTIZED_`<br>`MODELS` | `YES` / `NO` | `YES` | Allows using FP16+INT8 mixed precision mode, so non-quantized parts of a model will be executed in FP16 precision for FP16 IR. Does not affect quantized FP32 IRs |
|
||||
| `KEY_GPU_NV12_`<br>`TWO_INPUTS` | `YES` / `NO` | `NO` | Controls preprocessing logic for nv12 input. If it is set to YES, then the device graph will expect that the user will set a biplanar nv12 blob as input, which will be directly passed to the device execution graph. Otherwise, preprocessing via GAPI is used to convert NV12->BGR, thus the GPU graph has to expect a single input |
|
||||
| `KEY_GPU_THROUGHPUT_`<br>`STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).<br>This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_GPU_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams). <br> A positive integer value creates the requested number of streams. |
|
||||
| `KEY_EXCLUSIVE_ASYNC_`<br>`REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|
|
||||
| `KEY_GPU_MAX_NUM_`<br>`THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for GPU engine, e.g, JIT compilation of GPU kernels or cpu kernel processing within GPU plugin. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the GPU kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while GPU plugin is running. Note that setting this value with lower number will affect not only the network loading time but also the cpu layers of GPU networks that are optimized with multi-threading. |
|
||||
| `KEY_GPU_ENABLE_`<br>`LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered to be most important target to optimize. |
|
||||
| `KEY_CLDNN_PLUGIN_`<br>`PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)<br> Higher value means higher priority for OpenCL queue. 0 disables the setting. **Deprecated**. Please use KEY_GPU_MODEL_PRIORITY |
|
||||
| `KEY_CLDNN_PLUGIN_`<br>`THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)<br> Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_THROTTLE |
|
||||
| `KEY_CLDNN_GRAPH_`<br>`DUMPS_DIR` | `"<dump_dir>"` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) **Deprecated**. Will be removed in the next release |
|
||||
| `KEY_CLDNN_SOURCES_`<br>`DUMPS_DIR` | `"<dump_dir>"` | `""` | Final optimized clDNN OpenCL sources dump output directory. **Deprecated**. Will be removed in the next release |
|
||||
| `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers. **Deprecated**. Will be removed in the next release |
|
||||
| `KEY_TUNING_MODE` | `TUNING_DISABLED` <br /> `TUNING_CREATE` <br /> `TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning <br /> Create tuning file (expect much longer runtime) <br /> Use an existing tuning file. **Deprecated**. Will be removed in the next release |
|
||||
| `KEY_TUNING_FILE` | `"<filename>"` | `""` | Tuning file to create / use. **Deprecated**. Will be removed in the next release |
|
||||
|
||||
## Querying GPU-specific metric keys
|
||||
* MEMORY_STATISTICS : Returns overall memory statistics of the `GPU` device allocated by the engine, broken down by allocation type. If the network has a `TensorIterator` or `Loop` operation which is not unrolled, there will be an additional allocation at the first inference phase. In such a case, querying `MEMORY_STATISTICS` should be done after the first inference for a more accurate result. The code below demonstrates how to query overall memory statistics of the `GPU` device:
|
||||
|
||||
@snippet snippets/GPU_Metric0.cpp part0
|
||||
|
||||
* MAX_BATCH_SIZE : Returns the maximum batch size for a given network which is not only executable but also does not lose performance due to the memory swap impact. Note that the returned value may not be aligned to a power of 2. Also, MODEL_PTR is a required option for this metric since the available max batch size depends on the model size. If MODEL_PTR is not given, it will return 1. The example code to set the required and optional configs for this metric is available in the following snippet:
|
||||
|
||||
@snippet snippets/GPU_Metric1.cpp part1
|
||||
|
||||
* OPTIMAL_BATCH_SIZE : Returns _optimal_ batch size for a given network on the given GPU device. The returned value is aligned to power of 2. Also, MODEL_PTR is the required option for this metric since the optimal batch size highly depends on the model. If the MODEL_PTR is not given, the value of 1 is returned. The example code to set the required and optional configs for this metric is available in the following snippet:
|
||||
|
||||
@snippet snippets/GPU_Metric1.cpp part2
|
||||
## GPU Context and Video Memory Sharing RemoteBlob API
|
||||
|
||||
See [RemoteBlob API of GPU Plugin](GPU_RemoteBlob_API.md)
|
||||
The behavior depends on specific parameters of the operations and hardware configuration.
|
||||
|
||||
## See Also
|
||||
* [Supported Devices](Supported_Devices.md)
|
||||
|
||||
[conv_simple_01]: ../img/conv_simple_01.png
|
||||
[pooling_fakequant_01]: ../img/pooling_fakequant_01.png
|
||||
[fullyconnected_activation_01]: ../img/fullyconnected_activation_01.png
|
||||
[group_convolutions_01]: ../img/group_convolutions_01.png
|
||||
[conv_sum_relu_01]: ../img/conv_sum_relu_01.png
|
||||
* [Optimization guide](@ref openvino_docs_optimization_guide_dldt_optimization_guide)
|
||||
* [GPU plugin developers documentation](https://github.com/openvinotoolkit/openvino/wiki/GPUPluginDevelopersDocs)
|
||||
|
@ -1,141 +0,0 @@
|
||||
Remote Blob API of GPU Plugin {#openvino_docs_OV_UG_supported_plugins_GPU_RemoteBlob_API}
|
||||
================================
|
||||
|
||||
The GPU plugin implementation of the `RemoteContext` and `RemoteBlob` interfaces supports GPU
|
||||
pipeline developers who need video memory sharing and interoperability with existing native APIs
|
||||
such as OpenCL\*, Microsoft DirectX\*, or VAAPI\*.
|
||||
Using these interfaces allows you to avoid any memory copy overhead when plugging the OpenVINO™ inference
|
||||
into an existing GPU pipeline. It also enables OpenCL kernels participating in the pipeline to become
|
||||
native buffer consumers or producers of the OpenVINO™ inference.
|
||||
Since the GPU plugin works on top of the clDNN library, the functionality above is also implemented
|
||||
using OpenCL and its sharing extensions provided by Intel®.
|
||||
|
||||
There are two interoperability scenarios supported by the Remote Blob API:
|
||||
|
||||
* GPU plugin context and memory objects can be constructed from low-level device, display, or memory
|
||||
handles and used to create the OpenVINO™ `ExecutableNetwork` or `Blob` class.
|
||||
* OpenCL context or buffer handles can be obtained from existing GPU plugin objects, and used in OpenCL processing.
|
||||
|
||||
Class and function declarations for the API are defined in the following files:
|
||||
* Windows\*: `gpu/gpu_context_api_ocl.hpp` and `gpu/gpu_context_api_dx.hpp`
|
||||
* Linux\*: `gpu/gpu_context_api_ocl.hpp` and `gpu/gpu_context_api_va.hpp`
|
||||
|
||||
The most common way to enable the interaction of your application with the Remote Blob API is to use user-side utility classes
|
||||
and functions that consume or produce native handles directly.
|
||||
|
||||
## Execution Context User-Side Wrappers
|
||||
|
||||
GPU plugin classes that implement the `RemoteContext` interface are responsible for context sharing.
|
||||
Obtaining a pointer to a context object is the first step of sharing pipeline objects.
|
||||
The context object of the GPU plugin directly wraps OpenCL context, setting a scope for sharing
|
||||
`ExecutableNetwork` and `RemoteBlob` objects.
|
||||
To create such objects within user context, explicitly provide the context to the plugin using the
|
||||
`make_shared_context()` overloaded function. Depending on the platform, the function accepts the
|
||||
`cl_context` handle, the pointer to the `ID3D11Device` interface, or the `VADisplay` handle, and
|
||||
returns a smart pointer to the `RemoteContext` plugin object.
|
||||
|
||||
If you do not provide any user context, the plugin uses its default internal context.
|
||||
The plugin attempts to use the same internal context object as long as plugin options are kept the same.
|
||||
Therefore, all ExecutableNetwork objects created during this time share the same context.
|
||||
Once the plugin options are changed, the internal context is replaced by the new one.
|
||||
|
||||
To request the current default context of the plugin, call the `GetDefaultContext()` method of the core engine.
|
||||
To request the internal context of the given `ExecutableNetwork`, use the `GetContext()` method.
|
||||
|
||||
## Shared Blob User-Side Wrappers
|
||||
|
||||
The classes that implement the `RemoteBlob` interface are both wrappers for native API
|
||||
memory handles (which can be obtained from them at any time) and act just like regular OpenVINO™
|
||||
`Blob` objects.
|
||||
|
||||
Once you obtain the context, you can use it to compile a new `ExecutableNetwork` or create `RemoteBlob`
|
||||
objects.
|
||||
For network compilation, use a dedicated flavor of `LoadNetwork()`, which accepts the context as an
|
||||
additional parameter.
|
||||
|
||||
To create a shared blob from a native memory handle, use `make_shared_blob()` overloaded functions
|
||||
that can accept the `cl::Buffer`, `cl::Image2D`, `cl_mem` handles, and either `ID3D11Buffer`,
|
||||
`ID3D11Texture2D` pointers or the `VASurfaceID` handle.
|
||||
All `make_shared_blob()` flavors return a smart pointer to the `Blob` object, which can be directly
|
||||
passed to the `SetBlob() `method of an inference request object.
|
||||
|
||||
## Direct NV12 video surface input
|
||||
|
||||
To support the direct consumption of a hardware video decoder output, plugin accepts two-plane video
|
||||
surfaces as arguments for the `make_shared_blob_nv12()` function, which creates an `NV12Blob` object
|
||||
and returns a smart pointer to it, which is cast to `Blob::Ptr`.
|
||||
|
||||
To ensure that the plugin generates the correct execution graph for the NV12 dual-plane input, set
|
||||
the `CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS` plugin configuration flag to `PluginConfigParams::YES`.
|
||||
|
||||
## Context & queue sharing
|
||||
|
||||
GPU plugin supports creation of shared context from `cl_command_queue` handle. In that case
|
||||
opencl context handle is extracted from given queue via OpenCL™ API, and the queue itself is used inside
|
||||
the plugin for further execution of inference primitives. Sharing of the queue changes behavior of `StartAsync()`
|
||||
method to guarantee that submission of inference primitives into given queue is finished before
|
||||
returning of control back to calling thread.
|
||||
|
||||
This sharing mechanism allows to do pipeline synchronization on app side and avoid blocking of host thread
|
||||
on waiting for completion of inference. Pseudocode may look as follows:
|
||||
|
||||
@snippet snippets/GPU_RemoteBlob_API3.cpp part0
|
||||
|
||||
### Limitations
|
||||
|
||||
- Some primitives in GPU plugin may block host thread on waiting for previous primitives before adding its kernels
|
||||
to the command queue. In such cases `StartAsync()` call takes much more time to return control to the calling thread
|
||||
as internally it waits for partial or full network completion.
|
||||
Examples of operations: Loop, TensorIterator, DetectionOutput, NonMaxSuppression
|
||||
- Synchronization of pre/post processing jobs and inference pipeline inside shared queue is the user responsibility
|
||||
- Throughput mode is not available when queue sharing is used, i.e. only single stream can be used for each executable network.
|
||||
|
||||
## Low-Level Methods and Their Parameter Description
|
||||
|
||||
The high-level wrappers above bring a direct dependency on native APIs to the user program.
|
||||
If you want to avoid the dependency, you still can directly use the `CreateContext()`,
|
||||
`CreateBlob()`, and `getParams()` methods.
|
||||
On this level, native handles are re-interpreted as void pointers and all arguments are passed
|
||||
using `std::map` containers that are filled with `std::string, InferenceEngine::Parameter` pairs.
|
||||
Two types of map entries are possible: descriptor and container. The first map entry is a
|
||||
descriptor, which sets the expected structure and possible parameter values of the map.
|
||||
|
||||
**Parameter Map Entries**
|
||||
|
||||
| Key Name | Description and Possible Parameter Values |
|
||||
|----------------|---------------------------------------------------------------------|
|
||||
| `CONTEXT_TYPE` | Describes the type of the shared context in a map. Can be `OCL` (for pure OpenCL context) or `VA_SHARED` (for context shared with a video decoding device). |
|
||||
| `OCL_CONTEXT` | Contains the OpenCL context handle. |
|
||||
| `OCL_QUEUE` | Contains the OpenCL queue handle if queue sharing is needed. |
|
||||
| `VA_DEVICE` | Contains the native video decoding device handle. Can be `VADisplay` or `ID3D11Device` (a pointer). |
|
||||
| `SHARED_MEM_TYPE` | Describes the type of the shared memory buffer in a map. Can be `OCL_BUFFER` (clBuffer), `OCL_IMAGE2D` (clImage2D), `VA_SURFACE()`, or `DX_BUFFER`. |
|
||||
| `MEM_HANDLE` | Contains the OpenCL memory handle. |
|
||||
| `DEV_OBJECT_HANDLE` | Contains the native video decoder surface handle. |
|
||||
| `VA_PLANE` | Contains the NV12 video decoder surface plane index. Can be `0` or `1`. |
|
||||
|
||||
> **NOTE**: To initialize the entry key and value, use the `GPU_PARAM_KEY()` or `GPU_PARAM_VALUE()` macro.
|
||||
|
||||
## Examples
|
||||
|
||||
Refer to the sections below to see pseudo-code of usage examples.
|
||||
|
||||
> **NOTE**: For low-level parameter usage examples, see the source code of user-side wrappers from the include files mentioned above.
|
||||
|
||||
### OpenCL Kernel Execution on a Shared Buffer
|
||||
|
||||
This example uses the OpenCL context obtained from an executable network object.
|
||||
|
||||
@snippet snippets/GPU_RemoteBlob_API0.cpp part0
|
||||
|
||||
### Running GPU Plugin Inference within User-Supplied Shared Context
|
||||
|
||||
@snippet snippets/GPU_RemoteBlob_API1.cpp part1
|
||||
|
||||
### Direct Consuming of the NV12 VAAPI Video Decoder Surface on Linux
|
||||
|
||||
@snippet snippets/GPU_RemoteBlob_API2.cpp part2
|
||||
|
||||
## See Also
|
||||
|
||||
* InferenceEngine::Core
|
||||
* InferenceEngine::RemoteBlob
|
docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md (new file)
@ -0,0 +1,324 @@
|
||||
Remote Tensor API of GPU Plugin {#openvino_docs_OV_UG_supported_plugins_GPU_RemoteTensor_API}
|
||||
================================
|
||||
|
||||
The GPU plugin implementation of the `ov::RemoteContext` and `ov::RemoteTensor` interfaces supports GPU
|
||||
pipeline developers who need video memory sharing and interoperability with existing native APIs
|
||||
such as OpenCL\*, Microsoft DirectX\*, or VAAPI\*.
|
||||
Using these interfaces allows you to avoid any memory copy overhead when plugging the OpenVINO™ inference
|
||||
into an existing GPU pipeline. It also enables OpenCL kernels participating in the pipeline to become
|
||||
native buffer consumers or producers of the OpenVINO™ inference.
|
||||
|
||||
There are two interoperability scenarios supported by the Remote Tensor API:
|
||||
|
||||
* GPU plugin context and memory objects can be constructed from low-level device, display, or memory
|
||||
handles and used to create the OpenVINO™ `ov::CompiledModel` or `ov::Tensor` objects.
|
||||
* OpenCL context or buffer handles can be obtained from existing GPU plugin objects, and used in OpenCL processing on the application side.
|
||||
|
||||
Class and function declarations for the API are defined in the following files:
|
||||
* Windows\*: `openvino/runtime/intel_gpu/ocl/ocl.hpp` and `openvino/runtime/intel_gpu/ocl/dx.hpp`
|
||||
* Linux\*: `openvino/runtime/intel_gpu/ocl/ocl.hpp` and `openvino/runtime/intel_gpu/ocl/va.hpp`
|
||||
|
||||
The most common way to enable the interaction of your application with the Remote Tensor API is to use user-side utility classes
|
||||
and functions that consume or produce native handles directly.
|
||||
|
||||
## Context sharing between application and GPU plugin
|
||||
|
||||
GPU plugin classes that implement the `ov::RemoteContext` interface are responsible for context sharing.
|
||||
Obtaining a context object is the first step of sharing pipeline objects.
|
||||
The context object of the GPU plugin directly wraps OpenCL context, setting a scope for sharing
|
||||
`ov::CompiledModel` and `ov::RemoteTensor` objects. The `ov::RemoteContext` object can be either created on top of
an existing handle from a native API or retrieved from the GPU plugin.
|
||||
|
||||
Once you obtain the context, you can use it to compile a new `ov::CompiledModel` or create `ov::RemoteTensor`
|
||||
objects.
|
||||
For network compilation, use a dedicated flavor of `ov::Core::compile_model()`, which accepts the context as an
|
||||
additional parameter.
|
||||
|
||||
### Creation of RemoteContext from native handle
|
||||
To create an `ov::RemoteContext` object for a user context, explicitly provide the context to the plugin using the constructor
of one of the `ov::RemoteContext` derived classes.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: Linux
|
||||
|
||||
.. tab:: Create from cl_context
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [context_from_cl_context]
|
||||
|
||||
.. tab:: Create from cl_queue
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [context_from_cl_queue]
|
||||
|
||||
.. tab:: Create from VADisplay
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [context_from_va_display]
|
||||
|
||||
.. tab:: Windows
|
||||
|
||||
.. tab:: Create from cl_context
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [context_from_cl_context]
|
||||
|
||||
.. tab:: Create from cl_queue
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [context_from_cl_queue]
|
||||
|
||||
.. tab:: Create from ID3D11Device
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [context_from_d3d_device]
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
### Getting RemoteContext from the plugin
|
||||
If you do not provide any user context, the plugin uses its default internal context.
|
||||
The plugin attempts to use the same internal context object as long as plugin options are kept the same.
|
||||
Therefore, all `ov::CompiledModel` objects created during this time share the same context.
|
||||
Once the plugin options are changed, the internal context is replaced by the new one.
|
||||
|
||||
To request the current default context of the plugin, use one of the following methods:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: Get context from Core
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [default_context_from_core]
|
||||
|
||||
.. tab:: Get context from CompiledModel
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [default_context_from_model]
|
||||
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Memory sharing between application and GPU plugin
|
||||
|
||||
The classes that implement the `ov::RemoteTensor` interface are the wrappers for native API
|
||||
memory handles (which can be obtained from them at any time).
|
||||
|
||||
To create a shared tensor from a native memory handle, use the dedicated `create_tensor` or `create_tensor_nv12` methods
of the `ov::RemoteContext` sub-classes.
`ov::intel_gpu::ocl::ClContext` has multiple overloads of the `create_tensor` method which allow wrapping pre-allocated native handles with an `ov::RemoteTensor`
object or requesting the plugin to allocate specific device memory. See the code snippets below for more details.
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: Wrap native handles
|
||||
|
||||
.. tab:: USM pointer
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [wrap_usm_pointer]
|
||||
|
||||
.. tab:: cl_mem
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [wrap_cl_mem]
|
||||
|
||||
.. tab:: cl::Buffer
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [wrap_cl_buffer]
|
||||
|
||||
.. tab:: cl::Image2D
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [wrap_cl_image]
|
||||
|
||||
.. tab:: biplanar NV12 surface
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [wrap_nv12_surface]
|
||||
|
||||
.. tab:: Allocate device memory
|
||||
|
||||
.. tab:: USM host memory
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [allocate_usm_host]
|
||||
|
||||
.. tab:: USM device memory
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [allocate_usm_device]
|
||||
|
||||
.. tab:: cl::Buffer
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp
|
||||
:language: cpp
|
||||
:fragment: [allocate_cl_buffer]
|
||||
|
||||
@endsphinxdirective
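
As a usage illustration (a sketch, not one of the referenced snippets; it assumes a single-input model already compiled for `"GPU"`), an allocated USM host tensor can be bound to an infer request roughly as follows:

```cpp
#include <openvino/openvino.hpp>
#include <openvino/runtime/intel_gpu/ocl/ocl.hpp>

// Sketch: compiled_model is assumed to come from ov::Core::compile_model(model, "GPU").
void run_with_usm_host_tensor(ov::CompiledModel& compiled_model) {
    // Retrieve the plugin context and allocate a USM host tensor matching the model input.
    auto gpu_context = compiled_model.get_context().as<ov::intel_gpu::ocl::ClContext>();
    auto input = compiled_model.input();
    auto usm_tensor = gpu_context.create_usm_host_tensor(input.get_element_type(), input.get_shape());

    // Bind the remote tensor to the request and run inference.
    auto infer_request = compiled_model.create_infer_request();
    infer_request.set_tensor(input, usm_tensor);
    infer_request.infer();
}
```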
|
||||
|
||||
The `ov::intel_gpu::ocl::D3DContext` and `ov::intel_gpu::ocl::VAContext` classes are derived from `ov::intel_gpu::ocl::ClContext`,
thus they provide the functionality described above and extend it
to allow creation of `ov::RemoteTensor` objects from `ID3D11Buffer`, `ID3D11Texture2D` pointers or the `VASurfaceID` handle, respectively.
|
||||
|
||||
## Direct NV12 video surface input
|
||||
|
||||
To support the direct consumption of a hardware video decoder output, the plugin accepts two-plane video
surfaces as arguments for the `create_tensor_nv12()` function, which creates a pair of `ov::RemoteTensor`
objects representing the Y and UV planes.
|
||||
|
||||
To ensure that the plugin generates the correct execution graph for the NV12 dual-plane input, static preprocessing
|
||||
should be added before model compilation:
|
||||
|
||||
@snippet snippets/gpu/preprocessing.cpp init_preproc
|
||||
|
||||
Since `ov::intel_gpu::ocl::ClImage2DTensor` (and derived classes) does not support batched surfaces, in cases when batching and surface sharing are required
at the same time, the user needs to set inputs via the `ov::InferRequest::set_tensors` method with a vector of shared surfaces for each plane:
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: Single batch
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/preprocessing.cpp
|
||||
:language: cpp
|
||||
:fragment: [single_batch]
|
||||
|
||||
.. tab:: Multiple batches
|
||||
|
||||
.. doxygensnippet:: docs/snippets/gpu/preprocessing.cpp
|
||||
:language: cpp
|
||||
:fragment: [batched_case]
|
||||
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
The I420 color format can be processed in a similar way.
|
||||
|
||||
## Context & queue sharing
|
||||
|
||||
The GPU plugin supports creation of a shared context from a `cl_command_queue` handle. In that case,
the OpenCL context handle is extracted from the given queue via the OpenCL™ API, and the queue itself is used inside
the plugin for further execution of inference primitives. Sharing the queue changes the behavior of the `ov::InferRequest::start_async()`
method to guarantee that submission of inference primitives into the given queue is finished before
control is returned back to the calling thread.
|
||||
|
||||
This sharing mechanism allows pipeline synchronization to be done on the application side and avoids blocking the host thread
while waiting for the completion of inference. Pseudocode may look as follows:
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
<div class="collapsible-section" data-title="Queue and context sharing example">
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
@snippet snippets/gpu/queue_sharing.cpp queue_sharing
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
### Limitations
|
||||
|
||||
- Some primitives in the GPU plugin may block the host thread while waiting for previous primitives before adding their kernels
to the command queue. In such cases the `ov::InferRequest::start_async()` call takes much more time to return control to the calling thread,
as internally it waits for partial or full network completion.
Examples of such operations: Loop, TensorIterator, DetectionOutput, NonMaxSuppression
- Synchronization of pre/post-processing jobs and the inference pipeline inside a shared queue is the user's responsibility
- Throughput mode is not available when queue sharing is used, i.e. only a single stream can be used for each compiled model.
|
||||
|
||||
## Low-Level Methods for RemoteContext and RemoteTensor creation
|
||||
|
||||
The high-level wrappers above bring a direct dependency on native APIs to the user program.
|
||||
If you want to avoid the dependency, you still can directly use the `ov::Core::create_context()`,
|
||||
`ov::RemoteContext::create_tensor()`, and `ov::RemoteContext::get_params()` methods.
|
||||
On this level, native handles are re-interpreted as void pointers and all arguments are passed
|
||||
using `ov::AnyMap` containers that are filled with `std::string, ov::Any` pairs.
|
||||
Two types of map entries are possible: descriptor and container. The first map entry is a
|
||||
descriptor, which sets the expected structure and possible parameter values of the map.
|
||||
|
||||
Refer to `openvino/runtime/intel_gpu/remote_properties.hpp` header file for possible low-level properties and their description.
|
||||
|
||||
## Examples
|
||||
|
||||
Refer to the sections below to see pseudo-code of usage examples.
|
||||
|
||||
> **NOTE**: For low-level parameter usage examples, see the source code of user-side wrappers from the include files mentioned above.
|
||||
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
<div class="collapsible-section" data-title="OpenCL Kernel Execution on a Shared Buffer">
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
This example uses the OpenCL context obtained from a compiled model object.
|
||||
|
||||
@snippet snippets/gpu/context_sharing.cpp context_sharing_get_from_ov
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
<div class="collapsible-section" data-title="Running GPU Plugin Inference within User-Supplied Shared Context">
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
@snippet snippets/gpu/context_sharing.cpp context_sharing_user_handle
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
<div class="collapsible-section" data-title="Direct Consuming of the NV12 VAAPI Video Decoder Surface on Linux">
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
@snippet snippets/gpu/context_sharing_va.cpp context_sharing_va
|
||||
|
||||
@sphinxdirective
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## See Also
|
||||
|
||||
* ov::Core
|
||||
* ov::RemoteTensor
|
@ -13,8 +13,8 @@ The OpenVINO Runtime provides unique capabilities to infer deep learning models
|
||||
|[CPU plugin](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) |
|
||||
|[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs |
|
||||
|[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor|
|
||||
|[Multi-Device execution](../multi_device.md) |Multi-Device execution enables simultaneous inference of the same model on several devices in parallel |
|
||||
|[Auto-Device plugin](../auto_device_selection.md) |Auto-Device plugin enables selecting Intel® device for inference automatically |
|
||||
|[Heterogeneous plugin](../hetero_execution.md) |Heterogeneous execution enables automatic inference splitting between several devices (for example if a device doesn't [support certain operation](#supported-layers)). |
|
||||
|
||||
Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).
|
||||
@ -69,10 +69,9 @@ For example, the CHW value at index (c,h,w) is physically located at index (c\*H
|
||||
|Plugin |FP32 |FP16 |I8 |
|
||||
|:-------------|:----------------------:|:----------------------:|:----------------------:|
|
||||
|CPU plugin |Supported and preferred |Supported |Supported |
|
||||
|GPU plugin |Supported |Supported and preferred |Supported\* |
|
||||
|GPU plugin |Supported |Supported and preferred |Supported |
|
||||
|VPU plugins |Not supported |Supported |Not supported |
|
||||
|GNA plugin |Supported |Supported |Not supported |
|
||||
<br>\* - currently, only limited set of topologies might benefit from enabling I8 model on GPU<br>
|
||||
For [Multi-Device](../multi_device.md) and [Heterogeneous](../hetero_execution.md) executions,
the supported model formats depend on the actual underlying devices. _Generally, FP16 is preferable as it is the most ubiquitous and performant_.
|
||||
|
||||
|
docs/_static/css/custom.css
@ -49,7 +49,31 @@ main img {
|
||||
background-image: url('media/union-up.svg');
|
||||
}
|
||||
|
||||
div.highlight {
|
||||
margin-bottom: 1.15rem;
|
||||
}
|
||||
|
||||
.highlight .err {
|
||||
border:none;
|
||||
color:inherit;
|
||||
}
|
||||
|
||||
.opt-notice-wrapper {
|
||||
position: fixed;
|
||||
bottom:0;
|
||||
background: black;
|
||||
width:100%;
|
||||
text-align: center;
|
||||
padding: 1rem;
|
||||
z-index: 1000;
|
||||
}
|
||||
|
||||
.opt-notice {
|
||||
margin-bottom: 0;
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
transform: translateY(-50%);
|
||||
text-align: center;
|
||||
width:100%;
|
||||
color: #fff;
|
||||
}
|
||||
|
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img01-localhost.png
vendored
Normal file
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img01-localhost.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:36f4b9e0714e819b0c98a30f3c08d6ce1f9206906be42e80cb1fa746e6354ad6
|
||||
size 25333
|
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img02-launch.png
vendored
Normal file
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img02-launch.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a5a0022bda018ae7e5261bbb9f5e8cc28374254c272dd3cbc2ab2f872381e2c5
|
||||
size 21106
|
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img03-hotspots.png
vendored
Normal file
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img03-hotspots.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0319b56afe702fb09957f4d3c996155be79efc672edc0e780d34441eaa660b2c
|
||||
size 43521
|
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img04-vtunesummary.png
vendored
Normal file
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img04-vtunesummary.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:813e629fe674b676c8484a92f94d55ffc13cacc1e077fa19a2c82cd528819d72
|
||||
size 256217
|
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img05-vtunebottomup.png
vendored
Normal file
3
docs/_static/images/IE_DG_supported_plugins_AUTO_debugging-img05-vtunebottomup.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:62fb4b08191499cfe765f3777dd7ae543739232cfd8861e21f976ba09aa9797b
|
||||
size 176560
|
3
docs/_static/images/accuracy_table_yolo.png
vendored
Normal file
3
docs/_static/images/accuracy_table_yolo.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0d9d37e783ef8cf930f4009743cb604ff3180225f981d07f917c7f25846bbefe
|
||||
size 125159
|
3
docs/_static/images/accuracy_table_yolo.png.png
vendored
Normal file
3
docs/_static/images/accuracy_table_yolo.png.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7b11408aa0c5aba0a28615a585abda61b539cb198bf511297275949d6dee78e5
|
||||
size 115623
|
3
docs/_static/images/accuracy_table_yolo_advanced.png
vendored
Normal file
3
docs/_static/images/accuracy_table_yolo_advanced.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:50bd093500582971fd9ad3c99556d739132680328adfa9e034d9093fb5a31023
|
||||
size 53183
|
3
docs/_static/images/accuracy_table_yolo_basic.png
vendored
Normal file
3
docs/_static/images/accuracy_table_yolo_basic.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c6f96b5460e218899068bb9bcaf4caae7d9b5827ac44ff5a6d7145dfb5a4a6a3
|
||||
size 43233
|
3
docs/_static/images/calibration_yolov4.png
vendored
Normal file
3
docs/_static/images/calibration_yolov4.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:351cf888e0ed3d5c6701db028523859c42faa91ac2ce95b7ee00eaa2db8d6ad5
|
||||
size 60247
|
3
docs/_static/images/check_yolo_model.png
vendored
Normal file
3
docs/_static/images/check_yolo_model.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e2cf1cfc3cc38d25f6a46ff0cd2bc63ff1d93bb3e52b57c640f80a73e56f3c44
|
||||
size 489383
|
3
docs/_static/images/config_filled.png
vendored
Normal file
3
docs/_static/images/config_filled.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6a63fe316d6ee337ccafcf83ff088fe413b985793631d2cf7d4ff6d5f0c5391b
|
||||
size 51962
|
3
docs/_static/images/convert_model_to_ir_general.png
vendored
Normal file
3
docs/_static/images/convert_model_to_ir_general.png
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cdf4769ec02cd5fb13335f11436c18151aaacbb0efe13b680ff8813ca3c2f997
|
||||
size 45839
|