commit b8966457b9
Merge remote-tracking branch 'upstream/master'
@@ -103,6 +103,7 @@ jobs:
workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies'

# Should be after 'Install dependencies' because Git lfs is not installed
- checkout: testdata
clean: true
lfs: true
@@ -140,18 +141,18 @@ jobs:
- script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
workingDirectory: $(BUILD_DIR)
displayName: 'Install'

- task: CMake@1
inputs:
cmakeArgs: >
-GNinja
$(REPO_DIR)/tests/layer_tests
workingDirectory: $(BUILD_LAYER_TESTS_DIR)

- script: ninja
workingDirectory: $(BUILD_LAYER_TESTS_DIR)
displayName: 'Build Layer Tests'

- script: cmake -DCOMPONENT=tests -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
workingDirectory: $(BUILD_LAYER_TESTS_DIR)
displayName: 'Install Layer Tests'
@@ -166,7 +167,7 @@ jobs:
cp -R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_ubuntu20/opencv/* $(INSTALL_DIR)/opencv/
workingDirectory: $(BUILD_DIR)
displayName: 'Install tests'

- script: ls -alR $(INSTALL_DIR)
displayName: 'List install files'

@@ -177,7 +178,7 @@ jobs:
- script: $(INSTALL_DIR)/deployment_tools/inference_engine/samples/c/build_samples.sh
workingDirectory: $(BUILD_SAMPLES_DIR)
displayName: 'Build c samples'

- script: rm -fr $(BUILD_DIR)
displayName: 'Clean build dir'
continueOnError: false
@@ -253,7 +254,7 @@ jobs:
. $(SETUPVARS) -pyver 3.8 && python3 -m pytest --junitxml=TEST-PythonAPI.xml
displayName: 'Python API Tests'
continueOnError: false

- script: |
. $(SETUPVARS)
python3 -m pip install -r requirements.txt

@@ -28,19 +28,19 @@ jobs:
MODELS_PATH: $(REPO_DIR)/../testdata
WORK_DIR: $(Pipeline.Workspace)/_w
BUILD_DIR: $(WORK_DIR)/build
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
INSTALL_DIR: $(WORK_DIR)/install_pkg
INSTALL_TEST_DIR: $(INSTALL_DIR)/tests
SETUPVARS: $(INSTALL_DIR)/bin/setupvars.sh

steps:
- script: |
whoami
uname -a
which python3
python3 --version
which java
java -version
gcc --version
echo Python3 info ; which python3 ; python3 --version
echo Python info ; which python ; python --version
echo Java info ; which java ; java -version
echo gcc info ; which gcc ; gcc --version
echo cmake info ; which cmake ; cmake --version
xcrun --sdk macosx --show-sdk-version
env
sysctl -a
@@ -91,47 +91,64 @@ jobs:
workingDirectory: $(BUILD_DIR)
displayName: 'CMake'

- script: ls -alR $(REPO_DIR)/inference-engine/temp/
displayName: 'List temp SDKs'

- script: ninja
workingDirectory: $(BUILD_DIR)
displayName: 'Build Mac'

- script: ls -alR $(REPO_DIR)/bin/
displayName: 'List files'
displayName: 'List bin files'

- script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
workingDirectory: $(BUILD_DIR)
displayName: 'Install'

- script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml
workingDirectory: $(BIN_DIR)
- script: ls -alR $(INSTALL_DIR)
displayName: 'List install files'

- script: |
set -e
mkdir $(INSTALL_DIR)/opencv/
cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake
cp -R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_osx/opencv/* $(INSTALL_DIR)/opencv/
workingDirectory: $(BUILD_DIR)
displayName: 'Install tests'

- script: ls -alR $(INSTALL_DIR)
displayName: 'List install files'

- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml
workingDirectory: $(INSTALL_TEST_DIR)
displayName: 'nGraph UT'
continueOnError: false

- script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_filter=-MKLDNNGraphStructureTests.TestNoRedundantReordersBeforeDWConvolution:TestConvolution/MKLDNNGraphConvolutionTests.TestsConvolution/0:TestConvolutionDefaultPrimitivesPriority/MKLDNNGraphConvolutionTests.TestsConvolution/0 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_filter=-MKLDNNGraphStructureTests.TestNoRedundantReordersBeforeDWConvolution:TestConvolution/MKLDNNGraphConvolutionTests.TestsConvolution/0:TestConvolutionDefaultPrimitivesPriority/MKLDNNGraphConvolutionTests.TestsConvolution/0 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
displayName: 'IE UT old'
continueOnError: false

- script: $(BIN_DIR)/ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
displayName: 'IE UT'
continueOnError: false

- script: $(BIN_DIR)/cpuUnitTests --gtest_output=xml:TEST-cpuUnitTests.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/cpuUnitTests --gtest_output=xml:TEST-cpuUnitTests.xml
displayName: 'CPU UT'
continueOnError: false

- script: $(BIN_DIR)/vpuUnitTests --gtest_output=xml:TEST-vpuUnitTests.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/vpuUnitTests --gtest_output=xml:TEST-vpuUnitTests.xml
displayName: 'VPU UT'
continueOnError: false

- script: $(BIN_DIR)/onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml
displayName: 'ONNX Importer UT'
continueOnError: false

- script: $(BIN_DIR)/ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
displayName: 'IE FuncTests'
continueOnError: false

- script: $(BIN_DIR)/cpuFuncTests --gtest_filter=*smoke*:-smoke_LPT/ReduceMinTransformation.CompareWithRefImpl/f32_Shape* --gtest_print_time=1 --gtest_output=xml:TEST-cpuFuncTests.xml
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/cpuFuncTests --gtest_filter=*smoke*:-smoke_LPT/ReduceMinTransformation.CompareWithRefImpl/f32_Shape* --gtest_print_time=1 --gtest_output=xml:TEST-cpuFuncTests.xml
displayName: 'CPU FuncTests'
continueOnError: false
enabled: false
@@ -139,7 +156,7 @@ jobs:
- script: |
export DATA_PATH=$(MODELS_PATH)
export MODELS_PATH=$(MODELS_PATH)
$(BIN_DIR)/InferenceEngineCAPITests --gtest_output=xml:TEST-InferenceEngineCAPITests.xml
. $(SETUPVARS) && $(INSTALL_TEST_DIR)/InferenceEngineCAPITests --gtest_output=xml:TEST-InferenceEngineCAPITests.xml
displayName: 'IE CAPITests'
continueOnError: false

@@ -16,7 +16,7 @@ jobs:
timeoutInMinutes: 120

pool:
name: WIN_VMSS_VENV_F16S_WU2
name: WIN_VMSS_VENV_F8S_WU2

variables:
system.debug: true
@@ -34,6 +34,8 @@ jobs:
INSTALL_DIR: $(WORK_DIR)\install_pkg
INSTALL_TEST_DIR: $(INSTALL_DIR)\tests
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe

steps:
- script: |
@@ -57,6 +59,12 @@ jobs:
rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR)
displayName: 'Make dir'

- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'

- checkout: self
clean: true
lfs: false
@@ -101,7 +109,9 @@ jobs:
- script: dir $(REPO_DIR)\inference-engine\temp\ /s
displayName: 'List temp SDKs'

- script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
- script: |
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
workingDirectory: $(BUILD_DIR)
displayName: 'Build Win'

@@ -143,8 +153,10 @@ jobs:
displayName: 'PaddlePaddle Frontend UT'
continueOnError: false

- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
displayName: 'IE UT old'
- script: |
set PATH=$(IB_DIR);%PATH%
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml
displayName: 'IE UT old - IB'
continueOnError: false

- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
@@ -175,8 +187,11 @@ jobs:
displayName: 'TEMPLATE FuncTests'
continueOnError: false

- script: $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
displayName: 'CPU FuncTests'
# call $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
- script: |
set PATH=$(IB_DIR);%PATH%
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke*:-*CompareWithRefs/base_size=16_pre_nms_topn=100_post_nms_topn=100_nms_thresh=0.7_feat_stride=1_min_size=1_ratio*:*smoke_GRUSequenceCommonZeroClip/GRUSequenceTest.CompareWithRefs/mode=CONVERT_TO_TI_MAX_SEQ_LEN_CONST_seq_lengths* --gtest_output=xml:TEST-cpuFuncTests-IB.xml /testlevel=24
displayName: 'CPU FuncTests - IB'
continueOnError: false

- script: |
@@ -198,3 +213,8 @@ jobs:
buildPlatform: 'x64' # Optional
buildConfiguration: 'Windows' # Optional
#publishRunAttachments: true # Optional

- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
displayName: Stop IncrediBuild
continueOnError: true
enabled: false

@@ -44,7 +44,6 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
/inference-engine/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tools/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
/inference-engine/scripts/run_tests_myriad_multistick.sh @openvinotoolkit/openvino-ie-vpu-maintainers

# IE GNA:

@@ -2,6 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
#

cmake_policy(SET CMP0007 NEW)

set(newContent " <plugin name=\"${IE_DEVICE_NAME}\" location=\"${IE_PLUGIN_LIBRARY_NAME}\">")

if(IE_PLUGIN_PROPERTIES)
@@ -9,10 +11,11 @@ if(IE_PLUGIN_PROPERTIES)
<properties>")

foreach(props IN LISTS IE_PLUGIN_PROPERTIES)
string(REPLACE "," ";" props "${props}")
string(REPLACE ":" ";" props "${props}")

list(GET props 0 key)
list(GET props 1 value)

set(newContent "${newContent}
<property key=\"${key}\" value=\"${value}\"/>")
endforeach()

@@ -20,19 +20,18 @@ endif()
#
# ie_add_plugin(NAME <targetName>
# DEVICE_NAME <deviceName>
# SOURCES <sources>
# OBJECT_LIBRARIES <object_libs>
# VERSION_DEFINES_FOR <source>
# SKIP_INSTALL
# [PSEUDO]
# [DEFAULT_CONFIG <key:value;...>]
# [SOURCES <sources>]
# [OBJECT_LIBRARIES <object_libs>]
# [VERSION_DEFINES_FOR <source>]
# [SKIP_INSTALL]
# )
#
function(ie_add_plugin)
set(options
SKIP_INSTALL
ADD_CLANG_FORMAT
)
set(options SKIP_INSTALL ADD_CLANG_FORMAT PSEUDO_PLUGIN)
set(oneValueArgs NAME DEVICE_NAME VERSION_DEFINES_FOR)
set(multiValueArgs SOURCES OBJECT_LIBRARIES CPPLINT_FILTERS)
set(multiValueArgs DEFAULT_CONFIG SOURCES OBJECT_LIBRARIES CPPLINT_FILTERS)
cmake_parse_arguments(IE_PLUGIN "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

if(NOT IE_PLUGIN_NAME)
@@ -45,41 +44,73 @@ function(ie_add_plugin)

# create and configure target

if(IE_PLUGIN_VERSION_DEFINES_FOR)
addVersionDefines(${IE_PLUGIN_VERSION_DEFINES_FOR} CI_BUILD_NUMBER)
endif()
if(NOT IE_PLUGIN_PSEUDO_PLUGIN)
if(IE_PLUGIN_VERSION_DEFINES_FOR)
addVersionDefines(${IE_PLUGIN_VERSION_DEFINES_FOR} CI_BUILD_NUMBER)
endif()

set(input_files ${IE_PLUGIN_SOURCES})
foreach(obj_lib IN LISTS IE_PLUGIN_OBJECT_LIBRARIES)
list(APPEND input_files $<TARGET_OBJECTS:${obj_lib}>)
add_cpplint_target(${obj_lib}_cpplint FOR_TARGETS ${obj_lib})
endforeach()
set(input_files ${IE_PLUGIN_SOURCES})
foreach(obj_lib IN LISTS IE_PLUGIN_OBJECT_LIBRARIES)
list(APPEND input_files $<TARGET_OBJECTS:${obj_lib}>)
add_cpplint_target(${obj_lib}_cpplint FOR_TARGETS ${obj_lib})
endforeach()

add_library(${IE_PLUGIN_NAME} MODULE ${input_files})
target_compile_definitions(${IE_PLUGIN_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_PLUGIN)
add_library(${IE_PLUGIN_NAME} MODULE ${input_files})
target_compile_definitions(${IE_PLUGIN_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_PLUGIN)

ie_add_vs_version_file(NAME ${IE_PLUGIN_NAME}
FILEDESCRIPTION "Inference Engine ${IE_PLUGIN_DEVICE_NAME} device plugin library")
ie_add_vs_version_file(NAME ${IE_PLUGIN_NAME}
FILEDESCRIPTION "Inference Engine ${IE_PLUGIN_DEVICE_NAME} device plugin library")

if(TARGET IE::inference_engine_plugin_api)
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE IE::inference_engine_plugin_api)
else()
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE inference_engine_plugin_api)
endif()
if(TARGET IE::inference_engine_plugin_api)
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE IE::inference_engine_plugin_api)
else()
target_link_libraries(${IE_PLUGIN_NAME} PRIVATE inference_engine_plugin_api)
endif()

if(WIN32)
set_target_properties(${IE_PLUGIN_NAME} PROPERTIES COMPILE_PDB_NAME ${IE_PLUGIN_NAME})
endif()
if(WIN32)
set_target_properties(${IE_PLUGIN_NAME} PROPERTIES COMPILE_PDB_NAME ${IE_PLUGIN_NAME})
endif()

set(custom_filter "")
foreach(filter IN LISTS IE_PLUGIN_CPPLINT_FILTERS)
string(CONCAT custom_filter "${custom_filter}" "," "${filter}")
endforeach()
set(custom_filter "")
foreach(filter IN LISTS IE_PLUGIN_CPPLINT_FILTERS)
string(CONCAT custom_filter "${custom_filter}" "," "${filter}")
endforeach()

if (IE_PLUGIN_ADD_CLANG_FORMAT)
add_clang_format_target(${IE_PLUGIN_NAME}_clang FOR_TARGETS ${IE_PLUGIN_NAME})
else()
add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME} CUSTOM_FILTERS ${custom_filter})
if (IE_PLUGIN_ADD_CLANG_FORMAT)
add_clang_format_target(${IE_PLUGIN_NAME}_clang FOR_TARGETS ${IE_PLUGIN_NAME})
else()
add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME} CUSTOM_FILTERS ${custom_filter})
endif()

add_dependencies(ie_plugins ${IE_PLUGIN_NAME})
if(TARGET inference_engine_preproc)
add_dependencies(${IE_PLUGIN_NAME} inference_engine_preproc)
endif()

# fake dependencies to build in the following order:
# IE -> IE readers -> IE inference plugins -> IE-based apps
if(TARGET inference_engine_ir_reader)
add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_reader)
endif()
if(TARGET inference_engine_ir_v7_reader)
add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_v7_reader)
endif()
if(TARGET onnx_ngraph_frontend)
add_dependencies(${IE_PLUGIN_NAME} onnx_ngraph_frontend)
endif()
if(TARGET paddlepaddle_ngraph_frontend)
add_dependencies(${IE_PLUGIN_NAME} paddlepaddle_ngraph_frontend)
endif()

# install rules
if(NOT IE_PLUGIN_SKIP_INSTALL)
string(TOLOWER "${IE_PLUGIN_DEVICE_NAME}" install_component)
ie_cpack_add_component(${install_component} REQUIRED DEPENDS core)

install(TARGETS ${IE_PLUGIN_NAME}
LIBRARY DESTINATION ${IE_CPACK_RUNTIME_PATH}
COMPONENT ${install_component})
endif()
endif()

# check that plugin with such name is not registered
@@ -98,33 +129,7 @@ function(ie_add_plugin)

list(APPEND PLUGIN_FILES "${IE_PLUGIN_DEVICE_NAME}:${IE_PLUGIN_NAME}")
set(PLUGIN_FILES "${PLUGIN_FILES}" CACHE INTERNAL "" FORCE)

add_dependencies(ie_plugins ${IE_PLUGIN_NAME})
if(TARGET inference_engine_preproc)
add_dependencies(${IE_PLUGIN_NAME} inference_engine_preproc)
endif()

# fake dependencies to build in the following order:
# IE -> IE readers -> IE inference plugins -> IE-based apps
if(TARGET inference_engine_ir_reader)
add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_reader)
endif()
if(TARGET inference_engine_ir_v7_reader)
add_dependencies(${IE_PLUGIN_NAME} inference_engine_ir_v7_reader)
endif()
if(TARGET onnx_ngraph_frontend)
add_dependencies(${IE_PLUGIN_NAME} onnx_ngraph_frontend)
endif()

# install rules

if(NOT IE_PLUGIN_SKIP_INSTALL)
string(TOLOWER "${IE_PLUGIN_DEVICE_NAME}" install_component)
ie_cpack_add_component(${install_component} REQUIRED DEPENDS core)

install(TARGETS ${IE_PLUGIN_NAME}
LIBRARY DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT ${install_component})
endif()
set(${IE_PLUGIN_DEVICE_NAME}_CONFIG "${IE_PLUGIN_DEFAULT_CONFIG}" CACHE INTERNAL "" FORCE)
endfunction()

#
@@ -168,7 +173,7 @@ macro(ie_register_plugins)
list(GET name 1 name)

# create plugin file
set(config_file_name "${CMAKE_BINARY_DIR}/plugins/${name}.xml")
set(config_file_name "${CMAKE_BINARY_DIR}/plugins/${device_name}.xml")
ie_plugin_get_file_name(${name} library_name)

add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD
@@ -176,9 +181,10 @@ macro(ie_register_plugins)
"${CMAKE_COMMAND}"
-D "IE_CONFIG_OUTPUT_FILE=${config_file_name}"
-D "IE_DEVICE_NAME=${device_name}"
-D "IE_PLUGIN_PROPERTIES=${${device_name}_CONFIG}"
-D "IE_PLUGIN_LIBRARY_NAME=${library_name}"
-P "${IEDevScripts_DIR}/plugins/create_plugin_file.cmake"
COMMENT "Register ${name} plugin"
COMMENT "Register ${device_name} device as ${library_name}"
VERBATIM)

list(APPEND plugin_files_local "${config_file_name}")

@@ -1,7 +1,5 @@
# Inference Engine Developer Guide {#openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide}

> **NOTE:** [Intel® System Studio](https://software.intel.com/content/www/us/en/develop/tools/oneapi/commercial-base-iot.html) (click "Intel® System Studio Users" tab) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).

This Guide provides an overview of the Inference Engine describing the typical workflow for performing inference of a pre-trained and optimized deep learning model and a set of sample applications.

> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in runtime using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_group_intel).
@@ -111,10 +109,8 @@ The common workflow contains the following steps:
8. **Get the output** - After inference is completed, get the output memory or read the memory you provided earlier. Do this with the `InferenceEngine::IInferRequest::GetBlob()` method.

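As a hedged illustration of step 8 (not part of the original guide), the sketch below reads an output blob after a synchronous inference; the model path, device name and float output precision are assumptions.

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    // Hypothetical model path and device; replace with your own.
    auto network = core.ReadNetwork("model.xml");
    auto executable = core.LoadNetwork(network, "CPU");
    auto request = executable.CreateInferRequest();

    request.Infer();  // synchronous inference

    // Step 8: read the output memory via GetBlob().
    const std::string output_name = network.getOutputsInfo().begin()->first;
    InferenceEngine::Blob::Ptr output = request.GetBlob(output_name);
    auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(output);
    auto holder = memory->rmap();               // locked, read-only view of the data
    const float* data = holder.as<const float*>();
    (void)data;                                 // process the results here
    return 0;
}
```
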
## Video: Inference Engine Concept
[](https://www.youtube.com/watch?v=e6R13V8nbak)
\htmlonly

<iframe width="560" height="315" src="https://www.youtube.com/embed/e6R13V8nbak" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
\endhtmlonly

## Further Reading

@@ -1,6 +1,6 @@
# Custom nGraph Operation {#openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps}

Inference Engine Extension API allows you to register operation sets (opsets) with custom nGraph operations to support models with operations which OpenVINO™ does not support out-of-the-box.
The Inference Engine Extension API allows you to register operation sets (opsets) with custom nGraph operations to support models with operations that OpenVINO™ does not support out-of-the-box.

## Operation Class

@@ -25,5 +25,6 @@ Also, an `Extension` object should implement the following methods:
Implement the InferenceEngine::IExtension::getOpSets method if the extension contains custom layers.
Read [Custom nGraph Operation](AddingNGraphOps.md) for more information.

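As a hedged sketch of what a `getOpSets` implementation can look like (assuming a custom operation class named `CustomOperation` that you have defined elsewhere, which is not part of this document):

```cpp
// Sketch only: CustomOperation is a hypothetical nGraph op class you provide.
std::map<std::string, ngraph::OpSet> Extension::getOpSets() {
    std::map<std::string, ngraph::OpSet> opsets;
    ngraph::OpSet opset;
    opset.insert<CustomOperation>();   // register the custom operation type
    opsets["custom_opset"] = opset;    // opset name referenced from the IR
    return opsets;
}
```
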
To integrate execution kernels to the extension library, read [How to Implement Custom CPU Operations](CPU_Kernel.md).
To register a custom ONNX\* operator to the extension library, read [Custom ONNX Operators](Custom_ONNX_Ops.md).
To understand how to integrate execution kernels to the extension library, read the [documentation about development of custom CPU kernels](CPU_Kernel.md).

To understand how to register custom ONNX operator to the extension library, read the [documentation about custom ONNX operators](Custom_ONNX_Ops.md).

@@ -1,12 +1,5 @@
# Low-Precision 8-bit Integer Inference {#openvino_docs_IE_DG_Int8Inference}

## Table of Contents
1. [Supported devices](#supported-devices)
2. [Low-Precision 8-bit Integer Inference Workflow](#low-precision-8-bit-integer-inference-workflow)
3. [Prerequisites](#prerequisites)
4. [Inference](#inference)
5. [Results analysis](#results-analysis)

## Supported devices

Low-precision 8-bit inference is optimized for:
@@ -24,34 +17,35 @@ Low-precision 8-bit inference is optimized for:

## Low-Precision 8-bit Integer Inference Workflow

8-bit computations (referred to as `int8`) offer better performance compared to the results of inference in higher precision (for example, `fp32`), because they allow loading more data into a single processor instruction. Usually the cost for significant boost is a reduced accuracy. However, it is proved that an accuracy drop can be negligible and depends on task requirements, so that the application engineer can set up the maximum accuracy drop that is acceptable.
8-bit computations (referred to as `int8`) offer better performance compared to the results of inference in higher precision (for example, `fp32`), because they allow loading more data into a single processor instruction. Usually the cost for significant boost is reduced accuracy. However, it is proved that an accuracy drop can be negligible and depends on task requirements, so that the application engineer can set up the maximum accuracy drop that is acceptable.

For 8-bit integer computations, a model must be quantized. Quantized models can be downloaded from [Overview of OpenVINO™ Toolkit Intel's Pre-Trained Models](@ref omz_models_group_intel). If the model is not quantized, you can use the [Post-Training Optimization Tool](@ref pot_README) to quantize the model. The quantization process adds [FakeQuantize](../ops/quantization/FakeQuantize_1.md) layers on activations and weights for most layers. Read more about mathematical computations in the [Uniform Quantization with Fine-Tuning](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md).

When you pass the quantized IR to the OpenVINO™ plugin, the plugin automatically recognizes it as a quantized model and performs 8-bit inference. Note, if you pass a quantized model to another plugin that does not support 8-bit inference but supports all operations from the model, the model is inferred in precision that this plugin supports.

In *Runtime stage* stage, the quantized model is loaded to the plugin. The plugin uses `Low Precision Transformation` component to update the model to infer it in low precision:
- Update `FakeQuantize` layers to have quantized output tensors in low precision range and add dequantization layers to compensate the update. Dequantization layers are pushed through as many layers as possible to have more layers in low precision. After that, most layers have quantized input tensors in low precision range and can be inferred in low precision. Ideally, dequantization layers should be fused in the next `FakeQuantize` layer.
- Weights are quantized and stored in `Constant` layers.
In *Runtime stage*, the quantized model is loaded to the plugin. The plugin uses the `Low Precision Transformation` component to update the model to infer it in low precision:
- Update `FakeQuantize` layers to have quantized output tensors in a low precision range and add dequantization layers to compensate the update. Dequantization layers are pushed through as many layers as possible to have more layers in low precision. After that, most layers quantized input tensors in the low precision range and can be inferred in low precision. Ideally, dequantization layers should be fused in the next `FakeQuantize` layer.
- Quantize weights and store them in `Constant` layers.

## Prerequisites

Let's explore quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model. Use [Model Downloader](@ref omz_tools_downloader) tool to download the `fp16` model from [OpenVINO™ Toolkit - Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo):
Let's explore the quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model. Use the [Model Downloader](@ref omz_tools_downloader) tool to download the `fp16` model from [OpenVINO™ Toolkit - Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo):
```sh
./downloader.py --name resnet-50-tf --precisions FP16-INT8
cd $INTEL_OPENVINO_DIR/deployment_tools/tools/model_downloader
./downloader.py --name resnet-50-tf --precisions FP16-INT8 --output_dir <your_model_directory>
```
After that you should quantize model by the [Model Quantizer](@ref omz_tools_downloader) tool.
After that, you should quantize the model by the [Model Quantizer](@ref omz_tools_downloader) tool. For the dataset, you can choose to download the ImageNet dataset from [here](https://www.image-net.org/download.php).
```sh
./quantizer.py --model_dir public/resnet-50-tf --dataset_dir <DATASET_DIR> --precisions=FP16-INT8
./quantizer.py --model_dir --name public/resnet-50-tf --dataset_dir <DATASET_DIR> --precisions=FP16-INT8
```

## Inference

The simplest way to infer the model and collect performance counters is [C++ Benchmark Application](../../inference-engine/samples/benchmark_app/README.md).
The simplest way to infer the model and collect performance counters is the [C++ Benchmark Application](../../inference-engine/samples/benchmark_app/README.md).
```sh
./benchmark_app -m resnet-50-tf.xml -d CPU -niter 1 -api sync -report_type average_counters -report_folder pc_report_dir
```
If you infer the model with the OpenVINO™ CPU plugin and collect performance counters, all operations (except last not quantized SoftMax) are executed in INT8 precision.
If you infer the model with the Inference Engine CPU plugin and collect performance counters, all operations (except the last non-quantized SoftMax) are executed in INT8 precision.

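As a hedged alternative to the benchmark application, performance counters can also be collected programmatically; the sketch below assumes the quantized `resnet-50-tf.xml` IR from the previous step and uses the standard `KEY_PERF_COUNT` configuration key.

```cpp
#include <inference_engine.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    // Assumed path to the quantized IR produced above.
    auto network = core.ReadNetwork("resnet-50-tf.xml");
    auto executable = core.LoadNetwork(network, "CPU",
        {{InferenceEngine::PluginConfigParams::KEY_PERF_COUNT,
          InferenceEngine::PluginConfigParams::YES}});
    auto request = executable.CreateInferRequest();
    request.Infer();

    // Each entry reports the kernel execution type, e.g. an avx512 int8 JIT kernel.
    for (const auto& counter : request.GetPerformanceCounts()) {
        std::cout << counter.first << ": " << counter.second.exec_type << std::endl;
    }
    return 0;
}
```
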
## Results analysis

@@ -35,7 +35,7 @@ Integration process includes the following steps:

@snippet snippets/Integrate_with_customer_application_new_API.cpp part1

**Or read the model from ONNX format** (.onnx and .prototxt are supported formats). You can find more information about the ONNX format support in the document [ONNX format support in the OpenVINO™](./ONNX_Support.md).
**Or read the model from ONNX format**. You can find more information about the ONNX format support in the document [ONNX format support in the OpenVINO™](./ONNX_Support.md).

@snippet snippets/Integrate_with_customer_application_new_API.cpp part2

@@ -34,7 +34,7 @@ Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README
## Using Caching API for first inference latency optimization
Since with the 2021.4 release, Inference Engine provides an ability to enable internal caching of loaded networks.
This can significantly reduce load network latency for some devices at application startup.
Internally caching uses plugin's Export/ImportNetwork flow, like it is done for [Compile tool](../../inference-engine/tools/compile_tool/README.md), using the regular ReadNetwork/LoadNetwork API.
Internally caching uses plugin's Export/ImportNetwork flow, like it is done for [Compile tool](../../tools/compile_tool/README.md), using the regular ReadNetwork/LoadNetwork API.
Refer to the [Model Caching Overview](Model_caching_overview.md) for more detailed explanation.

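A hedged sketch of enabling the cache from application code; the cache directory and the target device are assumptions, and the chosen device must support import/export as explained in the Model Caching Overview.

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    // Hypothetical cache location; any writable directory works.
    core.SetConfig({{CONFIG_KEY(CACHE_DIR), "model_cache"}});

    // The first LoadNetwork fills the cache; later runs with the same model,
    // device and configuration can reuse the exported blob and load faster.
    auto network = core.ReadNetwork("model.xml");       // placeholder model
    auto executable = core.LoadNetwork(network, "MYRIAD");  // assumed caching-capable device
    (void)executable;
    return 0;
}
```
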
## Using Async API

@@ -1,12 +0,0 @@
# Legal Information {#openvino_docs_IE_DG_Legal_Information}
<sup>No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document.</sup><br/>
<sup>Intel disclaims all express and implied warranties, including without limitation, the implied warranties of merchantability, fitness for a particular purpose, and non-infringement, as well as any warranty arising from course of performance, course of dealing, or usage in trade.</sup><br/>
<sup>This document contains information on products, services and/or processes in development. All information provided here is subject to change without notice. Contact your Intel representative to obtain the latest forecast, schedule, specifications and roadmaps.</sup><br/>
<sup>The products and services described may contain defects or errors known as errata which may cause deviations from published specifications. Current characterized errata are available on request.</sup><br/>
<sup>Copies of documents which have an order number and are referenced in this document may be obtained by calling 1-800-548-4725 or by visiting [<b>www.intel.com/design/literature.htm</b>](http://www.intel.com/design/literature.htm).</sup><br/>
<sup>Intel, Intel logo, Intel Core, VTune, Xeon are trademarks of Intel Corporation in the U.S. and other countries.</sup><br/>
<sup>\* Other names and brands may be claimed as the property of others.</sup><br/>
<sup>Copyright © 2016-2018 Intel Corporation.</sup><br/>
<sup>This software and the related documents are Intel copyrighted materials, and your use of them is governed by the express license under which they were provided to you (License). Unless the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related documents without Intel's prior written permission.</sup><br/>
<sup>This software and the related documents are provided as is, with no express or implied warranties, other than those that are expressly stated in the License.</sup><br/>
@@ -20,7 +20,7 @@ As described in [Inference Engine Developer Guide](Deep_Learning_Inference_Engin

Step #5 can potentially perform several time-consuming device-specific optimizations and network compilations,
and such delays can lead to bad user experience on application startup. To avoid this, some devices offer
Import/Export network capability, and it is possible to either use [Compile tool](../../inference-engine/tools/compile_tool/README.md)
Import/Export network capability, and it is possible to either use [Compile tool](../../tools/compile_tool/README.md)
or enable model caching to export compiled network automatically. Reusing cached networks can significantly reduce load network time.

@@ -109,7 +109,7 @@ for the debug configuration — in `<path_to_build_directory>/intel64/Debug/`.

The recommended Windows* build environment is the following:
* Microsoft Windows* 10
* Microsoft Visual Studio* 2017, or 2019
* Microsoft Visual Studio* 2017, or 2019. Make sure that C++ CMake tools for Windows is [enabled](https://docs.microsoft.com/en-us/cpp/build/cmake-projects-in-visual-studio?view=msvc-160#:~:text=The%20Visual%20C%2B%2B%20Tools%20for,Visual%20Studio%20generators%20are%20supported).
* CMake* version 3.10 or higher

> **NOTE**: If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14.

@@ -33,7 +33,7 @@ If a model has a hard-coded batch dimension, use `InferenceEngine::CNNNetwork::s

Inference Engine takes three kinds of a model description as an input, which are converted into an `InferenceEngine::CNNNetwork` object:
1. [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md) through `InferenceEngine::Core::ReadNetwork`
2. [ONNX model](../IE_DG/OnnxImporterTutorial.md) through `InferenceEngine::Core::ReadNetwork`
2. [ONNX model](../IE_DG/ONNX_Support.md) through `InferenceEngine::Core::ReadNetwork`
3. [nGraph function](../nGraph_DG/nGraph_dg.md) through the constructor of `InferenceEngine::CNNNetwork`

`InferenceEngine::CNNNetwork` keeps an `ngraph::Function` object with the model description internally.

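A hedged sketch of the three inputs listed above; the file names are placeholders and the nGraph function is a toy single-Relu graph built only for illustration.

```cpp
#include <inference_engine.hpp>
#include <ngraph/ngraph.hpp>

int main() {
    InferenceEngine::Core core;

    // 1. Intermediate Representation (IR)
    InferenceEngine::CNNNetwork from_ir = core.ReadNetwork("model.xml");

    // 2. ONNX model
    InferenceEngine::CNNNetwork from_onnx = core.ReadNetwork("model.onnx");

    // 3. nGraph function passed to the CNNNetwork constructor
    auto param = std::make_shared<ngraph::op::Parameter>(
        ngraph::element::f32, ngraph::Shape{1, 3, 224, 224});
    auto relu = std::make_shared<ngraph::op::v0::Relu>(param);
    auto function = std::make_shared<ngraph::Function>(
        ngraph::NodeVector{relu}, ngraph::ParameterVector{param});
    InferenceEngine::CNNNetwork from_ngraph(function);
    return 0;
}
```
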
@@ -9,8 +9,8 @@ The OpenVINO™ toolkit installation includes the following tools:
|[Accuracy Checker Tool](@ref omz_tools_accuracy_checker) | `<INSTALL_DIR>/deployment_tools/tools/open_model_zoo/tools/accuracy_checker`|
|[Post-Training Optimization Tool](@ref pot_README) | `<INSTALL_DIR>/deployment_tools/tools/post_training_optimization_toolkit`|
|[Model Downloader](@ref omz_tools_downloader) | `<INSTALL_DIR>/deployment_tools/tools/model_downloader`|
|[Cross Check Tool](../../inference-engine/tools/cross_check_tool/README.md) | `<INSTALL_DIR>/deployment_tools/tools/cross_check_tool`|
|[Compile Tool](../../inference-engine/tools/compile_tool/README.md) | `<INSTALL_DIR>/deployment_tools/inference_engine/lib/intel64/`|
|[Cross Check Tool](../../tools/cross_check_tool/README.md) | `<INSTALL_DIR>/deployment_tools/tools/cross_check_tool`|
|[Compile Tool](../../tools/compile_tool/README.md) | `<INSTALL_DIR>/deployment_tools/inference_engine/lib/intel64/`|

## See Also

@@ -105,17 +105,18 @@ These are general options, also supported by other plugins:

| Parameter name | Parameter values | Default | Description |
| :--- | :--- | :--- | :----------------------------------------------------------------------------------------------------------------------------|
| KEY_EXCLUSIVE_ASYNC_REQUESTS | YES/NO | NO | Forces async requests (also from different executable networks) to execute serially. This prevents potential oversubscription|
| KEY_PERF_COUNT | YES/NO | NO | Enables gathering performance counters |
| `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES`/`NO` | `NO` | Forces async requests (also from different executable networks) to execute serially. This prevents potential oversubscription|
| `KEY_PERF_COUNT` | `YES`/`NO` | `NO` | Enables gathering performance counters |
CPU-specific settings:
| Parameter name | Parameter values | Default | Description |
| :--- | :--- | :--- | :--- |
| KEY_CPU_THREADS_NUM | positive integer values| 0 | Specifies the number of threads that CPU plugin should use for inference. Zero (default) means using all (logical) cores|
| KEY_CPU_BIND_THREAD | YES/NUMA/NO | YES | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (number of the streams is less or equal to the number of NUMA nodes, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. |
| KEY_CPU_THROUGHPUT_STREAMS | KEY_CPU_THROUGHPUT_NUMA, KEY_CPU_THROUGHPUT_AUTO, or positive integer values| 1 | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior for single NUMA-node machine, with all available cores processing requests one by one. On the multi-socket (multiple NUMA nodes) machine, the best latency numbers usually achieved with a number of streams matching the number of NUMA-nodes. <br>KEY_CPU_THROUGHPUT_NUMA creates as many streams as needed to accommodate NUMA and avoid associated penalties.<br>KEY_CPU_THROUGHPUT_AUTO creates bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode. <br> Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.|
| KEY_ENFORCE_BF16 | YES/NO| YES | The name for setting to execute in bfloat16 precision whenever it is possible. This option lets plugin know to downscale the precision where it sees performance benefits from bfloat16 execution. Such option does not guarantee accuracy of the network, you need to verify the accuracy in this mode separately, based on performance and accuracy results. It should be your decision whether to use this option or not. |
| Parameter name | Parameter values | Default | Description |
| :--- | :--- | :--- |:-----------------------------------------------------------------------------|
| `KEY_CPU_THREADS_NUM` | `positive integer values`| `0` | Specifies the number of threads that CPU plugin should use for inference. Zero (default) means using all (logical) cores|
| `KEY_CPU_BIND_THREAD` | `YES`/`NUMA`/`NO` | `YES` | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (number of the streams is less or equal to the number of NUMA nodes, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. |
| `KEY_CPU_THROUGHPUT_STREAMS` | `KEY_CPU_THROUGHPUT_NUMA`, `KEY_CPU_THROUGHPUT_AUTO`, or `positive integer values`| `1` | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior for single NUMA-node machine, with all available cores processing requests one by one. On the multi-socket (multiple NUMA nodes) machine, the best latency numbers usually achieved with a number of streams matching the number of NUMA-nodes. <br>`KEY_CPU_THROUGHPUT_NUMA` creates as many streams as needed to accommodate NUMA and avoid associated penalties.<br>`KEY_CPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode. <br> Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.|
| `KEY_ENFORCE_BF16` | `YES`/`NO`| `YES` | The name for setting to execute in bfloat16 precision whenever it is possible. This option lets plugin know to downscale the precision where it sees performance benefits from bfloat16 execution. Such option does not guarantee accuracy of the network, you need to verify the accuracy in this mode separately, based on performance and accuracy results. It should be your decision whether to use this option or not. |
> **NOTE**: To disable all internal threading, use the following set of configuration parameters: `KEY_CPU_THROUGHPUT_STREAMS=0`, `KEY_CPU_THREADS_NUM=1`, `KEY_CPU_BIND_THREAD=NO`.
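As a hedged illustration of applying the CPU settings above from C++ (the chosen values are assumptions, not recommendations):

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    // Hypothetical values: four throughput streams, NUMA-level thread binding.
    core.SetConfig({{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "4"},
                    {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD,
                     InferenceEngine::PluginConfigParams::NUMA}},
                   "CPU");
    auto network = core.ReadNetwork("model.xml");         // placeholder model
    auto executable = core.LoadNetwork(network, "CPU");   // settings apply at load time
    (void)executable;
    return 0;
}
```
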
@@ -99,23 +99,24 @@ The plugin supports the configuration parameters listed below.
All parameters must be set before calling <code>InferenceEngine::Core::LoadNetwork()</code> in order to take effect.
When specifying key values as raw strings (that is, when using Python API), omit the `KEY_` prefix.
| Parameter Name | Parameter Values | Default | Description |
|---------------------|-----------------------------|-----------------|-----------------------------------------------------------|
| `KEY_CACHE_DIR` | `"<cache_dir>"` | `""` | Specifies a directory where compiled OCL binaries can be cached. First model loading generates the cache, and all subsequent LoadNetwork calls use precompiled kernels which significantly improves load time. If empty - caching is disabled |
| `KEY_PERF_COUNT` | `YES` / `NO` | `NO` | Collect performance counters during inference |
| `KEY_CONFIG_FILE` | `"<file1> [<file2> ...]"` | `""` | Load custom layer configuration files |
| `KEY_GPU_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)<br> Higher value means higher priority for OpenCL queue. 0 disables the setting. |
| `KEY_GPU_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)<br> Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. |
| `KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS` | `YES` / `NO` | `YES` | Allows using FP16+INT8 mixed precision mode, so non-quantized parts of a model will be executed in FP16 precision for FP16 IR. Does not affect quantized FP32 IRs |
| `KEY_GPU_NV12_TWO_INPUTS` | `YES` / `NO` | `NO` | Controls preprocessing logic for nv12 input. If it's set to YES, then device graph will expect that user will set biplanar nv12 blob as input wich will be directly passed to device execution graph. Otherwise, preprocessing via GAPI is used to convert NV12->BGR, thus GPU graph have to expect single input |
| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).<br>This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_GPU_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams). <br> A positive integer value creates the requested number of streams. |
| `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|
| `KEY_GPU_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for GPU engine, e.g, JIT compilation of GPU kernels or cpu kernel processing within GPU plugin. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the GPU kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while GPU plugin is running. Note that setting this value with lower number will affect not only the network loading time but also the cpu layers of GPU networks that are optimized with multi-threading. |
| `KEY_GPU_ENABLE_LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered to be most important target to optimize. |
| `KEY_CLDNN_PLUGIN_PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)<br> Higher value means higher priority for OpenCL queue. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_PRIORITY |
| `KEY_CLDNN_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)<br> Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_THROTTLE |
| `KEY_CLDNN_GRAPH_DUMPS_DIR` | `"<dump_dir>"` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) **Deprecated**. Will be removed in the next release |
| `KEY_CLDNN_SOURCES_DUMPS_DIR` | `"<dump_dir>"` | `""` | Final optimized clDNN OpenCL sources dump output directory. **Deprecated**. Will be removed in the next release |
| `KEY_GPU_PLUGIN_`<br>`PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)<br> Higher value means higher priority for OpenCL queue. 0 disables the setting. |
| `KEY_GPU_PLUGIN_`<br>`THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)<br> Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. |
| `KEY_CLDNN_ENABLE_`<br>`FP16_FOR_QUANTIZED_`<br>`MODELS` | `YES` / `NO` | `YES` | Allows using FP16+INT8 mixed precision mode, so non-quantized parts of a model will be executed in FP16 precision for FP16 IR. Does not affect quantized FP32 IRs |
| `KEY_GPU_NV12_`<br>`TWO_INPUTS` | `YES` / `NO` | `NO` | Controls preprocessing logic for nv12 input. If it's set to YES, then device graph will expect that user will set biplanar nv12 blob as input wich will be directly passed to device execution graph. Otherwise, preprocessing via GAPI is used to convert NV12->BGR, thus GPU graph have to expect single input |
| `KEY_GPU_THROUGHPUT_`<br>`STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).<br>This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_GPU_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams). <br> A positive integer value creates the requested number of streams. |
| `KEY_EXCLUSIVE_ASYNC_`<br>`REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|
| `KEY_GPU_MAX_NUM_`<br>`THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for GPU engine, e.g, JIT compilation of GPU kernels or cpu kernel processing within GPU plugin. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the GPU kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while GPU plugin is running. Note that setting this value with lower number will affect not only the network loading time but also the cpu layers of GPU networks that are optimized with multi-threading. |
| `KEY_GPU_ENABLE_`<br>`LOOP_UNROLLING` | `YES` / `NO` | `YES` | Enables recurrent layers such as TensorIterator or Loop with fixed iteration count to be unrolled. It is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that turning this key on will increase the graph loading time in proportion to the iteration counts. Thus, this key should be turned off if graph loading time is considered to be most important target to optimize. |
| `KEY_CLDNN_PLUGIN_`<br>`PRIORITY` | `<0-3>` | `0` | OpenCL queue priority (before usage, make sure your OpenCL driver supports appropriate extension)<br> Higher value means higher priority for OpenCL queue. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_PRIORITY |
| `KEY_CLDNN_PLUGIN_`<br>`THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)<br> Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. **Deprecated**. Please use KEY_GPU_PLUGIN_THROTTLE |
| `KEY_CLDNN_GRAPH_`<br>`DUMPS_DIR` | `"<dump_dir>"` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) **Deprecated**. Will be removed in the next release |
| `KEY_CLDNN_SOURCES_`<br>`DUMPS_DIR` | `"<dump_dir>"` | `""` | Final optimized clDNN OpenCL sources dump output directory. **Deprecated**. Will be removed in the next release |
| `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers. **Deprecated**. Will be removed in the next release |
| `KEY_TUNING_MODE` | `TUNING_DISABLED` <br /> `TUNING_CREATE` <br /> `TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning <br /> Create tuning file (expect much longer runtime) <br /> Use an existing tuning file. **Deprecated**. Will be removed in the next release |
| `KEY_TUNING_FILE` | `"<filename>"` | `""` | Tuning file to create / use. **Deprecated**. Will be removed in the next release |
@@ -96,10 +96,8 @@ Notice that you can use the FP16 IR to work with multi-device (as CPU automatica
Also notice that no demos are (yet) fully optimized for the multi-device, by means of supporting the OPTIMAL_NUMBER_OF_INFER_REQUESTS metric, using the GPU streams/throttling, and so on.

## Video: MULTI Plugin
[](https://www.youtube.com/watch?v=xbORYFEmrqU)
\htmlonly

<iframe width="560" height="315" src="https://www.youtube.com/embed/xbORYFEmrqU" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
\endhtmlonly

## See Also
* [Supported Devices](Supported_Devices.md)

@ -1,22 +1,20 @@
|
||||
# Legal Information {#openvino_docs_Legal_Information}
|
||||
|
||||
This software and the related documents are Intel copyrighted materials, and your use of them is governed by the express license (the “License”) under which they were provided to you. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document. Unless the License provides otherwise, you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related documents without Intel's prior written permission. This software and the related documents are provided as is, with no express or implied warranties, other than those that are expressly stated in the License. Intel disclaims all express and implied warranties, including without limitation, the implied warranties of merchantability, fitness for a particular purpose, and non-infringement, as well as any warranty arising from course of performance, course of dealing, or usage in trade.
|
||||
|
||||
This document contains information on products, services and/or processes in development. All information provided here is subject to change without notice. Contact your Intel representative to obtain the latest forecast, schedule, specifications and roadmaps. The products and services described may contain defects or errors known as errata which may cause deviations from published specifications. Current characterized errata are available on request. Copies of documents which have an order number and are referenced in this document may be obtained by calling 1-800-548-4725 or by visiting [www.intel.com/design/literature.htm](https://www.intel.com/design/literature.htm).
|
||||
|
||||
Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex).
|
||||
|
||||
Performance results are based on testing as of dates shown in configurations and may not reflect all publicly available updates. See backup for configuration details. No product or component can be absolutely secure.
|
||||
|
||||
Your costs and results may vary.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Intel technologies may require enabled hardware, software or service activation.
|
||||
|
||||
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. \*Other names and brands may be claimed as the property of others.
|
||||
OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos.
|
||||
|
||||
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.
|
||||
|
||||
## OpenVINO™ Logo
|
||||
To build equity around the project, the OpenVINO logo was created for both Intel and community usage. The logo may only be used to represent the OpenVINO toolkit and offerings built using the OpenVINO toolkit.
|
||||
|
||||
|
||||
## Logo Usage Guidelines
|
||||
The OpenVINO logo must be used in connection with truthful, non-misleading references to the OpenVINO toolkit, and for no other purpose.
|
||||
Modification of the logo or use of any separate element(s) of the logo alone is not allowed.
|
||||
|
@ -1,136 +1,54 @@
|
||||
# Model Optimizer Developer Guide {#openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide}
|
||||
|
||||
## Introduction
|
||||
|
||||
Model Optimizer is a cross-platform command-line tool that facilitates the transition between the training and deployment environment, performs static model analysis, and adjusts deep learning models for optimal execution on end-point target devices.
|
||||
|
||||
Model Optimizer process assumes you have a network model trained using a supported deep learning framework. The scheme below illustrates the typical workflow for deploying a trained deep learning model:
|
||||
The Model Optimizer process assumes you have a network model trained using one of the supported deep learning frameworks: Caffe*, TensorFlow*, Kaldi*, MXNet*, or converted to the ONNX* format. Model Optimizer produces an Intermediate Representation (IR) of the network, which can be inferred with the [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md).
|
||||
|
||||
> **NOTE**: Model Optimizer does not infer models. Model Optimizer is an offline tool that runs before the inference takes place.
|
||||
|
||||
The scheme below illustrates the typical workflow for deploying a trained deep learning model:
|
||||
|
||||

|
||||
|
||||
Model Optimizer produces an Intermediate Representation (IR) of the network, which can be read, loaded, and inferred with the Inference Engine. The Inference Engine API offers a unified API across a number of supported Intel® platforms. The Intermediate Representation is a pair of files describing the model:
|
||||
The IR is a pair of files describing the model:
|
||||
|
||||
* <code>.xml</code> - Describes the network topology
|
||||
|
||||
* <code>.bin</code> - Contains the weights and biases binary data.
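As an illustration, the following hedged sketch reads such an IR pair and runs it with the Inference Engine Python API; the file names, device, and dummy input are placeholders.

```python
# Sketch only: consume the .xml/.bin IR pair produced by the Model Optimizer.
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")  # .xml topology + .bin weights
exec_net = ie.load_network(network=net, device_name="CPU")

input_name = next(iter(net.input_info))
input_shape = net.input_info[input_name].input_data.shape
result = exec_net.infer({input_name: np.zeros(input_shape, dtype=np.float32)})
```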
|
||||
|
||||
> **TIP**: You also can work with the Model Optimizer inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench).
|
||||
> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare
|
||||
> performance of deep learning models on various Intel® architecture
|
||||
> configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components.
|
||||
> <br>
|
||||
> Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
|
||||
Below is a simple command running Model Optimizer to generate an IR for the input model:
|
||||
|
||||
## What's New in the Model Optimizer in this Release?
|
||||
```sh
|
||||
python3 mo.py --input_model INPUT_MODEL
|
||||
```
|
||||
To learn about all Model Optimizer parameters and conversion techniques, see the [Converting a Model to IR](prepare_model/convert_model/Converting_Model.md) page.
|
||||
|
||||
* Common changes:
|
||||
* Implemented several optimization transformations to replace sub-graphs of operations with HSwish, Mish, Swish and SoftPlus operations.
|
||||
* Model Optimizer generates IR keeping shape-calculating sub-graphs **by default**. Previously, this behavior was triggered if the "--keep_shape_ops" command line parameter was provided. The key is ignored in this release and will be deleted in the next release. To trigger the legacy behavior to generate an IR for a fixed input shape (folding ShapeOf operations and shape-calculating sub-graphs to Constant), use the "--static_shape" command line parameter. Changing the model input shape using the Inference Engine API at runtime may fail for such an IR.
|
||||
* Fixed Model Optimizer conversion issues that resulted in a non-reshapeable IR when using the Inference Engine reshape API.
|
||||
* Enabled transformations to fix non-reshapeable patterns in the original networks:
|
||||
* Hardcoded Reshape
|
||||
* In Reshape(2D)->MatMul pattern
|
||||
* Reshape->Transpose->Reshape when the pattern can be fused to the ShuffleChannels or DepthToSpace operation
|
||||
* Hardcoded Interpolate
|
||||
* In Interpolate->Concat pattern
|
||||
* Added a dedicated requirements file for TensorFlow 2.X as well as the dedicated install prerequisites scripts.
|
||||
* Replaced the SparseToDense operation with ScatterNDUpdate-4.
|
||||
* ONNX*:
|
||||
* Enabled the ability to specify the model output **tensor** name using the "--output" command line parameter.
|
||||
* Added support for the following operations:
|
||||
* Acosh
|
||||
* Asinh
|
||||
* Atanh
|
||||
* DepthToSpace-11, 13
|
||||
* DequantizeLinear-10 (zero_point must be constant)
|
||||
* HardSigmoid-1,6
|
||||
* QuantizeLinear-10 (zero_point must be constant)
|
||||
* ReduceL1-11, 13
|
||||
* ReduceL2-11, 13
|
||||
* Resize-11, 13 (except mode="nearest" with 5D+ input, mode="tf_crop_and_resize", and attributes exclude_outside and extrapolation_value with non-zero values)
|
||||
* ScatterND-11, 13
|
||||
* SpaceToDepth-11, 13
|
||||
* TensorFlow*:
|
||||
* Added support for the following operations:
|
||||
* Acosh
|
||||
* Asinh
|
||||
* Atanh
|
||||
* CTCLoss
|
||||
* EuclideanNorm
|
||||
* ExtractImagePatches
|
||||
* FloorDiv
|
||||
* MXNet*:
|
||||
* Added support for the following operations:
|
||||
* Acosh
|
||||
* Asinh
|
||||
* Atanh
|
||||
* Kaldi*:
|
||||
* Fixed a bug with ParallelComponent support. It is now fully supported with no restrictions.
|
||||
> **TIP**: You can get a quick start with the Model Optimizer inside the OpenVINO™ [Deep Learning Workbench](@ref
|
||||
> openvino_docs_get_started_get_started_dl_workbench) (DL Workbench).
|
||||
> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is the OpenVINO™ toolkit UI that enables you to
|
||||
> import a model, analyze its performance and accuracy, visualize the outputs, optimize and prepare the model for
|
||||
> deployment on various Intel® platforms.
|
||||
|
||||
> **NOTE:**
|
||||
> [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).
|
||||
## Videos
|
||||
|
||||
## Table of Contents
|
||||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
<iframe width="220" src="https://www.youtube.com/embed/Kl1ptVb7aI8" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
</td>
|
||||
<td>
|
||||
<iframe width="220" src="https://www.youtube.com/embed/BBt1rseDcy0" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
</td>
|
||||
<td>
|
||||
<iframe width="220" src="https://www.youtube.com/embed/RF8ypHyiKrY" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Model Optimizer Concept</strong>. <br>Duration: 3:56</td>
|
||||
<td><strong>Model Optimizer Basic<br> Operation</strong>. <br>Duration: 2:57.</td>
|
||||
<td><strong>Choosing the Right Precision</strong>. <br>Duration: 4:18.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
* [Preparing and Optimizing your Trained Model with Model Optimizer](prepare_model/Prepare_Trained_Model.md)
|
||||
* [Configuring Model Optimizer](prepare_model/Config_Model_Optimizer.md)
|
||||
* [Converting a Model to Intermediate Representation (IR)](prepare_model/convert_model/Converting_Model.md)
|
||||
* [Converting a Model Using General Conversion Parameters](prepare_model/convert_model/Converting_Model_General.md)
|
||||
* [Converting Your Caffe* Model](prepare_model/convert_model/Convert_Model_From_Caffe.md)
|
||||
* [Converting Your TensorFlow* Model](prepare_model/convert_model/Convert_Model_From_TensorFlow.md)
|
||||
* [Converting BERT from TensorFlow](prepare_model/convert_model/tf_specific/Convert_BERT_From_Tensorflow.md)
|
||||
* [Converting GNMT from TensorFlow](prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md)
|
||||
* [Converting YOLO from DarkNet to TensorFlow and then to IR](prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md)
|
||||
* [Converting Wide and Deep Models from TensorFlow](prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md)
|
||||
* [Converting FaceNet from TensorFlow](prepare_model/convert_model/tf_specific/Convert_FaceNet_From_Tensorflow.md)
|
||||
* [Converting DeepSpeech from TensorFlow](prepare_model/convert_model/tf_specific/Convert_DeepSpeech_From_Tensorflow.md)
|
||||
* [Converting Language Model on One Billion Word Benchmark from TensorFlow](prepare_model/convert_model/tf_specific/Convert_lm_1b_From_Tensorflow.md)
|
||||
* [Converting Neural Collaborative Filtering Model from TensorFlow*](prepare_model/convert_model/tf_specific/Convert_NCF_From_Tensorflow.md)
|
||||
* [Converting TensorFlow* Object Detection API Models](prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md)
|
||||
* [Converting TensorFlow*-Slim Image Classification Model Library Models](prepare_model/convert_model/tf_specific/Convert_Slim_Library_Models.md)
|
||||
* [Converting CRNN Model from TensorFlow*](prepare_model/convert_model/tf_specific/Convert_CRNN_From_Tensorflow.md)
|
||||
* [Converting Your MXNet* Model](prepare_model/convert_model/Convert_Model_From_MxNet.md)
|
||||
* [Converting a Style Transfer Model from MXNet](prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md)
|
||||
* [Converting Your Kaldi* Model](prepare_model/convert_model/Convert_Model_From_Kaldi.md)
|
||||
* [Converting Your ONNX* Model](prepare_model/convert_model/Convert_Model_From_ONNX.md)
|
||||
* [Converting Faster-RCNN ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_Faster_RCNN.md)
|
||||
* [Converting Mask-RCNN ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_Mask_RCNN.md)
|
||||
* [Converting GPT2 ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_GPT2.md)
|
||||
* [Converting Your PyTorch* Model](prepare_model/convert_model/Convert_Model_From_PyTorch.md)
|
||||
* [Converting F3Net PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_F3Net.md)
|
||||
* [Converting QuartzNet PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_QuartzNet.md)
|
||||
* [Converting YOLACT PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md)
|
||||
* [Model Optimizations Techniques](prepare_model/Model_Optimization_Techniques.md)
|
||||
* [Cutting parts of the model](prepare_model/convert_model/Cutting_Model.md)
|
||||
* [Sub-graph Replacement in Model Optimizer](prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md)
|
||||
* [Supported Framework Layers](prepare_model/Supported_Frameworks_Layers.md)
|
||||
* [Intermediate Representation and Operation Sets](IR_and_opsets.md)
|
||||
* [Operations Specification](../ops/opset.md)
|
||||
* [Intermediate Representation suitable for INT8 inference](prepare_model/convert_model/IR_suitable_for_INT8_inference.md)
|
||||
* [Model Optimizer Extensibility](prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
* [Extending Model Optimizer with New Primitives](prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md)
|
||||
* [Extending Model Optimizer with Caffe Python Layers](prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md)
|
||||
* [Extending Model Optimizer with Custom MXNet* Operations](prepare_model/customize_model_optimizer/Extending_MXNet_Model_Optimizer_with_New_Primitives.md)
|
||||
* [Legacy Mode for Caffe* Custom Layers](prepare_model/customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md)
|
||||
* [Model Optimizer Frequently Asked Questions](prepare_model/Model_Optimizer_FAQ.md)
|
||||
|
||||
* [Known Issues](Known_Issues_Limitations.md)
|
||||
|
||||
**Typical Next Step:** [Preparing and Optimizing your Trained Model with Model Optimizer](prepare_model/Prepare_Trained_Model.md)
|
||||
|
||||
## Video: Model Optimizer Concept
|
||||
|
||||
[](https://www.youtube.com/watch?v=Kl1ptVb7aI8)
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/Kl1ptVb7aI8" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
## Video: Model Optimizer Basic Operation
|
||||
[](https://www.youtube.com/watch?v=BBt1rseDcy0)
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/BBt1rseDcy0" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
## Video: Choosing the Right Precision
|
||||
[](https://www.youtube.com/watch?v=RF8ypHyiKrY)
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/RF8ypHyiKrY" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c8ae479880ab43cdb12eeb2fbaaf3b7861f786413c583eeba906c5fdf4b66730
|
||||
size 30696
|
||||
oid sha256:e8a86ea362473121a266c0ec1257c8d428a4bb6438fecdc9d4a4f1ff5cfc9047
|
||||
size 26220
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5e22bc22d614c7335ae461a8ce449ea8695973d755faca718cf74b95972c94e2
|
||||
size 19773
|
||||
oid sha256:5281f26cbaa468dc4cafa4ce2fde35d338fe0f658bbb796abaaf793e951939f6
|
||||
size 13943
|
||||
|
@ -1,8 +1,6 @@
|
||||
# Configuring the Model Optimizer {#openvino_docs_MO_DG_prepare_model_Config_Model_Optimizer}
|
||||
# Installing Model Optimizer Pre-Requisites {#openvino_docs_MO_DG_prepare_model_Config_Model_Optimizer}
|
||||
|
||||
You must configure the Model Optimizer for the framework that was used to train
|
||||
the model. This section tells you how to configure the Model Optimizer either
|
||||
through scripts or by using a manual process.
|
||||
Before running the Model Optimizer, you must install the Model Optimizer pre-requisites for the framework that was used to train the model. This section tells you how to install the pre-requisites either through scripts or by using a manual process.
|
||||
|
||||
## Using Configuration Scripts
|
||||
|
||||
@ -154,6 +152,10 @@ pip3 install -r requirements_onnx.txt
|
||||
```
|
||||
|
||||
## Using the protobuf Library in the Model Optimizer for Caffe\*
|
||||
<details>
|
||||
<summary>Click to expand</summary>
|
||||
|
||||
|
||||
|
||||
These procedures require:
|
||||
|
||||
@ -166,7 +168,7 @@ By default, the library executes pure Python\* language implementation,
|
||||
which is slow. These steps show how to use the faster C++ implementation
|
||||
of the protobuf library on Windows OS or Linux OS.
|
||||
|
||||
### Using the protobuf Library on Linux\* OS
|
||||
#### Using the protobuf Library on Linux\* OS
|
||||
|
||||
To use the C++ implementation of the protobuf library on Linux, it is enough to
|
||||
set up the environment variable:
|
||||
@ -174,7 +176,7 @@ set up the environment variable:
|
||||
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
|
||||
```
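To confirm which implementation is active, the following hedged Python check can be used (assuming the protobuf Python package is installed):

```python
# Prints "cpp" when the C++ implementation is active, "python" otherwise.
from google.protobuf.internal import api_implementation
print(api_implementation.Type())
```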
|
||||
|
||||
### <a name="protobuf-install-windows"></a>Using the protobuf Library on Windows\* OS
|
||||
#### <a name="protobuf-install-windows"></a>Using the protobuf Library on Windows\* OS
|
||||
|
||||
On Windows, pre-built protobuf packages for Python versions 3.4, 3.5, 3.6,
|
||||
and 3.7 are provided with the installation package and can be found in
|
||||
@ -262,6 +264,8 @@ python3 -m easy_install dist/protobuf-3.6.1-py3.6-win-amd64.egg
|
||||
set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## See Also
|
||||
|
||||
* [Converting a Model to Intermediate Representation (IR)](convert_model/Converting_Model.md)
|
||||
|
@ -1,63 +0,0 @@
|
||||
# Preparing and Optimizing Your Trained Model {#openvino_docs_MO_DG_prepare_model_Prepare_Trained_Model}
|
||||
|
||||
Inference Engine enables _deploying_ your network model trained with any of the supported deep learning frameworks: Caffe\*, TensorFlow\*, Kaldi\*, MXNet\* or converted to the ONNX\* format. To perform the inference, the Inference Engine does not operate with the original model, but with its Intermediate Representation (IR), which is optimized for execution on end-point target devices. To generate an IR for your trained model, the Model Optimizer tool is used.
|
||||
|
||||
## How the Model Optimizer Works
|
||||
|
||||
Model Optimizer loads a model into memory, reads it, builds the internal representation of the model, optimizes it, and produces the Intermediate Representation. Intermediate Representation is the only format the Inference Engine accepts.
|
||||
|
||||
> **NOTE**: Model Optimizer does not infer models. Model Optimizer is an offline tool that runs before the inference takes place.
|
||||
|
||||
Model Optimizer has two main purposes:
|
||||
|
||||
* **Produce a valid Intermediate Representation**. If this main conversion artifact is not valid, the Inference Engine cannot run. The primary responsibility of the Model Optimizer is to produce the two files (`.xml` and `.bin`) that form the Intermediate Representation.
|
||||
* **Produce an optimized Intermediate Representation**. Pre-trained models contain layers that are important for training, such as the `Dropout` layer. These layers are useless during inference and might increase the inference time. In many cases, these operations can be automatically removed from the resulting Intermediate Representation. However, if a group of operations can be represented as a single mathematical operation, and thus as a single operation node in a model graph, the Model Optimizer recognizes such patterns and replaces the whole group with a single operation node (see the sketch after this list). The result is an Intermediate Representation that has fewer operation nodes than the original model. This decreases the inference time.
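As an illustration of such pattern replacement (not Model Optimizer code), the hedged NumPy sketch below folds a per-channel scale/shift, such as an inference-time batch normalization, into the weights and bias of the preceding convolution so the pair can be emitted as a single operation; all tensors are synthetic.

```python
import numpy as np

# Synthetic Convolution followed by a per-channel scale/shift (BatchNorm-style).
out_ch, in_ch, k = 8, 3, 3
conv_w = np.random.randn(out_ch, in_ch, k, k).astype(np.float32)
conv_b = np.zeros(out_ch, dtype=np.float32)
gamma, beta = np.ones(out_ch), np.zeros(out_ch)
mean, var, eps = np.zeros(out_ch), np.ones(out_ch), 1e-5

# Fold the scale/shift into the convolution: one node instead of two.
scale = gamma / np.sqrt(var + eps)
fused_w = conv_w * scale[:, None, None, None]  # weights of the single fused Convolution
fused_b = (conv_b - mean) * scale + beta       # bias of the single fused Convolution
```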
|
||||
|
||||
To produce a valid Intermediate Representation, the Model Optimizer must be able to read the original model operations, handle their properties and represent them in Intermediate Representation format, while maintaining validity of the resulting Intermediate Representation. The resulting model consists of operations described in the [Operations Specification](../../ops/opset.md).
|
||||
|
||||
## What You Need to Know about Your Model
|
||||
|
||||
Many common layers exist across known frameworks and neural network topologies. Examples of these layers are `Convolution`, `Pooling`, and `Activation`. To read the original model and produce the Intermediate Representation of a model, the Model Optimizer must be able to work with these layers.
|
||||
|
||||
The full list of supported layers depends on the framework and can be found in the [Supported Framework Layers](Supported_Frameworks_Layers.md) section. If your topology contains only layers from this list, as is the case for the topologies used by most users, the Model Optimizer easily creates the Intermediate Representation. After that, you can proceed to work with the Inference Engine.
|
||||
|
||||
However, if you use a topology with layers that are not recognized by the Model Optimizer out of the box, see [Custom Layers in the Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md) to learn how to work with custom layers.
|
||||
|
||||
## Model Optimizer Directory Structure
|
||||
|
||||
After installation with OpenVINO™ toolkit or Intel® Deep Learning Deployment Toolkit, the Model Optimizer folder has the following structure (some directories omitted for clarity):
|
||||
```
|
||||
|-- model_optimizer
|
||||
|-- extensions
|
||||
|-- front - Front-End framework agnostic transformations (operations output shapes are not defined yet).
|
||||
|-- caffe - Front-End Caffe-specific transformations and Caffe layers extractors
|
||||
|-- CustomLayersMapping.xml.example - example of file for registering custom Caffe layers (compatible with the 2017R3 release)
|
||||
|-- kaldi - Front-End Kaldi-specific transformations and Kaldi operations extractors
|
||||
|-- mxnet - Front-End MxNet-specific transformations and MxNet symbols extractors
|
||||
|-- onnx - Front-End ONNX-specific transformations and ONNX operators extractors
|
||||
|-- tf - Front-End TensorFlow-specific transformations, TensorFlow operations extractors, sub-graph replacements configuration files.
|
||||
|-- middle - Middle-End framework agnostic transformations (layers output shapes are defined).
|
||||
|-- back - Back-End framework agnostic transformations (preparation for IR generation).
|
||||
|-- mo
|
||||
|-- back - Back-End logic: contains IR emitting logic
|
||||
|-- front - Front-End logic: contains matching between Framework-specific layers and IR specific, calculation of output shapes for each registered layer
|
||||
|-- graph - Graph utilities to work with internal IR representation
|
||||
|-- middle - Graph transformations - optimizations of the model
|
||||
|-- pipeline - Sequence of steps required to create IR for each framework
|
||||
|-- utils - Utility functions
|
||||
|-- tf_call_ie_layer - Source code that enables TensorFlow fallback in Inference Engine during model inference
|
||||
|-- mo.py - Centralized entry point that can be used for any supported framework
|
||||
|-- mo_caffe.py - Entry point particularly for Caffe
|
||||
|-- mo_kaldi.py - Entry point particularly for Kaldi
|
||||
|-- mo_mxnet.py - Entry point particularly for MXNet
|
||||
|-- mo_onnx.py - Entry point particularly for ONNX
|
||||
|-- mo_tf.py - Entry point particularly for TensorFlow
|
||||
```
|
||||
|
||||
The following sections provide the information about how to use the Model Optimizer, from configuring the tool and generating an IR for a given model to customizing the tool for your needs:
|
||||
|
||||
* [Configuring Model Optimizer](Config_Model_Optimizer.md)
|
||||
* [Converting a Model to Intermediate Representation](convert_model/Converting_Model.md)
|
||||
* [Custom Layers in Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
* [Model Optimization Techniques](Model_Optimization_Techniques.md)
|
||||
* [Model Optimizer Frequently Asked Questions](Model_Optimizer_FAQ.md)
|
@ -27,14 +27,12 @@ A summary of the steps for optimizing and deploying a model that was trained wit
|
||||
|SSD-ResNet-50| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/resnet50_ssd_512_voc0712_trainval.zip)|
|
||||
|SSD-VGG-16-300| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.5-beta/vgg16_ssd_300_voc0712_trainval.zip)|
|
||||
|SSD-Inception v3| [Repo](https://github.com/zhreshold/mxnet-ssd), [Symbol + Params](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.7-alpha/ssd_inceptionv3_512_voc0712trainval.zip)|
|
||||
|FCN8 (Semantic Segmentation)| [Repo](https://github.com/apache/incubator-mxnet/tree/master/example/fcn-xs), [Symbol](https://www.dropbox.com/sh/578n5cxej7ofd6m/AAA9SFCBN8R_uL2CnAd3WQ5ia/FCN8s_VGG16-symbol.json?dl=0), [Params](https://www.dropbox.com/sh/578n5cxej7ofd6m/AABHWZHCtA2P6iR6LUflkxb_a/FCN8s_VGG16-0019-cpu.params?dl=0)|
|
||||
|MTCNN part 1 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det1-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det1-0001.params)|
|
||||
|MTCNN part 2 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det2-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det2-0001.params)|
|
||||
|MTCNN part 3 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det3-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det3-0001.params)|
|
||||
|MTCNN part 4 (Face Detection)| [Repo](https://github.com/pangyupo/mxnet_mtcnn_face_detection), [Symbol](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det4-symbol.json), [Params](https://github.com/pangyupo/mxnet_mtcnn_face_detection/blob/master/model/det4-0001.params)|
|
||||
|Lightened_moon| [Repo](https://github.com/tornadomeet/mxnet-face/tree/master/model/lightened_moon), [Symbol](https://github.com/tornadomeet/mxnet-face/blob/master/model/lightened_moon/lightened_moon_fuse-symbol.json), [Params](https://github.com/tornadomeet/mxnet-face/blob/master/model/lightened_moon/lightened_moon_fuse-0082.params)|
|
||||
|RNN-Transducer| [Repo](https://github.com/HawkAaron/mxnet-transducer) |
|
||||
|word_lm| [Repo](https://github.com/apache/incubator-mxnet/tree/master/example/rnn/word_lm) |
|
||||
|
||||
**Other supported topologies**
|
||||
|
||||
|
@ -37,7 +37,7 @@ Detailed information on how to convert models from the <a href="https://github.c
|
||||
|
||||
**Supported Pre-Trained Topologies from TensorFlow 1 Detection Model Zoo**
|
||||
|
||||
Detailed information on how to convert models from the <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md">TensorFlow 1 Detection Model Zoo</a> is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models zoo that are supported.
|
||||
Detailed information on how to convert models from the <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md">TensorFlow 1 Object Detection Models Zoo</a> and <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md">TensorFlow 2 Object Detection Models Zoo</a> is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models Zoo that are supported.
|
||||
|
||||
| Model Name| TensorFlow 1 Object Detection API Models|
|
||||
| :------------- | -----:|
|
||||
@ -405,10 +405,8 @@ Refer to [Supported Framework Layers ](../Supported_Frameworks_Layers.md) for th
|
||||
The Model Optimizer provides explanatory messages if it is unable to run to completion due to typographical errors, incorrectly used options, or other issues. The message describes the potential cause of the problem and gives a link to the [Model Optimizer FAQ](../Model_Optimizer_FAQ.md). The FAQ has instructions on how to resolve most issues. The FAQ also includes links to relevant sections in the Model Optimizer Developer Guide to help you understand what went wrong.
|
||||
|
||||
## Video: Converting a TensorFlow Model
|
||||
[](https://www.youtube.com/watch?v=QW6532LtiTc)
|
||||
\htmlonly
|
||||
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/QW6532LtiTc" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
## Summary
|
||||
In this document, you learned:
|
||||
|
@ -1,39 +1,20 @@
|
||||
# Converting a Model to Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model}
|
||||
|
||||
Use the <code>mo.py</code> script from the `<INSTALL_DIR>/deployment_tools/model_optimizer` directory to run the Model Optimizer and convert the model to the Intermediate Representation (IR).
|
||||
The simplest way to convert a model is to run <code>mo.py</code> with a path to the input model file and an output directory where you have write permissions:
|
||||
Use the <code>mo.py</code> script from the `<INSTALL_DIR>/deployment_tools/model_optimizer` directory to run the Model Optimizer and convert the model to the Intermediate Representation (IR):
|
||||
```sh
|
||||
python3 mo.py --input_model INPUT_MODEL --output_dir <OUTPUT_MODEL_DIR>
|
||||
```
|
||||
You need to have write permissions for the output directory.
|
||||
|
||||
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md).
|
||||
|
||||
The <code>mo.py</code> script is the universal entry point that can deduce the framework that has produced the input model by a standard extension of the model file:
|
||||
|
||||
* `.caffemodel` - Caffe\* models
|
||||
* `.pb` - TensorFlow\* models
|
||||
* `.params` - MXNet\* models
|
||||
* `.onnx` - ONNX\* models
|
||||
* `.nnet` - Kaldi\* models.
|
||||
|
||||
If the model files do not have standard extensions, you can use the ``--framework {tf,caffe,kaldi,onnx,mxnet,paddle}`` option to specify the framework type explicitly.
|
||||
|
||||
For example, the following commands are equivalent:
|
||||
```sh
|
||||
python3 mo.py --input_model /user/models/model.pb
|
||||
```
|
||||
```sh
|
||||
python3 mo.py --framework tf --input_model /user/models/model.pb
|
||||
```
|
||||
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md).
|
||||
|
||||
To adjust the conversion process, you may use general parameters defined in the [Converting a Model Using General Conversion Parameters](Converting_Model_General.md) and
|
||||
Framework-specific parameters for:
|
||||
* [Caffe](Convert_Model_From_Caffe.md),
|
||||
* [TensorFlow](Convert_Model_From_TensorFlow.md),
|
||||
* [MXNet](Convert_Model_From_MxNet.md),
|
||||
* [ONNX](Convert_Model_From_ONNX.md),
|
||||
* [Kaldi](Convert_Model_From_Kaldi.md).
|
||||
* [Paddle](Convert_Model_From_Paddle.md).
|
||||
* [Caffe](Convert_Model_From_Caffe.md)
|
||||
* [TensorFlow](Convert_Model_From_TensorFlow.md)
|
||||
* [MXNet](Convert_Model_From_MxNet.md)
|
||||
* [ONNX](Convert_Model_From_ONNX.md)
|
||||
* [Kaldi](Convert_Model_From_Kaldi.md)
|
||||
|
||||
|
||||
## See Also
|
||||
|
@ -212,8 +212,7 @@ Launch the Model Optimizer for the Caffe bvlc_alexnet model with reversed input
|
||||
python3 mo.py --input_model bvlc_alexnet.caffemodel --reverse_input_channels --mean_values [255,255,255] --data_type FP16 --output_dir <OUTPUT_MODEL_DIR>
|
||||
```
|
||||
|
||||
Launch the Model Optimizer for the Caffe bvlc_alexnet model with extensions listed in specified directories, specified mean_images binaryproto.
|
||||
file For more information about extensions, please refer to [this](../customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md) page.
|
||||
Launch the Model Optimizer for the Caffe bvlc_alexnet model with extensions listed in specified directories, specified mean_images binaryproto file. For more information about extensions, please refer to [this](../customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md) page.
|
||||
```sh
|
||||
python3 mo.py --input_model bvlc_alexnet.caffemodel --extensions /home/,/some/other/path/ --mean_file /path/to/binaryproto --output_dir <OUTPUT_MODEL_DIR>
|
||||
```
|
||||
|
@ -19,7 +19,7 @@ Model Optimizer provides command line options `--input` and `--output` to specif
|
||||
* `--input` option accepts a comma-separated list of layer names of the input model that should be treated as new entry points to the model.
|
||||
* `--output` option accepts a comma-separated list of layer names of the input model that should be treated as new exit points from the model.
|
||||
|
||||
The `--input` option is required for cases unrelated to model cutting. For example, when the model contains several inputs and `--input_shape` or `--mean_values` options are used, you should use the `--input` option to specify the order of input nodes for correct mapping between multiple items provided in `--input_shape` and `--mean_values` and the inputs in the model. This is out of scope.
|
||||
The `--input` option is required for cases unrelated to model cutting. For example, when the model contains several inputs and `--input_shape` or `--mean_values` options are used, you should use the `--input` option to specify the order of input nodes for correct mapping between multiple items provided in `--input_shape` and `--mean_values` and the inputs in the model. Details on these options are out of scope for this document, which focuses on model cutting.
|
||||
|
||||
Model cutting is illustrated with Inception V1. This model is in `models/research/slim` repository. [This section](Converting_Model.md) describes pre-work to prepare the model for the Model Optimizer to be ready to proceed with this chapter.
|
||||
|
||||
|
@ -9,7 +9,7 @@ Intermediate Representation (IR) should be specifically formed to be suitable fo
|
||||
Such an IR is called a Low Precision IR and you can generate it in two ways:
|
||||
- [Quantize regular IR with the Post-Training Optimization tool](@ref pot_README)
|
||||
- Use the Model Optimizer for a model pretrained for Low Precision inference: TensorFlow\* pre-TFLite models (`.pb` model file with `FakeQuantize*` operations) and ONNX\* quantized models.
|
||||
Both Tensorflow and ONNX quantized models could be prepared by [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf/blob/develop/README.md)
|
||||
Both TensorFlow and ONNX quantized models could be prepared by [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf/blob/develop/README.md).
|
||||
|
||||
For an operation to be executed in INT8, it must have `FakeQuantize` operations as inputs.
|
||||
See the [specification of `FakeQuantize` operation](../../../ops/quantization/FakeQuantize_1.md) for details.
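For intuition, the hedged sketch below mirrors the element-wise semantics of `FakeQuantize` with scalar ranges; the linked specification remains the authoritative definition, and the sample values are arbitrary.

```python
import numpy as np

def fake_quantize(x, in_low, in_high, out_low, out_high, levels):
    # Quantize to `levels` steps inside [in_low, in_high], then rescale to the output range.
    q = np.round((x - in_low) / (in_high - in_low) * (levels - 1)) / (levels - 1)
    y = q * (out_high - out_low) + out_low
    y = np.where(x <= min(in_low, in_high), out_low, y)   # clamp below the input range
    y = np.where(x > max(in_low, in_high), out_high, y)   # clamp above the input range
    return y

print(fake_quantize(np.linspace(-1.5, 1.5, 7), -1.0, 1.0, -1.0, 1.0, levels=256))
```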
|
||||
@ -17,7 +17,7 @@ See the [specification of `FakeQuantize` operation](../../../ops/quantization/Fa
|
||||
To execute the `Convolution` operation in INT8 on CPU, both data and weight inputs should have `FakeQuantize` as an input operation:
|
||||

|
||||
|
||||
Low pecision IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between a Low Precision IR and FP16 or FP32 IR is the existence of `FakeQuantize` in the Low Precision IR.
|
||||
Low precision IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between a Low Precision IR and FP16 or FP32 IR is the existence of `FakeQuantize` in the Low Precision IR.
|
||||
Plugins with Low Precision Inference support recognize these sub-graphs and quantize them during the inference time.
|
||||
Plugins without Low Precision support execute all operations, including `FakeQuantize`, as is in the FP32 or FP16 precision.
|
||||
|
||||
|
@ -90,6 +90,8 @@ Where the `models/13` string is composed of the following substrings:
|
||||
* `models/`: path to the folder that contains .nd files with pre-trained styles weights
|
||||
* `13`: prefix pointing to 13_decoder, which is the default decoder for the repository
|
||||
|
||||
>**NOTE**: If you get an error saying "No module named 'cPickle'", try running the script from this step in Python 2. Then return to Python 3 for the remaining steps.
|
||||
|
||||
You can choose any style from [collection of pre-trained weights](https://pan.baidu.com/s/1skMHqYp). (On the Chinese-language page, click the down arrow next to a size in megabytes. Then wait for an overlay box to appear, and click the blue button in it to download.) The `generate()` function generates `nst_vgg19-symbol.json` and `vgg19-symbol.json` files for the specified shape. In the code, it is [1024 x 768] for a 4:3 ratio, and you can specify another, for example, [224,224] for a square ratio.
|
||||
|
||||
#### 6. Run the Model Optimizer to generate an Intermediate Representation (IR):
|
||||
|
@ -2,15 +2,19 @@
|
||||
|
||||
[F3Net](https://github.com/weijun88/F3Net): Fusion, Feedback and Focus for Salient Object Detection
|
||||
|
||||
## Clone the F3Net Model Repository
|
||||
|
||||
To clone the repository, run the following command:
|
||||
```bash
|
||||
git clone http://github.com/weijun88/F3Net.git
|
||||
```
|
||||
|
||||
## Download and Convert the Model to ONNX*
|
||||
|
||||
To download the pre-trained model or train the model yourself, refer to the
|
||||
[instruction](https://github.com/weijun88/F3Net/blob/master/README.md) in the F3Net model repository. Firstly,
|
||||
convert the model to ONNX\* format. Create and run the script with the following content in the `src`
|
||||
directory of the model repository:
|
||||
[instruction](https://github.com/weijun88/F3Net/blob/master/README.md) in the F3Net model repository. First, convert the model to ONNX\* format. Create and run the script with the following content in the `src` directory of the model repository:
|
||||
```python
|
||||
import torch
|
||||
|
||||
from dataset import Config
|
||||
from net import F3Net
|
||||
|
||||
@ -19,7 +23,7 @@ net = F3Net(cfg)
|
||||
image = torch.zeros([1, 3, 352, 352])
|
||||
torch.onnx.export(net, image, 'f3net.onnx', export_params=True, do_constant_folding=True, opset_version=11)
|
||||
```
|
||||
The script generates the ONNX\* model file f3net.onnx. The model conversion was tested with the repository hash commit `eecace3adf1e8946b571a4f4397681252f9dc1b8`.
|
||||
The script generates the ONNX\* model file `f3net.onnx`. This model conversion was tested with the repository hash commit `eecace3adf1e8946b571a4f4397681252f9dc1b8`.
|
||||
|
||||
## Convert ONNX* F3Net Model to IR
|
||||
|
||||
|
@ -20,15 +20,15 @@ mkdir rnnt_for_openvino
|
||||
cd rnnt_for_openvino
|
||||
```
|
||||
|
||||
**Step 3**. Download pretrained weights for PyTorch implementation from https://zenodo.org/record/3662521#.YG21DugzZaQ.
|
||||
For UNIX*-like systems you can use wget:
|
||||
**Step 3**. Download pretrained weights for PyTorch implementation from [https://zenodo.org/record/3662521#.YG21DugzZaQ](https://zenodo.org/record/3662521#.YG21DugzZaQ).
|
||||
For UNIX*-like systems you can use `wget`:
|
||||
```bash
|
||||
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt
|
||||
```
|
||||
The link was taken from `setup.sh` in the `speech_recognition/rnnt` subfolder. You will get exactly the same weights as
|
||||
if you were following the steps from https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt.
|
||||
if you were following the steps from [https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt](https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt).
|
||||
|
||||
**Step 4**. Install required python* packages:
|
||||
**Step 4**. Install required Python packages:
|
||||
```bash
|
||||
pip3 install torch toml
|
||||
```
|
||||
@ -37,7 +37,7 @@ pip3 install torch toml
|
||||
`export_rnnt_to_onnx.py` and run it in the current directory `rnnt_for_openvino`:
|
||||
|
||||
> **NOTE**: If you already have a full clone of MLCommons inference repository, you need to
|
||||
> specify `mlcommons_inference_path` variable.
|
||||
> specify the `mlcommons_inference_path` variable.
|
||||
|
||||
```python
|
||||
import toml
|
||||
@ -92,8 +92,7 @@ torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12,
|
||||
python3 export_rnnt_to_onnx.py
|
||||
```
|
||||
|
||||
After completing this step, the files rnnt_encoder.onnx, rnnt_prediction.onnx, and rnnt_joint.onnx will be saved in
|
||||
the current directory.
|
||||
After completing this step, the files `rnnt_encoder.onnx`, `rnnt_prediction.onnx`, and `rnnt_joint.onnx` will be saved in the current directory.
|
||||
|
||||
**Step 6**. Run the conversion command:
|
||||
|
||||
@ -102,6 +101,6 @@ python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input.
|
||||
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]"
|
||||
python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]"
|
||||
```
|
||||
Please note that hardcoded value for sequence length = 157 was taken from the MLCommons, but conversion to IR preserves
|
||||
network [reshapeability](../../../../IE_DG/ShapeInference.md); this means you can change input shapes manually to any value either during conversion or
|
||||
inference.
|
||||
Please note that the hardcoded value for the sequence length (157) was taken from MLCommons, but conversion to IR preserves network [reshapeability](../../../../IE_DG/ShapeInference.md); this means you can change input shapes manually to any value either during conversion or at inference time.
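For example, the hedged sketch below changes an input shape at inference time through the Inference Engine reshape API; the IR file names, the way the input is picked, and the new sequence length are placeholders.

```python
# Sketch only: reshape an input before loading the network.
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="rnnt_encoder.xml", weights="rnnt_encoder.bin")  # placeholder IR
input_name = next(iter(net.input_info))
new_shape = list(net.input_info[input_name].input_data.shape)
new_shape[0] = 300                         # placeholder: a different sequence length
net.reshape({input_name: new_shape})
exec_net = ie.load_network(network=net, device_name="CPU")
```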
|
@ -138,7 +138,7 @@ git checkout 57b8f2d95e62e2e649b382f516ab41f949b57239
|
||||
|
||||
3. Set up the environment as described in `README.md`.
|
||||
|
||||
**Step 2**. Download a pre-trained model from the list attached in the `Evaluation` section of `README.md` document, for example `yolact_base_54_800000.pth`.
|
||||
**Step 2**. Download a pre-trained model from the list attached in the `Evaluation` section of the [README.md](https://github.com/dbolya/yolact/blob/master/README.md) document, for example `yolact_base_54_800000.pth`.
|
||||
|
||||
**Step 3**. Export the model to ONNX* format.
|
||||
|
||||
@ -187,5 +187,4 @@ python path/to/model_optimizer/mo.py \
|
||||
--input_model /path/to/yolact.onnx \
|
||||
--reverse_input_channels \
|
||||
--scale 255
|
||||
```
|
||||
|
||||
```
|
@ -24,13 +24,15 @@ To get pb-file from the archive contents, you need to do the following.
|
||||
1. Run commands
|
||||
|
||||
```sh
|
||||
cd ~
|
||||
mkdir XLNet-Base
|
||||
cd XLNet-Base
|
||||
git clone https://github.com/zihangdai/xlnet
|
||||
wget https://storage.googleapis.com/xlnet/released_models/cased_L-12_H-768_A-12.zip
|
||||
unzip cased_L-12_H-768_A-12.zip
|
||||
mkdir try_save
|
||||
cd ~
|
||||
mkdir XLNet-Base
|
||||
cd XLNet-Base
|
||||
git clone https://github.com/zihangdai/xlnet
|
||||
wget https://storage.googleapis.com/xlnet/released_models/cased_L-12_H-768_A-12.zip
|
||||
unzip cased_L-12_H-768_A-12.zip
|
||||
mkdir try_save
|
||||
cd xlnet
|
||||
sed -i "s/tf\.train\.Optimizer/tf\.train.Optimizer if tf.version < '1.15' else tf.compat.v1.train.Optimizer/g" model_utils.py
|
||||
```
|
||||
|
||||
|
||||
|
@ -67,7 +67,11 @@ git checkout ed60b90
|
||||
```
|
||||
3. Download [coco.names](https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names) file from the DarkNet website **OR** use labels that fit your task.
|
||||
4. Download the [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) (for the YOLOv3 model) or [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) (for the YOLOv3-tiny model) file **OR** use your pre-trained weights with the same structure
|
||||
5. Run a converter:
|
||||
5. Install Pillow (the maintained fork of PIL), which is used by the conversion script in the repo:
|
||||
```sh
|
||||
pip install pillow
|
||||
```
|
||||
6. Run a converter:
|
||||
- for YOLO-v3:
|
||||
```sh
|
||||
python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weights_file yolov3.weights
|
||||
|
@ -34,7 +34,7 @@
|
||||
|
||||
<a name="model-optimizer-extensibility"></a>Model Optimizer extensibility mechanism enables support of new operations and custom transformations to generate the optimized intermediate representation (IR) as described in the
|
||||
[Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../../IR_and_opsets.md). This
|
||||
mechanism is a core part of the Model Optimizer. The Model Optimizer itself uses it under the hood, being a huge set of examples on how to add custom logic to support your model.
|
||||
mechanism is a core part of the Model Optimizer, which uses it under the hood, so the Model Optimizer itself is a huge set of examples for adding custom logic to support your model.
|
||||
|
||||
There are several cases when the customization is needed:
|
||||
|
||||
|
@ -19,31 +19,34 @@ All of the performance benchmarks were generated using the open-sourced tool wit
|
||||
|
||||
#### 6. What image sizes are used for the classification network models?
|
||||
The image size used in the inference depends on the network being benchmarked. The following table shows the list of input sizes for each network model.
|
||||
| **Model** | **Public Network** | **Task** | **Input Size** (Height x Width) |
|
||||
|------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------|-----------------------------|-----------------------------------|
|
||||
| [bert-large-uncased-whole-word-masking-squad](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/intel/bert-large-uncased-whole-word-masking-squad-int8-0001) | BERT-large |question / answer |384|
|
||||
| [deeplabv3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/deeplabv3) | DeepLab v3 Tf |semantic segmentation | 513x513 |
|
||||
| [densenet-121-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/densenet-121-tf) | Densenet-121 Tf |classification | 224x224 |
|
||||
| [facenet-20180408-102900-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/facenet-20180408-102900) | FaceNet TF | face recognition | 160x160 |
|
||||
| [faster_rcnn_resnet50_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/faster_rcnn_resnet50_coco) | Faster RCNN Tf | object detection | 600x1024 |
|
||||
| [googlenet-v1-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/googlenet-v1-tf) | GoogLeNet_ILSVRC-2012 | classification | 224x224 |
|
||||
| [inception-v3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/googlenet-v3) | Inception v3 Tf | classification | 299x299 |
|
||||
| [mobilenet-ssd-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-ssd) | SSD (MobileNet)_COCO-2017_Caffe | object detection | 300x300 |
|
||||
| [mobilenet-v1-1.0-224-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v1-1.0-224-tf) | MobileNet v1 Tf | classification | 224x224 |
|
||||
| [mobilenet-v2-1.0-224-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v2-1.0-224) | MobileNet v2 Tf | classification | 224x224 |
|
||||
| [mobilenet-v2-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v2-pytorch ) | Mobilenet V2 PyTorch | classification | 224x224 |
|
||||
| [resnet-18-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-18-pytorch) | ResNet-18 PyTorch | classification | 224x224 |
|
||||
| [resnet-50-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-pytorch) | ResNet-50 v1 PyTorch | classification | 224x224 |
|
||||
| [resnet-50-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) | ResNet-50_v1_ILSVRC-2012 | classification | 224x224 |
|
||||
| [se-resnext-50-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/se-resnext-50) | Se-ResNext-50_ILSVRC-2012_Caffe | classification | 224x224 |
|
||||
| [squeezenet1.1-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/squeezenet1.1) | SqueezeNet_v1.1_ILSVRC-2012_Caffe | classification | 227x227 |
|
||||
| [ssd300-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd300) | SSD (VGG-16)_VOC-2007_Caffe | object detection | 300x300 |
|
||||
| [yolo_v3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v3-tf) | TF Keras YOLO v3 Modelset | object detection | 300x300 |
|
||||
| [yolo_v4-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf) | Yolo-V4 TF | object detection | 608x608 |
|
||||
| [ssd_mobilenet_v1_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd_mobilenet_v1_coco) | ssd_mobilenet_v1_coco | object detection | 300x300 |
|
||||
| [ssdlite_mobilenet_v2-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssdlite_mobilenet_v2) | ssd_mobilenet_v2 | object detection | 300x300 |
| [unet-camvid-onnx-0001](https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/intel/unet-camvid-onnx-0001/description/unet-camvid-onnx-0001.md) | U-Net | semantic segmentation | 368x480 |

| **Model** | **Public Network** | **Task** | **Input Size** (Height x Width) |
|-----------|--------------------|----------|----------------------------------|
| [bert-large-uncased-whole-word-masking-squad](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/intel/bert-large-uncased-whole-word-masking-squad-int8-0001) | BERT-large | question / answer | 384 |
| [brain-tumor-segmentation-0001-MXNET](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/brain-tumor-segmentation-0001) | brain-tumor-segmentation-0001 | semantic segmentation | 128x128x128 |
| [brain-tumor-segmentation-0002-CF2](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/brain-tumor-segmentation-0002) | brain-tumor-segmentation-0002 | semantic segmentation | 128x128x128 |
| [deeplabv3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/deeplabv3) | DeepLab v3 Tf | semantic segmentation | 513x513 |
| [densenet-121-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/densenet-121-tf) | Densenet-121 Tf | classification | 224x224 |
| [facenet-20180408-102900-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/facenet-20180408-102900) | FaceNet TF | face recognition | 160x160 |
| [faster_rcnn_resnet50_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/faster_rcnn_resnet50_coco) | Faster RCNN Tf | object detection | 600x1024 |
| [inception-v4-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/public/googlenet-v4-tf) | Inception v4 Tf (aka GoogleNet-V4) | classification | 299x299 |
| [inception-v3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/googlenet-v3) | Inception v3 Tf | classification | 299x299 |
| [mobilenet-ssd-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-ssd) | SSD (MobileNet)_COCO-2017_Caffe | object detection | 300x300 |
| [mobilenet-v2-1.0-224-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v2-1.0-224) | MobileNet v2 Tf | classification | 224x224 |
| [mobilenet-v2-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/mobilenet-v2-pytorch) | Mobilenet V2 PyTorch | classification | 224x224 |
| [resnet-18-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-18-pytorch) | ResNet-18 PyTorch | classification | 224x224 |
| [resnet-50-pytorch](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-pytorch) | ResNet-50 v1 PyTorch | classification | 224x224 |
| [resnet-50-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) | ResNet-50_v1_ILSVRC-2012 | classification | 224x224 |
| [se-resnext-50-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/se-resnext-50) | Se-ResNext-50_ILSVRC-2012_Caffe | classification | 224x224 |
| [squeezenet1.1-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/squeezenet1.1) | SqueezeNet_v1.1_ILSVRC-2012_Caffe | classification | 227x227 |
| [ssd300-CF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd300) | SSD (VGG-16)_VOC-2007_Caffe | object detection | 300x300 |
| [yolo_v4-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf) | Yolo-V4 TF | object detection | 608x608 |
| [ssd_mobilenet_v1_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd_mobilenet_v1_coco) | ssd_mobilenet_v1_coco | object detection | 300x300 |
| [ssdlite_mobilenet_v2-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssdlite_mobilenet_v2) | ssdlite_mobilenet_v2 | object detection | 300x300 |
| [unet-camvid-onnx-0001](https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/intel/unet-camvid-onnx-0001/description/unet-camvid-onnx-0001.md) | U-Net | semantic segmentation | 368x480 |
| [yolo-v3-tiny-tf](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/public/yolo-v3-tiny-tf) | YOLO v3 Tiny | object detection | 416x416 |
| [ssd-resnet34-1200-onnx](https://github.com/openvinotoolkit/open_model_zoo/tree/develop/models/public/ssd-resnet34-1200-onnx) | ssd-resnet34 onnx model | object detection | 1200x1200 |
| [vgg19-caffe](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/vgg19-caffe2) | VGG-19 | classification | 224x224 |

#### 7. Where can I purchase the specific hardware used in the benchmarking?
|
||||
Intel partners with various vendors all over the world. Visit the [Intel® AI: In Production Partners & Solutions Catalog](https://www.intel.com/content/www/us/en/internet-of-things/ai-in-production/partners-solutions-catalog.html) for a list of Equipment Makers and the [Supported Devices](../IE_DG/supported_plugins/Supported_Devices.md) documentation. You can also remotely test and run models before purchasing any hardware by using [Intel® DevCloud for the Edge](http://devcloud.intel.com/edge/).
|
||||
|
||||
|
@ -29,81 +29,86 @@ Measuring inference performance involves many variables and is extremely use-cas
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="bert-large-uncased-whole-word-masking-squad-int8-0001-ov-2021-3-338-5.js" id="bert-large-uncased-whole-word-masking-squad-int8-0001-ov-2021-3-338-5"></script>
|
||||
<script src="bert-large-uncased-whole-word-masking-squad-int8-0001-384-ov-2021-4-569.js" id="bert-large-uncased-whole-word-masking-squad-int8-0001-384-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="deeplabv3-tf-ov-2021-3-338-5.js" id="deeplabv3-tf-ov-2021-3-338-5"></script>
|
||||
<script src="deeplabv3-tf-513x513-ov-2021-4-569.js" id="deeplabv3-tf-513x513-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="densenet-121-tf-ov-2021-3-338-5.js" id="densenet-121-tf-ov-2021-3-338-5"></script>
|
||||
<script src="densenet-121-tf-224x224-ov-2021-4-569.js" id="densenet-121-tf-224x224-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="faster-rcnn-resnet50-coco-tf-ov-2021-3-338-5.js" id="faster-rcnn-resnet50-coco-tf-ov-2021-3-338-5"></script>
|
||||
<script src="faster-rcnn-resnet50-coco-tf-600x1024-ov-2021-4-569.js" id="faster-rcnn-resnet50-coco-tf-600x1024-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="googlenet-v1-tf-ov-2021-3-338-5.js" id="googlenet-v1-tf-ov-2021-3-338-5"></script>
|
||||
<script src="inception-v3-tf-299x299-ov-2021-4-569.js" id="inception-v3-tf-299x299-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="inception-v3-tf-ov-2021-3-338-5.js" id="inception-v3-tf-ov-2021-3-338-5"></script>
|
||||
<script src="inception-v4-tf-299x299-ov-2021-4-569.js" id="inception-v4-tf-299x299-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-ssd-cf-ov-2021-3-338-5.js" id="mobilenet-ssd-cf-ov-2021-3-338-5"></script>
|
||||
<script src="mobilenet-ssd-cf-300x300-ov-2021-4-569.js" id="mobilenet-ssd-cf-300x300-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-v1-1-0-224-tf-ov-2021-3-338-5.js" id="mobilenet-v1-1-0-224-tf-ov-2021-3-338-5"></script>
|
||||
<script src="mobilenet-v2-pytorch-224x224-ov-2021-4-569.js" id="mobilenet-v2-pytorch-224x224-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-v2-pytorch-ov-2021-3-338-5.js" id="mobilenet-v2-pytorch-ov-2021-3-338-5"></script>
|
||||
<script src="resnet-18-pytorch-224x224-ov-2021-4-569.js" id="resnet-18-pytorch-224x224-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="resnet-18-pytorch-ov-2021-3-338-5.js" id="resnet-18-pytorch-ov-2021-3-338-5"></script>
|
||||
<script src="resnet-50-tf-224x224-ov-2021-4-569.js" id="resnet-50-tf-224x224-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="resnet-50-tf-ov-2021-3-338-5.js" id="resnet-50-tf-ov-2021-3-338-5"></script>
|
||||
<script src="se-resnext-50-cf-224x224-ov-2021-4-569.js" id="se-resnext-50-cf-224x224-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="squeezenet1-1-cf-227x227-ov-2021-4-569.js" id="squeezenet1-1-cf-227x227-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="se-resnext-50-cf-ov-2021-3-338-5.js" id="se-resnext-50-cf-ov-2021-3-338-5"></script>
|
||||
<script src="ssd300-cf-300x300-ov-2021-4-569.js" id="ssd300-cf-300x300-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="squeezenet1-1-cf-ov-2021-3-338-5.js" id="squeezenet1-1-cf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="ssd300-cf-ov-2021-3-338-5.js" id="ssd300-cf-ov-2021-3-338-5"></script>
|
||||
<script src="yolo-v3-tiny-tf-416x416-ov-2021-4-569.js" id="yolo-v3-tiny-tf-416x416-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="yolo-v3-tf-ov-2021-3-338-5.js" id="yolo-v3-tf-ov-2021-3-338-5"></script>
|
||||
<script src="yolo-v4-tf-608x608-ov-2021-4-569.js" id="yolo-v4-tf-608x608-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="yolo-v4-tf-ov-2021-3-338-5.js" id="yolo-v4-tf-ov-2021-3-338-5"></script>
|
||||
<script src="unet-camvid-onnx-0001-368x480-ov-2021-4-569.js" id="unet-camvid-onnx-0001-368x480-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="unet-camvid-onnx-0001-ov-2021-3-338-5.js" id="unet-camvid-onnx-0001-ov-2021-3-338-5"></script>
|
||||
<script src="ssd-resnet34-1200-onnx-1200x1200-ov-2021-4-569.js" id="ssd-resnet34-1200-onnx-1200x1200-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="vgg19-caffe-224x224-ov-2021-4-569.js" id="vgg19-caffe-224x224-ov-2021-4-569"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
|
||||
|
||||
## Platform Configurations
|
||||
|
||||
Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.3.
|
||||
Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.4.
|
||||
|
||||
Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of March 15, 2021 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure.
|
||||
Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of June 18, 2021 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure.
|
||||
|
||||
Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex).
|
||||
|
||||
@ -127,15 +132,15 @@ Testing by Intel done on: see test date for each HW platform below.
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc. | Intel Corporation |
|
||||
| BIOS Version | 0904 | 607 | SE5C620.86B.02.01.<br>0009.092820190230 |
|
||||
| BIOS Release | April 12, 2019 | May 29, 2020 | September 28, 2019 |
|
||||
| BIOS Version | 0904 | 607 | SE5C620.86B.02.01.<br>0013.121520200651 |
|
||||
| BIOS Release | April 12, 2019 | May 29, 2020 | December 15, 2020 |
|
||||
| BIOS Settings | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>change power policy <br>to "performance", <br>save & exit |
|
||||
| Batch size | 1 | 1 | 1
|
||||
| Precision | INT8 | INT8 | INT8
|
||||
| Number of concurrent inference requests | 4 | 5 | 32
|
||||
| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021
|
||||
| Power dissipation, TDP in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) |
|
||||
| CPU Price on March 15th, 2021, USD<br>Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [539](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) |[1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) |
|
||||
| Test Date | June 18, 2021 | June 18, 2021 | June 18, 2021
|
||||
| Rated maximum TDP/socket in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) |
|
||||
| CPU Price/socket on June 21, 2021, USD<br>Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [539](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) |[1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) |
|
||||
|
||||
**CPU Inference Engines (continue)**
|
||||
|
||||
@ -149,84 +154,104 @@ Testing by Intel done on: see test date for each HW platform below.
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | Intel Corporation | Intel Corporation | Intel Corporation |
|
||||
| BIOS Version | SE5C620.86B.02.01.<br>0009.092820190230 | SE5C620.86B.02.01.<br>0009.092820190230 | WLYDCRB1.SYS.0020.<br>P86.2103050636 |
|
||||
| BIOS Release | September 28, 2019 | September 28, 2019 | March 5, 2021 |
|
||||
| BIOS Version | SE5C620.86B.02.01.<br>0013.121520200651 | SE5C620.86B.02.01.<br>0013.121520200651 | WLYDCRB1.SYS.0020.<br>P86.2103050636 |
|
||||
| BIOS Release | December 15, 2020 | December 15, 2020 | March 5, 2021 |
|
||||
| BIOS Settings | Select optimized default settings, <br>change power policy to "performance", <br>save & exit | Select optimized default settings, <br>change power policy to "performance", <br>save & exit | Select optimized default settings, <br>change power policy to "performance", <br>save & exit |
|
||||
| Batch size | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests |32 | 52 | 80 |
|
||||
| Test Date | March 15, 2021 | March 15, 2021 | March 22, 2021 |
|
||||
| Power dissipation, TDP in Watt | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) | [270](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) |
|
||||
| CPU Price, USD<br>Prices may vary | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) (on March 15th, 2021) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) (on March 15th, 2021) | [8,099](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) (on March 26th, 2021) |
|
||||
| Test Date | June 18, 2021 | June 18, 2021 | June 18, 2021 |
|
||||
| Rated maximum TDP/socket in Watt | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) | [270](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) |
|
||||
| CPU Price/socket on June 21, 2021, USD<br>Prices may vary | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) | [8,099](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) |
|
||||
|
||||
|
||||
**CPU Inference Engines (continue)**
|
||||
|
||||
| | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X | 11th Gen Intel® Core™ i7-1185G7 |
|
||||
| -------------------- | ----------------------------------- |--------------------------------------| --------------------------------|
|
||||
| Motherboard | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II | Intel Corporation<br>internal/Reference<br>Validation Platform |
|
||||
| CPU | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | 11th Gen Intel® Core™ i7-1185G7 @ 3.00GHz |
|
||||
| Hyper Threading | ON | ON | ON |
|
||||
| Turbo Setting | ON | ON | ON |
|
||||
| Memory | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz | 2 x 8 GB DDR4 3200MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.8.0-05-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | Intel Corporation |
|
||||
| BIOS Version | F11 | 505 | TGLSFWI1.R00.3425.<br>A00.2010162309 |
|
||||
| BIOS Release | March 13, 2019 | December 17, 2019 | October 16, 2020 |
|
||||
| BIOS Settings | Select optimized default settings, <br>set OS type to "other", <br>save & exit | Default Settings | Default Settings |
|
||||
| Batch size | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests |4 | 24 | 4 |
|
||||
| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021 |
|
||||
| Power dissipation, TDP in Watt | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [28](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html#tab-blade-1-0-1) |
|
||||
| CPU Price on March 15th, 2021, USD<br>Prices may vary | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [426](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html#tab-blade-1-0-0) |
|
||||
| | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X |
|
||||
| -------------------- | ----------------------------------- |--------------------------------------|
|
||||
| Motherboard | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II |
|
||||
| CPU | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz |
|
||||
| Hyper Threading | ON | ON |
|
||||
| Turbo Setting | ON | ON |
|
||||
| Memory | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* |
|
||||
| BIOS Version | F14c | 1004 |
|
||||
| BIOS Release | March 23, 2021 | March 19, 2021 |
|
||||
| BIOS Settings | Select optimized default settings, <br>set OS type to "other", <br>save & exit | Default Settings |
|
||||
| Batch size | 1 | 1 |
|
||||
| Precision | INT8 | INT8 |
|
||||
| Number of concurrent inference requests |4 | 24 |
|
||||
| Test Date | June 18, 2021 | June 18, 2021 |
|
||||
| Rated maximum TDP/socket in Watt | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) |
|
||||
| CPU Price/socket on June 21, 2021, USD<br>Prices may vary | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) |
|
||||
|
||||
**CPU Inference Engines (continue)**
|
||||
| | 11th Gen Intel® Core™ i7-1185G7 | 11th Gen Intel® Core™ i7-11850HE |
|
||||
| -------------------- | --------------------------------|----------------------------------|
|
||||
| Motherboard | Intel Corporation<br>internal/Reference<br>Validation Platform | Intel Corporation<br>internal/Reference<br>Validation Platform |
|
||||
| CPU | 11th Gen Intel® Core™ i7-1185G7 @ 3.00GHz | 11th Gen Intel® Core™ i7-11850HE @ 2.60GHz |
|
||||
| Hyper Threading | ON | ON |
|
||||
| Turbo Setting | ON | ON |
|
||||
| Memory | 2 x 8 GB DDR4 3200MHz | 2 x 16 GB DDR4 3200MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04.4 LTS |
|
||||
| Kernel Version | 5.8.0-05-generic | 5.8.0-050800-generic |
|
||||
| BIOS Vendor | Intel Corporation | Intel Corporation |
|
||||
| BIOS Version | TGLSFWI1.R00.3425.<br>A00.2010162309 | TGLIFUI1.R00.4064.<br>A01.2102200132 |
|
||||
| BIOS Release | October 16, 2020 | February 20, 2021 |
|
||||
| BIOS Settings | Default Settings | Default Settings |
|
||||
| Batch size | 1 | 1 |
|
||||
| Precision | INT8 | INT8 |
|
||||
| Number of concurrent inference requests |4 | 4 |
|
||||
| Test Date | June 18, 2021 | June 18, 2021 |
|
||||
| Rated maximum TDP/socket in Watt | [28](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html) | [45](https://ark.intel.com/content/www/us/en/ark/products/213799/intel-core-i7-11850h-processor-24m-cache-up-to-4-80-ghz.html) |
|
||||
| CPU Price/socket on June 21, 2021, USD<br>Prices may vary | [426](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html) | [395](https://ark.intel.com/content/www/us/en/ark/products/213799/intel-core-i7-11850h-processor-24m-cache-up-to-4-80-ghz.html) |
|
||||
|
||||
**CPU Inference Engines (continue)**
|
||||
|
||||
| | Intel® Core™ i3-8100 | Intel® Core™ i5-8500 | Intel® Core™ i5-10500TE |
|
||||
| -------------------- |----------------------------------- | ---------------------------------- | ----------------------------------- |
|
||||
| Motherboard | GIGABYTE* Z390 UD | ASUS* PRIME Z370-A | GIGABYTE* Z490 AORUS PRO AX |
|
||||
| CPU | Intel® Core™ i3-8100 CPU @ 3.60GHz | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz |
|
||||
| Hyper Threading | OFF | OFF | ON |
|
||||
| Turbo Setting | OFF | ON | ON |
|
||||
| Memory | 4 x 8 GB DDR4 2400MHz | 2 x 16 GB DDR4 2666MHz | 2 x 16 GB DDR4 @ 2666MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | American Megatrends Inc.* |
|
||||
| BIOS Version | F8 | 2401 | F3 |
|
||||
| BIOS Release | May 24, 2019 | July 12, 2019 | March 25, 2020 |
|
||||
| BIOS Settings | Select optimized default settings, <br> set OS type to "other", <br>save & exit | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>set OS type to "other", <br>save & exit |
|
||||
| Batch size | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 4 | 3 | 4 |
|
||||
| Test Date | June 18, 2021 | June 18, 2021 | June 18, 2021 |
|
||||
| Rated maximum TDP/socket in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html#tab-blade-1-0-1)| [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1)| [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) |
|
||||
| CPU Price/socket on June 21, 2021, USD<br>Prices may vary | [117](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html) | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) |
|
||||
|
||||
|
||||
**CPU Inference Engines (continue)**
|
||||
|
||||
| | Intel® Core™ i5-8500 | Intel® Core™ i5-10500TE |
|
||||
| -------------------- | ---------------------------------- | ----------------------------------- |
|
||||
| Motherboard | ASUS* PRIME Z370-A | GIGABYTE* Z490 AORUS PRO AX |
|
||||
| CPU | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz |
|
||||
| Hyper Threading | OFF | ON |
|
||||
| Turbo Setting | ON | ON |
|
||||
| Memory | 2 x 16 GB DDR4 2666MHz | 2 x 16 GB DDR4 @ 2666MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* |
|
||||
| BIOS Version | 2401 | F3 |
|
||||
| BIOS Release | July 12, 2019 | March 25, 2020 |
|
||||
| BIOS Settings | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>set OS type to "other", <br>save & exit |
|
||||
| Batch size | 1 | 1 |
|
||||
| Precision | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 3 | 4 |
|
||||
| Test Date | March 15, 2021 | March 15, 2021 |
|
||||
| Power dissipation, TDP in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1)| [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) |
|
||||
| CPU Price on March 15th, 2021, USD<br>Prices may vary | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) |
|
||||
|
||||
|
||||
**CPU Inference Engines (continue)**
|
||||
|
||||
| | Intel Atom® x5-E3940 | Intel Atom® x6425RE | Intel® Core™ i3-8100 |
|
||||
| -------------------- | --------------------------------------|------------------------------- |----------------------------------- |
|
||||
| Motherboard | | Intel Corporation /<br>ElkhartLake LPDDR4x T3 CRB | GIGABYTE* Z390 UD |
|
||||
| CPU | Intel Atom® Processor E3940 @ 1.60GHz | Intel Atom® x6425RE<br>Processor @ 1.90GHz | Intel® Core™ i3-8100 CPU @ 3.60GHz |
|
||||
| Hyper Threading | OFF | OFF | OFF |
|
||||
| Turbo Setting | ON | ON | OFF |
|
||||
| Memory | 1 x 8 GB DDR3 1600MHz | 2 x 4GB DDR4 3200 MHz | 4 x 8 GB DDR4 2400MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.8.0-050800-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | Intel Corporation | American Megatrends Inc.* |
|
||||
| BIOS Version | 5.12 | EHLSFWI1.R00.2463.<br>A03.2011200425 | F8 |
|
||||
| BIOS Release | September 6, 2017 | November 22, 2020 | May 24, 2019 |
|
||||
| BIOS Settings | Default settings | Default settings | Select optimized default settings, <br> set OS type to "other", <br>save & exit |
|
||||
| Batch size | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 4 | 4 | 4 |
|
||||
| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021 |
|
||||
| Power dissipation, TDP in Watt | [9.5](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [12](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) | [65](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html#tab-blade-1-0-1)|
|
||||
| CPU Price, USD<br>Prices may vary | [34](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) (on March 15th, 2021) | [59](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) (on March 26th, 2021) | [117](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html) (on March 15th, 2021) |
|
||||
| | Intel Atom® x5-E3940 | Intel Atom® x6425RE | Intel® Celeron® 6305E |
|
||||
| -------------------- | --------------------------------------|------------------------------- |----------------------------------|
|
||||
| Motherboard | Intel Corporation<br>internal/Reference<br>Validation Platform | Intel Corporation<br>internal/Reference<br>Validation Platform | Intel Corporation<br>internal/Reference<br>Validation Platform |
|
||||
| CPU | Intel Atom® Processor E3940 @ 1.60GHz | Intel Atom® x6425RE<br>Processor @ 1.90GHz | Intel® Celeron®<br>6305E @ 1.80GHz |
|
||||
| Hyper Threading | OFF | OFF | OFF |
|
||||
| Turbo Setting | ON | ON | ON |
|
||||
| Memory | 1 x 8 GB DDR3 1600MHz | 2 x 4GB DDR4 3200MHz | 2 x 8 GB DDR4 3200MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu 18.04.5 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.8.0-050800-generic | 5.8.0-050800-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | Intel Corporation | Intel Corporation |
|
||||
| BIOS Version | 5.12 | EHLSFWI1.R00.2463.<br>A03.2011200425 | TGLIFUI1.R00.4064.A02.2102260133 |
|
||||
| BIOS Release | September 6, 2017 | November 22, 2020 | February 26, 2021 |
|
||||
| BIOS Settings | Default settings | Default settings | Default settings |
|
||||
| Batch size | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 4 | 4 | 4|
|
||||
| Test Date | June 18, 2021 | June 18, 2021 | June 18, 2021 |
|
||||
| Rated maximum TDP/socket in Watt | [9.5](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [12](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) | [15](https://ark.intel.com/content/www/us/en/ark/products/208072/intel-celeron-6305e-processor-4m-cache-1-80-ghz.html)|
|
||||
| CPU Price/socket on June 21, 2021, USD<br>Prices may vary | [34](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [59](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) |[107](https://ark.intel.com/content/www/us/en/ark/products/208072/intel-celeron-6305e-processor-4m-cache-1-80-ghz.html) |
|
||||
|
||||
|
||||
|
||||
@ -239,8 +264,8 @@ Testing by Intel done on: see test date for each HW platform below.
|
||||
| Batch size | 1 | 1 |
|
||||
| Precision | FP16 | FP16 |
|
||||
| Number of concurrent inference requests | 4 | 32 |
|
||||
| Power dissipation, TDP in Watt | 2.5 | [30](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) |
|
||||
| CPU Price, USD<br>Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) (from March 15, 2021) | [1180](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) (from March 15, 2021) |
|
||||
| Rated maximum TDP/socket in Watt | 2.5 | [30](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) |
|
||||
| CPU Price/socket on June 21, 2021, USD<br>Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) | [425](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) |
|
||||
| Host Computer | Intel® Core™ i7 | Intel® Core™ i5 |
|
||||
| Motherboard | ASUS* Z370-A II | Uzelinfo* / US-E1300 |
|
||||
| CPU | Intel® Core™ i7-8700 CPU @ 3.20GHz | Intel® Core™ i5-6600 CPU @ 3.30GHz |
|
||||
@ -252,9 +277,9 @@ Testing by Intel done on: see test date for each HW platform below.
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* |
|
||||
| BIOS Version | 411 | 5.12 |
|
||||
| BIOS Release | September 21, 2018 | September 21, 2018 |
|
||||
| Test Date | March 15, 2021 | March 15, 2021 |
|
||||
| Test Date | June 18, 2021 | June 18, 2021 |
|
||||
|
||||
Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.3.html)
|
||||
Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.4.html)
|
||||
|
||||
\htmlonly
|
||||
<style>
|
||||
|
@ -18,20 +18,98 @@ OpenVINO™ Model Server is measured in multiple-client-single-server configurat
|
||||
|
||||
* **Execution Controller** is launched on the client platform. It is responsible for synchronization of the whole measurement process, downloading metrics from the load balancer, and presenting the final report of the execution. A rough sketch of this flow is given below.
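As a minimal illustration of that flow, the sketch below polls a metrics endpoint for the duration of a run and then prints a summary. This is not the actual OVMS benchmark tooling: the endpoint URL, the `throughput_fps` metric name, and the polling/run intervals are placeholders, and the `requests` package is assumed to be installed on the client.

```python
import time

import requests  # assumed to be available on the client platform

METRICS_URL = "http://load-balancer.example:9000/metrics"  # hypothetical endpoint
POLL_INTERVAL_S = 5
RUN_DURATION_S = 60


def run_execution_controller():
    """Hypothetical controller: poll metrics during a run, then print a report."""
    samples = []
    deadline = time.time() + RUN_DURATION_S
    while time.time() < deadline:
        # Download the current metrics snapshot from the load balancer.
        reply = requests.get(METRICS_URL, timeout=10)
        reply.raise_for_status()
        samples.append(reply.json())
        time.sleep(POLL_INTERVAL_S)
    # Present the final report of the execution.
    throughputs = [s.get("throughput_fps", 0.0) for s in samples]
    mean_fps = sum(throughputs) / max(len(throughputs), 1)
    print(f"collected {len(samples)} samples, mean throughput: {mean_fps:.2f} fps")


if __name__ == "__main__":
    run_execution_controller()
```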
## 3D U-Net (FP32)
|
||||

|
||||
## resnet-50-TF (INT8)
|
||||

|
||||
## resnet-50-TF (FP32)
|
||||

|
||||
## bert-large-uncased-whole-word-masking-squad-int8-0001 (INT8)
|
||||

|
||||
|
||||

|
||||
## 3D U-Net (FP32)
|
||||

|
||||
## yolo-v3-tf (FP32)
|
||||

|
||||
## yolo-v3-tiny-tf (FP32)
|
||||

|
||||
## yolo-v4-tf (FP32)
|
||||

|
||||
## bert-small-uncased-whole-word-masking-squad-0002 (FP32)
|
||||

|
||||
## bert-small-uncased-whole-word-masking-squad-int8-0002 (INT8)
|
||||

|
||||
## bert-large-uncased-whole-word-masking-squad-0001 (FP32)
|
||||

|
||||
## bert-large-uncased-whole-word-masking-squad-int8-0001 (INT8)
|
||||

|
||||
## mobilenet-v3-large-1.0-224-tf (FP32)
|
||||

|
||||
## ssd_mobilenet_v1_coco (FP32)
|
||||

|
||||
|
||||
## Platform Configurations
|
||||
|
||||
OpenVINO™ Model Server performance benchmark numbers are based on release 2021.3. Performance results are based on testing as of March 15, 2021 and may not reflect all publicly available updates.
|
||||
OpenVINO™ Model Server performance benchmark numbers are based on release 2021.4. Performance results are based on testing as of June 17, 2021 and may not reflect all publicly available updates.
|
||||
|
||||
**Platform with Intel® Xeon® Platinum 8260M**
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th></th>
|
||||
<th><strong>Server Platform</strong></th>
|
||||
<th><strong>Client Platform</strong></th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Motherboard</strong></td>
|
||||
<td>Inspur YZMB-00882-104 NF5280M5</td>
|
||||
<td>Intel® Server Board S2600WF H48104-872</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Memory</strong></td>
|
||||
<td>Samsung 16 x 16GB @ 2666 MT/s DDR4</td>
|
||||
<td>Hynix 16 x 16GB @ 2666 MT/s DDR4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>CPU</strong></td>
|
||||
<td>Intel® Xeon® Platinum 8260M CPU @ 2.40GHz</td>
|
||||
<td>Intel® Xeon® Gold 6252 CPU @ 2.10GHz</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Selected CPU Flags</strong></td>
|
||||
<td>Hyper Threading, Turbo Boost, DL Boost</td>
|
||||
<td>Hyper Threading, Turbo Boost, DL Boost</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>CPU Thermal Design Power</strong></td>
|
||||
<td>162 W</td>
|
||||
<td>150 W</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Operating System</strong></td>
|
||||
<td>Ubuntu 20.04.2 LTS</td>
|
||||
<td>Ubuntu 20.04.2 LTS</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Kernel Version</strong></td>
|
||||
<td>5.4.0-54-generic</td>
|
||||
<td>5.4.0-65-generic</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>BIOS Vendor</strong></td>
|
||||
<td>American Megatrends Inc.</td>
|
||||
<td>Intel® Corporation</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>BIOS Version & Release</strong></td>
|
||||
<td>4.1.16, date: 06/23/2020</td>
|
||||
<td>SE5C620.86B.02.01, date: 03/26/2020</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Docker Version</strong></td>
|
||||
<td>20.10.3</td>
|
||||
<td>20.10.3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Network Speed</strong></td>
|
||||
<td colspan="2">40 Gb/s</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
**Platform with Intel® Xeon® Gold 6252**
|
||||
|
||||
@ -65,7 +143,7 @@ OpenVINO™ Model Server performance benchmark numbers are based on release 2021
|
||||
<td><strong>CPU Thermal Design Power</strong></td>
|
||||
<td>150 W</td>
|
||||
<td>162 W</td>
|
||||
</tr>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Operating System</strong></td>
|
||||
<td>Ubuntu 20.04.2 LTS</td>
|
||||
|
@ -20,25 +20,25 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>bert-large-<br>uncased-whole-word-<br>masking-squad-0001</td>
|
||||
<td>SQuAD</td>
|
||||
<td>1.6</td>
|
||||
<td>3.0</td>
|
||||
<td>1.6</td>
|
||||
<td>2.3</td>
|
||||
<td>3.1</td>
|
||||
<td>1.5</td>
|
||||
<td>2.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>brain-tumor-<br>segmentation-<br>0001-MXNET</td>
|
||||
<td>BraTS</td>
|
||||
<td>1.6</td>
|
||||
<td>1.9</td>
|
||||
<td>1.7</td>
|
||||
<td>1.7</td>
|
||||
<td>2.0</td>
|
||||
<td>1.8</td>
|
||||
<td>1.8</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>deeplabv3-TF</td>
|
||||
<td>VOC 2012<br>Segmentation</td>
|
||||
<td>2.1</td>
|
||||
<td>3.1</td>
|
||||
<td>3.1</td>
|
||||
<td>1.9</td>
|
||||
<td>3.0</td>
|
||||
<td>2.8</td>
|
||||
<td>3.1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>densenet-121-TF</td>
|
||||
@ -51,7 +51,7 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<tr>
|
||||
<td>facenet-<br>20180408-<br>102900-TF</td>
|
||||
<td>LFW</td>
|
||||
<td>2.0</td>
|
||||
<td>2.1</td>
|
||||
<td>3.6</td>
|
||||
<td>2.2</td>
|
||||
<td>3.7</td>
|
||||
@ -60,17 +60,9 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>faster_rcnn_<br>resnet50_coco-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.9</td>
|
||||
<td>3.8</td>
|
||||
<td>3.7</td>
|
||||
<td>2.0</td>
|
||||
<td>3.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>googlenet-v1-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.8</td>
|
||||
<td>3.6</td>
|
||||
<td>2.0</td>
|
||||
<td>3.9</td>
|
||||
<td>3.4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>inception-v3-TF</td>
|
||||
@ -78,24 +70,16 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>1.9</td>
|
||||
<td>3.8</td>
|
||||
<td>2.0</td>
|
||||
<td>4.0</td>
|
||||
<td>4.1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-<br>ssd-CF</td>
|
||||
<td>VOC2012</td>
|
||||
<td>1.7</td>
|
||||
<td>1.6</td>
|
||||
<td>3.1</td>
|
||||
<td>1.8</td>
|
||||
<td>1.9</td>
|
||||
<td>3.6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v1-1.0-<br>224-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.7</td>
|
||||
<td>3.1</td>
|
||||
<td>1.8</td>
|
||||
<td>4.1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v2-1.0-<br>224-TF</td>
|
||||
<td>ImageNet</td>
|
||||
@ -107,10 +91,10 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<tr>
|
||||
<td>mobilenet-v2-<br>pytorch</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.6</td>
|
||||
<td>1.7</td>
|
||||
<td>2.4</td>
|
||||
<td>1.9</td>
|
||||
<td>3.9</td>
|
||||
<td>4.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>resnet-18-<br>pytorch</td>
|
||||
@ -124,7 +108,7 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>resnet-50-<br>pytorch</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.9</td>
|
||||
<td>3.7</td>
|
||||
<td>3.6</td>
|
||||
<td>2.0</td>
|
||||
<td>3.9</td>
|
||||
</tr>
|
||||
@ -147,16 +131,16 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<tr>
|
||||
<td>ssd_mobilenet_<br>v1_coco-tf</td>
|
||||
<td>VOC2012</td>
|
||||
<td>1.7</td>
|
||||
<td>3.0</td>
|
||||
<td>1.9</td>
|
||||
<td>1.8</td>
|
||||
<td>3.1</td>
|
||||
<td>2.0</td>
|
||||
<td>3.6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd300-CF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.8</td>
|
||||
<td>4.4</td>
|
||||
<td>4.2</td>
|
||||
<td>1.9</td>
|
||||
<td>3.9</td>
|
||||
</tr>
|
||||
@ -165,33 +149,57 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>MS COCO</td>
|
||||
<td>1.7</td>
|
||||
<td>2.5</td>
|
||||
<td>2.2</td>
|
||||
<td>3.4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>yolo_v3-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.8</td>
|
||||
<td>4.0</td>
|
||||
<td>1.9</td>
|
||||
<td>3.9</td>
|
||||
<td>2.4</td>
|
||||
<td>3.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>yolo_v4-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.7</td>
|
||||
<td>1.9</td>
|
||||
<td>3.6</td>
|
||||
<td>2.0</td>
|
||||
<td>3.4</td>
|
||||
<td>1.7</td>
|
||||
<td>2.8</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>unet-camvid-onnx-0001</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.6</td>
|
||||
<td>3.8</td>
|
||||
<td>1.6</td>
|
||||
<td>1.7</td>
|
||||
<td>3.9</td>
|
||||
<td>1.7</td>
|
||||
<td>3.7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd-resnet34-<br>1200-onnx</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.7</td>
|
||||
<td>4.0</td>
|
||||
<td>1.7</td>
|
||||
<td>3.4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>googlenet-v4-tf</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.9</td>
|
||||
<td>3.9</td>
|
||||
<td>2.0</td>
|
||||
<td>4.1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>vgg19-caffe</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.9</td>
|
||||
<td>4.7</td>
|
||||
<td>2.0</td>
|
||||
<td>4.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>yolo-v3-tiny-tf</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.7</td>
|
||||
<td>3.4</td>
|
||||
<td>1.9</td>
|
||||
<td>3.5</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
The following table shows the absolute accuracy drop that is calculated as the difference in accuracy between the FP32 representation of a model and its INT8 representation.
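Spelled out as a formula, this is simply a restatement of the sentence above; reading "absolute" as the magnitude of the difference is an assumption:

```latex
\text{accuracy drop} = \left|\,\mathrm{acc}_{\mathrm{FP32}} - \mathrm{acc}_{\mathrm{INT8}}\,\right|
```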
@ -217,18 +225,18 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>SQuAD</td>
|
||||
<td>F1</td>
|
||||
<td>0.62</td>
|
||||
<td>0.88</td>
|
||||
<td>0.52</td>
|
||||
<td>0.71</td>
|
||||
<td>0.62</td>
|
||||
<td>0.62</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>brain-tumor-<br>segmentation-<br>0001-MXNET</td>
|
||||
<td>BraTS</td>
|
||||
<td>Dice-index@ <br>Mean@ <br>Overall Tumor</td>
|
||||
<td>0.09</td>
|
||||
<td>0.08</td>
|
||||
<td>0.10</td>
|
||||
<td>0.11</td>
|
||||
<td>0.09</td>
|
||||
<td>0.10</td>
|
||||
<td>0.08</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>deeplabv3-TF</td>
|
||||
@ -243,10 +251,10 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>densenet-121-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.54</td>
|
||||
<td>0.57</td>
|
||||
<td>0.57</td>
|
||||
<td>0.54</td>
|
||||
<td>0.49</td>
|
||||
<td>0.56</td>
|
||||
<td>0.56</td>
|
||||
<td>0.49</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>facenet-<br>20180408-<br>102900-TF</td>
|
||||
@ -261,46 +269,28 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>faster_rcnn_<br>resnet50_coco-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>coco_<br>precision</td>
|
||||
<td>0.04</td>
|
||||
<td>0.04</td>
|
||||
<td>0.04</td>
|
||||
<td>0.04</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>googlenet-v1-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.01</td>
|
||||
<td>0.00</td>
|
||||
<td>0.00</td>
|
||||
<td>0.01</td>
|
||||
<td>0.09</td>
|
||||
<td>0.09</td>
|
||||
<td>0.09</td>
|
||||
<td>0.09</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>inception-v3-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.04</td>
|
||||
<td>0.00</td>
|
||||
<td>0.00</td>
|
||||
<td>0.04</td>
|
||||
<td>0.02</td>
|
||||
<td>0.01</td>
|
||||
<td>0.01</td>
|
||||
<td>0.02</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-<br>ssd-CF</td>
|
||||
<td>VOC2012</td>
|
||||
<td>mAP</td>
|
||||
<td>0.77</td>
|
||||
<td>0.77</td>
|
||||
<td>0.77</td>
|
||||
<td>0.77</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v1-1.0-<br>224-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.26</td>
|
||||
<td>0.28</td>
|
||||
<td>0.28</td>
|
||||
<td>0.26</td>
|
||||
<td>0.06</td>
|
||||
<td>0.04</td>
|
||||
<td>0.04</td>
|
||||
<td>0.06</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v2-1.0-<br>224-TF</td>
|
||||
@ -342,37 +332,37 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>resnet-50-<br>TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.10</td>
|
||||
<td>0.08</td>
|
||||
<td>0.08</td>
|
||||
<td>0.10</td>
|
||||
<td>0.11</td>
|
||||
<td>0.11</td>
|
||||
<td>0.11</td>
|
||||
<td>0.11</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>squeezenet1.1-<br>CF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.63</td>
|
||||
<td>0.64</td>
|
||||
<td>0.66</td>
|
||||
<td>0.66</td>
|
||||
<td>0.63</td>
|
||||
<td>0.64</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd_mobilenet_<br>v1_coco-tf</td>
|
||||
<td>VOC2012</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.18</td>
|
||||
<td>3.06</td>
|
||||
<td>3.06</td>
|
||||
<td>0.18</td>
|
||||
<td>0.17</td>
|
||||
<td>2.96</td>
|
||||
<td>2.96</td>
|
||||
<td>0.17</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd300-CF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.05</td>
|
||||
<td>0.05</td>
|
||||
<td>0.05</td>
|
||||
<td>0.05</td>
|
||||
<td>0.18</td>
|
||||
<td>3.06</td>
|
||||
<td>3.06</td>
|
||||
<td>0.18</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssdlite_<br>mobilenet_<br>v2-TF</td>
|
||||
@ -383,32 +373,59 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>0.43</td>
|
||||
<td>0.11</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>yolo_v3-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.11</td>
|
||||
<td>0.24</td>
|
||||
<td>0.24</td>
|
||||
<td>0.11</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>yolo_v4-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.01</td>
|
||||
<td>0.09</td>
|
||||
<td>0.09</td>
|
||||
<td>0.01</td>
|
||||
<td>0.06</td>
|
||||
<td>0.03</td>
|
||||
<td>0.03</td>
|
||||
<td>0.06</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>unet-camvid-<br>onnx-0001</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.29</td>
|
||||
<td>0.29</td>
|
||||
<td>0.31</td>
|
||||
<td>0.31</td>
|
||||
<td>0.31</td>
|
||||
<td>0.31</td>
|
||||
<td>0.29</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd-resnet34-<br>1200-onnx</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.02</td>
|
||||
<td>0.03</td>
|
||||
<td>0.03</td>
|
||||
<td>0.02</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>googlenet-v4-tf</td>
|
||||
<td>ImageNet</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.08</td>
|
||||
<td>0.06</td>
|
||||
<td>0.06</td>
|
||||
<td>0.06</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>vgg19-caffe</td>
|
||||
<td>ImageNet</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.02</td>
|
||||
<td>0.04</td>
|
||||
<td>0.04</td>
|
||||
<td>0.02</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>yolo-v3-tiny-tf</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.02</td>
|
||||
<td>0.6</td>
|
||||
<td>0.6</td>
|
||||
<td>0.02</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
@ -51,6 +51,15 @@ def replace_links(content, items, folder, labels, docs_folder):
|
||||
    return content


def add_htmlonly(content):
    # Wrap raw HTML widgets in doxygen \htmlonly/\endhtmlonly so the markup is
    # passed through to the generated HTML output untouched.
    # A <details> tag opens an \htmlonly block that is closed right after the
    # matching </summary>; the closing </details> tag gets its own block.
    content = content.replace('<details>', '\n\\htmlonly\n<details>')
    content = content.replace('</summary>', '</summary>\n\\endhtmlonly')
    content = content.replace('</details>', '\n\\htmlonly\n</details>\n\\endhtmlonly')
    # Embedded <iframe> elements are wrapped as a whole.
    content = content.replace('<iframe', '\n\\htmlonly\n<iframe')
    content = content.replace('</iframe>', '</iframe>\n\\endhtmlonly')
    return content
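For illustration, here is how the wrapper behaves on a small hand-made snippet; the sample string below is invented for this example and is not taken from the docs:

```python
sample = "<details><summary>More</summary>some text</details>"
wrapped = add_htmlonly(sample)
# Every <details>, </summary>, </details>, <iframe>, and </iframe> tag is now
# surrounded by doxygen \htmlonly / \endhtmlonly markers, so doxygen passes the
# raw HTML through to the generated page unchanged.
print(wrapped)
```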
|
||||
|
||||
|
||||
def process_github_md_links(content, items):
|
||||
"""
|
||||
This is a workaround to support github markdown links in doxygen 1.8.12.
|
||||
@ -81,6 +90,7 @@ def process(docs_folder):
|
||||
    content = replace_links(content, inline_links, md_folder, labels, docs_folder)
    content = replace_links(content, reference_links, md_folder, labels, docs_folder)
    content = process_github_md_links(content, github_md_links)
    content = add_htmlonly(content)
    if inline_links or reference_links or github_md_links:
        with open(md_file, 'w', encoding='utf-8') as f:
            f.write(content)
|
||||
|
@ -1,5 +1,6 @@
|
||||
openvino/inference-engine/samples/hello_reshape_ssd/README.md
|
||||
openvino/docs/index.md
|
||||
inference-engine/include/ie_icnn_network.hpp
|
||||
openvino/docs/get_started/get_started_dl_workbench.md
|
||||
openvino/docs/get_started/get_started_linux.md
|
||||
openvino/docs/get_started/get_started_raspbian.md
|
||||
@ -10,10 +11,24 @@ openvino/docs/install_guides/deployment-manager-tool.md
|
||||
openvino/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md
|
||||
openvino/docs/ovsa/ovsa_get_started.md
|
||||
openvino/inference-engine/ie_bridges/c/docs/api_overview.md
|
||||
inference-engine/include/cpp/ie_infer_request.hpp
|
||||
inference-engine/include/ie_parallel.hpp
|
||||
inference-engine/include/gpu/gpu_context_api_ocl.hpp
|
||||
inference-engine/include/gpu/gpu_context_api_va.hpp
|
||||
inference-engine/include/ie_plugin_config.hpp
|
||||
inference-engine/include/ie_unicode.hpp
|
||||
inference-engine/include/vpu/myriad_config.hpp
|
||||
inference-engine/include/vpu/vpu_config.hpp
|
||||
inference-engine/include/vpu/vpu_plugin_config.hpp
|
||||
openvino/docs/benchmarks/performance_int8_vs_fp32.md
|
||||
openvino/docs/get_started/get_started_macos.md
|
||||
openvino/docs/optimization_guide/dldt_optimization_guide.md
|
||||
openvino/docs/IE_DG/ShapeInference.md
|
||||
inference-engine/include/details/ie_so_pointer.hpp
|
||||
inference-engine/include/ie_compound_blob.h
|
||||
inference-engine/include/ie_data.h
|
||||
inference-engine/include/ie_blob.h
|
||||
inference-engine/include/ie_precision.hpp
|
||||
inference-engine/include/ie_remote_context.hpp
|
||||
inference-engine/include/gpu/gpu_context_api_dx.hpp
|
||||
build/docs/openvino_docs.xml
|
||||
openvino/docs/install_guides/installing-openvino-linux-ivad-vpu.md
|
||||
inference-engine/src/inference_engine/include/ie/ie_parallel.hpp
|
||||
|
@ -19,11 +19,10 @@ limitations under the License.
|
||||
<doxygenlayout xmlns:xi="http://www.w3.org/2001/XInclude" version="1.0">
|
||||
<!-- Navigation index tabs for HTML output -->
|
||||
<navindex>
|
||||
<tab id="converting_and_preparing_models" type="usergroup" title="Converting and Preparing Models" url="">
|
||||
<tab id="converting_and_preparing_models" type="usergroup" title="Converting and Preparing Models" url="@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide">
|
||||
<!-- Model Optimizer Developer Guide-->
|
||||
<tab type="usergroup" title="Model Optimizer Developer Guide" url="@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide">
|
||||
<tab type="usergroup" title="Preparing and Optimizing Your Trained Model" url="@ref openvino_docs_MO_DG_prepare_model_Prepare_Trained_Model">
|
||||
<tab type="user" title="Configuring the Model Optimizer" url="@ref openvino_docs_MO_DG_prepare_model_Config_Model_Optimizer"/>
|
||||
<tab type="user" title="Installing Model Optimizer Pre-Requisites" url="@ref openvino_docs_MO_DG_prepare_model_Config_Model_Optimizer"/>
|
||||
<tab type="usergroup" title="Converting a Model to Intermediate Representation (IR)" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model">
|
||||
<tab type="user" title="Converting a Model Using General Conversion Parameters" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General"/>
|
||||
<tab type="user" title="Converting a Caffe* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Caffe"/>
|
||||
@ -57,12 +56,12 @@ limitations under the License.
|
||||
<tab type="user" title="Convert ONNX* GPT-2 Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_GPT2"/>
|
||||
<tab type="user" title="[DEPRECATED] Convert DLRM ONNX* Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM"/>
|
||||
<tab type="usergroup" title="Converting Your PyTorch* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch">
|
||||
<tab type="user" title="Convert PyTorch* QuartzNet Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet"/>
|
||||
<tab type="user" title="Convert PyTorch* RNN-T Model " url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
|
||||
<tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
|
||||
<tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
|
||||
<tab type="user" title="Convert PyTorch* RCAN Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN"/>
|
||||
<tab type="user" title="Convert PyTorch* BERT-NER Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner"/>
|
||||
<tab type="user" title="Convert PyTorch* QuartzNet Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet"/>
|
||||
<tab type="user" title="Convert PyTorch* RNN-T Model " url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
|
||||
<tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
|
||||
<tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
|
||||
<tab type="user" title="Convert PyTorch* RCAN Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RCAN"/>
|
||||
<tab type="user" title="Convert PyTorch* BERT-NER Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_Bert_ner"/>
|
||||
</tab>
|
||||
</tab>
|
||||
<tab type="user" title="Model Optimizations Techniques" url="@ref openvino_docs_MO_DG_prepare_model_Model_Optimization_Techniques"/>
|
||||
@ -76,10 +75,8 @@ limitations under the License.
|
||||
<tab type="user" title="Extending Model Optimizer with New Primitives" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_with_New_Primitives"/>
|
||||
<tab type="user" title="Extending Model Optimizer with Caffe* Python Layers" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_With_Caffe_Python_Layers"/>
|
||||
<tab type="user" title="Extending Model Optimizer for Custom MXNet* Operations" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_MXNet_Model_Optimizer_with_New_Primitives"/>
|
||||
<tab type="user" title="Legacy Mode for Caffe* Custom Layers" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Legacy_Mode_for_Caffe_Custom_Layers"/>
|
||||
<tab type="user" title="[DEPRECATED] Offloading Sub-Graph Inference" url="https://docs.openvinotoolkit.org/2020.1/_docs_MO_DG_prepare_model_customize_model_optimizer_Offloading_Sub_Graph_Inference.html"/>
|
||||
<tab type="user" title="[DEPRECATED] Legacy Mode for Caffe* Custom Layers" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Legacy_Mode_for_Caffe_Custom_Layers"/>
|
||||
</tab>
|
||||
</tab>
|
||||
<tab type="user" title="Model Optimizer Frequently Asked Questions" url="@ref openvino_docs_MO_DG_prepare_model_Model_Optimizer_FAQ"/>
|
||||
<tab type="user" title="Known Issues" url="@ref openvino_docs_MO_DG_Known_Issues_Limitations"/>
|
||||
</tab>
|
||||
@ -375,4 +372,4 @@ limitations under the License.
|
||||
<tab type="user" title="Inference Engine Plugin Development Guide" url="ie_plugin_api/index.html"/>
|
||||
</tab>
|
||||
</navindex>
|
||||
</doxygenlayout>
|
||||
</doxygenlayout>
|
@ -42,7 +42,7 @@ limitations under the License.
|
||||
<tab type="user" title="Install Intel® Distribution of OpenVINO™ toolkit for Linux* from a Docker* Image" url="@ref openvino_docs_install_guides_installing_openvino_docker_linux"/>
|
||||
<tab type="user" title="Install Intel® Distribution of OpenVINO™ toolkit for Windows* from a Docker* Image" url="@ref openvino_docs_install_guides_installing_openvino_docker_windows"/>
|
||||
</tab>
|
||||
<tab type="user" title="Docker with DL Workbench" url="./workbench_docs_Workbench_DG_Install_from_Docker_Hub.html"/><!-- Link to the original Workbench topic -->
|
||||
<tab type="user" title="Docker with DL Workbench" url="./workbench_docs_Workbench_DG_Run_Locally.html"/><!-- Link to the original Workbench topic -->
|
||||
<tab type="user" title="APT" url="@ref openvino_docs_install_guides_installing_openvino_apt"/>
|
||||
<tab type="user" title="YUM" url="@ref openvino_docs_install_guides_installing_openvino_yum"/>
|
||||
<tab type="user" title="Anaconda Cloud" url="@ref openvino_docs_install_guides_installing_openvino_conda"/>
|
||||
@ -57,7 +57,7 @@ limitations under the License.
|
||||
<tab type="user" title="Windows" url="@ref openvino_docs_get_started_get_started_windows"/>
|
||||
<tab type="user" title="macOS" url="@ref openvino_docs_get_started_get_started_macos"/>
|
||||
<tab type="user" title="Raspbian" url="@ref openvino_docs_get_started_get_started_raspbian"/>
|
||||
<tab type="user" title="Get Started with OpenVINO via DL Workbench" url="@ref openvino_docs_get_started_get_started_dl_workbench"/>
|
||||
<tab type="user" title="DL Workbench: Quick Start with OpenVINO™ Toolkit" url="@ref openvino_docs_get_started_get_started_dl_workbench"/>
|
||||
<tab type="user" title="Legal Information" url="@ref openvino_docs_Legal_Information"/>
|
||||
</tab>
|
||||
<!-- Configuration for Hardware -->
|
||||
@ -103,7 +103,7 @@ limitations under the License.
|
||||
<tab type="usergroup" title="Performance Benchmark Results" url="@ref openvino_docs_performance_benchmarks">
|
||||
<tab type="usergroup" title="Intel® Distribution of OpenVINO™ toolkit Benchmark Results" url="@ref openvino_docs_performance_benchmarks_openvino">
|
||||
<tab type="user" title="Performance Information Frequently Asked Questions" url="@ref openvino_docs_performance_benchmarks_faq"/>
|
||||
<tab type="user" title="Download Performance Data Spreadsheet in MS Excel* Format" url="https://docs.openvinotoolkit.org/downloads/benchmark_files/OV-2021.3-Download-Excel.xlsx"/>
|
||||
<tab type="user" title="Download Performance Data Spreadsheet in MS Excel* Format" url="https://docs.openvinotoolkit.org/downloads/benchmark_files/OV-2021.4-Download-Excel.xlsx"/>
|
||||
<tab type="user" title="INT8 vs. FP32 Comparison on Select Networks and Platforms" url="@ref openvino_docs_performance_int8_vs_fp32"/>
|
||||
</tab>
|
||||
<tab type="user" title="OpenVINO™ Model Server Benchmark Results" url="@ref openvino_docs_performance_benchmarks_ovms"/>
|
||||
@ -118,6 +118,9 @@ limitations under the License.
|
||||
<xi:include href="omz_docs.xml" xpointer="omz_tools_accuracy_checker">
|
||||
<xi:fallback/>
|
||||
</xi:include>
|
||||
<xi:include href="omz_docs.xml" xpointer="omz_data">
|
||||
<xi:fallback/>
|
||||
</xi:include>
|
||||
<tab type="user" title="Using Cross Check Tool for Per-Layer Comparison Between Plugins" url="@ref openvino_inference_engine_tools_cross_check_tool_README"/>
|
||||
</tab>
|
||||
<tab type="user" title="Case Studies" url="https://www.intel.com/openvino-success-stories"/>
|
||||
@ -158,9 +161,6 @@ limitations under the License.
|
||||
<xi:include href="omz_docs.xml" xpointer="omz_models">
|
||||
<xi:fallback/>
|
||||
</xi:include>
|
||||
<tab type="user" title="Dataset Preparation Guide" url="@ref omz_data_datasets"/>
|
||||
<tab type="user" title="Intel's Pre-Trained Models Device Support" url="@ref omz_models_intel_device_support"/>
|
||||
<tab type="user" title="Public Pre-Trained Models Device Support" url="@ref omz_models_public_device_support"/>
|
||||
<xi:include href="omz_docs.xml" xpointer="omz_demos">
|
||||
<xi:fallback/>
|
||||
</xi:include>
|
||||
|
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6038ccd7873a1a818d944139ea3144a115dae19f0d3094e590a8a0c2b7b3a46c
|
||||
size 95228
|
3
docs/get_started/dl_workbench_img/openvino_in_dl_wb.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:90e5ff4285c9d1069647097157eccf7d8a3f545f4ba8b93930b55d8b62c17a1a
|
||||
size 100677
|
@ -1,139 +1,53 @@
|
||||
# Get Started with OpenVINO™ Toolkit via Deep Learning Workbench {#openvino_docs_get_started_get_started_dl_workbench}
|
||||
# Quick Start with OpenVINO™ Toolkit via Deep Learning Workbench {#openvino_docs_get_started_get_started_dl_workbench}
|
||||
|
||||
The OpenVINO™ toolkit optimizes and runs Deep Learning Neural Network models on Intel® hardware. This guide helps you get started with the OpenVINO™ toolkit via the Deep Learning Workbench (DL Workbench) on Linux\*, Windows\*, or macOS\*.
|
||||
The OpenVINO™ toolkit is a comprehensive toolkit for optimizing pretrained deep learning models to achieve high performance and prepare them for deployment on Intel® platforms. Deep Learning Workbench (DL Workbench) is the OpenVINO™ toolkit UI designed to make the production of pretrained deep learning models significantly easier.
|
||||
|
||||
In this guide, you will:
|
||||
* Learn the OpenVINO™ inference workflow.
|
||||
* Start DL Workbench on Linux. Links to instructions for other operating systems are provided as well.
|
||||
* Create a project and run a baseline inference.
|
||||
Start working with the OpenVINO™ toolkit right from your browser: import a model, analyze its performance and accuracy, visualize the outputs, optimize and prepare the model for deployment in a matter of minutes. DL Workbench will take you through the full OpenVINO™ workflow, providing the opportunity to learn about various toolkit components.
|
||||
|
||||
[DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a web-based graphical environment that enables you to easily use various sophisticated
|
||||
OpenVINO™ toolkit components:
|
||||
* [Model Downloader](@ref omz_tools_downloader) to download models from the [Intel® Open Model Zoo](@ref omz_models_group_intel)
|
||||
with pre-trained models for a range of different tasks
|
||||
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) to transform models into
|
||||
the Intermediate Representation (IR) format
|
||||
* [Post-training Optimization Tool](@ref pot_README) to calibrate a model and then execute it in the
|
||||
INT8 precision
|
||||
* [Accuracy Checker](@ref omz_tools_accuracy_checker) to determine the accuracy of a model
|
||||
* [Benchmark Tool](@ref openvino_inference_engine_samples_benchmark_app_README) to estimate inference performance on supported devices
|
||||

|
||||
|
||||

|
||||
## User Goals
|
||||
|
||||
DL Workbench supports the following scenarios:
|
||||
1. [Calibrate the model in INT8 precision](@ref workbench_docs_Workbench_DG_Int_8_Quantization)
|
||||
2. [Find the best combination](@ref workbench_docs_Workbench_DG_View_Inference_Results) of inference parameters: [number of streams and batches](../optimization_guide/dldt_optimization_guide.md)
|
||||
3. [Analyze inference results](@ref workbench_docs_Workbench_DG_Visualize_Model) and [compare them across different configurations](@ref workbench_docs_Workbench_DG_Compare_Performance_between_Two_Versions_of_Models)
|
||||
4. [Implement an optimal configuration into your application](@ref workbench_docs_Workbench_DG_Deploy_and_Integrate_Performance_Criteria_into_Application)
|
||||
* Learn what neural networks are, how they work, and how to examine their architectures with more than 200 deep learning models.
|
||||
* Measure and interpret model performance right after the import.
|
||||
* Tune the model for enhanced performance.
|
||||
* Analyze the quality of your model and visualize output.
|
||||
* Use preconfigured JupyterLab\* environment to learn OpenVINO™ workflow.
|
||||
|
||||
## Prerequisites
|
||||
## Run DL Workbench
|
||||
|
||||
Prerequisite | Linux* | Windows* | macOS*
|
||||
:----- | :----- |:----- |:-----
|
||||
Operating system|Ubuntu\* 18.04. Other Linux distributions, such as Ubuntu\* 16.04 and CentOS\* 7, are not validated.|Windows\* 10 | macOS\* 10.15 Catalina
|
||||
CPU | Intel® Core™ i5| Intel® Core™ i5 | Intel® Core™ i5
|
||||
GPU| Intel® Pentium® processor N4200/5 with Intel® HD Graphics | Not supported| Not supported
|
||||
HDDL, MYRIAD| Intel® Neural Compute Stick 2 <br> Intel® Vision Accelerator Design with Intel® Movidius™ VPUs| Not supported | Not supported
|
||||
Available RAM space| 4 GB| 4 GB| 4 GB
|
||||
Available storage space | 8 GB + space for imported artifacts| 8 GB + space for imported artifacts| 8 GB + space for imported artifacts
|
||||
Docker\*| Docker CE 18.06.1 | Docker Desktop 2.1.0.1|Docker CE 18.06.1
|
||||
Web browser| Google Chrome\* 76 <br> Browsers like Mozilla Firefox\* 71 or Apple Safari\* 12 are not validated. <br> Microsoft Internet Explorer\* is not supported.| Google Chrome\* 76 <br> Browsers like Mozilla Firefox\* 71 or Apple Safari\* 12 are not validated. <br> Microsoft Internet Explorer\* is not supported.| Google Chrome\* 76 <br>Browsers like Mozilla Firefox\* 71 or Apple Safari\* 12 are not validated. <br> Microsoft Internet Explorer\* is not supported.
|
||||
Resolution| 1440 x 890|1440 x 890|1440 x 890
|
||||
Internet|Optional|Optional|Optional
|
||||
Installation method| From Docker Hub <br> From OpenVINO™ toolkit package|From Docker Hub|From Docker Hub
|
||||
You can [run DL Workbench](@ref workbench_docs_Workbench_DG_Install) on your local system or in the Intel® DevCloud for the Edge. Ensure that you have met the [prerequisites](@ref workbench_docs_Workbench_DG_Prerequisites).
|
||||
|
||||
## Start DL Workbench
|
||||
Run DL Workbench on your local system by using the installation form. Select your options and run the commands on the local machine:
|
||||
|
||||
This section provides instructions to run the DL Workbench on Linux from Docker Hub.
|
||||
<iframe style="width: 100%; height: 620px;" src="https://openvinotoolkit.github.io/workbench_aux/" frameborder="0" allow="clipboard-write;"></iframe>
|
||||
|
||||
Use the command below to pull the latest Docker image with the application and run it:
|
||||
Once DL Workbench is set up, open the http://127.0.0.1:5665 link.
|
||||
|
||||
```bash
|
||||
wget https://raw.githubusercontent.com/openvinotoolkit/workbench_aux/master/start_workbench.sh && bash start_workbench.sh
|
||||
```
|
||||
DL Workbench uses [authentication tokens](@ref workbench_docs_Workbench_DG_Authentication) to access the application. A token
|
||||
is generated automatically and displayed in the console output when you run the container for the first time. Once the command is executed, follow the link with the token. The **Get Started** page opens:
|
||||

|
||||

|
||||
|
||||
For details and more installation options, visit the links below:
|
||||
* [Install DL Workbench from Docker Hub* on Linux* OS](@ref workbench_docs_Workbench_DG_Install_from_DockerHub_Linux)
|
||||
* [Install DL Workbench from Docker Hub on Windows*](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub_Win)
|
||||
* [Install DL Workbench from Docker Hub on macOS*](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub_mac)
|
||||
* [Install DL Workbench from the OpenVINO toolkit package on Linux](@ref workbench_docs_Workbench_DG_Install_from_Package)
|
||||
|
||||
## <a name="workflow-overview"></a>OpenVINO™ DL Workbench Workflow Overview
|
||||
Congratulations, you have installed DL Workbench. Your next step is to [Get Started with DL Workbench](@ref workbench_docs_Workbench_DG_Work_with_Models_and_Sample_Datasets) and create your first project.
|
||||
|
||||
The simplified OpenVINO™ DL Workbench workflow is:
|
||||
1. **Get a trained model** for your inference task. Example inference tasks: pedestrian detection, face detection, vehicle detection, license plate recognition, head pose.
|
||||
2. **Run the trained model through the Model Optimizer** to convert the model to an Intermediate Representation, which consists of a pair of `.xml` and `.bin` files that are used as the input for Inference Engine.
|
||||
3. **Run inference against the Intermediate Representation** (optimized model) and output inference results, as sketched in the example below.
|
||||
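The same three steps can also be exercised from a script. The following is a minimal sketch only, assuming the 2021.x Inference Engine Python API, placeholder IR file names (`model.xml`/`model.bin`) produced by the Model Optimizer, and the CPU plugin as the target device:

```python
import numpy as np
from openvino.inference_engine import IECore  # 2021.x Inference Engine Python API

ie = IECore()
# Step 2 output: the IR pair produced by the Model Optimizer (placeholder paths).
net = ie.read_network(model="model.xml", weights="model.bin")
# Compile the network for a target device; "CPU" is used here as an example.
exec_net = ie.load_network(network=net, device_name="CPU")

input_name = next(iter(net.input_info))
# Dummy input with the shape the network expects (stand-in for real data).
dummy = np.zeros(net.input_info[input_name].input_data.shape, dtype=np.float32)

# Step 3: synchronous inference returns a dict of output name -> numpy array.
results = exec_net.infer(inputs={input_name: dummy})
print({name: out.shape for name, out in results.items()})
```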
## Videos
|
||||
|
||||
## Run Baseline Inference
|
||||
<table>
|
||||
<tr>
|
||||
<td>
|
||||
<iframe width="320" src="https://www.youtube.com/embed/on8xSSTKCt8" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
</td>
|
||||
<td>
|
||||
<iframe width="320" src="https://www.youtube.com/embed/JBDG2g5hsoM" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>What is the OpenVINO™ toolkit DL Workbench</strong>. <br>Duration: 1:31</td>
|
||||
<td><strong>How to Install the OpenVINO™ toolkit DL Workbench</strong>. <br>Duration: 8:20</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
This section illustrates a sample use case of how to infer a pre-trained model from the [Intel® Open Model Zoo](@ref omz_models_group_intel) with an autogenerated noise dataset on a CPU device.
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/9TRJwEmY0K4" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
Once you log in to the DL Workbench, create a project, which is a combination of a model, a dataset, and a target device. Follow the steps below:
|
||||
|
||||
### Step 1. Open a New Project
|
||||
|
||||
On the **Active Projects** page, click **Create** to open the **Create Project** page:
|
||||

|
||||
|
||||
### Step 2. Choose a Pre-trained Model
|
||||
|
||||
Click **Import** next to the **Model** table on the **Create Project** page. The **Import Model** page opens. Select the squeezenet1.1 model from the Open Model Zoo and click **Import**.
|
||||

|
||||
|
||||
### Step 3. Convert the Model into Intermediate Representation
|
||||
|
||||
The **Convert Model to IR** tab opens. Keep the FP16 precision and click **Convert**.
|
||||

|
||||
|
||||
You are directed back to the **Create Project** page where you can see the status of the chosen model.
|
||||

|
||||
|
||||
### Step 4. Generate a Noise Dataset
|
||||
|
||||
Scroll down to the **Validation Dataset** table. Click **Generate** next to the table heading.
|
||||

|
||||
|
||||
The **Autogenerate Dataset** page opens. Click **Generate**.
|
||||

|
||||
|
||||
You are directed back to the **Create Project** page where you can see the status of the dataset.
|
||||

|
||||
|
||||
### Step 5. Create the Project and Run a Baseline Inference
|
||||
|
||||
On the **Create Project** page, select the imported model, CPU target, and the generated dataset. Click **Create**.
|
||||

|
||||
|
||||
The inference starts and you cannot proceed until it is done.
|
||||

|
||||
|
||||
Once the inference is complete, the **Projects** page opens automatically. Find your inference job in the **Projects Settings** table, which lists all jobs.
|
||||

|
||||
|
||||
Congratulations, you have performed your first inference in the OpenVINO DL Workbench. Now you can proceed to:
|
||||
* [Select the inference](@ref workbench_docs_Workbench_DG_Run_Single_Inference)
|
||||
* [Visualize statistics](@ref workbench_docs_Workbench_DG_Visualize_Model)
|
||||
* [Experiment with model optimization](@ref workbench_docs_Workbench_DG_Int_8_Quantization)
|
||||
and inference options to profile the configuration
|
||||
|
||||
For detailed instructions to create a new project, visit the links below:
|
||||
* [Select a model](@ref workbench_docs_Workbench_DG_Select_Model)
|
||||
* [Select a dataset](@ref workbench_docs_Workbench_DG_Select_Datasets)
|
||||
* [Select a target and an environment](@ref workbench_docs_Workbench_DG_Select_Environment). This can be your local workstation or a remote target. If you use a remote target, [register the remote machine](@ref workbench_docs_Workbench_DG_Add_Remote_Target) first.
|
||||
|
||||
## Additional Resources
|
||||
|
||||
* [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
|
||||
## See Also
|
||||
* [Get Started with DL Workbench](@ref workbench_docs_Workbench_DG_Work_with_Models_and_Sample_Datasets)
|
||||
* [DL Workbench Overview](@ref workbench_docs_Workbench_DG_Introduction)
|
||||
* [DL Workbench Educational Resources](@ref workbench_docs_Workbench_DG_Additional_Resources)
|
||||
* [OpenVINO™ Toolkit Overview](../index.md)
|
||||
* [DL Workbench Installation Guide](@ref workbench_docs_Workbench_DG_Install_Workbench)
|
||||
* [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
|
||||
* [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
|
||||
* [Overview of OpenVINO™ Toolkit Pre-Trained Models](https://software.intel.com/en-us/openvino-toolkit/documentation/pretrained-models)
|
||||
|
@ -17,36 +17,30 @@ To learn about what is *custom operation* and how to work with them in the Deep
|
||||
|
||||
## Introducing OpenVINO™ and Computer Vision | IoT Developer Show Season 2 | Intel Software
|
||||
|
||||
[](https://www.youtube.com/watch?v=M6Nyh2JDLQs)
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/M6Nyh2JDLQs" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
|
||||
|
||||
## OpenVINO™ Toolkit and Two Hardware Development Kits | IoT Developer Show Season 2 | Intel Software
|
||||
|
||||
[](https://www.youtube.com/watch?v=GtJPBYjuyVU)
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/GtJPBYjuyVU" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
|
||||
|
||||
## Intel Demonstration of High Performance Vision Deployment - The OpenVINO Toolkit in Action
|
||||
|
||||
[](https://www.youtube.com/watch?v=1_iI_4Zgufw)
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/1_iI_4Zgufw" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
|
||||
|
||||
## Computer Vision at the Edge with OpenVINO by Krishnakumar Shetti at ODSC_India
|
||||
|
||||
[](https://www.youtube.com/watch?v=RfRCrq35LXg)
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/RfRCrq35LXg" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
|
||||
|
||||
## Model Optimizer Concept
|
||||
|
||||
[](https://www.youtube.com/watch?v=Kl1ptVb7aI8)
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/Kl1ptVb7aI8" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
|
||||
## Computer Vision with Intel
|
||||
|
||||
[](https://www.youtube.com/watch?v=FZZD4FCvO9c)
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/FZZD4FCvO9c" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:00ec72e982f658698b599dd8cbcbd50996a1982c4223bce93b807fa6b0c0c825
|
||||
size 233866
|
||||
oid sha256:93dd39fa3196a41ff6d1523ef473c5d0c5c584c24074b4b628e80bc09bd80fbe
|
||||
size 112452
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e14f77f61f12c96ccf302667d51348a1e03579679155199910e3ebdf7d6adf06
|
||||
size 37915
|
||||
oid sha256:8cbe1a1c1dc477edc6909a011c1467b375f4f2ba868007befa4b2eccbaa2f2b1
|
||||
size 28229
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e5a472a62de53998194bc1471539139807e00cbb75fd9edc605e7ed99b5630af
|
||||
size 18336
|
||||
oid sha256:d4cbf542d393f920c5731ce973f09836e08aaa35987ef0a19355e3e895179936
|
||||
size 17981
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2f7c58da93fc7966e154bdade48d408401b097f4b0306b7c85aa4256ad72b59d
|
||||
size 18118
|
||||
oid sha256:c57a6e967b6515a34e0c62c4dd850bebc2e009f75f17ddd0a5d74a1028e84668
|
||||
size 19028
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:104d8cd5eac2d1714db85df9cba5c2cfcc113ec54d428cd6e979e75e10473be6
|
||||
size 17924
|
||||
oid sha256:690e57d94f5c0c0ea31fc04a214b56ab618eac988a72c89b3542f52b4f44d513
|
||||
size 19507
|
||||
|
3
docs/img/throughput_ovms_bertsmall_fp32.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5663cfab7a1611e921fc0b775d946009d6f7a7019e5e9dc6ebe96ccb6c6f1d7f
|
||||
size 20145
|
3
docs/img/throughput_ovms_bertsmall_int8.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aad18293f64089992862e6a17b5271cc982da89b6b7493516a59252368945c87
|
||||
size 20998
|
3
docs/img/throughput_ovms_mobilenet3large_fp32.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:70daf9e0016e56d8c7bb2f0efe2ac592434962bb8bea95f9120acd7b14d8b5b0
|
||||
size 21763
|
3
docs/img/throughput_ovms_mobilenet3small_fp32.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3db1f5acdad5880e44965eb71a33ac47aee331ee2f4318e2214786ea5a1e5289
|
||||
size 21923
|
3
docs/img/throughput_ovms_resnet50_fp32_bs_1.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:67a7444a934da6e70c77c937fc7a830d1ba2fbde99f3f3260479c39b9b7b1cee
|
||||
size 20279
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:32116d6d1acc20d8cb2fa10e290e052e3146ba1290f1c5e4aaf16a85388b6ec6
|
||||
size 19387
|
||||
oid sha256:5d96e146a1b7d4e48b683de3ed7665c41244ec68cdad94eb79ac497948af9b08
|
||||
size 21255
|
||||
|
3
docs/img/throughput_ovms_ssdmobilenet1_fp32.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d1ab823ea109f908b3e38bf88a7004cfdc374746b5ec4870547fade0f7684035
|
||||
size 20084
|
3
docs/img/throughput_ovms_yolo3_fp32.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b16674fabd80d73e455c276ef262f3d0a1cf6b00152340dd4e2645330f358432
|
||||
size 19341
|
3
docs/img/throughput_ovms_yolo3tiny_fp32.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:48bc60c34f141a3cb232ae8370468f2861ac36cb926be981ff3153f05d4d5187
|
||||
size 19992
|
3
docs/img/throughput_ovms_yolo4_fp32.png
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f472d1fa6058d7ce988e9a2da8b5c6c106d8aa7e90bf2d383d2eaf685a725ab4
|
||||
size 19107
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b630a7deb8bbcf1d5384c351baff7505dc96a1a5d59b5f6786845d549d93d9ab
|
||||
size 36881
|
||||
oid sha256:5281f26cbaa468dc4cafa4ce2fde35d338fe0f658bbb796abaaf793e951939f6
|
||||
size 13943
|
||||
|
@ -45,7 +45,7 @@ Useful documents for model optimization:
|
||||
### Running and Tuning Inference
|
||||
The other core component of OpenVINO™ is the [Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md), which manages the loading and compiling of the optimized neural network model, runs inference operations on input data, and outputs the results. Inference Engine can execute synchronously or asynchronously, and its plugin architecture manages the appropriate compilations for execution on multiple Intel® devices, including both workhorse CPUs and specialized graphics and video processing platforms (see below, Packaging and Deployment).
|
||||
|
||||
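As a rough illustration of that synchronous/asynchronous distinction, here is a minimal sketch of the asynchronous path, assuming the 2021.x Inference Engine Python API, a placeholder IR (`model.xml`/`model.bin`), and the CPU plugin; the setup mirrors a synchronous flow:

```python
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")   # placeholder IR
exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)

input_name = next(iter(net.input_info))
frame = np.zeros(net.input_info[input_name].input_data.shape, dtype=np.float32)

# Asynchronous path: start a request, keep the application busy, then wait.
exec_net.start_async(request_id=0, inputs={input_name: frame})
# ... other work can run here while the request is in flight ...
if exec_net.requests[0].wait(-1) == 0:           # 0 means the request completed OK
    outputs = exec_net.requests[0].output_blobs  # dict of output name -> Blob
    print(list(outputs))
```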
You can use OpenVINO™ Tuning Utilities with the Inference Engine to trial and test inference on your model. The Benchmark utility uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares performance of differently configured inferences.
|
||||
You can use OpenVINO™ Tuning Utilities with the Inference Engine to trial and test inference on your model. The Benchmark utility uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../tools/cross_check_tool/README.md) compares performance of differently configured inferences.
|
||||
|
||||
For a full browser-based studio integrating these other key tuning utilities, try the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction).
|
||||

|
||||
@ -81,7 +81,7 @@ The Inference Engine's plug-in architecture can be extended to meet other specia
|
||||
* [Deployment Manager Guide](./install_guides/deployment-manager-tool.md)
|
||||
|
||||
|
||||
## OpenVINO™ Toolkit Components
|
||||
## OpenVINO™ Toolkit Components
|
||||
|
||||
Intel® Distribution of OpenVINO™ toolkit includes the following components:
|
||||
|
||||
@ -90,11 +90,11 @@ Intel® Distribution of OpenVINO™ toolkit includes the following components:
|
||||
- [Inference Engine Samples](IE_DG/Samples_Overview.md): A set of simple console applications demonstrating how to use the Inference Engine in your applications.
|
||||
- [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction): A web-based graphical environment that allows you to easily use various sophisticated OpenVINO™ toolkit components.
|
||||
- [Post-training Optimization Tool](@ref pot_README): A tool to calibrate a model and then execute it in the INT8 precision.
|
||||
- Additional Tools: A set of tools to work with your models including [Benchmark App](../inference-engine/tools/benchmark_tool/README.md), [Cross Check Tool](../inference-engine/tools/cross_check_tool/README.md), [Compile tool](../inference-engine/tools/compile_tool/README.md).
|
||||
- [Open Model Zoo](@ref omz_models_group_intel)
|
||||
- Additional Tools: A set of tools to work with your models including [Benchmark App](../tools/benchmark_tool/README.md), [Cross Check Tool](../tools/cross_check_tool/README.md), [Compile tool](../tools/compile_tool/README.md).
|
||||
- [Open Model Zoo](@ref omz_models_group_intel)
|
||||
- [Demos](@ref omz_demos): Console applications that provide robust application templates to help you implement specific deep learning scenarios.
|
||||
- Additional Tools: A set of tools to work with your models including [Accuracy Checker Utility](@ref omz_tools_accuracy_checker) and [Model Downloader](@ref omz_tools_downloader).
|
||||
- [Documentation for Pretrained Models](@ref omz_models_group_intel): Documentation for pre-trained models that are available in the [Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo).
|
||||
- [Documentation for Pre-trained Models](@ref omz_models_group_intel): Documentation for pre-trained models that are available in the [Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo).
|
||||
- Deep Learning Streamer (DL Streamer): Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. DL Streamer can be installed by the Intel® Distribution of OpenVINO™ toolkit installer. Its open-source version is available on [GitHub](https://github.com/openvinotoolkit/dlstreamer_gst). For the DL Streamer documentation, see:
|
||||
- [DL Streamer Samples](@ref gst_samples_README)
|
||||
- [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/)
|
||||
|
@ -56,7 +56,7 @@ The **runtime package** includes the following components installed by default:
|
||||
```sh
|
||||
conda install openvino-ie4py-ubuntu20 -c intel
|
||||
```
|
||||
- Ubuntu* 18.04
|
||||
- Ubuntu* 18.04
|
||||
```sh
|
||||
conda install openvino-ie4py-ubuntu18 -c intel
|
||||
```
|
||||
|
@ -3,7 +3,7 @@
|
||||
You may install Intel® Distribution of OpenVINO™ toolkit from images and repositories using the **Install OpenVINO™** button above or directly from the [Get the Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html) page. Use the documentation below if you need additional support:
|
||||
|
||||
* [Docker](installing-openvino-docker-linux.md)
|
||||
* [Docker with DL Workbench](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub)
|
||||
* [Docker with DL Workbench](@ref workbench_docs_Workbench_DG_Run_Locally)
|
||||
* [APT](installing-openvino-apt.md)
|
||||
* [YUM](installing-openvino-yum.md)
|
||||
* [Anaconda Cloud](installing-openvino-conda.md)
|
||||
|
@ -5,7 +5,14 @@
|
||||
> - If you are using Intel® Distribution of OpenVINO™ toolkit on Windows\* OS, see the [Installation Guide for Windows*](installing-openvino-windows.md).
|
||||
> - CentOS and Yocto installations will require some modifications that are not covered in this guide.
|
||||
> - An internet connection is required to follow the steps in this guide.
|
||||
> - [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).
|
||||
|
||||
|
||||
> **TIP**: If you want to [quick start with OpenVINO™ toolkit](@ref
|
||||
> openvino_docs_get_started_get_started_dl_workbench), you can use
|
||||
> the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench). DL Workbench is the OpenVINO™ toolkit UI
|
||||
> that enables you to import a
|
||||
> model, analyze its performance and accuracy, visualize the outputs, optimize and prepare the model for deployment
|
||||
> on various Intel® platforms.
|
||||
|
||||
## Introduction
|
||||
|
||||
@ -13,7 +20,7 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
|
||||
|
||||
The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
|
||||
- Enables CNN-based deep learning inference on the edge
|
||||
- Supports heterogeneous execution across Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
- Supports heterogeneous execution across Intel® CPU, Intel® GPU, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
|
||||
- Includes optimized calls for computer vision standards including OpenCV\* and OpenCL™
|
||||
|
||||
@ -31,19 +38,6 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
|
||||
| [Documentation for Pre-Trained Models ](@ref omz_models_group_intel) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/openvinotoolkit/open_model_zoo). |
|
||||
| Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/Elements), [Tutorial](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/DL-Streamer-Tutorial). |
|
||||
|
||||
**Could Be Optionally Installed**
|
||||
|
||||
[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture
|
||||
configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
|
||||
* [Model Downloader](@ref omz_tools_downloader)
|
||||
* [Intel® Open Model Zoo](@ref omz_models_group_intel)
|
||||
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Post-training Optimization Tool](@ref pot_README)
|
||||
* [Accuracy Checker](@ref omz_tools_accuracy_checker)
|
||||
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)
|
||||
|
||||
Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
|
||||
|
||||
## System Requirements
|
||||
|
||||
**Hardware**
|
||||
@ -53,6 +47,7 @@ Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_I
|
||||
* Intel® Xeon® Scalable processor (formerly Skylake and Cascade Lake)
|
||||
* Intel Atom® processor with support for Intel® Streaming SIMD Extensions 4.1 (Intel® SSE4.1)
|
||||
* Intel Pentium® processor N4200/5, N3350/5, or N3450/5 with Intel® HD Graphics
|
||||
* Intel® Iris® Xe MAX Graphics
|
||||
* Intel® Neural Compute Stick 2
|
||||
* Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
|
||||
@ -69,6 +64,10 @@ Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_I
|
||||
- Ubuntu 20.04.0 long-term support (LTS), 64-bit
|
||||
- CentOS 7.6, 64-bit (for target only)
|
||||
- Yocto Project v3.0, 64-bit (for target only and requires modifications)
|
||||
- For deployment scenarios on Red Hat* Enterprise Linux* 8.2 (64-bit), you can use the Intel® Distribution of OpenVINO™ toolkit run-time package that includes the Inference Engine core libraries, nGraph, OpenCV, Python bindings, and the CPU and GPU plugins. The package is available as:
|
||||
- [Downloadable archive](https://storage.openvinotoolkit.org/repositories/openvino/packages/2021.3/l_openvino_toolkit_runtime_rhel8_p_2021.3.394.tgz)
|
||||
- [PyPi package](https://pypi.org/project/openvino/)
|
||||
- [Docker image](https://catalog.redhat.com/software/containers/intel/openvino-runtime/606ff4d7ecb5241699188fb3)
|
||||
|
||||
## Overview
|
||||
|
||||
@ -285,20 +284,22 @@ The steps in this section are required only if you want to enable the toolkit co
|
||||
cd /opt/intel/openvino_2021/install_dependencies/
|
||||
```
|
||||
|
||||
2. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. The drivers are not included in the package. To install them, make sure you have an internet connection and run the installation script:
|
||||
```sh
|
||||
sudo -E ./install_NEO_OCL_driver.sh
|
||||
```
|
||||
The script compares the driver version on the system to the current version. If the driver version on the system is higher or equal to the current version, the script does
|
||||
not install a new driver. If the version of the driver is lower than the current version, the script uninstalls the lower version and installs the current version with your permission:
|
||||
2. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. The drivers are not included in the package and must be installed separately.
|
||||
> **NOTE**: To use the **Intel® Iris® Xe MAX Graphics**, see the [Intel® Iris® Xe MAX Graphics with Linux*](https://dgpu-docs.intel.com/devices/iris-xe-max-graphics/index.html) page for driver installation instructions.
|
||||
|
||||
To install the drivers, make sure you have an internet connection and run the installation script:
|
||||
```sh
|
||||
sudo -E ./install_NEO_OCL_driver.sh
|
||||
```
|
||||
The script compares the driver version on the system to the current version. If the driver version on the system is higher or equal to the current version, the script does not install a new driver. If the version of the driver is lower than the current version, the script uninstalls the lower version and installs the current version with your permission:
|
||||

|
||||
Higher hardware versions require a higher driver version, namely 20.35 instead of 19.41. If the script fails to uninstall the driver, uninstall it manually. During the script execution, you may see the following command line output:
|
||||
```sh
|
||||
Add OpenCL user to video group
|
||||
```
|
||||
Ignore this suggestion and continue.<br>You can also find the most recent version of the driver, installation procedure and other information in the [https://github.com/intel/compute-runtime/](https://github.com/intel/compute-runtime/) repository.
|
||||
Ignore this suggestion and continue.<br>You can also find the most recent version of the driver, installation procedure and other information on the [Intel® software for general purpose GPU capabilities](https://dgpu-docs.intel.com/index.html) site.
|
||||
|
||||
4. **Optional** Install header files to allow compiling a new code. You can find the header files at [Khronos OpenCL™ API Headers](https://github.com/KhronosGroup/OpenCL-Headers.git).
|
||||
3. **Optional** Install header files to allow compiling a new code. You can find the header files at [Khronos OpenCL™ API Headers](https://github.com/KhronosGroup/OpenCL-Headers.git).
|
||||
|
||||
You've completed all required configuration steps to perform inference on processor graphics.
|
||||
Proceed to the <a href="#get-started">Get Started</a> section to run code samples and demo applications.
|
||||
|
@ -4,12 +4,17 @@
|
||||
> - The Intel® Distribution of OpenVINO™ is supported on macOS\* 10.15.x versions.
|
||||
> - An internet connection is required to follow the steps in this guide. If you have access to the Internet through the proxy server only, please make sure that it is configured in your OS environment.
|
||||
|
||||
> **TIP**: If you want to [quick start with OpenVINO™ toolkit](@ref
|
||||
> openvino_docs_get_started_get_started_dl_workbench), you can use
|
||||
> the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench). DL Workbench is the OpenVINO™ toolkit UI
|
||||
> that enables you to import a
|
||||
> model, analyze its performance and accuracy, visualize the outputs, optimize and prepare the model for deployment
|
||||
> on various Intel® platforms.
|
||||
|
||||
## Introduction
|
||||
|
||||
The Intel® Distribution of OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNN), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance.
|
||||
|
||||
The Intel® Distribution of OpenVINO™ toolkit for macOS* includes the Inference Engine, OpenCV* libraries and Model Optimizer tool to deploy applications for accelerated inference on Intel® CPUs and Intel® Neural Compute Stick 2.
|
||||
|
||||
The Intel® Distribution of OpenVINO™ toolkit for macOS*:
|
||||
|
||||
- Enables CNN-based deep learning inference on the edge
|
||||
@ -31,18 +36,6 @@ The following components are installed by default:
|
||||
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader) and other |
|
||||
| [Documentation for Pre-Trained Models ](@ref omz_models_group_intel) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/openvinotoolkit/open_model_zoo) |
|
||||
|
||||
**Could Be Optionally Installed**
|
||||
|
||||
[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture
|
||||
configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
|
||||
* [Model Downloader](@ref omz_tools_downloader)
|
||||
* [Intel® Open Model Zoo](@ref omz_models_group_intel)
|
||||
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Post-training Optimization Tool](@ref pot_README)
|
||||
* [Accuracy Checker](@ref omz_tools_accuracy_checker)
|
||||
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)
|
||||
|
||||
Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
|
||||
|
||||
## Development and Target Platform
|
||||
|
||||
@ -59,7 +52,7 @@ The development and target platforms have the same requirements, but you can sel
|
||||
|
||||
**Software Requirements**
|
||||
|
||||
* CMake 3.10 or higher
|
||||
* CMake 3.13 or higher
|
||||
+ [Install](https://cmake.org/download/) (choose "macOS 10.13 or later")
|
||||
+ Add `/Applications/CMake.app/Contents/bin` to path (for default install)
|
||||
* Python 3.6 - 3.7
|
||||
@ -114,7 +107,7 @@ The disk image is mounted to `/Volumes/m_openvino_toolkit_p_<version>` and autom
|
||||
|
||||
5. Click **Next** and follow the instructions on your screen.
|
||||
|
||||
6. If you are missing external dependencies, you will see a warning screen. Take note of any dependencies you are missing. After installing the Intel® Distribution of OpenVINO™ toolkit core components, you will need to install the missing dependencies. For example, the screen example below indicates you are missing two dependencies:
|
||||
6. If you are missing external dependencies, you will see a warning screen. Take note of any dependencies you are missing. After installing the Intel® Distribution of OpenVINO™ toolkit core components, you will need to install the missing dependencies. For example, the screen example below indicates you are missing a dependency:
|
||||

|
||||
|
||||
7. Click **Next**.
|
||||
@ -124,7 +117,7 @@ The disk image is mounted to `/Volumes/m_openvino_toolkit_p_<version>` and autom
|
||||
By default, the Intel® Distribution of OpenVINO™ is installed to the following directory, referred to as `<INSTALL_DIR>`:
|
||||
|
||||
* For root or administrator: `/opt/intel/openvino_<version>/`
|
||||
* For regular users: `/home/<USER>/intel/openvino_<version>/`
|
||||
* For regular users: `/home/<USER>/intel/openvino_<version>/`
|
||||
|
||||
For simplicity, a symbolic link to the latest installation is also created: `/home/<user>/intel/openvino_2021/`.
|
||||
9. If needed, click **Customize** to change the installation directory or the components you want to install:
|
||||
@ -279,7 +272,7 @@ Now you are ready to get started. To continue, see the following pages:
|
||||
|
||||
Follow the steps below to uninstall the Intel® Distribution of OpenVINO™ Toolkit from your system:
|
||||
|
||||
1. From the the installation directory (by default, `/opt/intel/openvino_2021`), locate and open `openvino_toolkit_uninstaller.app`.
|
||||
1. From the installation directory (by default, `/opt/intel/openvino_2021`), locate and open `openvino_toolkit_uninstaller.app`.
|
||||
2. Follow the uninstallation wizard instructions.
|
||||
3. When uninstallation is complete, click **Finish**.
|
||||
|
||||
|
@ -1,15 +1,15 @@
|
||||
# Install Intel® Distribution of OpenVINO™ Toolkit from PyPI Repository {#openvino_docs_install_guides_installing_openvino_pip}
|
||||
|
||||
OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. Based on latest generations of artificial neural networks, including Convolutional Neural Networks (CNNs), recurrent and attention-based networks, the toolkit extends computer vision and non-vision workloads across Intel® hardware, maximizing performance. It accelerates applications with high-performance, AI and deep learning inference deployed from edge to cloud.
|
||||
OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. Based on the latest generations of artificial neural networks, including Convolutional Neural Networks (CNNs), recurrent and attention-based networks, the toolkit extends computer vision and non-vision workloads across Intel® hardware, maximizing performance. It accelerates applications with high-performance AI and deep learning inference deployed from edge to cloud.
|
||||
|
||||
Intel® Distribution of OpenVINO™ Toolkit provides the following packages available for installation through the PyPI repository:
|
||||
|
||||
* Runtime package with the Inference Engine inside: [https://pypi.org/project/openvino/](https://pypi.org/project/openvino/).
|
||||
* Developer package that includes the runtime package as a dependency, Model Optimizer and other developer tools: [https://pypi.org/project/openvino-dev](https://pypi.org/project/openvino-dev).
|
||||
* Runtime package with the Inference Engine inside: [https://pypi.org/project/openvino/](https://pypi.org/project/openvino/) (a quick post-install check is sketched after this list)
|
||||
* Developer package that includes the runtime package as a dependency, Model Optimizer, Accuracy Checker and Post-Training Optimization Tool: [https://pypi.org/project/openvino-dev](https://pypi.org/project/openvino-dev)
|
||||
|
||||
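As a quick post-install sanity check for either package, the sketch below assumes the 2021.x Python API (which exposes a `get_version()` helper) and simply confirms that the runtime imports and reports its build:

```python
# Minimal post-install check: the import succeeds and the build version prints.
from openvino.inference_engine import get_version

print("Inference Engine build:", get_version())
```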
## Additional Resources
|
||||
|
||||
- [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit).
|
||||
- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
|
||||
- [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md).
|
||||
- [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md).
|
||||
- [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit)
|
||||
- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
- [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
|
||||
- [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
|
||||
|
@ -1,8 +1,14 @@
|
||||
# Install Intel® Distribution of OpenVINO™ toolkit for Windows* 10 {#openvino_docs_install_guides_installing_openvino_windows}
|
||||
|
||||
> **NOTES**:
|
||||
> **NOTE**:
|
||||
> - This guide applies to Microsoft Windows\* 10 64-bit. For Linux* OS information and instructions, see the [Installation Guide for Linux](installing-openvino-linux.md).
|
||||
> - [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).
|
||||
|
||||
> **TIP**: If you want to [quick start with OpenVINO™ toolkit](@ref
|
||||
> openvino_docs_get_started_get_started_dl_workbench), you can use
|
||||
> the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench). DL Workbench is the OpenVINO™ toolkit UI
|
||||
> that enables you to import a
|
||||
> model, analyze its performance and accuracy, visualize the outputs, optimize and prepare the model for deployment
|
||||
> on various Intel® platforms.
|
||||
|
||||
## Introduction
|
||||
|
||||
@ -46,7 +52,7 @@ For more information, see the online [Intel® Distribution of OpenVINO™ toolk
|
||||
The Intel® Distribution of OpenVINO™ toolkit for Windows\* 10 OS:
|
||||
|
||||
- Enables CNN-based deep learning inference on the edge
|
||||
- Supports heterogeneous execution across Intel® CPU, Intel® Processor Graphics (GPU), Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
- Supports heterogeneous execution across Intel® CPU, Intel® GPU, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
- Speeds time-to-market through an easy-to-use library of computer vision functions and pre-optimized kernels
|
||||
- Includes optimized calls for computer vision standards including OpenCV\* and OpenCL™
|
||||
|
||||
@ -64,18 +70,6 @@ The following components are installed by default:
|
||||
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader) and other |
|
||||
| [Documentation for Pre-Trained Models ](@ref omz_models_group_intel) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/openvinotoolkit/open_model_zoo) |
|
||||
|
||||
**Could Be Optionally Installed**
|
||||
|
||||
[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture
|
||||
configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
|
||||
* [Model Downloader](@ref omz_tools_downloader)
|
||||
* [Intel® Open Model Zoo](@ref omz_models_group_intel)
|
||||
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Post-training Optimization Tool](@ref pot_README)
|
||||
* [Accuracy Checker](@ref omz_tools_accuracy_checker)
|
||||
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)
|
||||
|
||||
Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
|
||||
|
||||
### System Requirements
|
||||
|
||||
@ -86,6 +80,7 @@ Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_I
|
||||
* Intel® Xeon® Scalable processor (formerly Skylake and Cascade Lake)
|
||||
* Intel Atom® processor with support for Intel® Streaming SIMD Extensions 4.1 (Intel® SSE4.1)
|
||||
* Intel Pentium® processor N4200/5, N3350/5, or N3450/5 with Intel® HD Graphics
|
||||
* Intel® Iris® Xe MAX Graphics
|
||||
* Intel® Neural Compute Stick 2
|
||||
* Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
|
||||
|
||||
@ -101,9 +96,8 @@ Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_I
|
||||
- Microsoft Windows\* 10 64-bit
|
||||
|
||||
**Software**
|
||||
- [Microsoft Visual Studio* with C++ **2019 or 2017** with MSBuild](http://visualstudio.microsoft.com/downloads/)
|
||||
- [CMake **3.10 or higher** 64-bit](https://cmake.org/download/)
|
||||
> **NOTE**: If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14.
|
||||
- [Microsoft Visual Studio* with C++ **2019 or 2017** with MSBuild](http://visualstudio.microsoft.com/downloads/). Make sure that C++ CMake tools for Windows is [enabled](https://docs.microsoft.com/en-us/cpp/build/cmake-projects-in-visual-studio?view=msvc-160#:~:text=The%20Visual%20C%2B%2B%20Tools%20for,Visual%20Studio%20generators%20are%20supported).
|
||||
- [CMake **3.10 or higher** 64-bit](https://cmake.org/download/). If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14.
|
||||
- [Python **3.6** - **3.8** 64-bit](https://www.python.org/downloads/windows/)
|
||||
|
||||
## Installation Steps
|
||||
@ -276,7 +270,7 @@ To perform inference on Intel® Vision Accelerator Design with Intel® Movidius
|
||||
1. Download and install <a href="https://www.microsoft.com/en-us/download/details.aspx?id=48145">Visual C++ Redistributable for Visual Studio 2017</a>
|
||||
2. Check with a support engineer if your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs card requires SMBUS connection to PCIe slot (most unlikely). Install the SMBUS driver only if confirmed (by default, it's not required):
|
||||
1. Go to the `<INSTALL_DIR>\deployment_tools\inference-engine\external\hddl\drivers\SMBusDriver` directory, where `<INSTALL_DIR>` is the directory in which the Intel Distribution of OpenVINO toolkit is installed.
|
||||
2. Right click on the `hddlsmbus.inf` file and choose **Install** from the pop up menu.
|
||||
2. Right click on the `hddlsmbus.inf` file and choose **Install** from the pop-up menu.
|
||||
|
||||
You are done installing your device driver and are ready to use your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs.
|
||||
|
||||
@ -338,6 +332,7 @@ To learn more about converting deep learning models, go to:
|
||||
- [Convert Your TensorFlow* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md)
|
||||
- [Convert Your MXNet* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md)
|
||||
- [Convert Your ONNX* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md)
|
||||
- [Convert Your Kaldi* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md)
|
||||
|
||||
## Additional Resources
|
||||
|
||||
|
@ -110,8 +110,7 @@ To install the full runtime version of the OpenVINO package:
|
||||
```sh
|
||||
sudo yum install intel-openvino-runtime-centos7-<VERSION>.<UPDATE>.<BUILD_NUM>
|
||||
```
|
||||
|
||||
#### Examples
|
||||
For example:
|
||||
|
||||
```sh
|
||||
sudo yum install intel-openvino-runtime-centos7-2021.3.394
|
||||
|
@ -11,7 +11,7 @@ license terms for third party or open source software included in or with the So
|
||||
|
||||
OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. Based on the latest generations of artificial neural networks, including Convolutional Neural Networks (CNNs), recurrent and attention-based networks, the toolkit extends computer vision and non-vision workloads across Intel® hardware, maximizing performance. It accelerates applications with high-performance AI and deep learning inference deployed from edge to cloud.
|
||||
|
||||
The **developer package** includes the following components installed by default:
|
||||
**The developer package includes the following components installed by default:**
|
||||
|
||||
| Component | Console Script | Description |
|
||||
|------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
@ -21,14 +21,14 @@ The **developer package** includes the following components installed by default
|
||||
| [Post-Training Optimization Tool](https://docs.openvinotoolkit.org/latest/pot_README.html)| `pot` |**Post-Training Optimization Tool** allows you to optimize trained models with advanced capabilities, such as quantization and low-precision optimizations, without the need to retrain or fine-tune models. Optimizations are also available through the [API](https://docs.openvinotoolkit.org/latest/pot_compression_api_README.html). |
|
||||
| [Model Downloader and other Open Model Zoo tools](https://docs.openvinotoolkit.org/latest/omz_tools_downloader.html)| `omz_downloader` <br> `omz_converter` <br> `omz_quantizer` <br> `omz_info_dumper`| **Model Downloader** is a tool for getting access to the collection of high-quality and extremely fast pre-trained deep learning [public](https://docs.openvinotoolkit.org/latest/omz_models_group_public.html) and [Intel](https://docs.openvinotoolkit.org/latest/omz_models_group_intel.html)-trained models. These free pre-trained models can be used to speed up the development and production deployment process without training your own models. The tool downloads model files from online sources and, if necessary, patches them to make them more usable with Model Optimizer. A number of additional tools are also provided to automate the process of working with downloaded models:<br> **Model Converter** is a tool for converting Open Model Zoo models that are stored in an original deep learning framework format into the Inference Engine Intermediate Representation (IR) using Model Optimizer. <br> **Model Quantizer** is a tool for automatic quantization of full-precision models in the IR format into low-precision versions using the Post-Training Optimization Tool. <br> **Model Information Dumper** is a helper utility for dumping information about the models to a stable, machine-readable format.
|
||||
|
||||
> **NOTE**: The developer package also installs the OpenVINO™ runtime package as a dependency.
|
||||
|
||||
**Developer package** also provides the **runtime package** installed as a dependency. The runtime package includes the following components:
|
||||
**The runtime package installs the following components:**
|
||||
|
||||
| Component | Description |
|
||||
|-----------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| [Inference Engine](https://pypi.org/project/openvino) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
|
||||
|
||||
|
||||
## System Requirements
|
||||
The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html#inpage-nav-8).
|
||||
|
||||
@ -40,7 +40,7 @@ The table below lists the supported operating systems and Python* versions requi
|
||||
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8 |
|
||||
| Red Hat* Enterprise Linux* 8, 64-bit | 3.6, 3.8 |
|
||||
| CentOS* 7, 64-bit | 3.6, 3.7, 3.8 |
|
||||
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
|
||||
| macOS* 10.15.x | 3.6, 3.7, 3.8 |
|
||||
| Windows 10*, 64-bit | 3.6, 3.7, 3.8 |
|
||||
|
||||
> **NOTE**: This package can be installed on other versions of macOS, Linux and Windows, but only the specific versions above are fully validated.
|
||||
@ -112,7 +112,7 @@ For example, to install and configure the components for working with TensorFlow
|
||||
```sh
|
||||
python -c "from openvino.inference_engine import IECore"
|
||||
```
|
||||
You will not see any error messages if installation finished successfully.
|
||||
If installation was successful, you will not see any error messages (no console output).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
|
@ -86,7 +86,7 @@ Run the command below:
|
||||
python -c "from openvino.inference_engine import IECore"
|
||||
```
|
||||
|
||||
You will not see any error messages if installation finished successfully.
|
||||
If installation was successful, you will not see any error messages (no console output).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
|
419
docs/ops/movement/Slice_8.md
Normal file
@ -0,0 +1,419 @@
|
||||
## Slice <a name="Slice"></a> {#openvino_docs_ops_movement_Slice_8}
|
||||
|
||||
**Versioned name**: *Slice-8*
|
||||
|
||||
**Category**: *Data movement*
|
||||
|
||||
**Short description**: *Slice* operation extracts a slice of the input tensor.
|
||||
|
||||
**Detailed Description**: *Slice* operation selects a region of values from the `data` tensor.
|
||||
Selected values start at indexes provided in the `start` input (inclusive) and end
|
||||
at indexes provided in the `stop` input (exclusive).
|
||||
|
||||
The `step` input allows subsampling of `data`, selecting every *n*-th element,
|
||||
where `n` is equal to the `step` value for the corresponding axis.
|
||||
A negative `step` value indicates slicing backwards, so the sequence along the corresponding axis is reversed in the output tensor.
|
||||
To select all values contiguously, set `step` to `1` for each axis.
|
||||
|
||||
The optional `axes` input allows specifying slice indexes only on selected axes.
|
||||
Other axes will not be affected and will be output in full.
|
||||
|
||||
The rules follow Python slicing semantics: `data[start:stop:step]`.
|
||||
|
||||
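For orientation only (not part of the normative specification), the single-axis behaviour maps directly onto Python/NumPy slicing; the values below are hypothetical examples:

```python
import numpy as np

# Illustration only: one-axis Slice semantics match Python/NumPy slicing data[start:stop:step].
data = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

print(data[1:8:1])    # start=1, stop=8, step=1   -> [1 2 3 4 5 6 7]
print(data[1:8:2])    # start=1, stop=8, step=2   -> [1 3 5 7]
print(data[9:0:-1])   # start=9, stop=0, step=-1  -> [9 8 7 6 5 4 3 2 1]
```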
**Attributes**: *Slice* operation has no attributes.
|
||||
|
||||
**Inputs**
|
||||
|
||||
* **1**: `data` - the tensor to be sliced, of type *T*, with rank greater than or equal to 1. **Required.**
|
||||
|
||||
* **2**: `start` - 1D tensor of type *T_IND*. Indices corresponding to axes in `data`.
|
||||
Defines the starting coordinate of the slice in the `data` tensor.
|
||||
A negative index value represents counting elements from the end of that dimension.
|
||||
A value larger than the size of a dimension is silently clamped. **Required.**
|
||||
|
||||
* **3**: `stop` - 1D tensor of type *T_IND*, with the same shape as `start`.
|
||||
Defines the coordinate of the opposite vertex of the slice, or where the slice ends.
|
||||
Stop indexes are exclusive, which means values lying on the ending edge are
|
||||
not included in the output slice.
|
||||
To slice to the end of a dimension of unknown size, `INT_MAX`
|
||||
may be used (or `INT_MIN` if slicing backwards). **Required.**
|
||||
|
||||
* **4**: `step` - 1D tensor of type *T_IND* and the same shape as `start` and `stop`.
|
||||
Integer value that specifies the increment between each index used in slicing.
|
||||
The value cannot be `0`; a negative value indicates slicing backwards. **Required.**
|
||||
|
||||
* **5**: `axes` - optional 1D tensor of type *T_AXIS*.
|
||||
Indicates which dimensions the values in `start` and `stop` apply to.
|
||||
Negative value means counting dimensions from the end. The range is `[-r, r - 1]`, where `r` is the rank of the `data` input tensor.
|
||||
Values are required to be unique. If a particular axis is unspecified, it will be output in full and not sliced.
|
||||
Default value: `[0, 1, 2, ..., start.shape[0] - 1]`. **Optional.**
|
||||
|
||||
The number of elements in the `start`, `stop`, `step`, and `axes` inputs is required to be equal.
|
||||
|
||||
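The following sketch is illustrative only (the operation is defined by this specification, not by this code): a hypothetical `slice_ref` helper that applies `start`/`stop`/`step` on the selected `axes` of a NumPy array, including the default `axes` and the handling of unspecified axes.

```python
import numpy as np

def slice_ref(data, start, stop, step, axes=None):
    """Illustrative helper: build per-axis Python slices from the Slice inputs."""
    if axes is None:
        axes = list(range(len(start)))      # default axes: [0, 1, ..., len(start) - 1]
    assert len(start) == len(stop) == len(step) == len(axes)
    slices = [slice(None)] * data.ndim      # unspecified axes are output in full
    for b, e, s, a in zip(start, stop, step, axes):
        slices[a] = slice(b, e, s)          # negative `a` counts axes from the end
    return data[tuple(slices)]

# Values of Example 12 below: 3D data, `axes` defaulted to [0, 1], last axis output in full.
data = np.arange(20 * 10 * 5).reshape(20, 10, 5)
out = slice_ref(data, start=[0, 0], stop=[4, 10], step=[1, 1])
print(out.shape)                            # -> (4, 10, 5)
```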
**Outputs**
|
||||
|
||||
* **1**: Tensor of type *T* with the values of the selected slice. The output tensor has the same rank as the `data` input, with dimensions reduced according to the values of the `start`, `stop`, and `step` inputs.
|
||||
|
||||
**Types**
|
||||
|
||||
* *T*: any supported type.
|
||||
* *T_IND*: any supported integer type.
|
||||
* *T_AXIS*: any supported integer type.
|
||||
|
||||
|
||||
**Examples**
|
||||
|
||||
*Example 1: basic slicing*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [8] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [1, 2, 3, 4, 5, 6, 7] -->
|
||||
<dim>7</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 2: basic slicing, `axes` default*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [8] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="4"> <!-- output: [1, 2, 3, 4, 5, 6, 7] -->
|
||||
<dim>7</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 3: basic slicing, `step: [2]`*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [8] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [2] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [1, 3, 5, 7] -->
|
||||
<dim>4</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 4: `start` and `stop` out of the dimension size, `step: [1]`*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [-100] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [100] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 5: slicing backward all elements, `step: [-1]`, `stop: [-11]`*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [9] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [-11] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [-1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 6: slicing backward, `step: [-1]`, `stop: [0]`*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [9] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [-1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1] -->
|
||||
<dim>9</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 7: slicing backward, `step: [-1]`, `stop: [-10]`*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [9] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [-10] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [-1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1] -->
|
||||
<dim>9</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 8: slicing backward, `step: [-2]`*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [9] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [-11] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [-2] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [9, 7, 5, 3, 1] -->
|
||||
<dim>5</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 9: `start` and `stop` out of the dimension size, slicing backward*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [100] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [-100] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [-1] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0] -->
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] -->
|
||||
<dim>10</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
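As a quick cross-check (illustration only), the backward-slicing cases above behave exactly like plain Python/NumPy slicing, including the clamping of out-of-range indices:

```python
import numpy as np

data = np.arange(10)        # [0, 1, ..., 9]

print(data[9:-11:-1])       # Example 5 -> [9 8 7 6 5 4 3 2 1 0]
print(data[9:0:-1])         # Example 6 -> [9 8 7 6 5 4 3 2 1]
print(data[9:-10:-1])       # Example 7 -> [9 8 7 6 5 4 3 2 1]
print(data[100:-100:-1])    # Example 9 -> [9 8 7 6 5 4 3 2 1 0] (out-of-range indices clamped)
```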
*Example 10: slicing 2D tensor, all axes specified*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data: data: [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] -->
|
||||
<dim>2</dim>
|
||||
<dim>5</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [0, 1] -->
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [2, 4] -->
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [1, 2] -->
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0, 1] -->
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output: [1, 3, 6, 8] -->
|
||||
<dim>2</dim>
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
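The same example can be checked with ordinary NumPy slicing on both axes (illustration only):

```python
import numpy as np

data = np.arange(10).reshape(2, 5)   # [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]

# Example 10: start=[0, 1], stop=[2, 4], step=[1, 2], axes=[0, 1]
print(data[0:2:1, 1:4:2])            # -> [[1 3]
                                     #     [6 8]]
```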
*Example 11: slicing 3D tensor, all axes specified*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data -->
|
||||
<dim>20</dim>
|
||||
<dim>10</dim>
|
||||
<dim>5</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [0, 0, 0] -->
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [4, 10, 5] -->
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [1, 1, 1] -->
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0, 1, 2] -->
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output -->
|
||||
<dim>4</dim>
|
||||
<dim>10</dim>
|
||||
<dim>5</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
|
||||
*Example 12: slicing 3D tensor, last axes default*
|
||||
|
||||
```xml
|
||||
<layer id="1" type="Slice" ...>
|
||||
<input>
|
||||
<port id="0"> <!-- data -->
|
||||
<dim>20</dim>
|
||||
<dim>10</dim>
|
||||
<dim>5</dim>
|
||||
</port>
|
||||
<port id="1"> <!-- start: [0, 0] -->
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
<port id="2"> <!-- stop: [4, 10] -->
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
<port id="3"> <!-- step: [1, 1] -->
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
<port id="4"> <!-- axes: [0, 1] -->
|
||||
<dim>2</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="5"> <!-- output -->
|
||||
<dim>4</dim>
|
||||
<dim>10</dim>
|
||||
<dim>5</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
@ -8,7 +8,7 @@ For information on the general workflow, refer to the documentation in <a href="
|
||||
|
||||
### Deep Learning Inference Engine Overview <a name="dldt-overview"></a>
|
||||
|
||||
Deep Learning Inference Engine is a part of Intel® Deep Learning Deployment Toolkit (Intel® DL Deployment Toolkit) and OpenVINO™ toolkit. Inference Engine facilitates deployment of deep learning solutions by delivering a unified, device-agnostic API.
|
||||
Deep Learning Inference Engine is a part of OpenVINO™ toolkit. Inference Engine facilitates deployment of deep learning solutions by delivering a unified, device-agnostic API.
|
||||
|
||||
Below are the three main steps of the deployment process:
|
||||
|
||||
@ -17,7 +17,7 @@ Below, there are the three main steps of the deployment process:
|
||||
|
||||
- *Performance flow*: This is an offline step where general topology-level optimizations happen automatically (see <a href="#mo-knobs-related-to-performance">Model Optimizer Knobs Related to Performance</a>).
|
||||
|
||||
- *Tools*: OpenVINO™ features the Model Optimizer that enables automatic and seamless transition from a training to deployment environment.
|
||||
- *Tools*: OpenVINO™ features the Model Optimizer, which enables automatic and seamless transition from a training to deployment environment.
|
||||
|
||||
2. **Model Inference/Execution**<br>
|
||||
After conversion, Inference Engine consumes the IR to perform inference. While Inference Engine API itself is target-agnostic, internally, it has a notion of plugins, which are device-specific libraries facilitating the hardware-assisted acceleration.
|
||||
@ -25,14 +25,14 @@ Below, there are the three main steps of the deployment process:
|
||||
- *Performance flow*: Upon conversion to IR, the execution starts with existing [Inference Engine samples](../IE_DG/Samples_Overview.md) to measure and tweak the performance of the network on different devices.<br>
|
||||
> **NOTE**: While consuming the same IR, each plugin performs additional device-specific optimizations at load time, so the resulting accuracy might differ. Also, enabling and optimizing custom kernels is error-prone (see <a href="#optimizing-custom-kernels">Optimizing Custom Kernels</a>).
|
||||
|
||||
- *Tools*: Beyond inference performance that samples report (see <a href="#latency-vs-throughput">Latency vs. Throughput</a>), you can get further device- and kernel-level timing with the <a href="#performance-counters">Inference Engine performance counters</a> and <a href="#vtune-examples">Intel® VTune™</a>.
|
||||
- *Tools*: Beyond inference performance that samples report (see <a href="#latency-vs-throughput">Latency vs. Throughput</a>), you can get further device- and kernel-level timing with the <a href="#performance-counters">Inference Engine performance counters</a> and <a href="#vtune-examples">Intel® VTune™</a>.
|
||||
|
||||
3. **Integration to the product**<br>
|
||||
After model inference is verified with the [samples](../IE_DG/Samples_Overview.md), the Inference Engine code is typically integrated into a real application or pipeline.
|
||||
|
||||
- *Performance flow*: The most important point is to preserve the sustained performance achieved with the stand-alone model execution. Take precautions when combining with other APIs and be careful testing the performance of every integration step.
|
||||
|
||||
- *Tools*: Beyond tracking the actual wall-clock time of your application, see <a href="#vtune-examples">Intel® VTune™ Examples</a> for application-level and system-level information.
|
||||
- *Tools*: Beyond tracking the actual wall-clock time of your application, see <a href="#vtune-examples">Intel® VTune™ Examples</a> for application-level and system-level information.
|
||||
|
||||
|
||||
## Gathering the Performance Numbers <a name="gathering-performance-numbers"></a>
|
||||
@ -50,12 +50,12 @@ When evaluating performance of your model with the Inference Engine, you must me
|
||||
|
||||
### Latency vs. Throughput <a name="latency-vs-throughput"></a>
|
||||
|
||||
In the asynchronous case (see <a href="#new-request-based-api">Request-Based API and “GetBlob” Idiom</a>), the performance of an individual infer request is usually of less concern. Instead, you typically execute multiple requests asynchronously and measure the throughput in images per second by dividing the number of images that were processed by the processing time.
|
||||
In contrast, for the latency-oriented tasks, the time to a single frame is more important.
|
||||
In the asynchronous case (see <a href="#new-request-based-api">Request-Based API and “GetBlob” Idiom</a>), the performance of an individual infer request is usually of less concern. Instead, you typically execute multiple requests asynchronously and measure the throughput in images per second by dividing the number of images that were processed by the processing time.
|
||||
In contrast, for latency-oriented tasks, the time to a single frame is more important.
|
||||
|
||||
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which allows measuring latency vs. throughput.
|
||||
|
||||
> **NOTE**: The [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample also supports batching, that is automatically packing multiple input images into a single request. However, high batch size results in a latency penalty. So for more real-time oriented usages, batch sizes that are as low as a single input are usually used. Still, devices like CPU, Intel®Movidius™ Myriad™ 2 VPU, Intel® Movidius™ Myriad™ X VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPU require a number of parallel requests instead of batching to leverage the performance. Running multiple requests should be coupled with a device configured to the corresponding number of streams. See <a href="#cpu-streams">details on CPU streams</a> for an example.
|
||||
> **NOTE**: The [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample also supports batching, that is, automatically packing multiple input images into a single request. However, high batch size results in a latency penalty. So for more real-time oriented usages, batch sizes that are as low as a single input are usually used. Still, devices like CPU, Intel®Movidius™ Myriad™ 2 VPU, Intel® Movidius™ Myriad™ X VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPU require a number of parallel requests instead of batching to leverage the performance. Running multiple requests should be coupled with a device configured to the corresponding number of streams. See <a href="#cpu-streams">details on CPU streams</a> for an example.
|
||||
|
||||
[OpenVINO™ Deep Learning Workbench tool](https://docs.openvinotoolkit.org/latest/workbench_docs_Workbench_DG_Introduction.html) provides throughput versus latency charts for different numbers of streams, requests, and batch sizes to find the performance sweet spot.
|
||||
|
||||
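As an illustration of measuring throughput with several in-flight requests, below is a minimal sketch assuming the 2021.x Python Inference Engine API; the IR file names and the random input data are placeholders, not part of any sample:

```python
import time
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")   # hypothetical IR files
input_name = next(iter(net.input_info))
shape = net.input_info[input_name].input_data.shape

exec_net = ie.load_network(network=net, device_name="CPU", num_requests=4)
num_requests = len(exec_net.requests)
images = [np.random.rand(*shape).astype(np.float32) for _ in range(64)]

start = time.perf_counter()
for i, image in enumerate(images):
    request = exec_net.requests[i % num_requests]
    if i >= num_requests:
        request.wait()                       # wait for the previous job on this request slot
    request.async_infer({input_name: image})
for request in exec_net.requests:
    request.wait()
elapsed = time.perf_counter() - start

print("Throughput: {:.1f} FPS".format(len(images) / elapsed))
```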
@ -65,7 +65,7 @@ When comparing the Inference Engine performance with the framework or another re
|
||||
|
||||
- Wrap exactly the inference execution (refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for an example).
|
||||
- Track model loading time separately.
|
||||
- Ensure the inputs are identical for the Inference Engine and the framework. For example, Caffe\* allows to auto-populate the input with random values. Notice that it might give different performance than on real images.
|
||||
- Ensure the inputs are identical for the Inference Engine and the framework. For example, Caffe\* allows you to auto-populate the input with random values. Notice that it might give different performance than on real images.
|
||||
- Similarly, for correct performance comparison, make sure the access pattern, for example, input layouts, is optimal for Inference Engine (currently, it is NCHW).
|
||||
- Any user-side pre-processing should be tracked separately.
|
||||
- Make sure to try the same environment settings that the framework developers recommend, for example, for TensorFlow*. In many cases, things that are more machine friendly, like respecting NUMA (see <a href="#cpu-checklist">CPU Checklist</a>), might work well for the Inference Engine as well.
|
||||
@ -83,11 +83,11 @@ Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README
|
||||
|
||||
## Model Optimizer Knobs Related to Performance <a name="mo-knobs-related-to-performance"></a>
|
||||
|
||||
Networks training is typically done on high-end data centers, using popular training frameworks like Caffe\*, TensorFlow\*, and MXNet\*. Model Optimizer converts the trained model in original proprietary formats to IR that describes the topology. IR is accompanied by a binary file with weights. These files in turn are consumed by the Inference Engine and used for scoring.
|
||||
Network training is typically done on high-end data centers, using popular training frameworks like Caffe\*, TensorFlow\*, and MXNet\*. Model Optimizer converts the trained model in original proprietary formats to IR that describes the topology. IR is accompanied by a binary file with weights. These files in turn are consumed by the Inference Engine and used for scoring.
|
||||
|
||||

|
||||
|
||||
As described in the [Model Optimizer Guide](../MO_DG/prepare_model/Prepare_Trained_Model.md), there are a number of device-agnostic optimizations the tool performs. For example, certain primitives like linear operations (BatchNorm and ScaleShift), are automatically fused into convolutions. Generally, these layers should not be manifested in the resulting IR:
|
||||
As described in the [Model Optimizer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md), there are a number of device-agnostic optimizations the tool performs. For example, certain primitives like linear operations (BatchNorm and ScaleShift) are automatically fused into convolutions. Generally, these layers should not be manifested in the resulting IR:
|
||||
|
||||

|
||||
|
||||
@ -109,43 +109,42 @@ Also:
|
||||
Notice that devices like the GPU do better with a larger batch size. It is possible to set the batch size at runtime using the Inference Engine [ShapeInference feature](../IE_DG/ShapeInference.md).
|
||||
|
||||
- **Resulting IR precision**<br>
|
||||
The resulting IR precision, for instance, `FP16` or `FP32`, directly affects performance. As CPU now supports `FP16` (while internally upscaling to `FP32` anyway) and because this is the best precision for a GPU target, you may want to always convert models to `FP16`. Notice that this is the only precision that Intel® Movidius™ Myriad™ 2 and Intel® Myriad™ X VPUs support.
|
||||
The resulting IR precision, for instance, `FP16` or `FP32`, directly affects performance. As CPU now supports `FP16` (while internally upscaling to `FP32` anyway) and because this is the best precision for a GPU target, you may want to always convert models to `FP16`. Notice that this is the only precision that Intel® Movidius™ Myriad™ 2 and Intel® Myriad™ X VPUs support.
|
||||
|
||||
|
||||
## Multi-Device Execution <a name="multi-device-optimizations"></a>
|
||||
OpenVINO™ toolkit supports automatic multi-device execution, please see [MULTI-Device plugin description](../IE_DG/supported_plugins/MULTI.md).
|
||||
OpenVINO™ toolkit supports automatic multi-device execution, please see [MULTI-Device plugin description](../IE_DG/supported_plugins/MULTI.md).
|
||||
In the next chapter you can find device-specific tips, while this section covers a few recommendations
|
||||
for multi-device execution:
|
||||
- MULTI usually performs best when the fastest device is specified first in the list of the devices.
|
||||
This is particularly important when the parallelism is not sufficient
|
||||
(e.g. the number of request in the flight is not enough to saturate all devices).
|
||||
- It is highly recommended to query the optimal number of inference requests directly from the instance of the ExecutionNetwork
|
||||
(resulted from the LoadNetwork call with the specific multi-device configuration as a parameter).
|
||||
Please refer to the code of the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for details.
|
||||
- Notice that for example CPU+GPU execution performs better with certain knobs
|
||||
- MULTI usually performs best when the fastest device is specified first in the list of the devices.
|
||||
This is particularly important when the parallelism is not sufficient
|
||||
(e.g., the number of requests in flight is not enough to saturate all devices).
|
||||
- It is highly recommended to query the optimal number of inference requests directly from the instance of the ExecutionNetwork
|
||||
(resulting from the LoadNetwork call with the specific multi-device configuration as a parameter); see also the sketch after this list.
|
||||
Please refer to the code of the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for details.
|
||||
- Notice that for example CPU+GPU execution performs better with certain knobs
|
||||
which you can find in the code of the same [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample.
|
||||
One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams (which is already a default for the GPU) to amortize slower
|
||||
inference completion from the device to the host.
|
||||
- Multi-device logic always attempts to save on the (e.g. inputs) data copies between device-agnostic, user-facing inference requests
|
||||
- Multi-device logic always attempts to save on the (e.g., inputs) data copies between device-agnostic, user-facing inference requests
|
||||
and device-specific 'worker' requests that are actually scheduled behind the scenes.
|
||||
To facilitate the copy savings, it is recommended to start the requests in the order that they were created
|
||||
(with ExecutableNetwork's CreateInferRequest).
|
||||
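A minimal sketch of the recommendation above about querying the optimal number of requests, assuming the 2021.x Python API; the device list and IR file names are placeholders:

```python
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")            # hypothetical IR files

# Fastest device first, as recommended above.
exec_net = ie.load_network(network=net, device_name="MULTI:GPU,CPU")
optimal_requests = exec_net.get_metric("OPTIMAL_NUMBER_OF_INFER_REQUESTS")
print("Optimal number of infer requests:", optimal_requests)

# Reload with that many requests and submit work in creation order.
exec_net = ie.load_network(network=net, device_name="MULTI:GPU,CPU",
                           num_requests=optimal_requests)
```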
|
||||
|
||||
## Device-Specific Optimizations <a name="device-specific-optimizations"></a>
|
||||
|
||||
The Inference Engine supports several target devices (CPU, GPU, Intel® Movidius™ Myriad™ 2 VPU, Intel® Movidius™ Myriad™ X VPU, Intel® Vision Accelerator Design with Intel® Movidius™ Vision Processing Units (VPU) and FPGA), and each of them has a corresponding plugin. If you want to optimize a specific device, you must keep in mind the following tips to increase the performance.
|
||||
The Inference Engine supports several target devices (CPU, GPU, Intel® Neural Compute Stick 2, Intel® Vision Accelerator Design with Intel® Movidius™ Vision Processing Units (VPU)), and each of them has a corresponding plugin. If you want to optimize a specific device, keep in mind the following tips to increase performance.
|
||||
|
||||
### CPU Checklist <a name="cpu-checklist"></a>
|
||||
|
||||
CPU plugin completely relies on the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) for major primitives acceleration, for example, Convolutions or FullyConnected.
|
||||
The CPU plugin completely relies on the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) for major primitives acceleration, for example, Convolutions or FullyConnected.
|
||||
|
||||
The only hint you can get from that is how the major primitives are accelerated (and you cannot change this). For example, on the Core machines, you should see variations of the `jit_avx2` when inspecting the <a href="#performance-counters">internal inference performance counters</a> (and additional '_int8' postfix for [int8 inference](../IE_DG/Int8Inference.md)). If you are an advanced user, you can further trace the CPU execution with (see <a href="#vtune-examples">Intel® VTune™</a>).
|
||||
The only hint you can get from that is how the major primitives are accelerated (and you cannot change this). For example, on machines with Intel® Core™ processors, you should see variations of the `jit_avx2` when inspecting the <a href="#performance-counters">internal inference performance counters</a> (and additional '_int8' postfix for [int8 inference](../IE_DG/Int8Inference.md)). If you are an advanced user, you can further trace the CPU execution with <a href="#vtune-examples">Intel® VTune™</a>.
|
||||
|
||||
Internally, the Inference Engine has a threading abstraction level, which allows for compiling the [open source version](https://github.com/openvinotoolkit/openvino) with either Intel® Threading Building Blocks (Intel® TBB) which is now default, or OpenMP* as an alternative parallelism solution. When using inference on the CPU, this is particularly important to align threading model with the rest of your application (and any third-party libraries that you use) to avoid oversubscription. For more information, see <a href="#note-on-app-level-threading">Note on the App-Level Threading</a> section.
|
||||
Internally, the Inference Engine has a threading abstraction level, which allows for compiling the [open source version](https://github.com/openvinotoolkit/openvino) with either Intel® Threading Building Blocks (Intel® TBB) which is now default, or OpenMP* as an alternative parallelism solution. When using inference on the CPU, this is particularly important to align threading model with the rest of your application (and any third-party libraries that you use) to avoid oversubscription. For more information, see <a href="#note-on-app-level-threading">Note on the App-Level Threading</a> section.
|
||||
|
||||
Since R1 2019, the OpenVINO™ toolkit comes pre-compiled with Intel TBB,
|
||||
so any OpenMP* API or environment settings (like `OMP_NUM_THREADS`) has no effect.
|
||||
Since R1 2019, OpenVINO™ toolkit comes pre-compiled with Intel TBB,
|
||||
so any OpenMP* API or environment settings (like `OMP_NUM_THREADS`) have no effect.
|
||||
Certain tweaks (like number of threads used for inference on the CPU) are still possible via [CPU configuration options](../IE_DG/supported_plugins/CPU.md).
|
||||
Finally, OpenVINO CPU inference is NUMA-aware; please refer to the <a href="#note-on-numa">Tips for inference on NUMA systems</a> section.
|
||||
|
||||
@ -157,15 +156,16 @@ Other general recommendations:
|
||||
|
||||
#### Throughput Mode for CPU <a name="cpu-streams"></a>
|
||||
Unlike most accelerators, the CPU is perceived as an inherently latency-oriented device.
|
||||
In fact, the OpenVINO does support the "throughput" mode for the CPU, which allows the Inference Engine to efficiently run multiple inference requests on the CPU simultaneously, greatly improving the overall throughput.
|
||||
In fact, OpenVINO supports the "throughput" mode for the CPU, which allows the Inference Engine to efficiently run multiple inference requests on the CPU simultaneously, greatly improving the overall throughput.
|
||||
|
||||
Internally, the execution resources are split/pinned into execution "streams".
|
||||
This feature usually provides much better performance for the networks than batching. This is especially true for the many-core server machines:
|
||||

|
||||
|
||||
Compared with batching, the parallelism is somewhat transposed (i.e., performed over inputs and much less within CNN ops):
|
||||

|
||||
|
||||
Try the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample and play with number of streams running in parallel. The rule of thumb is tying up to a number of CPU cores on your machine.
|
||||
Try the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample and play with the number of streams running in parallel. The rule of thumb is trying up to the number of CPU cores on your machine.
|
||||
For example, on an 8-core CPU, compare the `-nstreams 1` (which is a legacy, latency-oriented scenario) to the 2, 4, and 8 streams.
|
||||
Notice that on a multi-socket machine, the bare minimum of streams for a latency scenario equals the number of sockets.
|
||||
|
||||
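A minimal sketch of enabling CPU streams from the Python API (2021.x API and configuration key names assumed; the IR file names are placeholders):

```python
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")        # hypothetical IR files

nstreams = 4                                                         # e.g., try 2, 4 and 8 on an 8-core CPU
exec_net = ie.load_network(
    network=net,
    device_name="CPU",
    config={"CPU_THROUGHPUT_STREAMS": str(nstreams)},                # or "CPU_THROUGHPUT_AUTO"
    num_requests=nstreams,
)
```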
@ -178,7 +178,7 @@ If your application is hard or impossible to change in accordance with the multi
|
||||
|
||||
### GPU Checklist <a name="gpu-checklist"></a>
|
||||
|
||||
Inference Engine relies on the [Compute Library for Deep Neural Networks (clDNN)](https://01.org/cldnn) for Convolutional Neural Networks acceleration on Intel® GPUs. Internally, clDNN uses OpenCL™ to implement the kernels. Thus, many general tips apply:
|
||||
Inference Engine relies on the [Compute Library for Deep Neural Networks (clDNN)](https://01.org/cldnn) for Convolutional Neural Networks acceleration on Intel® GPUs. Internally, clDNN uses OpenCL™ to implement the kernels. Thus, many general tips apply:
|
||||
|
||||
- Prefer `FP16` over `FP32`, as the Model Optimizer can generate both variants and `FP32` is the default.
|
||||
- Try to group individual infer jobs by using batches.
|
||||
@ -190,22 +190,21 @@ Inference Engine relies on the [Compute Library for Deep Neural Networks (clDNN)
|
||||
Notice that while disabling the polling, this option might reduce GPU performance, so it is usually used with multiple [GPU streams](../IE_DG/supported_plugins/GPU.md).
|
||||
|
||||
|
||||
### Intel® Movidius™ Myriad™ X Visual Processing Unit and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs <a name="myriad"></a>
|
||||
### Intel® Movidius™ Myriad™ X Visual Processing Unit and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs <a name="myriad"></a>
|
||||
|
||||
Since Intel® Movidius™ Myriad™ X Visual Processing Unit (Intel® Movidius™ Myriad™ 2 VPU) communicates with the host over USB, minimum four infer requests in flight are recommended to hide the data transfer costs. See <a href="#new-request-based-api">Request-Based API and “GetBlob” Idiom</a> and [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) for more information.
|
||||
Since Intel® Movidius™ Myriad™ X Visual Processing Unit (Intel® Movidius™ Myriad™ 2 VPU) communicates with the host over USB, minimum four infer requests in flight are recommended to hide the data transfer costs. See <a href="#new-request-based-api">Request-Based API and “GetBlob” Idiom</a> and [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) for more information.
|
||||
|
||||
Intel® Vision Accelerator Design with Intel® Movidius™ VPUs requires to keep at least 32 inference requests in flight to fully saturate the device.
|
||||
Intel® Vision Accelerator Design with Intel® Movidius™ VPUs requires keeping at least 32 inference requests in flight to fully saturate the device.
|
||||
|
||||
### FPGA <a name="fpga"></a>
|
||||
|
||||
Below are listed the most important tips for the efficient usage of the FPGA:
|
||||
|
||||
- Just like for the Intel® Movidius™ Myriad™ VPU flavors, for the FPGA, it is important to hide the communication overheads by running multiple inference requests in parallel. For examples, refer to the [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md).
|
||||
- Just like for the Intel® Movidius™ Myriad™ VPU flavors, for the FPGA, it is important to hide the communication overheads by running multiple inference requests in parallel. For examples, refer to the [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md).
|
||||
- Since the first inference iteration with FPGA is always significantly slower than the subsequent ones, make sure you run multiple iterations (all samples, except GUI-based demos, have the `-ni` or 'niter' option to do that).
|
||||
- FPGA performance heavily depends on the bitstream.
|
||||
- The number of infer requests per executable network is limited to five, so “channel” parallelism (keeping an individual infer request per camera/video input) would not work beyond five inputs. Instead, you need to mux the inputs into a queue that internally uses a pool of (5) requests.
|
||||
- In most scenarios, the FPGA acceleration is leveraged through <a href="#heterogeneity">heterogeneous execution</a> with further specific tips.
|
||||
- For multi-device FPGA execution, please refer to the [FPGA plugin documentation](../IE_DG/supported_plugins/FPGA.md).
|
||||
|
||||
## Heterogeneity <a name="heterogeneity"></a>
|
||||
|
||||
@ -231,15 +230,15 @@ The execution through heterogeneous plugin has three distinct steps:
|
||||
- The affinity setting is made before loading the network to the (heterogeneous) plugin, so this is always a **static** setup with respect to execution.
|
||||
|
||||
2. **Loading a network to the heterogeneous plugin**, which internally splits the network into subgraphs.<br>
|
||||
You can check the decisions the plugin makes, see <a href="#analyzing-heterogeneous-execution">Analysing the Heterogeneous Execution</a>.
|
||||
You can check the decisions the plugin makes, see <a href="#analyzing-heterogeneous-execution">Analyzing the Heterogeneous Execution</a>.
|
||||
|
||||
3. **Executing the infer requests**. From user’s side, this looks identical to a single-device case, while internally, the subgraphs are executed by actual plugins/devices.
|
||||
|
||||
Performance benefits of the heterogeneous execution depend heavily on the communications granularity between devices. If transmitting/converting data from one part device to another takes more time than the execution, the heterogeneous approach makes little or no sense. Using Intel® VTune™ helps to visualize the execution flow on a timeline (see <a href="#vtune-examples">Intel® VTune™ Examples</a>).
|
||||
Performance benefits of the heterogeneous execution depend heavily on the communications granularity between devices. If transmitting/converting data from one part device to another takes more time than the execution, the heterogeneous approach makes little or no sense. Using Intel® VTune™ helps to visualize the execution flow on a timeline (see <a href="#vtune-examples">Intel® VTune™ Examples</a>).
|
||||
|
||||
Similarly, if there are too much subgraphs, the synchronization and data transfers might eat the entire performance. In some cases, you can define the (coarser) affinity manually to avoid sending data back and forth many times during one inference.
|
||||
Similarly, if there are too many subgraphs, the synchronization and data transfers might eat the entire performance. In some cases, you can define the (coarser) affinity manually to avoid sending data back and forth many times during one inference.
|
||||
|
||||
The general affinity “rule of thumb” is to keep computationally-intensive kernels on the accelerator, and "glue" or helper kernels on the CPU. Notice that this includes the granularity considerations. For example, running some custom activation (that comes after every accelerator-equipped convolution) on the CPU might result in performance degradation due to too much data type and/or layout conversions, even though the activation itself can be extremely fast. In this case, it might make sense to consider implementing the kernel for the accelerator (see <a href="#optimizing-custom-kernels">Optimizing Custom Kernels</a>). The conversions typically manifest themselves as outstanding (comparing to CPU-only execution) 'Reorder' entries (see <a href="#performance-counters">Internal Inference Performance Counters</a>).
|
||||
The general affinity rule of thumb is to keep computationally-intensive kernels on the accelerator, and "glue" or helper kernels on the CPU. Notice that this includes the granularity considerations. For example, running some custom activation (that comes after every accelerator-equipped convolution) on the CPU might result in performance degradation due to too much data type and/or layout conversions, even though the activation itself can be extremely fast. In this case, it might make sense to consider implementing the kernel for the accelerator (see <a href="#optimizing-custom-kernels">Optimizing Custom Kernels</a>). The conversions typically manifest themselves as outstanding (comparing to CPU-only execution) 'Reorder' entries (see <a href="#performance-counters">Internal Inference Performance Counters</a>).
|
||||
|
||||
For general details on the heterogeneous plugin, refer to the [corresponding section in the Inference Engine Developer Guide](../IE_DG/supported_plugins/HETERO.md).
|
||||
|
||||
@ -264,7 +263,7 @@ You can point more than two devices: `-d HETERO:FPGA,GPU,CPU`.
|
||||
|
||||
As the FPGA is considered an inference accelerator, most performance issues are related to the fact that, due to the fallback, the CPU can still be used quite heavily.
|
||||
- Yet in most cases, the CPU does only small/lightweight layers, for example, post-processing (`SoftMax` in most classification models or `DetectionOutput` in the SSD*-based topologies). In that case, limiting the number of CPU threads with [`KEY_CPU_THREADS_NUM`](../IE_DG/supported_plugins/CPU.md) config would further reduce the CPU utilization without significantly degrading the overall performance.
|
||||
- Also, if you are still using OpenVINO version earlier than R1 2019, or if you have recompiled the Inference Engine with OpemMP (say for backward compatibility), setting the `KMP_BLOCKTIME` environment variable to something less than default 200ms (we suggest 1ms) is particularly helpful. Use `KMP_BLOCKTIME=0` if the CPU subgraph is small.
|
||||
- Also, if you are still using OpenVINO™ toolkit version earlier than R1 2019, or if you have recompiled the Inference Engine with OpenMP (say for backward compatibility), setting the `KMP_BLOCKTIME` environment variable to something less than default 200ms (we suggest 1ms) is particularly helpful. Use `KMP_BLOCKTIME=0` if the CPU subgraph is small.
|
||||
|
||||
> **NOTE**: General threading tips (see <a href="#note-on-app-level-threading">Note on the App-Level Threading</a>) apply well, even when the entire topology fits the FPGA, because there is still a host-side code for data pre- and post-processing.
|
||||
|
||||
@ -278,11 +277,11 @@ The following tips are provided to give general guidance on optimizing execution
|
||||
|
||||
- The general affinity “rule of thumb” is to keep computationally-intensive kernels on the accelerator, and "glue" (or helper) kernels on the CPU. Notice that this includes the granularity considerations. For example, running some (custom) activation on the CPU would result in too many conversions.
|
||||
|
||||
- It is advised to do <a href="#analyzing-hetero-execution">performance analysis</a> to determine “hotspot” kernels, which should be the first candidates for offloading. At the same time, it is often more efficient to offload some reasonably sized sequence of kernels, rather than individual kernels, to minimize scheduling and other runtime overhead.
|
||||
- It is advised to do <a href="#analyzing-heterogeneous-execution">performance analysis</a> to determine “hotspot” kernels, which should be the first candidates for offloading. At the same time, it is often more efficient to offload some reasonably sized sequence of kernels, rather than individual kernels, to minimize scheduling and other runtime overhead.
|
||||
|
||||
- Notice that GPU can be busy with other tasks (like rendering). Similarly, the CPU can be in charge for the general OS routines and other application threads (see <a href="#note-on-app-level-threading">Note on the App-Level Threading</a>). Also, a high interrupt rate due to many subgraphs can raise the frequency of the one device and drag the frequency of another down.
|
||||
- Notice that the GPU can be busy with other tasks (like rendering). Similarly, the CPU can be in charge for the general OS routines and other application threads (see <a href="#note-on-app-level-threading">Note on the App-Level Threading</a>). Also, a high interrupt rate due to many subgraphs can raise the frequency of the device and drag down the frequency of another.
|
||||
|
||||
- Device performance can be affected by dynamic frequency scaling. For example, running long kernels on both devices simultaneously might eventually result in one or both devices stopping use of the Intel® Turbo Boost Technology. This might result in overall performance decrease, even comparing to single-device scenario.
|
||||
- Device performance can be affected by dynamic frequency scaling. For example, running long kernels on both devices simultaneously might eventually result in one or both devices stopping use of the Intel® Turbo Boost Technology. This might result in overall performance decrease, even comparing to single-device scenario.
|
||||
|
||||
- Mixing the `FP16` (GPU) and `FP32` (CPU) execution results in conversions and, thus, performance issues. If you are seeing a lot of heavy outstanding (compared to the CPU-only execution) Reorders, consider implementing actual GPU kernels. Refer to <a href="#performance-counters">Internal Inference Performance Counters</a> for more information.
|
||||
|
||||
@ -295,22 +294,21 @@ After enabling the configuration key, the heterogeneous plugin generates two fil
|
||||
- `hetero_affinity.dot` - per-layer affinities. This file is generated only if the default fallback policy was executed (otherwise you have set the affinities yourself, so you know them).
|
||||
- `hetero_subgraphs.dot` - affinities per sub-graph. This file is written to the disk during execution of `Core::LoadNetwork` for the heterogeneous flow.
|
||||
|
||||
You can use GraphViz\* utility or `.dot` converters (for example, to `.png` or `.pdf`), like xdot\*, available on Linux\* OS with `sudo apt-get install xdot`. Below is an example of the output trimmed to the two last layers (one executed on the FPGA and another on the CPU):
|
||||
You can use the GraphViz\* utility or `.dot` converters (for example, to `.png` or `.pdf`), like xdot\*, available on Linux\* OS with `sudo apt-get install xdot`. Below is an example of the output trimmed to the two last layers (one executed on the FPGA and another on the CPU):
|
||||
|
||||

|
||||
|
||||
You can also use performance data (in the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md), it is an option `-pc`) to get performance data on each subgraph. Again, refer to the [HETERO plugin documentation](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_HETERO.html#analyzing_heterogeneous_execution) and to <a href="#performance-counters">Internal Inference Performance Counters</a> for a general counters information.
|
||||
|
||||
You can also use performance data (in the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md), it is an option `-pc`) to get performance data on each subgraph. Again, refer to the [HETERO plugin documentation](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_HETERO.html#analyzing_heterogeneous_execution) and to <a href="#performance-counters">Internal Inference Performance Counters</a> for information on general counters.
|
||||
|
||||
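For reference, a minimal sketch of reading the per-layer counters from the Python API (2021.x API assumed; the IR file names and device string are placeholders):

```python
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")        # hypothetical IR files
input_name = next(iter(net.input_info))
shape = net.input_info[input_name].input_data.shape

exec_net = ie.load_network(network=net, device_name="HETERO:GPU,CPU",
                           config={"PERF_COUNT": "YES"},             # enable the counters
                           num_requests=1)
request = exec_net.requests[0]
request.infer({input_name: np.random.rand(*shape).astype(np.float32)})

for layer, counters in request.get_perf_counts().items():
    # Each entry typically reports status, layer_type, exec_type and timings.
    print(layer, counters)
```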
## Optimizing Custom Kernels <a name="optimizing-custom-kernels"></a>
|
||||
|
||||
### Few Initial Performance Considerations <a name="initial-performance-considerations"></a>
|
||||
### A Few Initial Performance Considerations <a name="initial-performance-considerations"></a>
|
||||
|
||||
The Inference Engine supports CPU, GPU and VPU custom kernels. Typically, custom kernels are used to quickly implement missing layers for new topologies. You should not override standard layers implementation, especially on the critical path, for example, Convolutions. Also, overriding existing layers can disable some existing performance optimizations, such as fusing.
|
||||
|
||||
It is usually easier to start with the CPU extension and switch to the GPU after debugging with the CPU path. Sometimes, when the custom layers are at the very end of your pipeline, it is easier to implement them as regular post-processing in your application without wrapping them as kernels. This is particularly true for the kernels that do not fit the GPU well, for example, output bounding boxes sorting. In many cases, you can do such post-processing on the CPU.
|
||||
|
||||
There are many cases when sequence of the custom kernels can be implemented as a "super" kernel allowing to save on data accesses.
|
||||
There are many cases when sequence of the custom kernels can be implemented as a "super" kernel, allowing you to save on data accesses.
|
||||
|
||||
Finally, with the heterogeneous execution, it is possible to execute the vast majority of intensive computations with the accelerator and keep the custom pieces on the CPU. The tradeoff is granularity/costs of communication between different devices.
|
||||
|
||||
@ -322,10 +320,10 @@ In most cases, before actually implementing a full-blown code for the kernel, yo
|
||||
|
||||
Other than that, when implementing the kernels, you can try the methods from the previous chapter to understand actual contribution and, if any custom kernel is in the hotspots, optimize that.
|
||||
|
||||
### Few Device-Specific Tips <a name="device-specific-tips"></a>
|
||||
### A Few Device-Specific Tips <a name="device-specific-tips"></a>
|
||||
|
||||
- As already outlined in the <a href="#cpu-checklist">CPU Checklist</a>, align the threading model that you use in your CPU kernels with the model that the rest of the Inference Engine compiled with.
|
||||
- For CPU extensions, consider kernel flavor that supports blocked layout, if your kernel is in the hotspots (see <a href="#performance-counters">Internal Inference Performance Counters</a>). Since Intel MKL-DNN internally operates on the blocked layouts, this would save you a data packing (Reorder) on tensor inputs/outputs of your kernel. For example of the blocked layout support, please, refer to the extensions in the `<OPENVINO_INSTALL_DIR>/deployment_tools/samples/extension/`.
|
||||
- As already outlined in the <a href="#cpu-checklist">CPU Checklist</a>, align the threading model that you use in your CPU kernels with the model that the rest of the Inference Engine compiled with.
|
||||
- For CPU extensions, consider kernel flavor that supports blocked layout, if your kernel is in the hotspots (see <a href="#performance-counters">Internal Inference Performance Counters</a>). Since Intel MKL-DNN internally operates on the blocked layouts, this would save you a data packing (Reorder) on tensor inputs/outputs of your kernel. For example of the blocked layout support, please, refer to the extensions in `<OPENVINO_INSTALL_DIR>/deployment_tools/samples/extension/`.
|
||||
|
||||
## Plugging Inference Engine to Applications <a name="plugging-ie-to-applications"></a>
|
||||
|
||||
@ -349,7 +347,7 @@ If you are building an app-level pipeline with third-party components like GStre
|
||||
- The rule of thumb is that you should try to have the overall number of active threads in your application equal to the number of cores in your machine. Keep in mind the spare core(s) that the OpenCL driver under the GPU plugin might also need.
|
||||
- One specific workaround to limit the number of threads for the Inference Engine is using the [CPU configuration options](../IE_DG/supported_plugins/CPU.md).
|
||||
- To avoid further oversubscription, use the same threading model in all modules/libraries that your application uses. Notice that third party components might bring their own threading. For example, using the Inference Engine, which is now compiled with TBB by default, might lead to [performance troubles](https://www.threadingbuildingblocks.org/docs/help/reference/appendices/known_issues/interoperability.html) when mixed in the same app with another computationally-intensive library that is compiled with OpenMP. You can try to compile the [open source version](https://github.com/openvinotoolkit/openvino) of the Inference Engine to use OpenMP as well. But notice that, in general, TBB offers much better composability than other threading solutions.
|
||||
- If your code (or third party libraries) uses GNU OpenMP, the Intel® OpenMP (if you have recompiled Inference Engine with that) must be initialized first. This can be achieved by linking your application with the Intel OpenMP instead of GNU OpenMP, or using `LD_PRELOAD` on Linux* OS.
|
||||
- If your code (or third party libraries) uses GNU OpenMP, the Intel® OpenMP (if you have recompiled Inference Engine with that) must be initialized first. This can be achieved by linking your application with the Intel OpenMP instead of GNU OpenMP, or using `LD_PRELOAD` on Linux* OS.
|
||||
|
||||
### Letting the Inference Engine Accelerate Image Pre-processing/Conversion <a name="image-preprocessing"></a>
|
||||
|
||||
@ -363,7 +361,7 @@ Note that in many cases, you can directly share the (input) data with the Infere
|
||||
|
||||
### Basic Interoperability with Other APIs <a name="basic-interoperability-with-other-apis"></a>
|
||||
|
||||
The general approach for sharing data between Inference Engine and media/graphics APIs like Intel® Media Server Studio (Intel® MSS) is based on sharing the *system* memory. That is, in your code, you should map or copy the data from the API to the CPU address space first.
|
||||
The general approach for sharing data between Inference Engine and media/graphics APIs like Intel® Media Server Studio (Intel® MSS) is based on sharing the *system* memory. That is, in your code, you should map or copy the data from the API to the CPU address space first.
|
||||
|
||||
For Intel® Media SDK, it is recommended to perform a viable pre-processing, for example, crop/resize, and then convert to RGB again with the [Video Processing Procedures (VPP)](https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/onevpl.htm). Then lock the result and create an Inference Engine blob on top of that. The resulting pointer can be used for `SetBlob`:

@ -401,22 +399,22 @@ More importantly, an infer request encapsulates the reference to the “executab

### Performance Aspects of Running Multiple Requests Simultaneously <a name="running-multiple-requests-simultaneously"></a>

If your application simultaneously executes multiple infer requests:
If your application simultaneously executes multiple inference requests:

- For the CPU, the best solution, you can use the <a href="#cpu-streams">CPU "throughput" mode</a>.
- If latency is of more concern, you can try the `EXCLUSIVE_ASYNC_REQUESTS` [configuration option](../IE_DG/supported_plugins/CPU.md) that limits the number of the simultaneously executed requests for all (executable) networks that share the specific device to just one:<br>
- For the CPU, the best solution is to use the <a href="#cpu-streams">CPU "throughput" mode</a>.
- If latency is of more concern, you can try the `EXCLUSIVE_ASYNC_REQUESTS` [configuration option](../IE_DG/supported_plugins/CPU.md), which limits the number of the simultaneously executed requests for all (executable) networks that share the specific device to just one:<br>

@snippet snippets/dldt_optimization_guide7.cpp part7

<br>For more information on the executable networks notation, see <a href="#new-request-based-api">Request-Based API and “GetBlob” Idiom</a>.

- The heterogeneous device uses the `EXCLUSIVE_ASYNC_REQUESTS` by default.

- The `KEY_EXCLUSIVE_ASYNC_REQUESTS` option affects only the device queues of the individual application.

- For FPGA and GPU, the actual work is serialized by a plugin and/or a driver anyway.
- For GPU, the actual work is serialized by a plugin and/or a driver anyway.

- Finally, for <a href="#myriad">any VPU flavor</a>, using multiple requests is a must for achieving good throughput.
- Finally, for <a href="#myriad">any VPU model</a>, using multiple requests is a must for achieving good throughput.

In the Inference Engine, there is no notion of request priorities. It is left to the application side (for example, do not queue a low-priority infer request while a higher-priority one is waiting). Notice that this requires additional logic to synchronize between executable networks (queues) in your application code.
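
To make the bullets above concrete, here is a minimal Python sketch (not the guide's own snippet) that issues several requests at once in the CPU "throughput" mode. The 2021.x `openvino.inference_engine` API is assumed; the IR paths and the zero-filled inputs are placeholders for real data.

```python
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")     # placeholder IR
input_name = next(iter(net.input_info))
input_shape = net.input_info[input_name].input_data.shape

# Let the CPU plugin pick the stream count and create a matching pool of requests.
exec_net = ie.load_network(network=net, device_name="CPU",
                           config={"CPU_THROUGHPUT_STREAMS": "CPU_THROUGHPUT_AUTO"},
                           num_requests=4)

# Placeholder inputs; in a real pipeline these come from your preprocessing.
batches = [np.zeros(input_shape, dtype=np.float32) for _ in range(len(exec_net.requests))]

# Issue all requests asynchronously, then collect the results.
for request, batch in zip(exec_net.requests, batches):
    request.async_infer({input_name: batch})
for request in exec_net.requests:
    request.wait()                       # blocks until this particular request is done
    outputs = request.output_blobs       # dict: output name -> Blob
```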

@ -432,33 +430,33 @@ You can compare the pseudo-codes for the regular and async-based approaches:

@snippet snippets/dldt_optimization_guide8.cpp part8




- In the "true" async mode, the `NEXT` request is populated in the main (application) thread, while the `CURRENT` request is processed:<br>

@snippet snippets/dldt_optimization_guide9.cpp part9




The technique can be generalized to any available parallel slack. For example, you can do inference and simultaneously encode the resulting or previous frames or run further inference, like emotion detection on top of the face detection results.
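
The "true" async `NEXT`/`CURRENT` pattern above can be sketched in Python roughly as follows. The 2021.x `openvino.inference_engine` API is assumed, and `read_frame()` and `consume()` are hypothetical stand-ins for your capture and post-processing code; treat this as a sketch, not the guide's own snippet.

```python
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")   # placeholder IR
input_name = next(iter(net.input_info))
exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)

current, next_ = exec_net.requests[0], exec_net.requests[1]

frame = read_frame()                              # hypothetical: first frame
current.async_infer({input_name: frame})          # prime the pipeline

while True:
    frame = read_frame()                          # hypothetical: grab the next frame
    if frame is None:
        break
    next_.async_infer({input_name: frame})        # start NEXT while CURRENT is in flight
    current.wait()                                # finish CURRENT
    consume(current.output_blobs)                 # hypothetical post-processing
    current, next_ = next_, current               # swap the two requests

current.wait()                                    # drain the last in-flight request
consume(current.output_blobs)
```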

There are important performance caveats though: for example, the tasks that run in parallel should try to avoid oversubscribing the shared compute resources. If the inference is performed on the FPGA and the CPU is essentially idle, it makes sense to do things on the CPU in parallel. However, multiple infer requests can oversubscribe it. Notice that heterogeneous execution can implicitly use the CPU; refer to <a href="#heterogeneity">Heterogeneity</a>.

Also, if the inference is performed on the graphics processing unit (GPU), it can take little gain to do the encoding, for instance, of the resulting video, on the same GPU in parallel, because the device is already busy.
Also, if the inference is performed on the graphics processing unit (GPU), there is little gain in doing the encoding of the resulting video on the same GPU in parallel, for instance, because the device is already busy.

Refer to the [Object Detection SSD Demo](@ref omz_demos_object_detection_demo_cpp) (latency-oriented Async API showcase) and [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) (which has both latency and throughput-oriented modes) for complete examples of the Async API in action.

## Using Tools <a name="using-tools"></a>

Whether you are tuning for the first time or doing advanced performance optimization, you need a tool that provides accurate insights. Intel® VTune™ Amplifier gives you that tool to mine and interpret the profiling data.

Alternatively, you can gather the raw profiling data that the samples report; the second chapter provides an example of how to interpret it.
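
As a rough illustration of gathering that raw per-layer data programmatically (rather than reading a sample's printout), the following Python sketch enables performance counting on the device and dumps the counters. It assumes the 2021.x `openvino.inference_engine` API; the model paths and zero-filled input are placeholders.

```python
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml", weights="model.bin")       # placeholder IR
input_name = next(iter(net.input_info))
input_shape = net.input_info[input_name].input_data.shape

# PERF_COUNT enables per-layer execution statistics for this executable network.
exec_net = ie.load_network(network=net, device_name="CPU",
                           config={"PERF_COUNT": "YES"})

exec_net.requests[0].infer({input_name: np.zeros(input_shape, dtype=np.float32)})

# Each entry reports the layer status, execution type, and real/CPU time in microseconds.
for layer, stats in exec_net.requests[0].get_perf_counts().items():
    print(f"{layer:40s} {stats['status']:12s} {stats['real_time']:>8d} us")
```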

### Intel® VTune™ Examples <a name="vtune-examples"></a>

All major performance calls of the Inference Engine are instrumented with Instrumentation and Tracing Technology APIs. This allows viewing the Inference Engine calls on the Intel® VTune™ timelines and aggregations plus correlating them to the underlying APIs, like OpenCL. In turn, this enables careful per-layer execution breakdown.

When choosing the Analysis type in Intel® VTune™ Amplifier, make sure to select the **Analyze user tasks, events, and counters** option:

![](../img/vtune_option.jpg)

@ -478,7 +476,7 @@ Example of Inference Engine calls:

Similarly, you can use any GPU analysis in the Intel VTune Amplifier and get general correlation with the Inference Engine API as well as the execution breakdown for OpenCL kernels.

Just like with a regular native application, further drill-down in the counters is possible; however, this is mostly useful for <a href="#optimizing-custom-kernels">optimizing custom kernels</a>. Finally, with the Intel VTune Amplifier, the profiling is not limited to your user-level code (see the [corresponding section in the Intel® VTune™ Amplifier User's Guide](https://software.intel.com/en-us/vtune-amplifier-help-analyze-performance)).

### Internal Inference Performance Counters <a name="performance-counters"></a>

@ -51,7 +51,7 @@ After the license is successfully validated, the OpenVINO™ Model Server loads
|
||||
|
||||

|
||||
|
||||
The binding between SWTPM (vTPM used in guest VM) and HW TPM (TPM on the host) is explained in [this document](https://github.com/openvinotoolkit/security_addon/blob/release_2021_3/docs/fingerprint-changes.md)
|
||||
The binding between SWTPM (vTPM used in guest VM) and HW TPM (TPM on the host) is explained in [this document](https://github.com/openvinotoolkit/security_addon/blob/release_2021_4/docs/fingerprint-changes.md)
|
||||
|
||||
## About the Installation
|
||||
The Model Developer, Independent Software Vendor, and User each must prepare one physical hardware machine and one Kernel-based Virtual Machine (KVM). In addition, each person must prepare a Guest Virtual Machine (Guest VM) for each role that person plays.
|
||||
@ -135,7 +135,7 @@ Begin this step on the Intel® Core™ or Xeon® processor machine that meets th
|
||||
10. Install the [`tpm2-tools`](https://github.com/tpm2-software/tpm2-tools/releases/download/4.3.0/tpm2-tools-4.3.0.tar.gz).<br>
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-tools/blob/master/INSTALL.md
|
||||
11. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/).
|
||||
> **NOTE**: Regardless of whether you used the `install_host_deps.sh` script, complete step 12 to finish setting up the packages on the Host Machine.
|
||||
**NOTE**: Regardless of whether you used the `install_host_deps.sh` script, complete step 12 to finish setting up the packages on the Host Machine.
|
||||
12. If you are running behind a proxy, [set up a proxy for Docker](https://docs.docker.com/config/daemon/systemd/).
|
||||
|
||||
The following are installed and ready to use:
|
||||
@ -255,7 +255,7 @@ Networking is set up on the Host Machine. Continue to the Step 3 to prepare a Gu
|
||||
Download the [OpenVINO™ Security Add-on](https://github.com/openvinotoolkit/security_addon).
|
||||
|
||||
|
||||
### Step 4: Set Up one Guest VM for the combined roles of Model Developer and Independent Software Vendor<a name="dev-isv-vm"></a>.
|
||||
### Step 4: Set Up one Guest VM for the combined roles of Model Developer and Independent Software Vendor<a name="dev-isv-vm"></a>
|
||||
|
||||
For each separate role you play, you must prepare a virtual machine, called a Guest VM. Because in this release, the Model Developer and Independent Software Vendor roles are combined, these instructions guide you to set up one Guest VM, named `ovsa_isv`.
|
||||
|
||||
@ -489,7 +489,7 @@ This step is for the combined role of Model Developer and Independent Software V
|
||||
2. Build the OpenVINO™ Security Add-on:
|
||||
```sh
|
||||
make clean all
|
||||
sudo make package
|
||||
sudo -s make package
|
||||
```
|
||||
The following packages are created under the `release_files` directory:
|
||||
- `ovsa-kvm-host.tar.gz`: Host Machine file
|
||||
@ -517,13 +517,13 @@ This step is for the combined role of Model Developer and Independent Software V
|
||||
|
||||
If you are using more than one Host Machine repeat Step 3 on each.
|
||||
|
||||
### Step 4: Set up packages on the Guest VM
|
||||
### Step 4: Install the OpenVINO™ Security Add-on Model Developer / ISV Components
|
||||
This step is for the combined role of Model Developer and Independent Software Vendor. References to the Guest VM are to `ovsa_isv_dev`.
|
||||
|
||||
1. Log on to the Guest VM.
|
||||
1. Log on to the Guest VM as `<user>`.
|
||||
2. Create the OpenVINO™ Security Add-on directory in the home directory
|
||||
```sh
|
||||
mkdir OVSA
|
||||
mkdir -p ~/OVSA
|
||||
```
|
||||
3. Go to the Host Machine, outside of the Guest VM.
|
||||
4. Copy `ovsa-developer.tar.gz` from `release_files` to the Guest VM:
|
||||
@ -532,27 +532,25 @@ This step is for the combined role of Model Developer and Independent Software V
|
||||
scp ovsa-developer.tar.gz username@<isv-developer-vm-ip-address>:/<username-home-directory>/OVSA
|
||||
```
|
||||
5. Go to the Guest VM.
|
||||
6. Install the software to the Guest VM:
|
||||
6. Create `ovsa` user
|
||||
```sh
|
||||
sudo useradd -m ovsa
|
||||
sudo passwd ovsa
|
||||
```
|
||||
7. Install the software to the Guest VM:
|
||||
```sh
|
||||
cd OVSA
|
||||
cd ~/OVSA
|
||||
tar xvfz ovsa-developer.tar.gz
|
||||
cd ovsa-developer
|
||||
sudo -s
|
||||
./install.sh
|
||||
sudo ./install.sh
|
||||
```
|
||||
7. Create a directory named `artefacts`. This directory will hold artefacts required to create licenses:
|
||||
8. Start the license server on a separate terminal as `ovsa` user.
|
||||
```sh
|
||||
cd /<username-home-directory>/OVSA
|
||||
mkdir artefacts
|
||||
cd artefacts
|
||||
```
|
||||
8. Start the license server on a separate terminal.
|
||||
```sh
|
||||
sudo -s
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
cd /opt/ovsa/bin
|
||||
./license_server
|
||||
```
|
||||
**NOTE**: If you are behind a firewall, check and set your proxy settings to ensure the license server is able to validate the certificates.
|
||||
|
||||
### Step 5: Install the OpenVINO™ Security Add-on Model Hosting Component
|
||||
|
||||
@ -562,27 +560,27 @@ The Model Hosting components install the OpenVINO™ Security Add-on Runtime Doc
|
||||
|
||||
1. Log on to the Guest VM as `<user>`.
|
||||
2. Create the OpenVINO™ Security Add-on directory in the home directory
|
||||
```sh
|
||||
mkdir OVSA
|
||||
```
|
||||
```sh
|
||||
mkdir -p ~/OVSA
|
||||
```
|
||||
3. While on the Host Machine copy the ovsa-model-hosting.tar.gz from release_files to the Guest VM:
|
||||
```sh
|
||||
cd $OVSA_RELEASE_PATH
|
||||
scp ovsa-model-hosting.tar.gz username@<isv-developer-vm-ip-address>:/<username-home-directory>/OVSA
|
||||
scp ovsa-model-hosting.tar.gz username@<runtime-vm-ip-address>:/<username-home-directory>/OVSA
|
||||
```
|
||||
4. Install the software to the Guest VM:
|
||||
4. Go to the Guest VM.
|
||||
5. Create `ovsa` user
|
||||
```sh
|
||||
cd OVSA
|
||||
sudo useradd -m ovsa
|
||||
sudo passwd ovsa
|
||||
sudo usermod -aG docker ovsa
|
||||
```
|
||||
6. Install the software to the Guest VM:
|
||||
```sh
|
||||
cd ~/OVSA
|
||||
tar xvfz ovsa-model-hosting.tar.gz
|
||||
cd ovsa-model-hosting
|
||||
sudo -s
|
||||
./install.sh
|
||||
```
|
||||
5. Create a directory named `artefacts`:
|
||||
```sh
|
||||
cd /<username-home-directory>/OVSA
|
||||
mkdir artefacts
|
||||
cd artefacts
|
||||
sudo ./install.sh
|
||||
```
|
||||
|
||||
## How to Use the OpenVINO™ Security Add-on
|
||||
@ -599,24 +597,27 @@ The following figure describes the interactions between the Model Developer, Ind
|
||||
|
||||
### Model Developer Instructions
|
||||
|
||||
The Model Developer creates the model, defines access control and creates the user license. References to the Guest VM are to `ovsa_isv_dev`. After the model is created, access control enabled, and the license is ready, the Model Developer provides the license details to the Independent Software Vendor before sharing it with the Model User.
The Model Developer creates the model, defines access control and creates the user license. After the model is created, access control enabled, and the license is ready, the Model Developer provides the license details to the Independent Software Vendor before sharing it with the Model User.
|
||||
|
||||
#### Step 1: Create a key store and add a certificate to it
|
||||
References to the Guest VM are to `ovsa_isv_dev`. Log on to the Guest VM as `ovsa` user.
|
||||
|
||||
1. Set up a path to the artefacts directory:
|
||||
```sh
|
||||
sudo -s
|
||||
cd /<username-home-directory>/OVSA/artefacts
|
||||
export OVSA_DEV_ARTEFACTS=$PWD
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
```
|
||||
2. Create files to request a certificate:<br>
|
||||
This example uses a self-signed certificate for demonstration purposes. In a production environment, use CSR files to request a CA-signed certificate.
|
||||
#### Step 1: Set up the artefacts directory
|
||||
|
||||
Create a directory named artefacts. This directory will hold artefacts required to create licenses:
|
||||
```sh
|
||||
mkdir -p ~/OVSA/artefacts
|
||||
cd ~/OVSA/artefacts
|
||||
export OVSA_DEV_ARTEFACTS=$PWD
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
```
|
||||
#### Step 2: Create a key store and add a certificate to it
|
||||
1. Create files to request a certificate:
|
||||
This example uses a self-signed certificate for demonstration purposes. In a production environment, use CSR files to request a CA-signed certificate.
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
/opt/ovsa/bin/ovsatool keygen -storekey -t ECDSA -n Intel -k isv_keystore -r isv_keystore.csr -e "/C=IN/CN=localhost"
|
||||
```
|
||||
Two files are created:
|
||||
The following two files are created along with the keystore file:
|
||||
- `isv_keystore.csr`- A Certificate Signing Request (CSR)
|
||||
- `isv_keystore.csr.crt` - A self-signed certificate
|
||||
|
||||
@ -627,50 +628,38 @@ The Model Developer creates model, defines access control and creates the user l
|
||||
/opt/ovsa/bin/ovsatool keygen -storecert -c isv_keystore.csr.crt -k isv_keystore
|
||||
```
|
||||
|
||||
#### Step 2: Create the model
|
||||
#### Step 3: Create the model
|
||||
|
||||
This example uses `curl` to download the `face-detection-retail-004` model from the OpenVINO Model Zoo. If you are behind a firewall, check and set your proxy settings.
|
||||
|
||||
1. Log on to the Guest VM.
|
||||
|
||||
2. Download a model from the Model Zoo:
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
curl --create-dirs https://storage.openvinotoolkit.org/repositories/open_model_zoo/2021.3/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https://storage.openvinotoolkit.org/repositories/open_model_zoo/2021.3/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin
|
||||
```
|
||||
The model is downloaded to the `OVSA_DEV_ARTEFACTS/model` directory.
|
||||
|
||||
#### Step 3: Define access control for the model and create a master license for it
|
||||
Download a model from the Model Zoo:
|
||||
```sh
|
||||
curl --create-dirs https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin
|
||||
```
|
||||
The model is downloaded to the `OVSA_DEV_ARTEFACTS/model` directory
|
||||
|
||||
1. Go to the `artefacts` directory:
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
```
|
||||
2. Run the `uuidgen` command:
|
||||
```sh
|
||||
uuidgen
|
||||
```
|
||||
3. Define and enable the model access control and master license:
|
||||
```sh
|
||||
/opt/ovsa/bin/ovsatool controlAccess -i model/face-detection-retail-0004.xml model/face-detection-retail-0004.bin -n "face detection" -d "face detection retail" -v 0004 -p face_detection_model.dat -m face_detection_model.masterlic -k isv_keystore -g <output-of-uuidgen>
|
||||
```
|
||||
The Intermediate Representation files for the `face-detection-retail-0004` model are encrypted as `face_detection_model.dat` and a master license is generated as `face_detection_model.masterlic`.
|
||||
#### Step 4: Define access control for the model and create a master license for it
|
||||
|
||||
#### Step 4: Create a Runtime Reference TCB
|
||||
Define and enable the model access control and master license:
|
||||
```sh
|
||||
uuid=$(uuidgen)
|
||||
/opt/ovsa/bin/ovsatool controlAccess -i model/face-detection-retail-0004.xml model/face-detection-retail-0004.bin -n "face detection" -d "face detection retail" -v 0004 -p face_detection_model.dat -m face_detection_model.masterlic -k isv_keystore -g $uuid
|
||||
```
|
||||
The Intermediate Representation files for the `face-detection-retail-0004` model are encrypted as `face_detection_model.dat` and a master license is generated as `face_detection_model.masterlic`
|
||||
|
||||
#### Step 5: Create a Runtime Reference TCB
|
||||
|
||||
Use the runtime reference TCB to create a customer license for the access controlled model and the specific runtime.
|
||||
|
||||
Generate the reference TCB for the runtime
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
/opt/ovsa/bin/ovsaruntime gen-tcb-signature -n "Face Detect @ Runtime VM" -v "1.0" -f face_detect_runtime_vm.tcb -k isv_keystore
|
||||
/opt/ovsa/bin/ovsaruntime gen-tcb-signature -n "Face Detect @ Runtime VM" -v "1.0" -f face_detect_runtime_vm.tcb -k isv_keystore
|
||||
```
|
||||
|
||||
#### Step 5: Publish the access controlled Model and Runtime Reference TCB
|
||||
#### Step 6: Publish the access controlled Model and Runtime Reference TCB
|
||||
The access controlled model is ready to be shared with the User and the reference TCB is ready to perform license checks.
|
||||
|
||||
#### Step 6: Receive a User Request
|
||||
#### Step 7: Receive a User Request
|
||||
1. Obtain artefacts from the User who needs access to an access controlled model:
* Customer certificate from the customer's key store.
* Other information that applies to your licensing practices, such as the length of time the user needs access to the model
|
||||
@ -678,8 +667,9 @@ The access controlled model is ready to be shared with the User and the referenc
|
||||
2. Create a customer license configuration
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
/opt/ovsa/bin/ovsatool licgen -t TimeLimit -l30 -n "Time Limit License Config" -v 1.0 -u "<isv-developer-vm-ip-address>:<license_server-port>" -k isv_keystore -o 30daylicense.config
|
||||
/opt/ovsa/bin/ovsatool licgen -t TimeLimit -l30 -n "Time Limit License Config" -v 1.0 -u "<isv-developer-vm-ip-address>:<license_server-port>" /opt/ovsa/certs/server.crt -k isv_keystore -o 30daylicense.config
|
||||
```
|
||||
**NOTE**: The parameter /opt/ovsa/certs/server.crt contains the certificate used by the License Server. The server certificate will be added to the customer license and validated during use. Refer to [OpenVINO™ Security Add-on License Server Certificate Pinning](https://github.com/openvinotoolkit/security_addon/blob/release_2021_4/docs/ovsa_license_server_cert_pinning.md)
|
||||
3. Create the customer license
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
@ -693,27 +683,30 @@ The access controlled model is ready to be shared with the User and the referenc
|
||||
```
|
||||
|
||||
5. Provide these files to the User:
|
||||
* `face_detection_model.dat`
* `face_detection_model.lic`
|
||||
### User Instructions
References to the Guest VM are to `ovsa_runtime`.
### Model User Instructions
References to the Guest VM are to `ovsa_runtime`. Log on to the Guest VM as the `ovsa` user.
|
||||
|
||||
#### Step 1: Add a CA-Signed Certificate to a Key Store
|
||||
#### Step 1: Set up the artefacts directory
|
||||
|
||||
1. Set up a path to the artefacts directory:
|
||||
1. Create a directory named artefacts. This directory will hold artefacts required to create licenses:
|
||||
```sh
|
||||
sudo -s
|
||||
cd /<username-home-directory>/OVSA/artefacts
|
||||
mkdir -p ~/OVSA/artefacts
|
||||
cd ~/OVSA/artefacts
|
||||
export OVSA_RUNTIME_ARTEFACTS=$PWD
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
```
|
||||
2. Generate a Customer key store file:
|
||||
|
||||
#### Step 2: Add a CA-Signed Certificate to a Key Store
|
||||
|
||||
1. Generate a Customer key store file:
|
||||
```sh
|
||||
cd $OVSA_RUNTIME_ARTEFACTS
|
||||
/opt/ovsa/bin/ovsatool keygen -storekey -t ECDSA -n Intel -k custkeystore -r custkeystore.csr -e "/C=IN/CN=localhost"
|
||||
```
|
||||
Two files are created:
|
||||
Below two files are created along with the keystore file:
|
||||
* `custkeystore.csr` - A Certificate Signing Request (CSR)
|
||||
* `custkeystore.csr.crt` - A self-signed certificate
|
||||
|
||||
@ -724,20 +717,25 @@ References to the Guest VM are to `ovsa_rumtime`.
|
||||
/opt/ovsa/bin/ovsatool keygen -storecert -c custkeystore.csr.crt -k custkeystore
|
||||
```
|
||||
|
||||
#### Step 2: Request an access controlled Model from the Model Developer
|
||||
#### Step 3: Request an access controlled Model from the Model Developer
|
||||
This example uses scp to share data between the ovsa_runtime and ovsa_dev Guest VMs on the same Host Machine.
|
||||
|
||||
1. Communicate your need for a model to the Model Developer. The Developer will ask you to provide the certificate from your key store and other information. This example uses the length of time the model needs to be available.
|
||||
2. Generate an artefact file to provide to the Developer:
|
||||
2. The model user's certificate needs to be provided to the Developer:
|
||||
```sh
|
||||
cd $OVSA_RUNTIME_ARTEFACTS
|
||||
scp custkeystore.csr.crt username@<developer-vm-ip-address>:/<username-home-directory>/OVSA/artefacts
|
||||
```
|
||||
#### Step 4: Receive and load the access controlled model into the OpenVINO™ Model Server
|
||||
1. Receive the model as files named:
|
||||
* face_detection_model.dat
|
||||
* face_detection_model.lic
|
||||
```sh
|
||||
cd $OVSA_RUNTIME_ARTEFACTS
|
||||
scp username@<developer-vm-ip-address>:/<username-home-directory>/OVSA/artefacts/face_detection_model.dat .
|
||||
scp username@<developer-vm-ip-address>:/<username-home-directory>/OVSA/artefacts/face_detection_model.lic .
|
||||
```
|
||||
|
||||
#### Step 3: Receive and load the access controlled model into the OpenVINO™ Model Server
|
||||
1. Receive the model as files named
|
||||
* `face_detection_model.dat`
|
||||
* `face_detection_model.lic`
|
||||
2. Prepare the environment:
|
||||
```sh
|
||||
cd $OVSA_RUNTIME_ARTEFACTS/..
|
||||
@ -776,14 +774,14 @@ This example uses scp to share data between the ovsa_runtime and ovsa_dev Guest
|
||||
}
|
||||
```
|
||||
|
||||
#### Step 4: Start the NGINX Model Server
|
||||
#### Step 5: Start the NGINX Model Server
|
||||
The NGINX Model Server publishes the access controlled model.
|
||||
```sh
|
||||
./start_secure_ovsa_model_server.sh
|
||||
```
|
||||
For information about the NGINX interface, see https://github.com/openvinotoolkit/model_server/blob/main/extras/nginx-mtls-auth/README.md
|
||||
|
||||
#### Step 5: Prepare to run Inference
|
||||
#### Step 6: Prepare to run Inference
|
||||
|
||||
1. Log on to the Guest VM from another terminal.
|
||||
|
||||
@ -798,7 +796,7 @@ For information about the NGINX interface, see https://github.com/openvinotoolki
|
||||
```
|
||||
3. Copy the `face_detection.py` from the example_client in `/opt/ovsa/example_client`
|
||||
```sh
|
||||
cd /home/intel/OVSA/ovms
|
||||
cd ~/OVSA/ovms
|
||||
cp /opt/ovsa/example_client/* .
|
||||
```
|
||||
4. Copy the sample images for inferencing. An image directory is created that includes a sample image for inferencing.
|
||||
@ -806,11 +804,11 @@ For information about the NGINX interface, see https://github.com/openvinotoolki
|
||||
curl --create-dirs https://raw.githubusercontent.com/openvinotoolkit/model_server/master/example_client/images/people/people1.jpeg -o images/people1.jpeg
|
||||
```
|
||||
|
||||
#### Step 6: Run Inference
|
||||
#### Step 7: Run Inference
|
||||
|
||||
Run the `face_detection.py` script:
|
||||
```sh
|
||||
python3 face_detection.py --grpc_port 3335 --batch_size 1 --width 300 --height 300 --input_images_dir images --output_dir results --tls --server_cert server.pem --client_cert client.pem --client_key client.key --model_name controlled-access-model
|
||||
python3 face_detection.py --grpc_port 3335 --batch_size 1 --width 300 --height 300 --input_images_dir images --output_dir results --tls --server_cert /var/OVSA/Modelserver/server.pem --client_cert /var/OVSA/Modelserver/client.pem --client_key /var/OVSA/Modelserver/client.key --model_name controlled-access-model
|
||||
```
|
||||
|
||||
## Summary
|
||||
|
docs/template_plugin/tests/functional/op_reference/not_equal.cpp
@ -0,0 +1,123 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <shared_test_classes/base/layer_test_utils.hpp>
|
||||
|
||||
#include "comparison.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace InferenceEngine;
|
||||
using ComparisonTypes = ngraph::helpers::ComparisonTypes;
|
||||
|
||||
|
||||
namespace reference_tests {
|
||||
namespace ComparisonOpsRefTestDefinitions {
|
||||
namespace {
|
||||
|
||||
template <element::Type_t IN_ET>
|
||||
std::vector<RefComparisonParams> generateComparisonParams(const element::Type& type) {
|
||||
using T = typename element_type_traits<IN_ET>::value_type;
|
||||
std::vector<RefComparisonParams> compParams {
|
||||
// 1D // 2D // 3D // 4D
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{2, 2}, type, std::vector<T> {1, 0, 10, 255}})
|
||||
.input2({{2, 2}, type, std::vector<T> {1, 0, 10, 255}})
|
||||
.expected({{2, 2}, element::boolean, std::vector<char> {0, 0, 0, 0}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{2, 3}, type, std::vector<T> {0, 15, 45, 10, 5, 10}})
|
||||
.input2({{2, 3}, type, std::vector<T> {1, 15, 5, 10, 50, 10}})
|
||||
.expected({{2, 3}, element::boolean, std::vector<char> {1, 0, 1, 0, 1, 0}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{1}, type, std::vector<T> {20}})
|
||||
.input2({{1}, type, std::vector<T> {10}})
|
||||
.expected({{1}, element::boolean, std::vector<char> {1}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{2, 4}, type, std::vector<T> {0, 12, 23, 0, 1, 5, 12, 8}})
|
||||
.input2({{2, 4}, type, std::vector<T> {0, 12, 23, 0, 10, 5, 11, 8}})
|
||||
.expected({{2, 4}, element::boolean, std::vector<char> {0, 0, 0, 0, 1, 0, 1, 0}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{3, 1, 2}, type, std::vector<T> {2, 7, 4, 7, 3, 7}})
|
||||
.input2({{1, 2, 1}, type, std::vector<T> {7, 7}})
|
||||
.expected({{3, 2, 2}, element::boolean, std::vector<char> {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{2, 1, 2, 1}, type, std::vector<T> {1, 2, 1, 4}})
|
||||
.input2({{1, 2, 1}, type, std::vector<T> {1, 1}})
|
||||
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {0, 1, 0, 1}})};
|
||||
return compParams;
|
||||
}
|
||||
|
||||
std::vector<RefComparisonParams> generateComparisonCombinedParams() {
|
||||
const std::vector<std::vector<RefComparisonParams>> compTypeParams {
|
||||
generateComparisonParams<element::Type_t::f32>(element::f32),
|
||||
generateComparisonParams<element::Type_t::f16>(element::f16),
|
||||
generateComparisonParams<element::Type_t::i32>(element::i32),
|
||||
generateComparisonParams<element::Type_t::u32>(element::u32),
|
||||
generateComparisonParams<element::Type_t::u8>(element::boolean)};
|
||||
std::vector<RefComparisonParams> combinedParams;
|
||||
|
||||
for (const auto& params : compTypeParams) {
|
||||
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||
}
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest,
|
||||
::testing::ValuesIn(generateComparisonCombinedParams()),
|
||||
ReferenceComparisonLayerTest::getTestCaseName);
|
||||
|
||||
template <element::Type_t IN_ET>
|
||||
std::vector<RefComparisonParams> generateNumericParams(const element::Type& type) {
|
||||
using T = typename element_type_traits<IN_ET>::value_type;
|
||||
std::vector<RefComparisonParams> compParams {
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{4}, type, std::vector<T> {-2.5f, 25.5f, 2.25f, NAN}})
|
||||
.input2({{4}, type, std::vector<T> {10.0f, 5.0f, 2.25f, 10.0f}})
|
||||
.expected({{4}, element::boolean, std::vector<char> {1, 1, 0, 1}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{2, 3}, type, std::vector<T> {0.0f, NAN, NAN, 1.0f, 21.0f, -INFINITY}})
|
||||
.input2({{2, 3}, type, std::vector<T> {1.0f, NAN, 23.0f, 1.0f, 19.0f, 21.0f}})
|
||||
.expected({{2, 3}, element::boolean, std::vector<char> {1, 1, 1, 0, 1, 1}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{1}, type, std::vector<T> {INFINITY}})
|
||||
.input2({{1}, type, std::vector<T> {INFINITY}})
|
||||
.expected({{1}, element::boolean, std::vector<char> {0}}),
|
||||
Builder {}
|
||||
.compType(ComparisonTypes::NOT_EQUAL)
|
||||
.input1({{5}, type, std::vector<T> {-2.5f, 25.5f, 2.25f, INFINITY, 6.0f}})
|
||||
.input2({{5}, type, std::vector<T> {10.0f, 5.0f, 2.25f, 10.0f, -INFINITY}})
|
||||
.expected({{5}, element::boolean, std::vector<char> {1, 1, 0, 1, 1}})};
|
||||
return compParams;
|
||||
}
|
||||
|
||||
std::vector<RefComparisonParams> generateNumericCombinedParams() {
|
||||
const std::vector<std::vector<RefComparisonParams>> compTypeParams {
|
||||
generateNumericParams<element::Type_t::f16>(element::f16),
|
||||
generateNumericParams<element::Type_t::f32>(element::f32)};
|
||||
std::vector<RefComparisonParams> combinedParams;
|
||||
|
||||
for (const auto& params : compTypeParams) {
|
||||
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
|
||||
}
|
||||
return combinedParams;
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Numeric_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateNumericCombinedParams()),
|
||||
ReferenceComparisonLayerTest::getTestCaseName);
|
||||
} // namespace
|
||||
} // namespace ComparisonOpsRefTestDefinitions
|
||||
} // namespace reference_tests
|
@ -0,0 +1,205 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "base_reference_test.hpp"
|
||||
#include "ngraph/opsets/opset8.hpp"
|
||||
#include "ngraph/util.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
|
||||
namespace reference_tests {
|
||||
namespace {
|
||||
|
||||
struct RandomUniformParams {
|
||||
RandomUniformParams(const std::vector<int64_t>& paramOutShape,
|
||||
const Tensor& paramMinValue,
|
||||
const Tensor& paramMaxValue,
|
||||
ngraph::element::Type paramOutType,
|
||||
int64_t paramGlobalSeed,
|
||||
int64_t paramOpSeed,
|
||||
const Tensor& paramExpected,
|
||||
const std::string& test_name)
|
||||
: out_shape(paramOutShape),
|
||||
min_val(paramMinValue),
|
||||
max_val(paramMaxValue),
|
||||
out_type(paramOutType),
|
||||
global_seed(paramGlobalSeed),
|
||||
op_seed(paramOpSeed),
|
||||
expected(paramExpected),
|
||||
test_case_name(test_name) {}
|
||||
std::vector<int64_t> out_shape;
|
||||
Tensor min_val;
|
||||
Tensor max_val;
|
||||
ngraph::element::Type out_type;
|
||||
int64_t global_seed;
|
||||
int64_t op_seed;
|
||||
Tensor expected;
|
||||
std::string test_case_name;
|
||||
};
|
||||
|
||||
class ReferenceRandomUniformLayerTest : public testing::TestWithParam<RandomUniformParams>, public CommonReferenceTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
auto params = GetParam();
|
||||
function = CreateFunction(params.out_shape,
|
||||
params.min_val,
|
||||
params.max_val,
|
||||
params.out_type,
|
||||
params.global_seed,
|
||||
params.op_seed);
|
||||
inputData = {params.min_val.data, params.max_val.data};
|
||||
refOutData = {params.expected.data};
|
||||
}
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<RandomUniformParams>& obj) {
|
||||
auto param = obj.param;
|
||||
return param.test_case_name;
|
||||
}
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Function> CreateFunction(const std::vector<int64_t>& out_shape,
|
||||
const Tensor& min_val,
|
||||
const Tensor& max_val,
|
||||
const ngraph::element::Type& out_type,
|
||||
int64_t global_seed,
|
||||
int64_t op_seed) {
|
||||
const auto min_val_param = std::make_shared<opset8::Parameter>(min_val.type, min_val.shape);
|
||||
const auto max_val_param = std::make_shared<opset8::Parameter>(max_val.type, max_val.shape);
|
||||
auto out_shape_ = std::make_shared<opset8::Constant>(element::i64, Shape{out_shape.size()}, out_shape);
|
||||
|
||||
return std::make_shared<Function>(NodeVector{std::make_shared<opset8::RandomUniform>(out_shape_,
|
||||
min_val_param,
|
||||
max_val_param,
|
||||
out_type,
|
||||
global_seed,
|
||||
op_seed)},
|
||||
ParameterVector{min_val_param, max_val_param});
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ReferenceRandomUniformLayerTest, RandomUniformWithHardcodedRefs) {
|
||||
Exec();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Reference values for the following tests are obtained from single layer TensorFlow model with tf.random.uniform().
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_RandomUniform_With_Hardcoded_Refs,
|
||||
ReferenceRandomUniformLayerTest,
|
||||
::testing::Values(
|
||||
RandomUniformParams(std::vector<int64_t>{3, 2, 4},
|
||||
Tensor{{1}, element::f32, std::vector<float>{0}},
|
||||
Tensor{{1}, element::f32, std::vector<float>{1}},
|
||||
element::Type_t::f32,
|
||||
150,
|
||||
10,
|
||||
Tensor{{3, 2, 4},
|
||||
element::f32,
|
||||
std::vector<float>{0.70112360, 0.30539632, 0.93931055, 0.94560349, 0.11694777,
|
||||
0.50770056, 0.51971972, 0.22727466, 0.99137402, 0.35519040,
|
||||
0.82692313, 0.59864855, 0.31364107, 0.57481313, 0.41399086,
|
||||
0.96308255, 0.37140799, 0.85253167, 0.09358585, 0.08200955,
|
||||
0.23655081, 0.81056309, 0.74226606, 0.76106691}},
|
||||
"float32_default_min_max"),
|
||||
RandomUniformParams(std::vector<int64_t>{3, 2, 4},
|
||||
Tensor{{1}, element::f16, std::vector<float16>{0}},
|
||||
Tensor{{1}, element::f16, std::vector<float16>{1}},
|
||||
element::Type_t::f16,
|
||||
150,
|
||||
10,
|
||||
Tensor{{3, 2, 4},
|
||||
element::f16,
|
||||
std::vector<float16>{0.60449219, 0.80664062, 0.83203125, 0.38378906, 0.03613281,
|
||||
0.08300781, 0.54394531, 0.83398438, 0.33593750, 0.71972656,
|
||||
0.15429688, 0.12890625, 0.34765625, 0.86914062, 0.41308594,
|
||||
0.57226562, 0.57421875, 0.93945312, 0.65527344, 0.82226562,
|
||||
0.82421875, 0.13281250, 0.64355469, 0.66015625}},
|
||||
"float16_default_min_max"),
|
||||
RandomUniformParams(std::vector<int64_t>{3, 2, 4},
|
||||
Tensor{{1}, element::f32, std::vector<float>{-650}},
|
||||
Tensor{{1}, element::f32, std::vector<float>{450}},
|
||||
element::Type_t::f32,
|
||||
150,
|
||||
10,
|
||||
Tensor{{3, 2, 4},
|
||||
element::f32,
|
||||
std::vector<float>{121.23596191, -314.06405640, 383.24157715, 390.16381836,
|
||||
-521.35742188, -91.52935791, -78.30828857, -399.99786377,
|
||||
440.51147461, -259.29055786, 259.61541748, 8.51342773,
|
||||
-304.99481201, -17.70556641, -194.61004639, 409.39074707,
|
||||
-241.45120239, 287.78485107, -547.05554199, -559.78948975,
|
||||
-389.79409790, 241.61938477, 166.49267578, 187.17358398}},
|
||||
"float32_non_default_min_max"),
|
||||
RandomUniformParams(std::vector<int64_t>{3, 2, 4},
|
||||
Tensor{{1}, element::f16, std::vector<float16>{-1.5}},
|
||||
Tensor{{1}, element::f16, std::vector<float16>{-1.0}},
|
||||
element::Type_t::f16,
|
||||
150,
|
||||
10,
|
||||
Tensor{{3, 2, 4},
|
||||
element::f16,
|
||||
std::vector<float16>{-1.19726562, -1.09667969, -1.08398438, -1.30859375, -1.48242188,
|
||||
-1.45898438, -1.22851562, -1.08300781, -1.33203125, -1.14062500,
|
||||
-1.42285156, -1.43554688, -1.32617188, -1.06542969, -1.29296875,
|
||||
-1.21386719, -1.21289062, -1.03027344, -1.17187500, -1.08886719,
|
||||
-1.08789062, -1.43359375, -1.17773438, -1.16992188}},
|
||||
"float16_non_default_min_max"),
|
||||
RandomUniformParams(std::vector<int64_t>{2, 3, 4},
|
||||
Tensor{{1}, element::i32, std::vector<int32_t>{-100}},
|
||||
Tensor{{1}, element::i32, std::vector<int32_t>{50}},
|
||||
element::Type_t::i32,
|
||||
100,
|
||||
350,
|
||||
Tensor{{2, 3, 4},
|
||||
element::i32,
|
||||
std::vector<int32_t>{
|
||||
22, -56, -33, -89, -98, -33, -3, -48, -82, 5, -66, 21,
|
||||
29, -42, -73, -37, 3, 36, -35, 20, -11, -8, -78, 47,
|
||||
}},
|
||||
"int32"),
|
||||
RandomUniformParams(std::vector<int64_t>{5, 4, 3},
|
||||
Tensor{{1}, element::i64, std::vector<int64_t>{-2600}},
|
||||
Tensor{{1}, element::i64, std::vector<int64_t>{3700}},
|
||||
element::Type_t::i64,
|
||||
755,
|
||||
951,
|
||||
Tensor{{5, 4, 3},
|
||||
element::i64,
|
||||
std::vector<int64_t>{
|
||||
2116, -1581, 2559, -339, -1660, 519, 90, 2027, -210, 3330, 1831, -1737,
|
||||
2683, 2661, 3473, 1220, 3534, -2384, 2199, 1935, 499, 2861, 2743, 3223,
|
||||
-531, -836, -65, 3435, 632, 1765, 2613, 1891, 1698, 3069, 169, -792,
|
||||
-32, 2976, -1552, -2588, 3327, -1756, 2637, -1084, 3567, -778, -1465, 2967,
|
||||
1242, 2672, -1585, -2271, 3536, -1502, 400, 2241, 3126, 908, 1073, -2110}},
|
||||
"int64"),
|
||||
RandomUniformParams(std::vector<int64_t>{7, 3},
|
||||
Tensor{{1}, element::bf16, std::vector<bfloat16>{0}},
|
||||
Tensor{{1}, element::bf16, std::vector<bfloat16>{1}},
|
||||
element::Type_t::bf16,
|
||||
4978,
|
||||
5164,
|
||||
Tensor{{7, 3},
|
||||
element::bf16,
|
||||
std::vector<bfloat16>{0.8984375, 0.84375, 0.1640625, 0.1875, 0.46875, 0.6875,
|
||||
0.5234375, 0.3046875, 0.9140625, 0.453125, 0.953125, 0.328125,
|
||||
0.359375, 0.1875, 0.9453125, 0.390625, 0.21875, 0.9921875,
|
||||
0.8203125, 0.453125, 0.875}},
|
||||
"bfloat16_default_min_max"),
|
||||
RandomUniformParams(std::vector<int64_t>{7, 3},
|
||||
Tensor{{1}, element::bf16, std::vector<bfloat16>{-150}},
|
||||
Tensor{{1}, element::bf16, std::vector<bfloat16>{200}},
|
||||
element::Type_t::bf16,
|
||||
4978,
|
||||
5164,
|
||||
Tensor{{7, 3},
|
||||
element::bf16,
|
||||
std::vector<bfloat16>{164, 146, -92.5, -84.5, 14, 90, 33, -43.5, 170, 8, 182,
|
||||
-35, -24, -84.5, 180, -14, -73.5, 198, 138, 8, 156}},
|
||||
"bfloat16_non_default_min_max")),
|
||||
ReferenceRandomUniformLayerTest::getTestCaseName);
|
||||
} // namespace reference_tests
|
@ -21,10 +21,11 @@ const std::vector<std::map<std::string, std::string>> configs = {
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferRequestDynamicTests,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(ngraph::builder::subgraph::makeSplitConvConcat()),
|
||||
::testing::Values(std::vector<std::pair<std::vector<size_t>, std::vector<size_t>>>{{{1, 4, 20, 20}, {1, 10, 18, 18}},
|
||||
{{2, 4, 20, 20}, {2, 10, 18, 18}}}),
|
||||
::testing::Values(CommonTestUtils::DEVICE_TEMPLATE),
|
||||
::testing::ValuesIn(configs)),
|
||||
InferRequestDynamicTests::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -12,7 +12,6 @@ if(ENABLE_PYTHON)
|
||||
add_subdirectory(ie_bridges/python)
|
||||
endif()
|
||||
|
||||
add_subdirectory(tools)
|
||||
add_subdirectory(samples)
|
||||
|
||||
openvino_developer_export_targets(COMPONENT openvino_common TARGETS format_reader ie_samples_utils)
|
||||
|
@ -48,7 +48,7 @@ To run the sample, you need specify a model and image:
|
||||
> - The sample accepts models in ONNX format (\*.onnx) that do not require preprocessing.
|
||||
|
||||
### Example
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader_README):
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader):
|
||||
```
|
||||
python <path_to_omz_tools>/downloader.py --name alexnet
|
||||
```
|
||||
|
@ -63,7 +63,7 @@ ffmpeg -i cat.jpg -pix_fmt nv12 cat.yuv
|
||||
> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
|
||||
|
||||
### Example
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader_README):
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader):
|
||||
```
|
||||
python <path_to_omz_tools>/downloader.py --name alexnet
|
||||
```
|
||||
|
@ -77,7 +77,7 @@ Options:
|
||||
> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
|
||||
|
||||
### Example
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader_README):
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader):
|
||||
```
|
||||
python <path_to_omz_tools>/downloader.py --name person-detection-retail-0013
|
||||
```
|
||||
|
@ -80,7 +80,7 @@ To run the sample, you need specify a model and image:
|
||||
> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
|
||||
|
||||
### Example
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader_README):
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader):
|
||||
```
|
||||
python <path_to_omz_tools>/downloader.py --name alexnet
|
||||
```
|
||||
|
@ -69,7 +69,7 @@ To run the sample, you need specify a model and image:
|
||||
> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
|
||||
|
||||
### Example
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader_README):
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader):
|
||||
```
|
||||
python <path_to_omz_tools>/downloader.py --name alexnet
|
||||
```
|
||||
|
@ -77,7 +77,7 @@ To run the sample, you need specify a model and image:
|
||||
> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
|
||||
|
||||
### Example
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader_README):
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader):
|
||||
```
|
||||
python <path_to_omz_tools>/downloader.py --name mobilenet-ssd
|
||||
```
|
||||
|
@ -79,7 +79,7 @@ To run the sample, you need specify a model and image:
|
||||
> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.
|
||||
|
||||
### Example
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader_README):
|
||||
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader):
|
||||
```
|
||||
python <path_to_omz_tools>/downloader.py --name mobilenet-ssd
|
||||
```
|
||||
|
@ -80,7 +80,8 @@ Usage message:
|
||||
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
|
||||
[-o OUTPUT] [-r REFERENCE] [-d DEVICE]
|
||||
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
|
||||
[-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS]
|
||||
[-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc]
|
||||
[-a {CORE,ATOM}] [-iname INPUT_LAYERS]
|
||||
[-oname OUTPUT_LAYERS]
|
||||
|
||||
optional arguments:
|
||||
@ -94,9 +95,10 @@ optional arguments:
|
||||
Options:
|
||||
-h, --help Show this help message and exit.
|
||||
-i INPUT, --input INPUT
|
||||
Required. Path to an input file (.ark or .npz).
|
||||
Required. Path to an input file (.ark or .npz).
|
||||
-o OUTPUT, --output OUTPUT
|
||||
Optional. Output file name to save inference results (.ark or .npz).
|
||||
Optional. Output file name to save inference results
|
||||
(.ark or .npz).
|
||||
-r REFERENCE, --reference REFERENCE
|
||||
Optional. Read reference score file and compare
|
||||
scores.
|
||||
@ -113,9 +115,18 @@ Options:
|
||||
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
|
||||
Optional. Weight bits for quantization: 8 or 16
|
||||
(default 16).
|
||||
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
|
||||
Optional. The user-specified input scale factor for
|
||||
quantization.
|
||||
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
|
||||
Optional. Write GNA model to file using path/filename
|
||||
provided.
|
||||
-pc, --performance_counter
|
||||
Optional. Enables performance report (specify -a to
|
||||
ensure arch accurate results).
|
||||
-a {CORE,ATOM}, --arch {CORE,ATOM}
|
||||
Optional. Specify architecture. CORE, ATOM with the
|
||||
combination of -pc.
|
||||
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS
|
||||
Optional. Layer names for input blobs. The names are
|
||||
separated with ",". Allows to change the order of
|
||||
|
@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace:
|
||||
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
|
||||
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
|
||||
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
|
||||
args.add_argument('-sf', '--scale_factor', type=float,
|
||||
help='Optional. The user-specified input scale factor for quantization.')
|
||||
args.add_argument('-wg', '--export_gna_model', type=str,
|
||||
help='Optional. Write GNA model to file using path/filename provided.')
|
||||
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
|
||||
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
|
||||
args.add_argument('-pc', '--performance_counter', action='store_true',
|
||||
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
|
||||
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'],
|
||||
help='Optional. Specify architecture. CORE, ATOM with the combination of -pc.')
|
||||
args.add_argument('-iname', '--input_layers', type=str,
|
||||
help='Optional. Layer names for input blobs. The names are separated with ",". '
|
||||
'Allows to change the order of input layers for -i flag. Example: Input1,Input2')
|
||||
|
@ -2,15 +2,21 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
import argparse
|
||||
import logging as log
|
||||
import re
|
||||
import sys
|
||||
from timeit import default_timer
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
from arg_parser import parse_args
|
||||
from file_options import read_utterance_file, write_utterance_file
|
||||
from openvino.inference_engine import ExecutableNetwork, IECore
|
||||
from openvino.inference_engine import ExecutableNetwork, IECore, IENetwork
|
||||
|
||||
# Operating Frequency for GNA HW devices for Core and Atom architecture
|
||||
GNA_CORE_FREQUENCY = 400
|
||||
GNA_ATOM_FREQUENCY = 200
|
||||
|
||||
|
||||
def get_scale_factor(matrix: np.ndarray) -> float:
|
||||
@ -75,6 +81,28 @@ def compare_with_reference(result: np.ndarray, reference: np.ndarray):
|
||||
log.info(f'stdev error: {stdev_error:.7f}')
|
||||
|
||||
|
||||
def get_input_layer_list(net: Union[IENetwork, ExecutableNetwork], args: argparse.Namespace) -> list:
|
||||
"""Get a list of input layer names"""
|
||||
return re.split(', |,', args.input_layers) if args.input_layers else [next(iter(net.input_info))]
|
||||
|
||||
|
||||
def get_output_layer_list(net: Union[IENetwork, ExecutableNetwork],
|
||||
args: argparse.Namespace, with_ports: bool) -> list:
|
||||
"""Get a list of output layer names"""
|
||||
if args.output_layers:
|
||||
output_name_port = [output.split(':') for output in re.split(', |,', args.output_layers)]
|
||||
if with_ports:
|
||||
try:
|
||||
return [(blob_name, int(port)) for blob_name, port in output_name_port]
|
||||
except ValueError:
|
||||
log.error('Incorrect value for -oname/--output_layers option, please specify a port for output layer.')
|
||||
sys.exit(-4)
|
||||
else:
|
||||
return [blob_name for blob_name, _ in output_name_port]
|
||||
else:
|
||||
return [list(net.outputs.keys())[-1]]
|
||||
|
||||
|
||||
def main():
|
||||
log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
|
||||
args = parse_args()
|
||||
@ -91,25 +119,13 @@ def main():
|
||||
|
||||
# ---------------------------Step 3. Configure input & output----------------------------------------------------------
|
||||
log.info('Configuring input and output blobs')
|
||||
# Get names of input and output blobs
|
||||
if args.input_layers:
|
||||
input_blobs = re.split(', |,', args.input_layers)
|
||||
else:
|
||||
input_blobs = [next(iter(net.input_info))]
|
||||
|
||||
# Mark layers from args.output_layers as outputs
|
||||
if args.output_layers:
|
||||
output_name_port = [output.split(':') for output in re.split(', |,', args.output_layers)]
|
||||
try:
|
||||
output_name_port = [(blob_name, int(port)) for blob_name, port in output_name_port]
|
||||
except ValueError:
|
||||
log.error('Output Parameter does not have a port.')
|
||||
sys.exit(-4)
|
||||
net.add_outputs(get_output_layer_list(net, args, with_ports=True))
|
||||
|
||||
net.add_outputs(output_name_port)
|
||||
|
||||
output_blobs = [blob_name for blob_name, port in output_name_port]
|
||||
else:
|
||||
output_blobs = [list(net.outputs.keys())[-1]]
|
||||
# Get names of input and output blobs
|
||||
input_blobs = get_input_layer_list(net, args)
|
||||
output_blobs = get_output_layer_list(net, args, with_ports=False)
|
||||
|
||||
# Set input and output precision manually
|
||||
for blob_name in input_blobs:
|
||||
@ -131,21 +147,26 @@ def main():
|
||||
        plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
        plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'

        # Get a GNA scale factor
        # Set a GNA scale factor
        if args.import_gna_model:
            log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
        elif args.scale_factor:
            log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
            plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
        else:
            utterances = read_utterance_file(args.input.split(',')[0])
            key = sorted(utterances)[0]
            scale_factor = get_scale_factor(utterances[key])
            log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')

            plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)

        if args.export_embedded_gna_model:
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

    if args.performance_counter:
        plugin_config['PERF_COUNT'] = 'YES'

    device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]

    log.info('Loading the model to the plugin')
@ -153,8 +174,8 @@ def main():
        exec_net = ie.load_network(net, device_str, plugin_config)
    else:
        exec_net = ie.import_network(args.import_gna_model, device_str, plugin_config)
    input_blobs = [next(iter(exec_net.input_info))]
    output_blobs = [list(exec_net.outputs.keys())[-1]]
    input_blobs = get_input_layer_list(exec_net, args)
    output_blobs = get_output_layer_list(exec_net, args, with_ports=False)

    if args.input:
        input_files = re.split(', |,', args.input)
@ -208,6 +229,7 @@ def main():
    log.info('Starting inference in synchronous mode')
    results = {blob_name: {} for blob_name in output_blobs}
    infer_times = []
    perf_counters = []

    for key in sorted(input_data):
        start_infer_time = default_timer()
@ -223,6 +245,7 @@ def main():
            results[blob_name][key] = result[blob_name]

        infer_times.append(default_timer() - start_infer_time)
        perf_counters.append(exec_net.requests[0].get_perf_counts())

    # ---------------------------Step 8. Process output--------------------------------------------------------------------
    for blob_name in output_blobs:
@ -235,6 +258,26 @@ def main():
            if args.reference:
                compare_with_reference(results[blob_name][key], references[blob_name][key])

            if args.performance_counter:
                if 'GNA' in args.device:
                    pc = perf_counters[i]
                    total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
                    stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
                    active_cycles = total_cycles - stall_cycles
                    frequency = 10**6
                    if args.arch == 'CORE':
                        frequency *= GNA_CORE_FREQUENCY
                    else:
                        frequency *= GNA_ATOM_FREQUENCY
                    total_inference_time = total_cycles / frequency
                    active_time = active_cycles / frequency
                    stall_time = stall_cycles / frequency
                    log.info('')
                    log.info('Performance Statistics of GNA Hardware')
                    log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
                    log.info(f' Active Time: {(active_time * 1000):.4f} ms')
                    log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')

                    log.info('')

    log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')
|
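The new performance-counter branch above converts raw GNA cycle counters into wall-clock time by scaling with the device clock. A minimal standalone sketch of that conversion, with made-up frequency constants standing in for the sample's `GNA_CORE_FREQUENCY`/`GNA_ATOM_FREQUENCY`:

```
# Illustrative only: the constants below are placeholders in MHz, not the
# values defined by the sample itself.
GNA_CORE_FREQUENCY = 400
GNA_ATOM_FREQUENCY = 200


def gna_cycles_to_ms(total_cycles: int, stall_cycles: int, arch: str = 'CORE') -> dict:
    """Mirror the cycles-to-milliseconds arithmetic used in the sample."""
    frequency = 10**6  # MHz -> Hz scaling step, as in `frequency = 10**6` above
    frequency *= GNA_CORE_FREQUENCY if arch == 'CORE' else GNA_ATOM_FREQUENCY
    active_cycles = total_cycles - stall_cycles
    return {
        'total_ms': total_cycles / frequency * 1000,
        'active_ms': active_cycles / frequency * 1000,
        'stall_ms': stall_cycles / frequency * 1000,
    }


print(gna_cycles_to_ms(total_cycles=2_000_000, stall_cycles=500_000))
```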
@ -91,7 +91,7 @@ To run the sample, you need to specify a model and image:
> - The sample accepts models in ONNX format (.onnx) that do not require preprocessing.

### Example
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader_README):
1. Download a pre-trained model using [Model Downloader](@ref omz_tools_downloader):
```
python <path_to_omz_tools>/downloader.py --name fast-neural-style-mosaic-onnx
```
|
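The README note above says the sample consumes ONNX models directly; with the Python API that means the downloaded file can be passed straight to `IECore.read_network` without an IR conversion step. A hedged sketch, where the path simply follows the downloader's `public/<model-name>/` output layout and may need adjusting:

```
# Assumes the model was fetched by downloader.py into the current directory.
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model='public/fast-neural-style-mosaic-onnx/fast-neural-style-mosaic-onnx.onnx')
print('inputs: ', list(net.input_info))
print('outputs:', list(net.outputs))
```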
@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
__path__ = __import__('pkgutil').extend_path(__path__, __name__)  # type: ignore  # mypy issue #1422
|
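The `__path__` line added above turns the package into a pkgutil-style namespace package, so separately installed wheels can each contribute sub-packages under the same top-level name. A tiny illustration of the mechanism, using a hypothetical package name and path:

```
# pkgutil.extend_path scans sys.path for other directories that also contain a
# package called 'mypkg' and appends them to the search path; with no matches
# it returns the list unchanged. The name and initial path are hypothetical.
import pkgutil

__path__ = ['/site-packages/mypkg']
__path__ = pkgutil.extend_path(__path__, 'mypkg')
print(__path__)
```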
@ -39,7 +39,7 @@ cdef class InferRequest:
    cpdef get_perf_counts(self)
    cdef void user_callback(self, int status) with gil
    cdef public:
        _inputs_list, _outputs_list, _py_callback, _py_data, _py_callback_used, _py_callback_called, _user_blobs
        _inputs_list, _outputs_list, _py_callback, _py_data, _py_callback_used, _py_callback_called, _user_blobs, _inputs_is_dynamic

cdef class IENetwork:
    cdef C.IENetwork impl
|
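The `.pyx` hunks below add the dynamic-shape plumbing behind that new attribute: `Blob.set_shape()`, an `is_dynamic` property on `DataPtr`/`CDataPtr`, and an `IENetwork.reshape()` that accepts a 1- or 2-element list/tuple per dimension, converted into an ngraph dimension range. A hedged usage sketch with placeholder model paths and an assumed 4D NCHW input:

```
# Only the API surface introduced in the hunks below is exercised here; the IR
# paths and the concrete shape are placeholders.
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model='model.xml', weights='model.bin')
input_name = next(iter(net.input_info))

# A dimension may stay a fixed int or become a (min, max) pair; the binding
# packs the pairs into nested int64 vectors and builds an ngraph PartialShape.
net.reshape({input_name: [1, 3, (224, 448), (224, 448)]})

# DataPtr.is_dynamic reports whether any dimension of the reshaped input is a range.
print(net.input_info[input_name].input_data.is_dynamic)
```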
@ -29,7 +29,6 @@ from .constants import WaitMode, StatusCode, MeanVariant, layout_str_to_enum, fo

import numpy as np


warnings.filterwarnings(action="module", category=DeprecationWarning)

cdef extern from "<utility>" namespace "std" nogil:
@ -53,6 +52,11 @@ cdef c_map_to_dict(map[string, string] c_map):
    return py_dict


cdef expand_dims_to_corresponding_layout(shape, layout):
    single_axes = [1] * (len(layout) - len(shape))
    return single_axes + list(shape)


def get_version():
    return C.get_version().decode()

@ -271,6 +275,10 @@ cdef class Blob:
        tensor_desc = TensorDesc(precision, dims, layout_int_to_str_map[layout])
        return tensor_desc

    def set_shape(self, new_shape):
        self._initial_shape = new_shape
        deref(self._ptr).setShape(new_shape)

## This class represents an Inference Engine entity and allows you to manipulate with plugins using unified interfaces.
cdef class IECore:
    ## Class constructor
@ -304,7 +312,7 @@ cdef class IECore:
        return versions

    ## Reads a network from Intermediate Representation (IR) or ONNX formats and creates an `IENetwork`.
    #  @param model: A `.xml`, `.onnx`or `.prototxt` model file or string with IR.
    #  @param model: A `.xml` or `.onnx` model file or string with IR.
    #  @param weights: A `.bin` file of the IR. Depending on `init_from_buffer` value, can be a string path or
    #                  bytes with file content.
    #  @param init_from_buffer: Defines the way of how `model` and `weights` attributes are interpreted.
@ -815,6 +823,14 @@ cdef class DataPtr:
    def initialized(self):
        return deref(self._ptr).isInitialized()

    @property
    def is_dynamic(self):
        return deref(self._ptr).isDynamic()

    ## get capsule with ngraph::PartialShape
    def _get_partial_shape_capsule(self):
        return C.getPartialShape_capsule(self._ptr)


## This class is the layer constant data representation. Provides same interface as DataPtr object except properties setters
cdef class CDataPtr:
@ -843,6 +859,14 @@ cdef class CDataPtr:
    def initialized(self):
        return deref(self._ptr).isInitialized()

    @property
    def is_dynamic(self):
        return deref(self._ptr).isDynamic()

    ## get capsule with ngraph::PartialShape
    def _get_partial_shape_capsule(self):
        return C.getPartialShape_capsule(self._ptr)


## This class represents a network instance loaded to plugin and ready for inference.
cdef class ExecutableNetwork:
@ -912,6 +936,8 @@ cdef class ExecutableNetwork:
            infer_request.impl = &(deref(self.impl).infer_requests[i])
            infer_request._inputs_list = list(self.input_info.keys())
            infer_request._outputs_list = list(self.outputs.keys())
            for input_name in infer_request._inputs_list:
                infer_request._inputs_is_dynamic[input_name] = self.input_info[input_name].input_data.is_dynamic
            self._infer_requests.append(infer_request)

        if len(self._infer_requests) != c_infer_requests_size:
@ -1048,6 +1074,7 @@ cdef class InferRequest:
        self._py_callback_used = False
        self._py_callback_called = threading.Event()
        self._py_data = None
        self._inputs_is_dynamic = {}

    cdef void user_callback(self, int status) with gil:
        if self._py_callback:
@ -1308,6 +1335,9 @@ cdef class InferRequest:
    def _fill_inputs(self, inputs):
        for k, v in inputs.items():
            assert k in self._inputs_list, f"No input with name {k} found in network"
            if self._inputs_is_dynamic[k]:
                shape = expand_dims_to_corresponding_layout(v.shape, self.input_blobs[k].tensor_desc.layout)
                self.input_blobs[k].set_shape(shape)
            if self.input_blobs[k].tensor_desc.precision == "FP16":
                self.input_blobs[k].buffer[:] = v.view(dtype=np.int16)
            else:
@ -1452,15 +1482,25 @@ cdef class IENetwork:
    #  net.reshape({input_layer: (n, c, h*2, w*2)})
    #  ```
    def reshape(self, input_shapes: dict):
        cdef map[string, vector[size_t]] c_input_shapes
        cdef vector[size_t] c_shape
        cdef map[string, vector[vector[int64_t]]] c_input_shapes
        cdef vector[vector[int64_t]] c_shape
        cdef vector[int64_t] dim
        net_inputs = self.input_info
        for input, shape in input_shapes.items():
            c_shape = []
            if input not in net_inputs:
                raise AttributeError(f"Specified '{input}' layer not in network inputs '{net_inputs}'! ")
            for v in shape:
                c_shape.push_back(v)
                if isinstance(v, list) or isinstance(v, tuple):
                    if len(v) < 1 or len(v) > 2:
                        raise ValueError(f"Incorrect PartialShape dimension definition '{v}' "
                                         f"in shape '{shape}', expected one or two values for a dimension! ")
                    for d in v:
                        dim.push_back(d)
                else:
                    dim.push_back(v)
                c_shape.push_back(dim)
                dim.clear()
            c_input_shapes[input.encode()] = c_shape
        self.impl.reshape(c_input_shapes)

|
|
||||
|
||||
#include "ie_api_impl.hpp"
|
||||
|
||||
#include <ngraph/partial_shape.hpp>
|
||||
|
||||
#include "ie_iinfer_request.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
@ -206,6 +208,24 @@ InferenceEnginePython::IENetwork InferenceEnginePython::read_network(std::string
|
||||
return InferenceEnginePython::IENetwork(std::make_shared<InferenceEngine::CNNNetwork>(net));
|
||||
}
|
||||
|
||||
PyObject* InferenceEnginePython::getPartialShape_capsule(InferenceEngine::CDataPtr data) {
|
||||
const char* py_capsule_name = "ngraph_partial_shape";
|
||||
auto ngraph_pShape_ptr = std::make_shared<ngraph::PartialShape>(data->getPartialShape());
|
||||
auto* sp_copy = new std::shared_ptr<const ngraph::PartialShape>(ngraph_pShape_ptr);
|
||||
auto sp_deleter = [](PyObject* capsule) {
|
||||
auto* capsule_ptr = PyCapsule_GetPointer(capsule, "ngraph_partial_shape");
|
||||
auto* function_sp = static_cast<std::shared_ptr<ngraph::PartialShape>*>(capsule_ptr);
|
||||
if (function_sp) {
|
||||
delete function_sp;
|
||||
}
|
||||
};
|
||||
if (ngraph_pShape_ptr) {
|
||||
return PyCapsule_New(sp_copy, py_capsule_name, sp_deleter);
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr<InferenceEngine::CNNNetwork>& cnn_network)
|
||||
: actual(cnn_network) {
|
||||
if (actual == nullptr)
|
||||
@ -289,8 +309,21 @@ size_t InferenceEnginePython::IENetwork::getBatch() {
|
||||
return actual->getBatchSize();
|
||||
}
|
||||
|
||||
void InferenceEnginePython::IENetwork::reshape(const std::map<std::string, std::vector<size_t>>& input_shapes) {
|
||||
actual->reshape(input_shapes);
|
||||
void InferenceEnginePython::IENetwork::reshape(
|
||||
const std::map<std::string, std::vector<std::vector<int64_t>>>& input_shapes) {
|
||||
std::map<std::string, ngraph::PartialShape> inputShapes;
|
||||
for (auto const& input : input_shapes) {
|
||||
using ngraph::Dimension;
|
||||
std::vector<Dimension> dims;
|
||||
for (auto const& d : input.second) {
|
||||
if (d.size() == 1)
|
||||
dims.push_back(Dimension(d[0]));
|
||||
else if (d.size() == 2)
|
||||
dims.push_back(Dimension(d[0], d[1]));
|
||||
}
|
||||
inputShapes[input.first] = ngraph::PartialShape(dims);
|
||||
}
|
||||
actual->reshape(inputShapes);
|
||||
}
|
||||
|
||||
InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string& name, size_t num_requests)
|