Merge remote-tracking branch 'upstream/master' into sy/test/ConvolutionLayerTest_dynamic_shape_case
commit fa7e87b146
@@ -94,7 +94,7 @@ jobs:
-DENABLE_PROFILING_ITT=OFF
-DENABLE_SAMPLES=OFF
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON
-DNGRAPH_DEBUG_ENABLE=OFF
-DOPENVINO_DEBUG_ENABLE=OFF
$(REPO_DIR)
workingDirectory: $(BUILD_DIR)
@@ -16,7 +16,7 @@ jobs:
timeoutInMinutes: 120

pool:
name: WIN_VMSS_VENV_F8S_WU2
name: WIN_VMSS_VENV_F16S_WU2

variables:
system.debug: true
@@ -34,8 +34,6 @@ jobs:
INSTALL_DIR: $(WORK_DIR)\install_pkg
INSTALL_TEST_DIR: $(INSTALL_DIR)\tests
SETUPVARS: $(INSTALL_DIR)\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe

steps:
- script: |
@@ -59,12 +57,6 @@ jobs:
rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR)
displayName: 'Make dir'

- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'

- checkout: self
clean: true
lfs: false
@@ -109,9 +101,7 @@ jobs:
- script: dir $(REPO_DIR)\inference-engine\temp\ /s
displayName: 'List temp SDKs'

- script: |
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
- script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
workingDirectory: $(BUILD_DIR)
displayName: 'Build Win'
@@ -153,10 +143,8 @@ jobs:
displayName: 'PaddlePaddle Frontend UT'
continueOnError: false

- script: |
set PATH=$(IB_DIR);%PATH%
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml
displayName: 'IE UT old - IB'
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
displayName: 'IE UT old'
continueOnError: false

- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
@@ -187,11 +175,8 @@ jobs:
displayName: 'TEMPLATE FuncTests'
continueOnError: false

# call $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
- script: |
set PATH=$(IB_DIR);%PATH%
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke*:-*CompareWithRefs/base_size=16_pre_nms_topn=100_post_nms_topn=100_nms_thresh=0.7_feat_stride=1_min_size=1_ratio*:*smoke_GRUSequenceCommonZeroClip/GRUSequenceTest.CompareWithRefs/mode=CONVERT_TO_TI_MAX_SEQ_LEN_CONST_seq_lengths* --gtest_output=xml:TEST-cpuFuncTests-IB.xml /testlevel=24
displayName: 'CPU FuncTests - IB'
- script: $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
displayName: 'CPU FuncTests'
continueOnError: false

- script: |
@@ -213,8 +198,3 @@ jobs:
buildPlatform: 'x64' # Optional
buildConfiguration: 'Windows' # Optional
#publishRunAttachments: true # Optional

- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
displayName: Stop IncrediBuild
continueOnError: true
enabled: false
@@ -68,7 +68,7 @@ RUN cmake .. \
-DENABLE_PYTHON=ON \
-DPYTHON_EXECUTABLE=/usr/bin/python3 \
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON \
-DNGRAPH_DEBUG_ENABLE=OFF \
-DOPENVINO_DEBUG_ENABLE=OFF \
-DCMAKE_INSTALL_PREFIX=/openvino/dist \
-DNGRAPH_USE_PROTOBUF_LITE=${PROTOBUF_LITE}
RUN make -j $(nproc) install
@@ -486,7 +486,7 @@ class Watchdog:
self._queue_message(message, message_severity='warning', pr=pr)
elif build_delta > _BUILD_DURATION_THRESHOLD:
# CI job takes too long, possibly frozen - communicate failure
message = ('ONNX CI job build #{}, for PR #{} started,'
message = ('ONNX CI job build #{}, for PR #{} started, '
'but did not finish in designated time of {} '
'minutes!'.format(build_number, pr_number,
str(_BUILD_DURATION_THRESHOLD.seconds / 60)))
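The key fix above is the trailing space added inside the first string literal: Python joins adjacent string literals verbatim, so without it the logged message reads "started,but". A minimal, self-contained sketch of the pitfall:

```python
# Adjacent string literals are concatenated at parse time, before .format runs.
broken = ('ONNX CI job build #{}, for PR #{} started,'
          'but did not finish'.format(42, 7))
fixed = ('ONNX CI job build #{}, for PR #{} started, '
         'but did not finish'.format(42, 7))
print(broken)  # ... started,but did not finish
print(fixed)   # ... started, but did not finish
```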
.gitmodules
@@ -53,3 +53,6 @@
[submodule "ncc"]
path = cmake/developer_package/ncc_naming_style/ncc
url = https://github.com/nithinn/ncc.git
[submodule "thirdparty/onednn_gpu"]
path = thirdparty/onednn_gpu
url = https://github.com/oneapi-src/oneDNN.git
@@ -83,7 +83,6 @@ if(THREADING STREQUAL "OMP")
message(FATAL_ERROR "Intel OMP is not available on current platform")
endif()
update_deps_cache(OMP "${OMP}" "Path to OMP root folder")
log_rpath_from_dir(OMP "${OMP}/lib")
debug_message(STATUS "intel_omp=" ${OMP})

ie_cpack_add_component(omp REQUIRED)
@@ -146,12 +145,6 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
update_deps_cache(TBB_DIR "${TBB}/cmake" "Path to TBB cmake folder")

update_deps_cache(TBBBIND_2_4_DIR "${TBBBIND_2_4}/cmake" "Path to TBBBIND_2_4 cmake folder")

if(WIN32)
log_rpath_from_dir(TBB "${TBB}/bin")
else ()
log_rpath_from_dir(TBB "${TBB}/lib")
endif()
debug_message(STATUS "tbb=" ${TBB})
endif()
@@ -242,14 +235,6 @@ if(ENABLE_OPENCV)
endif()

update_deps_cache(OpenCV_DIR "${ocv_cmake_path}" "Path to OpenCV package folder")

if(WIN32)
log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../bin")
elseif(ANDROID)
log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../../../lib")
else()
log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../lib")
endif()
debug_message(STATUS "opencv=" ${OPENCV})
else()
reset_deps_cache(OpenCV_DIR)
@@ -277,8 +262,8 @@ if(ENABLE_GNA)
set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.1226")
set(GNA_HASH "d5450af15c993e264c25ac4591a7dab44722e10d15fca4f222a1b84429d4e5b6")
set(GNA_VERSION "03.00.00.1377")
set(GNA_HASH "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65")
endif()

set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)
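Both GNA_HASH strings are 64 hexadecimal digits, which matches a SHA-256 digest; assuming that is the hash in use, a locally downloaded archive (hypothetical filename below) can be verified with a few lines of Python:

```python
import hashlib

archive = "gna_03.00.00.1377.zip"  # hypothetical local path to the GNA package
expected = "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65"

sha = hashlib.sha256()
with open(archive, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        sha.update(chunk)
assert sha.hexdigest() == expected, "GNA archive checksum mismatch"
```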
@@ -15,59 +15,3 @@ function(clean_message type)
message (FATAL_ERROR)
endif()
endfunction()

file(REMOVE ${CMAKE_BINARY_DIR}/ld_library_rpath_64.txt)

# log relative path to shared library that has to be used in LD_LIBRARY_PATH
function (log_rpath_remove_top component component_remove_top lib lib_remove_top)

set(top_lib_dir ${${component}})
set(lib_dir ${lib})

# debug_message(STATUS "LIB-IN=${lib} ")
# debug_message(STATUS "TOPLIB-IN=${top_lib_dir} ")
get_filename_component(top_lib_dir "${${component}}" DIRECTORY)

if (${component_remove_top} AND ${component})
else()
get_filename_component(add_name "${${component}}" NAME)
set(top_lib_dir "${top_lib_dir}/${add_name}")
endif()
if (${lib_remove_top} AND lib)
get_filename_component(lib_dir ${lib} DIRECTORY)
endif()

string (REPLACE "//" "/" top_lib_dir "${top_lib_dir}")
string (REPLACE "//" "/" lib_dir "${lib_dir}")

string (REPLACE "\\\\" "/" top_lib_dir "${top_lib_dir}")
string (REPLACE "\\\\" "/" lib_dir "${lib_dir}")

# debug_message(STATUS "LIB-OUT=${lib_dir}")
# debug_message(STATUS "TOPLIB-OUT=${top_lib_dir}")

if (WIN32)
string (TOLOWER "${top_lib_dir}" top_lib_dir)
string (TOLOWER "${lib_dir}" lib_dir)
endif()

string (REPLACE "${top_lib_dir}" "" component_dir "${lib_dir}")

set(RPATH_INFO "${component}=${component_dir}")
debug_message(STATUS "LD_LIBRARY_RPATH: ${RPATH_INFO}")
file(APPEND ${CMAKE_BINARY_DIR}/ld_library_rpath_64.txt "${RPATH_INFO}\n")
endfunction()

function (log_rpath_from_dir component lib_dir)
log_rpath_remove_top("${component}" TRUE "${lib_dir}" FALSE)
endfunction()

function (log_rpath component lib_path)
log_rpath_remove_top(${component} TRUE ${lib_path} TRUE)
endfunction()

# Just wrapping of the original message() function to make this macro known during IE build.
# This macro is redefined (with additional checks) within the InferenceEngineConfig.cmake file.
macro(ext_message TRACE_LEVEL)
message(${TRACE_LEVEL} "${ARGN}")
endmacro()
@@ -122,7 +122,7 @@ endif()

ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" ON "protoc_available" OFF)
ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF)
ie_option(IR_FRONTEND_ENABLE "Enable IR FrontEnd" ON)
ie_option(NGRAPH_IR_FRONTEND_ENABLE "Enable IR FrontEnd" ON)
ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON
"NGRAPH_ONNX_FRONTEND_ENABLE" OFF)
ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
@@ -130,7 +130,7 @@ ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF)
ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON
"NGRAPH_UNIT_TEST_ENABLE" OFF)
ie_option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" OFF)
ie_option(OPENVINO_DEBUG_ENABLE "Enable output for OPENVINO_DEBUG statements" OFF)
ie_option(ENABLE_REQUIREMENTS_INSTALL "Dynamic dependencies install" ON)

# WA for ngraph python build on Windows debug
@@ -63,6 +63,9 @@
# `OpenVINO_Frontend_PaddlePaddle_FOUND`
# OpenVINO PaddlePaddle frontend is available
#
# `OpenVINO_Frontend_IR_FOUND`
# OpenVINO IR frontend is available
#
# OpenVINO version variables:
#
# `OpenVINO_VERSION_MAJOR`
@@ -169,6 +172,7 @@ set(${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@)

set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_ONNX_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_ONNX_FOUND})
set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_PaddlePaddle_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND})
set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_IR_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_IR_FOUND})

# if no components specified, only Runtime is provided
if(NOT ${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS)
@@ -88,5 +88,6 @@ if(ngraph_onnx_importer_FOUND)
endif()

set(ngraph_paddlepaddle_frontend_FOUND ${OpenVINO_Frontend_PaddlePaddle_FOUND})
set(ngraph_ir_frontend_FOUND ${OpenVINO_Frontend_IR_FOUND})

check_required_components(ngraph)
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
#

set_property(GLOBAL PROPERTY JOB_POOLS four_jobs=4)

function(ov_model_convert SRC DST OUT)
set(onnx_gen_script ${OpenVINO_SOURCE_DIR}/ngraph/test/models/onnx/onnx_prototxt_converter.py)
@@ -43,6 +45,7 @@ function(ov_model_convert SRC DST OUT)
"${SRC}/${in_file}" ${full_out_name}
DEPENDS ${onnx_gen_script} "${SRC}/${in_file}"
COMMENT "Generate ${rel_out_name}"
JOB_POOL four_jobs
WORKING_DIRECTORY "${model_source_dir}")
else()
add_custom_command(OUTPUT ${full_out_name}
@@ -50,6 +53,7 @@ function(ov_model_convert SRC DST OUT)
"${SRC}/${in_file}" ${full_out_name}
DEPENDS ${onnx_gen_script} "${SRC}/${in_file}"
COMMENT "Copy ${rel_out_name}"
JOB_POOL four_jobs
WORKING_DIRECTORY "${model_source_dir}")
endif()
list(APPEND files "${full_out_name}")
@@ -73,19 +73,21 @@ inp = torch.randn([seq_length, batch_size, feature_length])
feature_length = torch.LongTensor([seq_length])
x_padded, x_lens = model.encoder(inp, feature_length)
torch.onnx.export(model.encoder, (inp, feature_length), "rnnt_encoder.onnx", opset_version=12,
input_names=['input.1', '1'], dynamic_axes={'input.1': {0: 'seq_len', 1: 'batch'}})
input_names=['input', 'feature_length'], output_names=['x_padded', 'x_lens'],
dynamic_axes={'input': {0: 'seq_len', 1: 'batch'}})

symbol = torch.LongTensor([[20]])
hidden = torch.randn([2, batch_size, 320]), torch.randn([2, batch_size, 320])
g, hidden = model.prediction.forward(symbol, hidden)
torch.onnx.export(model.prediction, (symbol, hidden), "rnnt_prediction.onnx", opset_version=12,
input_names=['input.1', '1', '2'],
dynamic_axes={'input.1': {0: 'batch'}, '1': {1: 'batch'}, '2': {1: 'batch'}})
input_names=['symbol', 'hidden_in_1', 'hidden_in_2'],
output_names=['g', 'hidden_out_1', 'hidden_out_2'],
dynamic_axes={'symbol': {0: 'batch'}, 'hidden_in_1': {1: 'batch'}, 'hidden_in_2': {1: 'batch'}})

f = torch.randn([batch_size, 1, 1024])
model.joint.forward(f, g)
torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12,
input_names=['0', '1'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}})
input_names=['0', '1'], output_names=['result'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}})
```

```bash
@@ -97,8 +99,8 @@ After completing this step, the files `rnnt_encoder.onnx`, `rnnt_prediction.onnx
**Step 6**. Run the conversion command:

```bash
python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input.1[157 1 240],1->157"
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]"
python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input[157 1 240],feature_length->157"
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "symbol[1 1],hidden_in_1[2 1 320],hidden_in_2[2 1 320]"
python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]"
```
Please note that the hardcoded sequence length value of 157 was taken from MLCommons, but conversion to IR preserves
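The updated `--input` strings must match the tensor names baked into the exported ONNX files by the `input_names`/`output_names` arguments above. A small sketch using the standard `onnx` package to double-check the names after export:

```python
import onnx

# Print the graph input names that the mo.py --input strings reference.
for path in ("rnnt_encoder.onnx", "rnnt_prediction.onnx", "rnnt_joint.onnx"):
    model = onnx.load(path)
    print(path, "->", [inp.name for inp in model.graph.input])
    # e.g. rnnt_encoder.onnx -> ['input', 'feature_length']
```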
@@ -104,3 +104,9 @@ Intel® Distribution of OpenVINO™ toolkit includes the following components:
- [Intel® Media SDK](https://software.intel.com/en-us/media-sdk) (in Intel® Distribution of OpenVINO™ toolkit for Linux only)

The open-source version of the OpenVINO™ toolkit is available on [GitHub](https://github.com/openvinotoolkit/openvino). For building the Inference Engine from the source code, see the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">build instructions</a>.

## OpenVINO™ API 2.0

The new OpenVINO™ API 2.0 was introduced to make the OpenVINO™ interface more user-friendly and to align OpenVINO™ with other frameworks.
The [migration guide](@ref ov_2_0_transition_guide) simplifies migrating an application from the old API to OpenVINO™ API 2.0.
docs/migration_ov_2_0/docs/common_inference_pipeline.md (new file)
@@ -0,0 +1,55 @@
# OpenVINO™ Inference Pipeline {#ov_inference_pipeline}

To run inference with the OpenVINO™ toolkit, users typically need to take the following steps:
1. Create Core
2. (Optional) Read the model from disk
2.1. Configure Input and Output of the Model
3. Load the Model to the Device
4. Create an Inference Request
5. Prepare Input
6. Start Inference
7. Process the Inference Results

The code snippets below cover these steps and show how application code should change when migrating to OpenVINO™ 2.0.

## 1. Create Core

Inference Engine API:

@snippet snippets/ie_common.cpp ie:create_core

OpenVINO™ 2.0 API:

@snippet snippets/ov_common.cpp ov_api_2_0:create_core

## 2. (Optional) Read the model from disk

Inference Engine API:

@snippet snippets/ie_common.cpp ie:read_model

OpenVINO™ 2.0 API:

@snippet snippets/ov_common.cpp ov_api_2_0:read_model

### 2.1 Configure Input and Output of the Model

Inference Engine API:

@snippet snippets/ie_common.cpp ie:get_inputs_outputs

OpenVINO™ 2.0 API:

@snippet snippets/ov_common.cpp ov_api_2_0:get_inputs_outputs

## 3. Load the Model to the Device

Inference Engine API:

@snippet snippets/ie_common.cpp ie:compile_model

OpenVINO™ 2.0 API:

@snippet snippets/ov_common.cpp ov_api_2_0:compile_model

## 5. TBD
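The snippets referenced above are C++. For orientation only, a rough Python sketch of the same pipeline using the pre-2.0 Inference Engine Python API (model path and input data are placeholders, not part of the guide):

```python
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()                                                # 1. create Core
net = ie.read_network(model="model.xml")                     # 2. read the model from disk
input_name = next(iter(net.input_info))                      # 2.1 inspect inputs
output_name = next(iter(net.outputs))
exec_net = ie.load_network(network=net, device_name="CPU")   # 3. load the model to the device
# 4.-6. prepare a placeholder input and run a synchronous inference
data = np.zeros(net.input_info[input_name].input_data.shape, dtype=np.float32)
result = exec_net.infer({input_name: data})
print(result[output_name].shape)                             # 7. process the results
```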
docs/migration_ov_2_0/docs/intro.md (new file)
@@ -0,0 +1,12 @@
# OpenVINO™ API 2.0 transition guide {#ov_2_0_transition_guide}

OpenVINO™ API 2.0 was introduced to simplify migration from other frameworks and to make the OpenVINO™ API more user-friendly.
The main differences between the APIs are listed below:

- OpenVINO™ API 2.0 uses tensor names or indexes to work with inputs and outputs; the old API works with operation names.
- The structures for shapes and element types were changed.
- The naming style was changed: the old API uses CamelCase, while OpenVINO™ API 2.0 uses snake_case for function names.
- Namespaces were aligned between components.

Please refer to the following transition guides to understand how to migrate your application to OpenVINO™ API 2.0.
- [OpenVINO™ Common Inference pipeline](@ref ov_inference_pipeline)
@@ -3,7 +3,7 @@
nGraph representation provides an API to get detailed information about the graph structure.

To receive additional messages about applied graph modifications, rebuild the nGraph library with
the `-DNGRAPH_DEBUG_ENABLE=ON` option.
the `-DOPENVINO_DEBUG_ENABLE=ON` option.

To visualize the nGraph function to the xDot format or to an image file, use the
`ngraph::pass::VisualizeTree` graph transformation pass:
docs/snippets/ie_common.cpp (new file)
@@ -0,0 +1,43 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <ie_core.hpp>

int main() {
    //! [ie:create_core]
    InferenceEngine::Core core;
    //! [ie:create_core]

    //! [ie:read_model]
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");
    //! [ie:read_model]

    //! [ie:get_inputs_outputs]
    InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
    InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
    //! [ie:get_inputs_outputs]

    //! [ie:compile_model]
    InferenceEngine::ExecutableNetwork exec_network = core.LoadNetwork(network, "CPU");
    //! [ie:compile_model]

    //! [ie:create_infer_request]
    InferenceEngine::InferRequest infer_request = exec_network.CreateInferRequest();
    //! [ie:create_infer_request]

    //! [ie:get_input_tensor]
    InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(inputs.begin()->first);
    // fill input blob
    //! [ie:get_input_tensor]

    //! [ie:inference]
    infer_request.Infer();
    //! [ie:inference]

    //! [ie:get_output_tensor]
    InferenceEngine::Blob::Ptr output_blob = infer_request.GetBlob(outputs.begin()->first);
    // process output data
    //! [ie:get_output_tensor]
    return 0;
}
@@ -23,12 +23,10 @@ acos0->set_argument(0, add0);
add1->set_argument(0, acos0);
add1->set_argument(1, abs0);

// Run shape inference on the nodes
NodeVector ops{arg0, arg1, add0, abs0, acos0, add1};
validate_nodes_and_infer_types(ops);

// Create a graph with one output (add1) and two inputs (arg0, arg1)
auto ng_function = make_shared<Function>(OutputVector{add1}, ParameterVector{arg0, arg1});
// Run shape inference on the nodes
ng_function->validate_nodes_and_infer_types();

//! [part0]
docs/snippets/ov_common.cpp (new file)
@@ -0,0 +1,34 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/core/function.hpp>
#include <openvino/runtime/runtime.hpp>

int main() {
    //! [ov_api_2_0:create_core]
    ov::runtime::Core core;
    //! [ov_api_2_0:create_core]

    //! [ov_api_2_0:read_model]
    std::shared_ptr<ov::Function> network = core.read_model("model.xml");
    //! [ov_api_2_0:read_model]

    //! [ov_api_2_0:get_inputs_outputs]
    ov::ParameterVector inputs = network->get_parameters();
    ov::ResultVector outputs = network->get_results();
    //! [ov_api_2_0:get_inputs_outputs]

    //! [ov_api_2_0:compile_model]
    ov::runtime::ExecutableNetwork exec_network = core.compile_model(network, "CPU");
    //! [ov_api_2_0:compile_model]

    ov::runtime::InferRequest infer_request = exec_network.create_infer_request();
    //
    // InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(inputs.begin()->first);
    // // fill input blob
    // infer_request.Infer();
    //
    // InferenceEngine::Blob::Ptr output_blob = infer_request.GetBlob(outputs.begin()->first);
    // process output data
    return 0;
}
@@ -110,8 +110,6 @@ if(ANDROID)

set(LIBUSB_INCLUDE_DIR "${LIBUSB}/include")
set(LIBUSB_LIBRARY "${LIBUSB}/libs/${ANDROID_ABI}/libusb1.0.so")

log_rpath_from_dir(LIBUSB "${LIBUSB}/libs/${ANDROID_ABI}")
endif()

#
@@ -117,7 +117,8 @@ Options:
(default 16).
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
Optional. The user-specified input scale factor for
quantization.
quantization. If the network contains multiple inputs,
provide scale factors by separating them with commas.
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
Optional. Write GNA model to file using path/filename
provided.
@@ -176,27 +177,30 @@ The sample application logs each step in a standard output stream.
[ INFO ] Creating Inference Engine
[ INFO ] Reading the network: wsj_dnn5b.xml
[ INFO ] Configuring input and output blobs
[ INFO ] Using scale factor of 2175.4322417 calculated from first utterance.
[ INFO ] Using scale factor(s) calculated from first utterance
[ INFO ] For input 0 using scale factor of 2175.4322418
[ INFO ] Loading the model to the plugin
[ INFO ] Starting inference in synchronous mode
[ INFO ] Utterance 0 (4k0c0301)
[ INFO ] Output blob name: affinetransform14/Fused_Add_
[ INFO ] Frames in utterance: 1294
[ INFO ] Total time in Infer (HW and SW): 5305.47ms
[ INFO ] max error: 0.7051839
[ INFO ] avg error: 0.0448387
[ INFO ] Total time in Infer (HW and SW): 6211.45ms
[ INFO ] max error: 0.7051840
[ INFO ] avg error: 0.0448388
[ INFO ] avg rms error: 0.0582387
[ INFO ] stdev error: 0.0371649
[ INFO ] stdev error: 0.0371650
[ INFO ]
[ INFO ] Utterance 1 (4k0c0302)
[ INFO ] Output blob name: affinetransform14/Fused_Add_
[ INFO ] Frames in utterance: 1005
[ INFO ] Total time in Infer (HW and SW): 5031.53ms
[ INFO ] Total time in Infer (HW and SW): 4742.27ms
[ INFO ] max error: 0.7575974
[ INFO ] avg error: 0.0452166
[ INFO ] avg rms error: 0.0586013
[ INFO ] stdev error: 0.0372769
[ INFO ]
...
[ INFO ] Total sample time: 38033.09ms
[ INFO ] Total sample time: 40219.99ms
[ INFO ] File result.npz was created!
[ INFO ] This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool
```
@@ -28,8 +28,9 @@ def parse_args() -> argparse.Namespace:
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
args.add_argument('-sf', '--scale_factor', type=float,
help='Optional. The user-specified input scale factor for quantization.')
args.add_argument('-sf', '--scale_factor', type=str,
help='Optional. The user-specified input scale factor for quantization. '
'If the network contains multiple inputs, provide scale factors by separating them with commas.')
args.add_argument('-wg', '--export_gna_model', type=str,
help='Optional. Write GNA model to file using path/filename provided.')
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
@@ -103,6 +103,32 @@ def get_output_layer_list(net: Union[IENetwork, ExecutableNetwork],
    return [list(net.outputs.keys())[-1]]


def parse_scale_factors(args: argparse.Namespace) -> list:
    """Get a list of scale factors for input files"""
    input_files = re.split(', |,', args.input)
    scale_factors = re.split(', |,', str(args.scale_factor))
    scale_factors = list(map(float, scale_factors))

    if len(input_files) != len(scale_factors):
        log.error(f'Incorrect command line for multiple inputs: {len(scale_factors)} scale factors provided for '
                  f'{len(input_files)} input files.')
        sys.exit(-7)

    for i, scale_factor in enumerate(scale_factors):
        if float(scale_factor) < 0:
            log.error(f'Scale factor for input #{i} (counting from zero) is out of range (must be positive).')
            sys.exit(-8)

    return scale_factors


def set_scale_factors(plugin_config: dict, scale_factors: list):
    """Set a scale factor provided for each input"""
    for i, scale_factor in enumerate(scale_factors):
        log.info(f'For input {i} using scale factor of {scale_factor:.7f}')
        plugin_config[f'GNA_SCALE_FACTOR_{i}'] = str(scale_factor)
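As an illustration (values hypothetical), the two helpers above turn a parsed scale-factor list into the per-input GNA plugin configuration keys:

```python
import logging as log
import sys

log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)

plugin_config = {}
set_scale_factors(plugin_config, [2175.43, 1.0])  # e.g. two inputs
# plugin_config is now:
# {'GNA_SCALE_FACTOR_0': '2175.43', 'GNA_SCALE_FACTOR_1': '1.0'}
```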
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()
@@ -149,16 +175,23 @@ def main():

# Set a GNA scale factor
if args.import_gna_model:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
elif args.scale_factor:
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
if args.scale_factor:
log.warning(f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}')
set_scale_factors(plugin_config, parse_scale_factors(args))
else:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
else:
utterances = read_utterance_file(args.input.split(',')[0])
key = sorted(utterances)[0]
scale_factor = get_scale_factor(utterances[key])
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
if args.scale_factor:
set_scale_factors(plugin_config, parse_scale_factors(args))
else:
scale_factors = []

for file_name in re.split(', |,', args.input):
first_utterance = next(iter(read_utterance_file(file_name).values()))
scale_factors.append(get_scale_factor(first_utterance))

log.info('Using scale factor(s) calculated from first utterance')
set_scale_factors(plugin_config, scale_factors)

if args.export_embedded_gna_model:
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
@@ -196,6 +196,10 @@ public:
versionInfo = &ExtensionDescription;
}

std::map<std::string, ngraph::OpSet> getOpSets() override {
return {{"framework_node_ext", ngraph::OpSet()}};
}

void Unload() noexcept override {}
};
@@ -59,10 +59,8 @@ add_custom_command(TARGET ${TARGET_NAME}
# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_COMPONENT})

install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
COMPONENT ${PYTHON_COMPONENT}
LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
COMPONENT ${PYTHON_COMPONENT})
RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT}
LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT})

install(PROGRAMS __init__.py
DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
@@ -52,3 +52,16 @@ add_custom_command(TARGET ${TARGET_NAME}

add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}
EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx")

# install

install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
COMPONENT tests EXCLUDE_FROM_ALL
LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
COMPONENT tests EXCLUDE_FROM_ALL)

install(PROGRAMS __init__.py
DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
COMPONENT tests
EXCLUDE_FROM_ALL)
@@ -51,10 +51,44 @@ def pytest_configure(config):
)


def create_ngraph_function(inputShape):
def create_encoder(input_shape, levels = 4):
import ngraph as ng
inputShape = ng.impl.PartialShape(inputShape)
param = ng.parameter(inputShape, dtype=np.float32, name="data")
result = ng.relu(param, name='out')
# input
input_node = ng.parameter(input_shape, np.float32, name="data")

padding_begin = padding_end = [0, 0]
strides = [1, 1]
dilations = [1, 1]
input_channels = [input_shape[1]]
last_output = input_node

# convolution layers
for i in range(levels):
input_c = input_channels[-1]
output_c = input_c * 2
conv_w = np.random.uniform(0, 1, [output_c, input_c, 5, 5]).astype(np.float32)
conv_node = ng.convolution(last_output, conv_w, strides, padding_begin, padding_end, dilations)
input_channels.append(output_c)
last_output = conv_node

# deconvolution layers
for i in range(levels):
input_c = input_channels[-2]
output_c = input_channels.pop(-1)
deconv_w = np.random.uniform(0, 1, [output_c, input_c, 5, 5]).astype(np.float32)
deconv_node = ng.convolution_backprop_data(last_output, deconv_w, strides)
last_output = deconv_node

# result
last_output.set_friendly_name("out")
result_node = ng.result(last_output)
return ng.Function(result_node, [input_node], "Encoder")


def create_relu(input_shape):
import ngraph as ng
input_shape = ng.impl.PartialShape(input_shape)
param = ng.parameter(input_shape, dtype=np.float32, name="data")
result = ng.relu(param, name="out")
function = ng.Function(result, [param], "TestFunction")
return function
@@ -140,10 +140,11 @@ def test_set_shape():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_blob_set_shape_after_async_infer():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)])
function = create_encoder([1, 4, 20, 20])
net = ng.function_to_cnn(function)
net.reshape({"data": [(1, 5), 4, 20, 20]})
ie_core = IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
exec_net = ie_core.load_network(net, "TEMPLATE")
@@ -152,3 +153,4 @@ def test_blob_set_shape_after_async_infer():
with pytest.raises(RuntimeError) as e:
request.input_blobs['data'].set_shape([3, 4, 20, 20])
assert "REQUEST_BUSY" in str(e.value)
request.wait()
@@ -61,9 +61,9 @@ def test_initialized(device):
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_is_dynamic():
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function([-1, 3, 20, 20])
function = create_relu([-1, 3, 20, 20])
net = ng.function_to_cnn(function)
ie = IECore()
ie.register_plugin("templatePlugin", "TEMPLATE")
@@ -48,9 +48,9 @@ def test_initialized():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_is_dynamic():
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function([-1, 3, 20, 20])
function = create_relu([-1, 3, 20, 20])
net = ng.function_to_cnn(function)
assert net.input_info["data"].input_data.is_dynamic
assert net.outputs["out"].is_dynamic
@@ -166,9 +166,9 @@ def test_reshape():
([1, 3, -1, 25], [1, 3, 22, -1])
])
def test_reshape_with_partial_shape(device, shape, p_shape):
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function(shape)
function = create_relu(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
changedFunction = ng.function_from_cnn(net)
@@ -185,9 +185,9 @@ def test_reshape_with_partial_shape(device, shape, p_shape):

@pytest.mark.ngraph_dependent_test
def test_incorrect_reshape(device):
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function([1, 3, 22, 22])
function = create_relu([1, 3, 22, 22])
net = ng.function_to_cnn(function)
with pytest.raises(ValueError) as e:
net.reshape({"data": [(2, 4, 6), 3, 22, 22]})
@@ -287,9 +287,9 @@ def test_tensor_names():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_create_two_exec_net():
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)])
function = create_relu([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)])
net = ng.function_to_cnn(function)
ie_core = IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
@@ -589,13 +589,13 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode):
@pytest.mark.parametrize("shape, p_shape, ref_shape", [
([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]),
([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]),
])
def test_infer_dynamic_network_with_set_shape(shape, p_shape, ref_shape):
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -616,13 +616,13 @@ def test_infer_dynamic_network_with_set_shape(shape, p_shape, ref_shape):
@pytest.mark.parametrize("shape, p_shape, ref_shape", [
([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]),
([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]),
])
def test_infer_dynamic_network_without_set_shape(shape, p_shape, ref_shape):
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -642,13 +642,13 @@ def test_infer_dynamic_network_without_set_shape(shape, p_shape, ref_shape):
@pytest.mark.parametrize("shape, p_shape, ref_shape", [
([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]),
([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]),
])
def test_infer_dynamic_network_with_set_blob(shape, p_shape, ref_shape):
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -670,11 +670,11 @@ def test_infer_dynamic_network_with_set_blob(shape, p_shape, ref_shape):
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_infer_dynamic_network_twice():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
shape, p_shape = [1, 4, 20, 20], [(0,5), 4, 20, 20]
ref_shape1, ref_shape2 = [2, 4, 20, 20], [3, 4, 20, 20]
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -692,11 +692,11 @@ def test_infer_dynamic_network_twice():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_infer_dynamic_network_with_set_blob_twice():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
shape, p_shape = [1, 4, 20, 20], [(0,5), 4, 20, 20]
ref_shape1, ref_shape2 = [2, 4, 20, 20], [3, 4, 20, 20]
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -723,14 +723,14 @@ def test_infer_dynamic_network_with_set_blob_twice():
@pytest.mark.template_plugin
@pytest.mark.parametrize("shapes", [
([3, 4, 20, 20], [3, 4, 20, 20], [3, 4, 20, 20]),
([3, 4, 20, 20], [3, 6, 20, 20], [3, 8, 20, 20]),
([3, 4, 20, 20], [3, 4, 28, 28], [3, 4, 45, 45]),
])
def test_async_infer_dynamic_network_3_requests(shapes):
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([3, 4, 20, 20])
function = create_encoder([3, 4, 20, 20])
net = ng.function_to_cnn(function)
net.reshape({"data": [3, (2, 10), 20, 20]})
net.reshape({"data": [3, 4, (20, 50), (20, 50)]})
ie_core = ie.IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
exec_net = ie_core.load_network(net, "TEMPLATE", num_requests=3)
@@ -745,9 +745,9 @@ def test_async_infer_dynamic_network_3_requests(shapes):
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_set_blob_with_incorrect_name():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([4, 4, 20, 20])
function = create_encoder([4, 4, 20, 20])
net = ng.function_to_cnn(function)
ie_core = ie.IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
@@ -763,9 +763,9 @@ def test_set_blob_with_incorrect_name():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_set_blob_with_incorrect_size():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([4, 4, 20, 20])
function = create_encoder([4, 4, 20, 20])
net = ng.function_to_cnn(function)
ie_core = ie.IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
@@ -773,6 +773,7 @@ def test_set_blob_with_incorrect_size():
tensor_desc = exec_net.requests[0].input_blobs["data"].tensor_desc
tensor_desc.dims = [tensor_desc.dims[0]*2, 4, 20, 20]
blob = ie.Blob(tensor_desc)
print(exec_net.requests[0].output_blobs)
with pytest.raises(RuntimeError) as e:
exec_net.requests[0].set_blob("data", blob)
assert f"Input blob size is not equal network input size" in str(e.value)
@@ -784,10 +785,11 @@ def test_set_blob_with_incorrect_size():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_set_blob_after_async_infer():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)])
function = create_encoder([1, 4, 20, 20])
net = ng.function_to_cnn(function)
net.reshape({"data": [(0, 5), 4, 20, 20]})
ie_core = ie.IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
exec_net = ie_core.load_network(net, "TEMPLATE")
@@ -799,3 +801,4 @@ def test_set_blob_after_async_infer():
with pytest.raises(RuntimeError) as e:
request.set_blob("data", blob)
assert "REQUEST_BUSY" in str(e.value)
request.wait()
@@ -6,14 +6,14 @@ import ngraph as ng
from ngraph.impl.op import Parameter
from ngraph.impl import Function, Shape, Type

from conftest import model_path, create_ngraph_function
from conftest import model_path, create_relu


test_net_xml, test_net_bin = model_path()


def test_create_IENetwork_from_nGraph():
func = create_ngraph_function([1, 3, 22, 22])
func = create_relu([1, 3, 22, 22])
caps = Function.to_capsule(func)
cnnNetwork = IENetwork(caps)
assert cnnNetwork != None
@@ -23,7 +23,7 @@ def test_create_IENetwork_from_nGraph():


def test_get_IENetwork_from_nGraph():
func = create_ngraph_function([1, 3, 22, 22])
func = create_relu([1, 3, 22, 22])
caps = Function.to_capsule(func)
cnnNetwork = IENetwork(caps)
assert cnnNetwork != None
@@ -2,11 +2,46 @@
# SPDX-License-Identifier: Apache-2.0
#

set(TARGET_NAME "benchmark_app")

file (GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)

ie_add_sample(NAME benchmark_app
ie_add_sample(NAME ${TARGET_NAME}
SOURCES ${SRC}
HEADERS ${HDR}
DEPENDENCIES format_reader ie_samples_utils
OPENCV_DEPENDENCIES core)

find_package(OpenCL)

find_path(OpenCL_HPP_INCLUDE_DIR
NAMES
CL/cl2.hpp OpenCL/cl2.hpp
HINTS
${opencl_root_hints}
ENV "PROGRAMFILES(X86)"
ENV AMDAPPSDKROOT
ENV INTELOCLSDKROOT
ENV NVSDKCOMPUTE_ROOT
ENV CUDA_PATH
ENV ATISTREAMSDKROOT
ENV OCL_ROOT
PATH_SUFFIXES
include
OpenCL/common/inc
"AMD APP/include")

if(OPENCL_HEADERS_DIR)
# Use OpenCL CPP headers from sources if present
set(OpenCL_HEADERS OPENCL_HEADERS_DIR)
elseif(OpenCL_HPP_INCLUDE_DIR)
# Append OpenCL CPP headers to C headers and use both
set(OpenCL_HEADERS OpenCL_INCLUDE_DIR OpenCL_HPP_INCLUDE_DIR)
endif()

if(OpenCL_FOUND AND OpenCL_HEADERS)
target_link_libraries(${TARGET_NAME} PRIVATE OpenCL::OpenCL)
target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_HEADERS})
target_compile_definitions(${TARGET_NAME} PRIVATE HAVE_GPU_DEVICE_MEM_SUPPORT)
endif()
@@ -4,6 +4,10 @@

#pragma once

#if defined(HAVE_GPU_DEVICE_MEM_SUPPORT)
# define HAVE_DEVICE_MEM_SUPPORT
#endif

#include <gflags/gflags.h>

#include <iostream>
@@ -132,6 +136,12 @@ static const char progress_message[] =
// @brief message for performance counters option
static const char pc_message[] = "Optional. Report performance counters.";

#ifdef HAVE_DEVICE_MEM_SUPPORT
// @brief message for switching memory allocation type option
static const char use_device_mem_message[] =
"Optional. Switch between host and device memory allocation for input and output buffers.";
#endif

#ifdef USE_OPENCV
// @brief message for load config option
static const char load_config_message[] =
@@ -266,6 +276,11 @@ DEFINE_bool(progress, false, progress_message);
/// @brief Define flag for showing performance counters <br>
DEFINE_bool(pc, false, pc_message);

#ifdef HAVE_DEVICE_MEM_SUPPORT
/// @brief Define flag for switching between host and device memory allocation for input and output buffers
DEFINE_bool(use_device_mem, false, use_device_mem_message);
#endif

#ifdef USE_OPENCV
/// @brief Define flag for loading configuration file <br>
DEFINE_string(load_config, "", load_config_message);
@@ -339,6 +354,9 @@ static void showUsage() {
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;
std::cout << " -enforcebf16=<true/false> " << enforce_bf16_message << std::endl;
std::cout << " -pin \"YES\"/\"HYBRID_AWARE\"/\"NO\"/\"NUMA\" " << infer_threads_pinning_message << std::endl;
#ifdef HAVE_DEVICE_MEM_SUPPORT
std::cout << " -use_device_mem " << use_device_mem_message << std::endl;
#endif
std::cout << std::endl << " Statistics dumping options:" << std::endl;
std::cout << " -report_type \"<type>\" " << report_type_message << std::endl;
std::cout << " -report_folder " << report_folder_message << std::endl;
@@ -65,6 +65,10 @@ public:
return _request.GetBlob(name);
}

void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
_request.SetBlob(name, data);
}

double getExecutionTimeInMilliseconds() const {
auto execTime = std::chrono::duration_cast<ns>(_endTime - _startTime);
return static_cast<double>(execTime.count()) * 0.000001;
@@ -21,6 +21,7 @@
#include "infer_request_wrap.hpp"
#include "inputs_filling.hpp"
#include "progress_bar.hpp"
#include "remote_blobs_filling.hpp"
#include "statistics_report.hpp"
#include "utils.hpp"
@@ -592,7 +593,16 @@ int main(int argc, char* argv[]) {
next_step();

InferRequestsQueue inferRequestsQueue(exeNetwork, nireq);
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
if (isFlagSetInCommandLine("use_device_mem")) {
if (device_name.find("GPU") == 0)
::gpu::fillRemoteBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests, exeNetwork);
else if (device_name.find("CPU") == 0)
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
else
IE_THROW() << "Requested device doesn't support `use_device_mem` option.";
} else {
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
}

// ----------------- 10. Measuring performance
// ------------------------------------------------------------------
inference-engine/samples/benchmark_app/remote_blobs_filling.cpp (new file)
@@ -0,0 +1,140 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "remote_blobs_filling.hpp"

#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace gpu {

template <typename T>
using uniformDistribution = typename std::conditional<
    std::is_floating_point<T>::value,
    std::uniform_real_distribution<T>,
    typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;

template <typename T, typename T2>
void fillBufferRandom(void* inputBuffer,
                      size_t elementsNum,
                      T rand_min = std::numeric_limits<uint8_t>::min(),
                      T rand_max = std::numeric_limits<uint8_t>::max()) {
    std::mt19937 gen(0);
    uniformDistribution<T2> distribution(rand_min, rand_max);
    auto inputBufferData = static_cast<T*>(inputBuffer);
    for (size_t i = 0; i < elementsNum; i++) {
        inputBufferData[i] = static_cast<T>(distribution(gen));
    }
}

void fillBuffer(void* inputBuffer, size_t elementsNum, InferenceEngine::Precision precision) {
    if (precision == InferenceEngine::Precision::FP32) {
        fillBufferRandom<float, float>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::FP16) {
        fillBufferRandom<short, short>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::I32) {
        fillBufferRandom<int32_t, int32_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::I64) {
        fillBufferRandom<int64_t, int64_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::U8) {
        // uniform_int_distribution<uint8_t> is not allowed in the C++17
        // standard and vs2017/19
        fillBufferRandom<uint8_t, uint32_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::I8) {
        // uniform_int_distribution<int8_t> is not allowed in the C++17 standard
        // and vs2017/19
        fillBufferRandom<int8_t, int32_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::U16) {
        fillBufferRandom<uint16_t, uint16_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::I16) {
        fillBufferRandom<int16_t, int16_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::BOOL) {
        fillBufferRandom<uint8_t, uint32_t>(inputBuffer, elementsNum, 0, 1);
    } else {
        IE_THROW() << "Requested precision is not supported";
    }
}

size_t getBytesPerElement(InferenceEngine::Precision precision) {
    switch (precision) {
    case InferenceEngine::Precision::FP32:
        return 4;
    case InferenceEngine::Precision::FP16:
        return 2;
    case InferenceEngine::Precision::I32:
        return 4;
    case InferenceEngine::Precision::I64:
        return 8;
    case InferenceEngine::Precision::U8:
        return 1;
    case InferenceEngine::Precision::I8:
        return 1;
    case InferenceEngine::Precision::U16:
        return 2;
    case InferenceEngine::Precision::I16:
        return 2;
    case InferenceEngine::Precision::BOOL:
        return 1;
    default:
        IE_THROW() << "Requested precision is not supported";
    }
}

void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
                     const size_t& batchSize,
                     benchmark_app::InputsInfo& app_inputs_info,
                     std::vector<InferReqWrap::Ptr> requests,
                     const InferenceEngine::ExecutableNetwork& exeNetwork) {
#ifdef HAVE_DEVICE_MEM_SUPPORT
    slog::info << "Device memory will be used for input and output blobs" << slog::endl;
    if (inputFiles.size()) {
        slog::warn << "Device memory supports only random data at this moment, input images will be ignored"
                   << slog::endl;
    }
    auto context = exeNetwork.GetContext();
    auto oclContext = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(context)->get();
    auto oclInstance = std::make_shared<OpenCL>(oclContext);

    auto setShared = [&](size_t requestId,
                         const std::string name,
                         const InferenceEngine::TensorDesc& desc,
                         bool fillRandom = false) {
        cl_int err;
        auto inputDims = desc.getDims();
        auto elementsNum = std::accumulate(begin(inputDims), end(inputDims), 1, std::multiplies<size_t>());
        auto inputSize = elementsNum * getBytesPerElement(desc.getPrecision());

        cl::Buffer sharedBuffer =
            cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);

        if (fillRandom) {
            void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(sharedBuffer,
                                                                   CL_TRUE,
                                                                   CL_MEM_READ_WRITE,
                                                                   0,
                                                                   (cl::size_type)inputSize);
            fillBuffer(mappedPtr, elementsNum, desc.getPrecision());
            oclInstance->_queue.enqueueUnmapMemObject(sharedBuffer, mappedPtr);
        }

        InferenceEngine::Blob::Ptr sharedBlob = InferenceEngine::gpu::make_shared_blob(desc, context, sharedBuffer);

        requests.at(requestId)->setBlob(name, sharedBlob);
    };

    for (size_t requestId = 0; requestId < requests.size(); requestId++) {
        for (auto& item : exeNetwork.GetInputsInfo())
            setShared(requestId, item.first, item.second->getTensorDesc(), true);

        for (auto& item : exeNetwork.GetOutputsInfo())
            setShared(requestId, item.first, item.second->getTensorDesc());
    }
#else
    IE_THROW() << "Device memory requested for GPU device, but OpenCL was not linked";
#endif
}

} // namespace gpu
@ -0,0 +1,64 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#if defined(HAVE_GPU_DEVICE_MEM_SUPPORT)
#    define HAVE_DEVICE_MEM_SUPPORT
#    include <gpu/gpu_context_api_ocl.hpp>
#endif

#include <inference_engine.hpp>

#include "infer_request_wrap.hpp"
#include "utils.hpp"

namespace gpu {

#ifdef HAVE_DEVICE_MEM_SUPPORT
struct OpenCL {
    cl::Context _context;
    cl::Device _device;
    cl::CommandQueue _queue;

    explicit OpenCL(std::shared_ptr<std::vector<cl_context_properties>> media_api_context_properties = nullptr) {
        // get Intel GPU OCL device, create context and queue
        {
            std::vector<cl::Device> devices;
            std::vector<cl::Platform> platforms;
            const unsigned int refVendorID = 0x8086;

            cl::Platform::get(&platforms);
            for (auto& p : platforms) {
                p.getDevices(CL_DEVICE_TYPE_GPU, &devices);
                for (auto& d : devices) {
                    if (refVendorID == d.getInfo<CL_DEVICE_VENDOR_ID>()) {
                        _device = d;
                        _context = cl::Context(_device);
                        break;
                    }
                }
            }

            cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
            _queue = cl::CommandQueue(_context, _device, props);
        }
    }

    explicit OpenCL(cl_context context) {
        // user-supplied context handle
        _context = cl::Context(context, true);
        _device = cl::Device(_context.getInfo<CL_CONTEXT_DEVICES>()[0].get(), true);

        cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        _queue = cl::CommandQueue(_context, _device, props);
    }
};
#endif

void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
                     const size_t& batchSize,
                     benchmark_app::InputsInfo& app_inputs_info,
                     std::vector<InferReqWrap::Ptr> requests,
                     const InferenceEngine::ExecutableNetwork& exeNetwork);

} // namespace gpu
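
// A short sketch of how the OpenCL helper above can be used, under the
// assumption that HAVE_DEVICE_MEM_SUPPORT is defined; `pluginContext` is a
// hypothetical cl_context obtained elsewhere (e.g. from ClContext::get()).
#ifdef HAVE_DEVICE_MEM_SUPPORT
inline void openclHelperSketch(cl_context pluginContext) {
    gpu::OpenCL discovered;              // scans platforms for an Intel (0x8086) GPU device
    gpu::OpenCL wrapped(pluginContext);  // retains the user-supplied context instead
    (void)discovered;

    // Allocate a small buffer and round-trip it through a blocking map/unmap,
    // mirroring what fillRemoteBlobs() does for input blobs.
    cl::Buffer scratch(wrapped._context, CL_MEM_READ_WRITE, (cl::size_type)64);
    void* ptr = wrapped._queue.enqueueMapBuffer(scratch, CL_TRUE, CL_MEM_READ_WRITE, 0, (cl::size_type)64);
    wrapped._queue.enqueueUnmapMemObject(scratch, ptr);
    wrapped._queue.finish();
}
#endif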
@ -1,96 +0,0 @@
@echo off

:: Copyright (C) 2018-2021 Intel Corporation
:: SPDX-License-Identifier: Apache-2.0

setlocal enabledelayedexpansion

for /f "delims=" %%x in (dependencies_64.txt) do (set "%%x")

for %%A in ("%MKL%") do set MKL_FILENAME=%%~nxA
for %%A in ("%OMP%") do set OMP_FILENAME=%%~nxA
for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA
for %%A in ("%GNA%") do set GNA_FILENAME=%%~nxA
for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA
for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA
for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA
for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA

call :DownloadFile MKL %MKL%
call :DownloadFile OMP %OMP%
call :DownloadFile MYRIAD %MYRIAD%
call :DownloadFile GNA %GNA%
call :DownloadFile OPENCV %OPENCV%
call :DownloadFile HDDL %HDDL%
call :DownloadFile VPU_FIRMWARE_MA2X8X %VPU_FIRMWARE_MA2X8X%
call :DownloadFile TBB %TBB%

for /f "delims=" %%x in (ld_library_rpath_64.txt) do (set "%%x")

set PATH=%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\MKL\%MKL_FILENAME%%MKL%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\OMP\%OMP_FILENAME%%OMP%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\GNA\%GNA_FILENAME%%GNA%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\OPENCV\%OPENCV_FILENAME%%OPENCV%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\TBB\%TBB_FILENAME%%TBB%;%PATH%

set PATH=%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%;%PATH%

if not "%MYRIAD%"=="" (
    if exist "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\mvnc" (
        echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%" intel64 /S /I /Y /R
        xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%" intel64 /S /I /Y /R
    )

    if exist "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\mvnc" (
        echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\*" intel64 /S /I /Y /R
        xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\*" intel64 /S /I /Y /R
    )
)

if not "%VPU_FIRMWARE_MA2X8X%"=="" (
    if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
        echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R
        xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R
    )
)

set PATH=%DL_SDK_TEMP%\test_dependencies\HDDL\%HDDL_FILENAME%%HDDL%\..\bin;%PATH%

if not "%HDDL%"=="" (
    set HDDL_INSTALL_DIR=%DL_SDK_TEMP%\test_dependencies\HDDL\%HDDL_FILENAME%%HDDL%\..
    if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
        echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" %HDDL_INSTALL_DIR%\lib /S /I /Y /R
        xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" "%HDDL_INSTALL_DIR%\lib" /S /I /Y /R
    )
)

echo PATH=%PATH%

endlocal & set PATH=%PATH%

exit /B %ERRORLEVEL%

:DownloadFile
set DEPENDENCY=%~1
set DEPENDENCY_URL=%~2
set DEPENDENCY_FILE=%~nx2
set DEPENDENCY_EXT=%~x2

if not "%DEPENDENCY_URL%"=="" (
    if not exist "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%" (
        mkdir "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%"
        for /L %%a in (1,1,10) do (
            powershell -command "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; iwr -outf '%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE%' %DEPENDENCY_URL%"
            call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE% -o%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%
            if !ERRORLEVEL! equ 0 goto :DownloadFileContinue
            timeout /T 15
        )
    )
)
goto:eof

:DownloadFileContinue
if "%DEPENDENCY_EXT%" == ".txz" call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%\_%DEPENDENCY_FILE:txz=tar% -o%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%
del "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE%" /F /Q
goto:eof
@ -1,68 +0,0 @@
#!/bin/bash

# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

if [ "$1" = "" ]; then
    dep_arch=64
else
    dep_arch=$1
fi

item_path=""
add_path() {
    component=$1
    item_path=""
    echo "Read file: dependencies_${dep_arch}.txt"
    grep_component="\b${component}\b"

    if [[ $(grep -m 1 "$grep_component" "dependencies_${dep_arch}.txt") ]]; then
        archive_path=$(grep -m 1 "$grep_component" "dependencies_${dep_arch}.txt" | sed -E "s/${component}=//g")
        library_rpath=$(grep -m 1 "$grep_component" "ld_library_rpath_${dep_arch}.txt" | sed -E "s/${component}=//g")
        filename=$(basename "$archive_path")
        if [[ (! -d "$DL_SDK_TEMP/test_dependencies/$component/$filename") ||
              (-d "$DL_SDK_TEMP/test_dependencies/$component/$filename" &&
               ! $(ls -A "$DL_SDK_TEMP/test_dependencies/$component/$filename")) ]]; then
            mkdir -p "$DL_SDK_TEMP/test_dependencies/$component/$filename"
            wget -q "$archive_path" -O "$DL_SDK_TEMP/test_dependencies/$filename"
            if [[ $filename == *.zip ]]; then
                unzip "$DL_SDK_TEMP/test_dependencies/$filename" -d "$DL_SDK_TEMP/test_dependencies/$component/$filename"
            elif [[ $filename == *.7z ]]; then
                7za x -y "$DL_SDK_TEMP/test_dependencies/$filename" -o "$DL_SDK_TEMP/test_dependencies/$component/$filename"
            else
                tar xf "$DL_SDK_TEMP/test_dependencies/$filename" -C "$DL_SDK_TEMP/test_dependencies/$component/$filename"
            fi
            rm "$DL_SDK_TEMP/test_dependencies/$filename"
        fi
        item_path=$component/$filename/$library_rpath
    fi
}

runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB)

export_library_path() {
    export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH
}

export_env_variable() {
    export "$2"="$DL_SDK_TEMP/test_dependencies/$1"
}

ma2480_path=""
for i in "${runtimes[@]}"
do
    add_path "$i"
    export_library_path "$item_path"
    if [ "$i" == "VPU_FIRMWARE_USB-MA2X8X" ]
    then
        ma2480_path="$item_path"
    fi
    if [ "$i" == "HDDL" ]
    then
        cp -r "$DL_SDK_TEMP/test_dependencies/$ma2480_path/"* "$DL_SDK_TEMP/test_dependencies/$item_path"
        export HDDL_INSTALL_DIR="$DL_SDK_TEMP/test_dependencies/$item_path/.."
    fi
done

echo DATA_PATH="$DATA_PATH"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:lib:/usr/local/lib
@ -47,9 +47,8 @@ add_subdirectory(snippets)
add_custom_target(ie_libraries ALL
                  DEPENDS inference_engine_transformations inference_engine_legacy
                          inference_engine inference_engine_preproc
                          inference_engine_ir_v7_reader inference_engine_ir_reader
                          inference_engine_lp_transformations inference_engine_snippets
                          ir_frontend)
                          inference_engine_ir_v7_reader ir_ngraph_frontend
                          inference_engine_lp_transformations inference_engine_snippets)

if(NGRAPH_ONNX_FRONTEND_ENABLE)
    add_dependencies(ie_libraries onnx_ngraph_frontend)

@ -28,7 +28,7 @@ namespace CLDNNPlugin {

static void createDirectory(std::string _path) {
#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
    std::wstring widepath = FileUtils::multiByteCharToWString(_path.c_str());
    std::wstring widepath = ov::util::string_to_wstring(_path.c_str());
    const wchar_t* path = widepath.c_str();
#else
    const char* path = _path.c_str();

@ -60,8 +60,6 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
void CLDNNGraph::UpdateLayersMaps() {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateLayersMaps");
    primitiveIDs = m_program->primitiveIDs;
    primitivesToIRLayersMap = m_program->primitivesToIRLayersMap;
    IRToNgraphLayersMap = m_program->IRToNgraphLayersMap;
    prevPrimitiveIDs = m_program->prevPrimitiveIDs;
    profilingIDs = m_program->profilingIDs;
    perfMap = m_program->perfMap;

@ -219,25 +217,6 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
        return res;
    };

    auto split_string = [](std::string src, std::string delimiter = ",") -> std::vector<std::string> {
        std::vector<std::string> tokens;
        std::string tokenBuf;
        size_t prev = 0, pos = 0, srcLength = src.length(), delimLength = delimiter.length();
        do {
            pos = src.find(delimiter, prev);
            if (pos == std::string::npos) {
                pos = srcLength;
            }
            tokenBuf = src.substr(prev, pos - prev);
            if (!tokenBuf.empty()) {
                tokens.push_back(tokenBuf);
            }
            prev = pos + delimLength;
        } while (pos < srcLength && prev < srcLength);

        return tokens;
    };

    auto remove_type_from_name = [](const std::string& name) -> std::string {
        auto it = std::find(name.begin(), name.end(), ':');
        if (it == name.end() || (it + 1) == name.end())

@ -246,22 +225,13 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
        return std::string((it+1), name.end());
    };

    auto extIdMap = GetNetwork()->get_ext_id_mapping();

    auto find_origin_layers = [&](const std::string& name) -> std::vector<std::string> {
        if (primitivesToIRLayersMap.find(name) == primitivesToIRLayersMap.end())
        if (extIdMap.find(name) == extIdMap.end()) {
            return {};

        auto cnn_names = primitivesToIRLayersMap.at(name);
        std::vector<std::string> res;

        for (auto& cnn_name : cnn_names) {
            if (IRToNgraphLayersMap.find(cnn_name) != IRToNgraphLayersMap.end()) {
                auto ngraph_names = split_string(IRToNgraphLayersMap.at(cnn_name));
                res.insert(res.end(), ngraph_names.begin(), ngraph_names.end());
            } else {
                res.push_back(cnn_name);
            }
        }
        return res;
        return { extIdMap.at(name) };
    };

    auto get_inputs = [&] (const cldnn::primitive_info& prim_info) {

@ -599,13 +569,21 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
    auto allIds = GetNetwork()->get_all_primitive_org_ids();
    auto executedPrimitives = GetNetwork()->get_executed_primitives();
    auto primitivesInfo = GetNetwork()->get_primitives_info();
    auto extIdMap = GetNetwork()->get_ext_id_mapping();

    auto getUpperCaseName = [&](std::string name) {
    auto getUpperCaseName = [](std::string name) {
        if (name.length() > 0)
            name[0] = toupper(name[0]);
        return name;
    };

    auto getClearName = [](std::string name) {
        if (name.find(":") != std::string::npos) {
            name = name.substr(name.find(":") + 1, name.length());
        }
        return name;
    };

    auto getFromProfiling = [&](std::string primId) -> bool {
        auto perfIter = perfMap.find(primId);

@ -696,10 +674,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
            }
        }

        std::string layerName = primId;
        if (primId.find(":") != std::string::npos) {
            layerName = primId.substr(primId.find(":") + 1, primId.length());
        }
        std::string layerName = getClearName(primId);

        for (auto& pi : primitivesInfo) {
            if (pi.original_id == primId) {

@ -735,10 +710,27 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
    }

    // Step 3. Checking primitives which have been deleted from execution order but added by clDNNPlugin
    for (auto& primId : profilingIDs)
    for (auto& primId : profilingIDs) {
        if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
            getFromProfiling(primId);
        }
    }

    for (auto& p : extIdMap) {
        if (p.first.find(p.second) != std::string::npos) {
            continue;
        }
        auto first_res = result.find(getClearName(p.first));
        auto second_res = result.find(getClearName(p.second));

        if (first_res != result.end() && second_res != result.end() && first_res != second_res) {
            std::swap(first_res->second.cpu_uSec, second_res->second.cpu_uSec);
            std::swap(first_res->second.realTime_uSec, second_res->second.realTime_uSec);
            std::swap(first_res->second.status, second_res->second.status);
            std::swap(first_res->second.exec_type, second_res->second.exec_type);
            std::swap(first_res->second.execution_index, second_res->second.execution_index);
        }
    }
    return result;
}
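
// The extIdMap loop above exchanges profiling entries when a primitive is known
// both by an internal id (p.first) and an external name (p.second), so the
// externally visible layer reports the measured cost. A reduced sketch of that
// exchange, with a hypothetical Counters struct standing in for
// InferenceEngineProfileInfo:
struct Counters { long long realTime_uSec; long long cpu_uSec; int execution_index; };
inline void swapProfilingEntries(std::map<std::string, Counters>& result,
                                 const std::string& internalName,
                                 const std::string& externalName) {
    auto first_res = result.find(internalName);
    auto second_res = result.find(externalName);
    if (first_res != result.end() && second_res != result.end() && first_res != second_res) {
        std::swap(first_res->second.cpu_uSec, second_res->second.cpu_uSec);
        std::swap(first_res->second.realTime_uSec, second_res->second.realTime_uSec);
        std::swap(first_res->second.execution_index, second_res->second.execution_index);
    }
}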
@ -61,8 +61,6 @@ protected:
    InferenceEngine::gpu::ClContext::Ptr m_context;
    std::vector<std::shared_ptr<cldnn::network>> m_networks;
    std::map<std::string, cldnn::primitive_id> primitiveIDs;
    std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;
    std::map<cldnn::primitive_id, std::string> IRToNgraphLayersMap;
    std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;

    std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;

@ -12,6 +12,7 @@
#include "cldnn_remote_context.h"
#include "cldnn_executable_network.h"
#include "cldnn_itt.h"
#include "cldnn/runtime/debug_configuration.hpp"
#include <ie_algorithm.hpp>
#include <debug.h>

@ -622,6 +623,10 @@ void CLDNNInferRequest::allocate_inputs() {
            IE_THROW() << "Input layout for " << name << " is not found";
        }

        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 2) {
            GPU_DEBUG_COUT << "[" << name << ": input blob]" << std::endl;
        }
        if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
            TensorDesc desc_fp32 = desc;
            desc_fp32.setPrecision(Precision::FP32);

@ -673,6 +678,10 @@ void CLDNNInferRequest::allocate_outputs() {
        const cldnn::layout output_layout = m_graph->GetNetwork()->get_output_memory(outputID)->get_layout();
        const TensorDesc& desc = no.second->getTensorDesc();

        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 2) {
            GPU_DEBUG_COUT << "[" << no.first << ": output blob]" << std::endl;
        }
        auto blobPtr = create_device_blob(desc, output_layout);
        _deviceOutputs[no.first] = blobPtr;
        _outputs[no.first] = blobPtr;

@ -284,14 +284,12 @@ std::vector<cldnn::primitive_id> Program::GetInputPrimitiveIDs(const std::shared
void Program::AddPrimitiveToProfiler(const std::shared_ptr<ngraph::Node>& op,
                                     cldnn::primitive_id customOutputId) {
    auto id = layer_type_name_ID(op);
    primitivesToIRLayersMap[id] = { op->get_friendly_name() };
    primitiveIDs[id] = customOutputId.empty() ? id : customOutputId;
    profilingIDs.push_back(id);
}

void Program::AddPrimitiveToProfiler(cldnn::primitive_id id, const std::shared_ptr<ngraph::Node>& op,
                                     cldnn::primitive_id customOutputId) {
    primitivesToIRLayersMap[id] = { op->get_friendly_name() };
    primitiveIDs[id] = customOutputId.empty() ? id : customOutputId;
    profilingIDs.push_back(id);
}

@ -299,7 +297,6 @@ void Program::AddPrimitiveToProfiler(cldnn::primitive_id id, const std::shared_p
void Program::AddInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
                                          const std::shared_ptr<ngraph::Node>& op) {
    InitProfileInfo(id, layer_type_lower(op), false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, parentId);
    primitivesToIRLayersMap[id] = { op->get_friendly_name() };
    primitiveIDs[id] = id;
    profilingIDs.push_back(id);
}

@ -328,28 +325,24 @@ void Program::InitProfileInfo(const std::string& layerName,

// TODO: Does it make sense to add such method to ngraph core?
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
    std::list<std::shared_ptr<ngraph::Node>> nodes_to_process = { node };
    while (!nodes_to_process.empty()) {
        auto current_node = nodes_to_process.front();
        nodes_to_process.pop_front();

        for (size_t i = 0; i < current_node->get_input_size(); i++) {
            auto input_node = current_node->get_input_node_shared_ptr(i);

            // If the input is constant, then drop it from the processing list
            if (std::dynamic_pointer_cast<ngraph::op::v0::Constant>(input_node) != nullptr)
                continue;

            // If the node doesn't have any parents and it's not a constant, then we deal with a dynamic path
            if (input_node->get_input_size() == 0) {
    std::set<std::shared_ptr<ngraph::Node>> nodes_processed = {};
    std::function<bool(const std::shared_ptr<ngraph::Node>&)> is_const_node = [&nodes_processed, &is_const_node](const std::shared_ptr<ngraph::Node>& node) {
        if (nodes_processed.count(node)) return true;
        nodes_processed.insert(node);
        // If the input is constant, then drop it from the processing list
        if (std::dynamic_pointer_cast<ngraph::op::v0::Constant>(node) != nullptr)
            return true;
        // If the node doesn't have any parents and it's not a constant, then we deal with a dynamic path
        if (node->get_input_size() == 0)
            return false;
        for (size_t i = 0; i < node->get_input_size(); i++) {
            auto input_node = node->get_input_node_shared_ptr(i);
            if (!is_const_node(input_node))
                return false;
        }

            nodes_to_process.insert(nodes_to_process.end(), input_node);
        }
    }

    return true;
        return true;
    };
    return is_const_node(node);
}
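
// A standalone sketch of the memoized recursion introduced above, on a plain
// DAG instead of ngraph nodes (DagNode and isOnConstPathSketch are hypothetical
// names): a node is on a constant path iff every transitive input bottoms out
// in a constant; `seen` both memoizes visited nodes and breaks cycles.
struct DagNode { bool is_constant; std::vector<DagNode*> inputs; };
inline bool isOnConstPathSketch(DagNode* n, std::set<DagNode*>& seen) {
    if (seen.count(n)) return true;      // already visited: treat as resolved, like nodes_processed
    seen.insert(n);
    if (n->is_constant) return true;
    if (n->inputs.empty()) return false; // parentless non-constant input => dynamic path
    for (auto* in : n->inputs)
        if (!isOnConstPathSketch(in, seen)) return false;
    return true;
}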
} // namespace CLDNNPlugin
|
||||
|
@ -76,8 +76,6 @@ public:
|
||||
static const cldnn::primitive_id m_postCustomLayerTag;
|
||||
|
||||
std::map<std::string, cldnn::primitive_id> primitiveIDs;
|
||||
std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;
|
||||
std::map<cldnn::primitive_id, std::string> IRToNgraphLayersMap;
|
||||
std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;
|
||||
std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;
|
||||
|
||||
|
@ -42,7 +42,8 @@ void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Batc
|
||||
inputs[0], // block_shape
|
||||
inputs[1], // crops_begin
|
||||
inputs[2], // crops_end
|
||||
out_size);
|
||||
out_size,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(batchToSpacePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -31,8 +31,13 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
if (targetFormat.value != DefaultFormatForDims(inputRank).value) {
|
||||
auto reorderName = layerName + "_cldnn_in_reorder";
|
||||
auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(0));
|
||||
auto reorderPrim = cldnn::reorder(reorderName, inputPrimitive, targetFormat, targetDatatype);
|
||||
|
||||
auto reorderPrim = cldnn::reorder(reorderName,
|
||||
inputPrimitive,
|
||||
targetFormat,
|
||||
targetDatatype,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
|
||||
|
||||
@ -66,7 +71,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
|
||||
auto targetShape = CldnnTensorFromIEDims(inputShape);
|
||||
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitive, targetShape);
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitive, targetShape, op->get_friendly_name());
|
||||
p.AddPrimitive(reshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
|
||||
|
||||
@ -75,7 +80,9 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
|
||||
auto broadcastPrim = cldnn::broadcast(layerName,
|
||||
inputPrimitive,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)));
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
{},
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(broadcastPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -45,7 +45,8 @@ void CreateConcatOp(Program& p, const std::shared_ptr<ngraph::op::v0::Concat>& o
|
||||
layerName,
|
||||
inputPrimitives,
|
||||
GetConcatAxis(op->get_axis(), op->get_input_shape(0).size()),
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)));
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(concatPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "ngraph/op/util/op_types.hpp"
|
||||
|
||||
#include "cldnn/primitives/data.hpp"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
|
||||
@ -169,6 +170,10 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
|
||||
if (bufIter != p.blobMemCache.end()) {
|
||||
constPrimID = bufIter->second;
|
||||
} else {
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
GPU_DEBUG_COUT << "[" << initialconstPrimID << ": constant]" << std::endl;
|
||||
}
|
||||
cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false);
|
||||
auto& stream = p.GetEngine().get_program_stream();
|
||||
cldnn::mem_lock<char> lock{mem, stream};
|
||||
@ -199,7 +204,7 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
|
||||
} else {
|
||||
std::memcpy(&buf[0], &data[0], bufSize);
|
||||
}
|
||||
p.AddPrimitive(cldnn::data(initialconstPrimID, mem));
|
||||
p.AddPrimitive(cldnn::data(initialconstPrimID, mem, op->get_friendly_name()));
|
||||
p.blobMemCache[std::make_pair(data, constDims)] = initialconstPrimID;
|
||||
constPrimID = initialconstPrimID;
|
||||
}
|
||||
|
@ -19,8 +19,13 @@ void CreateConvertLikeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Conve
|
||||
|
||||
auto outDataType = DataTypeFromPrecision(op->get_input_element_type(1));
|
||||
|
||||
auto reorderPrim = cldnn::reorder(layerName, inputPrimitives[0], cldnn::format::any, outDataType);
|
||||
|
||||
auto reorderPrim = cldnn::reorder(layerName,
|
||||
inputPrimitives[0],
|
||||
cldnn::format::any,
|
||||
outDataType,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
}
|
||||
@ -32,7 +37,13 @@ void CreateConvertOp(Program& p, const std::shared_ptr<ngraph::op::v0::Convert>&
|
||||
|
||||
auto outDataType = DataTypeFromPrecision(op->get_destination_type());
|
||||
|
||||
auto reorderPrim = cldnn::reorder(layerName, inputPrimitives[0], cldnn::format::any, outDataType);
|
||||
auto reorderPrim = cldnn::reorder(layerName,
|
||||
inputPrimitives[0],
|
||||
cldnn::format::any,
|
||||
outDataType,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -84,7 +84,8 @@ void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
DataTypeFromPrecision(outPrecision),
|
||||
weights_have_group_dim);
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(convPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -112,7 +113,8 @@ void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::Convo
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
DataTypeFromPrecision(outPrecision),
|
||||
weights_have_group_dim);
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(convPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -146,7 +148,8 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::o
|
||||
std::swap(permute_order[1], permute_order[0]);
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
ConvertPermuteOrder(permute_order, weights_rank));
|
||||
ConvertPermuteOrder(permute_order, weights_rank),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
@ -159,14 +162,15 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::o
|
||||
|
||||
auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
|
||||
auto deconvPrim = cldnn::deconvolution(layerName,
|
||||
inputs[0],
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim);
|
||||
inputs[0],
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(deconvPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -202,7 +206,8 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngra
|
||||
std::swap(permute_order[2], permute_order[1]);
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
ConvertPermuteOrder(permute_order, weights_rank));
|
||||
ConvertPermuteOrder(permute_order, weights_rank),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
@ -214,14 +219,15 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngra
|
||||
const bool weights_have_group_dim = true;
|
||||
|
||||
auto deconvPrim = cldnn::deconvolution(layerName,
|
||||
inputs[0],
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim);
|
||||
inputs[0],
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(deconvPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -247,7 +253,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
params.stride,
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims));
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(convPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -280,7 +287,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
kernel);
|
||||
kernel,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(defConvPrimInterp);
|
||||
p.AddInnerPrimitiveToProfiler(defConvLayerNameInterp, defConvLayerNameConv, op);
|
||||
auto defConvPrim = cldnn::deformable_conv(defConvLayerNameConv,
|
||||
@ -288,7 +296,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
CldnnTensorFromIEDims(outDims));
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(defConvPrim);
|
||||
p.AddPrimitiveToProfiler(defConvLayerNameConv, op);
|
||||
}
|
||||
@ -313,7 +322,8 @@ void CreateBinaryConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1:
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
params.groups,
|
||||
op->get_pad_value(),
|
||||
calc_precision);
|
||||
calc_precision,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(convPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "cldnn/primitives/ctc_greedy_decoder.hpp"
|
||||
#include "cldnn/primitives/reorder.hpp"
|
||||
#include "cldnn/primitives/mutable_data.hpp"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
@ -33,7 +34,10 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -70,11 +74,16 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
DefaultFormatForDims(op->get_output_shape(1).size()),
|
||||
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
||||
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
|
||||
}
|
||||
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout));
|
||||
|
||||
cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write";
|
||||
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]);
|
||||
p.primitivesToIRLayersMap[ctc_gd_mutable_id_w] = { op->get_friendly_name() };
|
||||
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w,
|
||||
shared_memory[0],
|
||||
op->get_friendly_name());
|
||||
p.primitiveIDs[ctc_gd_mutable_id_w] = ctc_gd_mutable_id_w;
|
||||
p.AddPrimitive(ctc_gd_mutable_prim);
|
||||
reorderedInputs.push_back(ctc_gd_mutable_id_w);
|
||||
@ -86,7 +95,8 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
reorderedInputs,
|
||||
blank_index,
|
||||
ctc_merge_repeated,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)));
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
// clDNN primitive supports only i32 as output data type
|
||||
primitive.output_data_type = DataTypeFromPrecision(ngraph::element::i32);
|
||||
@ -99,8 +109,10 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
|
||||
if (num_output == 2) {
|
||||
cldnn::primitive_id ctc_gd_mutable_id_r = layer_type_name_ID(op) + ".1";
|
||||
auto ctc_gd_mutable_prim_r = cldnn::mutable_data(ctc_gd_mutable_id_r, { CTCGreedyDecoderLayerName }, shared_memory[0]);
|
||||
p.primitivesToIRLayersMap[ctc_gd_mutable_id_r] = { op->get_friendly_name() };
|
||||
auto ctc_gd_mutable_prim_r = cldnn::mutable_data(ctc_gd_mutable_id_r,
|
||||
{ CTCGreedyDecoderLayerName },
|
||||
shared_memory[0],
|
||||
op->get_friendly_name());
|
||||
p.primitiveIDs[ctc_gd_mutable_id_r] = ctc_gd_mutable_id_r;
|
||||
p.AddPrimitive(ctc_gd_mutable_prim_r);
|
||||
}
|
||||
|
@ -63,7 +63,8 @@ void CreateCumSumOp(Program& p, const std::shared_ptr<ngraph::op::v0::CumSum>& o
|
||||
inputPrimitives[0],
|
||||
GetCumSumAxis(axis, rank),
|
||||
exclusive,
|
||||
reverse);
|
||||
reverse,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -145,7 +145,10 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
reorderPrimName,
|
||||
inputPrimitives[param.portIndex],
|
||||
param.format,
|
||||
DataTypeFromPrecision(op->get_input_element_type(param.portIndex)));
|
||||
DataTypeFromPrecision(op->get_input_element_type(param.portIndex)),
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
@ -229,7 +232,8 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
customLayer->CompilerOptions(),
|
||||
outputLayout,
|
||||
gws,
|
||||
lws);
|
||||
lws,
|
||||
op->get_friendly_name());
|
||||
|
||||
auto prevLayerName = genericLayerName;
|
||||
if (outputLayout.format != cldnn::format::any) {
|
||||
@ -239,7 +243,10 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
cldnn::reorder(reorderPrimName,
|
||||
genericLayerName,
|
||||
DefaultFormatForDims(op->get_output_shape(0).size()),
|
||||
customPrim.output_layout.data_type));
|
||||
customPrim.output_layout.data_type,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name()));
|
||||
prevLayerName = reorderPrimName;
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
}
|
||||
|
@ -33,7 +33,8 @@ void CreateDepthToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v0::Dept
|
||||
auto depthToSpacePrim = cldnn::depth_to_space(layerName,
|
||||
inputPrimitives[0],
|
||||
blockSize,
|
||||
mode);
|
||||
mode,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(depthToSpacePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -75,7 +75,8 @@ void CreateDetectionOutputOp(Program& p, const std::shared_ptr<ngraph::op::v0::D
|
||||
input_height,
|
||||
decrease_label_id,
|
||||
clip_before_nms,
|
||||
clip_after_nms);
|
||||
clip_after_nms,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(detectionPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -46,7 +46,13 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cl
|
||||
if (targetFormat.value != DefaultFormatForDims(inputRank).value) {
|
||||
auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder";
|
||||
auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(i));
|
||||
auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
|
||||
auto reorderPrim = cldnn::reorder(reorderName,
|
||||
inputPrimitives[i],
|
||||
targetFormat,
|
||||
targetDatatype,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
|
||||
@ -61,7 +67,7 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cl
|
||||
|
||||
auto targetShape = CldnnTensorFromIEDims(inputShape);
|
||||
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());
|
||||
p.AddPrimitive(reshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
|
||||
|
||||
@ -74,7 +80,8 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cl
|
||||
inputPrimitives,
|
||||
mode,
|
||||
{},
|
||||
out_dt);
|
||||
out_dt,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(eltwisePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -49,7 +49,10 @@ void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngraph::op
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -62,7 +65,8 @@ void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngraph::op
|
||||
reorderedInputs,
|
||||
cldnn::embedding_bag::offsets_sum,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
defaultIndex);
|
||||
defaultIndex,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(embeddingBagPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -86,7 +90,10 @@ void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -98,7 +105,9 @@ void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
|
||||
reorderedInputs,
|
||||
cldnn::embedding_bag::packed_sum,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)));
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
-1,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(embeddingBagPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -140,7 +149,10 @@ void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngraph::op::
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -153,7 +165,8 @@ void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngraph::op::
|
||||
reorderedInputs,
|
||||
cldnn::embedding_bag::segments_sum,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
defaultIndex);
|
||||
defaultIndex,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(embeddingBagPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -38,7 +38,8 @@ void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr<ngraph::op::v
|
||||
strides,
|
||||
rates,
|
||||
auto_pad,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)));
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(extractImagePatchesPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -31,7 +31,8 @@ void CreateFakeQuantizeOp(Program& p, const std::shared_ptr<ngraph::op::v0::Fake
|
||||
output_low_id,
|
||||
output_high_id,
|
||||
levels,
|
||||
dt);
|
||||
dt,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(quantizationPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -30,7 +30,10 @@ void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
|
||||
reorderedInputs[portIndex] = reorderPrimName;
|
||||
@ -43,7 +46,8 @@ void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather
|
||||
reorderedInputs[0],
|
||||
reorderedInputs[1],
|
||||
reorderedInputs[2],
|
||||
reorderedInputs[3]);
|
||||
reorderedInputs[3],
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(gatherTreePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -77,7 +77,10 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
|
||||
reorderedInputs[portIndex] = reorderPrimName;
|
||||
@ -94,7 +97,8 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
|
||||
outLayout,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
batch_dim,
|
||||
support_neg_ind);
|
||||
support_neg_ind,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(gatherPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -55,7 +55,8 @@ void CreateGatherElementsOp(Program& p, const std::shared_ptr<ngraph::op::v6::Ga
|
||||
inputPrimitives[1],
|
||||
outLayout,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
GetGatherAxis(axis, rank));
|
||||
GetGatherAxis(axis, rank),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -22,10 +22,11 @@ void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v5::GatherND
|
||||
auto batch_dims = op->get_batch_dims();
|
||||
|
||||
auto primitive = cldnn::gather_nd(layerName,
|
||||
inputPrimitives[0],
|
||||
inputPrimitives[1],
|
||||
indices_rank,
|
||||
batch_dims);
|
||||
inputPrimitives[0],
|
||||
inputPrimitives[1],
|
||||
indices_rank,
|
||||
batch_dims,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -19,7 +19,8 @@ void CreateGRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::GRN>& op) {
|
||||
auto primitive = cldnn::grn(layerName,
|
||||
inputPrimitives[0],
|
||||
op->get_bias(),
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)));
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -193,7 +193,8 @@ void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4::Inter
|
||||
cldnnSampleType,
|
||||
shapeCalcMode,
|
||||
coordTransMode,
|
||||
nearestMode);
|
||||
nearestMode,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(resamplePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -29,11 +29,11 @@ using Loop = ngraph::op::v5::Loop;
|
||||
namespace CLDNNPlugin {
|
||||
|
||||
template<class DATA_TYPE>
|
||||
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
|
||||
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) {
|
||||
auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
|
||||
cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
|
||||
*ptr.begin() = num;
|
||||
return {id, mem};
|
||||
return {id, mem, ext_prim_id};
|
||||
}
|
||||
|
||||
static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr<ngraph::Node>& op,
|
||||
@ -44,7 +44,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
|
||||
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
|
||||
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
|
||||
auto mem = p.GetEngine().allocate_memory(output_layout);
|
||||
auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
|
||||
auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency
|
||||
return md;
|
||||
}
|
||||
|
||||
@ -161,8 +161,7 @@ void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {
|
||||
}
|
||||
const cldnn::primitive_id num_iteration_id = layerName + "_numIteration";
|
||||
{
|
||||
cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0);
|
||||
p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() };
|
||||
cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0, op->get_friendly_name());
|
||||
p.primitiveIDs[num_iteration_id] = num_iteration_id;
|
||||
p.AddPrimitive(num_iteration);
|
||||
p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op);
|
||||
@ -216,7 +215,8 @@ void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {
|
||||
back_edges, /* back edge mapping */
|
||||
num_iterations, /* max iteration, i.e. length of iteration axis */
|
||||
body_current_iteration_id,
|
||||
body_execution_condition_id);
|
||||
body_execution_condition_id,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(loopPrimitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -38,7 +38,8 @@ void CreateLRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::LRN>& op) {
|
||||
static_cast<float>(op->get_bias()),
|
||||
static_cast<float>(op->get_alpha()),
|
||||
static_cast<float>(op->get_beta()),
|
||||
GetNormRegion(axis_value));
|
||||
GetNormRegion(axis_value),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(lrnPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -89,7 +89,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
auto permuteName = op->get_friendly_name() + "/transpose_b";
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
cldnn_permute_order);
|
||||
cldnn_permute_order,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
weightsName = permuteName;
|
||||
@ -108,7 +109,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
auto permuteName = op->get_friendly_name() + "/transpose_a";
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
inputName,
|
||||
cldnn_permute_order);
|
||||
cldnn_permute_order,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
inputName = permuteName;
|
||||
@ -124,7 +126,10 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
IE_THROW() << "Inconsistent reshape in Matmul op: " << op->get_friendly_name();
|
||||
|
||||
auto reshapeInName = op->get_friendly_name() + suffix;
|
||||
auto reshapeInPrim = cldnn::reshape(reshapeInName, inputName, CldnnTensorFromIEDims(reshapeSize));
|
||||
auto reshapeInPrim = cldnn::reshape(reshapeInName,
|
||||
inputName,
|
||||
CldnnTensorFromIEDims(reshapeSize),
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(reshapeInPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
|
||||
return reshapeInName;
|
||||
@ -144,6 +149,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
weightsName,
|
||||
"",
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)),
|
||||
op->get_friendly_name(),
|
||||
cldnn::padding(),
|
||||
input_rank);
|
||||
|
||||
@ -153,7 +159,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
if (reshape_fc) {
|
||||
auto outputShape = CldnnTensorFromIEDims(op->get_output_shape(0));
|
||||
auto outReshapeName = layerName + "_cldnn_out_reshape";
|
||||
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape);
|
||||
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(outReshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op);
|
||||
@ -188,7 +194,13 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
if (targetFormat.value != DefaultFormatForDims(inputDimsN).value) {
|
||||
auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder";
|
||||
auto targetDatatype = DataTypeFromPrecision(op->get_output_element_type(0));
|
||||
auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
|
||||
auto reorderPrim = cldnn::reorder(reorderName,
|
||||
inputPrimitives[i],
|
||||
targetFormat,
|
||||
targetDatatype,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
|
||||
@ -227,7 +239,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
|
||||
auto targetShape = gemmSpecificTensor(inputDims);
|
||||
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(reshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
|
||||
@ -248,7 +260,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
transA,
|
||||
transB,
|
||||
alpha,
|
||||
beta);
|
||||
beta,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(gemmPrim);
|
||||
|
||||
@ -258,7 +271,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
if (outDimsN < 4) {
|
||||
auto outputShape = CldnnTensorFromIEDims(outDims);
|
||||
auto outReshapeName = layerName + "_cldnn_out_reshape";
|
||||
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape);
|
||||
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(outReshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op);
|
||||
|
@ -24,7 +24,8 @@ static void CreateCommonMVNOp(Program& p, const std::shared_ptr<ngraph::Node>& o
|
||||
normalize_variance,
|
||||
eps,
|
||||
eps_inside_sqrt,
|
||||
across_channels);
|
||||
across_channels,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(mvnPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "cldnn/primitives/reorder.hpp"
|
||||
#include "cldnn/primitives/mutable_data.hpp"
|
||||
#include "cldnn/primitives/non_max_suppression.hpp"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
|
||||
@ -41,7 +42,10 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -63,6 +67,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
std::size_t num_output = op->get_output_size();

std::vector<cldnn::memory::ptr> shared_memory;
GPU_DEBUG_GET_INSTANCE(debug_config);
switch (num_output) {
case 3: {
    auto mutable_precision_second = op->get_output_element_type(2);
@ -74,11 +79,15 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
    DefaultFormatForDims(op->get_output_shape(2).size()),
    CldnnTensorFromIEDims(op->get_output_shape(2)));

    GPU_DEBUG_IF(debug_config->verbose >= 2) {
        GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
    }
    shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));

    cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
    auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back());
    p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_second] = { op->get_friendly_name() };
    auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second,
        shared_memory.back(),
        op->get_friendly_name());
    p.primitiveIDs[non_max_supression_mutable_id_w_second] = non_max_supression_mutable_id_w_second;
    p.AddPrimitive(nms_mutable_prim_second);
    inputPrimitives.push_back(non_max_supression_mutable_id_w_second);
@ -91,11 +100,15 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
    cldnn::format::bfyx,
    cldnn::tensor(static_cast<int32_t>(outputIndices), 3, 1, 1));

    GPU_DEBUG_IF(debug_config->verbose >= 2) {
        GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
    }
    shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));

    cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
    auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back());
    p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_first] = { op->get_friendly_name() };
    auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first,
        shared_memory.back(),
        op->get_friendly_name());
    p.primitiveIDs[non_max_supression_mutable_id_w_first] = non_max_supression_mutable_id_w_first;
    p.AddPrimitive(nms_mutable_prim_first);
    inputPrimitives.push_back(non_max_supression_mutable_id_w_first);
@ -112,7 +125,9 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
    reorderedInputs[1],
    static_cast<int>(outputIndices),
    op->m_center_point_box,
    op->m_sort_result_descending);
    op->m_sort_result_descending,
    "", "", "", "", "", "",
    op->get_friendly_name());

prim.output_data_type = DataTypeFromPrecision(out_type);

@ -136,15 +151,19 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
switch (num_output) {
case 3: {
    cldnn::primitive_id non_max_supression_id_r_second = layer_type_name_ID(op) + ".2";
    auto nms_mutable_prim_r_second = cldnn::mutable_data(non_max_supression_id_r_second, { nonMaxSupressionLayerName }, shared_memory.front());
    p.primitivesToIRLayersMap[non_max_supression_id_r_second] = { op->get_friendly_name() };
    auto nms_mutable_prim_r_second = cldnn::mutable_data(non_max_supression_id_r_second,
        { nonMaxSupressionLayerName },
        shared_memory.front(),
        op->get_friendly_name());
    p.primitiveIDs[non_max_supression_id_r_second] = non_max_supression_id_r_second;
    p.AddPrimitive(nms_mutable_prim_r_second);
}
case 2: {
    cldnn::primitive_id non_max_supression_id_r_first = layer_type_name_ID(op) + ".1";
    auto nms_mutable_prim_r_first = cldnn::mutable_data(non_max_supression_id_r_first, { nonMaxSupressionLayerName }, shared_memory.back());
    p.primitivesToIRLayersMap[non_max_supression_id_r_first] = { op->get_friendly_name() };
    auto nms_mutable_prim_r_first = cldnn::mutable_data(non_max_supression_id_r_first,
        { nonMaxSupressionLayerName },
        shared_memory.back(),
        op->get_friendly_name());
    p.primitiveIDs[non_max_supression_id_r_first] = non_max_supression_id_r_first;
    p.AddPrimitive(nms_mutable_prim_r_first);
}

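Note: the two switch blocks above are the halves of one pattern for multi-output primitives: before the main primitive is built, a shared GPU buffer is allocated and registered as a "_md_write_*" cldnn::mutable_data appended to the inputs; afterwards a second mutable_data (IDs ".1"/".2") re-exposes the same buffer as an output that depends on the NMS primitive. A rough standalone model of that aliasing, with mock types standing in for cldnn::memory::ptr and Program:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-ins: a shared buffer plus a primitive that aliases it.
struct memory_buf { std::vector<int> data; };
using memory_ptr = std::shared_ptr<memory_buf>;

struct mutable_data {
    std::string id;
    std::vector<std::string> deps;  // the read-back variant depends on the main prim
    memory_ptr mem;
    std::string ext_prim_id;        // friendly name of the originating op
};

int main() {
    std::string node = "nonmaxsuppression:nms";
    auto shared = std::make_shared<memory_buf>();

    // Writer: registered before the main primitive and appended to its inputs.
    mutable_data writer{node + "_md_write_second", {}, shared, node};
    // Reader: registered after the main primitive; same buffer, extra dependency.
    mutable_data reader{node + ".2", {node}, shared, node};

    std::cout << writer.id << " and " << reader.id
              << " alias one buffer: " << (writer.mem == reader.mem) << "\n";
    return 0;
}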
@ -45,14 +45,15 @@ void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::Norma

std::memcpy(&buf[0], scale->get_data_ptr(), bufSize);
auto scalesName = layerName + "_cldnn_input_scales";
p.AddPrimitive(cldnn::data(scalesName, mem));
p.AddPrimitive(cldnn::data(scalesName, mem, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(scalesName, layerName, op);

auto normPrim = cldnn::normalize(layerName,
    inputPrimitives[0],
    scalesName,
    across_spatial,
    eps);
    eps,
    op->get_friendly_name());

p.AddPrimitive(normPrim);
p.AddPrimitiveToProfiler(op);

@ -53,7 +53,8 @@ void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::OneHot>& o
    DataTypeFromPrecision(op->get_output_element_type(0)),
    static_cast<uint16_t>(axis),
    on_value,
    off_value);
    off_value,
    op->get_friendly_name());

p.AddPrimitive(oneHotPrim);
p.AddPrimitiveToProfiler(op);

@ -66,7 +66,8 @@ void CreatePadOp(Program& p, const std::shared_ptr<ngraph::op::v1::Pad>& op) {
    pads_begin,
    pads_end,
    border_mode,
    pad_value);
    pad_value,
    op->get_friendly_name());

p.AddPrimitive(tilePrim);
p.AddPrimitiveToProfiler(op);

@ -195,8 +195,8 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
    cldnn::format::nv12, { 1, 1, width, height });
cldnn::layout uv_layout(DataTypeFromPrecision(ip),
    cldnn::format::nv12, { 1, 2, width / 2, height / 2 });
auto inputY = cldnn::input_layout(y_name, y_layout);
auto inputUV = cldnn::input_layout(uv_name, uv_layout);
auto inputY = cldnn::input_layout(y_name, y_layout, inputInfo->name());
auto inputUV = cldnn::input_layout(uv_name, uv_layout, inputInfo->name());

p.AddPrimitive(inputY);
p.inputLayouts.insert({ inputInfo->name() + "_Y" + std::to_string(i), y_layout });
@ -205,20 +205,29 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
switch (preProcess.getMeanVariant()) {
case NONE:
case MEAN_VALUE: {
    p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanValues));
    p.AddPrimitive(cldnn::reorder(preprocessPrimID,
        y_name,
        uv_name,
        networkInputLayout,
        meanValues,
        cldnn::reorder_mean_mode::subtract,
        inputInfo->name()));
    break;
}
case MEAN_IMAGE: {
    p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanBlobID));
    p.AddPrimitive(cldnn::reorder(preprocessPrimID,
        y_name,
        uv_name,
        networkInputLayout,
        meanBlobID,
        cldnn::reorder_mean_mode::subtract,
        inputInfo->name()));
    break;
}
default: IE_THROW(Unexpected) << "Invalid mean variant in input " + inputName;
    break;
}

p.primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() };
p.primitivesToIRLayersMap[y_name] = { inputInfo->name() };
p.primitivesToIRLayersMap[uv_name] = { inputInfo->name() };
p.profilingIDs.push_back(preprocessPrimID);
p.InitProfileInfo(preprocessPrimID, "Reorder");
p.primitiveIDs[inputName] = preprocessPrimID; // If it is batched blob, it will be overwritten afterwards.
@ -228,7 +237,7 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet

if (inputDims[0] > 1) {
    auto concatPrimID = "concat:" + inputName + Program::m_preProcessTag;
    p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b));
    p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b, op->get_friendly_name()));
    p.primitiveIDs[inputName] = concatPrimID;
}
} else {
@ -237,20 +246,26 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
inputLayout.data_type = DataTypeFromPrecision(ip);
p.inputLayouts.insert({ inputInfo->name(), inputLayout });

p.AddPrimitive(cldnn::input_layout(inputName, inputLayout));
p.primitivesToIRLayersMap[inputName] = { inputInfo->name() };
p.AddPrimitive(cldnn::input_layout(inputName, inputLayout, inputInfo->name()));

switch (preProcess.getMeanVariant()) {
case NONE:
case MEAN_VALUE: {
    p.AddPrimitive(cldnn::reorder(preprocessPrimID, inputName, networkInputLayout, meanValues));
    p.AddPrimitive(cldnn::reorder(preprocessPrimID,
        inputName,
        networkInputLayout,
        meanValues,
        cldnn::reorder_mean_mode::subtract,
        op->get_friendly_name()));
    break;
}
case MEAN_IMAGE: {
    p.AddPrimitive(cldnn::reorder(preprocessPrimID,
        inputName,
        networkInputLayout,
        meanBlobID));
        inputName,
        networkInputLayout,
        meanBlobID,
        cldnn::reorder_mean_mode::subtract,
        op->get_friendly_name()));
    break;
}
default: IE_THROW() << "Invalid mean variant in input " << inputName;

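Note: in CreateParameterOp the preprocessing reorders now state cldnn::reorder_mean_mode::subtract explicitly for both mean variants (per-channel mean values or a mean-image blob). Roughly, subtract mode removes the channel mean while the layout is converted; a toy sketch of those semantics, not the actual GPU kernel:

#include <cstddef>
#include <vector>

// Toy model of reorder_mean_mode::subtract with per-channel mean values:
// during the layout change, each channel has its mean subtracted.
std::vector<float> apply_mean_subtract(const std::vector<float>& input,
                                       const std::vector<float>& mean,
                                       std::size_t channels) {
    std::vector<float> out(input.size());
    const std::size_t per_channel = input.size() / channels;
    for (std::size_t c = 0; c < channels; ++c)
        for (std::size_t i = 0; i < per_channel; ++i)
            out[c * per_channel + i] = input[c * per_channel + i] - mean[c];
    return out;
}

int main() {
    // Two channels of two values each, means {1, 2}: result is {0, 1, 0, 1}.
    auto out = apply_mean_subtract({1, 2, 2, 3}, {1, 2}, 2);
    return out[0] == 0 && out[3] == 1 ? 0 : 1;
}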
@ -70,7 +70,8 @@ void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::AvgPool>&
    params.stride,
    params.pad_begin,
    CldnnTensorFromIEDims(op->get_output_shape(0)),
    DataTypeFromPrecision(op->get_output_element_type(0)));
    DataTypeFromPrecision(op->get_output_element_type(0)),
    op->get_friendly_name());
poolPrim.pad_end = params.pad_end;
p.AddPrimitive(poolPrim);
p.AddPrimitiveToProfiler(op);
@ -89,7 +90,8 @@ void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::MaxPool>&
    params.stride,
    params.pad_begin,
    CldnnTensorFromIEDims(op->get_output_shape(0)),
    DataTypeFromPrecision(op->get_output_element_type(0)));
    DataTypeFromPrecision(op->get_output_element_type(0)),
    op->get_friendly_name());
poolPrim.pad_end = params.pad_end;
p.AddPrimitive(poolPrim);
p.AddPrimitiveToProfiler(op);

@ -54,7 +54,8 @@ void CreatePriorBoxClusteredOp(Program& p, const std::shared_ptr<ngraph::op::v0:
    offset,
    width,
    height,
    DataTypeFromPrecision(op->get_output_element_type(0)));
    DataTypeFromPrecision(op->get_output_element_type(0)),
    op->get_friendly_name());

p.AddPrimitive(priorBoxPrim);
p.AddPrimitiveToProfiler(op);
@ -103,7 +104,8 @@ void CreatePriorBoxOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBox
    scale_all_sizes,
    fixed_ratio,
    fixed_size,
    density);
    density,
    op->get_friendly_name());

p.AddPrimitive(priorBoxPrim);
p.AddPrimitiveToProfiler(op);

@ -9,6 +9,7 @@

#include "cldnn/primitives/proposal.hpp"
#include "cldnn/primitives/mutable_data.hpp"
#include "cldnn/runtime/debug_configuration.hpp"

namespace CLDNNPlugin {

@ -62,11 +63,16 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
    DefaultFormatForDims(op->get_output_shape(1).size()),
    CldnnTensorFromIEDims(op->get_output_shape(1)));

GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->verbose >= 2) {
    GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
}
auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);

cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory);
p.primitivesToIRLayersMap[proposal_mutable_id_w] = { op->get_friendly_name() };
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w,
    shared_memory,
    op->get_friendly_name());
p.primitiveIDs[proposal_mutable_id_w] = proposal_mutable_id_w;
p.AddPrimitive(argmax_mutable_prim);
inputPrimitives.push_back(proposal_mutable_id_w);
@ -96,13 +102,16 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
    clip_after_nms,
    round_ratios,
    shift_anchors,
    normalize);
    normalize,
    op->get_friendly_name());

p.AddPrimitive(proposalPrim);

cldnn::primitive_id proposal_mutable_id_r = layer_type_name_ID(op) + ".1";
auto argmax_mutable_prim_r = cldnn::mutable_data(proposal_mutable_id_r, { proposalLayerName }, shared_memory);
p.primitivesToIRLayersMap[proposal_mutable_id_r] = { op->get_friendly_name() };
auto argmax_mutable_prim_r = cldnn::mutable_data(proposal_mutable_id_r,
    { proposalLayerName },
    shared_memory,
    op->get_friendly_name());
p.primitiveIDs[proposal_mutable_id_r] = proposal_mutable_id_r;
p.AddPrimitive(argmax_mutable_prim_r);

@ -134,7 +143,8 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
    clip_after_nms,
    round_ratios,
    shift_anchors,
    normalize);
    normalize,
    op->get_friendly_name());

p.AddPrimitive(proposalPrim);
p.AddPrimitiveToProfiler(op);

@ -75,7 +75,8 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::
    inputPrimitives[0],
    mode,
    axes,
    static_cast<int32_t>(keep_dims));
    static_cast<int32_t>(keep_dims),
    op->get_friendly_name());

p.AddPrimitive(reducePrim);

@ -96,7 +97,7 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::
    outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]),
        1, TensorValue(out_shape[2]));
}
auto reshape_prim = cldnn::reshape(resultLayerName, layerName, outTensor);
auto reshape_prim = cldnn::reshape(resultLayerName, layerName, outTensor, op->get_friendly_name());
p.AddPrimitive(reshape_prim);
p.AddPrimitiveToProfiler(op, resultLayerName);
}
@ -112,7 +113,13 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::
else if (rank - rawAxes.size() <= 4)
    out_format = cldnn::format::bfyx;

auto reorder_prim = cldnn::reorder(reorderLayerName, resultLayerName, out_format, out_dt);
auto reorder_prim = cldnn::reorder(reorderLayerName,
    resultLayerName,
    out_format,
    out_dt,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name());
p.AddPrimitive(reorder_prim);
p.AddPrimitiveToProfiler(op, reorderLayerName);
} else {

@ -28,7 +28,8 @@ void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::Region
    classes,
    num,
    mask_size,
    do_softmax);
    do_softmax,
    op->get_friendly_name());

p.AddPrimitive(regionPrim);
p.AddPrimitiveToProfiler(op);

@ -20,7 +20,8 @@ void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReorgYo

auto reorgPrim = cldnn::reorg_yolo(layerName,
    inputPrimitives[0],
    stride);
    stride,
    op->get_friendly_name());

p.AddPrimitive(reorgPrim);
p.AddPrimitiveToProfiler(op);

@ -36,9 +36,13 @@ void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op)
}

cldnn::layout outputLayout(DataTypeFromPrecision(op->get_output_element_type(0)), outputFormat, outTensor);
p.AddPrimitive(cldnn::reorder(reorderId, reshapeInputId, outputLayout));
p.AddPrimitive(cldnn::reorder(reorderId,
    reshapeInputId,
    outputLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.InitProfileInfo(reorderId, "Reorder", false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, layerName);
p.primitivesToIRLayersMap[reorderId] = { op->get_friendly_name() };
p.primitiveIDs[layerName + "_reorder"] = reorderId;
p.primitiveIDs[reorderId] = reorderId;
p.profilingIDs.push_back(reorderId);
@ -47,7 +51,8 @@ void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op)

auto reshapePrim = cldnn::reshape(layerName,
    reshapeInputId,
    outTensor);
    outTensor,
    op->get_friendly_name());

p.AddPrimitive(reshapePrim);
p.AddPrimitiveToProfiler(op);

@ -56,9 +56,12 @@ void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Result>& o
std::string outputID = inputs[0];

p.AddPrimitive(cldnn::reorder(outLayerName,
    outputID,
    FormatFromLayout(outputData->getLayout()),
    DataTypeFromPrecision(precision)));
    outputID,
    FormatFromLayout(outputData->getLayout()),
    DataTypeFromPrecision(precision),
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.InitProfileInfo(outLayerName, "reorder");
p.profilingIDs.push_back(outLayerName);
p.primitiveIDs[outLayerName] = outLayerName;

@ -22,7 +22,8 @@ void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::R
    inputPrimitives[0],
    inputPrimitives[1],
    seq_axis,
    batch_axis);
    batch_axis,
    op->get_friendly_name());

p.AddPrimitive(reverseSequencePrim);
p.AddPrimitiveToProfiler(op);

@ -107,8 +107,13 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell
cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape);
cldnn::layout hiddenLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inStateShape);
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(permuteID,
    inReshapeID,
    inputLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));

p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
@ -117,11 +122,24 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell
std::string hiddenInStr = inHiddenReorderID + "_1";
std::string cellInResh = inHiddenReshapeID + "_2";
std::string cellInStr = inHiddenReorderID + "_2";
p.AddPrimitive(cldnn::reshape(hiddenInResh, inputPrimitives[1], inStateShape));
p.AddPrimitive(cldnn::reorder(hiddenInStr, hiddenInResh, hiddenLayout));
p.AddPrimitive(cldnn::reshape(cellInResh, inputPrimitives[2], inStateShape));
p.AddPrimitive(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
p.AddPrimitive(cldnn::concatenation(input_concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));
p.AddPrimitive(cldnn::reshape(hiddenInResh, inputPrimitives[1], inStateShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(hiddenInStr,
    hiddenInResh,
    hiddenLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.AddPrimitive(cldnn::reshape(cellInResh, inputPrimitives[2], inStateShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(cellInStr,
    cellInResh,
    hiddenLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.AddPrimitive(cldnn::concatenation(input_concatID,
    { permuteID, hiddenInStr },
    cldnn::concatenation::concatenation_axis::along_x,
    op->get_friendly_name()));

p.AddInnerPrimitiveToProfiler(hiddenInResh, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(hiddenInStr, op->get_friendly_name(), op);
@ -139,14 +157,19 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell
std::string crop_id = layerName + "_crop";

cldnn::primitive_id WRconcatID = layerName + "_WRconcat";
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_f));
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_f, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);

p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, hasBias ? biasID : ""));
p.AddPrimitive(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
p.AddPrimitive(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
    clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, hasBias ? biasID : "", op->get_friendly_name()));
p.AddPrimitive(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(gemmReorderID,
    gemmReshapeID,
    gemmLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr, clip, 0, activations,
    activation_params, cldnn::lstm_weights_order::fizo, 0, op->get_friendly_name()));

p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(gemmReshapeID, op->get_friendly_name(), op);
@ -156,16 +179,16 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell
cldnn::tensor outSz = cldnn::tensor{ lstm_batch_size, lstm_hidden_size, 1, 1 };
cldnn::primitive_id outputHiddenCropID = layerName + "_hc";
cldnn::primitive_id outputHiddenID = layerName + ".0";
p.AddPrimitive(cldnn::crop(outputHiddenCropID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
p.AddPrimitive(cldnn::crop(outputHiddenCropID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(outputHiddenCropID, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::reshape(outputHiddenID, outputHiddenCropID, outSz));
p.AddPrimitive(cldnn::reshape(outputHiddenID, outputHiddenCropID, outSz, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(outputHiddenID, op->get_friendly_name(), op);

cldnn::primitive_id outputCellCropID = layerName + "_cc";
cldnn::primitive_id outputCellID = layerName + ".1";
p.AddPrimitive(cldnn::crop(outputCellCropID, lstm_elt_id, hiddenSz, cellCropSz));
p.AddPrimitive(cldnn::crop(outputCellCropID, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(outputCellCropID, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::reshape(outputCellID, outputCellCropID, outSz));
p.AddPrimitive(cldnn::reshape(outputCellID, outputCellCropID, outSz, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(outputCellID, op->get_friendly_name(), op);

// output primitive IDs

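Note: CreateLSTMCellOp lowers the single ngraph cell into generic primitives: reshape/reorder of the input and both states, a concatenation of [x, h] along x, one fully_connected against the concatenated [W;R] weights, then lstm_elt followed by crop/reshape pairs for the hidden (".0") and cell (".1") outputs. A scalar toy sketch of the arithmetic this chain implements; the gate order here is illustrative, while the real primitives use the fizo weight order on full tensors:

#include <cmath>

// Toy scalar view of the decomposition above: concat([x, h]) ->
// fully_connected([W;R], b) -> lstm_elt gate math -> two output crops.
static float sigmoid(float v) { return 1.0f / (1.0f + std::exp(-v)); }

void lstm_cell_step(float x, float h, float c,
                    const float WR[4][2], const float b[4],
                    float& h_out, float& c_out) {
    float g[4];
    for (int i = 0; i < 4; ++i)
        g[i] = WR[i][0] * x + WR[i][1] * h + b[i];   // the fully_connected step
    float f = sigmoid(g[0]), in = sigmoid(g[1]);     // forget / input gates
    float z = std::tanh(g[2]), o = sigmoid(g[3]);    // candidate / output gate
    c_out = f * c + in * z;                          // crop ".1": new cell state
    h_out = o * std::tanh(c_out);                    // crop ".0": new hidden state
}

int main() {
    const float WR[4][2] = {{0.1f, 0.2f}, {0.3f, 0.4f}, {0.5f, 0.6f}, {0.7f, 0.8f}};
    const float b[4] = {0, 0, 0, 0};
    float h = 0, c = 0;
    lstm_cell_step(1.0f, 0.0f, 0.0f, WR, b, h, c);
    return 0;
}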
@ -223,11 +246,16 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
cldnn::tensor inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape);
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(permuteID,
    inReshapeID,
    inputLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));

p.AddPrimitive(cldnn::reshape(inHiddenStateID, inputPrimitives[1], inStateShape));
p.AddPrimitive(cldnn::reshape(inCellStateID, inputPrimitives[2], inStateShape));
p.AddPrimitive(cldnn::reshape(inHiddenStateID, inputPrimitives[1], inStateShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reshape(inCellStateID, inputPrimitives[2], inStateShape, op->get_friendly_name()));

p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
@ -243,12 +271,12 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
cldnn::primitive_id inputCropID = layerName + "_inputCrop";

cldnn::primitive_id WRconcatID = layerName + "_WRconcat";
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y));
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);

std::vector<size_t> WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) };
cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape";
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize));
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize), op->get_friendly_name());
p.AddPrimitive(reshapeInPrim);
p.AddInnerPrimitiveToProfiler(WRreshapeID, op->get_friendly_name(), op);

@ -267,30 +295,35 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
cldnn::tensor crop_tensor{ inputShape.batch[0], 1, inputShape.spatial[0], inputShape.spatial[1] };
cldnn::tensor offset_tensor{ 0, static_cast<cldnn::tensor::value_type>(seqIdx), 0, 0 };
cldnn::primitive_id inputCrop_id = inputCropID + ":" + seqIdx_str;
p.AddPrimitive(cldnn::crop(inputCrop_id, permuteID, crop_tensor, offset_tensor));
p.AddPrimitive(cldnn::crop(inputCrop_id, permuteID, crop_tensor, offset_tensor, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(inputCrop_id, op->get_friendly_name(), op);

p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x));
p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(concatID, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID));
p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);

p.AddPrimitive(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
p.AddPrimitive(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, cellStr,
    clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
p.AddPrimitive(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(lstm_fc_reor_id,
    lstm_fc_resh_id,
    gemmLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, cellStr, clip, 0, activations,
    activation_params, cldnn::lstm_weights_order::fizo, 0, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(lstm_fc_resh_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(lstm_fc_reor_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(lstm_elt_id, op->get_friendly_name(), op);

hiddenStr = crop_id + ":hidden";
cellStr = crop_id + ":cell";
p.AddPrimitive(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
p.AddPrimitive(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(hiddenStr, op->get_friendly_name(), op);
output_ids_offsets.push_back(hiddenStr);

if (i < lstm_sequence_len - 1) {
    p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
    p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
    p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
} else {
    // last hidden state crop (output 2)
@ -299,7 +332,7 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
    p.primitiveIDs[outputHiddenID] = hiddenStr;

    // last cell state crop (output 3)
    p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
    p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
    cldnn::primitive_id outputCellID = layerName + ".2";
    p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
    p.primitiveIDs[outputCellID] = cellStr;
@ -310,7 +343,7 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
// concatenated hidden state (output 1)
cldnn::primitive_id outputConcatID = layerName + ".0";
cldnn::primitive_id concatStr = layerName + ":hiddenConcat";
p.AddPrimitive(cldnn::concatenation(concatStr, output_ids_offsets, cldnn::concatenation::along_f));
p.AddPrimitive(cldnn::concatenation(concatStr, output_ids_offsets, cldnn::concatenation::along_f, op->get_friendly_name()));

p.primitiveIDs[outputConcatID] = concatStr;
p.primitiveIDs[layerName] = concatStr;

@ -57,7 +57,8 @@ void CreateDeformablePSROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op
    group_size,
    output_dim,
    spatial_bins_x,
    spatial_bins_y);
    spatial_bins_y,
    op->get_friendly_name());
p.AddPrimitive(psROIPoolingPrim);
p.AddPrimitiveToProfiler(op);
}
@ -85,7 +86,8 @@ void CreatePSROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v0::PSRO
    spatial_scale,
    output_dim,
    spatial_bins_x,
    spatial_bins_y);
    spatial_bins_y,
    op->get_friendly_name());
p.AddPrimitive(psROIPoolingPrim);
p.AddPrimitiveToProfiler(op);
}
@ -110,7 +112,11 @@ void CreateROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v0::ROIPoo
    position_sensitive,
    pooled_width,
    pooled_height,
    spatial_scale);
    spatial_scale,
    0,
    1,
    1,
    op->get_friendly_name());

p.AddPrimitive(roiPoolingPrim);
p.AddPrimitiveToProfiler(op);

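Note: in CreateROIPoolingOp the friendly name again sits behind defaulted parameters, so the call now restates three intermediate defaults (0, 1, 1) that were previously implicit. This is the same positional-argument rule sketched after the NonMaxSuppression hunk; a compact reminder, with parameter names that are hypothetical rather than the real cldnn::roi_pooling signature:

// Positional arguments cannot skip defaulted slots: to supply the trailing
// ext_prim_id, every default between spatial_scale and the name must be
// restated. Parameter names here are illustrative only.
void roi_pooling_like(int pooled_w, int pooled_h, float spatial_scale,
                      int opt_a = 0, int opt_b = 1, int opt_c = 1,
                      const char* ext_prim_id = "") {
    (void)pooled_w; (void)pooled_h; (void)spatial_scale;
    (void)opt_a; (void)opt_b; (void)opt_c; (void)ext_prim_id;
}

int main() {
    roi_pooling_like(7, 7, 0.0625f);                        // before: defaults implied
    roi_pooling_like(7, 7, 0.0625f, 0, 1, 1, "MyROIPool");  // after: spelled out
    return 0;
}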
@ -54,10 +54,11 @@ void CreateScatterElementsUpdateOp(Program& p, const std::shared_ptr<ngraph::op:
int32_t axis = axes_constant->cast_vector<int32_t>()[0];

auto primitive = cldnn::scatter_elements_update(layerName,
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    GetScatterElementsUpdateAxis(axis, rank));
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    GetScatterElementsUpdateAxis(axis, rank),
    op->get_friendly_name());

p.AddPrimitive(primitive);
p.AddPrimitiveToProfiler(op);

@ -19,10 +19,11 @@ void CreateScatterNDUpdateOp(Program& p, const std::shared_ptr<ngraph::op::v3::S
auto indices_rank = op->get_input_shape(1).size();

auto primitive = cldnn::scatter_nd_update(layerName,
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    indices_rank);
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    indices_rank,
    op->get_friendly_name());

p.AddPrimitive(primitive);
p.AddPrimitiveToProfiler(op);

@ -57,7 +57,8 @@ void CreateScatterUpdateOp(Program& p, const std::shared_ptr<ngraph::op::v3::Sca
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    GetScatterUpdateAxis(axis, rank));
    GetScatterUpdateAxis(axis, rank),
    op->get_friendly_name());

p.AddPrimitive(primitive);
p.AddPrimitiveToProfiler(op);

@ -40,7 +40,13 @@ void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& o
if (targetFormat.value != DefaultFormatForDims(inputDimsN).value) {
    auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder";
    auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(i));
    auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
    auto reorderPrim = cldnn::reorder(reorderName,
        inputPrimitives[i],
        targetFormat,
        targetDatatype,
        std::vector<float>(),
        cldnn::reorder_mean_mode::subtract,
        op->get_friendly_name());

    p.AddPrimitive(reorderPrim);
    p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
@ -57,7 +63,7 @@ void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& o

auto targetShape = CldnnTensorFromIEDims(inputDims);

auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());

p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
@ -73,6 +79,7 @@ void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& o
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    op->get_friendly_name(),
    cldnn::padding(),
    bc_string);

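Note: cldnn::select is the odd one out in this diff: the friendly name is inserted ahead of the existing cldnn::padding() and broadcast-spec arguments instead of being appended last, so the call site grows in the middle. A sketch of the two constructor shapes, with simplified declaration-only signatures that are assumptions, not the real cldnn headers:

#include <string>

// Most primitives in this diff append the name as the final argument ...
struct scatter_update_like {
    scatter_update_like(std::string id, std::string data, std::string indices,
                        std::string updates, int axis,
                        std::string ext_prim_id = "");
};

// ... while select takes it ahead of its trailing defaulted arguments,
// so existing calls that pass padding/broadcast must add the name in between.
struct select_like {
    select_like(std::string id, std::string mask, std::string input_true,
                std::string input_false, std::string ext_prim_id = "",
                std::string padding = "", std::string broadcast_spec = "");
};

int main() { return 0; }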
@ -36,7 +36,8 @@ void CreateShuffleChannelsOp(Program& p, const std::shared_ptr<ngraph::op::v0::S
auto shuffleChannelsPrim = cldnn::shuffle_channels(layerName,
    inputPrimitives[0],
    group,
    axis);
    axis,
    op->get_friendly_name());

p.AddPrimitive(shuffleChannelsPrim);
p.AddPrimitiveToProfiler(op);

@ -41,7 +41,8 @@ void CreateSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v1::Softmax>&
std::string layerName = layer_type_name_ID(op);
auto softmaxPrim = cldnn::softmax(layerName,
    inputPrimitives[0],
    GetSoftmaxAxis(op->get_axis(), op->get_input_shape(0).size()));
    GetSoftmaxAxis(op->get_axis(), op->get_input_shape(0).size()),
    op->get_friendly_name());
p.AddPrimitive(softmaxPrim);
p.AddPrimitiveToProfiler(op);
}
@ -58,9 +59,10 @@ void CreateLogSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v5::LogSof

auto softmaxPrim = cldnn::softmax(layerNameSoftmax,
    inputPrimitives[0],
    GetSoftmaxAxis(static_cast<size_t>(axis), op->get_input_shape(0).size()));
    GetSoftmaxAxis(static_cast<size_t>(axis), op->get_input_shape(0).size()),
    op->get_friendly_name());

auto logPrim = cldnn::activation(layerName, layerNameSoftmax, cldnn::activation_func::log);
auto logPrim = cldnn::activation(layerName, layerNameSoftmax, cldnn::activation_func::log, {(0.0F), (0.0F)}, op->get_friendly_name());

p.AddPrimitive(softmaxPrim);
p.AddPrimitive(logPrim);

@ -42,7 +42,8 @@ void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v1::Spac
    inputs[0], // block_shape
    inputs[1], // crops_begin
    inputs[2], // crops_end
    out_size);
    out_size,
    op->get_friendly_name());

p.AddPrimitive(batchToSpacePrim);
p.AddPrimitiveToProfiler(op);

@ -27,7 +27,8 @@ void CreateSpaceToDepthOp(Program& p, const std::shared_ptr<ngraph::op::v0::Spac
auto spaceToDepthPrim = cldnn::space_to_depth(layerName,
    inputPrimitives[0],
    GetDepthMode(op->get_mode()),
    op->get_block_size());
    op->get_block_size(),
    op->get_friendly_name());

p.AddPrimitive(spaceToDepthPrim);
p.AddPrimitiveToProfiler(op);

@ -40,8 +40,7 @@ void CreateCommonSplitOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1);
auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0);

auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor);
p.primitivesToIRLayersMap[outLayerName] = { op->get_friendly_name() };
auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor, op->get_friendly_name());
p.primitiveIDs[outLayerName] = outLayerName;

p.AddPrimitive(cropPrim);

@ -189,7 +189,7 @@ void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Stri
if (!new_axis_mask.empty()) {
    auto targetShape = CldnnTensorFromIEDims(reshape_pattern);
    auto reshapeInName = op->get_friendly_name() + "/Reshape_before";
    auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape);
    auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape, op->get_friendly_name());
    p.AddPrimitive(reshapePrim);
    p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
    inPrimitive = reshapeInName;
@ -215,7 +215,7 @@ void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Stri
cldnn::tensor offSize = CldnnTensorFromIEDims(offset, 0);

auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize);
auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize, op->get_friendly_name());
p.AddPrimitive(cropPrim);
p.AddPrimitiveToProfiler(layerName, op);

@ -223,7 +223,7 @@ void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Stri
if (!shrink_axis_mask.empty()) {
    auto targetShape = CldnnTensorFromIEDims(output_shape);
    auto reshapeOutName = op->get_friendly_name() + "/Crop";
    auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape);
    auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape, op->get_friendly_name());
    p.AddPrimitive(reshapePrim);
    p.AddInnerPrimitiveToProfiler(reshapeOutName, layerName, op);
}
@ -258,7 +258,8 @@ void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Stri
    end_mask,
    new_axis_mask,
    shrink_axis_mask,
    out_size);
    out_size,
    op->get_friendly_name());

p.AddPrimitive(stridedSlicePrim);
p.AddPrimitiveToProfiler(op);

@ -27,11 +27,11 @@ using TensorIterator = ngraph::op::v0::TensorIterator;
namespace CLDNNPlugin {

template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) {
    auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
    cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
    *ptr.begin() = num;
    return {id, mem};
    return {id, mem, ext_prim_id};
}

static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr<ngraph::Node>& op,
@ -42,7 +42,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
    const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
    cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
    auto mem = p.GetEngine().allocate_memory(output_layout);
    auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
    auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency
    return md;
}

@ -122,24 +122,21 @@ void CreateTensorIteratorOp(Program &p, const std::shared_ptr<TensorIterator> &o
    throw std::runtime_error("tensor iterator's num_iteration cannot be negative");
}
{
    cldnn::data trip_count = CreateScalarData<cldnn::data>(p, trip_count_id, num_iterations);
    p.primitivesToIRLayersMap[trip_count_id] = { op->get_friendly_name() };
    cldnn::data trip_count = CreateScalarData<cldnn::data>(p, trip_count_id, num_iterations, op->get_friendly_name());
    p.primitiveIDs[trip_count_id] = trip_count_id;
    p.AddPrimitive(trip_count);
    p.AddInnerPrimitiveToProfiler(trip_count_id, layerName, op);
}
const cldnn::primitive_id execution_condition_id = layerName + "_initialExecutionCondition";
{
    cldnn::mutable_data execution_condition = CreateScalarData<cldnn::mutable_data>(p, execution_condition_id, 1);
    p.primitivesToIRLayersMap[execution_condition_id] = { op->get_friendly_name() };
    cldnn::mutable_data execution_condition = CreateScalarData<cldnn::mutable_data>(p, execution_condition_id, 1, op->get_friendly_name());
    p.primitiveIDs[execution_condition_id] = execution_condition_id;
    p.AddPrimitive(execution_condition);
    p.AddInnerPrimitiveToProfiler(execution_condition_id, layerName, op);
}
const cldnn::primitive_id num_iteration_id = layerName + "_numIteration";
{
    cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0);
    p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() };
    cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0, op->get_friendly_name());
    p.primitiveIDs[num_iteration_id] = num_iteration_id;
    p.AddPrimitive(num_iteration);
    p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op);
@ -191,7 +188,10 @@ void CreateTensorIteratorOp(Program &p, const std::shared_ptr<TensorIterator> &o
    input_primitive_maps, /* input mappings connecting outer network and inner network */
    output_primitive_maps, /* output mappings connecting outer network and inner network */
    back_edges, /* back edge mapping */
    num_iterations); /* max iteration, i.e. length of iteration axis */
    num_iterations, /* max iteration, i.e. length of iteration axis */
    "",
    "",
    op->get_friendly_name());

p.AddPrimitive(loopPrimitive);
p.AddPrimitiveToProfiler(op);

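Note: CreateScalarData above threads the new argument through aggregate initialization: the return statement brace-initializes whichever DATA_TYPE the caller requested as {id, mem, ext_prim_id}, which lets one template serve both cldnn::data and cldnn::mutable_data. A standalone model of that shape, with mock types standing in for the cldnn ones:

#include <cstdint>
#include <memory>
#include <string>

// Mock stand-ins for cldnn::data / cldnn::mutable_data: both are
// brace-constructible from {id, memory, ext_prim_id}.
struct memory_buf { std::int64_t value = 0; };
using memory_ptr = std::shared_ptr<memory_buf>;

struct data_like         { std::string id; memory_ptr mem; std::string ext_prim_id; };
struct mutable_data_like { std::string id; memory_ptr mem; std::string ext_prim_id; };

// One template serves both primitive types because the return statement
// aggregate-initializes whichever DATA_TYPE the caller asked for.
template <class DATA_TYPE>
static DATA_TYPE CreateScalarData(const std::string& id, std::int64_t num,
                                  const std::string& ext_prim_id) {
    auto mem = std::make_shared<memory_buf>();
    mem->value = num;  // the real code writes the scalar through a cldnn::mem_lock
    return {id, mem, ext_prim_id};
}

int main() {
    auto trip_count = CreateScalarData<data_like>("ti_tripCount", 10, "TI");
    auto exec_cond  = CreateScalarData<mutable_data_like>("ti_initialExecutionCondition", 1, "TI");
    return (trip_count.mem->value == 10 && exec_cond.mem->value == 1) ? 0 : 1;
}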
Some files were not shown because too many files have changed in this diff