diff --git a/.ci/azure/linux_onnxruntime.yml b/.ci/azure/linux_onnxruntime.yml index ad5e630820d..4fd14f2c1b1 100644 --- a/.ci/azure/linux_onnxruntime.yml +++ b/.ci/azure/linux_onnxruntime.yml @@ -94,7 +94,7 @@ jobs: -DENABLE_PROFILING_ITT=OFF -DENABLE_SAMPLES=OFF -DNGRAPH_ONNX_FRONTEND_ENABLE=ON - -DNGRAPH_DEBUG_ENABLE=OFF + -DOPENVINO_DEBUG_ENABLE=OFF $(REPO_DIR) workingDirectory: $(BUILD_DIR) diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index 62e0fa7c712..81c1ba95601 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -16,7 +16,7 @@ jobs: timeoutInMinutes: 120 pool: - name: WIN_VMSS_VENV_F8S_WU2 + name: WIN_VMSS_VENV_F16S_WU2 variables: system.debug: true @@ -34,8 +34,6 @@ jobs: INSTALL_DIR: $(WORK_DIR)\install_pkg INSTALL_TEST_DIR: $(INSTALL_DIR)\tests SETUPVARS: $(INSTALL_DIR)\setupvars.bat - IB_DIR: C:\Program Files (x86)\IncrediBuild - IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe steps: - script: | @@ -59,12 +57,6 @@ jobs: rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR) displayName: 'Make dir' - - script: | - certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat - call install_ib_console.bat - workingDirectory: $(WORK_DIR) - displayName: 'Install IncrediBuild' - - checkout: self clean: true lfs: false @@ -109,9 +101,7 @@ jobs: - script: dir $(REPO_DIR)\inference-engine\temp\ /s displayName: 'List temp SDKs' - - script: | - set PATH=$(WORK_DIR)\ninja-win;%PATH% - call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja" + - script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja workingDirectory: $(BUILD_DIR) displayName: 'Build Win' @@ -153,10 +143,8 @@ jobs: displayName: 'PaddlePaddle Frontend UT' continueOnError: false - - script: | - set PATH=$(IB_DIR);%PATH% - call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml - displayName: 'IE UT old - IB' + - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests.xml + displayName: 'IE UT old' continueOnError: false - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml @@ -187,11 +175,8 @@ jobs: displayName: 'TEMPLATE FuncTests' continueOnError: false - # call $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml - - script: | - set PATH=$(IB_DIR);%PATH% - call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke*:-*CompareWithRefs/base_size=16_pre_nms_topn=100_post_nms_topn=100_nms_thresh=0.7_feat_stride=1_min_size=1_ratio*:*smoke_GRUSequenceCommonZeroClip/GRUSequenceTest.CompareWithRefs/mode=CONVERT_TO_TI_MAX_SEQ_LEN_CONST_seq_lengths* --gtest_output=xml:TEST-cpuFuncTests-IB.xml /testlevel=24 - displayName: 'CPU FuncTests - IB' + - script: $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml + displayName: 'CPU FuncTests' continueOnError: false - script: | @@ -213,8 +198,3 @@ jobs: buildPlatform: 'x64' # Optional buildConfiguration: 'Windows' # Optional #publishRunAttachments: true # Optional - - - script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent - displayName: Stop IncrediBuild - continueOnError: true - enabled: false diff --git a/.ci/openvino-onnx/Dockerfile 
b/.ci/openvino-onnx/Dockerfile index 8e2365e4ebc..ada3c36adb1 100644 --- a/.ci/openvino-onnx/Dockerfile +++ b/.ci/openvino-onnx/Dockerfile @@ -68,7 +68,7 @@ RUN cmake .. \ -DENABLE_PYTHON=ON \ -DPYTHON_EXECUTABLE=/usr/bin/python3 \ -DNGRAPH_ONNX_FRONTEND_ENABLE=ON \ - -DNGRAPH_DEBUG_ENABLE=OFF \ + -DOPENVINO_DEBUG_ENABLE=OFF \ -DCMAKE_INSTALL_PREFIX=/openvino/dist \ -DNGRAPH_USE_PROTOBUF_LITE=${PROTOBUF_LITE} RUN make -j $(nproc) install diff --git a/.ci/openvino-onnx/watchdog/src/watchdog.py b/.ci/openvino-onnx/watchdog/src/watchdog.py index 9c695ef22be..3edbe478683 100644 --- a/.ci/openvino-onnx/watchdog/src/watchdog.py +++ b/.ci/openvino-onnx/watchdog/src/watchdog.py @@ -486,7 +486,7 @@ class Watchdog: self._queue_message(message, message_severity='warning', pr=pr) elif build_delta > _BUILD_DURATION_THRESHOLD: # CI job take too long, possibly froze - communicate failure - message = ('ONNX CI job build #{}, for PR #{} started,' + message = ('ONNX CI job build #{}, for PR #{} started, ' 'but did not finish in designated time of {} ' 'minutes!'.format(build_number, pr_number, str(_BUILD_DURATION_THRESHOLD.seconds / 60))) diff --git a/.gitmodules b/.gitmodules index 8569ecbb958..ce94901db10 100644 --- a/.gitmodules +++ b/.gitmodules @@ -53,3 +53,6 @@ [submodule "ncc"] path = cmake/developer_package/ncc_naming_style/ncc url = https://github.com/nithinn/ncc.git +[submodule "thirdparty/onednn_gpu"] + path = thirdparty/onednn_gpu + url = https://github.com/oneapi-src/oneDNN.git diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index b4946d85d11..70562b8cc96 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -83,7 +83,6 @@ if(THREADING STREQUAL "OMP") message(FATAL_ERROR "Intel OMP is not available on current platform") endif() update_deps_cache(OMP "${OMP}" "Path to OMP root folder") - log_rpath_from_dir(OMP "${OMP}/lib") debug_message(STATUS "intel_omp=" ${OMP}) ie_cpack_add_component(omp REQUIRED) @@ -146,12 +145,6 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") update_deps_cache(TBB_DIR "${TBB}/cmake" "Path to TBB cmake folder") update_deps_cache(TBBBIND_2_4_DIR "${TBBBIND_2_4}/cmake" "Path to TBBBIND_2_4 cmake folder") - - if(WIN32) - log_rpath_from_dir(TBB "${TBB}/bin") - else () - log_rpath_from_dir(TBB "${TBB}/lib") - endif() debug_message(STATUS "tbb=" ${TBB}) endif() @@ -242,14 +235,6 @@ if(ENABLE_OPENCV) endif() update_deps_cache(OpenCV_DIR "${ocv_cmake_path}" "Path to OpenCV package folder") - - if(WIN32) - log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../bin") - elseif(ANDROID) - log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../../../lib") - else() - log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../lib") - endif() debug_message(STATUS "opencv=" ${OPENCV}) else() reset_deps_cache(OpenCV_DIR) @@ -277,8 +262,8 @@ if(ENABLE_GNA) set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452") endif() if(GNA_LIBRARY_VERSION STREQUAL "GNA2") - set(GNA_VERSION "02.00.00.1226") - set(GNA_HASH "d5450af15c993e264c25ac4591a7dab44722e10d15fca4f222a1b84429d4e5b6") + set(GNA_VERSION "03.00.00.1377") + set(GNA_HASH "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65") endif() set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include) diff --git a/cmake/developer_package/debug.cmake b/cmake/developer_package/debug.cmake index bb4538dfe02..e14f30b4c0f 100644 --- a/cmake/developer_package/debug.cmake +++ b/cmake/developer_package/debug.cmake @@ -15,59 +15,3 @@ function(clean_message type) message (FATAL_ERROR) endif() endfunction() - 
-file(REMOVE ${CMAKE_BINARY_DIR}/ld_library_rpath_64.txt) - -# log relative path to shared library that has to be used in LD_LIBRARY_PATH -function (log_rpath_remove_top component component_remove_top lib lib_remove_top) - - set(top_lib_dir ${${component}}) - set(lib_dir ${lib}) - -# debug_message(STATUS "LIB-IN=${lib} ") -# debug_message(STATUS "TOPLIB-IN=${top_lib_dir} ") - get_filename_component(top_lib_dir "${${component}}" DIRECTORY) - - if (${component_remove_top} AND ${component}) - else() - get_filename_component(add_name "${${component}}" NAME) - set(top_lib_dir "${top_lib_dir}/${add_name}") - endif() - if (${lib_remove_top} AND lib) - get_filename_component(lib_dir ${lib} DIRECTORY) - endif() - - string (REPLACE "//" "/" top_lib_dir "${top_lib_dir}") - string (REPLACE "//" "/" lib_dir "${lib_dir}") - - string (REPLACE "\\\\" "/" top_lib_dir "${top_lib_dir}") - string (REPLACE "\\\\" "/" lib_dir "${lib_dir}") - -# debug_message(STATUS "LIB-OUT=${lib_dir}") -# debug_message(STATUS "TOPLIB-OUT=${top_lib_dir}") - - if (WIN32) - string (TOLOWER "${top_lib_dir}" top_lib_dir) - string (TOLOWER "${lib_dir}" lib_dir) - endif() - - string (REPLACE "${top_lib_dir}" "" component_dir "${lib_dir}") - - set(RPATH_INFO "${component}=${component_dir}") - debug_message(STATUS "LD_LIBRARY_RPATH: ${RPATH_INFO}") - file(APPEND ${CMAKE_BINARY_DIR}/ld_library_rpath_64.txt "${RPATH_INFO}\n") -endfunction() - -function (log_rpath_from_dir component lib_dir) - log_rpath_remove_top("${component}" TRUE "${lib_dir}" FALSE) -endfunction() - -function (log_rpath component lib_path) - log_rpath_remove_top(${component} TRUE ${lib_path} TRUE) -endfunction() - -# Just wrapping of the original message() function to make this macro known during IE build. -# This macro is redefined (with additional checks) within the InferenceEngineConfig.cmake file. 
-macro(ext_message TRACE_LEVEL) - message(${TRACE_LEVEL} "${ARGN}") -endmacro() diff --git a/cmake/features.cmake b/cmake/features.cmake index 450b0d255cc..7528b89d47d 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -122,7 +122,7 @@ endif() ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" ON "protoc_available" OFF) ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF) -ie_option(IR_FRONTEND_ENABLE "Enable IR FrontEnd" ON) +ie_option(NGRAPH_IR_FRONTEND_ENABLE "Enable IR FrontEnd" ON) ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON "NGRAPH_ONNX_FRONTEND_ENABLE" OFF) ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF @@ -130,7 +130,7 @@ ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF) ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON "NGRAPH_UNIT_TEST_ENABLE" OFF) -ie_option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" OFF) +ie_option(OPENVINO_DEBUG_ENABLE "Enable output for OPENVINO_DEBUG statements" OFF) ie_option(ENABLE_REQUIREMENTS_INSTALL "Dynamic dependencies install" ON) # WA for ngraph python build on Windows debug diff --git a/cmake/templates/OpenVINOConfig.cmake.in b/cmake/templates/OpenVINOConfig.cmake.in index 14fc57b36c2..eb903e14889 100644 --- a/cmake/templates/OpenVINOConfig.cmake.in +++ b/cmake/templates/OpenVINOConfig.cmake.in @@ -63,6 +63,9 @@ # `OpenVINO_Frontend_PaddlePaddle_FOUND` # OpenVINO PaddlePaddle frontend is available # +# `OpenVINO_Frontend_IR_FOUND` +# OpenVINO IR frontend is available +# # OpenVINO version variables: # # `OpenVINO_VERSION_MAJOR` @@ -169,6 +172,7 @@ set(${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@) set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_ONNX_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_ONNX_FOUND}) set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_PaddlePaddle_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND}) +set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_IR_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_IR_FOUND}) # if no components specified, only Runtime is provided if(NOT ${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS) diff --git a/cmake/templates/ngraphConfig.cmake.in b/cmake/templates/ngraphConfig.cmake.in index a94e6b50a58..c5b467f2dd3 100644 --- a/cmake/templates/ngraphConfig.cmake.in +++ b/cmake/templates/ngraphConfig.cmake.in @@ -88,5 +88,6 @@ if(ngraph_onnx_importer_FOUND) endif() set(ngraph_paddlepaddle_frontend_FOUND ${OpenVINO_Frontend_PaddlePaddle_FOUND}) +set(ngraph_ir_frontend_FOUND ${OpenVINO_Frontend_IR_FOUND}) check_required_components(ngraph) diff --git a/cmake/test_model_zoo.cmake b/cmake/test_model_zoo.cmake index 580cab35ec4..00e360e3800 100644 --- a/cmake/test_model_zoo.cmake +++ b/cmake/test_model_zoo.cmake @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 # +set_property(GLOBAL PROPERTY JOB_POOLS four_jobs=4) + function(ov_model_convert SRC DST OUT) set(onnx_gen_script ${OpenVINO_SOURCE_DIR}/ngraph/test/models/onnx/onnx_prototxt_converter.py) @@ -43,6 +45,7 @@ function(ov_model_convert SRC DST OUT) "${SRC}/${in_file}" ${full_out_name} DEPENDS ${onnx_gen_script} "${SRC}/${in_file}" COMMENT "Generate ${rel_out_name}" + JOB_POOL four_jobs WORKING_DIRECTORY "${model_source_dir}") else() add_custom_command(OUTPUT ${full_out_name} @@ -50,6 +53,7 @@ 
function(ov_model_convert SRC DST OUT) "${SRC}/${in_file}" ${full_out_name} DEPENDS ${onnx_gen_script} "${SRC}/${in_file}" COMMENT "Copy ${rel_out_name}" + JOB_POOL four_jobs WORKING_DIRECTORY "${model_source_dir}") endif() list(APPEND files "${full_out_name}") diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md index 31de647f379..a304a0bb6b2 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_RNNT.md @@ -73,19 +73,21 @@ inp = torch.randn([seq_length, batch_size, feature_length]) feature_length = torch.LongTensor([seq_length]) x_padded, x_lens = model.encoder(inp, feature_length) torch.onnx.export(model.encoder, (inp, feature_length), "rnnt_encoder.onnx", opset_version=12, - input_names=['input.1', '1'], dynamic_axes={'input.1': {0: 'seq_len', 1: 'batch'}}) + input_names=['input', 'feature_length'], output_names=['x_padded', 'x_lens'], + dynamic_axes={'input': {0: 'seq_len', 1: 'batch'}}) symbol = torch.LongTensor([[20]]) hidden = torch.randn([2, batch_size, 320]), torch.randn([2, batch_size, 320]) g, hidden = model.prediction.forward(symbol, hidden) torch.onnx.export(model.prediction, (symbol, hidden), "rnnt_prediction.onnx", opset_version=12, - input_names=['input.1', '1', '2'], - dynamic_axes={'input.1': {0: 'batch'}, '1': {1: 'batch'}, '2': {1: 'batch'}}) + input_names=['symbol', 'hidden_in_1', 'hidden_in_2'], + output_names=['g', 'hidden_out_1', 'hidden_out_2'], + dynamic_axes={'symbol': {0: 'batch'}, 'hidden_in_1': {1: 'batch'}, 'hidden_in_2': {1: 'batch'}}) f = torch.randn([batch_size, 1, 1024]) model.joint.forward(f, g) torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12, - input_names=['0', '1'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}}) + input_names=['0', '1'], output_names=['result'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}}) ``` ```bash @@ -97,10 +99,10 @@ After completing this step, the files `rnnt_encoder.onnx`, `rnnt_prediction.onnx **Step 6**. Run the conversion command: ```bash -python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input.1[157 1 240],1->157" -python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]" +python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input[157 1 240],feature_length->157" +python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "symbol[1 1],hidden_in_1[2 1 320],hidden_in_2[2 1 320]" python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]" ``` Please note that hardcoded value for sequence length = 157 was taken from the MLCommons but conversion to IR preserves network [reshapeability](../../../../IE_DG/ShapeInference.md), this means you can change input shapes manually to any value either during conversion or -inference. \ No newline at end of file +inference. diff --git a/docs/index.md b/docs/index.md index 9ad04bfe960..7cc4eb90f7b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -104,3 +104,9 @@ Intel® Distribution of OpenVINO™ toolkit includes the following components: - [Intel® Media SDK](https://software.intel.com/en-us/media-sdk) (in Intel® Distribution of OpenVINO™ toolkit for Linux only) OpenVINO™ Toolkit opensource version is available on [GitHub](https://github.com/openvinotoolkit/openvino). 
For building the Inference Engine from the source code, see the build instructions.
+
+
+## OpenVINO™ API 2.0
+
+OpenVINO™ API 2.0 was introduced to make the OpenVINO™ interface more user-friendly and to align OpenVINO™ with other frameworks.
+The [migration guide](@ref ov_2_0_transition_guide) is intended to simplify migrating an application from the old API to OpenVINO™ API 2.0.
diff --git a/docs/migration_ov_2_0/docs/common_inference_pipeline.md b/docs/migration_ov_2_0/docs/common_inference_pipeline.md
new file mode 100644
index 00000000000..af2dbf25304
--- /dev/null
+++ b/docs/migration_ov_2_0/docs/common_inference_pipeline.md
@@ -0,0 +1,55 @@
+# OpenVINO™ Inference Pipeline {#ov_inference_pipeline}
+
+Usually, to run inference on a model with the OpenVINO™ toolkit, an application performs the following steps:
+ 1. Create Core
+ 2. (Optional) Read model from the disk
+ 2.1. Configure Input and Output of the Model
+ 3. Load the Model to the Device
+ 4. Create an Inference Request
+ 5. Prepare Input
+ 6. Start Inference
+ 7. Process the Inference Results
+
+The code snippets below cover these steps and show how the application code should be changed for migration to OpenVINO™ 2.0.
+
+## 1. Create Core
+
+Inference Engine API:
+
+@snippet snippets/ie_common.cpp ie:create_core
+
+OpenVINO™ 2.0 API:
+
+@snippet snippets/ov_common.cpp ov_api_2_0:create_core
+
+## 2. (Optional) Read model from the disk
+
+Inference Engine API:
+
+@snippet snippets/ie_common.cpp ie:read_model
+
+OpenVINO™ 2.0 API:
+
+@snippet snippets/ov_common.cpp ov_api_2_0:read_model
+
+### 2.1 Configure Input and Output of the Model
+
+Inference Engine API:
+
+@snippet snippets/ie_common.cpp ie:get_inputs_outputs
+
+OpenVINO™ 2.0 API:
+
+@snippet snippets/ov_common.cpp ov_api_2_0:get_inputs_outputs
+
+## 3. Load the Model to the Device
+
+Inference Engine API:
+
+@snippet snippets/ie_common.cpp ie:compile_model
+
+OpenVINO™ 2.0 API:
+
+@snippet snippets/ov_common.cpp ov_api_2_0:compile_model
+
+## 5. TBD
diff --git a/docs/migration_ov_2_0/docs/intro.md b/docs/migration_ov_2_0/docs/intro.md
new file mode 100644
index 00000000000..5d89b7aff3d
--- /dev/null
+++ b/docs/migration_ov_2_0/docs/intro.md
@@ -0,0 +1,12 @@
+# OpenVINO™ API 2.0 transition guide {#ov_2_0_transition_guide}
+
+OpenVINO™ API 2.0 was introduced to simplify migration from other frameworks and to make the OpenVINO™ API more user-friendly.
+The main differences between the APIs are listed below:
+
+ - OpenVINO™ API 2.0 uses tensor names or indices to work with inputs and outputs, while the old API works with operation names.
+ - Structures for shapes and element types were changed.
+ - The naming style was changed: the old API uses CamelCase, while OpenVINO™ API 2.0 uses snake_case for function names.
+ - Namespaces were aligned between components.
+
+See the following transition guides to learn how to migrate your own application to OpenVINO™ API 2.0:
+ - [OpenVINO™ Common Inference pipeline](@ref ov_inference_pipeline)
diff --git a/docs/nGraph_DG/nGraph_debug_capabilities.md b/docs/nGraph_DG/nGraph_debug_capabilities.md
index e2ae82743c1..bda4dff150c 100644
--- a/docs/nGraph_DG/nGraph_debug_capabilities.md
+++ b/docs/nGraph_DG/nGraph_debug_capabilities.md
@@ -3,7 +3,7 @@
 nGraph representation provides an API to get detailed information about the graph structure. To receive additional messages about applied graph modifications, rebuild the nGraph library with
-the `-DNGRAPH_DEBUG_ENABLE=ON` option.
+the `-DOPENVINO_DEBUG_ENABLE=ON` option.
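With a debug-enabled build, transformation code can emit messages through the debug logging macro. A minimal sketch of such a statement — assuming the macro keeps the stream-style interface of the former `NGRAPH_DEBUG`, and that `ngraph/log.hpp` is still the providing header:

```cpp
#include <ngraph/log.hpp>

void report_fusion_progress() {
    // Compiled to a no-op unless the library was built with
    // -DOPENVINO_DEBUG_ENABLE=ON.
    OPENVINO_DEBUG << "matched multiply-add pattern, fusing nodes";
}
```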
To visualize the nGraph function in the xDot format or as an image file, use the `ngraph::pass::VisualizeTree` graph transformation pass:
diff --git a/docs/snippets/ie_common.cpp b/docs/snippets/ie_common.cpp
new file mode 100644
index 00000000000..6a558129243
--- /dev/null
+++ b/docs/snippets/ie_common.cpp
@@ -0,0 +1,43 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <inference_engine.hpp>
+
+int main() {
+    //! [ie:create_core]
+    InferenceEngine::Core core;
+    //! [ie:create_core]
+
+    //! [ie:read_model]
+    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");
+    //! [ie:read_model]
+
+    //! [ie:get_inputs_outputs]
+    InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
+    InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
+    //! [ie:get_inputs_outputs]
+
+    //! [ie:compile_model]
+    InferenceEngine::ExecutableNetwork exec_network = core.LoadNetwork(network, "CPU");
+    //! [ie:compile_model]
+
+    //! [ie:create_infer_request]
+    InferenceEngine::InferRequest infer_request = exec_network.CreateInferRequest();
+    //! [ie:create_infer_request]
+
+    //! [ie:get_input_tensor]
+    InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(inputs.begin()->first);
+    // fill input blob
+    //! [ie:get_input_tensor]
+
+    //! [ie:inference]
+    infer_request.Infer();
+    //! [ie:inference]
+
+    //! [ie:get_output_tensor]
+    InferenceEngine::Blob::Ptr output_blob = infer_request.GetBlob(outputs.begin()->first);
+    // process output data
+    //! [ie:get_output_tensor]
+    return 0;
+}
diff --git a/docs/snippets/nGraphTutorial.cpp b/docs/snippets/nGraphTutorial.cpp
index 6011becf4ec..e39e783d5eb 100644
--- a/docs/snippets/nGraphTutorial.cpp
+++ b/docs/snippets/nGraphTutorial.cpp
@@ -23,12 +23,10 @@
 acos0->set_argument(0, add0);
 add1->set_argument(0, acos0);
 add1->set_argument(1, abs0);
-// Run shape inference on the nodes
-NodeVector ops{arg0, arg1, add0, abs0, acos0, add1};
-validate_nodes_and_infer_types(ops);
-
 // Create a graph with one output (add1) and two inputs (arg0, arg1)
 auto ng_function = make_shared<Function>(OutputVector{add1}, ParameterVector{arg0, arg1});
+// Run shape inference on the nodes
+ng_function->validate_nodes_and_infer_types();
 //! [part0]
diff --git a/docs/snippets/ov_common.cpp b/docs/snippets/ov_common.cpp
new file mode 100644
index 00000000000..7cb9e344f7c
--- /dev/null
+++ b/docs/snippets/ov_common.cpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include <openvino/core/function.hpp>
+#include <openvino/runtime/core.hpp>
+
+int main() {
+    //! [ov_api_2_0:create_core]
+    ov::runtime::Core core;
+    //! [ov_api_2_0:create_core]
+
+    //! [ov_api_2_0:read_model]
+    std::shared_ptr<ov::Function> network = core.read_model("model.xml");
+    //! [ov_api_2_0:read_model]
+
+    //! [ov_api_2_0:get_inputs_outputs]
+    ov::ParameterVector inputs = network->get_parameters();
+    ov::ResultVector outputs = network->get_results();
+    //! [ov_api_2_0:get_inputs_outputs]
+
+    //! [ov_api_2_0:compile_model]
+    ov::runtime::ExecutableNetwork exec_network = core.compile_model(network, "CPU");
+    //! [ov_api_2_0:compile_model]
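+
+    // Steps 4-7 (create an infer request, prepare inputs, run inference,
+    // process results) follow; the input/output handling is still a
+    // placeholder in this snippet. A hedged sketch of the expected flow —
+    // the tensor fill/read steps are assumptions about the 2.0 API, not
+    // part of this change:
+    //   auto request = exec_network.create_infer_request();
+    //   // fill the tensor for inputs[0], then:
+    //   request.infer();
+    //   // read the tensor for outputs[0]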
+
+    ov::runtime::InferRequest infer_request = exec_network.create_infer_request();
+    //
+    // InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(inputs.begin()->first);
+    // // fill input blob
+    // infer_request.Infer();
+    //
+    // InferenceEngine::Blob::Ptr output_blob = infer_request.GetBlob(outputs.begin()->first);
+    // process output data
+    return 0;
+}
diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake
index e6ec3799a3c..00cf63344ce 100644
--- a/inference-engine/cmake/vpu_dependencies.cmake
+++ b/inference-engine/cmake/vpu_dependencies.cmake
@@ -110,8 +110,6 @@ if(ANDROID)
     set(LIBUSB_INCLUDE_DIR "${LIBUSB}/include")
     set(LIBUSB_LIBRARY "${LIBUSB}/libs/${ANDROID_ABI}/libusb1.0.so")
-
-    log_rpath_from_dir(LIBUSB "${LIBUSB}/libs/${ANDROID_ABI}")
 endif()
 #
diff --git a/inference-engine/ie_bridges/python/sample/speech_sample/README.md b/inference-engine/ie_bridges/python/sample/speech_sample/README.md
index 54403416bc4..2809cc72a5f 100644
--- a/inference-engine/ie_bridges/python/sample/speech_sample/README.md
+++ b/inference-engine/ie_bridges/python/sample/speech_sample/README.md
@@ -89,15 +89,15 @@ optional arguments:
                         Path to an .xml file with a trained model (required if
                         -rg is missing).
   -rg IMPORT_GNA_MODEL, --import_gna_model IMPORT_GNA_MODEL
-                        Read GNA model from file using path/filename provided 
+                        Read GNA model from file using path/filename provided
                         (required if -m is missing).
 Options:
   -h, --help            Show this help message and exit.
   -i INPUT, --input INPUT
-                        Required. Path to an input file (.ark or .npz). 
+                        Required. Path to an input file (.ark or .npz).
   -o OUTPUT, --output OUTPUT
-                        Optional. Output file name to save inference results 
+                        Optional. Output file name to save inference results
                         (.ark or .npz).
   -r REFERENCE, --reference REFERENCE
                         Optional. Read reference score file and compare
@@ -117,7 +117,8 @@ Options:
                         (default 16).
   -sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
                         Optional. The user-specified input scale factor for
-                        quantization.
+                        quantization. If the network contains multiple inputs,
+                        provide scale factors by separating them with commas.
   -wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
                         Optional. Write GNA model to file using path/filename
                         provided.
@@ -176,27 +177,30 @@ The sample application logs each step in a standard output stream.
 [ INFO ] Creating Inference Engine
 [ INFO ] Reading the network: wsj_dnn5b.xml
 [ INFO ] Configuring input and output blobs
-[ INFO ] Using scale factor of 2175.4322417 calculated from first utterance.
+[ INFO ] Using scale factor(s) calculated from first utterance +[ INFO ] For input 0 using scale factor of 2175.4322418 [ INFO ] Loading the model to the plugin [ INFO ] Starting inference in synchronous mode [ INFO ] Utterance 0 (4k0c0301) +[ INFO ] Output blob name: affinetransform14/Fused_Add_ [ INFO ] Frames in utterance: 1294 -[ INFO ] Total time in Infer (HW and SW): 5305.47ms -[ INFO ] max error: 0.7051839 -[ INFO ] avg error: 0.0448387 -[ INFO ] avg rms error: 0.0582387 -[ INFO ] stdev error: 0.0371649 +[ INFO ] Total time in Infer (HW and SW): 6211.45ms +[ INFO ] max error: 0.7051840 +[ INFO ] avg error: 0.0448388 +[ INFO ] avg rms error: 0.0582387 +[ INFO ] stdev error: 0.0371650 [ INFO ] [ INFO ] Utterance 1 (4k0c0302) +[ INFO ] Output blob name: affinetransform14/Fused_Add_ [ INFO ] Frames in utterance: 1005 -[ INFO ] Total time in Infer (HW and SW): 5031.53ms +[ INFO ] Total time in Infer (HW and SW): 4742.27ms [ INFO ] max error: 0.7575974 [ INFO ] avg error: 0.0452166 [ INFO ] avg rms error: 0.0586013 [ INFO ] stdev error: 0.0372769 -[ INFO ] ... -[ INFO ] Total sample time: 38033.09ms +[ INFO ] Total sample time: 40219.99ms +[ INFO ] File result.npz was created! [ INFO ] This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool ``` diff --git a/inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py b/inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py index 1d2ad5c7d71..d4e2b345ea2 100644 --- a/inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py +++ b/inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py @@ -28,8 +28,9 @@ def parse_args() -> argparse.Namespace: args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).') args.add_argument('-qb', '--quantization_bits', default=16, type=int, help='Optional. Weight bits for quantization: 8 or 16 (default 16).') - args.add_argument('-sf', '--scale_factor', type=float, - help='Optional. The user-specified input scale factor for quantization.') + args.add_argument('-sf', '--scale_factor', type=str, + help='Optional. The user-specified input scale factor for quantization. ' + 'If the network contains multiple inputs, provide scale factors by separating them with commas.') args.add_argument('-wg', '--export_gna_model', type=str, help='Optional. 
Write GNA model to file using path/filename provided.') args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS) diff --git a/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py b/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py index 67601e1f379..14d2e4fa441 100755 --- a/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py +++ b/inference-engine/ie_bridges/python/sample/speech_sample/speech_sample.py @@ -103,6 +103,32 @@ def get_output_layer_list(net: Union[IENetwork, ExecutableNetwork], return [list(net.outputs.keys())[-1]] +def parse_scale_factors(args: argparse.Namespace) -> list: + """Get a list of scale factors for input files""" + input_files = re.split(', |,', args.input) + scale_factors = re.split(', |,', str(args.scale_factor)) + scale_factors = list(map(float, scale_factors)) + + if len(input_files) != len(scale_factors): + log.error(f'Incorrect command line for multiple inputs: {len(scale_factors)} scale factors provided for ' + f'{len(input_files)} input files.') + sys.exit(-7) + + for i, scale_factor in enumerate(scale_factors): + if float(scale_factor) < 0: + log.error(f'Scale factor for input #{i} (counting from zero) is out of range (must be positive).') + sys.exit(-8) + + return scale_factors + + +def set_scale_factors(plugin_config: dict, scale_factors: list): + """Set a scale factor provided for each input""" + for i, scale_factor in enumerate(scale_factors): + log.info(f'For input {i} using scale factor of {scale_factor:.7f}') + plugin_config[f'GNA_SCALE_FACTOR_{i}'] = str(scale_factor) + + def main(): log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout) args = parse_args() @@ -149,16 +175,23 @@ def main(): # Set a GNA scale factor if args.import_gna_model: - log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}') - elif args.scale_factor: - log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.') - plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor) + if args.scale_factor: + log.warning(f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}') + set_scale_factors(plugin_config, parse_scale_factors(args)) + else: + log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}') else: - utterances = read_utterance_file(args.input.split(',')[0]) - key = sorted(utterances)[0] - scale_factor = get_scale_factor(utterances[key]) - log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.') - plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor) + if args.scale_factor: + set_scale_factors(plugin_config, parse_scale_factors(args)) + else: + scale_factors = [] + + for file_name in re.split(', |,', args.input): + first_utterance = next(iter(read_utterance_file(file_name).values())) + scale_factors.append(get_scale_factor(first_utterance)) + + log.info('Using scale factor(s) calculated from first utterance') + set_scale_factors(plugin_config, scale_factors) if args.export_embedded_gna_model: plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index fcd8613f444..a0763f79765 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ 
b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp
@@ -196,6 +196,10 @@ public:
         versionInfo = &ExtensionDescription;
     }
 
+    std::map<std::string, ngraph::OpSet> getOpSets() override {
+        return {{"framework_node_ext", ngraph::OpSet()}};
+    }
+
     void Unload() noexcept override {}
 };
diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt
index 0aa8280bc5c..bc9115229e6 100644
--- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt
@@ -59,10 +59,8 @@ add_custom_command(TARGET ${TARGET_NAME}
 # ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_COMPONENT})
 
 install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
-        COMPONENT ${PYTHON_COMPONENT}
-        LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
-        COMPONENT ${PYTHON_COMPONENT})
+        RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT}
+        LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT})
 
 install(PROGRAMS __init__.py
         DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
diff --git a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt
index cb071162e35..c6ae33c7951 100644
--- a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt
@@ -51,4 +51,17 @@ add_custom_command(TARGET ${TARGET_NAME}
 )
 
 add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}
-                        EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx")
\ No newline at end of file
+                        EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx")
+
+# install
+
+install(TARGETS ${TARGET_NAME}
+        RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
+        COMPONENT tests EXCLUDE_FROM_ALL
+        LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
+        COMPONENT tests EXCLUDE_FROM_ALL)
+
+install(PROGRAMS __init__.py
+        DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
+        COMPONENT tests
+        EXCLUDE_FROM_ALL)
diff --git a/inference-engine/ie_bridges/python/tests/conftest.py b/inference-engine/ie_bridges/python/tests/conftest.py
index f0e5059d040..365e2f2f368 100644
--- a/inference-engine/ie_bridges/python/tests/conftest.py
+++ b/inference-engine/ie_bridges/python/tests/conftest.py
@@ -51,10 +51,44 @@ def pytest_configure(config):
     )
 
 
-def create_ngraph_function(inputShape):
+def create_encoder(input_shape, levels=4):
     import ngraph as ng
-    inputShape = ng.impl.PartialShape(inputShape)
-    param = ng.parameter(inputShape, dtype=np.float32, name="data")
-    result = ng.relu(param, name='out')
+    # input
+    input_node = ng.parameter(input_shape, np.float32, name="data")
+
+    padding_begin = padding_end = [0, 0]
+    strides = [1, 1]
+    dilations = [1, 1]
+    input_channels = [input_shape[1]]
+    last_output = input_node
+
+    # convolution layers
+    for i in range(levels):
+        input_c = input_channels[-1]
+        output_c = input_c * 2
+        conv_w = np.random.uniform(0, 1, [output_c, input_c, 5,
5]).astype(np.float32) + conv_node = ng.convolution(last_output, conv_w, strides, padding_begin, padding_end, dilations) + input_channels.append(output_c) + last_output = conv_node + + # deconvolution layers + for i in range(levels): + input_c = input_channels[-2] + output_c = input_channels.pop(-1) + deconv_w = np.random.uniform(0, 1, [output_c, input_c, 5, 5]).astype(np.float32) + deconv_node = ng.convolution_backprop_data(last_output, deconv_w, strides) + last_output = deconv_node + + # result + last_output.set_friendly_name("out") + result_node = ng.result(last_output) + return ng.Function(result_node, [input_node], "Encoder") + + +def create_relu(input_shape): + import ngraph as ng + input_shape = ng.impl.PartialShape(input_shape) + param = ng.parameter(input_shape, dtype=np.float32, name="data") + result = ng.relu(param, name="out") function = ng.Function(result, [param], "TestFunction") return function diff --git a/inference-engine/ie_bridges/python/tests/test_Blob.py b/inference-engine/ie_bridges/python/tests/test_Blob.py index cd2a48a2724..14624fa3daa 100644 --- a/inference-engine/ie_bridges/python/tests/test_Blob.py +++ b/inference-engine/ie_bridges/python/tests/test_Blob.py @@ -140,10 +140,11 @@ def test_set_shape(): @pytest.mark.ngraph_dependent_test @pytest.mark.template_plugin def test_blob_set_shape_after_async_infer(): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng - function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)]) + function = create_encoder([1, 4, 20, 20]) net = ng.function_to_cnn(function) + net.reshape({"data": [(1, 5), 4, 20, 20]}) ie_core = IECore() ie_core.register_plugin("templatePlugin", "TEMPLATE") exec_net = ie_core.load_network(net, "TEMPLATE") @@ -152,3 +153,4 @@ def test_blob_set_shape_after_async_infer(): with pytest.raises(RuntimeError) as e: request.input_blobs['data'].set_shape([3, 4, 20, 20]) assert "REQUEST_BUSY" in str(e.value) + request.wait() diff --git a/inference-engine/ie_bridges/python/tests/test_CDataPtr.py b/inference-engine/ie_bridges/python/tests/test_CDataPtr.py index 4969aba4a5c..838c8c8f282 100644 --- a/inference-engine/ie_bridges/python/tests/test_CDataPtr.py +++ b/inference-engine/ie_bridges/python/tests/test_CDataPtr.py @@ -61,9 +61,9 @@ def test_initialized(device): @pytest.mark.ngraph_dependent_test @pytest.mark.template_plugin def test_is_dynamic(): - from conftest import create_ngraph_function + from conftest import create_relu import ngraph as ng - function = create_ngraph_function([-1, 3, 20, 20]) + function = create_relu([-1, 3, 20, 20]) net = ng.function_to_cnn(function) ie = IECore() ie.register_plugin("templatePlugin", "TEMPLATE") diff --git a/inference-engine/ie_bridges/python/tests/test_DataPtr.py b/inference-engine/ie_bridges/python/tests/test_DataPtr.py index 40ae28b2001..27b6fec64ed 100644 --- a/inference-engine/ie_bridges/python/tests/test_DataPtr.py +++ b/inference-engine/ie_bridges/python/tests/test_DataPtr.py @@ -48,9 +48,9 @@ def test_initialized(): @pytest.mark.ngraph_dependent_test @pytest.mark.template_plugin def test_is_dynamic(): - from conftest import create_ngraph_function + from conftest import create_relu import ngraph as ng - function = create_ngraph_function([-1, 3, 20, 20]) + function = create_relu([-1, 3, 20, 20]) net = ng.function_to_cnn(function) assert net.input_info["data"].input_data.is_dynamic assert net.outputs["out"].is_dynamic diff --git 
a/inference-engine/ie_bridges/python/tests/test_IENetwork.py b/inference-engine/ie_bridges/python/tests/test_IENetwork.py index 607c8296f4b..7a6daac7214 100644 --- a/inference-engine/ie_bridges/python/tests/test_IENetwork.py +++ b/inference-engine/ie_bridges/python/tests/test_IENetwork.py @@ -166,9 +166,9 @@ def test_reshape(): ([1, 3, -1, 25], [1, 3, 22, -1]) ]) def test_reshape_with_partial_shape(device, shape, p_shape): - from conftest import create_ngraph_function + from conftest import create_relu import ngraph as ng - function = create_ngraph_function(shape) + function = create_relu(shape) net = ng.function_to_cnn(function) net.reshape({"data": p_shape}) changedFunction = ng.function_from_cnn(net) @@ -185,9 +185,9 @@ def test_reshape_with_partial_shape(device, shape, p_shape): @pytest.mark.ngraph_dependent_test def test_incorrect_reshape(device): - from conftest import create_ngraph_function + from conftest import create_relu import ngraph as ng - function = create_ngraph_function([1, 3, 22, 22]) + function = create_relu([1, 3, 22, 22]) net = ng.function_to_cnn(function) with pytest.raises(ValueError) as e: net.reshape({"data": [(2, 4, 6), 3, 22, 22]}) @@ -287,9 +287,9 @@ def test_tensor_names(): @pytest.mark.ngraph_dependent_test @pytest.mark.template_plugin def test_create_two_exec_net(): - from conftest import create_ngraph_function + from conftest import create_relu import ngraph as ng - function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)]) + function = create_relu([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)]) net = ng.function_to_cnn(function) ie_core = IECore() ie_core.register_plugin("templatePlugin", "TEMPLATE") diff --git a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py index a1ea7ce8bce..f82cbf5327f 100644 --- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py +++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py @@ -589,13 +589,13 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode): @pytest.mark.parametrize("shape, p_shape, ref_shape", [ ([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]), ([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]), - ([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]), - ([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]), + ([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]), + ([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]), ]) def test_infer_dynamic_network_with_set_shape(shape, p_shape, ref_shape): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng - function = create_ngraph_function(shape) + function = create_encoder(shape) net = ng.function_to_cnn(function) net.reshape({"data": p_shape}) ie_core = ie.IECore() @@ -616,13 +616,13 @@ def test_infer_dynamic_network_with_set_shape(shape, p_shape, ref_shape): @pytest.mark.parametrize("shape, p_shape, ref_shape", [ ([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]), ([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]), - ([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]), - ([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]), + ([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]), + ([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]), ]) def test_infer_dynamic_network_without_set_shape(shape, p_shape, ref_shape): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng - function = 
create_ngraph_function(shape) + function = create_encoder(shape) net = ng.function_to_cnn(function) net.reshape({"data": p_shape}) ie_core = ie.IECore() @@ -642,13 +642,13 @@ def test_infer_dynamic_network_without_set_shape(shape, p_shape, ref_shape): @pytest.mark.parametrize("shape, p_shape, ref_shape", [ ([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]), ([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]), - ([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]), - ([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]), + ([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]), + ([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]), ]) def test_infer_dynamic_network_with_set_blob(shape, p_shape, ref_shape): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng - function = create_ngraph_function(shape) + function = create_encoder(shape) net = ng.function_to_cnn(function) net.reshape({"data": p_shape}) ie_core = ie.IECore() @@ -670,11 +670,11 @@ def test_infer_dynamic_network_with_set_blob(shape, p_shape, ref_shape): @pytest.mark.ngraph_dependent_test @pytest.mark.template_plugin def test_infer_dynamic_network_twice(): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng shape, p_shape = [1, 4, 20, 20], [(0,5), 4, 20, 20] ref_shape1, ref_shape2 = [2, 4, 20, 20], [3, 4, 20, 20] - function = create_ngraph_function(shape) + function = create_encoder(shape) net = ng.function_to_cnn(function) net.reshape({"data": p_shape}) ie_core = ie.IECore() @@ -692,11 +692,11 @@ def test_infer_dynamic_network_twice(): @pytest.mark.ngraph_dependent_test @pytest.mark.template_plugin def test_infer_dynamic_network_with_set_blob_twice(): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng shape, p_shape = [1, 4, 20, 20], [(0,5), 4, 20, 20] ref_shape1, ref_shape2 = [2, 4, 20, 20], [3, 4, 20, 20] - function = create_ngraph_function(shape) + function = create_encoder(shape) net = ng.function_to_cnn(function) net.reshape({"data": p_shape}) ie_core = ie.IECore() @@ -723,14 +723,14 @@ def test_infer_dynamic_network_with_set_blob_twice(): @pytest.mark.template_plugin @pytest.mark.parametrize("shapes", [ ([3, 4, 20, 20], [3, 4, 20, 20], [3, 4, 20, 20]), - ([3, 4, 20, 20], [3, 6, 20, 20], [3, 8, 20, 20]), + ([3, 4, 20, 20], [3, 4, 28, 28], [3, 4, 45, 45]), ]) def test_async_infer_dynamic_network_3_requests(shapes): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng - function = create_ngraph_function([3, 4, 20, 20]) + function = create_encoder([3, 4, 20, 20]) net = ng.function_to_cnn(function) - net.reshape({"data": [3, (2, 10), 20, 20]}) + net.reshape({"data": [3, 4, (20, 50), (20, 50)]}) ie_core = ie.IECore() ie_core.register_plugin("templatePlugin", "TEMPLATE") exec_net = ie_core.load_network(net, "TEMPLATE", num_requests=3) @@ -745,9 +745,9 @@ def test_async_infer_dynamic_network_3_requests(shapes): @pytest.mark.ngraph_dependent_test @pytest.mark.template_plugin def test_set_blob_with_incorrect_name(): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng - function = create_ngraph_function([4, 4, 20, 20]) + function = create_encoder([4, 4, 20, 20]) net = ng.function_to_cnn(function) ie_core = ie.IECore() ie_core.register_plugin("templatePlugin", "TEMPLATE") @@ -763,9 +763,9 @@ def test_set_blob_with_incorrect_name(): @pytest.mark.ngraph_dependent_test 
@pytest.mark.template_plugin def test_set_blob_with_incorrect_size(): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng - function = create_ngraph_function([4, 4, 20, 20]) + function = create_encoder([4, 4, 20, 20]) net = ng.function_to_cnn(function) ie_core = ie.IECore() ie_core.register_plugin("templatePlugin", "TEMPLATE") @@ -773,6 +773,7 @@ def test_set_blob_with_incorrect_size(): tensor_desc = exec_net.requests[0].input_blobs["data"].tensor_desc tensor_desc.dims = [tensor_desc.dims[0]*2, 4, 20, 20] blob = ie.Blob(tensor_desc) + print(exec_net.requests[0].output_blobs) with pytest.raises(RuntimeError) as e: exec_net.requests[0].set_blob("data", blob) assert f"Input blob size is not equal network input size" in str(e.value) @@ -784,10 +785,11 @@ def test_set_blob_with_incorrect_size(): @pytest.mark.ngraph_dependent_test @pytest.mark.template_plugin def test_set_blob_after_async_infer(): - from conftest import create_ngraph_function + from conftest import create_encoder import ngraph as ng - function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)]) + function = create_encoder([1, 4, 20, 20]) net = ng.function_to_cnn(function) + net.reshape({"data": [(0, 5), 4, 20, 20]}) ie_core = ie.IECore() ie_core.register_plugin("templatePlugin", "TEMPLATE") exec_net = ie_core.load_network(net, "TEMPLATE") @@ -799,3 +801,4 @@ def test_set_blob_after_async_infer(): with pytest.raises(RuntimeError) as e: request.set_blob("data", blob) assert "REQUEST_BUSY" in str(e.value) + request.wait() diff --git a/inference-engine/ie_bridges/python/tests/test_NGraph.py b/inference-engine/ie_bridges/python/tests/test_NGraph.py index 139d132eb0f..13d2061ecbb 100644 --- a/inference-engine/ie_bridges/python/tests/test_NGraph.py +++ b/inference-engine/ie_bridges/python/tests/test_NGraph.py @@ -6,14 +6,14 @@ import ngraph as ng from ngraph.impl.op import Parameter from ngraph.impl import Function, Shape, Type -from conftest import model_path, create_ngraph_function +from conftest import model_path, create_relu test_net_xml, test_net_bin = model_path() def test_create_IENetwork_from_nGraph(): - func = create_ngraph_function([1, 3, 22, 22]) + func = create_relu([1, 3, 22, 22]) caps = Function.to_capsule(func) cnnNetwork = IENetwork(caps) assert cnnNetwork != None @@ -23,7 +23,7 @@ def test_create_IENetwork_from_nGraph(): def test_get_IENetwork_from_nGraph(): - func = create_ngraph_function([1, 3, 22, 22]) + func = create_relu([1, 3, 22, 22]) caps = Function.to_capsule(func) cnnNetwork = IENetwork(caps) assert cnnNetwork != None diff --git a/inference-engine/samples/benchmark_app/CMakeLists.txt b/inference-engine/samples/benchmark_app/CMakeLists.txt index b37495e5e43..e3412774f27 100644 --- a/inference-engine/samples/benchmark_app/CMakeLists.txt +++ b/inference-engine/samples/benchmark_app/CMakeLists.txt @@ -2,11 +2,46 @@ # SPDX-License-Identifier: Apache-2.0 # +set(TARGET_NAME "benchmark_app") + file (GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) -ie_add_sample(NAME benchmark_app +ie_add_sample(NAME ${TARGET_NAME} SOURCES ${SRC} HEADERS ${HDR} DEPENDENCIES format_reader ie_samples_utils OPENCV_DEPENDENCIES core) + +find_package(OpenCL) + +find_path(OpenCL_HPP_INCLUDE_DIR + NAMES + CL/cl2.hpp OpenCL/cl2.hpp + HINTS + ${opencl_root_hints} + ENV "PROGRAMFILES(X86)" + ENV AMDAPPSDKROOT + ENV INTELOCLSDKROOT + ENV NVSDKCOMPUTE_ROOT + ENV CUDA_PATH + ENV ATISTREAMSDKROOT + ENV 
OCL_ROOT
+  PATH_SUFFIXES
+    include
+    OpenCL/common/inc
+    "AMD APP/include")
+
+if(OPENCL_HEADERS_DIR)
+    # Use OpenCL CPP headers from sources if present
+    set(OpenCL_HEADERS ${OPENCL_HEADERS_DIR})
+elseif(OpenCL_HPP_INCLUDE_DIR)
+    # Append OpenCL CPP headers to C headers and use both
+    set(OpenCL_HEADERS ${OpenCL_INCLUDE_DIR} ${OpenCL_HPP_INCLUDE_DIR})
+endif()
+
+if(OpenCL_FOUND AND OpenCL_HEADERS)
+    target_link_libraries(${TARGET_NAME} PRIVATE OpenCL::OpenCL)
+    target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_HEADERS})
+    target_compile_definitions(${TARGET_NAME} PRIVATE HAVE_GPU_DEVICE_MEM_SUPPORT)
+endif()
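+
+# If cl2.hpp is not found in the SDK locations searched above, a root hint can
+# be passed at configure time through `opencl_root_hints` (consumed by the
+# find_path() call above); the path below is a placeholder:
+#   cmake -Dopencl_root_hints=/path/to/opencl/sdk <source-dir>
+# HAVE_GPU_DEVICE_MEM_SUPPORT is only defined when both the OpenCL library and
+# its headers are found.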
diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp
index c062cee1960..6395db4ff29 100644
--- a/inference-engine/samples/benchmark_app/benchmark_app.hpp
+++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp
@@ -4,6 +4,10 @@
 #pragma once
 
+#if defined(HAVE_GPU_DEVICE_MEM_SUPPORT)
+#    define HAVE_DEVICE_MEM_SUPPORT
+#endif
+
 #include <string>
 #include <vector>
@@ -132,6 +136,12 @@ static const char progress_message[] =
 // @brief message for performance counters option
 static const char pc_message[] = "Optional. Report performance counters.";
 
+#ifdef HAVE_DEVICE_MEM_SUPPORT
+// @brief message for switching memory allocation type option
+static const char use_device_mem_message[] =
+    "Optional. Switch between host and device memory allocation for input and output buffers.";
+#endif
+
 #ifdef USE_OPENCV
 // @brief message for load config option
 static const char load_config_message[] =
@@ -266,6 +276,11 @@ DEFINE_bool(progress, false, progress_message);
 /// @brief Define flag for showing performance counters
 DEFINE_bool(pc, false, pc_message);
 
+#ifdef HAVE_DEVICE_MEM_SUPPORT
+/// @brief Define flag for switching between host and device memory allocation for input and output buffers
+DEFINE_bool(use_device_mem, false, use_device_mem_message);
+#endif
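+
+// Note: when this flag is compiled in and passed on the command line (e.g.
+// `benchmark_app -m model.xml -d GPU -use_device_mem`, with a placeholder
+// model path), main.cpp routes input filling to gpu::fillRemoteBlobs(), which
+// backs input and output blobs with device-side OpenCL buffers instead of
+// host memory.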
+
 #ifdef USE_OPENCV
 /// @brief Define flag for loading configuration file
 DEFINE_string(load_config, "", load_config_message);
@@ -339,6 +354,9 @@ static void showUsage() {
     std::cout << "    -nthreads \"<integer>\"     " << infer_num_threads_message << std::endl;
     std::cout << "    -enforcebf16=<true/false>     " << enforce_bf16_message << std::endl;
     std::cout << "    -pin \"YES\"/\"HYBRID_AWARE\"/\"NO\"/\"NUMA\"   " << infer_threads_pinning_message << std::endl;
+#ifdef HAVE_DEVICE_MEM_SUPPORT
+    std::cout << "    -use_device_mem           " << use_device_mem_message << std::endl;
+#endif
     std::cout << std::endl << "  Statistics dumping options:" << std::endl;
     std::cout << "    -report_type \"<type>\"     " << report_type_message << std::endl;
     std::cout << "    -report_folder            " << report_folder_message << std::endl;
diff --git a/inference-engine/samples/benchmark_app/infer_request_wrap.hpp b/inference-engine/samples/benchmark_app/infer_request_wrap.hpp
index 5e15f597e7e..dd10a28eb7a 100644
--- a/inference-engine/samples/benchmark_app/infer_request_wrap.hpp
+++ b/inference-engine/samples/benchmark_app/infer_request_wrap.hpp
@@ -65,6 +65,10 @@ public:
         return _request.GetBlob(name);
     }
 
+    void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
+        _request.SetBlob(name, data);
+    }
+
     double getExecutionTimeInMilliseconds() const {
         auto execTime = std::chrono::duration_cast<std::chrono::nanoseconds>(_endTime - _startTime);
         return static_cast<double>(execTime.count()) * 0.000001;
diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp
index 6c643d45486..9120ce7136e 100644
--- a/inference-engine/samples/benchmark_app/main.cpp
+++ b/inference-engine/samples/benchmark_app/main.cpp
@@ -21,6 +21,7 @@
 #include "infer_request_wrap.hpp"
 #include "inputs_filling.hpp"
 #include "progress_bar.hpp"
+#include "remote_blobs_filling.hpp"
 #include "statistics_report.hpp"
 #include "utils.hpp"
@@ -592,7 +593,16 @@ int main(int argc, char* argv[]) {
         next_step();
 
         InferRequestsQueue inferRequestsQueue(exeNetwork, nireq);
-        fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
+        if (isFlagSetInCommandLine("use_device_mem")) {
+            if (device_name.find("GPU") == 0)
+                ::gpu::fillRemoteBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests, exeNetwork);
+            else if (device_name.find("CPU") == 0)
+                fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
+            else
+                IE_THROW() << "Requested device doesn't support `use_device_mem` option.";
+        } else {
+            fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
+        }
 
         // ----------------- 10. Measuring performance
         // ------------------------------------------------------------------
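The new `remote_blobs_filling` module introduced below implements this device-side allocation path. Its core pattern — wrap an existing `cl::Buffer` in a remote blob tied to the executable network's GPU context, then hand it to an infer request — reduces to the following sketch. This is a minimal illustration built from the calls that appear in the module; the helper name and the `inputName`/`desc`/`deviceBuffer` parameters are placeholders, not part of the change:

```cpp
#include <gpu/gpu_context_api_ocl.hpp>

void useDeviceBuffer(InferenceEngine::ExecutableNetwork& exeNetwork,
                     InferenceEngine::InferRequest& request,
                     const std::string& inputName,
                     const InferenceEngine::TensorDesc& desc,
                     cl::Buffer& deviceBuffer) {
    // The remote context associates OpenCL memory with the GPU plugin.
    auto context = exeNetwork.GetContext();
    // Wrap the device buffer into a Blob; no host-side copy is made.
    InferenceEngine::Blob::Ptr blob =
        InferenceEngine::gpu::make_shared_blob(desc, context, deviceBuffer);
    request.SetBlob(inputName, blob);
}
```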
diff --git a/inference-engine/samples/benchmark_app/remote_blobs_filling.cpp b/inference-engine/samples/benchmark_app/remote_blobs_filling.cpp
new file mode 100644
index 00000000000..dc6d9fbf34a
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/remote_blobs_filling.cpp
@@ -0,0 +1,140 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "remote_blobs_filling.hpp"
+
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+namespace gpu {
+
+template <typename T>
+using uniformDistribution = typename std::conditional<
+    std::is_floating_point<T>::value,
+    std::uniform_real_distribution<T>,
+    typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;
+
+template <typename T, typename T2>
+void fillBufferRandom(void* inputBuffer,
+                      size_t elementsNum,
+                      T rand_min = std::numeric_limits<T>::min(),
+                      T rand_max = std::numeric_limits<T>::max()) {
+    std::mt19937 gen(0);
+    uniformDistribution<T2> distribution(rand_min, rand_max);
+    auto inputBufferData = static_cast<T*>(inputBuffer);
+    for (size_t i = 0; i < elementsNum; i++) {
+        inputBufferData[i] = static_cast<T>(distribution(gen));
+    }
+}
+
+void fillBuffer(void* inputBuffer, size_t elementsNum, InferenceEngine::Precision precision) {
+    if (precision == InferenceEngine::Precision::FP32) {
+        fillBufferRandom<float, float>(inputBuffer, elementsNum);
+    } else if (precision == InferenceEngine::Precision::FP16) {
+        fillBufferRandom<short, short>(inputBuffer, elementsNum);
+    } else if (precision == InferenceEngine::Precision::I32) {
+        fillBufferRandom<int32_t, int32_t>(inputBuffer, elementsNum);
+    } else if (precision == InferenceEngine::Precision::I64) {
+        fillBufferRandom<int64_t, int64_t>(inputBuffer, elementsNum);
+    } else if (precision == InferenceEngine::Precision::U8) {
+        // uniform_int_distribution<uint8_t> is not allowed in the C++17
+        // standard and vs2017/19
+        fillBufferRandom<uint8_t, uint32_t>(inputBuffer, elementsNum);
+    } else if (precision == InferenceEngine::Precision::I8) {
+        // uniform_int_distribution<int8_t> is not allowed in the C++17 standard
+        // and vs2017/19
+        fillBufferRandom<int8_t, int32_t>(inputBuffer, elementsNum);
+    } else if (precision == InferenceEngine::Precision::U16) {
+        fillBufferRandom<uint16_t, uint16_t>(inputBuffer, elementsNum);
+    } else if (precision == InferenceEngine::Precision::I16) {
+        fillBufferRandom<int16_t, int16_t>(inputBuffer, elementsNum);
+    } else if (precision == InferenceEngine::Precision::BOOL) {
+        fillBufferRandom<uint8_t, uint32_t>(inputBuffer, elementsNum, 0, 1);
+    } else {
+        IE_THROW() << "Requested precision is not supported";
+    }
+}
+
+size_t getBytesPerElement(InferenceEngine::Precision precision) {
+    switch (precision) {
+    case InferenceEngine::Precision::FP32:
+        return 4;
+    case InferenceEngine::Precision::FP16:
+        return 2;
+    case InferenceEngine::Precision::I32:
+        return 4;
+    case InferenceEngine::Precision::I64:
+        return 8;
+    case InferenceEngine::Precision::U8:
+        return 1;
+    case InferenceEngine::Precision::I8:
+        return 1;
+    case InferenceEngine::Precision::U16:
+        return 2;
+    case InferenceEngine::Precision::I16:
+        return 2;
+    case InferenceEngine::Precision::BOOL:
+        return 1;
+    default:
+        IE_THROW() << "Requested precision is not supported";
+    }
+}
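+
+// fillRemoteBlobs (below): for every infer request, (1) allocate one
+// CL_MEM_READ_WRITE cl::Buffer per input/output, (2) for inputs, map the
+// buffer into host address space, fill it with random data via fillBuffer()
+// and unmap it again, then (3) wrap the buffer into a remote blob bound to
+// the executable network's GPU context and hand it over with setBlob().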
+void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
+                     const size_t& batchSize,
+                     benchmark_app::InputsInfo& app_inputs_info,
+                     std::vector<InferReqWrap::Ptr> requests,
+                     const InferenceEngine::ExecutableNetwork& exeNetwork) {
+#ifdef HAVE_DEVICE_MEM_SUPPORT
+    slog::info << "Device memory will be used for input and output blobs" << slog::endl;
+    if (inputFiles.size()) {
+        slog::warn << "Device memory supports only random data at this moment, input images will be ignored"
+                   << slog::endl;
+    }
+
+    auto context = exeNetwork.GetContext();
+    auto oclContext = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(context)->get();
+    auto oclInstance = std::make_shared<OpenCL>(oclContext);
+
+    auto setShared = [&](size_t requestId,
+                         const std::string name,
+                         const InferenceEngine::TensorDesc& desc,
+                         bool fillRandom = false) {
+        cl_int err;
+        auto inputDims = desc.getDims();
+        auto elementsNum = std::accumulate(begin(inputDims), end(inputDims), 1, std::multiplies<size_t>());
+        auto inputSize = elementsNum * getBytesPerElement(desc.getPrecision());
+
+        cl::Buffer sharedBuffer =
+            cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);
+
+        if (fillRandom) {
+            void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(sharedBuffer,
+                                                                   CL_TRUE,
+                                                                   CL_MEM_READ_WRITE,
+                                                                   0,
+                                                                   (cl::size_type)inputSize);
+            fillBuffer(mappedPtr, elementsNum, desc.getPrecision());
+            oclInstance->_queue.enqueueUnmapMemObject(sharedBuffer, mappedPtr);
+        }
+
+        InferenceEngine::Blob::Ptr sharedBlob = InferenceEngine::gpu::make_shared_blob(desc, context, sharedBuffer);
+
+        requests.at(requestId)->setBlob(name, sharedBlob);
+    };
+
+    for (size_t requestId = 0; requestId < requests.size(); requestId++) {
+        for (auto& item : exeNetwork.GetInputsInfo())
+            setShared(requestId, item.first, item.second->getTensorDesc(), true);
+
+        for (auto& item : exeNetwork.GetOutputsInfo())
+            setShared(requestId, item.first, item.second->getTensorDesc());
+    }
+#else
+    IE_THROW() << "Device memory requested for GPU device, but OpenCL was not linked";
+#endif
+}
+
+}  // namespace gpu
diff --git a/inference-engine/samples/benchmark_app/remote_blobs_filling.hpp b/inference-engine/samples/benchmark_app/remote_blobs_filling.hpp
new file mode 100644
index 00000000000..66e2b1b2c66
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/remote_blobs_filling.hpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#if defined(HAVE_GPU_DEVICE_MEM_SUPPORT)
+#    define HAVE_DEVICE_MEM_SUPPORT
+#    include <gpu/gpu_context_api_ocl.hpp>
+#endif
+
+#include <string>
+
+#include "infer_request_wrap.hpp"
+#include "utils.hpp"
+
+namespace gpu {
+
+#ifdef HAVE_DEVICE_MEM_SUPPORT
+struct OpenCL {
+    cl::Context _context;
+    cl::Device _device;
+    cl::CommandQueue _queue;
+
+    explicit OpenCL(std::shared_ptr<std::vector<cl_context_properties>> media_api_context_properties = nullptr) {
+        // get Intel GPU OCL device, create context and queue
+        {
+            std::vector<cl::Device> devices;
+            std::vector<cl::Platform> platforms;
+            const unsigned int refVendorID = 0x8086;
+
+            cl::Platform::get(&platforms);
+            for (auto& p : platforms) {
+                p.getDevices(CL_DEVICE_TYPE_GPU, &devices);
+                for (auto& d : devices) {
+                    if (refVendorID == d.getInfo<CL_DEVICE_VENDOR_ID>()) {
+                        _device = d;
+                        _context = cl::Context(_device);
+                        break;
+                    }
+                }
+            }
+
+            cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+            _queue = cl::CommandQueue(_context, _device, props);
+        }
+    }
+
+    explicit OpenCL(cl_context context) {
+        // user-supplied context handle
+        _context = cl::Context(context, true);
+        _device = cl::Device(_context.getInfo<CL_CONTEXT_DEVICES>()[0].get(), true);
+
+        cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+        _queue = cl::CommandQueue(_context, _device, props);
+    }
+};
+#endif
+
+void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
+                     const size_t& batchSize,
+                     benchmark_app::InputsInfo& app_inputs_info,
+                     std::vector<InferReqWrap::Ptr> requests,
+                     const InferenceEngine::ExecutableNetwork& exeNetwork);
+
+}  // namespace gpu
diff --git a/inference-engine/scripts/dependencies.bat
b/inference-engine/scripts/dependencies.bat deleted file mode 100644 index 5241d428d18..00000000000 --- a/inference-engine/scripts/dependencies.bat +++ /dev/null @@ -1,96 +0,0 @@ -@echo off - -:: Copyright (C) 2018-2021 Intel Corporation -:: SPDX-License-Identifier: Apache-2.0 - -setlocal enabledelayedexpansion - -for /f "delims=" %%x in (dependencies_64.txt) do (set "%%x") - -for %%A in ("%MKL%") do set MKL_FILENAME=%%~nxA -for %%A in ("%OMP%") do set OMP_FILENAME=%%~nxA -for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA -for %%A in ("%GNA%") do set GNA_FILENAME=%%~nxA -for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA -for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA -for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA -for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA - -call :DownloadFile MKL %MKL% -call :DownloadFile OMP %OMP% -call :DownloadFile MYRIAD %MYRIAD% -call :DownloadFile GNA %GNA% -call :DownloadFile OPENCV %OPENCV% -call :DownloadFile HDDL %HDDL% -call :DownloadFile VPU_FIRMWARE_MA2X8X %VPU_FIRMWARE_MA2X8X% -call :DownloadFile TBB %TBB% - -for /f "delims=" %%x in (ld_library_rpath_64.txt) do (set "%%x") - -set PATH=%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%;%PATH% -set PATH=%DL_SDK_TEMP%\test_dependencies\MKL\%MKL_FILENAME%%MKL%;%PATH% -set PATH=%DL_SDK_TEMP%\test_dependencies\OMP\%OMP_FILENAME%%OMP%;%PATH% -set PATH=%DL_SDK_TEMP%\test_dependencies\GNA\%GNA_FILENAME%%GNA%;%PATH% -set PATH=%DL_SDK_TEMP%\test_dependencies\OPENCV\%OPENCV_FILENAME%%OPENCV%;%PATH% -set PATH=%DL_SDK_TEMP%\test_dependencies\TBB\%TBB_FILENAME%%TBB%;%PATH% - -set PATH=%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%;%PATH% - -if not "%MYRIAD%"=="" ( - if exist "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\mvnc" ( - echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%" intel64 /S /I /Y /R - xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%" intel64 /S /I /Y /R - ) - - if exist "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\mvnc" ( - echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\*" intel64 /S /I /Y /R - xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\*" intel64 /S /I /Y /R - ) -) - -if not "%VPU_FIRMWARE_MA2X8X%"=="" ( - if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" ( - echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R - xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R - ) -) - -set PATH=%DL_SDK_TEMP%\test_dependencies\HDDL\%HDDL_FILENAME%%HDDL%\..\bin;%PATH% - -if not "%HDDL%"=="" ( - set HDDL_INSTALL_DIR=%DL_SDK_TEMP%\test_dependencies\HDDL\%HDDL_FILENAME%%HDDL%\.. 
- if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" ( - echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" %HDDL_INSTALL_DIR%\lib /S /I /Y /R - xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" "%HDDL_INSTALL_DIR%\lib" /S /I /Y /R - ) -) - -echo PATH=%PATH% - -endlocal & set PATH=%PATH% - -exit /B %ERRORLEVEL% - -:DownloadFile -set DEPENDENCY=%~1 -set DEPENDENCY_URL=%~2 -set DEPENDENCY_FILE=%~nx2 -set DEPENDENCY_EXT=%~x2 - -if not "%DEPENDENCY_URL%"=="" ( - if not exist "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%" ( - mkdir "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%" - for /L %%a in (1,1,10) do ( - powershell -command "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; iwr -outf '%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE%' %DEPENDENCY_URL%" - call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE% -o%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE% - if !ERRORLEVEL! equ 0 goto :DownloadFileContinue - timeout /T 15 - ) - ) -) -goto:eof - -:DownloadFileContinue -if "%DEPENDENCY_EXT%" == ".txz" call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%\_%DEPENDENCY_FILE:txz=tar% -o%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE% -del "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE%" /F /Q -goto:eof diff --git a/inference-engine/scripts/dependencies.sh b/inference-engine/scripts/dependencies.sh deleted file mode 100755 index 640ed7e9947..00000000000 --- a/inference-engine/scripts/dependencies.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -if [ "$1" = "" ]; then - dep_arch=64 - else - dep_arch=$1 -fi - -item_path="" -add_path() { - component=$1 - item_path="" - echo "Read file: dependencies_${dep_arch}.txt" - grep_component="\b${component}\b" - - if [[ $(grep -m 1 "$grep_component" "dependencies_${dep_arch}.txt") ]];then - archive_path=$(grep -m 1 "$grep_component" "dependencies_${dep_arch}.txt" | sed -E "s/${component}=//g") - library_rpath=$(grep -m 1 "$grep_component" "ld_library_rpath_${dep_arch}.txt" | sed -E "s/${component}=//g") - filename=$(basename "$archive_path") - if [[ (! -d "$DL_SDK_TEMP/test_dependencies/$component/$filename") || - (-d "$DL_SDK_TEMP/test_dependencies/$component/$filename" && - ! 
$(ls -A "$DL_SDK_TEMP/test_dependencies/$component/$filename")) ]]; then - mkdir -p "$DL_SDK_TEMP/test_dependencies/$component/$filename" - wget -q "$archive_path" -O "$DL_SDK_TEMP/test_dependencies/$filename" - if [[ $filename == *.zip ]]; then - unzip "$DL_SDK_TEMP/test_dependencies/$filename" -d "$DL_SDK_TEMP/test_dependencies/$component/$filename" - elif [[ $filename == *.7z ]]; then - 7za x -y "$DL_SDK_TEMP/test_dependencies/$filename" -o "$DL_SDK_TEMP/test_dependencies/$component/$filename" - else - tar xf "$DL_SDK_TEMP/test_dependencies/$filename" -C "$DL_SDK_TEMP/test_dependencies/$component/$filename" - fi - rm "$DL_SDK_TEMP/test_dependencies/$filename" - fi - item_path=$component/$filename/$library_rpath - fi -} - -runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB) - -export_library_path() { - export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH -} - -export_env_variable() { - export "$2"="$DL_SDK_TEMP/test_dependencies/$1" -} - -ma2480_path="" -for i in "${runtimes[@]}" -do - add_path "$i" - export_library_path "$item_path" - if [ "$i" == "VPU_FIRMWARE_USB-MA2X8X" ] - then - ma2480_path="$item_path" - fi - if [ "$i" == "HDDL" ] - then - cp -r "$DL_SDK_TEMP/test_dependencies/$ma2480_path/"* "$DL_SDK_TEMP/test_dependencies/$item_path" - export HDDL_INSTALL_DIR="$DL_SDK_TEMP/test_dependencies/$item_path/.." - fi -done - -echo DATA_PATH="$DATA_PATH" -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:lib:/usr/local/lib \ No newline at end of file diff --git a/inference-engine/src/CMakeLists.txt b/inference-engine/src/CMakeLists.txt index ceb077de7a7..8b198bfbf28 100644 --- a/inference-engine/src/CMakeLists.txt +++ b/inference-engine/src/CMakeLists.txt @@ -47,9 +47,8 @@ add_subdirectory(snippets) add_custom_target(ie_libraries ALL DEPENDS inference_engine_transformations inference_engine_legacy inference_engine inference_engine_preproc - inference_engine_ir_v7_reader inference_engine_ir_reader - inference_engine_lp_transformations inference_engine_snippets - ir_frontend) + inference_engine_ir_v7_reader ir_ngraph_frontend + inference_engine_lp_transformations inference_engine_snippets) if(NGRAPH_ONNX_FRONTEND_ENABLE) add_dependencies(ie_libraries onnx_ngraph_frontend) diff --git a/inference-engine/src/cldnn_engine/cldnn_config.cpp b/inference-engine/src/cldnn_engine/cldnn_config.cpp index 28aa8063e47..533e32271bd 100644 --- a/inference-engine/src/cldnn_engine/cldnn_config.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_config.cpp @@ -28,7 +28,7 @@ namespace CLDNNPlugin { static void createDirectory(std::string _path) { #if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) - std::wstring widepath = FileUtils::multiByteCharToWString(_path.c_str()); + std::wstring widepath = ov::util::string_to_wstring(_path.c_str()); const wchar_t* path = widepath.c_str(); #else const char* path = _path.c_str(); diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index 75ea9d2a251..49d5212d37e 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -60,8 +60,6 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr graph, uint16_t stream_id) void CLDNNGraph::UpdateLayersMaps() { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateLayersMaps"); primitiveIDs = m_program->primitiveIDs; - primitivesToIRLayersMap = m_program->primitivesToIRLayersMap; - IRToNgraphLayersMap = m_program->IRToNgraphLayersMap; 
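// The two map copies removed above duplicated bookkeeping that the plugin now
// derives from a single query, GetNetwork()->get_ext_id_mapping(), as the
// find_origin_layers() rewrite further down in this hunk shows. A minimal
// compilable sketch of that lookup (a plain std::map stands in for the real
// mapping; the names here are illustrative):
#include <map>
#include <string>
#include <vector>

std::vector<std::string> find_origin_layers(const std::map<std::string, std::string>& extIdMap,
                                            const std::string& prim_id) {
    auto it = extIdMap.find(prim_id);
    if (it == extIdMap.end())
        return {};            // primitive added by the plugin itself, no IR origin
    return {it->second};      // the single originating graph layer
}

int main() {
    std::map<std::string, std::string> extIdMap{{"convolution:conv1", "conv1"}};
    return find_origin_layers(extIdMap, "convolution:conv1").size() == 1 ? 0 : 1;
}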
prevPrimitiveIDs = m_program->prevPrimitiveIDs; profilingIDs = m_program->profilingIDs; perfMap = m_program->perfMap; @@ -219,25 +217,6 @@ std::shared_ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s return res; }; - auto split_string = [](std::string src, std::string delimiter = ",") -> std::vector { - std::vector tokens; - std::string tokenBuf; - size_t prev = 0, pos = 0, srcLength = src.length(), delimLength = delimiter.length(); - do { - pos = src.find(delimiter, prev); - if (pos == std::string::npos) { - pos = srcLength; - } - tokenBuf = src.substr(prev, pos - prev); - if (!tokenBuf.empty()) { - tokens.push_back(tokenBuf); - } - prev = pos + delimLength; - } while (pos < srcLength && prev < srcLength); - - return tokens; - }; - auto remove_type_from_name = [](const std::string& name) -> std::string { auto it = std::find(name.begin(), name.end(), ':'); if (it == name.end() || (it + 1) == name.end()) @@ -246,22 +225,13 @@ std::shared_ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s return std::string((it+1), name.end()); }; + auto extIdMap = GetNetwork()->get_ext_id_mapping(); + auto find_origin_layers = [&](const std::string& name) -> std::vector { - if (primitivesToIRLayersMap.find(name) == primitivesToIRLayersMap.end()) + if (extIdMap.find(name) == extIdMap.end()) { return {}; - - auto cnn_names = primitivesToIRLayersMap.at(name); - std::vector res; - - for (auto& cnn_name : cnn_names) { - if (IRToNgraphLayersMap.find(cnn_name) != IRToNgraphLayersMap.end()) { - auto ngraph_names = split_string(IRToNgraphLayersMap.at(cnn_name)); - res.insert(res.end(), ngraph_names.begin(), ngraph_names.end()); - } else { - res.push_back(cnn_name); - } } - return res; + return { extIdMap.at(name) }; }; auto get_inputs = [&] (const cldnn::primitive_info& prim_info) { @@ -599,13 +569,21 @@ std::map CLDNNGraph::G auto allIds = GetNetwork()->get_all_primitive_org_ids(); auto executedPrimitives = GetNetwork()->get_executed_primitives(); auto primitivesInfo = GetNetwork()->get_primitives_info(); + auto extIdMap = GetNetwork()->get_ext_id_mapping(); - auto getUpperCaseName = [&](std::string name) { + auto getUpperCaseName = [](std::string name) { if (name.length() > 0) name[0] = toupper(name[0]); return name; }; + auto getClearName = [](std::string name) { + if (name.find(":") != std::string::npos) { + name = name.substr(name.find(":") + 1, name.length()); + } + return name; + }; + auto getFromProfiling = [&](std::string primId) -> bool { auto perfIter = perfMap.find(primId); @@ -696,10 +674,7 @@ std::map CLDNNGraph::G } } - std::string layerName = primId; - if (primId.find(":") != std::string::npos) { - layerName = primId.substr(primId.find(":") + 1, primId.length()); - } + std::string layerName = getClearName(primId); for (auto& pi : primitivesInfo) { if (pi.original_id == primId) { @@ -735,10 +710,27 @@ std::map CLDNNGraph::G } // Step 3. 
Checking primitives which has been deleted from execution order but added by clDNNPlugin - for (auto& primId : profilingIDs) + for (auto& primId : profilingIDs) { if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) { getFromProfiling(primId); } + } + + for (auto& p : extIdMap) { + if (p.first.find(p.second) != std::string::npos) { + continue; + } + auto first_res = result.find(getClearName(p.first)); + auto second_res = result.find(getClearName(p.second)); + + if (first_res != result.end() && second_res != result.end() && first_res != second_res) { + std::swap(first_res->second.cpu_uSec, second_res->second.cpu_uSec); + std::swap(first_res->second.realTime_uSec, second_res->second.realTime_uSec); + std::swap(first_res->second.status, second_res->second.status); + std::swap(first_res->second.exec_type, second_res->second.exec_type); + std::swap(first_res->second.execution_index, second_res->second.execution_index); + } + } return result; } diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.h b/inference-engine/src/cldnn_engine/cldnn_graph.h index 206c58aaccd..d220c4009f5 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.h +++ b/inference-engine/src/cldnn_engine/cldnn_graph.h @@ -61,8 +61,6 @@ protected: InferenceEngine::gpu::ClContext::Ptr m_context; std::vector> m_networks; std::map primitiveIDs; - std::map> primitivesToIRLayersMap; - std::map IRToNgraphLayersMap; std::map> prevPrimitiveIDs; std::map> perfMap; diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp index 9a55217975c..6c0e2527eac 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp @@ -12,6 +12,7 @@ #include "cldnn_remote_context.h" #include "cldnn_executable_network.h" #include "cldnn_itt.h" +#include "cldnn/runtime/debug_configuration.hpp" #include #include @@ -622,6 +623,10 @@ void CLDNNInferRequest::allocate_inputs() { IE_THROW() << "Input layout for " << name << " is not found"; } + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_COUT << "[" << name << ": input blob]" << std::endl; + } if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { TensorDesc desc_fp32 = desc; desc_fp32.setPrecision(Precision::FP32); @@ -673,6 +678,10 @@ void CLDNNInferRequest::allocate_outputs() { const cldnn::layout output_layout = m_graph->GetNetwork()->get_output_memory(outputID)->get_layout(); const TensorDesc& desc = no.second->getTensorDesc(); + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_COUT << "[" << no.first << ": output blob]" << std::endl; + } auto blobPtr = create_device_blob(desc, output_layout); _deviceOutputs[no.first] = blobPtr; _outputs[no.first] = blobPtr; diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp index 7386501f0b1..22a55d743bd 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -284,14 +284,12 @@ std::vector Program::GetInputPrimitiveIDs(const std::shared void Program::AddPrimitiveToProfiler(const std::shared_ptr& op, cldnn::primitive_id customOutputId) { auto id = layer_type_name_ID(op); - primitivesToIRLayersMap[id] = { op->get_friendly_name() }; primitiveIDs[id] = customOutputId.empty() ? 
id : customOutputId;
     profilingIDs.push_back(id);
 }
 
 void Program::AddPrimitiveToProfiler(cldnn::primitive_id id, const std::shared_ptr<ngraph::Node>& op,
                                      cldnn::primitive_id customOutputId) {
-    primitivesToIRLayersMap[id] = { op->get_friendly_name() };
     primitiveIDs[id] = customOutputId.empty() ? id : customOutputId;
     profilingIDs.push_back(id);
 }
@@ -299,7 +297,6 @@ void Program::AddPrimitiveToProfiler(cldnn::primitive_id id, const std::shared_p
 void Program::AddInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
                                           const std::shared_ptr<ngraph::Node>& op) {
     InitProfileInfo(id, layer_type_lower(op), false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, parentId);
-    primitivesToIRLayersMap[id] = { op->get_friendly_name() };
     primitiveIDs[id] = id;
     profilingIDs.push_back(id);
 }
@@ -328,28 +325,24 @@ void Program::InitProfileInfo(const std::string& layerName,
 // TODO: Does it make sense to add such method to ngraph core?
 bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
-    std::list<std::shared_ptr<ngraph::Node>> nodes_to_process = { node };
-    while (!nodes_to_process.empty()) {
-        auto current_node = nodes_to_process.front();
-        nodes_to_process.pop_front();
-
-        for (size_t i = 0; i < current_node->get_input_size(); i++) {
-            auto input_node = current_node->get_input_node_shared_ptr(i);
-
-            // If input is constant, then drop if from the processing list
-            if (std::dynamic_pointer_cast<ngraph::op::Constant>(input_node) != nullptr)
-                continue;
-
-            // If the node doesn't have any parents and it's not a constant, then we deal with dynamic path
-            if (input_node->get_input_size() == 0) {
+    std::set<std::shared_ptr<ngraph::Node>> nodes_processed = {};
+    std::function<bool(const std::shared_ptr<ngraph::Node>&)> is_const_node = [&nodes_processed, &is_const_node](const std::shared_ptr<ngraph::Node>& node) {
+        if (nodes_processed.count(node)) return true;
+        nodes_processed.insert(node);
+        // If input is constant, then drop if from the processing list
+        if (std::dynamic_pointer_cast<ngraph::op::Constant>(node) != nullptr)
+            return true;
+        // If the node doesn't have any parents and it's not a constant, then we deal with dynamic path
+        if (node->get_input_size() == 0)
+            return false;
+        for (size_t i = 0; i < node->get_input_size(); i++) {
+            auto input_node = node->get_input_node_shared_ptr(i);
+            if (!is_const_node(input_node))
                 return false;
-            }
-
-            nodes_to_process.insert(nodes_to_process.end(), input_node);
         }
-    }
-
-    return true;
+        return true;
+    };
+    return is_const_node(node);
 }
 } // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/cldnn_program.h b/inference-engine/src/cldnn_engine/cldnn_program.h index 23b6313cac9..f499104a9b9 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.h +++ b/inference-engine/src/cldnn_engine/cldnn_program.h @@ -76,8 +76,6 @@ public:
     static const cldnn::primitive_id m_postCustomLayerTag;
 
     std::map<std::string, cldnn::primitive_id> primitiveIDs;
-    std::map<std::string, std::vector<std::string>> primitivesToIRLayersMap;
-    std::map<std::string, std::string> IRToNgraphLayersMap;
     std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;
     std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;
diff --git a/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp b/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp index e46643f0617..d67f9e9888a 100644 --- a/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp +++ b/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp @@ -42,7 +42,8 @@ void CreateBatchToSpaceOp(Program& p, const std::shared_ptrget_friendly_name());
     p.AddPrimitive(batchToSpacePrim);
     p.AddPrimitiveToProfiler(op);
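The IsNodeOnConstPath() rewrite above trades the iterative worklist for a memoized recursive walk: a subgraph that feeds several inputs is visited once instead of once per use, and the visited set also keeps the traversal from looping. A standalone sketch of the same traversal over a toy node type (Node and is_on_const_path are illustrative, not the plugin's API):

    #include <functional>
    #include <iostream>
    #include <memory>
    #include <set>
    #include <vector>

    struct Node {
        bool is_constant = false;
        std::vector<std::shared_ptr<Node>> inputs;
    };

    // True when every path upward from `node` terminates in a constant.
    bool is_on_const_path(const std::shared_ptr<Node>& node) {
        std::set<std::shared_ptr<Node>> processed;
        std::function<bool(const std::shared_ptr<Node>&)> is_const =
            [&](const std::shared_ptr<Node>& n) -> bool {
                if (processed.count(n))
                    return true;          // already visited on this walk
                processed.insert(n);
                if (n->is_constant)
                    return true;
                if (n->inputs.empty())
                    return false;         // non-constant source: dynamic path
                for (auto& in : n->inputs)
                    if (!is_const(in))
                        return false;
                return true;
            };
        return is_const(node);
    }

    int main() {
        auto c = std::make_shared<Node>(Node{true, {}});
        auto mul = std::make_shared<Node>(Node{false, {c, c}});  // `c` is shared; visited once
        std::cout << std::boolalpha << is_on_const_path(mul) << "\n";  // prints: true
    }

Marking a node as processed before recursing means any non-const discovery still aborts the whole walk with false, so a memoized "true" can only ever be returned for nodes that were, or are being, proven constant.
diff --git a/inference-engine/src/cldnn_engine/ops/broadcast.cpp b/inference-engine/src/cldnn_engine/ops/broadcast.cpp index 6d6e6303200..16264a31fea 100644 --- a/inference-engine/src/cldnn_engine/ops/broadcast.cpp +++ 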
b/inference-engine/src/cldnn_engine/ops/broadcast.cpp @@ -31,8 +31,13 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptrget_input_element_type(0)); - auto reorderPrim = cldnn::reorder(reorderName, inputPrimitive, targetFormat, targetDatatype); - + auto reorderPrim = cldnn::reorder(reorderName, + inputPrimitive, + targetFormat, + targetDatatype, + std::vector(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(reorderPrim); p.AddInnerPrimitiveToProfiler(reorderName, layerName, op); @@ -66,7 +71,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(reshapePrim); p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op); @@ -75,7 +80,9 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptrget_output_shape(0))); + CldnnTensorFromIEDims(op->get_output_shape(0)), + {}, + op->get_friendly_name()); p.AddPrimitive(broadcastPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/concat.cpp b/inference-engine/src/cldnn_engine/ops/concat.cpp index 453e9996530..fc1e51d1981 100644 --- a/inference-engine/src/cldnn_engine/ops/concat.cpp +++ b/inference-engine/src/cldnn_engine/ops/concat.cpp @@ -45,7 +45,8 @@ void CreateConcatOp(Program& p, const std::shared_ptr& o layerName, inputPrimitives, GetConcatAxis(op->get_axis(), op->get_input_shape(0).size()), - DataTypeFromPrecision(op->get_output_element_type(0))); + DataTypeFromPrecision(op->get_output_element_type(0)), + op->get_friendly_name()); p.AddPrimitive(concatPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/constant.cpp b/inference-engine/src/cldnn_engine/ops/constant.cpp index fea42f31d98..58bc0a18d4a 100644 --- a/inference-engine/src/cldnn_engine/ops/constant.cpp +++ b/inference-engine/src/cldnn_engine/ops/constant.cpp @@ -18,6 +18,7 @@ #include "ngraph/op/util/op_types.hpp" #include "cldnn/primitives/data.hpp" +#include "cldnn/runtime/debug_configuration.hpp" namespace CLDNNPlugin { @@ -169,6 +170,10 @@ void CreateConstantOp(Program& p, const std::shared_ptrsecond; } else { + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_COUT << "[" << initialconstPrimID << ": constant]" << std::endl; + } cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false); auto& stream = p.GetEngine().get_program_stream(); cldnn::mem_lock lock{mem, stream}; @@ -199,7 +204,7 @@ void CreateConstantOp(Program& p, const std::shared_ptrget_friendly_name())); p.blobMemCache[std::make_pair(data, constDims)] = initialconstPrimID; constPrimID = initialconstPrimID; } diff --git a/inference-engine/src/cldnn_engine/ops/convert.cpp b/inference-engine/src/cldnn_engine/ops/convert.cpp index 6af5bee759d..603eb26abeb 100644 --- a/inference-engine/src/cldnn_engine/ops/convert.cpp +++ b/inference-engine/src/cldnn_engine/ops/convert.cpp @@ -19,8 +19,13 @@ void CreateConvertLikeOp(Program& p, const std::shared_ptrget_input_element_type(1)); - auto reorderPrim = cldnn::reorder(layerName, inputPrimitives[0], cldnn::format::any, outDataType); - + auto reorderPrim = cldnn::reorder(layerName, + inputPrimitives[0], + cldnn::format::any, + outDataType, + std::vector(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(reorderPrim); p.AddPrimitiveToProfiler(op); } @@ -32,7 +37,13 @@ void CreateConvertOp(Program& p, const std::shared_ptr& auto outDataType = DataTypeFromPrecision(op->get_destination_type()); - 
auto reorderPrim = cldnn::reorder(layerName, inputPrimitives[0], cldnn::format::any, outDataType); + auto reorderPrim = cldnn::reorder(layerName, + inputPrimitives[0], + cldnn::format::any, + outDataType, + std::vector(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(reorderPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/convolution.cpp b/inference-engine/src/cldnn_engine/ops/convolution.cpp index 83f536a68b7..e8c44693dbd 100644 --- a/inference-engine/src/cldnn_engine/ops/convolution.cpp +++ b/inference-engine/src/cldnn_engine/ops/convolution.cpp @@ -84,7 +84,8 @@ void CreateGroupConvolutionOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(convPrim); p.AddPrimitiveToProfiler(op); @@ -112,7 +113,8 @@ void CreateConvolutionOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(convPrim); p.AddPrimitiveToProfiler(op); @@ -146,7 +148,8 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(permutePrim); p.AddInnerPrimitiveToProfiler(permuteName, layerName, op); @@ -159,14 +162,15 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptrget_pads_begin(), op->get_dilations(), op->get_strides(), 1); auto deconvPrim = cldnn::deconvolution(layerName, - inputs[0], - weights, - {}, - params.groups, - params.stride, - params.padding, - CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()), - weights_have_group_dim); + inputs[0], + weights, + {}, + params.groups, + params.stride, + params.padding, + CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()), + weights_have_group_dim, + op->get_friendly_name()); p.AddPrimitive(deconvPrim); p.AddPrimitiveToProfiler(op); @@ -202,7 +206,8 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(permutePrim); p.AddInnerPrimitiveToProfiler(permuteName, layerName, op); @@ -214,14 +219,15 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptrget_output_tensor(0).get_shape()), - weights_have_group_dim); + inputs[0], + weights, + {}, + params.groups, + params.stride, + params.padding, + CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()), + weights_have_group_dim, + op->get_friendly_name()); p.AddPrimitive(deconvPrim); p.AddPrimitiveToProfiler(op); @@ -247,7 +253,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(convPrim); p.AddPrimitiveToProfiler(op); @@ -280,7 +287,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(defConvPrimInterp); p.AddInnerPrimitiveToProfiler(defConvLayerNameInterp, defConvLayerNameConv, op); auto defConvPrim = cldnn::deformable_conv(defConvLayerNameConv, @@ -288,7 +296,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(defConvPrim); p.AddPrimitiveToProfiler(defConvLayerNameConv, op); } @@ -313,7 +322,8 @@ void CreateBinaryConvolutionOp(Program& p, const std::shared_ptrget_pad_value(), - calc_precision); + calc_precision, + op->get_friendly_name()); p.AddPrimitive(convPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp b/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp index c8bd8d54e07..4ff72462a47 100644 --- a/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp +++ 
b/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp @@ -11,6 +11,7 @@ #include "cldnn/primitives/ctc_greedy_decoder.hpp" #include "cldnn/primitives/reorder.hpp" #include "cldnn/primitives/mutable_data.hpp" +#include "cldnn/runtime/debug_configuration.hpp" #include "transformations/utils/utils.hpp" @@ -33,7 +34,10 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(preprocessPrim); p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op); reorderedInputs[portIndex] = (reorderPrimName); @@ -70,11 +74,16 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_output_shape(1).size()), CldnnTensorFromIEDims(op->get_output_shape(1))); + GPU_DEBUG_GET_INSTANCE(debug_config); + GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; + } shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout)); cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write"; - auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]); - p.primitivesToIRLayersMap[ctc_gd_mutable_id_w] = { op->get_friendly_name() }; + auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, + shared_memory[0], + op->get_friendly_name()); p.primitiveIDs[ctc_gd_mutable_id_w] = ctc_gd_mutable_id_w; p.AddPrimitive(ctc_gd_mutable_prim); reorderedInputs.push_back(ctc_gd_mutable_id_w); @@ -86,7 +95,8 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_output_shape(0))); + CldnnTensorFromIEDims(op->get_output_shape(0)), + op->get_friendly_name()); // clDNN primitive supports only i32 as output data type primitive.output_data_type = DataTypeFromPrecision(ngraph::element::i32); @@ -99,8 +109,10 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_friendly_name() }; + auto ctc_gd_mutable_prim_r = cldnn::mutable_data(ctc_gd_mutable_id_r, + { CTCGreedyDecoderLayerName }, + shared_memory[0], + op->get_friendly_name()); p.primitiveIDs[ctc_gd_mutable_id_r] = ctc_gd_mutable_id_r; p.AddPrimitive(ctc_gd_mutable_prim_r); } diff --git a/inference-engine/src/cldnn_engine/ops/cum_sum.cpp b/inference-engine/src/cldnn_engine/ops/cum_sum.cpp index 1bdcec2957e..9f8e2a463f2 100644 --- a/inference-engine/src/cldnn_engine/ops/cum_sum.cpp +++ b/inference-engine/src/cldnn_engine/ops/cum_sum.cpp @@ -63,7 +63,8 @@ void CreateCumSumOp(Program& p, const std::shared_ptr& o inputPrimitives[0], GetCumSumAxis(axis, rank), exclusive, - reverse); + reverse, + op->get_friendly_name()); p.AddPrimitive(primitive); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/custom.cpp b/inference-engine/src/cldnn_engine/ops/custom.cpp index 85945bfbdb9..c967100d1f9 100644 --- a/inference-engine/src/cldnn_engine/ops/custom.cpp +++ b/inference-engine/src/cldnn_engine/ops/custom.cpp @@ -145,7 +145,10 @@ void CreateCustomOp(Program& p, const std::shared_ptr& op, CLDNNCu reorderPrimName, inputPrimitives[param.portIndex], param.format, - DataTypeFromPrecision(op->get_input_element_type(param.portIndex))); + DataTypeFromPrecision(op->get_input_element_type(param.portIndex)), + std::vector(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(preprocessPrim); p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op); @@ -229,7 +232,8 @@ void CreateCustomOp(Program& p, const 
std::shared_ptr& op, CLDNNCu customLayer->CompilerOptions(), outputLayout, gws, - lws); + lws, + op->get_friendly_name()); auto prevLayerName = genericLayerName; if (outputLayout.format != cldnn::format::any) { @@ -239,7 +243,10 @@ void CreateCustomOp(Program& p, const std::shared_ptr& op, CLDNNCu cldnn::reorder(reorderPrimName, genericLayerName, DefaultFormatForDims(op->get_output_shape(0).size()), - customPrim.output_layout.data_type)); + customPrim.output_layout.data_type, + std::vector(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name())); prevLayerName = reorderPrimName; p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op); } diff --git a/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp b/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp index b53262ab23d..aa762a356d7 100644 --- a/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp +++ b/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp @@ -33,7 +33,8 @@ void CreateDepthToSpaceOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(depthToSpacePrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/detection_output.cpp b/inference-engine/src/cldnn_engine/ops/detection_output.cpp index aa2b505f0e7..80616090655 100644 --- a/inference-engine/src/cldnn_engine/ops/detection_output.cpp +++ b/inference-engine/src/cldnn_engine/ops/detection_output.cpp @@ -75,7 +75,8 @@ void CreateDetectionOutputOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(detectionPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/eltwise.cpp b/inference-engine/src/cldnn_engine/ops/eltwise.cpp index 817512d5bf9..f486bff593d 100644 --- a/inference-engine/src/cldnn_engine/ops/eltwise.cpp +++ b/inference-engine/src/cldnn_engine/ops/eltwise.cpp @@ -46,7 +46,13 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr& op, cl if (targetFormat.value != DefaultFormatForDims(inputRank).value) { auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder"; auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(i)); - auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype); + auto reorderPrim = cldnn::reorder(reorderName, + inputPrimitives[i], + targetFormat, + targetDatatype, + std::vector(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(reorderPrim); p.AddInnerPrimitiveToProfiler(reorderName, layerName, op); @@ -61,7 +67,7 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr& op, cl auto targetShape = CldnnTensorFromIEDims(inputShape); - auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape); + auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name()); p.AddPrimitive(reshapePrim); p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op); @@ -74,7 +80,8 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr& op, cl inputPrimitives, mode, {}, - out_dt); + out_dt, + op->get_friendly_name()); p.AddPrimitive(eltwisePrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp b/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp index 2e97a60aebf..369c0eca44a 100644 --- a/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp +++ b/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp @@ -49,7 +49,10 @@ void CreateEmbeddingBagOffsetsSumOp(Program& p, const 
std::shared_ptr(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(preprocessPrim); p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op); reorderedInputs[portIndex] = (reorderPrimName); @@ -62,7 +65,8 @@ void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptrget_output_shape(0)), - defaultIndex); + defaultIndex, + op->get_friendly_name()); p.AddPrimitive(embeddingBagPrim); p.AddPrimitiveToProfiler(op); @@ -86,7 +90,10 @@ void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(preprocessPrim); p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op); reorderedInputs[portIndex] = (reorderPrimName); @@ -98,7 +105,9 @@ void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptrget_output_shape(0))); + CldnnTensorFromIEDims(op->get_output_shape(0)), + -1, + op->get_friendly_name()); p.AddPrimitive(embeddingBagPrim); p.AddPrimitiveToProfiler(op); @@ -140,7 +149,10 @@ void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(preprocessPrim); p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op); reorderedInputs[portIndex] = (reorderPrimName); @@ -153,7 +165,8 @@ void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptrget_output_shape(0)), - defaultIndex); + defaultIndex, + op->get_friendly_name()); p.AddPrimitive(embeddingBagPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp b/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp index 23b5f014320..088a0fcd413 100644 --- a/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp +++ b/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp @@ -38,7 +38,8 @@ void CreateExtractImagePatchesOp(Program& p, const std::shared_ptrget_output_shape(0))); + CldnnTensorFromIEDims(op->get_output_shape(0)), + op->get_friendly_name()); p.AddPrimitive(extractImagePatchesPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp b/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp index 345a70f34bb..52ea33e5d3d 100644 --- a/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp +++ b/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp @@ -31,7 +31,8 @@ void CreateFakeQuantizeOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(quantizationPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/gather tree.cpp b/inference-engine/src/cldnn_engine/ops/gather tree.cpp index 6b73131fd29..5476aa40d97 100644 --- a/inference-engine/src/cldnn_engine/ops/gather tree.cpp +++ b/inference-engine/src/cldnn_engine/ops/gather tree.cpp @@ -30,7 +30,10 @@ void CreateGatherTreeOp(Program& p, const std::shared_ptr(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(preprocessPrim); p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op); reorderedInputs[portIndex] = reorderPrimName; @@ -43,7 +46,8 @@ void CreateGatherTreeOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(gatherTreePrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/gather.cpp b/inference-engine/src/cldnn_engine/ops/gather.cpp index d22258e0673..bcf632f3194 100644 --- 
a/inference-engine/src/cldnn_engine/ops/gather.cpp +++ b/inference-engine/src/cldnn_engine/ops/gather.cpp @@ -77,7 +77,10 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr& op, const int64_t auto preprocessPrim = cldnn::reorder(reorderPrimName, inputPrimitives[portIndex], targetFormat, - cldnn::data_types::i32); + cldnn::data_types::i32, + std::vector(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(preprocessPrim); p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op); reorderedInputs[portIndex] = reorderPrimName; @@ -94,7 +97,8 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr& op, const int64_t outLayout, CldnnTensorFromIEDims(op->get_output_shape(0)), batch_dim, - support_neg_ind); + support_neg_ind, + op->get_friendly_name()); p.AddPrimitive(gatherPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/gather_elements.cpp b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp index d6138280750..50c25c37f1c 100644 --- a/inference-engine/src/cldnn_engine/ops/gather_elements.cpp +++ b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp @@ -55,7 +55,8 @@ void CreateGatherElementsOp(Program& p, const std::shared_ptrget_output_shape(0)), - GetGatherAxis(axis, rank)); + GetGatherAxis(axis, rank), + op->get_friendly_name()); p.AddPrimitive(primitive); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/gather_nd.cpp b/inference-engine/src/cldnn_engine/ops/gather_nd.cpp index cbdc5659bb3..266ada7f904 100644 --- a/inference-engine/src/cldnn_engine/ops/gather_nd.cpp +++ b/inference-engine/src/cldnn_engine/ops/gather_nd.cpp @@ -22,10 +22,11 @@ void CreateGatherNDOp(Program& p, const std::shared_ptrget_batch_dims(); auto primitive = cldnn::gather_nd(layerName, - inputPrimitives[0], - inputPrimitives[1], - indices_rank, - batch_dims); + inputPrimitives[0], + inputPrimitives[1], + indices_rank, + batch_dims, + op->get_friendly_name()); p.AddPrimitive(primitive); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/grn.cpp b/inference-engine/src/cldnn_engine/ops/grn.cpp index 960dd034947..c6d07fe6191 100644 --- a/inference-engine/src/cldnn_engine/ops/grn.cpp +++ b/inference-engine/src/cldnn_engine/ops/grn.cpp @@ -19,7 +19,8 @@ void CreateGRNOp(Program& p, const std::shared_ptr& op) { auto primitive = cldnn::grn(layerName, inputPrimitives[0], op->get_bias(), - DataTypeFromPrecision(op->get_output_element_type(0))); + DataTypeFromPrecision(op->get_output_element_type(0)), + op->get_friendly_name()); p.AddPrimitive(primitive); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/interpolate.cpp b/inference-engine/src/cldnn_engine/ops/interpolate.cpp index df99e6972ee..190032897b4 100644 --- a/inference-engine/src/cldnn_engine/ops/interpolate.cpp +++ b/inference-engine/src/cldnn_engine/ops/interpolate.cpp @@ -193,7 +193,8 @@ void CreateInterpolateOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(resamplePrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/loop.cpp b/inference-engine/src/cldnn_engine/ops/loop.cpp index 1ac452265b8..604f73b7e5a 100644 --- a/inference-engine/src/cldnn_engine/ops/loop.cpp +++ b/inference-engine/src/cldnn_engine/ops/loop.cpp @@ -29,11 +29,11 @@ using Loop = ngraph::op::v5::Loop; namespace CLDNNPlugin { template -static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) { +static DATA_TYPE 
CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) { auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); cldnn::mem_lock ptr{mem, p.GetEngine().get_program_stream()}; *ptr.begin() = num; - return {id, mem}; + return {id, mem, ext_prim_id}; } static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr& op, @@ -44,7 +44,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx)); cldnn::layout output_layout = cldnn::layout(precision, format, tensor); auto mem = p.GetEngine().allocate_memory(output_layout); - auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency + auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency return md; } @@ -161,8 +161,7 @@ void CreateLoopOp(Program& p, const std::shared_ptr& op) { } const cldnn::primitive_id num_iteration_id = layerName + "_numIteration"; { - cldnn::mutable_data num_iteration = CreateScalarData(p, num_iteration_id, 0); - p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() }; + cldnn::mutable_data num_iteration = CreateScalarData(p, num_iteration_id, 0, op->get_friendly_name()); p.primitiveIDs[num_iteration_id] = num_iteration_id; p.AddPrimitive(num_iteration); p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op); @@ -216,7 +215,8 @@ void CreateLoopOp(Program& p, const std::shared_ptr& op) { back_edges, /* back edge mapping */ num_iterations, /* max iteration, i.e. length of iteration axis */ body_current_iteration_id, - body_execution_condition_id); + body_execution_condition_id, + op->get_friendly_name()); p.AddPrimitive(loopPrimitive); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/lrn.cpp b/inference-engine/src/cldnn_engine/ops/lrn.cpp index c13c17daaeb..28e55bf038f 100644 --- a/inference-engine/src/cldnn_engine/ops/lrn.cpp +++ b/inference-engine/src/cldnn_engine/ops/lrn.cpp @@ -38,7 +38,8 @@ void CreateLRNOp(Program& p, const std::shared_ptr& op) { static_cast(op->get_bias()), static_cast(op->get_alpha()), static_cast(op->get_beta()), - GetNormRegion(axis_value)); + GetNormRegion(axis_value), + op->get_friendly_name()); p.AddPrimitive(lrnPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/matmul.cpp b/inference-engine/src/cldnn_engine/ops/matmul.cpp index 3d09fc7fd4e..53b8fecd6c2 100644 --- a/inference-engine/src/cldnn_engine/ops/matmul.cpp +++ b/inference-engine/src/cldnn_engine/ops/matmul.cpp @@ -89,7 +89,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o auto permuteName = op->get_friendly_name() + "/transpose_b"; auto permutePrim = cldnn::permute(permuteName, weightsName, - cldnn_permute_order); + cldnn_permute_order, + op->get_friendly_name()); p.AddPrimitive(permutePrim); p.AddInnerPrimitiveToProfiler(permuteName, layerName, op); weightsName = permuteName; @@ -108,7 +109,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o auto permuteName = op->get_friendly_name() + "/transpose_a"; auto permutePrim = cldnn::permute(permuteName, inputName, - cldnn_permute_order); + cldnn_permute_order, + op->get_friendly_name()); p.AddPrimitive(permutePrim); p.AddInnerPrimitiveToProfiler(permuteName, layerName, op); inputName = permuteName; @@ -124,7 +126,10 @@ void CreateMatMulOp(Program& p, const 
std::shared_ptr& o IE_THROW() << "Inconsistent reshape in Matmul op: " << op->get_friendly_name(); auto reshapeInName = op->get_friendly_name() + suffix; - auto reshapeInPrim = cldnn::reshape(reshapeInName, inputName, CldnnTensorFromIEDims(reshapeSize)); + auto reshapeInPrim = cldnn::reshape(reshapeInName, + inputName, + CldnnTensorFromIEDims(reshapeSize), + op->get_friendly_name()); p.AddPrimitive(reshapeInPrim); p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op); return reshapeInName; @@ -144,6 +149,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o weightsName, "", DataTypeFromPrecision(op->get_output_element_type(0)), + op->get_friendly_name(), cldnn::padding(), input_rank); @@ -153,7 +159,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o if (reshape_fc) { auto outputShape = CldnnTensorFromIEDims(op->get_output_shape(0)); auto outReshapeName = layerName + "_cldnn_out_reshape"; - auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape); + auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name()); p.AddPrimitive(outReshapePrim); p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op); @@ -188,7 +194,13 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o if (targetFormat.value != DefaultFormatForDims(inputDimsN).value) { auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder"; auto targetDatatype = DataTypeFromPrecision(op->get_output_element_type(0)); - auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype); + auto reorderPrim = cldnn::reorder(reorderName, + inputPrimitives[i], + targetFormat, + targetDatatype, + std::vector(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(reorderPrim); p.AddInnerPrimitiveToProfiler(reorderName, layerName, op); @@ -227,7 +239,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o auto targetShape = gemmSpecificTensor(inputDims); - auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape); + auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name()); p.AddPrimitive(reshapePrim); p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op); @@ -248,7 +260,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o transA, transB, alpha, - beta); + beta, + op->get_friendly_name()); p.AddPrimitive(gemmPrim); @@ -258,7 +271,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr& o if (outDimsN < 4) { auto outputShape = CldnnTensorFromIEDims(outDims); auto outReshapeName = layerName + "_cldnn_out_reshape"; - auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape); + auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name()); p.AddPrimitive(outReshapePrim); p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op); diff --git a/inference-engine/src/cldnn_engine/ops/mvn.cpp b/inference-engine/src/cldnn_engine/ops/mvn.cpp index b9cb376a24e..abd2128326b 100644 --- a/inference-engine/src/cldnn_engine/ops/mvn.cpp +++ b/inference-engine/src/cldnn_engine/ops/mvn.cpp @@ -24,7 +24,8 @@ static void CreateCommonMVNOp(Program& p, const std::shared_ptr& o normalize_variance, eps, eps_inside_sqrt, - across_channels); + across_channels, + op->get_friendly_name()); p.AddPrimitive(mvnPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp 
b/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp index 8adaa3cfa76..b5c2e05e711 100644 --- a/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp +++ b/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp @@ -12,6 +12,7 @@ #include "cldnn/primitives/reorder.hpp" #include "cldnn/primitives/mutable_data.hpp" #include "cldnn/primitives/non_max_suppression.hpp" +#include "cldnn/runtime/debug_configuration.hpp" namespace CLDNNPlugin { @@ -41,7 +42,10 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr(), + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name()); p.AddPrimitive(preprocessPrim); p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op); reorderedInputs[portIndex] = (reorderPrimName); @@ -63,6 +67,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptrget_output_size(); std::vector shared_memory; + GPU_DEBUG_GET_INSTANCE(debug_config); switch (num_output) { case 3: { auto mutable_precision_second = op->get_output_element_type(2); @@ -74,11 +79,15 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptrget_output_shape(2).size()), CldnnTensorFromIEDims(op->get_output_shape(2))); + GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; + } shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond)); cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second"; - auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back()); - p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_second] = { op->get_friendly_name() }; + auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, + shared_memory.back(), + op->get_friendly_name()); p.primitiveIDs[non_max_supression_mutable_id_w_second] = non_max_supression_mutable_id_w_second; p.AddPrimitive(nms_mutable_prim_second); inputPrimitives.push_back(non_max_supression_mutable_id_w_second); @@ -91,11 +100,15 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr(outputIndices), 3, 1, 1)); + GPU_DEBUG_IF(debug_config->verbose >= 2) { + GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl; + } shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst)); cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first"; - auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back()); - p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_first] = { op->get_friendly_name() }; + auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, + shared_memory.back(), + op->get_friendly_name()); p.primitiveIDs[non_max_supression_mutable_id_w_first] = non_max_supression_mutable_id_w_first; p.AddPrimitive(nms_mutable_prim_first); inputPrimitives.push_back(non_max_supression_mutable_id_w_first); @@ -112,7 +125,9 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr(outputIndices), op->m_center_point_box, - op->m_sort_result_descending); + op->m_sort_result_descending, + "", "", "", "", "", "", + op->get_friendly_name()); prim.output_data_type = DataTypeFromPrecision(out_type); @@ -136,15 +151,19 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptrget_friendly_name() }; + auto 
nms_mutable_prim_r_second = cldnn::mutable_data(non_max_supression_id_r_second, + { nonMaxSupressionLayerName }, + shared_memory.front(), + op->get_friendly_name()); p.primitiveIDs[non_max_supression_id_r_second] = non_max_supression_id_r_second; p.AddPrimitive(nms_mutable_prim_r_second); } case 2: { cldnn::primitive_id non_max_supression_id_r_first = layer_type_name_ID(op) + ".1"; - auto nms_mutable_prim_r_first = cldnn::mutable_data(non_max_supression_id_r_first, { nonMaxSupressionLayerName }, shared_memory.back()); - p.primitivesToIRLayersMap[non_max_supression_id_r_first] = { op->get_friendly_name() }; + auto nms_mutable_prim_r_first = cldnn::mutable_data(non_max_supression_id_r_first, + { nonMaxSupressionLayerName }, + shared_memory.back(), + op->get_friendly_name()); p.primitiveIDs[non_max_supression_id_r_first] = non_max_supression_id_r_first; p.AddPrimitive(nms_mutable_prim_r_first); } diff --git a/inference-engine/src/cldnn_engine/ops/normalize_l2.cpp b/inference-engine/src/cldnn_engine/ops/normalize_l2.cpp index 85f2eb95de8..315dee55952 100644 --- a/inference-engine/src/cldnn_engine/ops/normalize_l2.cpp +++ b/inference-engine/src/cldnn_engine/ops/normalize_l2.cpp @@ -45,14 +45,15 @@ void CreateNormalizeL2Op(Program& p, const std::shared_ptrget_data_ptr(), bufSize); auto scalesName = layerName + "_cldnn_input_scales"; - p.AddPrimitive(cldnn::data(scalesName, mem)); + p.AddPrimitive(cldnn::data(scalesName, mem, op->get_friendly_name())); p.AddInnerPrimitiveToProfiler(scalesName, layerName, op); auto normPrim = cldnn::normalize(layerName, inputPrimitives[0], scalesName, across_spatial, - eps); + eps, + op->get_friendly_name()); p.AddPrimitive(normPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/one_hot.cpp b/inference-engine/src/cldnn_engine/ops/one_hot.cpp index 3d792bda8ae..b7c4fe8a126 100644 --- a/inference-engine/src/cldnn_engine/ops/one_hot.cpp +++ b/inference-engine/src/cldnn_engine/ops/one_hot.cpp @@ -53,7 +53,8 @@ void CreateOneHotOp(Program& p, const std::shared_ptr& o DataTypeFromPrecision(op->get_output_element_type(0)), static_cast(axis), on_value, - off_value); + off_value, + op->get_friendly_name()); p.AddPrimitive(oneHotPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/pad.cpp b/inference-engine/src/cldnn_engine/ops/pad.cpp index 0d409414b58..40336df057c 100644 --- a/inference-engine/src/cldnn_engine/ops/pad.cpp +++ b/inference-engine/src/cldnn_engine/ops/pad.cpp @@ -66,7 +66,8 @@ void CreatePadOp(Program& p, const std::shared_ptr& op) { pads_begin, pads_end, border_mode, - pad_value); + pad_value, + op->get_friendly_name()); p.AddPrimitive(tilePrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/parameter.cpp b/inference-engine/src/cldnn_engine/ops/parameter.cpp index b68593dd0a5..6dd43841cf3 100644 --- a/inference-engine/src/cldnn_engine/ops/parameter.cpp +++ b/inference-engine/src/cldnn_engine/ops/parameter.cpp @@ -195,8 +195,8 @@ void CreateParameterOp(Program& p, const std::shared_ptrname()); + auto inputUV = cldnn::input_layout(uv_name, uv_layout, inputInfo->name()); p.AddPrimitive(inputY); p.inputLayouts.insert({ inputInfo->name() + "_Y" + std::to_string(i), y_layout }); @@ -205,20 +205,29 @@ void CreateParameterOp(Program& p, const std::shared_ptrname())); break; } case MEAN_IMAGE: { - p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanBlobID)); + p.AddPrimitive(cldnn::reorder(preprocessPrimID, + y_name, + 
uv_name, + networkInputLayout, + meanBlobID, + cldnn::reorder_mean_mode::subtract, + inputInfo->name())); break; } default: IE_THROW(Unexpected) << "Invalid mean variant in input " + inputName; break; } - p.primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() }; - p.primitivesToIRLayersMap[y_name] = { inputInfo->name() }; - p.primitivesToIRLayersMap[uv_name] = { inputInfo->name() }; p.profilingIDs.push_back(preprocessPrimID); p.InitProfileInfo(preprocessPrimID, "Reorder"); p.primitiveIDs[inputName] = preprocessPrimID; // If it is batched blob, it will be overwritten afterwards. @@ -228,7 +237,7 @@ void CreateParameterOp(Program& p, const std::shared_ptr 1) { auto concatPrimID = "concat:" + inputName + Program::m_preProcessTag; - p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b)); + p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b, op->get_friendly_name())); p.primitiveIDs[inputName] = concatPrimID; } } else { @@ -237,20 +246,26 @@ void CreateParameterOp(Program& p, const std::shared_ptrname(), inputLayout }); - p.AddPrimitive(cldnn::input_layout(inputName, inputLayout)); - p.primitivesToIRLayersMap[inputName] = { inputInfo->name() }; + p.AddPrimitive(cldnn::input_layout(inputName, inputLayout, inputInfo->name())); switch (preProcess.getMeanVariant()) { case NONE: case MEAN_VALUE: { - p.AddPrimitive(cldnn::reorder(preprocessPrimID, inputName, networkInputLayout, meanValues)); + p.AddPrimitive(cldnn::reorder(preprocessPrimID, + inputName, + networkInputLayout, + meanValues, + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name())); break; } case MEAN_IMAGE: { p.AddPrimitive(cldnn::reorder(preprocessPrimID, - inputName, - networkInputLayout, - meanBlobID)); + inputName, + networkInputLayout, + meanBlobID, + cldnn::reorder_mean_mode::subtract, + op->get_friendly_name())); break; } default: IE_THROW() << "Invalid mean variant in input " << inputName; diff --git a/inference-engine/src/cldnn_engine/ops/pooling.cpp b/inference-engine/src/cldnn_engine/ops/pooling.cpp index f1bf6952292..1e0db7bd204 100644 --- a/inference-engine/src/cldnn_engine/ops/pooling.cpp +++ b/inference-engine/src/cldnn_engine/ops/pooling.cpp @@ -70,7 +70,8 @@ void CreateAvgPoolOp(Program& p, const std::shared_ptr& params.stride, params.pad_begin, CldnnTensorFromIEDims(op->get_output_shape(0)), - DataTypeFromPrecision(op->get_output_element_type(0))); + DataTypeFromPrecision(op->get_output_element_type(0)), + op->get_friendly_name()); poolPrim.pad_end = params.pad_end; p.AddPrimitive(poolPrim); p.AddPrimitiveToProfiler(op); @@ -89,7 +90,8 @@ void CreateMaxPoolOp(Program& p, const std::shared_ptr& params.stride, params.pad_begin, CldnnTensorFromIEDims(op->get_output_shape(0)), - DataTypeFromPrecision(op->get_output_element_type(0))); + DataTypeFromPrecision(op->get_output_element_type(0)), + op->get_friendly_name()); poolPrim.pad_end = params.pad_end; p.AddPrimitive(poolPrim); p.AddPrimitiveToProfiler(op); diff --git a/inference-engine/src/cldnn_engine/ops/prior_box.cpp b/inference-engine/src/cldnn_engine/ops/prior_box.cpp index 6cf0aaa6535..43eb5a69941 100644 --- a/inference-engine/src/cldnn_engine/ops/prior_box.cpp +++ b/inference-engine/src/cldnn_engine/ops/prior_box.cpp @@ -54,7 +54,8 @@ void CreatePriorBoxClusteredOp(Program& p, const std::shared_ptrget_output_element_type(0))); + DataTypeFromPrecision(op->get_output_element_type(0)), + op->get_friendly_name()); p.AddPrimitive(priorBoxPrim); p.AddPrimitiveToProfiler(op); @@ 
diff --git a/inference-engine/src/cldnn_engine/ops/prior_box.cpp b/inference-engine/src/cldnn_engine/ops/prior_box.cpp
index 6cf0aaa6535..43eb5a69941 100644
--- a/inference-engine/src/cldnn_engine/ops/prior_box.cpp
+++ b/inference-engine/src/cldnn_engine/ops/prior_box.cpp
@@ -54,7 +54,8 @@ void CreatePriorBoxClusteredOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBoxClustered>& op) {
-                                            DataTypeFromPrecision(op->get_output_element_type(0)));
+                                            DataTypeFromPrecision(op->get_output_element_type(0)),
+                                            op->get_friendly_name());
 
     p.AddPrimitive(priorBoxPrim);
     p.AddPrimitiveToProfiler(op);
@@ -103,7 +104,8 @@ void CreatePriorBoxOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBox>& op) {
-                                   DataTypeFromPrecision(op->get_output_element_type(0)));
+                                   DataTypeFromPrecision(op->get_output_element_type(0)),
+                                   op->get_friendly_name());
 
     p.AddPrimitive(priorBoxPrim);
     p.AddPrimitiveToProfiler(op);
diff --git a/inference-engine/src/cldnn_engine/ops/proposal.cpp b/inference-engine/src/cldnn_engine/ops/proposal.cpp
index d5b906e5e6e..9be2f1d4e99 100644
--- a/inference-engine/src/cldnn_engine/ops/proposal.cpp
+++ b/inference-engine/src/cldnn_engine/ops/proposal.cpp
@@ -9,6 +9,7 @@
 
 #include "cldnn/primitives/proposal.hpp"
 #include "cldnn/primitives/mutable_data.hpp"
+#include "cldnn/runtime/debug_configuration.hpp"
 
 namespace CLDNNPlugin {
 
@@ -62,11 +63,16 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal>& op) {
                                                     DefaultFormatForDims(op->get_output_shape(1).size()),
                                                     CldnnTensorFromIEDims(op->get_output_shape(1)));
 
+        GPU_DEBUG_GET_INSTANCE(debug_config);
+        GPU_DEBUG_IF(debug_config->verbose >= 2) {
+            GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
+        }
         auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);
 
         cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
-        auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory);
-        p.primitivesToIRLayersMap[proposal_mutable_id_w] = { op->get_friendly_name() };
+        auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w,
+                                                       shared_memory,
+                                                       op->get_friendly_name());
         p.primitiveIDs[proposal_mutable_id_w] = proposal_mutable_id_w;
         p.AddPrimitive(argmax_mutable_prim);
         inputPrimitives.push_back(proposal_mutable_id_w);
@@ -96,13 +102,16 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal>& op) {
+                                           op->get_friendly_name());
 
         p.AddPrimitive(proposalPrim);
 
         cldnn::primitive_id proposal_mutable_id_r = layer_type_name_ID(op) + ".1";
-        auto argmax_mutable_prim_r = cldnn::mutable_data(proposal_mutable_id_r, { proposalLayerName }, shared_memory);
-        p.primitivesToIRLayersMap[proposal_mutable_id_r] = { op->get_friendly_name() };
+        auto argmax_mutable_prim_r = cldnn::mutable_data(proposal_mutable_id_r,
+                                                         { proposalLayerName },
+                                                         shared_memory,
+                                                         op->get_friendly_name());
         p.primitiveIDs[proposal_mutable_id_r] = proposal_mutable_id_r;
         p.AddPrimitive(argmax_mutable_prim_r);
 
@@ -134,7 +143,8 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal>& op) {
+                                       op->get_friendly_name());
 
     p.AddPrimitive(proposalPrim);
     p.AddPrimitiveToProfiler(op);
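Note: the GPU_DEBUG_* macros used in the proposal.cpp hunk come from the newly included cldnn/runtime/debug_configuration.hpp and gate verbose tracing on a process-wide debug configuration. A simplified sketch of what such macros can expand to; the real definitions live in that header and may differ in detail:

    // Simplified sketch, not the verbatim cldnn definitions.
    #include <iostream>

    struct debug_configuration {
        int verbose = 0;  // assumed to be populated from an environment setting
        static const debug_configuration* get_instance() {
            static debug_configuration instance;
            return &instance;
        }
    };

    #define GPU_DEBUG_GET_INSTANCE(name) auto name = debug_configuration::get_instance();
    #define GPU_DEBUG_IF(condition) if (condition)
    #define GPU_DEBUG_COUT std::cout << "[GPU] "

Under this shape, GPU_DEBUG_IF(debug_config->verbose >= 2) { ... } expands to an ordinary if statement, so the logging costs only an integer comparison when verbose output is disabled.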
diff --git a/inference-engine/src/cldnn_engine/ops/reduce.cpp b/inference-engine/src/cldnn_engine/ops/reduce.cpp
index b336a2e78fa..47a54c70299 100644
--- a/inference-engine/src/cldnn_engine/ops/reduce.cpp
+++ b/inference-engine/src/cldnn_engine/ops/reduce.cpp
@@ -75,7 +75,8 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::reduce_mode mode, bool keep_dims) {
                                    inputPrimitives[0],
                                    mode,
                                    axes,
-                                   static_cast<int32_t>(keep_dims));
+                                   static_cast<int32_t>(keep_dims),
+                                   op->get_friendly_name());
 
     p.AddPrimitive(reducePrim);
 
@@ -96,7 +97,7 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::reduce_mode mode, bool keep_dims) {
             outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]),
                                       1, TensorValue(out_shape[2]));
         }
-        auto reshape_prim = cldnn::reshape(resultLayerName, layerName, outTensor);
+        auto reshape_prim = cldnn::reshape(resultLayerName, layerName, outTensor, op->get_friendly_name());
         p.AddPrimitive(reshape_prim);
         p.AddPrimitiveToProfiler(op, resultLayerName);
     }
@@ -112,7 +113,13 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::reduce_mode mode, bool keep_dims) {
         else if (rank - rawAxes.size() <= 4)
             out_format = cldnn::format::bfyx;
 
-        auto reorder_prim = cldnn::reorder(reorderLayerName, resultLayerName, out_format, out_dt);
+        auto reorder_prim = cldnn::reorder(reorderLayerName,
+                                           resultLayerName,
+                                           out_format,
+                                           out_dt,
+                                           std::vector<float>(),
+                                           cldnn::reorder_mean_mode::subtract,
+                                           op->get_friendly_name());
         p.AddPrimitive(reorder_prim);
         p.AddPrimitiveToProfiler(op, reorderLayerName);
     } else {
diff --git a/inference-engine/src/cldnn_engine/ops/region_yolo.cpp b/inference-engine/src/cldnn_engine/ops/region_yolo.cpp
index 348dd0f7eeb..314950027cb 100644
--- a/inference-engine/src/cldnn_engine/ops/region_yolo.cpp
+++ b/inference-engine/src/cldnn_engine/ops/region_yolo.cpp
@@ -28,7 +28,8 @@ void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::RegionYolo>& op) {
+                                     op->get_friendly_name());
 
     p.AddPrimitive(regionPrim);
     p.AddPrimitiveToProfiler(op);
diff --git a/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp b/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp
index 4a7f54cf810..9c47ccc9fb0 100644
--- a/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp
+++ b/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp
@@ -20,7 +20,8 @@ void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReorgYolo>& op) {
+                                    op->get_friendly_name());
 
     p.AddPrimitive(reorgPrim);
     p.AddPrimitiveToProfiler(op);
diff --git a/inference-engine/src/cldnn_engine/ops/reshape.cpp b/inference-engine/src/cldnn_engine/ops/reshape.cpp
index f0084bb6a1c..a4978fbae29 100644
--- a/inference-engine/src/cldnn_engine/ops/reshape.cpp
+++ b/inference-engine/src/cldnn_engine/ops/reshape.cpp
@@ -36,9 +36,13 @@ void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
         }
 
         cldnn::layout outputLayout(DataTypeFromPrecision(op->get_output_element_type(0)), outputFormat, outTensor);
-        p.AddPrimitive(cldnn::reorder(reorderId, reshapeInputId, outputLayout));
+        p.AddPrimitive(cldnn::reorder(reorderId,
+                                      reshapeInputId,
+                                      outputLayout,
+                                      std::vector<float>(),
+                                      cldnn::reorder_mean_mode::subtract,
+                                      op->get_friendly_name()));
         p.InitProfileInfo(reorderId, "Reorder", false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, layerName);
-        p.primitivesToIRLayersMap[reorderId] = { op->get_friendly_name() };
         p.primitiveIDs[layerName + "_reorder"] = reorderId;
         p.primitiveIDs[reorderId] = reorderId;
         p.profilingIDs.push_back(reorderId);
@@ -47,7 +51,8 @@ void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
 
     auto reshapePrim = cldnn::reshape(layerName,
                                       reshapeInputId,
-                                      outTensor);
+                                      outTensor,
+                                      op->get_friendly_name());
 
     p.AddPrimitive(reshapePrim);
     p.AddPrimitiveToProfiler(op);
diff --git a/inference-engine/src/cldnn_engine/ops/result.cpp b/inference-engine/src/cldnn_engine/ops/result.cpp
index fe0d0f05658..c1219ad8fba 100644
--- a/inference-engine/src/cldnn_engine/ops/result.cpp
+++ b/inference-engine/src/cldnn_engine/ops/result.cpp
@@ -56,9 +56,12 @@ void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Result>& op) {
     std::string outputID = inputs[0];
 
     p.AddPrimitive(cldnn::reorder(outLayerName,
-                                  outputID,
-                                  FormatFromLayout(outputData->getLayout()),
-                                  DataTypeFromPrecision(precision)));
+                                  outputID,
+                                  FormatFromLayout(outputData->getLayout()),
+                                  DataTypeFromPrecision(precision),
+                                  std::vector<float>(),
+                                  cldnn::reorder_mean_mode::subtract,
+                                  op->get_friendly_name()));
     p.InitProfileInfo(outLayerName, "reorder");
     p.profilingIDs.push_back(outLayerName);
     p.primitiveIDs[outLayerName] = outLayerName;
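Note: the new friendly-name argument sits after cldnn::reorder's optional mean-value and mean-mode parameters, which is why call sites that previously relied on the defaults (reduce.cpp, reshape.cpp, and result.cpp above) must now spell out std::vector<float>() and cldnn::reorder_mean_mode::subtract explicitly. An assumed outline of the parameter ordering, for illustration only; the real declaration lives in cldnn/primitives/reorder.hpp and may differ:

    // Illustrative signature sketch with stand-in types.
    #include <string>
    #include <vector>

    namespace sketch {
    using primitive_id = std::string;
    enum class reorder_mean_mode { none, subtract };
    struct layout {};

    struct reorder {
        reorder(const primitive_id& id,
                const primitive_id& input,
                const layout& output_layout,
                const std::vector<float>& values_to_subtract = {},
                reorder_mean_mode mode = reorder_mean_mode::subtract,
                const primitive_id& ext_prim_id = "");  // new trailing argument
    };
    }  // namespace sketch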
diff --git a/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp b/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp
index 766bbc89a31..6421a01dc75 100644
--- a/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp
+++ b/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp
@@ -22,7 +22,8 @@ void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReverseSequence>& op) {
+                                              op->get_friendly_name());
 
     p.AddPrimitive(reverseSequencePrim);
     p.AddPrimitiveToProfiler(op);
diff --git a/inference-engine/src/cldnn_engine/ops/rnn.cpp b/inference-engine/src/cldnn_engine/ops/rnn.cpp
index 2d4705f1a91..1ebaa0a7868 100644
--- a/inference-engine/src/cldnn_engine/ops/rnn.cpp
+++ b/inference-engine/src/cldnn_engine/ops/rnn.cpp
@@ -107,8 +107,13 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell>& op) {
-    p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
-    p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
+    p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape, op->get_friendly_name()));
+    p.AddPrimitive(cldnn::reorder(permuteID,
+                                  inReshapeID,
+                                  inputLayout,
+                                  std::vector<float>(),
+                                  cldnn::reorder_mean_mode::subtract,
+                                  op->get_friendly_name()));
 
     p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
     p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
 
@@ -117,11 +122,24 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell>& op) {
-    p.AddPrimitive(cldnn::reshape(hiddenInResh, inputPrimitives[1], inStateShape));
-    p.AddPrimitive(cldnn::reorder(hiddenInStr, hiddenInResh, hiddenLayout));
-    p.AddPrimitive(cldnn::reshape(cellInResh, inputPrimitives[2], inStateShape));
-    p.AddPrimitive(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
-    p.AddPrimitive(cldnn::concatenation(input_concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));
+    p.AddPrimitive(cldnn::reshape(hiddenInResh, inputPrimitives[1], inStateShape, op->get_friendly_name()));
+    p.AddPrimitive(cldnn::reorder(hiddenInStr,
+                                  hiddenInResh,
+                                  hiddenLayout,
+                                  std::vector<float>(),
+                                  cldnn::reorder_mean_mode::subtract,
+                                  op->get_friendly_name()));
+    p.AddPrimitive(cldnn::reshape(cellInResh, inputPrimitives[2], inStateShape, op->get_friendly_name()));
+    p.AddPrimitive(cldnn::reorder(cellInStr,
+                                  cellInResh,
+                                  hiddenLayout,
+                                  std::vector<float>(),
+                                  cldnn::reorder_mean_mode::subtract,
+                                  op->get_friendly_name()));
+    p.AddPrimitive(cldnn::concatenation(input_concatID,
+                                        { permuteID, hiddenInStr },
+                                        cldnn::concatenation::concatenation_axis::along_x,
+                                        op->get_friendly_name()));
 
     p.AddInnerPrimitiveToProfiler(hiddenInResh, op->get_friendly_name(), op);
     p.AddInnerPrimitiveToProfiler(hiddenInStr, op->get_friendly_name(), op);
@@ -139,14 +157,19 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell>& op) {
-    p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y));
+    p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y, op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);
 
-    p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, hasBias ? biasID : ""));
-    p.AddPrimitive(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
-    p.AddPrimitive(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
-    p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
-                                   clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
+    p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, hasBias ? biasID : "", op->get_friendly_name()));
+    p.AddPrimitive(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz, op->get_friendly_name()));
+    p.AddPrimitive(cldnn::reorder(gemmReorderID,
+                                  gemmReshapeID,
+                                  gemmLayout,
+                                  std::vector<float>(),
+                                  cldnn::reorder_mean_mode::subtract,
+                                  op->get_friendly_name()));
+    p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr, clip, 0, activations,
+                                   activation_params, cldnn::lstm_weights_order::fizo, 0, op->get_friendly_name()));
 
     p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
     p.AddInnerPrimitiveToProfiler(gemmReshapeID, op->get_friendly_name(), op);
@@ -156,16 +179,16 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell>& op) {
-    p.AddPrimitive(cldnn::crop(outputHiddenCropID, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
+    p.AddPrimitive(cldnn::crop(outputHiddenCropID, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }, op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(outputHiddenCropID, op->get_friendly_name(), op);
-    p.AddPrimitive(cldnn::reshape(outputHiddenID, outputHiddenCropID, outSz));
+    p.AddPrimitive(cldnn::reshape(outputHiddenID, outputHiddenCropID, outSz, op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(outputHiddenID, op->get_friendly_name(), op);
 
     cldnn::primitive_id outputCellCropID = layerName + "_cc";
     cldnn::primitive_id outputCellID = layerName + ".1";
-    p.AddPrimitive(cldnn::crop(outputCellCropID, lstm_elt_id, hiddenSz, cellCropSz));
+    p.AddPrimitive(cldnn::crop(outputCellCropID, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(outputCellCropID, op->get_friendly_name(), op);
-    p.AddPrimitive(cldnn::reshape(outputCellID, outputCellCropID, outSz));
+    p.AddPrimitive(cldnn::reshape(outputCellID, outputCellCropID, outSz, op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(outputCellID, op->get_friendly_name(), op);
 
     // output primitive IDs
@@ -223,11 +246,16 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTMSequence>& op) {
-    p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
-    p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
+    p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape, op->get_friendly_name()));
+    p.AddPrimitive(cldnn::reorder(permuteID,
+                                  inReshapeID,
+                                  inputLayout,
+                                  std::vector<float>(),
+                                  cldnn::reorder_mean_mode::subtract,
+                                  op->get_friendly_name()));
 
-    p.AddPrimitive(cldnn::reshape(inHiddenStateID, inputPrimitives[1], inStateShape));
-    p.AddPrimitive(cldnn::reshape(inCellStateID, inputPrimitives[2], inStateShape));
+    p.AddPrimitive(cldnn::reshape(inHiddenStateID, inputPrimitives[1], inStateShape, op->get_friendly_name()));
+    p.AddPrimitive(cldnn::reshape(inCellStateID, inputPrimitives[2], inStateShape, op->get_friendly_name()));
 
     p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
     p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
@@ -243,12 +271,12 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTMSequence>& op) {
-    p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y));
+    p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y, op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);
 
     std::vector<size_t> WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) };
 
     cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape";
-    auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize));
+    auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize), op->get_friendly_name());
     p.AddPrimitive(reshapeInPrim);
     p.AddInnerPrimitiveToProfiler(WRreshapeID, op->get_friendly_name(), op);
@@ -267,30 +295,35 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTMSequence>& op) {
         cldnn::tensor offset_tensor{ 0, static_cast<int32_t>(seqIdx), 0, 0 };
         cldnn::primitive_id inputCrop_id = inputCropID + ":" + seqIdx_str;
-        p.AddPrimitive(cldnn::crop(inputCrop_id, permuteID, crop_tensor, offset_tensor));
+        p.AddPrimitive(cldnn::crop(inputCrop_id, permuteID, crop_tensor, offset_tensor, op->get_friendly_name()));
         p.AddInnerPrimitiveToProfiler(inputCrop_id, op->get_friendly_name(), op);
 
-        p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x));
+        p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x, op->get_friendly_name()));
         p.AddInnerPrimitiveToProfiler(concatID, op->get_friendly_name(), op);
 
-        p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID));
+        p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID, op->get_friendly_name()));
         p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
 
-        p.AddPrimitive(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
-        p.AddPrimitive(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
-        p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, cellStr,
-                                       clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
+        p.AddPrimitive(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz, op->get_friendly_name()));
+        p.AddPrimitive(cldnn::reorder(lstm_fc_reor_id,
+                                      lstm_fc_resh_id,
+                                      gemmLayout,
+                                      std::vector<float>(),
+                                      cldnn::reorder_mean_mode::subtract,
+                                      op->get_friendly_name()));
+        p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, cellStr, clip, 0, activations,
+                                       activation_params, cldnn::lstm_weights_order::fizo, 0, op->get_friendly_name()));
         p.AddInnerPrimitiveToProfiler(lstm_fc_resh_id, op->get_friendly_name(), op);
         p.AddInnerPrimitiveToProfiler(lstm_fc_reor_id, op->get_friendly_name(), op);
         p.AddInnerPrimitiveToProfiler(lstm_elt_id, op->get_friendly_name(), op);
 
         hiddenStr = crop_id + ":hidden";
         cellStr = crop_id + ":cell";
-        p.AddPrimitive(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
+        p.AddPrimitive(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }, op->get_friendly_name()));
         p.AddInnerPrimitiveToProfiler(hiddenStr, op->get_friendly_name(), op);
         output_ids_offsets.push_back(hiddenStr);
 
         if (i < lstm_sequence_len - 1) {
-            p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
+            p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
             p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
         } else {
             // last hidden state crop (output 2)
@@ -299,7 +332,7 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTMSequence>& op) {
-            p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
+            p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
             cldnn::primitive_id outputCellID = layerName + ".2";
             p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
             p.primitiveIDs[outputCellID] = cellStr;
@@ -310,7 +343,7 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr