Merge remote-tracking branch 'upstream/master' into sy/test/ConvolutionLayerTest_dynamic_shape_case
commit fa7e87b146
@@ -94,7 +94,7 @@ jobs:
-DENABLE_PROFILING_ITT=OFF
-DENABLE_SAMPLES=OFF
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON
-DNGRAPH_DEBUG_ENABLE=OFF
-DOPENVINO_DEBUG_ENABLE=OFF
$(REPO_DIR)
workingDirectory: $(BUILD_DIR)
@@ -16,7 +16,7 @@ jobs:
timeoutInMinutes: 120

pool:
name: WIN_VMSS_VENV_F8S_WU2
name: WIN_VMSS_VENV_F16S_WU2

variables:
system.debug: true
@@ -34,8 +34,6 @@ jobs:
INSTALL_DIR: $(WORK_DIR)\install_pkg
INSTALL_TEST_DIR: $(INSTALL_DIR)\tests
SETUPVARS: $(INSTALL_DIR)\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe

steps:
- script: |
@@ -59,12 +57,6 @@ jobs:
rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR)
displayName: 'Make dir'

- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'

- checkout: self
clean: true
lfs: false
@@ -109,9 +101,7 @@ jobs:
- script: dir $(REPO_DIR)\inference-engine\temp\ /s
displayName: 'List temp SDKs'

- script: |
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
- script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
workingDirectory: $(BUILD_DIR)
displayName: 'Build Win'
@@ -153,10 +143,8 @@ jobs:
displayName: 'PaddlePaddle Frontend UT'
continueOnError: false

- script: |
set PATH=$(IB_DIR);%PATH%
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml
displayName: 'IE UT old - IB'
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
displayName: 'IE UT old'
continueOnError: false

- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
@@ -187,11 +175,8 @@ jobs:
displayName: 'TEMPLATE FuncTests'
continueOnError: false

# call $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
- script: |
set PATH=$(IB_DIR);%PATH%
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke*:-*CompareWithRefs/base_size=16_pre_nms_topn=100_post_nms_topn=100_nms_thresh=0.7_feat_stride=1_min_size=1_ratio*:*smoke_GRUSequenceCommonZeroClip/GRUSequenceTest.CompareWithRefs/mode=CONVERT_TO_TI_MAX_SEQ_LEN_CONST_seq_lengths* --gtest_output=xml:TEST-cpuFuncTests-IB.xml /testlevel=24
displayName: 'CPU FuncTests - IB'
- script: $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
displayName: 'CPU FuncTests'
continueOnError: false

- script: |
@@ -213,8 +198,3 @@ jobs:
buildPlatform: 'x64' # Optional
buildConfiguration: 'Windows' # Optional
#publishRunAttachments: true # Optional

- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
displayName: Stop IncrediBuild
continueOnError: true
enabled: false
@@ -68,7 +68,7 @@ RUN cmake .. \
-DENABLE_PYTHON=ON \
-DPYTHON_EXECUTABLE=/usr/bin/python3 \
-DNGRAPH_ONNX_FRONTEND_ENABLE=ON \
-DNGRAPH_DEBUG_ENABLE=OFF \
-DOPENVINO_DEBUG_ENABLE=OFF \
-DCMAKE_INSTALL_PREFIX=/openvino/dist \
-DNGRAPH_USE_PROTOBUF_LITE=${PROTOBUF_LITE}
RUN make -j $(nproc) install
@@ -486,7 +486,7 @@ class Watchdog:
self._queue_message(message, message_severity='warning', pr=pr)
elif build_delta > _BUILD_DURATION_THRESHOLD:
# CI job takes too long, possibly frozen - communicate failure
message = ('ONNX CI job build #{}, for PR #{} started,'
message = ('ONNX CI job build #{}, for PR #{} started, '
'but did not finish in designated time of {} '
'minutes!'.format(build_number, pr_number,
str(_BUILD_DURATION_THRESHOLD.seconds / 60)))
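The key fix above is the trailing space added inside the first string literal: Python joins adjacent string literals verbatim, so without it the logged message reads "started,but". A minimal, self-contained sketch of the pitfall:

```python
# Adjacent string literals are concatenated at parse time, before .format runs.
broken = ('ONNX CI job build #{}, for PR #{} started,'
          'but did not finish'.format(42, 7))
fixed = ('ONNX CI job build #{}, for PR #{} started, '
         'but did not finish'.format(42, 7))
print(broken)  # ... started,but did not finish
print(fixed)   # ... started, but did not finish
```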
.gitmodules
@@ -53,3 +53,6 @@
[submodule "ncc"]
path = cmake/developer_package/ncc_naming_style/ncc
url = https://github.com/nithinn/ncc.git
[submodule "thirdparty/onednn_gpu"]
path = thirdparty/onednn_gpu
url = https://github.com/oneapi-src/oneDNN.git
@@ -83,7 +83,6 @@ if(THREADING STREQUAL "OMP")
message(FATAL_ERROR "Intel OMP is not available on current platform")
endif()
update_deps_cache(OMP "${OMP}" "Path to OMP root folder")
log_rpath_from_dir(OMP "${OMP}/lib")
debug_message(STATUS "intel_omp=" ${OMP})

ie_cpack_add_component(omp REQUIRED)
@@ -146,12 +145,6 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
update_deps_cache(TBB_DIR "${TBB}/cmake" "Path to TBB cmake folder")

update_deps_cache(TBBBIND_2_4_DIR "${TBBBIND_2_4}/cmake" "Path to TBBBIND_2_4 cmake folder")

if(WIN32)
log_rpath_from_dir(TBB "${TBB}/bin")
else ()
log_rpath_from_dir(TBB "${TBB}/lib")
endif()
debug_message(STATUS "tbb=" ${TBB})
endif()
@@ -242,14 +235,6 @@ if(ENABLE_OPENCV)
endif()

update_deps_cache(OpenCV_DIR "${ocv_cmake_path}" "Path to OpenCV package folder")

if(WIN32)
log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../bin")
elseif(ANDROID)
log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../../../lib")
else()
log_rpath_from_dir(OPENCV "${OpenCV_DIR}/../lib")
endif()
debug_message(STATUS "opencv=" ${OPENCV})
else()
reset_deps_cache(OpenCV_DIR)
@@ -277,8 +262,8 @@ if(ENABLE_GNA)
set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.1226")
set(GNA_HASH "d5450af15c993e264c25ac4591a7dab44722e10d15fca4f222a1b84429d4e5b6")
set(GNA_VERSION "03.00.00.1377")
set(GNA_HASH "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65")
endif()

set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)
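Both GNA_HASH strings are 64 hexadecimal digits, which matches a SHA-256 digest; assuming that is the hash in use, a locally downloaded archive (hypothetical filename below) can be verified with a few lines of Python:

```python
import hashlib

archive = "gna_03.00.00.1377.zip"  # hypothetical local path to the GNA package
expected = "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65"

sha = hashlib.sha256()
with open(archive, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        sha.update(chunk)
assert sha.hexdigest() == expected, "GNA archive checksum mismatch"
```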
@@ -15,59 +15,3 @@ function(clean_message type)
message (FATAL_ERROR)
endif()
endfunction()

file(REMOVE ${CMAKE_BINARY_DIR}/ld_library_rpath_64.txt)

# log relative path to shared library that has to be used in LD_LIBRARY_PATH
function (log_rpath_remove_top component component_remove_top lib lib_remove_top)

set(top_lib_dir ${${component}})
set(lib_dir ${lib})

# debug_message(STATUS "LIB-IN=${lib} ")
# debug_message(STATUS "TOPLIB-IN=${top_lib_dir} ")
get_filename_component(top_lib_dir "${${component}}" DIRECTORY)

if (${component_remove_top} AND ${component})
else()
get_filename_component(add_name "${${component}}" NAME)
set(top_lib_dir "${top_lib_dir}/${add_name}")
endif()
if (${lib_remove_top} AND lib)
get_filename_component(lib_dir ${lib} DIRECTORY)
endif()

string (REPLACE "//" "/" top_lib_dir "${top_lib_dir}")
string (REPLACE "//" "/" lib_dir "${lib_dir}")

string (REPLACE "\\\\" "/" top_lib_dir "${top_lib_dir}")
string (REPLACE "\\\\" "/" lib_dir "${lib_dir}")

# debug_message(STATUS "LIB-OUT=${lib_dir}")
# debug_message(STATUS "TOPLIB-OUT=${top_lib_dir}")

if (WIN32)
string (TOLOWER "${top_lib_dir}" top_lib_dir)
string (TOLOWER "${lib_dir}" lib_dir)
endif()

string (REPLACE "${top_lib_dir}" "" component_dir "${lib_dir}")

set(RPATH_INFO "${component}=${component_dir}")
debug_message(STATUS "LD_LIBRARY_RPATH: ${RPATH_INFO}")
file(APPEND ${CMAKE_BINARY_DIR}/ld_library_rpath_64.txt "${RPATH_INFO}\n")
endfunction()

function (log_rpath_from_dir component lib_dir)
log_rpath_remove_top("${component}" TRUE "${lib_dir}" FALSE)
endfunction()

function (log_rpath component lib_path)
log_rpath_remove_top(${component} TRUE ${lib_path} TRUE)
endfunction()

# Just wrapping of the original message() function to make this macro known during IE build.
# This macro is redefined (with additional checks) within the InferenceEngineConfig.cmake file.
macro(ext_message TRACE_LEVEL)
message(${TRACE_LEVEL} "${ARGN}")
endmacro()
@@ -122,7 +122,7 @@ endif()

ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" ON "protoc_available" OFF)
ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF)
ie_option(IR_FRONTEND_ENABLE "Enable IR FrontEnd" ON)
ie_option(NGRAPH_IR_FRONTEND_ENABLE "Enable IR FrontEnd" ON)
ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON
"NGRAPH_ONNX_FRONTEND_ENABLE" OFF)
ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
@@ -130,7 +130,7 @@ ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF)
ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON
"NGRAPH_UNIT_TEST_ENABLE" OFF)
ie_option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" OFF)
ie_option(OPENVINO_DEBUG_ENABLE "Enable output for OPENVINO_DEBUG statements" OFF)
ie_option(ENABLE_REQUIREMENTS_INSTALL "Dynamic dependencies install" ON)

# WA for ngraph python build on Windows debug
@@ -63,6 +63,9 @@
# `OpenVINO_Frontend_PaddlePaddle_FOUND`
# OpenVINO PaddlePaddle frontend is available
#
# `OpenVINO_Frontend_IR_FOUND`
# OpenVINO IR frontend is available
#
# OpenVINO version variables:
#
# `OpenVINO_VERSION_MAJOR`
@@ -169,6 +172,7 @@ set(${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND @NGRAPH_PDPD_FRONTEND_ENABLE@)

set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_ONNX_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_ONNX_FOUND})
set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_PaddlePaddle_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_PaddlePaddle_FOUND})
set(${CMAKE_FIND_PACKAGE_NAME}_Frontend_IR_FOUND ${${CMAKE_FIND_PACKAGE_NAME}_IR_FOUND})

# if no components specified, only Runtime is provided
if(NOT ${CMAKE_FIND_PACKAGE_NAME}_FIND_COMPONENTS)
@@ -88,5 +88,6 @@ if(ngraph_onnx_importer_FOUND)
endif()

set(ngraph_paddlepaddle_frontend_FOUND ${OpenVINO_Frontend_PaddlePaddle_FOUND})
set(ngraph_ir_frontend_FOUND ${OpenVINO_Frontend_IR_FOUND})

check_required_components(ngraph)
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: Apache-2.0
#

set_property(GLOBAL PROPERTY JOB_POOLS four_jobs=4)

function(ov_model_convert SRC DST OUT)
set(onnx_gen_script ${OpenVINO_SOURCE_DIR}/ngraph/test/models/onnx/onnx_prototxt_converter.py)
@@ -43,6 +45,7 @@ function(ov_model_convert SRC DST OUT)
"${SRC}/${in_file}" ${full_out_name}
DEPENDS ${onnx_gen_script} "${SRC}/${in_file}"
COMMENT "Generate ${rel_out_name}"
JOB_POOL four_jobs
WORKING_DIRECTORY "${model_source_dir}")
else()
add_custom_command(OUTPUT ${full_out_name}
@@ -50,6 +53,7 @@ function(ov_model_convert SRC DST OUT)
"${SRC}/${in_file}" ${full_out_name}
DEPENDS ${onnx_gen_script} "${SRC}/${in_file}"
COMMENT "Copy ${rel_out_name}"
JOB_POOL four_jobs
WORKING_DIRECTORY "${model_source_dir}")
endif()
list(APPEND files "${full_out_name}")
@@ -73,19 +73,21 @@ inp = torch.randn([seq_length, batch_size, feature_length])
feature_length = torch.LongTensor([seq_length])
x_padded, x_lens = model.encoder(inp, feature_length)
torch.onnx.export(model.encoder, (inp, feature_length), "rnnt_encoder.onnx", opset_version=12,
input_names=['input.1', '1'], dynamic_axes={'input.1': {0: 'seq_len', 1: 'batch'}})
input_names=['input', 'feature_length'], output_names=['x_padded', 'x_lens'],
dynamic_axes={'input': {0: 'seq_len', 1: 'batch'}})

symbol = torch.LongTensor([[20]])
hidden = torch.randn([2, batch_size, 320]), torch.randn([2, batch_size, 320])
g, hidden = model.prediction.forward(symbol, hidden)
torch.onnx.export(model.prediction, (symbol, hidden), "rnnt_prediction.onnx", opset_version=12,
input_names=['input.1', '1', '2'],
dynamic_axes={'input.1': {0: 'batch'}, '1': {1: 'batch'}, '2': {1: 'batch'}})
input_names=['symbol', 'hidden_in_1', 'hidden_in_2'],
output_names=['g', 'hidden_out_1', 'hidden_out_2'],
dynamic_axes={'symbol': {0: 'batch'}, 'hidden_in_1': {1: 'batch'}, 'hidden_in_2': {1: 'batch'}})

f = torch.randn([batch_size, 1, 1024])
model.joint.forward(f, g)
torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12,
input_names=['0', '1'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}})
input_names=['0', '1'], output_names=['result'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}})
```

```bash
@@ -97,8 +99,8 @@ After completing this step, the files `rnnt_encoder.onnx`, `rnnt_prediction.onnx
**Step 6**. Run the conversion command:

```bash
python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input.1[157 1 240],1->157"
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]"
python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input[157 1 240],feature_length->157"
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "symbol[1 1],hidden_in_1[2 1 320],hidden_in_2[2 1 320]"
python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]"
```
Please note that the hardcoded sequence length value of 157 was taken from MLCommons, but conversion to IR preserves
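The updated `--input` strings must match the tensor names baked into the exported ONNX files by the `input_names`/`output_names` arguments above. A small sketch using the standard `onnx` package to double-check the names after export:

```python
import onnx

# Print the graph input names that the mo.py --input strings reference.
for path in ("rnnt_encoder.onnx", "rnnt_prediction.onnx", "rnnt_joint.onnx"):
    model = onnx.load(path)
    print(path, "->", [inp.name for inp in model.graph.input])
    # e.g. rnnt_encoder.onnx -> ['input', 'feature_length']
```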
@@ -104,3 +104,9 @@ Intel® Distribution of OpenVINO™ toolkit includes the following components:
- [Intel® Media SDK](https://software.intel.com/en-us/media-sdk) (in Intel® Distribution of OpenVINO™ toolkit for Linux only)

The open-source version of the OpenVINO™ toolkit is available on [GitHub](https://github.com/openvinotoolkit/openvino). For building the Inference Engine from the source code, see the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">build instructions</a>.

## OpenVINO™ API 2.0

The new OpenVINO™ API 2.0 was introduced to make the OpenVINO™ interface more user-friendly and to align OpenVINO™ with other frameworks.
The [migration guide](@ref ov_2_0_transition_guide) simplifies migrating an application from the old API to OpenVINO™ API 2.0.
docs/migration_ov_2_0/docs/common_inference_pipeline.md (new file)
@@ -0,0 +1,55 @@
# OpenVINO™ Inference Pipeline {#ov_inference_pipeline}

To run inference with the OpenVINO™ toolkit, users typically need to take the following steps:
1. Create Core
2. (Optional) Read the model from disk
2.1. Configure Input and Output of the Model
3. Load the Model to the Device
4. Create an Inference Request
5. Prepare Input
6. Start Inference
7. Process the Inference Results

The code snippets below cover these steps and show how application code should change when migrating to OpenVINO™ 2.0.

## 1. Create Core

Inference Engine API:

@snippet snippets/ie_common.cpp ie:create_core

OpenVINO™ 2.0 API:

@snippet snippets/ov_common.cpp ov_api_2_0:create_core

## 2. (Optional) Read the model from disk

Inference Engine API:

@snippet snippets/ie_common.cpp ie:read_model

OpenVINO™ 2.0 API:

@snippet snippets/ov_common.cpp ov_api_2_0:read_model

### 2.1 Configure Input and Output of the Model

Inference Engine API:

@snippet snippets/ie_common.cpp ie:get_inputs_outputs

OpenVINO™ 2.0 API:

@snippet snippets/ov_common.cpp ov_api_2_0:get_inputs_outputs

## 3. Load the Model to the Device

Inference Engine API:

@snippet snippets/ie_common.cpp ie:compile_model

OpenVINO™ 2.0 API:

@snippet snippets/ov_common.cpp ov_api_2_0:compile_model

## 5. TBD
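The snippets referenced above are C++. For orientation only, a rough Python sketch of the same pipeline using the pre-2.0 Inference Engine Python API (model path and input data are placeholders, not part of the guide):

```python
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()                                                # 1. create Core
net = ie.read_network(model="model.xml")                     # 2. read the model from disk
input_name = next(iter(net.input_info))                      # 2.1 inspect inputs
output_name = next(iter(net.outputs))
exec_net = ie.load_network(network=net, device_name="CPU")   # 3. load the model to the device
# 4.-6. prepare a placeholder input and run a synchronous inference
data = np.zeros(net.input_info[input_name].input_data.shape, dtype=np.float32)
result = exec_net.infer({input_name: data})
print(result[output_name].shape)                             # 7. process the results
```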
docs/migration_ov_2_0/docs/intro.md (new file)
@@ -0,0 +1,12 @@
# OpenVINO™ API 2.0 transition guide {#ov_2_0_transition_guide}

OpenVINO™ API 2.0 was introduced to simplify migration from other frameworks and to make the OpenVINO™ API more user-friendly.
The main differences between the APIs are listed below:

- OpenVINO™ API 2.0 uses tensor names or indexes to work with inputs and outputs; the old API works with operation names.
- The structures for shapes and element types were changed.
- The naming style was changed: the old API uses CamelCase, while OpenVINO™ API 2.0 uses snake_case for function names.
- Namespaces were aligned between components.

Please refer to the following transition guides to understand how to migrate your application to OpenVINO™ API 2.0.
- [OpenVINO™ Common Inference pipeline](@ref ov_inference_pipeline)
@@ -3,7 +3,7 @@
nGraph representation provides an API to get detailed information about the graph structure.

To receive additional messages about applied graph modifications, rebuild the nGraph library with
the `-DNGRAPH_DEBUG_ENABLE=ON` option.
the `-DOPENVINO_DEBUG_ENABLE=ON` option.

To visualize the nGraph function to the xDot format or to an image file, use the
`ngraph::pass::VisualizeTree` graph transformation pass:
docs/snippets/ie_common.cpp (new file)
@@ -0,0 +1,43 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <ie_core.hpp>

int main() {
    //! [ie:create_core]
    InferenceEngine::Core core;
    //! [ie:create_core]

    //! [ie:read_model]
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");
    //! [ie:read_model]

    //! [ie:get_inputs_outputs]
    InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
    InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
    //! [ie:get_inputs_outputs]

    //! [ie:compile_model]
    InferenceEngine::ExecutableNetwork exec_network = core.LoadNetwork(network, "CPU");
    //! [ie:compile_model]

    //! [ie:create_infer_request]
    InferenceEngine::InferRequest infer_request = exec_network.CreateInferRequest();
    //! [ie:create_infer_request]

    //! [ie:get_input_tensor]
    InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(inputs.begin()->first);
    // fill input blob
    //! [ie:get_input_tensor]

    //! [ie:inference]
    infer_request.Infer();
    //! [ie:inference]

    //! [ie:get_output_tensor]
    InferenceEngine::Blob::Ptr output_blob = infer_request.GetBlob(outputs.begin()->first);
    // process output data
    //! [ie:get_output_tensor]
    return 0;
}
@@ -23,12 +23,10 @@ acos0->set_argument(0, add0);
add1->set_argument(0, acos0);
add1->set_argument(1, abs0);

// Run shape inference on the nodes
NodeVector ops{arg0, arg1, add0, abs0, acos0, add1};
validate_nodes_and_infer_types(ops);

// Create a graph with one output (add1) and two inputs (arg0, arg1)
auto ng_function = make_shared<Function>(OutputVector{add1}, ParameterVector{arg0, arg1});
// Run shape inference on the nodes
ng_function->validate_nodes_and_infer_types();

//! [part0]
docs/snippets/ov_common.cpp (new file)
@@ -0,0 +1,34 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <openvino/core/function.hpp>
#include <openvino/runtime/runtime.hpp>

int main() {
    //! [ov_api_2_0:create_core]
    ov::runtime::Core core;
    //! [ov_api_2_0:create_core]

    //! [ov_api_2_0:read_model]
    std::shared_ptr<ov::Function> network = core.read_model("model.xml");
    //! [ov_api_2_0:read_model]

    //! [ov_api_2_0:get_inputs_outputs]
    ov::ParameterVector inputs = network->get_parameters();
    ov::ResultVector outputs = network->get_results();
    //! [ov_api_2_0:get_inputs_outputs]

    //! [ov_api_2_0:compile_model]
    ov::runtime::ExecutableNetwork exec_network = core.compile_model(network, "CPU");
    //! [ov_api_2_0:compile_model]

    ov::runtime::InferRequest infer_request = exec_network.create_infer_request();
    //
    // InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(inputs.begin()->first);
    // // fill input blob
    // infer_request.Infer();
    //
    // InferenceEngine::Blob::Ptr output_blob = infer_request.GetBlob(outputs.begin()->first);
    // process output data
    return 0;
}
@@ -110,8 +110,6 @@ if(ANDROID)

set(LIBUSB_INCLUDE_DIR "${LIBUSB}/include")
set(LIBUSB_LIBRARY "${LIBUSB}/libs/${ANDROID_ABI}/libusb1.0.so")

log_rpath_from_dir(LIBUSB "${LIBUSB}/libs/${ANDROID_ABI}")
endif()

#
@@ -117,7 +117,8 @@ Options:
(default 16).
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
Optional. The user-specified input scale factor for
quantization.
quantization. If the network contains multiple inputs,
provide scale factors by separating them with commas.
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
Optional. Write GNA model to file using path/filename
provided.
@@ -176,27 +177,30 @@ The sample application logs each step in a standard output stream.
[ INFO ] Creating Inference Engine
[ INFO ] Reading the network: wsj_dnn5b.xml
[ INFO ] Configuring input and output blobs
[ INFO ] Using scale factor of 2175.4322417 calculated from first utterance.
[ INFO ] Using scale factor(s) calculated from first utterance
[ INFO ] For input 0 using scale factor of 2175.4322418
[ INFO ] Loading the model to the plugin
[ INFO ] Starting inference in synchronous mode
[ INFO ] Utterance 0 (4k0c0301)
[ INFO ] Output blob name: affinetransform14/Fused_Add_
[ INFO ] Frames in utterance: 1294
[ INFO ] Total time in Infer (HW and SW): 5305.47ms
[ INFO ] max error: 0.7051839
[ INFO ] avg error: 0.0448387
[ INFO ] Total time in Infer (HW and SW): 6211.45ms
[ INFO ] max error: 0.7051840
[ INFO ] avg error: 0.0448388
[ INFO ] avg rms error: 0.0582387
[ INFO ] stdev error: 0.0371649
[ INFO ] stdev error: 0.0371650
[ INFO ]
[ INFO ] Utterance 1 (4k0c0302)
[ INFO ] Output blob name: affinetransform14/Fused_Add_
[ INFO ] Frames in utterance: 1005
[ INFO ] Total time in Infer (HW and SW): 5031.53ms
[ INFO ] Total time in Infer (HW and SW): 4742.27ms
[ INFO ] max error: 0.7575974
[ INFO ] avg error: 0.0452166
[ INFO ] avg rms error: 0.0586013
[ INFO ] stdev error: 0.0372769
[ INFO ]
...
[ INFO ] Total sample time: 38033.09ms
[ INFO ] Total sample time: 40219.99ms
[ INFO ] File result.npz was created!
[ INFO ] This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool
```
@@ -28,8 +28,9 @@ def parse_args() -> argparse.Namespace:
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
args.add_argument('-sf', '--scale_factor', type=float,
help='Optional. The user-specified input scale factor for quantization.')
args.add_argument('-sf', '--scale_factor', type=str,
help='Optional. The user-specified input scale factor for quantization. '
'If the network contains multiple inputs, provide scale factors by separating them with commas.')
args.add_argument('-wg', '--export_gna_model', type=str,
help='Optional. Write GNA model to file using path/filename provided.')
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
@@ -103,6 +103,32 @@ def get_output_layer_list(net: Union[IENetwork, ExecutableNetwork],
    return [list(net.outputs.keys())[-1]]


def parse_scale_factors(args: argparse.Namespace) -> list:
    """Get a list of scale factors for input files"""
    input_files = re.split(', |,', args.input)
    scale_factors = re.split(', |,', str(args.scale_factor))
    scale_factors = list(map(float, scale_factors))

    if len(input_files) != len(scale_factors):
        log.error(f'Incorrect command line for multiple inputs: {len(scale_factors)} scale factors provided for '
                  f'{len(input_files)} input files.')
        sys.exit(-7)

    for i, scale_factor in enumerate(scale_factors):
        if float(scale_factor) < 0:
            log.error(f'Scale factor for input #{i} (counting from zero) is out of range (must be positive).')
            sys.exit(-8)

    return scale_factors


def set_scale_factors(plugin_config: dict, scale_factors: list):
    """Set a scale factor provided for each input"""
    for i, scale_factor in enumerate(scale_factors):
        log.info(f'For input {i} using scale factor of {scale_factor:.7f}')
        plugin_config[f'GNA_SCALE_FACTOR_{i}'] = str(scale_factor)
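As an illustration (values hypothetical), the two helpers above turn a parsed scale-factor list into the per-input GNA plugin configuration keys:

```python
import logging as log
import sys

log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)

plugin_config = {}
set_scale_factors(plugin_config, [2175.43, 1.0])  # e.g. two inputs
# plugin_config is now:
# {'GNA_SCALE_FACTOR_0': '2175.43', 'GNA_SCALE_FACTOR_1': '1.0'}
```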
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()
@@ -149,16 +175,23 @@ def main():

# Set a GNA scale factor
if args.import_gna_model:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
elif args.scale_factor:
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
if args.scale_factor:
log.warning(f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}')
set_scale_factors(plugin_config, parse_scale_factors(args))
else:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
else:
utterances = read_utterance_file(args.input.split(',')[0])
key = sorted(utterances)[0]
scale_factor = get_scale_factor(utterances[key])
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
if args.scale_factor:
set_scale_factors(plugin_config, parse_scale_factors(args))
else:
scale_factors = []

for file_name in re.split(', |,', args.input):
first_utterance = next(iter(read_utterance_file(file_name).values()))
scale_factors.append(get_scale_factor(first_utterance))

log.info('Using scale factor(s) calculated from first utterance')
set_scale_factors(plugin_config, scale_factors)

if args.export_embedded_gna_model:
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
@@ -196,6 +196,10 @@ public:
versionInfo = &ExtensionDescription;
}

std::map<std::string, ngraph::OpSet> getOpSets() override {
return {{"framework_node_ext", ngraph::OpSet()}};
}

void Unload() noexcept override {}
};
@@ -59,10 +59,8 @@ add_custom_command(TARGET ${TARGET_NAME}
# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_COMPONENT})

install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
COMPONENT ${PYTHON_COMPONENT}
LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
COMPONENT ${PYTHON_COMPONENT})
RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT}
LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT})

install(PROGRAMS __init__.py
DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/offline_transformations
@@ -52,3 +52,16 @@ add_custom_command(TARGET ${TARGET_NAME}

add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}
EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx")

# install

install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
COMPONENT tests EXCLUDE_FROM_ALL
LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
COMPONENT tests EXCLUDE_FROM_ALL)

install(PROGRAMS __init__.py
DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/test_utils
COMPONENT tests
EXCLUDE_FROM_ALL)
@@ -51,10 +51,44 @@ def pytest_configure(config):
)


def create_ngraph_function(inputShape):
def create_encoder(input_shape, levels = 4):
import ngraph as ng
inputShape = ng.impl.PartialShape(inputShape)
param = ng.parameter(inputShape, dtype=np.float32, name="data")
result = ng.relu(param, name='out')
# input
input_node = ng.parameter(input_shape, np.float32, name="data")

padding_begin = padding_end = [0, 0]
strides = [1, 1]
dilations = [1, 1]
input_channels = [input_shape[1]]
last_output = input_node

# convolution layers
for i in range(levels):
input_c = input_channels[-1]
output_c = input_c * 2
conv_w = np.random.uniform(0, 1, [output_c, input_c, 5, 5]).astype(np.float32)
conv_node = ng.convolution(last_output, conv_w, strides, padding_begin, padding_end, dilations)
input_channels.append(output_c)
last_output = conv_node

# deconvolution layers
for i in range(levels):
input_c = input_channels[-2]
output_c = input_channels.pop(-1)
deconv_w = np.random.uniform(0, 1, [output_c, input_c, 5, 5]).astype(np.float32)
deconv_node = ng.convolution_backprop_data(last_output, deconv_w, strides)
last_output = deconv_node

# result
last_output.set_friendly_name("out")
result_node = ng.result(last_output)
return ng.Function(result_node, [input_node], "Encoder")


def create_relu(input_shape):
import ngraph as ng
input_shape = ng.impl.PartialShape(input_shape)
param = ng.parameter(input_shape, dtype=np.float32, name="data")
result = ng.relu(param, name="out")
function = ng.Function(result, [param], "TestFunction")
return function
@@ -140,10 +140,11 @@ def test_set_shape():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_blob_set_shape_after_async_infer():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)])
function = create_encoder([1, 4, 20, 20])
net = ng.function_to_cnn(function)
net.reshape({"data": [(1, 5), 4, 20, 20]})
ie_core = IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
exec_net = ie_core.load_network(net, "TEMPLATE")
@@ -152,3 +153,4 @@ def test_blob_set_shape_after_async_infer():
with pytest.raises(RuntimeError) as e:
request.input_blobs['data'].set_shape([3, 4, 20, 20])
assert "REQUEST_BUSY" in str(e.value)
request.wait()
@@ -61,9 +61,9 @@ def test_initialized(device):
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_is_dynamic():
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function([-1, 3, 20, 20])
function = create_relu([-1, 3, 20, 20])
net = ng.function_to_cnn(function)
ie = IECore()
ie.register_plugin("templatePlugin", "TEMPLATE")
@@ -48,9 +48,9 @@ def test_initialized():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_is_dynamic():
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function([-1, 3, 20, 20])
function = create_relu([-1, 3, 20, 20])
net = ng.function_to_cnn(function)
assert net.input_info["data"].input_data.is_dynamic
assert net.outputs["out"].is_dynamic
@@ -166,9 +166,9 @@ def test_reshape():
([1, 3, -1, 25], [1, 3, 22, -1])
])
def test_reshape_with_partial_shape(device, shape, p_shape):
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function(shape)
function = create_relu(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
changedFunction = ng.function_from_cnn(net)
@@ -185,9 +185,9 @@ def test_reshape_with_partial_shape(device, shape, p_shape):

@pytest.mark.ngraph_dependent_test
def test_incorrect_reshape(device):
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function([1, 3, 22, 22])
function = create_relu([1, 3, 22, 22])
net = ng.function_to_cnn(function)
with pytest.raises(ValueError) as e:
net.reshape({"data": [(2, 4, 6), 3, 22, 22]})
@@ -287,9 +287,9 @@ def test_tensor_names():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_create_two_exec_net():
from conftest import create_ngraph_function
from conftest import create_relu
import ngraph as ng
function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)])
function = create_relu([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)])
net = ng.function_to_cnn(function)
ie_core = IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
@@ -589,13 +589,13 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode):
@pytest.mark.parametrize("shape, p_shape, ref_shape", [
([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]),
([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]),
])
def test_infer_dynamic_network_with_set_shape(shape, p_shape, ref_shape):
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -616,13 +616,13 @@ def test_infer_dynamic_network_with_set_shape(shape, p_shape, ref_shape):
@pytest.mark.parametrize("shape, p_shape, ref_shape", [
([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]),
([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]),
])
def test_infer_dynamic_network_without_set_shape(shape, p_shape, ref_shape):
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -642,13 +642,13 @@ def test_infer_dynamic_network_without_set_shape(shape, p_shape, ref_shape):
@pytest.mark.parametrize("shape, p_shape, ref_shape", [
([1, 4, 20, 20], [-1, 4, 20, 20], [5, 4, 20, 20]),
([1, 4, 20, 20], [(0,5), 4, 20, 20], [3, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 3, 20, 20], [6, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [2, 4, 20, 20]),
([1, 4, 20, 20], [(3,5), 4, 20, 20], [6, 4, 20, 20]),
])
def test_infer_dynamic_network_with_set_blob(shape, p_shape, ref_shape):
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -670,11 +670,11 @@ def test_infer_dynamic_network_with_set_blob(shape, p_shape, ref_shape):
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_infer_dynamic_network_twice():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
shape, p_shape = [1, 4, 20, 20], [(0,5), 4, 20, 20]
ref_shape1, ref_shape2 = [2, 4, 20, 20], [3, 4, 20, 20]
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -692,11 +692,11 @@ def test_infer_dynamic_network_twice():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_infer_dynamic_network_with_set_blob_twice():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
shape, p_shape = [1, 4, 20, 20], [(0,5), 4, 20, 20]
ref_shape1, ref_shape2 = [2, 4, 20, 20], [3, 4, 20, 20]
function = create_ngraph_function(shape)
function = create_encoder(shape)
net = ng.function_to_cnn(function)
net.reshape({"data": p_shape})
ie_core = ie.IECore()
@@ -723,14 +723,14 @@ def test_infer_dynamic_network_with_set_blob_twice():
@pytest.mark.template_plugin
@pytest.mark.parametrize("shapes", [
([3, 4, 20, 20], [3, 4, 20, 20], [3, 4, 20, 20]),
([3, 4, 20, 20], [3, 6, 20, 20], [3, 8, 20, 20]),
([3, 4, 20, 20], [3, 4, 28, 28], [3, 4, 45, 45]),
])
def test_async_infer_dynamic_network_3_requests(shapes):
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([3, 4, 20, 20])
function = create_encoder([3, 4, 20, 20])
net = ng.function_to_cnn(function)
net.reshape({"data": [3, (2, 10), 20, 20]})
net.reshape({"data": [3, 4, (20, 50), (20, 50)]})
ie_core = ie.IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
exec_net = ie_core.load_network(net, "TEMPLATE", num_requests=3)
@@ -745,9 +745,9 @@ def test_async_infer_dynamic_network_3_requests(shapes):
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_set_blob_with_incorrect_name():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([4, 4, 20, 20])
function = create_encoder([4, 4, 20, 20])
net = ng.function_to_cnn(function)
ie_core = ie.IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
@@ -763,9 +763,9 @@ def test_set_blob_with_incorrect_name():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_set_blob_with_incorrect_size():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([4, 4, 20, 20])
function = create_encoder([4, 4, 20, 20])
net = ng.function_to_cnn(function)
ie_core = ie.IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
@@ -773,6 +773,7 @@ def test_set_blob_with_incorrect_size():
tensor_desc = exec_net.requests[0].input_blobs["data"].tensor_desc
tensor_desc.dims = [tensor_desc.dims[0]*2, 4, 20, 20]
blob = ie.Blob(tensor_desc)
print(exec_net.requests[0].output_blobs)
with pytest.raises(RuntimeError) as e:
exec_net.requests[0].set_blob("data", blob)
assert f"Input blob size is not equal network input size" in str(e.value)
@@ -784,10 +785,11 @@ def test_set_blob_with_incorrect_size():
@pytest.mark.ngraph_dependent_test
@pytest.mark.template_plugin
def test_set_blob_after_async_infer():
from conftest import create_ngraph_function
from conftest import create_encoder
import ngraph as ng
function = create_ngraph_function([ng.Dimension(0,5), ng.Dimension(4), ng.Dimension(20), ng.Dimension(20)])
function = create_encoder([1, 4, 20, 20])
net = ng.function_to_cnn(function)
net.reshape({"data": [(0, 5), 4, 20, 20]})
ie_core = ie.IECore()
ie_core.register_plugin("templatePlugin", "TEMPLATE")
exec_net = ie_core.load_network(net, "TEMPLATE")
@@ -799,3 +801,4 @@ def test_set_blob_after_async_infer():
with pytest.raises(RuntimeError) as e:
request.set_blob("data", blob)
assert "REQUEST_BUSY" in str(e.value)
request.wait()
@@ -6,14 +6,14 @@ import ngraph as ng
from ngraph.impl.op import Parameter
from ngraph.impl import Function, Shape, Type

from conftest import model_path, create_ngraph_function
from conftest import model_path, create_relu


test_net_xml, test_net_bin = model_path()


def test_create_IENetwork_from_nGraph():
func = create_ngraph_function([1, 3, 22, 22])
func = create_relu([1, 3, 22, 22])
caps = Function.to_capsule(func)
cnnNetwork = IENetwork(caps)
assert cnnNetwork != None
@@ -23,7 +23,7 @@ def test_create_IENetwork_from_nGraph():


def test_get_IENetwork_from_nGraph():
func = create_ngraph_function([1, 3, 22, 22])
func = create_relu([1, 3, 22, 22])
caps = Function.to_capsule(func)
cnnNetwork = IENetwork(caps)
assert cnnNetwork != None
@@ -2,11 +2,46 @@
# SPDX-License-Identifier: Apache-2.0
#

set(TARGET_NAME "benchmark_app")

file (GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)

ie_add_sample(NAME benchmark_app
ie_add_sample(NAME ${TARGET_NAME}
SOURCES ${SRC}
HEADERS ${HDR}
DEPENDENCIES format_reader ie_samples_utils
OPENCV_DEPENDENCIES core)

find_package(OpenCL)

find_path(OpenCL_HPP_INCLUDE_DIR
NAMES
CL/cl2.hpp OpenCL/cl2.hpp
HINTS
${opencl_root_hints}
ENV "PROGRAMFILES(X86)"
ENV AMDAPPSDKROOT
ENV INTELOCLSDKROOT
ENV NVSDKCOMPUTE_ROOT
ENV CUDA_PATH
ENV ATISTREAMSDKROOT
ENV OCL_ROOT
PATH_SUFFIXES
include
OpenCL/common/inc
"AMD APP/include")

if(OPENCL_HEADERS_DIR)
# Use OpenCL CPP headers from sources if present
set(OpenCL_HEADERS OPENCL_HEADERS_DIR)
elseif(OpenCL_HPP_INCLUDE_DIR)
# Append OpenCL CPP headers to C headers and use both
set(OpenCL_HEADERS OpenCL_INCLUDE_DIR OpenCL_HPP_INCLUDE_DIR)
endif()

if(OpenCL_FOUND AND OpenCL_HEADERS)
target_link_libraries(${TARGET_NAME} PRIVATE OpenCL::OpenCL)
target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_HEADERS})
target_compile_definitions(${TARGET_NAME} PRIVATE HAVE_GPU_DEVICE_MEM_SUPPORT)
endif()
@@ -4,6 +4,10 @@

#pragma once

#if defined(HAVE_GPU_DEVICE_MEM_SUPPORT)
# define HAVE_DEVICE_MEM_SUPPORT
#endif

#include <gflags/gflags.h>

#include <iostream>
@@ -132,6 +136,12 @@ static const char progress_message[] =
// @brief message for performance counters option
static const char pc_message[] = "Optional. Report performance counters.";

#ifdef HAVE_DEVICE_MEM_SUPPORT
// @brief message for switching memory allocation type option
static const char use_device_mem_message[] =
"Optional. Switch between host and device memory allocation for input and output buffers.";
#endif

#ifdef USE_OPENCV
// @brief message for load config option
static const char load_config_message[] =
@@ -266,6 +276,11 @@ DEFINE_bool(progress, false, progress_message);
/// @brief Define flag for showing performance counters <br>
DEFINE_bool(pc, false, pc_message);

#ifdef HAVE_DEVICE_MEM_SUPPORT
/// @brief Define flag for switching between host and device memory allocation for input and output buffers
DEFINE_bool(use_device_mem, false, use_device_mem_message);
#endif

#ifdef USE_OPENCV
/// @brief Define flag for loading configuration file <br>
DEFINE_string(load_config, "", load_config_message);
@@ -339,6 +354,9 @@ static void showUsage() {
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;
std::cout << " -enforcebf16=<true/false> " << enforce_bf16_message << std::endl;
std::cout << " -pin \"YES\"/\"HYBRID_AWARE\"/\"NO\"/\"NUMA\" " << infer_threads_pinning_message << std::endl;
#ifdef HAVE_DEVICE_MEM_SUPPORT
std::cout << " -use_device_mem " << use_device_mem_message << std::endl;
#endif
std::cout << std::endl << " Statistics dumping options:" << std::endl;
std::cout << " -report_type \"<type>\" " << report_type_message << std::endl;
std::cout << " -report_folder " << report_folder_message << std::endl;
@@ -65,6 +65,10 @@ public:
return _request.GetBlob(name);
}

void setBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
_request.SetBlob(name, data);
}

double getExecutionTimeInMilliseconds() const {
auto execTime = std::chrono::duration_cast<ns>(_endTime - _startTime);
return static_cast<double>(execTime.count()) * 0.000001;
@@ -21,6 +21,7 @@
#include "infer_request_wrap.hpp"
#include "inputs_filling.hpp"
#include "progress_bar.hpp"
#include "remote_blobs_filling.hpp"
#include "statistics_report.hpp"
#include "utils.hpp"
@@ -592,7 +593,16 @@ int main(int argc, char* argv[]) {
next_step();

InferRequestsQueue inferRequestsQueue(exeNetwork, nireq);
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
if (isFlagSetInCommandLine("use_device_mem")) {
if (device_name.find("GPU") == 0)
::gpu::fillRemoteBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests, exeNetwork);
else if (device_name.find("CPU") == 0)
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
else
IE_THROW() << "Requested device doesn't support `use_device_mem` option.";
} else {
fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests);
}

// ----------------- 10. Measuring performance
// ------------------------------------------------------------------
inference-engine/samples/benchmark_app/remote_blobs_filling.cpp (new file)
@@ -0,0 +1,140 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "remote_blobs_filling.hpp"

#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace gpu {

template <typename T>
using uniformDistribution = typename std::conditional<
    std::is_floating_point<T>::value,
    std::uniform_real_distribution<T>,
    typename std::conditional<std::is_integral<T>::value, std::uniform_int_distribution<T>, void>::type>::type;

template <typename T, typename T2>
void fillBufferRandom(void* inputBuffer,
                      size_t elementsNum,
                      T rand_min = std::numeric_limits<uint8_t>::min(),
                      T rand_max = std::numeric_limits<uint8_t>::max()) {
    std::mt19937 gen(0);
    uniformDistribution<T2> distribution(rand_min, rand_max);
    auto inputBufferData = static_cast<T*>(inputBuffer);
    for (size_t i = 0; i < elementsNum; i++) {
        inputBufferData[i] = static_cast<T>(distribution(gen));
    }
}

void fillBuffer(void* inputBuffer, size_t elementsNum, InferenceEngine::Precision precision) {
    if (precision == InferenceEngine::Precision::FP32) {
        fillBufferRandom<float, float>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::FP16) {
        fillBufferRandom<short, short>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::I32) {
        fillBufferRandom<int32_t, int32_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::I64) {
        fillBufferRandom<int64_t, int64_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::U8) {
        // uniform_int_distribution<uint8_t> is not allowed in the C++17
        // standard and vs2017/19
        fillBufferRandom<uint8_t, uint32_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::I8) {
        // uniform_int_distribution<int8_t> is not allowed in the C++17 standard
        // and vs2017/19
        fillBufferRandom<int8_t, int32_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::U16) {
        fillBufferRandom<uint16_t, uint16_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::I16) {
        fillBufferRandom<int16_t, int16_t>(inputBuffer, elementsNum);
    } else if (precision == InferenceEngine::Precision::BOOL) {
        fillBufferRandom<uint8_t, uint32_t>(inputBuffer, elementsNum, 0, 1);
    } else {
        IE_THROW() << "Requested precision is not supported";
    }
}

size_t getBytesPerElement(InferenceEngine::Precision precision) {
    switch (precision) {
    case InferenceEngine::Precision::FP32:
        return 4;
    case InferenceEngine::Precision::FP16:
        return 2;
    case InferenceEngine::Precision::I32:
        return 4;
    case InferenceEngine::Precision::I64:
        return 8;
    case InferenceEngine::Precision::U8:
        return 1;
    case InferenceEngine::Precision::I8:
        return 1;
    case InferenceEngine::Precision::U16:
        return 2;
    case InferenceEngine::Precision::I16:
        return 2;
    case InferenceEngine::Precision::BOOL:
        return 1;
    default:
        IE_THROW() << "Requested precision is not supported";
    }
}

void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
                     const size_t& batchSize,
                     benchmark_app::InputsInfo& app_inputs_info,
                     std::vector<InferReqWrap::Ptr> requests,
                     const InferenceEngine::ExecutableNetwork& exeNetwork) {
#ifdef HAVE_DEVICE_MEM_SUPPORT
    slog::info << "Device memory will be used for input and output blobs" << slog::endl;
    if (inputFiles.size()) {
        slog::warn << "Device memory supports only random data at this moment, input images will be ignored"
                   << slog::endl;
    }
    auto context = exeNetwork.GetContext();
    auto oclContext = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(context)->get();
    auto oclInstance = std::make_shared<OpenCL>(oclContext);

    auto setShared = [&](size_t requestId,
                         const std::string name,
                         const InferenceEngine::TensorDesc& desc,
                         bool fillRandom = false) {
        cl_int err;
        auto inputDims = desc.getDims();
        auto elementsNum = std::accumulate(begin(inputDims), end(inputDims), 1, std::multiplies<size_t>());
        auto inputSize = elementsNum * getBytesPerElement(desc.getPrecision());

        cl::Buffer sharedBuffer =
            cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err);

        if (fillRandom) {
            void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(sharedBuffer,
                                                                   CL_TRUE,
                                                                   CL_MEM_READ_WRITE,
                                                                   0,
                                                                   (cl::size_type)inputSize);
            fillBuffer(mappedPtr, elementsNum, desc.getPrecision());
            oclInstance->_queue.enqueueUnmapMemObject(sharedBuffer, mappedPtr);
        }

        InferenceEngine::Blob::Ptr sharedBlob = InferenceEngine::gpu::make_shared_blob(desc, context, sharedBuffer);

        requests.at(requestId)->setBlob(name, sharedBlob);
    };

    for (size_t requestId = 0; requestId < requests.size(); requestId++) {
        for (auto& item : exeNetwork.GetInputsInfo())
            setShared(requestId, item.first, item.second->getTensorDesc(), true);

        for (auto& item : exeNetwork.GetOutputsInfo())
            setShared(requestId, item.first, item.second->getTensorDesc());
    }
#else
    IE_THROW() << "Device memory requested for GPU device, but OpenCL was not linked";
#endif
}

} // namespace gpu
@ -0,0 +1,64 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#if defined(HAVE_GPU_DEVICE_MEM_SUPPORT)
#    define HAVE_DEVICE_MEM_SUPPORT
#    include <gpu/gpu_context_api_ocl.hpp>
#endif

#include <inference_engine.hpp>

#include "infer_request_wrap.hpp"
#include "utils.hpp"

namespace gpu {

#ifdef HAVE_DEVICE_MEM_SUPPORT
struct OpenCL {
    cl::Context _context;
    cl::Device _device;
    cl::CommandQueue _queue;

    explicit OpenCL(std::shared_ptr<std::vector<cl_context_properties>> media_api_context_properties = nullptr) {
        // get Intel GPU OCL device, create context and queue
        {
            std::vector<cl::Device> devices;
            std::vector<cl::Platform> platforms;
            const unsigned int refVendorID = 0x8086;

            cl::Platform::get(&platforms);
            for (auto& p : platforms) {
                p.getDevices(CL_DEVICE_TYPE_GPU, &devices);
                for (auto& d : devices) {
                    if (refVendorID == d.getInfo<CL_DEVICE_VENDOR_ID>()) {
                        _device = d;
                        _context = cl::Context(_device);
                        break;
                    }
                }
            }

            cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
            _queue = cl::CommandQueue(_context, _device, props);
        }
    }

    explicit OpenCL(cl_context context) {
        // user-supplied context handle
        _context = cl::Context(context, true);
        _device = cl::Device(_context.getInfo<CL_CONTEXT_DEVICES>()[0].get(), true);

        cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        _queue = cl::CommandQueue(_context, _device, props);
    }
};
#endif

void fillRemoteBlobs(const std::vector<std::string>& inputFiles,
                     const size_t& batchSize,
                     benchmark_app::InputsInfo& app_inputs_info,
                     std::vector<InferReqWrap::Ptr> requests,
                     const InferenceEngine::ExecutableNetwork& exeNetwork);

} // namespace gpu
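
// A short sketch of how the OpenCL helper above can be used, under the
// assumption that HAVE_DEVICE_MEM_SUPPORT is defined; `pluginContext` is a
// hypothetical cl_context obtained elsewhere (e.g. from ClContext::get()).
#ifdef HAVE_DEVICE_MEM_SUPPORT
inline void openclHelperSketch(cl_context pluginContext) {
    gpu::OpenCL discovered;              // scans platforms for an Intel (0x8086) GPU device
    gpu::OpenCL wrapped(pluginContext);  // retains the user-supplied context instead
    (void)discovered;

    // Allocate a small buffer and round-trip it through a blocking map/unmap,
    // mirroring what fillRemoteBlobs() does for input blobs.
    cl::Buffer scratch(wrapped._context, CL_MEM_READ_WRITE, (cl::size_type)64);
    void* ptr = wrapped._queue.enqueueMapBuffer(scratch, CL_TRUE, CL_MEM_READ_WRITE, 0, (cl::size_type)64);
    wrapped._queue.enqueueUnmapMemObject(scratch, ptr);
    wrapped._queue.finish();
}
#endif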
@ -1,96 +0,0 @@
@echo off

:: Copyright (C) 2018-2021 Intel Corporation
:: SPDX-License-Identifier: Apache-2.0

setlocal enabledelayedexpansion

for /f "delims=" %%x in (dependencies_64.txt) do (set "%%x")

for %%A in ("%MKL%") do set MKL_FILENAME=%%~nxA
for %%A in ("%OMP%") do set OMP_FILENAME=%%~nxA
for %%A in ("%MYRIAD%") do set MYRIAD_FILENAME=%%~nxA
for %%A in ("%GNA%") do set GNA_FILENAME=%%~nxA
for %%A in ("%OPENCV%") do set OPENCV_FILENAME=%%~nxA
for %%A in ("%HDDL%") do set HDDL_FILENAME=%%~nxA
for %%A in ("%VPU_FIRMWARE_MA2X8X%") do set VPU_FIRMWARE_MA2X8X_FILENAME=%%~nxA
for %%A in ("%TBB%") do set TBB_FILENAME=%%~nxA

call :DownloadFile MKL %MKL%
call :DownloadFile OMP %OMP%
call :DownloadFile MYRIAD %MYRIAD%
call :DownloadFile GNA %GNA%
call :DownloadFile OPENCV %OPENCV%
call :DownloadFile HDDL %HDDL%
call :DownloadFile VPU_FIRMWARE_MA2X8X %VPU_FIRMWARE_MA2X8X%
call :DownloadFile TBB %TBB%

for /f "delims=" %%x in (ld_library_rpath_64.txt) do (set "%%x")

set PATH=%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\MKL\%MKL_FILENAME%%MKL%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\OMP\%OMP_FILENAME%%OMP%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\GNA\%GNA_FILENAME%%GNA%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\OPENCV\%OPENCV_FILENAME%%OPENCV%;%PATH%
set PATH=%DL_SDK_TEMP%\test_dependencies\TBB\%TBB_FILENAME%%TBB%;%PATH%

set PATH=%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%;%PATH%

if not "%MYRIAD%"=="" (
    if exist "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\mvnc" (
        echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%" intel64 /S /I /Y /R
        xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%" intel64 /S /I /Y /R
    )

    if exist "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\mvnc" (
        echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\*" intel64 /S /I /Y /R
        xcopy.exe "%DL_SDK_TEMP%\test_dependencies\MYRIAD\%MYRIAD_FILENAME%%MYRIAD%\..\bin\*" intel64 /S /I /Y /R
    )
)

if not "%VPU_FIRMWARE_MA2X8X%"=="" (
    if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
        echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R
        xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" intel64 /S /I /Y /R
    )
)

set PATH=%DL_SDK_TEMP%\test_dependencies\HDDL\%HDDL_FILENAME%%HDDL%\..\bin;%PATH%

if not "%HDDL%"=="" (
    set HDDL_INSTALL_DIR=%DL_SDK_TEMP%\test_dependencies\HDDL\%HDDL_FILENAME%%HDDL%\..
    if exist "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%" (
        echo xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" %HDDL_INSTALL_DIR%\lib /S /I /Y /R
        xcopy.exe "%DL_SDK_TEMP%\test_dependencies\VPU\%VPU_FIRMWARE_MA2X8X_FILENAME%\*" "%HDDL_INSTALL_DIR%\lib" /S /I /Y /R
    )
)

echo PATH=%PATH%

endlocal & set PATH=%PATH%

exit /B %ERRORLEVEL%

:DownloadFile
set DEPENDENCY=%~1
set DEPENDENCY_URL=%~2
set DEPENDENCY_FILE=%~nx2
set DEPENDENCY_EXT=%~x2

if not "%DEPENDENCY_URL%"=="" (
    if not exist "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%" (
        mkdir "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%"
        for /L %%a in (1,1,10) do (
            powershell -command "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; iwr -outf '%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE%' %DEPENDENCY_URL%"
            call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE% -o%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%
            if !ERRORLEVEL! equ 0 goto :DownloadFileContinue
            timeout /T 15
        )
    )
)
goto:eof

:DownloadFileContinue
if "%DEPENDENCY_EXT%" == ".txz" call "C:\Program Files\7-Zip\7z.exe" x -y %DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%\_%DEPENDENCY_FILE:txz=tar% -o%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\%DEPENDENCY_FILE%
del "%DL_SDK_TEMP%\test_dependencies\%DEPENDENCY%\_%DEPENDENCY_FILE%" /F /Q
goto:eof
@ -1,68 +0,0 @@
#!/bin/bash

# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

if [ "$1" = "" ]; then
    dep_arch=64
else
    dep_arch=$1
fi

item_path=""
add_path() {
    component=$1
    item_path=""
    echo "Read file: dependencies_${dep_arch}.txt"
    grep_component="\b${component}\b"

    if [[ $(grep -m 1 "$grep_component" "dependencies_${dep_arch}.txt") ]]; then
        archive_path=$(grep -m 1 "$grep_component" "dependencies_${dep_arch}.txt" | sed -E "s/${component}=//g")
        library_rpath=$(grep -m 1 "$grep_component" "ld_library_rpath_${dep_arch}.txt" | sed -E "s/${component}=//g")
        filename=$(basename "$archive_path")
        if [[ (! -d "$DL_SDK_TEMP/test_dependencies/$component/$filename") ||
              (-d "$DL_SDK_TEMP/test_dependencies/$component/$filename" &&
               ! $(ls -A "$DL_SDK_TEMP/test_dependencies/$component/$filename")) ]]; then
            mkdir -p "$DL_SDK_TEMP/test_dependencies/$component/$filename"
            wget -q "$archive_path" -O "$DL_SDK_TEMP/test_dependencies/$filename"
            if [[ $filename == *.zip ]]; then
                unzip "$DL_SDK_TEMP/test_dependencies/$filename" -d "$DL_SDK_TEMP/test_dependencies/$component/$filename"
            elif [[ $filename == *.7z ]]; then
                7za x -y "$DL_SDK_TEMP/test_dependencies/$filename" -o "$DL_SDK_TEMP/test_dependencies/$component/$filename"
            else
                tar xf "$DL_SDK_TEMP/test_dependencies/$filename" -C "$DL_SDK_TEMP/test_dependencies/$component/$filename"
            fi
            rm "$DL_SDK_TEMP/test_dependencies/$filename"
        fi
        item_path=$component/$filename/$library_rpath
    fi
}

runtimes=(MKL CLDNN MYRIAD GNA DLIA OPENCV VPU_FIRMWARE_USB-MA2X8X HDDL OMP TBB AOCL_RTE LIBUSB)

export_library_path() {
    export LD_LIBRARY_PATH=$DL_SDK_TEMP/test_dependencies/$1:$LD_LIBRARY_PATH
}

export_env_variable() {
    export "$2"="$DL_SDK_TEMP/test_dependencies/$1"
}

ma2480_path=""
for i in "${runtimes[@]}"
do
    add_path "$i"
    export_library_path "$item_path"
    if [ "$i" == "VPU_FIRMWARE_USB-MA2X8X" ]
    then
        ma2480_path="$item_path"
    fi
    if [ "$i" == "HDDL" ]
    then
        cp -r "$DL_SDK_TEMP/test_dependencies/$ma2480_path/"* "$DL_SDK_TEMP/test_dependencies/$item_path"
        export HDDL_INSTALL_DIR="$DL_SDK_TEMP/test_dependencies/$item_path/.."
    fi
done

echo DATA_PATH="$DATA_PATH"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:lib:/usr/local/lib
@ -47,9 +47,8 @@ add_subdirectory(snippets)
add_custom_target(ie_libraries ALL
                  DEPENDS inference_engine_transformations inference_engine_legacy
                          inference_engine inference_engine_preproc
                          inference_engine_ir_v7_reader inference_engine_ir_reader
                          inference_engine_lp_transformations inference_engine_snippets
                          ir_frontend)
                          inference_engine_ir_v7_reader ir_ngraph_frontend
                          inference_engine_lp_transformations inference_engine_snippets)

if(NGRAPH_ONNX_FRONTEND_ENABLE)
    add_dependencies(ie_libraries onnx_ngraph_frontend)

@ -28,7 +28,7 @@ namespace CLDNNPlugin {

static void createDirectory(std::string _path) {
#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
    std::wstring widepath = FileUtils::multiByteCharToWString(_path.c_str());
    std::wstring widepath = ov::util::string_to_wstring(_path.c_str());
    const wchar_t* path = widepath.c_str();
#else
    const char* path = _path.c_str();

@ -60,8 +60,6 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
void CLDNNGraph::UpdateLayersMaps() {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateLayersMaps");
    primitiveIDs = m_program->primitiveIDs;
    primitivesToIRLayersMap = m_program->primitivesToIRLayersMap;
    IRToNgraphLayersMap = m_program->IRToNgraphLayersMap;
    prevPrimitiveIDs = m_program->prevPrimitiveIDs;
    profilingIDs = m_program->profilingIDs;
    perfMap = m_program->perfMap;

@ -219,25 +217,6 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
        return res;
    };

    auto split_string = [](std::string src, std::string delimiter = ",") -> std::vector<std::string> {
        std::vector<std::string> tokens;
        std::string tokenBuf;
        size_t prev = 0, pos = 0, srcLength = src.length(), delimLength = delimiter.length();
        do {
            pos = src.find(delimiter, prev);
            if (pos == std::string::npos) {
                pos = srcLength;
            }
            tokenBuf = src.substr(prev, pos - prev);
            if (!tokenBuf.empty()) {
                tokens.push_back(tokenBuf);
            }
            prev = pos + delimLength;
        } while (pos < srcLength && prev < srcLength);

        return tokens;
    };

    auto remove_type_from_name = [](const std::string& name) -> std::string {
        auto it = std::find(name.begin(), name.end(), ':');
        if (it == name.end() || (it + 1) == name.end())

@ -246,22 +225,13 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
        return std::string((it+1), name.end());
    };

    auto extIdMap = GetNetwork()->get_ext_id_mapping();

    auto find_origin_layers = [&](const std::string& name) -> std::vector<std::string> {
        if (primitivesToIRLayersMap.find(name) == primitivesToIRLayersMap.end())
        if (extIdMap.find(name) == extIdMap.end()) {
            return {};

        auto cnn_names = primitivesToIRLayersMap.at(name);
        std::vector<std::string> res;

        for (auto& cnn_name : cnn_names) {
            if (IRToNgraphLayersMap.find(cnn_name) != IRToNgraphLayersMap.end()) {
                auto ngraph_names = split_string(IRToNgraphLayersMap.at(cnn_name));
                res.insert(res.end(), ngraph_names.begin(), ngraph_names.end());
            } else {
                res.push_back(cnn_name);
            }
        }
        return res;
        return { extIdMap.at(name) };
    };

    auto get_inputs = [&] (const cldnn::primitive_info& prim_info) {

@ -599,13 +569,21 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
    auto allIds = GetNetwork()->get_all_primitive_org_ids();
    auto executedPrimitives = GetNetwork()->get_executed_primitives();
    auto primitivesInfo = GetNetwork()->get_primitives_info();
    auto extIdMap = GetNetwork()->get_ext_id_mapping();

    auto getUpperCaseName = [&](std::string name) {
    auto getUpperCaseName = [](std::string name) {
        if (name.length() > 0)
            name[0] = toupper(name[0]);
        return name;
    };

    auto getClearName = [](std::string name) {
        if (name.find(":") != std::string::npos) {
            name = name.substr(name.find(":") + 1, name.length());
        }
        return name;
    };

    auto getFromProfiling = [&](std::string primId) -> bool {
        auto perfIter = perfMap.find(primId);

@ -696,10 +674,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
            }
        }

        std::string layerName = primId;
        if (primId.find(":") != std::string::npos) {
            layerName = primId.substr(primId.find(":") + 1, primId.length());
        }
        std::string layerName = getClearName(primId);

        for (auto& pi : primitivesInfo) {
            if (pi.original_id == primId) {

@ -735,10 +710,27 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
    }

    // Step 3. Checking primitives which have been deleted from execution order but added by clDNNPlugin
    for (auto& primId : profilingIDs)
    for (auto& primId : profilingIDs) {
        if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
            getFromProfiling(primId);
        }
    }

    for (auto& p : extIdMap) {
        if (p.first.find(p.second) != std::string::npos) {
            continue;
        }
        auto first_res = result.find(getClearName(p.first));
        auto second_res = result.find(getClearName(p.second));

        if (first_res != result.end() && second_res != result.end() && first_res != second_res) {
            std::swap(first_res->second.cpu_uSec, second_res->second.cpu_uSec);
            std::swap(first_res->second.realTime_uSec, second_res->second.realTime_uSec);
            std::swap(first_res->second.status, second_res->second.status);
            std::swap(first_res->second.exec_type, second_res->second.exec_type);
            std::swap(first_res->second.execution_index, second_res->second.execution_index);
        }
    }
    return result;
}
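
// The extIdMap loop above exchanges profiling entries when a primitive is known
// both by an internal id (p.first) and an external name (p.second), so the
// externally visible layer reports the measured cost. A reduced sketch of that
// exchange, with a hypothetical Counters struct standing in for
// InferenceEngineProfileInfo:
struct Counters { long long realTime_uSec; long long cpu_uSec; int execution_index; };
inline void swapProfilingEntries(std::map<std::string, Counters>& result,
                                 const std::string& internalName,
                                 const std::string& externalName) {
    auto first_res = result.find(internalName);
    auto second_res = result.find(externalName);
    if (first_res != result.end() && second_res != result.end() && first_res != second_res) {
        std::swap(first_res->second.cpu_uSec, second_res->second.cpu_uSec);
        std::swap(first_res->second.realTime_uSec, second_res->second.realTime_uSec);
        std::swap(first_res->second.execution_index, second_res->second.execution_index);
    }
}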
@ -61,8 +61,6 @@ protected:
    InferenceEngine::gpu::ClContext::Ptr m_context;
    std::vector<std::shared_ptr<cldnn::network>> m_networks;
    std::map<std::string, cldnn::primitive_id> primitiveIDs;
    std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;
    std::map<cldnn::primitive_id, std::string> IRToNgraphLayersMap;
    std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;

    std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;

@ -12,6 +12,7 @@
#include "cldnn_remote_context.h"
#include "cldnn_executable_network.h"
#include "cldnn_itt.h"
#include "cldnn/runtime/debug_configuration.hpp"
#include <ie_algorithm.hpp>
#include <debug.h>

@ -622,6 +623,10 @@ void CLDNNInferRequest::allocate_inputs() {
            IE_THROW() << "Input layout for " << name << " is not found";
        }

        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 2) {
            GPU_DEBUG_COUT << "[" << name << ": input blob]" << std::endl;
        }
        if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) {
            TensorDesc desc_fp32 = desc;
            desc_fp32.setPrecision(Precision::FP32);

@ -673,6 +678,10 @@ void CLDNNInferRequest::allocate_outputs() {
        const cldnn::layout output_layout = m_graph->GetNetwork()->get_output_memory(outputID)->get_layout();
        const TensorDesc& desc = no.second->getTensorDesc();

        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 2) {
            GPU_DEBUG_COUT << "[" << no.first << ": output blob]" << std::endl;
        }
        auto blobPtr = create_device_blob(desc, output_layout);
        _deviceOutputs[no.first] = blobPtr;
        _outputs[no.first] = blobPtr;

@ -284,14 +284,12 @@ std::vector<cldnn::primitive_id> Program::GetInputPrimitiveIDs(const std::shared
void Program::AddPrimitiveToProfiler(const std::shared_ptr<ngraph::Node>& op,
                                     cldnn::primitive_id customOutputId) {
    auto id = layer_type_name_ID(op);
    primitivesToIRLayersMap[id] = { op->get_friendly_name() };
    primitiveIDs[id] = customOutputId.empty() ? id : customOutputId;
    profilingIDs.push_back(id);
}

void Program::AddPrimitiveToProfiler(cldnn::primitive_id id, const std::shared_ptr<ngraph::Node>& op,
                                     cldnn::primitive_id customOutputId) {
    primitivesToIRLayersMap[id] = { op->get_friendly_name() };
    primitiveIDs[id] = customOutputId.empty() ? id : customOutputId;
    profilingIDs.push_back(id);
}

@ -299,7 +297,6 @@ void Program::AddPrimitiveToProfiler(cldnn::primitive_id id, const std::shared_p
void Program::AddInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
                                          const std::shared_ptr<ngraph::Node>& op) {
    InitProfileInfo(id, layer_type_lower(op), false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, parentId);
    primitivesToIRLayersMap[id] = { op->get_friendly_name() };
    primitiveIDs[id] = id;
    profilingIDs.push_back(id);
}

@ -328,28 +325,24 @@ void Program::InitProfileInfo(const std::string& layerName,

// TODO: Does it make sense to add such method to ngraph core?
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
    std::list<std::shared_ptr<ngraph::Node>> nodes_to_process = { node };
    while (!nodes_to_process.empty()) {
        auto current_node = nodes_to_process.front();
        nodes_to_process.pop_front();

        for (size_t i = 0; i < current_node->get_input_size(); i++) {
            auto input_node = current_node->get_input_node_shared_ptr(i);

            // If the input is constant, then drop it from the processing list
            if (std::dynamic_pointer_cast<ngraph::op::v0::Constant>(input_node) != nullptr)
                continue;

            // If the node doesn't have any parents and it's not a constant, then we deal with a dynamic path
            if (input_node->get_input_size() == 0) {
    std::set<std::shared_ptr<ngraph::Node>> nodes_processed = {};
    std::function<bool(const std::shared_ptr<ngraph::Node>&)> is_const_node = [&nodes_processed, &is_const_node](const std::shared_ptr<ngraph::Node>& node) {
        if (nodes_processed.count(node)) return true;
        nodes_processed.insert(node);
        // If the input is constant, then drop it from the processing list
        if (std::dynamic_pointer_cast<ngraph::op::v0::Constant>(node) != nullptr)
            return true;
        // If the node doesn't have any parents and it's not a constant, then we deal with a dynamic path
        if (node->get_input_size() == 0)
            return false;
        for (size_t i = 0; i < node->get_input_size(); i++) {
            auto input_node = node->get_input_node_shared_ptr(i);
            if (!is_const_node(input_node))
                return false;
        }

            nodes_to_process.insert(nodes_to_process.end(), input_node);
        }
    }

    return true;
        return true;
    };
    return is_const_node(node);
}
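
// A standalone sketch of the memoized recursion introduced above, on a plain
// DAG instead of ngraph nodes (DagNode and isOnConstPathSketch are hypothetical
// names): a node is on a constant path iff every transitive input bottoms out
// in a constant; `seen` both memoizes visited nodes and breaks cycles.
struct DagNode { bool is_constant; std::vector<DagNode*> inputs; };
inline bool isOnConstPathSketch(DagNode* n, std::set<DagNode*>& seen) {
    if (seen.count(n)) return true;      // already visited: treat as resolved, like nodes_processed
    seen.insert(n);
    if (n->is_constant) return true;
    if (n->inputs.empty()) return false; // parentless non-constant input => dynamic path
    for (auto* in : n->inputs)
        if (!isOnConstPathSketch(in, seen)) return false;
    return true;
}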
} // namespace CLDNNPlugin
|
||||
|
@ -76,8 +76,6 @@ public:
|
||||
static const cldnn::primitive_id m_postCustomLayerTag;
|
||||
|
||||
std::map<std::string, cldnn::primitive_id> primitiveIDs;
|
||||
std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;
|
||||
std::map<cldnn::primitive_id, std::string> IRToNgraphLayersMap;
|
||||
std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;
|
||||
std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;
|
||||
|
||||
|
@ -42,7 +42,8 @@ void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Batc
|
||||
inputs[0], // block_shape
|
||||
inputs[1], // crops_begin
|
||||
inputs[2], // crops_end
|
||||
out_size);
|
||||
out_size,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(batchToSpacePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -31,8 +31,13 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
if (targetFormat.value != DefaultFormatForDims(inputRank).value) {
|
||||
auto reorderName = layerName + "_cldnn_in_reorder";
|
||||
auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(0));
|
||||
auto reorderPrim = cldnn::reorder(reorderName, inputPrimitive, targetFormat, targetDatatype);
|
||||
|
||||
auto reorderPrim = cldnn::reorder(reorderName,
|
||||
inputPrimitive,
|
||||
targetFormat,
|
||||
targetDatatype,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
|
||||
|
||||
@ -66,7 +71,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
|
||||
auto targetShape = CldnnTensorFromIEDims(inputShape);
|
||||
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitive, targetShape);
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitive, targetShape, op->get_friendly_name());
|
||||
p.AddPrimitive(reshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
|
||||
|
||||
@ -75,7 +80,9 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
|
||||
auto broadcastPrim = cldnn::broadcast(layerName,
|
||||
inputPrimitive,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)));
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
{},
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(broadcastPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -45,7 +45,8 @@ void CreateConcatOp(Program& p, const std::shared_ptr<ngraph::op::v0::Concat>& o
|
||||
layerName,
|
||||
inputPrimitives,
|
||||
GetConcatAxis(op->get_axis(), op->get_input_shape(0).size()),
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)));
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(concatPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "ngraph/op/util/op_types.hpp"
|
||||
|
||||
#include "cldnn/primitives/data.hpp"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
|
||||
@ -169,6 +170,10 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
|
||||
if (bufIter != p.blobMemCache.end()) {
|
||||
constPrimID = bufIter->second;
|
||||
} else {
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
GPU_DEBUG_COUT << "[" << initialconstPrimID << ": constant]" << std::endl;
|
||||
}
|
||||
cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false);
|
||||
auto& stream = p.GetEngine().get_program_stream();
|
||||
cldnn::mem_lock<char> lock{mem, stream};
|
||||
@ -199,7 +204,7 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
|
||||
} else {
|
||||
std::memcpy(&buf[0], &data[0], bufSize);
|
||||
}
|
||||
p.AddPrimitive(cldnn::data(initialconstPrimID, mem));
|
||||
p.AddPrimitive(cldnn::data(initialconstPrimID, mem, op->get_friendly_name()));
|
||||
p.blobMemCache[std::make_pair(data, constDims)] = initialconstPrimID;
|
||||
constPrimID = initialconstPrimID;
|
||||
}
|
||||
|
@ -19,8 +19,13 @@ void CreateConvertLikeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Conve
|
||||
|
||||
auto outDataType = DataTypeFromPrecision(op->get_input_element_type(1));
|
||||
|
||||
auto reorderPrim = cldnn::reorder(layerName, inputPrimitives[0], cldnn::format::any, outDataType);
|
||||
|
||||
auto reorderPrim = cldnn::reorder(layerName,
|
||||
inputPrimitives[0],
|
||||
cldnn::format::any,
|
||||
outDataType,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
}
|
||||
@ -32,7 +37,13 @@ void CreateConvertOp(Program& p, const std::shared_ptr<ngraph::op::v0::Convert>&
|
||||
|
||||
auto outDataType = DataTypeFromPrecision(op->get_destination_type());
|
||||
|
||||
auto reorderPrim = cldnn::reorder(layerName, inputPrimitives[0], cldnn::format::any, outDataType);
|
||||
auto reorderPrim = cldnn::reorder(layerName,
|
||||
inputPrimitives[0],
|
||||
cldnn::format::any,
|
||||
outDataType,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -84,7 +84,8 @@ void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
DataTypeFromPrecision(outPrecision),
|
||||
weights_have_group_dim);
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(convPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -112,7 +113,8 @@ void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::Convo
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
DataTypeFromPrecision(outPrecision),
|
||||
weights_have_group_dim);
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(convPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -146,7 +148,8 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::o
|
||||
std::swap(permute_order[1], permute_order[0]);
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
ConvertPermuteOrder(permute_order, weights_rank));
|
||||
ConvertPermuteOrder(permute_order, weights_rank),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
@ -159,14 +162,15 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::o
|
||||
|
||||
auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
|
||||
auto deconvPrim = cldnn::deconvolution(layerName,
|
||||
inputs[0],
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim);
|
||||
inputs[0],
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(deconvPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -202,7 +206,8 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngra
|
||||
std::swap(permute_order[2], permute_order[1]);
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
ConvertPermuteOrder(permute_order, weights_rank));
|
||||
ConvertPermuteOrder(permute_order, weights_rank),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
@ -214,14 +219,15 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngra
|
||||
const bool weights_have_group_dim = true;
|
||||
|
||||
auto deconvPrim = cldnn::deconvolution(layerName,
|
||||
inputs[0],
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim);
|
||||
inputs[0],
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(deconvPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -247,7 +253,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
params.stride,
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims));
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(convPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -280,7 +287,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
kernel);
|
||||
kernel,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(defConvPrimInterp);
|
||||
p.AddInnerPrimitiveToProfiler(defConvLayerNameInterp, defConvLayerNameConv, op);
|
||||
auto defConvPrim = cldnn::deformable_conv(defConvLayerNameConv,
|
||||
@ -288,7 +296,8 @@ void CreateDeformableConvolutionOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
CldnnTensorFromIEDims(outDims));
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(defConvPrim);
|
||||
p.AddPrimitiveToProfiler(defConvLayerNameConv, op);
|
||||
}
|
||||
@ -313,7 +322,8 @@ void CreateBinaryConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1:
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
params.groups,
|
||||
op->get_pad_value(),
|
||||
calc_precision);
|
||||
calc_precision,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(convPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "cldnn/primitives/ctc_greedy_decoder.hpp"
|
||||
#include "cldnn/primitives/reorder.hpp"
|
||||
#include "cldnn/primitives/mutable_data.hpp"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
@ -33,7 +34,10 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -70,11 +74,16 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
DefaultFormatForDims(op->get_output_shape(1).size()),
|
||||
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
||||
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
|
||||
}
|
||||
shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout));
|
||||
|
||||
cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write";
|
||||
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]);
|
||||
p.primitivesToIRLayersMap[ctc_gd_mutable_id_w] = { op->get_friendly_name() };
|
||||
auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w,
|
||||
shared_memory[0],
|
||||
op->get_friendly_name());
|
||||
p.primitiveIDs[ctc_gd_mutable_id_w] = ctc_gd_mutable_id_w;
|
||||
p.AddPrimitive(ctc_gd_mutable_prim);
|
||||
reorderedInputs.push_back(ctc_gd_mutable_id_w);
|
||||
@ -86,7 +95,8 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
reorderedInputs,
|
||||
blank_index,
|
||||
ctc_merge_repeated,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)));
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
// clDNN primitive supports only i32 as output data type
|
||||
primitive.output_data_type = DataTypeFromPrecision(ngraph::element::i32);
|
||||
@ -99,8 +109,10 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::No
|
||||
|
||||
if (num_output == 2) {
|
||||
cldnn::primitive_id ctc_gd_mutable_id_r = layer_type_name_ID(op) + ".1";
|
||||
auto ctc_gd_mutable_prim_r = cldnn::mutable_data(ctc_gd_mutable_id_r, { CTCGreedyDecoderLayerName }, shared_memory[0]);
|
||||
p.primitivesToIRLayersMap[ctc_gd_mutable_id_r] = { op->get_friendly_name() };
|
||||
auto ctc_gd_mutable_prim_r = cldnn::mutable_data(ctc_gd_mutable_id_r,
|
||||
{ CTCGreedyDecoderLayerName },
|
||||
shared_memory[0],
|
||||
op->get_friendly_name());
|
||||
p.primitiveIDs[ctc_gd_mutable_id_r] = ctc_gd_mutable_id_r;
|
||||
p.AddPrimitive(ctc_gd_mutable_prim_r);
|
||||
}
|
||||
|
@ -63,7 +63,8 @@ void CreateCumSumOp(Program& p, const std::shared_ptr<ngraph::op::v0::CumSum>& o
|
||||
inputPrimitives[0],
|
||||
GetCumSumAxis(axis, rank),
|
||||
exclusive,
|
||||
reverse);
|
||||
reverse,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -145,7 +145,10 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
reorderPrimName,
|
||||
inputPrimitives[param.portIndex],
|
||||
param.format,
|
||||
DataTypeFromPrecision(op->get_input_element_type(param.portIndex)));
|
||||
DataTypeFromPrecision(op->get_input_element_type(param.portIndex)),
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
@ -229,7 +232,8 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
customLayer->CompilerOptions(),
|
||||
outputLayout,
|
||||
gws,
|
||||
lws);
|
||||
lws,
|
||||
op->get_friendly_name());
|
||||
|
||||
auto prevLayerName = genericLayerName;
|
||||
if (outputLayout.format != cldnn::format::any) {
|
||||
@ -239,7 +243,10 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
cldnn::reorder(reorderPrimName,
|
||||
genericLayerName,
|
||||
DefaultFormatForDims(op->get_output_shape(0).size()),
|
||||
customPrim.output_layout.data_type));
|
||||
customPrim.output_layout.data_type,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name()));
|
||||
prevLayerName = reorderPrimName;
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
}
|
||||
|
@ -33,7 +33,8 @@ void CreateDepthToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v0::Dept
|
||||
auto depthToSpacePrim = cldnn::depth_to_space(layerName,
|
||||
inputPrimitives[0],
|
||||
blockSize,
|
||||
mode);
|
||||
mode,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(depthToSpacePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -75,7 +75,8 @@ void CreateDetectionOutputOp(Program& p, const std::shared_ptr<ngraph::op::v0::D
|
||||
input_height,
|
||||
decrease_label_id,
|
||||
clip_before_nms,
|
||||
clip_after_nms);
|
||||
clip_after_nms,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(detectionPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -46,7 +46,13 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cl
|
||||
if (targetFormat.value != DefaultFormatForDims(inputRank).value) {
|
||||
auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder";
|
||||
auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(i));
|
||||
auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
|
||||
auto reorderPrim = cldnn::reorder(reorderName,
|
||||
inputPrimitives[i],
|
||||
targetFormat,
|
||||
targetDatatype,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
|
||||
@ -61,7 +67,7 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cl
|
||||
|
||||
auto targetShape = CldnnTensorFromIEDims(inputShape);
|
||||
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());
|
||||
p.AddPrimitive(reshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
|
||||
|
||||
@ -74,7 +80,8 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cl
|
||||
inputPrimitives,
|
||||
mode,
|
||||
{},
|
||||
out_dt);
|
||||
out_dt,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(eltwisePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -49,7 +49,10 @@ void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngraph::op
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -62,7 +65,8 @@ void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngraph::op
|
||||
reorderedInputs,
|
||||
cldnn::embedding_bag::offsets_sum,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
defaultIndex);
|
||||
defaultIndex,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(embeddingBagPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -86,7 +90,10 @@ void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -98,7 +105,9 @@ void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngraph::op:
|
||||
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
|
||||
reorderedInputs,
|
||||
cldnn::embedding_bag::packed_sum,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)));
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
-1,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(embeddingBagPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
@ -140,7 +149,10 @@ void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngraph::op::
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -153,7 +165,8 @@ void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngraph::op::
|
||||
reorderedInputs,
|
||||
cldnn::embedding_bag::segments_sum,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
defaultIndex);
|
||||
defaultIndex,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(embeddingBagPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -38,7 +38,8 @@ void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr<ngraph::op::v
|
||||
strides,
|
||||
rates,
|
||||
auto_pad,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)));
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(extractImagePatchesPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -31,7 +31,8 @@ void CreateFakeQuantizeOp(Program& p, const std::shared_ptr<ngraph::op::v0::Fake
|
||||
output_low_id,
|
||||
output_high_id,
|
||||
levels,
|
||||
dt);
|
||||
dt,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(quantizationPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -30,7 +30,10 @@ void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
|
||||
reorderedInputs[portIndex] = reorderPrimName;
|
||||
@ -43,7 +46,8 @@ void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather
|
||||
reorderedInputs[0],
|
||||
reorderedInputs[1],
|
||||
reorderedInputs[2],
|
||||
reorderedInputs[3]);
|
||||
reorderedInputs[3],
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(gatherTreePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -77,7 +77,10 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
|
||||
reorderedInputs[portIndex] = reorderPrimName;
|
||||
@ -94,7 +97,8 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
|
||||
outLayout,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
batch_dim,
|
||||
support_neg_ind);
|
||||
support_neg_ind,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(gatherPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -55,7 +55,8 @@ void CreateGatherElementsOp(Program& p, const std::shared_ptr<ngraph::op::v6::Ga
|
||||
inputPrimitives[1],
|
||||
outLayout,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
GetGatherAxis(axis, rank));
|
||||
GetGatherAxis(axis, rank),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -22,10 +22,11 @@ void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v5::GatherND
|
||||
auto batch_dims = op->get_batch_dims();
|
||||
|
||||
auto primitive = cldnn::gather_nd(layerName,
|
||||
inputPrimitives[0],
|
||||
inputPrimitives[1],
|
||||
indices_rank,
|
||||
batch_dims);
|
||||
inputPrimitives[0],
|
||||
inputPrimitives[1],
|
||||
indices_rank,
|
||||
batch_dims,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -19,7 +19,8 @@ void CreateGRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::GRN>& op) {
|
||||
auto primitive = cldnn::grn(layerName,
|
||||
inputPrimitives[0],
|
||||
op->get_bias(),
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)));
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(primitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -193,7 +193,8 @@ void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4::Inter
|
||||
cldnnSampleType,
|
||||
shapeCalcMode,
|
||||
coordTransMode,
|
||||
nearestMode);
|
||||
nearestMode,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(resamplePrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -29,11 +29,11 @@ using Loop = ngraph::op::v5::Loop;
|
||||
namespace CLDNNPlugin {
|
||||
|
||||
template<class DATA_TYPE>
|
||||
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
|
||||
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) {
|
||||
auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
|
||||
cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
|
||||
*ptr.begin() = num;
|
||||
return {id, mem};
|
||||
return {id, mem, ext_prim_id};
|
||||
}
|
||||
|
||||
static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr<ngraph::Node>& op,
|
||||
@ -44,7 +44,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
|
||||
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
|
||||
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
|
||||
auto mem = p.GetEngine().allocate_memory(output_layout);
|
||||
auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
|
||||
auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency
|
||||
return md;
|
||||
}
|
||||
|
||||
@ -161,8 +161,7 @@ void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {
|
||||
}
|
||||
const cldnn::primitive_id num_iteration_id = layerName + "_numIteration";
|
||||
{
|
||||
cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0);
|
||||
p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() };
|
||||
cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0, op->get_friendly_name());
|
||||
p.primitiveIDs[num_iteration_id] = num_iteration_id;
|
||||
p.AddPrimitive(num_iteration);
|
||||
p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op);
|
||||
@ -216,7 +215,8 @@ void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {
|
||||
back_edges, /* back edge mapping */
|
||||
num_iterations, /* max iteration, i.e. length of iteration axis */
|
||||
body_current_iteration_id,
|
||||
body_execution_condition_id);
|
||||
body_execution_condition_id,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(loopPrimitive);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -38,7 +38,8 @@ void CreateLRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::LRN>& op) {
|
||||
static_cast<float>(op->get_bias()),
|
||||
static_cast<float>(op->get_alpha()),
|
||||
static_cast<float>(op->get_beta()),
|
||||
GetNormRegion(axis_value));
|
||||
GetNormRegion(axis_value),
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(lrnPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -89,7 +89,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
auto permuteName = op->get_friendly_name() + "/transpose_b";
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
cldnn_permute_order);
|
||||
cldnn_permute_order,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
weightsName = permuteName;
|
||||
@ -108,7 +109,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
auto permuteName = op->get_friendly_name() + "/transpose_a";
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
inputName,
|
||||
cldnn_permute_order);
|
||||
cldnn_permute_order,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
inputName = permuteName;
|
||||
@ -124,7 +126,10 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
IE_THROW() << "Inconsistent reshape in Matmul op: " << op->get_friendly_name();
|
||||
|
||||
auto reshapeInName = op->get_friendly_name() + suffix;
|
||||
auto reshapeInPrim = cldnn::reshape(reshapeInName, inputName, CldnnTensorFromIEDims(reshapeSize));
|
||||
auto reshapeInPrim = cldnn::reshape(reshapeInName,
|
||||
inputName,
|
||||
CldnnTensorFromIEDims(reshapeSize),
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(reshapeInPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
|
||||
return reshapeInName;
|
||||
@ -144,6 +149,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
weightsName,
|
||||
"",
|
||||
DataTypeFromPrecision(op->get_output_element_type(0)),
|
||||
op->get_friendly_name(),
|
||||
cldnn::padding(),
|
||||
input_rank);
|
||||
|
||||
@ -153,7 +159,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
if (reshape_fc) {
|
||||
auto outputShape = CldnnTensorFromIEDims(op->get_output_shape(0));
|
||||
auto outReshapeName = layerName + "_cldnn_out_reshape";
|
||||
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape);
|
||||
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(outReshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op);
|
||||
@ -188,7 +194,13 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
if (targetFormat.value != DefaultFormatForDims(inputDimsN).value) {
|
||||
auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder";
|
||||
auto targetDatatype = DataTypeFromPrecision(op->get_output_element_type(0));
|
||||
auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
|
||||
auto reorderPrim = cldnn::reorder(reorderName,
|
||||
inputPrimitives[i],
|
||||
targetFormat,
|
||||
targetDatatype,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(reorderPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
|
||||
@ -227,7 +239,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
|
||||
auto targetShape = gemmSpecificTensor(inputDims);
|
||||
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(reshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
|
||||
@ -248,7 +260,8 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
transA,
|
||||
transB,
|
||||
alpha,
|
||||
beta);
|
||||
beta,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(gemmPrim);
|
||||
|
||||
@ -258,7 +271,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
|
||||
if (outDimsN < 4) {
|
||||
auto outputShape = CldnnTensorFromIEDims(outDims);
|
||||
auto outReshapeName = layerName + "_cldnn_out_reshape";
|
||||
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape);
|
||||
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(outReshapePrim);
|
||||
p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op);
|
||||
|
@ -24,7 +24,8 @@ static void CreateCommonMVNOp(Program& p, const std::shared_ptr<ngraph::Node>& o
|
||||
normalize_variance,
|
||||
eps,
|
||||
eps_inside_sqrt,
|
||||
across_channels);
|
||||
across_channels,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(mvnPrim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "cldnn/primitives/reorder.hpp"
|
||||
#include "cldnn/primitives/mutable_data.hpp"
|
||||
#include "cldnn/primitives/non_max_suppression.hpp"
|
||||
#include "cldnn/runtime/debug_configuration.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
|
||||
@ -41,7 +42,10 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
|
||||
auto preprocessPrim = cldnn::reorder(reorderPrimName,
|
||||
inputPrimitives[portIndex],
|
||||
targetFormat,
|
||||
cldnn::data_types::i32);
|
||||
cldnn::data_types::i32,
|
||||
std::vector<float>(),
|
||||
cldnn::reorder_mean_mode::subtract,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(preprocessPrim);
|
||||
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
|
||||
reorderedInputs[portIndex] = (reorderPrimName);
|
||||
@ -63,6 +67,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
std::size_t num_output = op->get_output_size();

std::vector<cldnn::memory::ptr> shared_memory;
GPU_DEBUG_GET_INSTANCE(debug_config);
switch (num_output) {
case 3: {
    auto mutable_precision_second = op->get_output_element_type(2);
@ -74,11 +79,15 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
    DefaultFormatForDims(op->get_output_shape(2).size()),
    CldnnTensorFromIEDims(op->get_output_shape(2)));

    GPU_DEBUG_IF(debug_config->verbose >= 2) {
        GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
    }
    shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond));

    cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
    auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back());
    p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_second] = { op->get_friendly_name() };
    auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second,
        shared_memory.back(),
        op->get_friendly_name());
    p.primitiveIDs[non_max_supression_mutable_id_w_second] = non_max_supression_mutable_id_w_second;
    p.AddPrimitive(nms_mutable_prim_second);
    inputPrimitives.push_back(non_max_supression_mutable_id_w_second);
@ -91,11 +100,15 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
    cldnn::format::bfyx,
    cldnn::tensor(static_cast<int32_t>(outputIndices), 3, 1, 1));

    GPU_DEBUG_IF(debug_config->verbose >= 2) {
        GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
    }
    shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutFirst));

    cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
    auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back());
    p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_first] = { op->get_friendly_name() };
    auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first,
        shared_memory.back(),
        op->get_friendly_name());
    p.primitiveIDs[non_max_supression_mutable_id_w_first] = non_max_supression_mutable_id_w_first;
    p.AddPrimitive(nms_mutable_prim_first);
    inputPrimitives.push_back(non_max_supression_mutable_id_w_first);
@ -112,7 +125,9 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
    reorderedInputs[1],
    static_cast<int>(outputIndices),
    op->m_center_point_box,
    op->m_sort_result_descending);
    op->m_sort_result_descending,
    "", "", "", "", "", "",
    op->get_friendly_name());

prim.output_data_type = DataTypeFromPrecision(out_type);

@ -136,15 +151,19 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngrap
switch (num_output) {
case 3: {
    cldnn::primitive_id non_max_supression_id_r_second = layer_type_name_ID(op) + ".2";
    auto nms_mutable_prim_r_second = cldnn::mutable_data(non_max_supression_id_r_second, { nonMaxSupressionLayerName }, shared_memory.front());
    p.primitivesToIRLayersMap[non_max_supression_id_r_second] = { op->get_friendly_name() };
    auto nms_mutable_prim_r_second = cldnn::mutable_data(non_max_supression_id_r_second,
        { nonMaxSupressionLayerName },
        shared_memory.front(),
        op->get_friendly_name());
    p.primitiveIDs[non_max_supression_id_r_second] = non_max_supression_id_r_second;
    p.AddPrimitive(nms_mutable_prim_r_second);
}
case 2: {
    cldnn::primitive_id non_max_supression_id_r_first = layer_type_name_ID(op) + ".1";
    auto nms_mutable_prim_r_first = cldnn::mutable_data(non_max_supression_id_r_first, { nonMaxSupressionLayerName }, shared_memory.back());
    p.primitivesToIRLayersMap[non_max_supression_id_r_first] = { op->get_friendly_name() };
    auto nms_mutable_prim_r_first = cldnn::mutable_data(non_max_supression_id_r_first,
        { nonMaxSupressionLayerName },
        shared_memory.back(),
        op->get_friendly_name());
    p.primitiveIDs[non_max_supression_id_r_first] = non_max_supression_id_r_first;
    p.AddPrimitive(nms_mutable_prim_r_first);
}

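Note: the two switch blocks above are the halves of one pattern for multi-output primitives: before the main primitive is built, a shared GPU buffer is allocated and registered as a "_md_write_*" cldnn::mutable_data appended to the inputs; afterwards a second mutable_data (IDs ".1"/".2") re-exposes the same buffer as an output that depends on the NMS primitive. A rough standalone model of that aliasing, with mock types standing in for cldnn::memory::ptr and Program:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-ins: a shared buffer plus a primitive that aliases it.
struct memory_buf { std::vector<int> data; };
using memory_ptr = std::shared_ptr<memory_buf>;

struct mutable_data {
    std::string id;
    std::vector<std::string> deps;  // the read-back variant depends on the main prim
    memory_ptr mem;
    std::string ext_prim_id;        // friendly name of the originating op
};

int main() {
    std::string node = "nonmaxsuppression:nms";
    auto shared = std::make_shared<memory_buf>();

    // Writer: registered before the main primitive and appended to its inputs.
    mutable_data writer{node + "_md_write_second", {}, shared, node};
    // Reader: registered after the main primitive; same buffer, extra dependency.
    mutable_data reader{node + ".2", {node}, shared, node};

    std::cout << writer.id << " and " << reader.id
              << " alias one buffer: " << (writer.mem == reader.mem) << "\n";
    return 0;
}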
@ -45,14 +45,15 @@ void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::Norma

std::memcpy(&buf[0], scale->get_data_ptr(), bufSize);
auto scalesName = layerName + "_cldnn_input_scales";
p.AddPrimitive(cldnn::data(scalesName, mem));
p.AddPrimitive(cldnn::data(scalesName, mem, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(scalesName, layerName, op);

auto normPrim = cldnn::normalize(layerName,
    inputPrimitives[0],
    scalesName,
    across_spatial,
    eps);
    eps,
    op->get_friendly_name());

p.AddPrimitive(normPrim);
p.AddPrimitiveToProfiler(op);

@ -53,7 +53,8 @@ void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::OneHot>& o
    DataTypeFromPrecision(op->get_output_element_type(0)),
    static_cast<uint16_t>(axis),
    on_value,
    off_value);
    off_value,
    op->get_friendly_name());

p.AddPrimitive(oneHotPrim);
p.AddPrimitiveToProfiler(op);

@ -66,7 +66,8 @@ void CreatePadOp(Program& p, const std::shared_ptr<ngraph::op::v1::Pad>& op) {
    pads_begin,
    pads_end,
    border_mode,
    pad_value);
    pad_value,
    op->get_friendly_name());

p.AddPrimitive(tilePrim);
p.AddPrimitiveToProfiler(op);

@ -195,8 +195,8 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
    cldnn::format::nv12, { 1, 1, width, height });
cldnn::layout uv_layout(DataTypeFromPrecision(ip),
    cldnn::format::nv12, { 1, 2, width / 2, height / 2 });
auto inputY = cldnn::input_layout(y_name, y_layout);
auto inputUV = cldnn::input_layout(uv_name, uv_layout);
auto inputY = cldnn::input_layout(y_name, y_layout, inputInfo->name());
auto inputUV = cldnn::input_layout(uv_name, uv_layout, inputInfo->name());

p.AddPrimitive(inputY);
p.inputLayouts.insert({ inputInfo->name() + "_Y" + std::to_string(i), y_layout });
@ -205,20 +205,29 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
switch (preProcess.getMeanVariant()) {
case NONE:
case MEAN_VALUE: {
    p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanValues));
    p.AddPrimitive(cldnn::reorder(preprocessPrimID,
        y_name,
        uv_name,
        networkInputLayout,
        meanValues,
        cldnn::reorder_mean_mode::subtract,
        inputInfo->name()));
    break;
}
case MEAN_IMAGE: {
    p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanBlobID));
    p.AddPrimitive(cldnn::reorder(preprocessPrimID,
        y_name,
        uv_name,
        networkInputLayout,
        meanBlobID,
        cldnn::reorder_mean_mode::subtract,
        inputInfo->name()));
    break;
}
default: IE_THROW(Unexpected) << "Invalid mean variant in input " + inputName;
    break;
}

p.primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() };
p.primitivesToIRLayersMap[y_name] = { inputInfo->name() };
p.primitivesToIRLayersMap[uv_name] = { inputInfo->name() };
p.profilingIDs.push_back(preprocessPrimID);
p.InitProfileInfo(preprocessPrimID, "Reorder");
p.primitiveIDs[inputName] = preprocessPrimID; // If it is batched blob, it will be overwritten afterwards.
@ -228,7 +237,7 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet

if (inputDims[0] > 1) {
    auto concatPrimID = "concat:" + inputName + Program::m_preProcessTag;
    p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b));
    p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b, op->get_friendly_name()));
    p.primitiveIDs[inputName] = concatPrimID;
}
} else {
@ -237,20 +246,26 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
inputLayout.data_type = DataTypeFromPrecision(ip);
p.inputLayouts.insert({ inputInfo->name(), inputLayout });

p.AddPrimitive(cldnn::input_layout(inputName, inputLayout));
p.primitivesToIRLayersMap[inputName] = { inputInfo->name() };
p.AddPrimitive(cldnn::input_layout(inputName, inputLayout, inputInfo->name()));

switch (preProcess.getMeanVariant()) {
case NONE:
case MEAN_VALUE: {
    p.AddPrimitive(cldnn::reorder(preprocessPrimID, inputName, networkInputLayout, meanValues));
    p.AddPrimitive(cldnn::reorder(preprocessPrimID,
        inputName,
        networkInputLayout,
        meanValues,
        cldnn::reorder_mean_mode::subtract,
        op->get_friendly_name()));
    break;
}
case MEAN_IMAGE: {
    p.AddPrimitive(cldnn::reorder(preprocessPrimID,
        inputName,
        networkInputLayout,
        meanBlobID));
        inputName,
        networkInputLayout,
        meanBlobID,
        cldnn::reorder_mean_mode::subtract,
        op->get_friendly_name()));
    break;
}
default: IE_THROW() << "Invalid mean variant in input " << inputName;

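Note: in CreateParameterOp the preprocessing reorders now state cldnn::reorder_mean_mode::subtract explicitly for both mean variants (per-channel mean values or a mean-image blob). Roughly, subtract mode removes the channel mean while the layout is converted; a toy sketch of those semantics, not the actual GPU kernel:

#include <cstddef>
#include <vector>

// Toy model of reorder_mean_mode::subtract with per-channel mean values:
// during the layout change, each channel has its mean subtracted.
std::vector<float> apply_mean_subtract(const std::vector<float>& input,
                                       const std::vector<float>& mean,
                                       std::size_t channels) {
    std::vector<float> out(input.size());
    const std::size_t per_channel = input.size() / channels;
    for (std::size_t c = 0; c < channels; ++c)
        for (std::size_t i = 0; i < per_channel; ++i)
            out[c * per_channel + i] = input[c * per_channel + i] - mean[c];
    return out;
}

int main() {
    // Two channels of two values each, means {1, 2}: result is {0, 1, 0, 1}.
    auto out = apply_mean_subtract({1, 2, 2, 3}, {1, 2}, 2);
    return out[0] == 0 && out[3] == 1 ? 0 : 1;
}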
@ -70,7 +70,8 @@ void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::AvgPool>&
    params.stride,
    params.pad_begin,
    CldnnTensorFromIEDims(op->get_output_shape(0)),
    DataTypeFromPrecision(op->get_output_element_type(0)));
    DataTypeFromPrecision(op->get_output_element_type(0)),
    op->get_friendly_name());
poolPrim.pad_end = params.pad_end;
p.AddPrimitive(poolPrim);
p.AddPrimitiveToProfiler(op);
@ -89,7 +90,8 @@ void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::MaxPool>&
    params.stride,
    params.pad_begin,
    CldnnTensorFromIEDims(op->get_output_shape(0)),
    DataTypeFromPrecision(op->get_output_element_type(0)));
    DataTypeFromPrecision(op->get_output_element_type(0)),
    op->get_friendly_name());
poolPrim.pad_end = params.pad_end;
p.AddPrimitive(poolPrim);
p.AddPrimitiveToProfiler(op);

@ -54,7 +54,8 @@ void CreatePriorBoxClusteredOp(Program& p, const std::shared_ptr<ngraph::op::v0:
    offset,
    width,
    height,
    DataTypeFromPrecision(op->get_output_element_type(0)));
    DataTypeFromPrecision(op->get_output_element_type(0)),
    op->get_friendly_name());

p.AddPrimitive(priorBoxPrim);
p.AddPrimitiveToProfiler(op);
@ -103,7 +104,8 @@ void CreatePriorBoxOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBox
    scale_all_sizes,
    fixed_ratio,
    fixed_size,
    density);
    density,
    op->get_friendly_name());

p.AddPrimitive(priorBoxPrim);
p.AddPrimitiveToProfiler(op);

@ -9,6 +9,7 @@

#include "cldnn/primitives/proposal.hpp"
#include "cldnn/primitives/mutable_data.hpp"
#include "cldnn/runtime/debug_configuration.hpp"

namespace CLDNNPlugin {

@ -62,11 +63,16 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
    DefaultFormatForDims(op->get_output_shape(1).size()),
    CldnnTensorFromIEDims(op->get_output_shape(1)));

GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->verbose >= 2) {
    GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
}
auto shared_memory = p.GetEngine().allocate_memory(mutableLayout);

cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write";
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory);
p.primitivesToIRLayersMap[proposal_mutable_id_w] = { op->get_friendly_name() };
auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w,
    shared_memory,
    op->get_friendly_name());
p.primitiveIDs[proposal_mutable_id_w] = proposal_mutable_id_w;
p.AddPrimitive(argmax_mutable_prim);
inputPrimitives.push_back(proposal_mutable_id_w);
@ -96,13 +102,16 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
    clip_after_nms,
    round_ratios,
    shift_anchors,
    normalize);
    normalize,
    op->get_friendly_name());

p.AddPrimitive(proposalPrim);

cldnn::primitive_id proposal_mutable_id_r = layer_type_name_ID(op) + ".1";
auto argmax_mutable_prim_r = cldnn::mutable_data(proposal_mutable_id_r, { proposalLayerName }, shared_memory);
p.primitivesToIRLayersMap[proposal_mutable_id_r] = { op->get_friendly_name() };
auto argmax_mutable_prim_r = cldnn::mutable_data(proposal_mutable_id_r,
    { proposalLayerName },
    shared_memory,
    op->get_friendly_name());
p.primitiveIDs[proposal_mutable_id_r] = proposal_mutable_id_r;
p.AddPrimitive(argmax_mutable_prim_r);

@ -134,7 +143,8 @@ void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal
    clip_after_nms,
    round_ratios,
    shift_anchors,
    normalize);
    normalize,
    op->get_friendly_name());

p.AddPrimitive(proposalPrim);
p.AddPrimitiveToProfiler(op);

@ -75,7 +75,8 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::
    inputPrimitives[0],
    mode,
    axes,
    static_cast<int32_t>(keep_dims));
    static_cast<int32_t>(keep_dims),
    op->get_friendly_name());

p.AddPrimitive(reducePrim);

@ -96,7 +97,7 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::
    outTensor = cldnn::tensor(TensorValue(out_shape[0]), TensorValue(out_shape[1]),
        1, TensorValue(out_shape[2]));
}
auto reshape_prim = cldnn::reshape(resultLayerName, layerName, outTensor);
auto reshape_prim = cldnn::reshape(resultLayerName, layerName, outTensor, op->get_friendly_name());
p.AddPrimitive(reshape_prim);
p.AddPrimitiveToProfiler(op, resultLayerName);
}
@ -112,7 +113,13 @@ void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::
else if (rank - rawAxes.size() <= 4)
    out_format = cldnn::format::bfyx;

auto reorder_prim = cldnn::reorder(reorderLayerName, resultLayerName, out_format, out_dt);
auto reorder_prim = cldnn::reorder(reorderLayerName,
    resultLayerName,
    out_format,
    out_dt,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name());
p.AddPrimitive(reorder_prim);
p.AddPrimitiveToProfiler(op, reorderLayerName);
} else {

@ -28,7 +28,8 @@ void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::Region
    classes,
    num,
    mask_size,
    do_softmax);
    do_softmax,
    op->get_friendly_name());

p.AddPrimitive(regionPrim);
p.AddPrimitiveToProfiler(op);

@ -20,7 +20,8 @@ void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReorgYo

auto reorgPrim = cldnn::reorg_yolo(layerName,
    inputPrimitives[0],
    stride);
    stride,
    op->get_friendly_name());

p.AddPrimitive(reorgPrim);
p.AddPrimitiveToProfiler(op);

@ -36,9 +36,13 @@ void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op)
}

cldnn::layout outputLayout(DataTypeFromPrecision(op->get_output_element_type(0)), outputFormat, outTensor);
p.AddPrimitive(cldnn::reorder(reorderId, reshapeInputId, outputLayout));
p.AddPrimitive(cldnn::reorder(reorderId,
    reshapeInputId,
    outputLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.InitProfileInfo(reorderId, "Reorder", false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, layerName);
p.primitivesToIRLayersMap[reorderId] = { op->get_friendly_name() };
p.primitiveIDs[layerName + "_reorder"] = reorderId;
p.primitiveIDs[reorderId] = reorderId;
p.profilingIDs.push_back(reorderId);
@ -47,7 +51,8 @@ void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op)

auto reshapePrim = cldnn::reshape(layerName,
    reshapeInputId,
    outTensor);
    outTensor,
    op->get_friendly_name());

p.AddPrimitive(reshapePrim);
p.AddPrimitiveToProfiler(op);

@ -56,9 +56,12 @@ void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Result>& o
std::string outputID = inputs[0];

p.AddPrimitive(cldnn::reorder(outLayerName,
    outputID,
    FormatFromLayout(outputData->getLayout()),
    DataTypeFromPrecision(precision)));
    outputID,
    FormatFromLayout(outputData->getLayout()),
    DataTypeFromPrecision(precision),
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.InitProfileInfo(outLayerName, "reorder");
p.profilingIDs.push_back(outLayerName);
p.primitiveIDs[outLayerName] = outLayerName;

@ -22,7 +22,8 @@ void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::R
    inputPrimitives[0],
    inputPrimitives[1],
    seq_axis,
    batch_axis);
    batch_axis,
    op->get_friendly_name());

p.AddPrimitive(reverseSequencePrim);
p.AddPrimitiveToProfiler(op);

@ -107,8 +107,13 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell
cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape);
cldnn::layout hiddenLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inStateShape);
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(permuteID,
    inReshapeID,
    inputLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));

p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
@ -117,11 +122,24 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell
std::string hiddenInStr = inHiddenReorderID + "_1";
std::string cellInResh = inHiddenReshapeID + "_2";
std::string cellInStr = inHiddenReorderID + "_2";
p.AddPrimitive(cldnn::reshape(hiddenInResh, inputPrimitives[1], inStateShape));
p.AddPrimitive(cldnn::reorder(hiddenInStr, hiddenInResh, hiddenLayout));
p.AddPrimitive(cldnn::reshape(cellInResh, inputPrimitives[2], inStateShape));
p.AddPrimitive(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
p.AddPrimitive(cldnn::concatenation(input_concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));
p.AddPrimitive(cldnn::reshape(hiddenInResh, inputPrimitives[1], inStateShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(hiddenInStr,
    hiddenInResh,
    hiddenLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.AddPrimitive(cldnn::reshape(cellInResh, inputPrimitives[2], inStateShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(cellInStr,
    cellInResh,
    hiddenLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.AddPrimitive(cldnn::concatenation(input_concatID,
    { permuteID, hiddenInStr },
    cldnn::concatenation::concatenation_axis::along_x,
    op->get_friendly_name()));

p.AddInnerPrimitiveToProfiler(hiddenInResh, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(hiddenInStr, op->get_friendly_name(), op);
@ -139,14 +157,19 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell
std::string crop_id = layerName + "_crop";

cldnn::primitive_id WRconcatID = layerName + "_WRconcat";
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_f));
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_f, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);

p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, hasBias ? biasID : ""));
p.AddPrimitive(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
p.AddPrimitive(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
    clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, hasBias ? biasID : "", op->get_friendly_name()));
p.AddPrimitive(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(gemmReorderID,
    gemmReshapeID,
    gemmLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr, clip, 0, activations,
    activation_params, cldnn::lstm_weights_order::fizo, 0, op->get_friendly_name()));

p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(gemmReshapeID, op->get_friendly_name(), op);
@ -156,16 +179,16 @@ void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell
cldnn::tensor outSz = cldnn::tensor{ lstm_batch_size, lstm_hidden_size, 1, 1 };
cldnn::primitive_id outputHiddenCropID = layerName + "_hc";
cldnn::primitive_id outputHiddenID = layerName + ".0";
p.AddPrimitive(cldnn::crop(outputHiddenCropID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
p.AddPrimitive(cldnn::crop(outputHiddenCropID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(outputHiddenCropID, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::reshape(outputHiddenID, outputHiddenCropID, outSz));
p.AddPrimitive(cldnn::reshape(outputHiddenID, outputHiddenCropID, outSz, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(outputHiddenID, op->get_friendly_name(), op);

cldnn::primitive_id outputCellCropID = layerName + "_cc";
cldnn::primitive_id outputCellID = layerName + ".1";
p.AddPrimitive(cldnn::crop(outputCellCropID, lstm_elt_id, hiddenSz, cellCropSz));
p.AddPrimitive(cldnn::crop(outputCellCropID, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(outputCellCropID, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::reshape(outputCellID, outputCellCropID, outSz));
p.AddPrimitive(cldnn::reshape(outputCellID, outputCellCropID, outSz, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(outputCellID, op->get_friendly_name(), op);

// output primitive IDs

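Note: CreateLSTMCellOp lowers the single ngraph cell into generic primitives: reshape/reorder of the input and both states, a concatenation of [x, h] along x, one fully_connected against the concatenated [W;R] weights, then lstm_elt followed by crop/reshape pairs for the hidden (".0") and cell (".1") outputs. A scalar toy sketch of the arithmetic this chain implements; the gate order here is illustrative, while the real primitives use the fizo weight order on full tensors:

#include <cmath>

// Toy scalar view of the decomposition above: concat([x, h]) ->
// fully_connected([W;R], b) -> lstm_elt gate math -> two output crops.
static float sigmoid(float v) { return 1.0f / (1.0f + std::exp(-v)); }

void lstm_cell_step(float x, float h, float c,
                    const float WR[4][2], const float b[4],
                    float& h_out, float& c_out) {
    float g[4];
    for (int i = 0; i < 4; ++i)
        g[i] = WR[i][0] * x + WR[i][1] * h + b[i];   // the fully_connected step
    float f = sigmoid(g[0]), in = sigmoid(g[1]);     // forget / input gates
    float z = std::tanh(g[2]), o = sigmoid(g[3]);    // candidate / output gate
    c_out = f * c + in * z;                          // crop ".1": new cell state
    h_out = o * std::tanh(c_out);                    // crop ".0": new hidden state
}

int main() {
    const float WR[4][2] = {{0.1f, 0.2f}, {0.3f, 0.4f}, {0.5f, 0.6f}, {0.7f, 0.8f}};
    const float b[4] = {0, 0, 0, 0};
    float h = 0, c = 0;
    lstm_cell_step(1.0f, 0.0f, 0.0f, WR, b, h, c);
    return 0;
}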
@ -223,11 +246,16 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
cldnn::tensor inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape);
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(permuteID,
    inReshapeID,
    inputLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));

p.AddPrimitive(cldnn::reshape(inHiddenStateID, inputPrimitives[1], inStateShape));
p.AddPrimitive(cldnn::reshape(inCellStateID, inputPrimitives[2], inStateShape));
p.AddPrimitive(cldnn::reshape(inHiddenStateID, inputPrimitives[1], inStateShape, op->get_friendly_name()));
p.AddPrimitive(cldnn::reshape(inCellStateID, inputPrimitives[2], inStateShape, op->get_friendly_name()));

p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
@ -243,12 +271,12 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
cldnn::primitive_id inputCropID = layerName + "_inputCrop";

cldnn::primitive_id WRconcatID = layerName + "_WRconcat";
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y));
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);

std::vector<size_t> WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) };
cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape";
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize));
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize), op->get_friendly_name());
p.AddPrimitive(reshapeInPrim);
p.AddInnerPrimitiveToProfiler(WRreshapeID, op->get_friendly_name(), op);

@ -267,30 +295,35 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
cldnn::tensor crop_tensor{ inputShape.batch[0], 1, inputShape.spatial[0], inputShape.spatial[1] };
cldnn::tensor offset_tensor{ 0, static_cast<cldnn::tensor::value_type>(seqIdx), 0, 0 };
cldnn::primitive_id inputCrop_id = inputCropID + ":" + seqIdx_str;
p.AddPrimitive(cldnn::crop(inputCrop_id, permuteID, crop_tensor, offset_tensor));
p.AddPrimitive(cldnn::crop(inputCrop_id, permuteID, crop_tensor, offset_tensor, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(inputCrop_id, op->get_friendly_name(), op);

p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x));
p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(concatID, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID));
p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);

p.AddPrimitive(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
p.AddPrimitive(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, cellStr,
    clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
p.AddPrimitive(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz, op->get_friendly_name()));
p.AddPrimitive(cldnn::reorder(lstm_fc_reor_id,
    lstm_fc_resh_id,
    gemmLayout,
    std::vector<float>(),
    cldnn::reorder_mean_mode::subtract,
    op->get_friendly_name()));
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, cellStr, clip, 0, activations,
    activation_params, cldnn::lstm_weights_order::fizo, 0, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(lstm_fc_resh_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(lstm_fc_reor_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(lstm_elt_id, op->get_friendly_name(), op);

hiddenStr = crop_id + ":hidden";
cellStr = crop_id + ":cell";
p.AddPrimitive(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
p.AddPrimitive(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }, op->get_friendly_name()));
p.AddInnerPrimitiveToProfiler(hiddenStr, op->get_friendly_name(), op);
output_ids_offsets.push_back(hiddenStr);

if (i < lstm_sequence_len - 1) {
    p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
    p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
    p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
} else {
    // last hidden state crop (output 2)
@ -299,7 +332,7 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
    p.primitiveIDs[outputHiddenID] = hiddenStr;

    // last cell state crop (output 3)
    p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
    p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz, op->get_friendly_name()));
    cldnn::primitive_id outputCellID = layerName + ".2";
    p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
    p.primitiveIDs[outputCellID] = cellStr;
@ -310,7 +343,7 @@ void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTM
// concatenated hidden state (output 1)
cldnn::primitive_id outputConcatID = layerName + ".0";
cldnn::primitive_id concatStr = layerName + ":hiddenConcat";
p.AddPrimitive(cldnn::concatenation(concatStr, output_ids_offsets, cldnn::concatenation::along_f));
p.AddPrimitive(cldnn::concatenation(concatStr, output_ids_offsets, cldnn::concatenation::along_f, op->get_friendly_name()));

p.primitiveIDs[outputConcatID] = concatStr;
p.primitiveIDs[layerName] = concatStr;

@ -57,7 +57,8 @@ void CreateDeformablePSROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op
    group_size,
    output_dim,
    spatial_bins_x,
    spatial_bins_y);
    spatial_bins_y,
    op->get_friendly_name());
p.AddPrimitive(psROIPoolingPrim);
p.AddPrimitiveToProfiler(op);
}
@ -85,7 +86,8 @@ void CreatePSROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v0::PSRO
    spatial_scale,
    output_dim,
    spatial_bins_x,
    spatial_bins_y);
    spatial_bins_y,
    op->get_friendly_name());
p.AddPrimitive(psROIPoolingPrim);
p.AddPrimitiveToProfiler(op);
}
@ -110,7 +112,11 @@ void CreateROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v0::ROIPoo
    position_sensitive,
    pooled_width,
    pooled_height,
    spatial_scale);
    spatial_scale,
    0,
    1,
    1,
    op->get_friendly_name());

p.AddPrimitive(roiPoolingPrim);
p.AddPrimitiveToProfiler(op);

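Note: in CreateROIPoolingOp the friendly name again sits behind defaulted parameters, so the call now restates three intermediate defaults (0, 1, 1) that were previously implicit. This is the same positional-argument rule sketched after the NonMaxSuppression hunk; a compact reminder, with parameter names that are hypothetical rather than the real cldnn::roi_pooling signature:

// Positional arguments cannot skip defaulted slots: to supply the trailing
// ext_prim_id, every default between spatial_scale and the name must be
// restated. Parameter names here are illustrative only.
void roi_pooling_like(int pooled_w, int pooled_h, float spatial_scale,
                      int opt_a = 0, int opt_b = 1, int opt_c = 1,
                      const char* ext_prim_id = "") {
    (void)pooled_w; (void)pooled_h; (void)spatial_scale;
    (void)opt_a; (void)opt_b; (void)opt_c; (void)ext_prim_id;
}

int main() {
    roi_pooling_like(7, 7, 0.0625f);                        // before: defaults implied
    roi_pooling_like(7, 7, 0.0625f, 0, 1, 1, "MyROIPool");  // after: spelled out
    return 0;
}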
@ -54,10 +54,11 @@ void CreateScatterElementsUpdateOp(Program& p, const std::shared_ptr<ngraph::op:
int32_t axis = axes_constant->cast_vector<int32_t>()[0];

auto primitive = cldnn::scatter_elements_update(layerName,
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    GetScatterElementsUpdateAxis(axis, rank));
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    GetScatterElementsUpdateAxis(axis, rank),
    op->get_friendly_name());

p.AddPrimitive(primitive);
p.AddPrimitiveToProfiler(op);

@ -19,10 +19,11 @@ void CreateScatterNDUpdateOp(Program& p, const std::shared_ptr<ngraph::op::v3::S
auto indices_rank = op->get_input_shape(1).size();

auto primitive = cldnn::scatter_nd_update(layerName,
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    indices_rank);
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    indices_rank,
    op->get_friendly_name());

p.AddPrimitive(primitive);
p.AddPrimitiveToProfiler(op);

@ -57,7 +57,8 @@ void CreateScatterUpdateOp(Program& p, const std::shared_ptr<ngraph::op::v3::Sca
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    GetScatterUpdateAxis(axis, rank));
    GetScatterUpdateAxis(axis, rank),
    op->get_friendly_name());

p.AddPrimitive(primitive);
p.AddPrimitiveToProfiler(op);

@ -40,7 +40,13 @@ void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& o
if (targetFormat.value != DefaultFormatForDims(inputDimsN).value) {
    auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder";
    auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(i));
    auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
    auto reorderPrim = cldnn::reorder(reorderName,
        inputPrimitives[i],
        targetFormat,
        targetDatatype,
        std::vector<float>(),
        cldnn::reorder_mean_mode::subtract,
        op->get_friendly_name());

    p.AddPrimitive(reorderPrim);
    p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
@ -57,7 +63,7 @@ void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& o

auto targetShape = CldnnTensorFromIEDims(inputDims);

auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());

p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
@ -73,6 +79,7 @@ void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& o
    inputPrimitives[0],
    inputPrimitives[1],
    inputPrimitives[2],
    op->get_friendly_name(),
    cldnn::padding(),
    bc_string);

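Note: cldnn::select is the odd one out in this diff: the friendly name is inserted ahead of the existing cldnn::padding() and broadcast-spec arguments instead of being appended last, so the call site grows in the middle. A sketch of the two constructor shapes, with simplified declaration-only signatures that are assumptions, not the real cldnn headers:

#include <string>

// Most primitives in this diff append the name as the final argument ...
struct scatter_update_like {
    scatter_update_like(std::string id, std::string data, std::string indices,
                        std::string updates, int axis,
                        std::string ext_prim_id = "");
};

// ... while select takes it ahead of its trailing defaulted arguments,
// so existing calls that pass padding/broadcast must add the name in between.
struct select_like {
    select_like(std::string id, std::string mask, std::string input_true,
                std::string input_false, std::string ext_prim_id = "",
                std::string padding = "", std::string broadcast_spec = "");
};

int main() { return 0; }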
@ -36,7 +36,8 @@ void CreateShuffleChannelsOp(Program& p, const std::shared_ptr<ngraph::op::v0::S
auto shuffleChannelsPrim = cldnn::shuffle_channels(layerName,
    inputPrimitives[0],
    group,
    axis);
    axis,
    op->get_friendly_name());

p.AddPrimitive(shuffleChannelsPrim);
p.AddPrimitiveToProfiler(op);

@ -41,7 +41,8 @@ void CreateSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v1::Softmax>&
std::string layerName = layer_type_name_ID(op);
auto softmaxPrim = cldnn::softmax(layerName,
    inputPrimitives[0],
    GetSoftmaxAxis(op->get_axis(), op->get_input_shape(0).size()));
    GetSoftmaxAxis(op->get_axis(), op->get_input_shape(0).size()),
    op->get_friendly_name());
p.AddPrimitive(softmaxPrim);
p.AddPrimitiveToProfiler(op);
}
@ -58,9 +59,10 @@ void CreateLogSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v5::LogSof

auto softmaxPrim = cldnn::softmax(layerNameSoftmax,
    inputPrimitives[0],
    GetSoftmaxAxis(static_cast<size_t>(axis), op->get_input_shape(0).size()));
    GetSoftmaxAxis(static_cast<size_t>(axis), op->get_input_shape(0).size()),
    op->get_friendly_name());

auto logPrim = cldnn::activation(layerName, layerNameSoftmax, cldnn::activation_func::log);
auto logPrim = cldnn::activation(layerName, layerNameSoftmax, cldnn::activation_func::log, {(0.0F), (0.0F)}, op->get_friendly_name());

p.AddPrimitive(softmaxPrim);
p.AddPrimitive(logPrim);

@ -42,7 +42,8 @@ void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v1::Spac
    inputs[0], // block_shape
    inputs[1], // crops_begin
    inputs[2], // crops_end
    out_size);
    out_size,
    op->get_friendly_name());

p.AddPrimitive(batchToSpacePrim);
p.AddPrimitiveToProfiler(op);

@ -27,7 +27,8 @@ void CreateSpaceToDepthOp(Program& p, const std::shared_ptr<ngraph::op::v0::Spac
auto spaceToDepthPrim = cldnn::space_to_depth(layerName,
    inputPrimitives[0],
    GetDepthMode(op->get_mode()),
    op->get_block_size());
    op->get_block_size(),
    op->get_friendly_name());

p.AddPrimitive(spaceToDepthPrim);
p.AddPrimitiveToProfiler(op);

@ -40,8 +40,7 @@ void CreateCommonSplitOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1);
auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0);

auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor);
p.primitivesToIRLayersMap[outLayerName] = { op->get_friendly_name() };
auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor, op->get_friendly_name());
p.primitiveIDs[outLayerName] = outLayerName;

p.AddPrimitive(cropPrim);

@ -189,7 +189,7 @@ void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Stri
if (!new_axis_mask.empty()) {
    auto targetShape = CldnnTensorFromIEDims(reshape_pattern);
    auto reshapeInName = op->get_friendly_name() + "/Reshape_before";
    auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape);
    auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape, op->get_friendly_name());
    p.AddPrimitive(reshapePrim);
    p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
    inPrimitive = reshapeInName;
@ -215,7 +215,7 @@ void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Stri
cldnn::tensor offSize = CldnnTensorFromIEDims(offset, 0);

auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize);
auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize, op->get_friendly_name());
p.AddPrimitive(cropPrim);
p.AddPrimitiveToProfiler(layerName, op);

@ -223,7 +223,7 @@ void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Stri
if (!shrink_axis_mask.empty()) {
    auto targetShape = CldnnTensorFromIEDims(output_shape);
    auto reshapeOutName = op->get_friendly_name() + "/Crop";
    auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape);
    auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape, op->get_friendly_name());
    p.AddPrimitive(reshapePrim);
    p.AddInnerPrimitiveToProfiler(reshapeOutName, layerName, op);
}
@ -258,7 +258,8 @@ void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::Stri
    end_mask,
    new_axis_mask,
    shrink_axis_mask,
    out_size);
    out_size,
    op->get_friendly_name());

p.AddPrimitive(stridedSlicePrim);
p.AddPrimitiveToProfiler(op);

@ -27,11 +27,11 @@ using TensorIterator = ngraph::op::v0::TensorIterator;
namespace CLDNNPlugin {

template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) {
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) {
    auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } });
    cldnn::mem_lock<int64_t> ptr{mem, p.GetEngine().get_program_stream()};
    *ptr.begin() = num;
    return {id, mem};
    return {id, mem, ext_prim_id};
}

static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr<ngraph::Node>& op,
@ -42,7 +42,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
    const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
    cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
    auto mem = p.GetEngine().allocate_memory(output_layout);
    auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency
    auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency
    return md;
}

@ -122,24 +122,21 @@ void CreateTensorIteratorOp(Program &p, const std::shared_ptr<TensorIterator> &o
    throw std::runtime_error("tensor iterator's num_iteration cannot be negative");
}
{
    cldnn::data trip_count = CreateScalarData<cldnn::data>(p, trip_count_id, num_iterations);
    p.primitivesToIRLayersMap[trip_count_id] = { op->get_friendly_name() };
    cldnn::data trip_count = CreateScalarData<cldnn::data>(p, trip_count_id, num_iterations, op->get_friendly_name());
    p.primitiveIDs[trip_count_id] = trip_count_id;
    p.AddPrimitive(trip_count);
    p.AddInnerPrimitiveToProfiler(trip_count_id, layerName, op);
}
const cldnn::primitive_id execution_condition_id = layerName + "_initialExecutionCondition";
{
    cldnn::mutable_data execution_condition = CreateScalarData<cldnn::mutable_data>(p, execution_condition_id, 1);
    p.primitivesToIRLayersMap[execution_condition_id] = { op->get_friendly_name() };
    cldnn::mutable_data execution_condition = CreateScalarData<cldnn::mutable_data>(p, execution_condition_id, 1, op->get_friendly_name());
    p.primitiveIDs[execution_condition_id] = execution_condition_id;
    p.AddPrimitive(execution_condition);
    p.AddInnerPrimitiveToProfiler(execution_condition_id, layerName, op);
}
const cldnn::primitive_id num_iteration_id = layerName + "_numIteration";
{
    cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0);
    p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() };
    cldnn::mutable_data num_iteration = CreateScalarData<cldnn::mutable_data>(p, num_iteration_id, 0, op->get_friendly_name());
    p.primitiveIDs[num_iteration_id] = num_iteration_id;
    p.AddPrimitive(num_iteration);
    p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op);
@ -191,7 +188,10 @@ void CreateTensorIteratorOp(Program &p, const std::shared_ptr<TensorIterator> &o
    input_primitive_maps, /* input mappings connecting outer network and inner network */
    output_primitive_maps, /* output mappings connecting outer network and inner network */
    back_edges, /* back edge mapping */
    num_iterations); /* max iteration, i.e. length of iteration axis */
    num_iterations, /* max iteration, i.e. length of iteration axis */
    "",
    "",
    op->get_friendly_name());

p.AddPrimitive(loopPrimitive);
p.AddPrimitiveToProfiler(op);

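Note: CreateScalarData above threads the new argument through aggregate initialization: the return statement brace-initializes whichever DATA_TYPE the caller requested as {id, mem, ext_prim_id}, which lets one template serve both cldnn::data and cldnn::mutable_data. A standalone model of that shape, with mock types standing in for the cldnn ones:

#include <cstdint>
#include <memory>
#include <string>

// Mock stand-ins for cldnn::data / cldnn::mutable_data: both are
// brace-constructible from {id, memory, ext_prim_id}.
struct memory_buf { std::int64_t value = 0; };
using memory_ptr = std::shared_ptr<memory_buf>;

struct data_like         { std::string id; memory_ptr mem; std::string ext_prim_id; };
struct mutable_data_like { std::string id; memory_ptr mem; std::string ext_prim_id; };

// One template serves both primitive types because the return statement
// aggregate-initializes whichever DATA_TYPE the caller asked for.
template <class DATA_TYPE>
static DATA_TYPE CreateScalarData(const std::string& id, std::int64_t num,
                                  const std::string& ext_prim_id) {
    auto mem = std::make_shared<memory_buf>();
    mem->value = num;  // the real code writes the scalar through a cldnn::mem_lock
    return {id, mem, ext_prim_id};
}

int main() {
    auto trip_count = CreateScalarData<data_like>("ti_tripCount", 10, "TI");
    auto exec_cond  = CreateScalarData<mutable_data_like>("ti_initialExecutionCondition", 1, "TI");
    return (trip_count.mem->value == 10 && exec_cond.mem->value == 1) ? 0 : 1;
}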
Some files were not shown because too many files have changed in this diff