diff --git a/.ci/azure/ci_utils/onnxruntime/version b/.ci/azure/ci_utils/onnxruntime/version index 3abd49542da..27e0d15e9f8 100644 --- a/.ci/azure/ci_utils/onnxruntime/version +++ b/.ci/azure/ci_utils/onnxruntime/version @@ -1 +1 @@ -rel-1.7.1 +rel-1.8.1 diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml index 52c8bbd3840..2b9dda46708 100644 --- a/.ci/azure/linux.yml +++ b/.ci/azure/linux.yml @@ -16,13 +16,12 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_F16S_WU2 + name: LIN_VMSS_VENV_F16S_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 16 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib @@ -43,6 +42,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -74,27 +74,26 @@ jobs: submodules: recursive path: openvino_contrib - - checkout: testdata - clean: true - lfs: true - path: testdata - - script: | - sudo apt --assume-yes install libusb-1.0-0-dev - # For opencv-python: setuptools and upgrade - sudo apt-get install python3-setuptools patchelf + set -e + $(REPO_DIR)/install_build_dependencies.sh + # Move jdk into contrib + sudo apt --assume-yes install openjdk-11-jdk + # For opencv-python: python3-setuptools and pip upgrade python3 -m pip install --upgrade pip python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt # For running Python API tests python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt - # For running nGraph unit tests dependent on Python frameworks - python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test.txt + # For running PaddlePaddle frontend unit tests + python3 -m pip install -r $(REPO_DIR)/ngraph/test/frontend/paddlepaddle/requirements_dev.txt + # For running ONNX frontend unit tests + python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt # For MO unit tests python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements.txt python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements_dev.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ # Speed up tests @@ -102,6 +101,11 @@ jobs: workingDirectory: $(WORK_DIR) displayName: 'Install dependencies' + - checkout: testdata + clean: true + lfs: true + path: testdata + - task: CMake@1 inputs: # CMake must get Python 3.x version by default @@ -110,13 +114,14 @@ jobs: -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON - -DPYTHON_EXECUTABLE=/usr/bin/python3.6 + -DPYTHON_EXECUTABLE=/usr/bin/python3.8 -DENABLE_WHEEL=ON -DENABLE_TESTS=ON -DNGRAPH_ONNX_IMPORT_ENABLE=ON -DNGRAPH_ONNX_FRONTEND_ENABLE=ON -DENABLE_FASTER_BUILD=ON -DENABLE_STRICT_DEPENDENCIES=OFF + -DENABLE_REQUIREMENTS_INSTALL=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) @@ -139,8 +144,10 @@ jobs: displayName: 'List install files' - script: | + set -e mkdir $(INSTALL_DIR)/opencv/ - cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake && cp 
-R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_ubuntu18/opencv/* $(INSTALL_DIR)/opencv/ + cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake + cp -R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_ubuntu20/opencv/* $(INSTALL_DIR)/opencv/ workingDirectory: $(BUILD_DIR) displayName: 'Install tests' @@ -155,17 +162,31 @@ jobs: workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' + - script: rm -fr $(BUILD_DIR) + displayName: 'Clean build dir' + continueOnError: false + + # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time + - script: . $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph --junitxml=TEST-Pyngraph.xml --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py + displayName: 'nGraph Python Bindings Tests' + continueOnError: false + - script: | export MO_ROOT=$(INSTALL_DIR)/deployment_tools/model_optimizer - . $(SETUPVARS) -pyver 3.6 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml + . $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml displayName: 'Model Optimizer UT' continueOnError: false - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(INSTALL_TEST_DIR) displayName: 'nGraph UT' continueOnError: false - # . $(SETUPVARS) && python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1 + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/paddlepaddle_tests --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-PaddlePaddle.xml + displayName: 'PaddlePaddle Frontend UT' + continueOnError: false + + # . $(SETUPVARS) && python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --workers=16 --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1 - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml displayName: 'IE UT old' continueOnError: false @@ -213,10 +234,9 @@ jobs: export DATA_PATH=$(MODELS_PATH) export MODELS_PATH=$(MODELS_PATH) cd $(REPO_DIR)/inference-engine/ie_bridges/python/tests - . $(SETUPVARS) -pyver 3.6 && pytest pytest --junitxml=TEST-PythonAPI.xml + . 
$(SETUPVARS) -pyver 3.8 && python3 -m pytest --junitxml=TEST-PythonAPI.xml displayName: 'Python API Tests' continueOnError: false - enabled: false - task: PublishTestResults@2 condition: always() diff --git a/.ci/azure/linux_conditional_compilation.yml b/.ci/azure/linux_conditional_compilation.yml index 6d2d33574b7..a4063d2c903 100644 --- a/.ci/azure/linux_conditional_compilation.yml +++ b/.ci/azure/linux_conditional_compilation.yml @@ -4,20 +4,18 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_F16S_WU2 + name: LIN_VMSS_VENV_F16S_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 16 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib MODELS_PATH: $(REPO_DIR)/../testdata WORK_DIR: $(Pipeline.Workspace)/_w BUILD_DIR: $(WORK_DIR)/build - BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE) INSTALL_DIR: $(WORK_DIR)/install_pkg SETUPVARS: $(INSTALL_DIR)/bin/setupvars.sh @@ -30,6 +28,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -53,10 +52,11 @@ jobs: path: openvino - script: | - sudo apt --assume-yes install libusb-1.0-0-dev + set -e + $(REPO_DIR)/install_build_dependencies.sh python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ workingDirectory: $(WORK_DIR) @@ -76,12 +76,14 @@ jobs: - script: ninja workingDirectory: $(BUILD_DIR) - displayName: 'Build' + displayName: 'Build LinCC' - script: ls -alR $(REPO_DIR)/bin/ - displayName: 'List files' + displayName: 'List bin files' - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_DIR) displayName: 'Install' + - script: ls -alR $(INSTALL_DIR) + displayName: 'List install files' diff --git a/.ci/azure/linux_ngraph_onnx.yml b/.ci/azure/linux_ngraph_onnx.yml index 28326c89053..c6071fc127f 100644 --- a/.ci/azure/linux_ngraph_onnx.yml +++ b/.ci/azure/linux_ngraph_onnx.yml @@ -20,13 +20,12 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_ONNX_WU2 + name: LIN_VMSS_VENV_ONNX_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 REPO_DIR: $(Build.Repository.LocalPath) WORK_DIR: $(Pipeline.Workspace)/_w MODELS_DIR: /mount/cinfsshare/onnxtestdata @@ -43,6 +42,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -68,16 +68,23 @@ jobs: submodules: recursive path: openvino - - script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) . 
+ - script: | + set -e + sudo apt --assume-yes install git-lfs uidmap + curl -fsSL https://get.docker.com -o get-docker.sh + sudo sh get-docker.sh + workingDirectory: $(WORK_DIR) + displayName: 'Install dependencies' + + - script: sudo docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) . displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' - script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)" displayName: 'Get models' condition: ne(variables['BUILD_TYPE'], 'Debug') - - script: sudo fallocate -l 48G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h + - script: sudo fallocate -l 64G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h displayName: 'Create swap' - - script: | - docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)" + - script: sudo docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)" displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' diff --git a/.ci/azure/linux_onnxruntime.yml b/.ci/azure/linux_onnxruntime.yml index a2bfee8c70a..0229c37c0b0 100644 --- a/.ci/azure/linux_onnxruntime.yml +++ b/.ci/azure/linux_onnxruntime.yml @@ -3,23 +3,23 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_ONNX_WU2 + name: LIN_VMSS_VENV_ONNX_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) ONNXRUNTIME_REPO_DIR: $(REPO_DIR)/../onnxruntime WORK_DIR: $(Pipeline.Workspace)/_w MODELS_DIR: /mount/cinfsshare/onnxtestdata TMP_DIR: /mnt/tmp - INSTALL_DIR: $(WORK_DIR)/install_pkg + INSTALL_DIR: $(WORK_DIR)/install_pkg/openvino BUILD_DIR: $(WORK_DIR)/build ONNXRUNTIME_UTILS: $(REPO_DIR)/.ci/azure/ci_utils/onnxruntime ONNXRUNTIME_BUILD_DIR: $(ONNXRUNTIME_REPO_DIR)/build + steps: - script: | curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01" @@ -29,6 +29,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -60,15 +61,14 @@ jobs: displayName: 'Clone onnxruntime' - script: | - sudo apt --assume-yes install libusb-1.0-0-dev - # For opencv-python: setuptools and upgrade - sudo apt-get install python3-setuptools + set -e + $(REPO_DIR)/install_build_dependencies.sh python3 -m pip install --upgrade pip python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt # For running Python API tests python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ # Speed up tests @@ -83,7 +83,7 @@ jobs: -GNinja 
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON - -DPYTHON_EXECUTABLE=/usr/bin/python3.6 + -DPYTHON_EXECUTABLE=/usr/bin/python3.8 -DENABLE_VPU=OFF -DENABLE_GNA=OFF -DENABLE_OPENCV=OFF @@ -102,10 +102,10 @@ jobs: - script: ninja workingDirectory: $(BUILD_DIR) - displayName: 'Build Lin' + displayName: 'Build Lin ONNX' - script: ls -alR $(REPO_DIR)/bin/ - displayName: 'List files' + displayName: 'List bin files' - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_DIR) @@ -113,10 +113,9 @@ jobs: - script: | source $(INSTALL_DIR)/bin/setupvars.sh - echo "2021.2" > $(INSTALL_DIR)/deployment_tools/inference_engine/version.txt CXXFLAGS="-Wno-error=deprecated-declarations" ./build.sh --config RelWithDebInfo --use_openvino CPU_FP32 --build_shared_lib --parallel --skip_tests --build_dir $(ONNXRUNTIME_BUILD_DIR) workingDirectory: $(ONNXRUNTIME_REPO_DIR) - displayName: 'Build ONNX Runtime' + displayName: 'Build Lin ONNX Runtime' - script: | source $(INSTALL_DIR)/bin/setupvars.sh diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml index 90fc812bbaa..b07ff48f78c 100644 --- a/.ci/azure/mac.yml +++ b/.ci/azure/mac.yml @@ -22,7 +22,6 @@ jobs: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 3 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib @@ -76,6 +75,7 @@ jobs: - script: | brew install cython brew install automake + python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt # Speed up build brew install ninja # Speed up tests @@ -87,7 +87,7 @@ jobs: export PATH="/usr/local/opt/cython/bin:$PATH" export CC=gcc export CXX=g++ - cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) + cmake -GNinja -DVERBOSE_BUILD=ON -DENABLE_REQUIREMENTS_INSTALL=OFF -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' @@ -103,6 +103,7 @@ jobs: displayName: 'Install' - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(BIN_DIR) displayName: 'nGraph UT' continueOnError: false diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index c368776c8f4..e5ec0486f9b 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -22,7 +22,6 @@ jobs: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib @@ -42,6 +41,7 @@ jobs: - script: | powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom" where python3 + python3 --version where python python --version where java @@ -83,7 +83,18 @@ jobs: path: testdata - script: | - certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip + python -m pip install --upgrade pip + rem For running Python API tests + python -m pip install -r $(REPO_DIR)\inference-engine\ie_bridges\python\src\requirements-dev.txt + 
rem For running PaddlePaddle frontend unit tests + python -m pip install -r $(REPO_DIR)\ngraph\test\frontend\paddlepaddle\requirements_dev.txt + rem For running ONNX frontend unit tests + python -m pip install -r $(REPO_DIR)\ngraph\test\requirements_test_onnx.txt + rem For MO unit tests + python -m pip install -r $(REPO_DIR)\model-optimizer\requirements.txt + python -m pip install -r $(REPO_DIR)\model-optimizer\requirements_dev.txt + rem Speed up build + certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip powershell -command "Expand-Archive -Force ninja-win.zip" git clone https://github.com/google/gtest-parallel.git workingDirectory: $(WORK_DIR) @@ -91,7 +102,7 @@ jobs: - script: | set PATH=$(WORK_DIR)\ninja-win;%PATH% - call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR) + call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' @@ -129,10 +140,19 @@ jobs: workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' + - script: rd /Q /S $(BUILD_DIR) + displayName: 'Clean build dir' + continueOnError: false + - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(INSTALL_TEST_DIR) displayName: 'nGraph UT' continueOnError: false + - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\paddlepaddle_tests --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-PaddlePaddle.xml + displayName: 'PaddlePaddle Frontend UT' + continueOnError: false + - script: | set PATH=$(IB_DIR);%PATH% call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 7969cf13aa1..f452feb67d7 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -28,7 +28,7 @@ jobs: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. - name: Check code style - run: cmake --build build --target clang_format_check_all + run: cmake --build build --target clang_format_check_all -j8 - name: Create code style diff if: failure() @@ -64,5 +64,29 @@ jobs: cmake .. - name: ShellCheck - run: make ie_shellcheck + run: cmake --build . 
--target ie_shellcheck -j8 + working-directory: build + + NamingConventionCheck: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Install Clang dependency + run: sudo apt --assume-yes install libusb-1.0-0-dev libclang-9-dev + + - name: Install Python-based dependencies + run: | + python3 -m pip install pyyaml clang==9.0 + + - name: CMake + run: | + mkdir build + cd build + cmake .. + + - name: Naming convention check + run: cmake --build . --target ncc_all -j8 working-directory: build diff --git a/.gitmodules b/.gitmodules index d3f72b54c4f..0b76a4b239e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -40,7 +40,7 @@ ignore = dirty [submodule "thirdparty/onnx"] path = thirdparty/onnx/onnx - url = https://github.com/openvinotoolkit/onnx.git + url = https://github.com/onnx/onnx.git [submodule "thirdparty/protobuf"] path = thirdparty/protobuf/protobuf url = https://github.com/protocolbuffers/protobuf.git @@ -50,3 +50,6 @@ [submodule "thirdparty/ittapi/ittapi"] path = thirdparty/ittapi/ittapi url = https://github.com/intel/ittapi.git +[submodule "ncc"] + path = cmake/developer_package/ncc_naming_style/ncc + url = https://github.com/nithinn/ncc.git diff --git a/CMakeLists.txt b/CMakeLists.txt index cfbb8e5ea45..61a96ae9f4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,9 @@ endfunction() ie_cpack_add_component(ngraph REQUIRED) ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph) +# add target with processed tests model zoo +include(cmake/test_model_zoo.cmake) + add_subdirectory(thirdparty) add_subdirectory(openvino) add_subdirectory(ngraph) diff --git a/CODEOWNERS b/CODEOWNERS index d47170c4716..2894fac8ff3 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -30,13 +30,13 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins # IE GPU: /inference-engine/src/cldnn_engine/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers -/inference-engine/include/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers -/inference-engine/include/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers +/inference-engine/src/inference_engine/include/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers +/inference-engine/src/inference_engine/include/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers /inference-engine/thirdparty/clDNN/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers # IE VPU: /inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers -/inference-engine/include/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers +/inference-engine/src/inference_engine/include/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/thirdparty/movidius/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers /inference-engine/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers @@ -49,11 +49,11 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins # IE GNA: /inference-engine/src/gna_plugin/ @openvinotoolkit/openvino-ie-gna-maintainers -/inference-engine/include/gna/ @openvinotoolkit/openvino-ie-gna-maintainers +/inference-engine/src/inference_engine/include/gna/ 
@openvinotoolkit/openvino-ie-gna-maintainers # IE MULTI: /inference-engine/src/multi_device/ @openvinotoolkit/openvino-ie-multi-maintainers -/inference-engine/include/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers +/inference-engine/src/inference_engine/include/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers # IE Tests: /inference-engine/tests/ @openvinotoolkit/openvino-ie-tests-maintainers @@ -77,4 +77,4 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins # Control 3d party dependencies *requirements* @openvino-configuration-mgmt *setup.py @openvino-configuration-mgmt -/scripts/install_dependencies/ @openvino-configuration-mgmt \ No newline at end of file +/scripts/install_dependencies/ @openvino-configuration-mgmt diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index e84a7cdc718..82f98b4c515 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -7,10 +7,6 @@ cmake_policy(SET CMP0054 NEW) # TODO: fix it set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}") -if(CMAKE_CROSSCOMPILING) - set(CMAKE_STAGING_PREFIX "${TEMP}") -endif() - if(ENABLE_SAME_BRANCH_FOR_MODELS) branchName(MODELS_BRANCH) else() @@ -315,25 +311,25 @@ if(ENABLE_SPEECH_DEMO) if(DEFINED IE_PATH_TO_DEPS) if(WIN32 AND X86_64) RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_WIN "speech_demo_1.0.0.774_windows.zip" + ARCHIVE_WIN "speech_demo_1.0.0.780_windows.zip" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "67b25170be5e89a4f0e90e8b39623b60c9a15b965c30329385e295fcd2edc856") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "957bd274a1f6dc1d83a46879c7ef3b3b06f17d11af85cc45c18919051d145abd") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) elseif(LINUX AND X86_64) if(LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.774_centos.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.780_centos.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "5ec3b7be9ae05376aefae5bd5fd4a39b12c274e82817fd3218120b8e8fc8ff5a") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "6d8d1111d0e662fe71d71cd3debad2995f6fb6fe5df3b92196dae06ff7abdf44") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) else() RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.774_linux.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.780_linux.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "f0bbd0a6218b0365e7cfb1f860b34e4ace7e0d47dd60b369cdea8a480329810f") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "0ec6f1e47c00d781dc918af5d3055ab474ff47b9978dd6fe2add73e3339b0763") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) endif() else() diff --git a/cmake/developer_package/IEDevScriptsConfig.cmake b/cmake/developer_package/IEDevScriptsConfig.cmake index 46423aa61c7..febe20339bb 100644 --- a/cmake/developer_package/IEDevScriptsConfig.cmake +++ b/cmake/developer_package/IEDevScriptsConfig.cmake @@ -132,7 +132,7 @@ set(IE_DEBUG_POSTFIX_WIN "d") set(IE_RELEASE_POSTFIX_WIN "") set(IE_DEBUG_POSTFIX_LIN "") set(IE_RELEASE_POSTFIX_LIN "") -set(IE_DEBUG_POSTFIX_MAC "d") +set(IE_DEBUG_POSTFIX_MAC "") set(IE_RELEASE_POSTFIX_MAC "") if(WIN32) @@ -187,8 +187,8 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) # Enable CMAKE__COMPILER_ID AppleClang 
set(CMAKE_POLICY_DEFAULT_CMP0025 NEW) -set(CMAKE_WARN_DEPRECATED OFF) -set(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION ON) +set(CMAKE_WARN_DEPRECATED OFF CACHE BOOL "Don't warn about obsolete cmake versions in 3rdparty") +set(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION ON CACHE BOOL "Warn about absolute paths in destination") # LTO @@ -251,20 +251,40 @@ endfunction() # check python package -function(ie_check_pip_package name message_type) +function(ie_check_pip_package full_name message_type) find_package(PythonInterp 3 REQUIRED) + get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY) + + # extract version if any + if(full_name MATCHES "^([a-z_]+)[~=<>!]*(.*)$") + set(name ${CMAKE_MATCH_1}) + set(req_version ${CMAKE_MATCH_2}) + else() + set(name ${full_name}) + endif() + execute_process( COMMAND ${PYTHON_EXECUTABLE} -m pip show ${name} + WORKING_DIRECTORY ${PYTHON_EXEC_DIR} RESULT_VARIABLE PIP_EXIT_CODE - OUTPUT_QUIET - ) + OUTPUT_VARIABLE output) if(NOT PIP_EXIT_CODE EQUAL 0) set(${name}_FOUND OFF PARENT_SCOPE) - message(${message_type} "${name} package is not installed. Please use \"${PYTHON_EXECUTABLE} -m pip install ${name}\".") + message(${message_type} "${name} package is not installed. Please use \"${PYTHON_EXECUTABLE} -m pip install ${full_name}\".") else() - set(${name}_FOUND ON PARENT_SCOPE) + if(req_version) + string(REGEX MATCH "Version: ([0-9]+\.?[0-9]*\.?[0-9]*)\n" installed_version "${output}") + if(installed_version) + set(installed_version "${CMAKE_MATCH_1}") + endif() + + message(${message_type} "${name} package is installed, but may have different version (${installed_version}). " + "Please use \"${PYTHON_EXECUTABLE} -m pip install ${full_name}\".") + else() + set(${name}_FOUND ON PARENT_SCOPE) + endif() endif() endfunction() @@ -272,6 +292,7 @@ endfunction() include(cpplint/cpplint) include(clang_format/clang_format) +include(ncc_naming_style/ncc_naming_style) # Restore state set(CMAKE_MODULE_PATH ${OLD_CMAKE_MODULE_PATH}) diff --git a/cmake/developer_package/clang_format/clang_format.cmake b/cmake/developer_package/clang_format/clang_format.cmake index 7a1487ea705..a94f1891466 100644 --- a/cmake/developer_package/clang_format/clang_format.cmake +++ b/cmake/developer_package/clang_format/clang_format.cmake @@ -2,17 +2,17 @@ # SPDX-License-Identifier: Apache-2.0 # -if (ENABLE_CLANG_FORMAT) +if(ENABLE_CLANG_FORMAT) set(CLANG_FORMAT_FILENAME clang-format-9 clang-format) - find_program(CLANG_FORMAT NAMES ${CLANG_FORMAT_FILENAME} PATHS ENV PATH) - if (CLANG_FORMAT) + find_host_program(CLANG_FORMAT NAMES ${CLANG_FORMAT_FILENAME} PATHS ENV PATH) + if(CLANG_FORMAT) execute_process(COMMAND ${CLANG_FORMAT} ${CMAKE_CURRENT_SOURCE_DIR} ARGS --version OUTPUT_VARIABLE CLANG_VERSION) - if (NOT CLANG_VERSION OR CLANG_VERSION STREQUAL "") + if(NOT CLANG_VERSION OR CLANG_VERSION STREQUAL "") message(WARNING "Supported clang-format version is 9!") set(ENABLE_CLANG_FORMAT OFF) else() string(REGEX REPLACE "[^0-9]+([0-9]+)\\..*" "\\1" CLANG_FORMAT_MAJOR_VERSION ${CLANG_VERSION}) - if (NOT ${CLANG_FORMAT_MAJOR_VERSION} EQUAL "9") + if(NOT ${CLANG_FORMAT_MAJOR_VERSION} EQUAL "9") message(WARNING "Supported clang-format version is 9!") set(ENABLE_CLANG_FORMAT OFF) endif() diff --git a/cmake/developer_package/compile_flags/os_flags.cmake b/cmake/developer_package/compile_flags/os_flags.cmake index 868c3455d5d..d2c51130a95 100644 --- a/cmake/developer_package/compile_flags/os_flags.cmake +++ b/cmake/developer_package/compile_flags/os_flags.cmake @@ -68,13 +68,13 @@ 
function(ie_sse42_optimization_flags flags) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # No such option for MSVC 2019 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} /arch:SSE4.2 /QxSSE4.2 PARENT_SCOPE) + set(${flags} /QxSSE4.2 PARENT_SCOPE) else() message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} -msse4.2 -xSSE4.2 PARENT_SCOPE) + set(${flags} -xSSE4.2 PARENT_SCOPE) else() set(${flags} -msse4.2 PARENT_SCOPE) endif() @@ -95,7 +95,7 @@ function(ie_avx2_optimization_flags flags) endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} -march=core-avx2 -xCORE-AVX2 -mtune=core-avx2 PARENT_SCOPE) + set(${flags} -xCORE-AVX2 PARENT_SCOPE) else() set(${flags} -mavx2 -mfma PARENT_SCOPE) endif() @@ -152,6 +152,24 @@ function(ie_arm_neon_optimization_flags flags) endif() endfunction() +# +# Disables all warnings for 3rd party targets +# +function(ov_disable_all_warnings) + foreach(target IN LISTS ARGN) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(${target} PRIVATE /WX-) + elseif(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) + target_compile_options(${target} PRIVATE -w) + elseif(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + # 193: zero used for undefined preprocessing identifier "XXX" + # 1011: missing return statement at end of non-void function "XXX" + # 2415: variable "xxx" of static storage duration was declared but never referenced + target_compile_options(${target} PRIVATE -diag-disable=warn,193,1011,2415) + endif() + endforeach() +endfunction() + # # Enables Link Time Optimization compilation # @@ -286,15 +304,13 @@ else() ie_add_compiler_flags(-Wreturn-type) ie_add_compiler_flags(-Wunused-variable) - # Disable noisy warnings - if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") ie_add_compiler_flags(-Wswitch) elseif(UNIX) ie_add_compiler_flags(-Wuninitialized -Winit-self) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - ie_add_compiler_flags(-Wno-error=switch - -Winconsistent-missing-override) + ie_add_compiler_flags(-Winconsistent-missing-override + -Wstring-plus-int) else() ie_add_compiler_flags(-Wmaybe-uninitialized) check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED) @@ -304,10 +320,11 @@ else() endif() endif() + # Disable noisy warnings + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - ie_add_compiler_flags(-diag-disable=remark) - # noisy warnings from Intel Compiler 19.1.1.217 20200306 - ie_add_compiler_flags(-diag-disable=2196) + # 177: function "XXX" was declared but never referenced + ie_add_compiler_flags(-diag-disable=remark,177,2196) endif() # Linker flags @@ -315,7 +332,6 @@ else() if(APPLE) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-dead_strip") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-dead_strip") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-dead_strip") elseif(LINUX) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") diff --git a/cmake/developer_package/compile_flags/sanitizer.cmake b/cmake/developer_package/compile_flags/sanitizer.cmake index 35343b129f3..298f4243799 100644 --- a/cmake/developer_package/compile_flags/sanitizer.cmake +++ b/cmake/developer_package/compile_flags/sanitizer.cmake @@ -18,6 +18,8 @@ if (ENABLE_UB_SANITIZER) # TODO: Remove -fno-sanitize=null as thirdparty/ocl/clhpp_headers 
UBSAN compatibility resolved: # https://github.com/KhronosGroup/OpenCL-CLHPP/issues/17 set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=undefined -fno-sanitize=null") + # TODO: Remove -Wno-maybe-uninitialized after CVS-61143 fix + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -Wno-maybe-uninitialized") check_cxx_compiler_flag("-fsanitize-recover=undefined" SANITIZE_RECOVER_UNDEFINED_SUPPORTED) if (SANITIZE_RECOVER_UNDEFINED_SUPPORTED) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=undefined") @@ -33,17 +35,18 @@ endif() # common sanitizer options if (DEFINED SANITIZER_COMPILER_FLAGS) - # ensure sumbols are present + # ensure symbols are present set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer") + if(NOT OV_COMPILER_IS_CLANG) + # GPU plugin tests compilation is slow with -fvar-tracking-assignments on GCC. + # Clang has no var-tracking-assignments. + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fno-var-tracking-assignments") + endif() # prevent unloading libraries at runtime, so sanitizer can resolve their symbols set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete") - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=gold") - elseif(OV_COMPILER_IS_CLANG AND NOT WIN32) - if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0) - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld") - endif() + if(OV_COMPILER_IS_CLANG AND NOT WIN32 AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0) + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}") diff --git a/cmake/developer_package/features.cmake b/cmake/developer_package/features.cmake index 33e3530bac3..a0d4d17db32 100644 --- a/cmake/developer_package/features.cmake +++ b/cmake/developer_package/features.cmake @@ -44,16 +44,14 @@ ie_option (BUILD_SHARED_LIBS "Build as a shared library" ON) ie_dependent_option (ENABLE_FASTER_BUILD "Enable build features (PCH, UNITY) to speed up build time" OFF "CMAKE_VERSION VERSION_GREATER_EQUAL 3.16" OFF) -if(NOT DEFINED ENABLE_CPPLINT) - ie_dependent_option (ENABLE_CPPLINT "Enable cpplint checks during the build" ON "UNIX;NOT ANDROID" OFF) -endif() +ie_dependent_option (ENABLE_CPPLINT "Enable cpplint checks during the build" ON "UNIX;NOT ANDROID" OFF) -if(NOT DEFINED ENABLE_CPPLINT_REPORT) - ie_dependent_option (ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF "ENABLE_CPPLINT" OFF) -endif() +ie_dependent_option (ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF "ENABLE_CPPLINT" OFF) ie_dependent_option (ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" ON "UNIX;NOT ANDROID" OFF) +ie_dependent_option (ENABLE_NCC_STYLE "Enable ncc style check" ON "UNIX;NOT ANDROID" OFF) + ie_option (VERBOSE_BUILD "shows extra information about build" OFF) ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF) diff --git a/cmake/developer_package/ncc_naming_style/ncc b/cmake/developer_package/ncc_naming_style/ncc new file mode 160000 index 00000000000..d7d83049708 --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/ncc @@ -0,0 +1 @@ +Subproject commit d7d83049708eaa18ea6796adf0eeef85b28ebc1f diff --git a/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake b/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake new file 
mode 100644
index 00000000000..60b03e2f726
--- /dev/null
+++ b/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake
@@ -0,0 +1,137 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+if(NOT COMMAND ie_check_pip_package)
+    message(FATAL_ERROR "ncc_naming_style.cmake must be included after ie_check_pip_package")
+endif()
+
+set(ncc_style_dir "${IEDevScripts_DIR}/ncc_naming_style")
+set(ncc_style_bin_dir "${CMAKE_CURRENT_BINARY_DIR}/ncc_naming_style")
+
+# try to find_package(Clang QUIET)
+# ClangConfig.cmake contains bug that if libclang-XX-dev is not
+# installed, then find_package fails with errors even in QUIET mode
+configure_file("${ncc_style_dir}/try_find_clang.cmake"
+               "${ncc_style_bin_dir}/source/CMakeLists.txt" COPYONLY)
+execute_process(
+    COMMAND
+        "${CMAKE_COMMAND}" -S "${ncc_style_bin_dir}/source"
+                           -B "${ncc_style_bin_dir}/build"
+    RESULT_VARIABLE clang_find_result
+    OUTPUT_VARIABLE output
+    ERROR_VARIABLE output)
+
+if(NOT clang_find_result EQUAL "0")
+    message(WARNING "Please, install libclang-[N]-dev package (required for ncc naming style check)")
+    set(ENABLE_NCC_STYLE OFF)
+endif()
+
+# Since we were able to find_package(Clang) in a separate process
+# let's try to find in current process
+if(ENABLE_NCC_STYLE)
+    find_host_package(Clang QUIET)
+    if(Clang_FOUND AND TARGET libclang)
+        get_target_property(libclang_location libclang LOCATION)
+        set(ncc_wrapper_py "${ncc_style_bin_dir}/ncc_wrapper.py")
+        configure_file("${ncc_style_dir}/ncc_wrapper.py.in" ${ncc_wrapper_py} @ONLY)
+        message(STATUS "Found libclang: ${libclang_location}")
+    else()
+        message(WARNING "libclang is not found (required for ncc naming style check)")
+        set(ENABLE_NCC_STYLE OFF)
+    endif()
+endif()
+
+# find python3
+
+find_package(PythonInterp 3 QUIET)
+if(NOT PYTHONINTERP_FOUND)
+    message(WARNING "Python3 interpreter was not found (required for ncc naming style check)")
+    set(ENABLE_NCC_STYLE OFF)
+endif()
+
+# check python requirements_dev.txt
+
+set(req_file "${ncc_style_dir}/requirements_dev.txt")
+file(STRINGS ${req_file} req_lines)
+
+foreach(req IN LISTS req_lines)
+    ie_check_pip_package(${req} STATUS)
+endforeach()
+
+set(ncc_script_dir "${ncc_style_dir}/ncc/")
+set(ncc_script_py "${ncc_style_dir}/ncc/ncc.py")
+
+if(NOT EXISTS ${ncc_script_py})
+    message(WARNING "ncc.py is not downloaded via submodule")
+    set(ENABLE_NCC_STYLE OFF)
+endif()
+
+# create high-level target
+
+if(ENABLE_NCC_STYLE AND NOT TARGET ncc_all)
+    add_custom_target(ncc_all ALL)
+    set_target_properties(ncc_all PROPERTIES FOLDER ncc_naming_style)
+endif()
+
+#
+# ov_ncc_naming_style(FOR_TARGET target_name
+#                     INCLUDE_DIRECTORY dir
+#                     [ADDITIONAL_INCLUDE_DIRECTORIES dir1 dir2 ..])
+#
+# FOR_TARGET - name of the target
+# INCLUDE_DIRECTORY - directory to check headers from
+# ADDITIONAL_INCLUDE_DIRECTORIES - additional include directories used in checked headers
+#
+function(ov_ncc_naming_style)
+    if(NOT ENABLE_NCC_STYLE)
+        return()
+    endif()
+
+    cmake_parse_arguments(NCC_STYLE ""
+        "FOR_TARGET;INCLUDE_DIRECTORY" "ADDITIONAL_INCLUDE_DIRECTORIES" ${ARGN})
+
+    file(GLOB_RECURSE headers
+        RELATIVE "${NCC_STYLE_INCLUDE_DIRECTORY}"
+        "${NCC_STYLE_INCLUDE_DIRECTORY}/*.hpp")
+
+    set(new_pythonpath "${ncc_script_dir}:$ENV{PYTHONPATH}")
+    list(APPEND ADDITIONAL_INCLUDE_DIRECTORIES "${NCC_STYLE_INCLUDE_DIRECTORY}")
+
+    foreach(header IN LISTS headers)
+        set(output_file "${ncc_style_bin_dir}/${header}.ncc_style")
+        set(full_header_path "${NCC_STYLE_INCLUDE_DIRECTORY}/${header}")
+
+        add_custom_command(
+            OUTPUT
+                ${output_file}
+            COMMAND
+                "${CMAKE_COMMAND}" -E env PYTHONPATH=${new_pythonpath}
+                "${CMAKE_COMMAND}"
+                    -D "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}"
+                    -D "NCC_PY_SCRIPT=${ncc_wrapper_py}"
+                    -D "INPUT_FILE=${full_header_path}"
+                    -D "OUTPUT_FILE=${output_file}"
+                    -D "STYLE_FILE=${ncc_style_dir}/openvino.style"
+                    -D "ADDITIONAL_INCLUDE_DIRECTORIES=${ADDITIONAL_INCLUDE_DIRECTORIES}"
+                    -P "${ncc_style_dir}/ncc_run.cmake"
+            DEPENDS
+                "${full_header_path}"
+                "${ncc_style_dir}/openvino.style"
+                "${ncc_script_py}"
+                "${ncc_wrapper_py}"
+                "${ncc_style_dir}/ncc_run.cmake"
+            COMMENT
+                "[ncc naming style] ${header}"
+            VERBATIM)
+        list(APPEND output_files ${output_file})
+    endforeach()
+
+    set(ncc_target ${NCC_STYLE_FOR_TARGET}_ncc_check)
+    add_custom_target(${ncc_target}
+        DEPENDS ${output_files}
+        COMMENT "[ncc naming style] ${NCC_STYLE_FOR_TARGET}")
+
+    add_dependencies(${NCC_STYLE_FOR_TARGET} ${ncc_target})
+    add_dependencies(ncc_all ${ncc_target})
+endfunction()
diff --git a/cmake/developer_package/ncc_naming_style/ncc_run.cmake b/cmake/developer_package/ncc_naming_style/ncc_run.cmake
new file mode 100644
index 00000000000..9d161b9c373
--- /dev/null
+++ b/cmake/developer_package/ncc_naming_style/ncc_run.cmake
@@ -0,0 +1,31 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+foreach(var NCC_PY_SCRIPT PYTHON_EXECUTABLE OUTPUT_FILE
+            INPUT_FILE ADDITIONAL_INCLUDE_DIRECTORIES STYLE_FILE)
+    if(NOT DEFINED ${var})
+        message(FATAL_ERROR "${var} is not defined for ncc_run.cmake")
+    endif()
+endforeach()
+
+file(REMOVE "${OUTPUT_FILE}")
+
+execute_process(
+    COMMAND
+        "${PYTHON_EXECUTABLE}"
+        "${NCC_PY_SCRIPT}"
+        --path ${INPUT_FILE}
+        --style ${STYLE_FILE}
+        --include ${ADDITIONAL_INCLUDE_DIRECTORIES}
+    RESULT_VARIABLE result
+    OUTPUT_VARIABLE output
+    ERROR_VARIABLE output)
+
+file(WRITE "${OUTPUT_FILE}" "${output}")
+
+if(NOT result EQUAL "0")
+    # Display the output to console (to parse it from IDE)
+    message("${output}")
+    message(FATAL_ERROR "[ncc naming style] Naming style check failed for ${INPUT_FILE}")
+endif()
diff --git a/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in b/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in
new file mode 100644
index 00000000000..ed70e960b50
--- /dev/null
+++ b/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in
@@ -0,0 +1,52 @@
+#!/usr/bin/python3
+
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+import os
+import sys
+
+from clang.cindex import Config
+from ncc import Options, RulesDb, do_validate, Validator
+
+if __name__ == "__main__":
+    # set path to specific clang library location
+    Config.set_library_file('@libclang_location@')
+
+    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s',
+                        filename='log.txt', filemode='w')
+
+    """ Parse all command line arguments and validate """
+    op = Options()
+    op.parse_cmd_line()
+
+    if op.args.path is None:
+        sys.exit(1)
+
+    """ Creating the rules database """
+    rules_db = RulesDb(op._style_file)
+
+    """ Check the source code against the configured rules """
+    errors = 0
+    for path in op.args.path:
+        if os.path.isfile(path):
+            if do_validate(op, path):
+                v = Validator(rules_db, path, op)
+                errors += v.validate()
+        elif os.path.isdir(path):
+            for (root, subdirs, files) in os.walk(path):
+                for filename in files:
+                    path = root + '/' + filename
+                    if do_validate(op, path):
+                        v = Validator(rules_db, path, op)
+                        errors += v.validate()
+
+                if
not op.args.recurse: + break + else: + sys.stderr.write("File '{}' not found!\n".format(path)) + + if errors: + print("Total number of errors = {}".format(errors)) + sys.exit(1) diff --git a/cmake/developer_package/ncc_naming_style/openvino.style b/cmake/developer_package/ncc_naming_style/openvino.style new file mode 100644 index 00000000000..c44fc5c5e4a --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/openvino.style @@ -0,0 +1,129 @@ +# custom OpenVINO values +CppMethod: '^(operator\W+|[a-z_\d]+|signaling_NaN|quiet_NaN)$' +# TODO: remove stopwatch|unsupported_op +ClassName: '^([A-Z][\w]+|b?float16|numeric_limits|ngraph_error|stopwatch|unsupported_op)$' +# TODO: remove oi_pair +StructName: '^([A-Z][\w]+|element_type_traits|hash|oi_pair)$' +FunctionName: '^(operator\W+|[a-z_\d]+)$' +Namespace: '^[a-z\d_]+$' +NamespaceAlias: '^[a-z\d_]+$' +UnionName: '[A-Z][\w]+$' +TemplateTemplateParameter: '[A-Z][\w]+' +NamespaceReference: '^[a-z\d_]+$' +TemplateNonTypeParameter: '^\w*$' +ClassTemplate: '^([A-Z][\w]+|element_type_traits)$' +TemplateTypeParameter: '^\w*$' +ParameterName: '^\w*$' +FunctionTemplate: '^(operator.+|\w+)$' +TypeAliasName: '^\w+$' +VariableReference: '^\w+$' + +# TODO: align +EnumConstantName: '^.*$' +EnumName: '^.*$' +UsingDeclaration: '^.*$' +TypedefName: '^.*$' + +# not needed values +ClassTemplatePartialSpecialization: 'XXXX' +ConversionFunction: '^.*$' +UsingDirective: 'XXXX' +ClassAccessSpecifier: '^.*$' # looks like can be fixed +TypeReference: '^.*$' # looks like can be fixed +CxxBaseSpecifier: '^.*$' # looks like can be fixed +TemplateReference: '^.*$' +MemberReference: '^.*$' +LabelReference: 'XXXX' +OverloadedDeclarationReference: '^.*$' +InvalidFile: 'XXXX' +NoDeclarationFound: 'XXXX' +NotImplemented: 'XXXX' +InvalidCode: 'XXXX' +UnexposedExpression: '^.*$' +DeclarationReferenceExpression: '^.*$' +MemberReferenceExpression: '^.*$' +CallExpression: '^.*$' +BlockExpression: 'XXXX' +IntegerLiteral: '^.*$' +FloatingLiteral: '^.*$' +ImaginaryLiteral: 'XXXX' +StringLiteral: '^.*$' +CharacterLiteral: '^.*$' +ParenExpression: '^.*$' +UnaryOperator: '^.*$' +ArraySubscriptExpression: '^.*$' +BinaryOperator: '^.*$' +CompoundAssignmentOperator: '^.*$' +ConditionalOperator: '^.*$' +CstyleCastExpression: '^.*$' +CompoundLiteralExpression: 'XXXX' +InitListExpression: '^.*$' +AddrLabelExpression: 'XXXX' +StatementExpression: 'XXXX' +GenericSelectionExpression: 'XXXX' +GnuNullExpression: 'XXXX' +CxxStaticCastExpression: '^.*$' +CxxDynamicCastExpression: 'XXXX' +CxxReinterpretCastExpression: '^.*$' +CxxConstCastExpression: 'XXXX' +CxxFunctionalCastExpression: '^.*$' +CxxTypeidExpression: 'XXXX' +CxxBoolLiteralExpression: '^.*$' +CxxNullPointerLiteralExpression: '^.*$' +CxxThisExpression: '^.*$' +CxxThrowExpression: '^.*$' +CxxNewExpression: '^.*$' +CxxDeleteExpression: 'XXXX' +CxxUnaryExpression: '^.*$' +PackExpansionExpression: '^.*$' +SizeOfPackExpression: '^.*$' +LambdaExpression: '^.*$' +ObjectBoolLiteralExpression: 'XXXX' +ObjectSelfExpression: 'XXXX' +UnexposedStatement: 'XXXX' +LabelStatement: 'XXXX' +CompoundStatement: '^.*$' +CaseStatement: '^.*$' +DefaultStatement: '^.*$' +IfStatement: '^.*$' +SwitchStatement: '^.*$' +WhileStatement: '^.*$' +DoStatement: '^.*$' +ForStatement: '^.*$' +GotoStatement: 'XXXX' +IndirectGotoStatement: 'XXXX' +ContinueStatement: '^.*$' +BreakStatement: '^.*$' +ReturnStatement: '^.*$' +AsmStatement: 'XXXX' +CxxCatchStatement: 'XXXX' +CxxTryStatement: 'XXXX' +CxxForRangeStatement: '^.*$' +MsAsmStatement: 'XXXX' +NullStatement: 'XXXX' 
+DeclarationStatement: '^.*$' +TranslationUnit: 'XXXX' +UnexposedAttribute: '^.*$' +CxxFinalAttribute: 'XXXX' +CxxOverrideAttribute: '^.*$' +AnnotateAttribute: 'XXXX' +AsmLabelAttribute: 'XXXX' +PackedAttribute: 'XXXX' +PureAttribute: 'XXXX' +ConstAttribute: 'XXXX' +NoduplicateAttribute: 'XXXX' +PreprocessingDirective: 'XXXX' +MacroDefinition: 'XXXX' +MacroInstantiation: 'XXXX' +InclusionDirective: 'XXXX' +VariableName: + ScopePrefix: + Global: '' + Static: '' + ClassMember: '' + DataTypePrefix: + String: '' + Integer: '' + Bool: '' + Pointer: '' + Pattern: '^.*$' diff --git a/cmake/developer_package/ncc_naming_style/requirements_dev.txt b/cmake/developer_package/ncc_naming_style/requirements_dev.txt new file mode 100644 index 00000000000..b06650ce6ac --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/requirements_dev.txt @@ -0,0 +1,2 @@ +clang==9.0 +pyyaml \ No newline at end of file diff --git a/cmake/developer_package/ncc_naming_style/try_find_clang.cmake b/cmake/developer_package/ncc_naming_style/try_find_clang.cmake new file mode 100644 index 00000000000..70f2bfd0545 --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/try_find_clang.cmake @@ -0,0 +1,8 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +cmake_minimum_required(VERSION 3.13) +project(try_find_clang) + +find_package(Clang QUIET) diff --git a/cmake/developer_package/version.cmake b/cmake/developer_package/version.cmake index 054bc10c78e..cfd3e22e71a 100644 --- a/cmake/developer_package/version.cmake +++ b/cmake/developer_package/version.cmake @@ -42,7 +42,7 @@ macro(ie_parse_ci_build_number) return() endif() - set(ie_version_hpp "${OpenVINO_SOURCE_DIR}/inference-engine/include/ie_version.hpp") + set(ie_version_hpp "${OpenVINO_SOURCE_DIR}/inference-engine/src/inference_engine/include/ie/ie_version.hpp") if(NOT EXISTS ${ie_version_hpp}) message(FATAL_ERROR "File ie_version.hpp with IE_VERSION definitions is not found") endif() diff --git a/cmake/features.cmake b/cmake/features.cmake index b7e23ee9226..26bf48f3824 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -98,7 +98,7 @@ ie_dependent_option (ENABLE_FUNCTIONAL_TESTS "functional tests" ON "ENABLE_TESTS ie_dependent_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON "NOT MINGW" OFF) -ie_dependent_option (ENABLE_SPEECH_DEMO "enable speech demo integration" ON "NOT APPLE;NOT ANDROID;X86 OR X86_64" OFF) +ie_dependent_option (ENABLE_SPEECH_DEMO "enable speech demo integration" ON "NOT APPLE;NOT ANDROID;X86_64" OFF) ie_option (ENABLE_OPENCV "enables OpenCV" ON) @@ -125,14 +125,15 @@ endif() ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF) ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF) ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF) -ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" OFF - "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF) +ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON + "NGRAPH_ONNX_IMPORT_ENABLE" OFF) ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF) ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF) ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the 
building of unit tests using backends" ON "NGRAPH_UNIT_TEST_ENABLE" OFF) option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" OFF) +option(ENABLE_REQUIREMENTS_INSTALL "Dynamic dependencies install" ON) # WA for ngraph python build on Windows debug list(REMOVE_ITEM IE_OPTIONS NGRAPH_UNIT_TEST_ENABLE NGRAPH_UNIT_TEST_BACKENDS_ENABLE) diff --git a/cmake/test_model_zoo.cmake b/cmake/test_model_zoo.cmake new file mode 100644 index 00000000000..c3f158626cd --- /dev/null +++ b/cmake/test_model_zoo.cmake @@ -0,0 +1,131 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +function(ov_model_convert SRC DST OUT) + set(onnx_gen_script ${OpenVINO_SOURCE_DIR}/ngraph/test/models/onnx/onnx_prototxt_converter.py) + + file(GLOB_RECURSE prototxt_models RELATIVE "${SRC}" "${SRC}/*.prototxt") + file(GLOB_RECURSE xml_models RELATIVE "${SRC}" "${SRC}/*.xml") + file(GLOB_RECURSE bin_models RELATIVE "${SRC}" "${SRC}/*.bin") + file(GLOB_RECURSE onnx_models RELATIVE "${SRC}" "${SRC}/*.onnx") + file(GLOB_RECURSE data_models RELATIVE "${SRC}" "${SRC}/*.data") + + foreach(in_file IN LISTS prototxt_models xml_models bin_models onnx_models data_models) + get_filename_component(ext "${in_file}" EXT) + get_filename_component(rel_dir "${in_file}" DIRECTORY) + get_filename_component(name_we "${in_file}" NAME_WE) + set(model_source_dir "${SRC}/${rel_dir}") + + if(NOT NGRAPH_ONNX_IMPORT_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$") + # don't copy / process ONNX / prototxt files + continue() + endif() + + if(ext STREQUAL ".prototxt") + # convert model + set(rel_out_name "${name_we}.onnx") + if(rel_dir) + set(rel_out_name "${rel_dir}/${rel_out_name}") + endif() + else() + # copy as is + set(rel_out_name "${in_file}") + endif() + + set(full_out_name "${DST}/${rel_out_name}") + file(MAKE_DIRECTORY "${DST}/${rel_dir}") + + if(ext STREQUAL ".prototxt") + # convert .prototxt models to .onnx binary + add_custom_command(OUTPUT ${full_out_name} + COMMAND ${PYTHON_EXECUTABLE} ${onnx_gen_script} + "${SRC}/${in_file}" ${full_out_name} + DEPENDS ${onnx_gen_script} "${SRC}/${in_file}" + COMMENT "Generate ${rel_out_name}" + WORKING_DIRECTORY "${model_source_dir}") + else() + add_custom_command(OUTPUT ${full_out_name} + COMMAND "${CMAKE_COMMAND}" -E copy_if_different + "${SRC}/${in_file}" ${full_out_name} + DEPENDS ${onnx_gen_script} "${SRC}/${in_file}" + COMMENT "Copy ${rel_out_name}" + WORKING_DIRECTORY "${model_source_dir}") + endif() + list(APPEND files "${full_out_name}") + endforeach() + + set(${OUT} ${files} PARENT_SCOPE) +endfunction() + +ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/ngraph/test" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ngraph" + onnx_out_files) + +set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader" + ie_onnx_out_files) + +set(rel_path "inference-engine/tests/functional/inference_engine/ir_serialization") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ir_serialization" + ie_serialize_out_files) + +set(rel_path "inference-engine/tests/unit/frontends/onnx_import/models") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_import" + ie_onnx_import_out_files) + +if(ENABLE_TESTS) + if(NGRAPH_ONNX_IMPORT_ENABLE AND ENABLE_REQUIREMENTS_INSTALL) + find_package(PythonInterp 3 REQUIRED) + + 
get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" -m pip --version + WORKING_DIRECTORY ${PYTHON_EXEC_DIR} + RESULT_VARIABLE pip3_exit_code + OUTPUT_VARIABLE pip3_version) + + if(NOT pip3_exit_code EQUAL 0) + message(FATAL_ERROR "Failed to extract pip module version") + endif() + + if(pip3_version MATCHES ".* ([0-9]+)+\.([0-9]+)([\.0-9 ]).*") + set(pip3_version ${CMAKE_MATCH_1}.${CMAKE_MATCH_2}) + else() + message(FATAL_ERROR "Failed to parse ${pip3_version}") + endif() + + message(STATUS "pip version is ${pip3_version}") + set(args --quiet) + if(pip3_version VERSION_GREATER 20.2.2) + list(APPEND args --use-feature=2020-resolver) + endif() + + set(reqs "${OpenVINO_SOURCE_DIR}/ngraph/test/requirements_test_onnx.txt") + add_custom_target(test_pip_prerequsites ALL + "${PYTHON_EXECUTABLE}" -m pip install ${args} -r ${reqs} + COMMENT "Install requirements_test.txt" + VERBATIM + SOURCES ${reqs}) + endif() + + add_custom_target(test_model_zoo DEPENDS ${onnx_out_files} + ${ie_onnx_out_files} + ${ie_serialize_out_files} + ${ie_onnx_import_out_files}) + + if(TARGET test_pip_prerequsites) + add_dependencies(test_model_zoo test_pip_prerequsites) + endif() + + if (NGRAPH_PDPD_FRONTEND_ENABLE) + add_dependencies(test_model_zoo paddlepaddle_test_models) + endif() + + install(DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo" + DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL) + + set(TEST_MODEL_ZOO "./test_model_zoo" CACHE PATH "Path to test model zoo") +endif() diff --git a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md index 03a58a23387..d5383275ad6 100644 --- a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md +++ b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md @@ -519,3 +519,67 @@ Standard ONNX\* operators: | Upsample | No | | Where | No | | Xor | No | + + +## PaddlePaddle\* Supported Operators + +Standard PaddlePaddle(paddlepaddle>=2.1)\* Operators: + +| Operator Name in PaddlePaddle\*| Limitations| +| :----------| :----------| +| adpative_pool2d | 'NHWC' data_layout is not supported | +| arg_max | 'int32' output data_type is not supported | +| assign_value | No | +| batch_norm | No | +| bilinear_interp | 'NCW' 'NWC' 'NHWC' 'NCDHW' 'NDHWC' data_layout are not supported | +| bilinear_interp_v2 | 'NCW' 'NWC' 'NHWC' 'NCDHW' 'NDHWC' data_layout are not supported | +| bmm | No | +| cast | No | +| clip | No | +| concat | No | +| conv2d | 'NHWC' data_layout is not supported | +| depthwise_conv2d | 'NHWC' data_layout is not supported | +| deformable_conv | No | +| elementwise_add | No | +| elementwise_div | No | +| elementwise_max | No | +| elementwise_min | No | +| elementwise_mul | No | +| elementwise_pow | No | +| elementwise_sub | No | +| equal | No | +| expand_v2 | No | +| fill_constant_batch_size_like | No | +| fill_constant | No | +| flatten_contiguous_range | No | +| greater_equal | No | +| hard_sigmoid | No | +| hard_swish | No | +| leaky_relu | No | +| log | No | +| logical_not | No | +| matmul | No | +| matrix_nms | Only supports IE CPU plugin with 'number of selected boxes' static shape(eg: min(min(num_boxes, nms_top_k) * num_classes_output, keep_top_k)) | +| max_pool2d_with_index | No | +| mul | No | +| multiclass_nms | Only supports IE CPU plugin with 'number of selected boxes' static shape(eg: min(min(num_boxes, nms_top_k) * num_classes_output, keep_top_k)) | +| nearest_interp | 'NCW' 'NWC' 'NHWC' 'NCDHW' 'NDHWC' data_layout are 
not supported | +| nearest_interp_v2 | 'NCW' 'NWC' 'NHWC' 'NCDHW' 'NDHWC' data_layout are not supported | +| pad3d | 'Circular' mode is not supported | +| pow | No | +| pool2d | 'NHWC' data_layout is not supported | +| range | No | +| relu | No | +| relu6 | No | +| reshape2 | No | +| rnn | 'SimpleRNN' and 'GRU' modes are not supported | +| scale | No | +| shape | No | +| slice | No | +| softmax | No | +| sigmoid | No | +| split | No | +| squeeze2 | No | +| transpose2 | No | +| unsqueeze2 | No | +| yolo_box | No | diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md index 6feec5f627a..eabe4840eb8 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md @@ -1,6 +1,7 @@ # Converting TensorFlow* Object Detection API Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models} > **NOTES**: +> * Starting with the 2022.1 release, the Model Optimizer can convert the TensorFlow\* Object Detection API Faster and Mask RCNNs topologies differently. By default, the Model Optimizer adds operation "Proposal" to the generated IR. This operation needs an additional input to the model with name "image_info" which should be fed with several values describing the pre-processing applied to the input image (refer to the [Proposal](../../../../ops/detection/Proposal_4.md) operation specification for more information). However, this input is redundant for the models trained and inferred with equal size images. Model Optimizer can generate IR for such models and insert operation [DetectionOutput](../../../../ops/detection/DetectionOutput_1.md) instead of `Proposal`. The `DetectionOutput` operation does not require additional model input "image_info" and moreover, for some models the produced inference results are closer to the original TensorFlow\* model. In order to trigger new behaviour the attribute "operation_to_add" in the corresponding JSON transformation configuration file should be set to value "DetectionOutput" instead of default one "Proposal". > * Starting with the 2021.1 release, the Model Optimizer converts the TensorFlow\* Object Detection API SSDs, Faster and Mask RCNNs topologies keeping shape-calculating sub-graphs by default, so topologies can be re-shaped in the Inference Engine using dedicated reshape API. Refer to [Using Shape Inference](../../../../IE_DG/ShapeInference.md) for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size. > * To generate IRs for SSD topologies, the Model Optimizer creates a number of `PriorBoxClustered` layers instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the Inference Engine using dedicated Inference Engine API. The reshaping is supported for all SSD topologies except FPNs which contain hardcoded shapes for some operations preventing from changing topology input shape. 
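As a sketch of the first note above: switching the generated IR from `Proposal` to `DetectionOutput` comes down to editing the selected JSON transformation configuration file before running the Model Optimizer. The snippet below is illustrative only; it assumes the attribute is exposed under a replacement's `custom_attributes` section and uses `faster_rcnn_support_api_v2.4.json` purely as an example name, so check the actual `*_support_api_*.json` shipped with your release.

```python
import json

# Illustrative file name; pick the *_support_api_*.json matching your topology
# and TensorFlow Object Detection API version.
config_path = "faster_rcnn_support_api_v2.4.json"

with open(config_path) as f:
    replacements = json.load(f)  # the config is a list of replacement descriptions

for replacement in replacements:
    attrs = replacement.get("custom_attributes", {})
    if "operation_to_add" in attrs:                    # assumed location of the attribute
        attrs["operation_to_add"] = "DetectionOutput"  # default value is "Proposal"

with open(config_path, "w") as f:
    json.dump(replacements, f, indent=4)
```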
@@ -29,14 +30,16 @@ To convert a TensorFlow\* Object Detection API model, go to the `/d * `faster_rcnn_support_api_v1.13.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.13.X * `faster_rcnn_support_api_v1.14.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.14.0 up to 1.14.X inclusively * `faster_rcnn_support_api_v1.15.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.15.0 up to 2.0 - * `faster_rcnn_support_api_v2.0.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 or higher + * `faster_rcnn_support_api_v2.0.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 up to 2.3.X inclusively + * `faster_rcnn_support_api_v2.4.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.4 or higher * `mask_rcnn_support.json` --- for Mask R-CNN topologies from the TF 1.X models zoo trained with TensorFlow\* version 1.9.0 or lower. * `mask_rcnn_support_api_v1.7.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.7.0 up to 1.9.X inclusively * `mask_rcnn_support_api_v1.11.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.11.0 up to 1.12.X inclusively * `mask_rcnn_support_api_v1.13.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.13.0 up to 1.13.X inclusively * `mask_rcnn_support_api_v1.14.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.14.0 up to 1.14.X inclusively * `mask_rcnn_support_api_v1.15.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.15.0 up to 2.0 - * `mask_rcnn_support_api_v2.0.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 or higher + * `mask_rcnn_support_api_v2.0.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 up to 2.3.X inclusively + * `mask_rcnn_support_api_v2.4.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.4 or higher * `rfcn_support.json` --- for RFCN topology from the models zoo trained with TensorFlow\* version up to 1.9.X inclusively * `rfcn_support_api_v1.10.json` --- for RFCN topology from the models zoo frozen with TensorFlow\* version 1.10.0 up to 1.12.X inclusively * `rfcn_support_api_v1.13.json` --- for RFCN topology from the models zoo frozen with TensorFlow\* version 1.13.X diff --git a/docs/doxygen/doxygen-ignore.txt b/docs/doxygen/doxygen-ignore.txt index b1f27a4972c..c2bc8a0825c 100644 --- a/docs/doxygen/doxygen-ignore.txt +++ b/docs/doxygen/doxygen-ignore.txt @@ -16,8 +16,8 @@ openvino/docs/optimization_guide/dldt_optimization_guide.md openvino/docs/IE_DG/ShapeInference.md build/docs/openvino_docs.xml openvino/docs/install_guides/installing-openvino-linux-ivad-vpu.md -inference-engine/include/ie_parallel.hpp -inference-engine/include/ie_plugin_config.hpp -inference-engine/include/vpu/myriad_config.hpp -inference-engine/include/vpu/vpu_config.hpp -inference-engine/include/vpu/vpu_plugin_config.hpp \ No newline at end of file +inference-engine/src/inference_engine/include/ie/ie_parallel.hpp +inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp 
+inference-engine/src/inference_engine/include/ie/vpu/myriad_config.hpp +inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp +inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp diff --git a/docs/doxygen/ie_docs.config b/docs/doxygen/ie_docs.config index bbd203c931c..792d001bdcf 100644 --- a/docs/doxygen/ie_docs.config +++ b/docs/doxygen/ie_docs.config @@ -824,7 +824,7 @@ WARN_LOGFILE = "@DOCS_BUILD_DIR@/ie_docs.log" # Note: If this tag is empty the current directory is searched. INPUT = "@DOCS_BUILD_DIR@" \ - "@IE_SOURCE_DIR@/include" + "@IE_SOURCE_DIR@/src/inference_engine/include" # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/docs/ops/arithmetic/Erf_1.md b/docs/ops/arithmetic/Erf_1.md index 6b445dafad2..52d2d0301cb 100644 --- a/docs/ops/arithmetic/Erf_1.md +++ b/docs/ops/arithmetic/Erf_1.md @@ -4,34 +4,32 @@ **Category**: Arithmetic unary operation -**Short description**: *Erf* calculates the Gauss error function element-wise with given tensor. +**Short description**: *Erf* performs element-wise Gauss error function (erf) on a given input tensor. **Detailed Description** -For each element from the input tensor calculates corresponding element in the output tensor with the following formula: +*Erf* performs element-wise erf operation on a given input tensor, based on the following mathematical formula: + \f[ erf(x) = \pi^{-1} \int_{-x}^{x} e^{-t^2} dt \f] -**Attributes**: - - No attributes available. +**Attributes**: *Erf* operation has no attributes. **Inputs** -* **1**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise operation. A tensor of type *T*. +* **1**: The result of element-wise *Erf* function applied to the input tensor. A tensor of type *T* and the same shape as the input tensor. **Types** -* *T*: any supported floating-point type. +* *T*: any supported numeric type. -**Examples** -*Example 1* +**Example** ```xml diff --git a/docs/ops/arithmetic/Tan_1.md b/docs/ops/arithmetic/Tan_1.md index 6ea7d1e9a6b..d9086f7ad5f 100644 --- a/docs/ops/arithmetic/Tan_1.md +++ b/docs/ops/arithmetic/Tan_1.md @@ -6,32 +6,39 @@ **Short description**: *Tan* performs element-wise tangent operation with given tensor. -**Attributes**: - - No attributes available. - -**Inputs** - -* **1**: An tensor of type *T*. **Required.** - -**Outputs** - -* **1**: The result of element-wise tan operation. A tensor of type *T*. - -**Types** - -* *T*: any numeric type. - -*Tan* does the following with the input tensor *a*: +**Detailed description**: Operation takes one input tensor and performs the element-wise tangent function on a given input tensor, based on the following mathematical formula: \f[ a_{i} = tan(a_{i}) \f] -**Examples** - *Example 1* + input = [0.0, 0.25, -0.25, 0.5, -0.5] + output = [0.0, 0.25534192, -0.25534192, 0.54630249, -0.54630249] + +*Example 2* + + input = [-2, -1, 0, 1, 2] + output = [2, -2, 0, 2, -2] + +**Attributes**: *tan* operation has no attributes. + +**Inputs** + +* **1**: A tensor of type *T* and arbitrary shape, measured in radians. **Required.** + +**Outputs** + +* **1**: The result of element-wise *tan* applied to the input tensor. A tensor of type *T* and same shape as the input tensor. + +**Types** + +* *T*: any supported numeric type. 
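For reference, the integer outputs in *Example 2* above are consistent with evaluating the tangent in floating point and rounding to the nearest integer for integral types (an assumption inferred from the listed values, not a statement of the exact reference behaviour):

\f[
\tan(-2) \approx 2.185 \rightarrow 2,\quad \tan(-1) \approx -1.557 \rightarrow -2,\quad \tan(0) = 0,\quad \tan(1) \approx 1.557 \rightarrow 2,\quad \tan(2) \approx -2.185 \rightarrow -2
\f]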
+ + +**Examples** + ```xml diff --git a/docs/ops/comparison/LessEqual_1.md b/docs/ops/comparison/LessEqual_1.md index 4144095bed4..a8b7c810181 100644 --- a/docs/ops/comparison/LessEqual_1.md +++ b/docs/ops/comparison/LessEqual_1.md @@ -4,32 +4,7 @@ **Category**: Comparison binary operation -**Short description**: *LessEqual* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. - -**Attributes**: - -* *auto_broadcast* - - * **Description**: specifies rules used for auto-broadcasting of input tensors. - * **Range of values**: - * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. - * **Type**: string - * **Default value**: "numpy" - * **Required**: *no* - -**Inputs** - -* **1**: A tensor of type *T*. **Required.** -* **2**: A tensor of type *T*. **Required.** - -**Outputs** - -* **1**: The result of element-wise comparison operation. A tensor of type boolean. - -**Types** - -* *T*: arbitrary supported type. +**Short description**: *LessEqual* performs element-wise comparison operation with two given tensors applying broadcast rules specified in the *auto_broadcast* attribute. **Detailed description** Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. @@ -40,12 +15,39 @@ After broadcasting *LessEqual* does the following with the input tensors *a* and o_{i} = a_{i} <= b_{i} \f] +**Attributes**: + +* *auto_broadcast* + + * **Description**: specifies rules used for auto-broadcasting of input tensors. + * **Range of values**: + * *none* - no auto-broadcasting is allowed, all input shapes should match, + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md), + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md). + * **Type**: string + * **Default value**: "numpy" + * **Required**: *no* + +**Inputs** + +* **1**: A tensor of type *T* and arbitrary shape. **Required.** +* **2**: A tensor of type *T* and arbitrary shape. **Required.** + +**Outputs** + +* **1**: The result of element-wise comparison operation applied to the input tensors. A tensor of type **boolean** and shape equal to broadcasted shape of two inputs. + +**Types** + +* *T*: arbitrary supported type. + **Examples** -*Example 1* +*Example 1: no broadcast* ```xml + 256 @@ -65,9 +67,10 @@ o_{i} = a_{i} <= b_{i} ``` -*Example 2: broadcast* +*Example 2: numpy broadcast* ```xml + 8 diff --git a/docs/ops/comparison/Less_1.md b/docs/ops/comparison/Less_1.md index 79a154a6c57..dcf210d6579 100644 --- a/docs/ops/comparison/Less_1.md +++ b/docs/ops/comparison/Less_1.md @@ -6,6 +6,16 @@ **Short description**: *Less* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. +**Detailed description** +Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. 
+ +After broadcasting *Less* does the following with the input tensors *a* and *b*: + +\f[ +o_{i} = a_{i} < b_{i} +\f] + + **Attributes**: * *auto_broadcast* @@ -13,8 +23,9 @@ * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. - * **Type**: string + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md) + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md) + * **Type**: `string` * **Default value**: "numpy" * **Required**: *no* @@ -31,15 +42,6 @@ * *T*: arbitrary supported type. -**Detailed description** -Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. - -After broadcasting *Less* does the following with the input tensors *a* and *b*: - -\f[ -o_{i} = a_{i} < b_{i} -\f] - **Examples** *Example 1* diff --git a/docs/ops/logical/LogicalAnd_1.md b/docs/ops/logical/LogicalAnd_1.md index 4f39b236fef..a653d1abbc2 100644 --- a/docs/ops/logical/LogicalAnd_1.md +++ b/docs/ops/logical/LogicalAnd_1.md @@ -6,39 +6,40 @@ **Short description**: *LogicalAnd* performs element-wise logical AND operation with two given tensors applying multi-directional broadcast rules. +**Detailed description**: Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. + +After broadcasting *LogicalAnd* does the following with the input tensors *a* and *b*: + +\f[ +o_{i} = a_{i} \wedge b_{i} +\f] + **Attributes**: * *auto_broadcast* * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: - * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. + * *none* - no auto-broadcasting is allowed, all input shapes must match, + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md), + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md). * **Type**: string * **Default value**: "numpy" * **Required**: *no* **Inputs** -* **1**: A tensor of type *T*. **Required.** -* **2**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** +* **2**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise logical AND operation. A tensor of type boolean. +* **1**: The result of element-wise *LogicalAnd* operation. A tensor of type boolean. **Types** * *T*: boolean type. -**Detailed description** -Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. 
- -After broadcasting *LogicalAnd* does the following with the input tensors *a* and *b*: - -\f[ -o_{i} = a_{i} and b_{i} -\f] **Examples** diff --git a/docs/template_extension/cpu_kernel.cpp b/docs/template_extension/cpu_kernel.cpp index aa2486589cb..b1d426b1582 100644 --- a/docs/template_extension/cpu_kernel.cpp +++ b/docs/template_extension/cpu_kernel.cpp @@ -102,6 +102,7 @@ InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& IE_THROW() << "Operation supports only FP32 precisions!"; } } catch (InferenceEngine::Exception& ex) { + error = ex.what(); if (resp) { strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1); resp->msg[sizeof(resp->msg) - 1] = 0; diff --git a/docs/template_extension/fft_kernel.cpp b/docs/template_extension/fft_kernel.cpp index 12554a70c75..3fcf71a8f64 100644 --- a/docs/template_extension/fft_kernel.cpp +++ b/docs/template_extension/fft_kernel.cpp @@ -66,6 +66,7 @@ InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config, IE_THROW() << "Operation supports only FP32 precisions!"; } } catch (InferenceEngine::Exception& ex) { + error = ex.what(); if (resp) { strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1); resp->msg[sizeof(resp->msg) - 1] = 0; diff --git a/docs/template_plugin/tests/functional/op_reference/comparison.hpp b/docs/template_plugin/tests/functional/op_reference/comparison.hpp new file mode 100644 index 00000000000..0d520b73ba2 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/comparison.hpp @@ -0,0 +1,63 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" +#include "ngraph_functions/builders.hpp" + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { + +struct RefComparisonParams { + ngraph::helpers::ComparisonTypes compType; + Tensor input1; + Tensor input2; + Tensor expected; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, compType); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input1); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input2); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected); +}; + +class ReferenceComparisonLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + const auto& params = GetParam(); + function = CreateFunction(params.compType, params.input1.shape, params.input2.shape, params.input1.type, params.expected.type); + inputData = {params.input1.data, params.input2.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + const auto& param = obj.param; + std::ostringstream result; + result << "comparisonType=" << param.compType << "_"; + result << "inpt_shape1=" << param.input1.shape << "_"; + result << "inpt_shape2=" << param.input2.shape << "_"; + result << "iType=" << param.input1.type << "_"; + result << "oType=" << param.expected.type; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(ngraph::helpers::ComparisonTypes comp_op_type, const ngraph::PartialShape& input_shape1, + const ngraph::PartialShape& input_shape2, const ngraph::element::Type& input_type, + const ngraph::element::Type& expected_output_type) { + const auto in = std::make_shared(input_type, input_shape1); + const auto in2 = std::make_shared(input_type, input_shape2); + const auto comp = ngraph::builder::makeComparison(in, in2, comp_op_type); + return 
std::make_shared(ngraph::NodeVector {comp}, ngraph::ParameterVector {in, in2}); + } +}; +} // namespace ComparisonOpsRefTestDefinitions +} // namespace reference_tests \ No newline at end of file diff --git a/docs/template_plugin/tests/functional/op_reference/conversion.cpp b/docs/template_plugin/tests/functional/op_reference/conversion.cpp new file mode 100644 index 00000000000..36f616cdaae --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/conversion.cpp @@ -0,0 +1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "conversion.hpp" + +namespace reference_tests { +namespace ConversionOpsRefTestDefinitions { +namespace { +TEST_P(ReferenceConversionLayerTest, CompareWithHardcodedRefs) { + Exec(); +} +} // namespace +} // namespace ConversionOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/conversion.hpp b/docs/template_plugin/tests/functional/op_reference/conversion.hpp new file mode 100644 index 00000000000..f3846c7eab6 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/conversion.hpp @@ -0,0 +1,67 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" +#include "ngraph_functions/builders.hpp" + +namespace reference_tests { +namespace ConversionOpsRefTestDefinitions { + +static std::map conversionNames = { + {ngraph::helpers::ConversionTypes::CONVERT, "Convert"}, + {ngraph::helpers::ConversionTypes::CONVERT_LIKE, "ConvertLike"} +}; + +struct ConvertParams { + template + ConvertParams(ngraph::helpers::ConversionTypes convType, const ngraph::PartialShape& shape, const ngraph::element::Type& iType, + const ngraph::element::Type& oType, const std::vector& iValues, const std::vector& oValues, size_t iSize = 0, size_t oSize = 0) + : conversionType(convType), pshape(shape), inType(iType), outType(oType), inputData(CreateBlob(iType, iValues, iSize)), + refData(CreateBlob(oType, oValues, oSize)) {} + ngraph::helpers::ConversionTypes conversionType; + ngraph::PartialShape pshape; + ngraph::element::Type inType; + ngraph::element::Type outType; + InferenceEngine::Blob::Ptr inputData; + InferenceEngine::Blob::Ptr refData; +}; + +class ReferenceConversionLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + const auto& params = GetParam(); + function = CreateFunction(params.pshape, params.inType, params.outType, params.conversionType); + inputData = {params.inputData}; + refOutData = {params.refData}; + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + const auto& param = obj.param; + std::ostringstream result; + result << "convertionType=" << conversionNames[param.conversionType] << "_"; + result << "shape=" << param.pshape << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const ngraph::PartialShape& input_shape, const ngraph::element::Type& input_type, + const ngraph::element::Type& expected_output_type, + const ngraph::helpers::ConversionTypes& conversion_type) { + const auto in = std::make_shared(input_type, input_shape); + const auto convert = ngraph::builder::makeConversion(in, expected_output_type, conversion_type); + return std::make_shared(ngraph::NodeVector {convert}, ngraph::ParameterVector {in}); + 
} +}; +} // namespace ConversionOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/convert.cpp b/docs/template_plugin/tests/functional/op_reference/convert.cpp index b8e6f5846f7..68834b3a576 100644 --- a/docs/template_plugin/tests/functional/op_reference/convert.cpp +++ b/docs/template_plugin/tests/functional/op_reference/convert.cpp @@ -10,433 +10,403 @@ #include #include -#include "base_reference_test.hpp" +#include "conversion.hpp" -using namespace reference_tests; using namespace ngraph; using namespace InferenceEngine; +using ConversionTypes = ngraph::helpers::ConversionTypes; -struct ConvertParams { - template - ConvertParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const ngraph::element::Type& oType, const std::vector& iValues, - const std::vector& oValues, size_t iSize = 0, size_t oSize = 0) - : pshape(shape), inType(iType), outType(oType), inputData(CreateBlob(iType, iValues, iSize)), refData(CreateBlob(oType, oValues, oSize)) {} - ngraph::PartialShape pshape; - ngraph::element::Type inType; - ngraph::element::Type outType; - InferenceEngine::Blob::Ptr inputData; - InferenceEngine::Blob::Ptr refData; -}; - -class ReferenceConvertLayerTest : public testing::TestWithParam, public CommonReferenceTest { -public: - void SetUp() override { - auto params = GetParam(); - function = CreateFunction(params.pshape, params.inType, params.outType); - inputData = {params.inputData}; - refOutData = {params.refData}; - } - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; - std::ostringstream result; - result << "shape=" << param.pshape << "_"; - result << "iType=" << param.inType << "_"; - result << "oType=" << param.outType; - return result.str(); - } - -private: - static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type, - const element::Type& expected_output_type) { - const auto in = std::make_shared(input_type, input_shape); - const auto convert = std::make_shared(in, expected_output_type); - return std::make_shared(NodeVector {convert}, ParameterVector {in}); - } -}; - -TEST_P(ReferenceConvertLayerTest, CompareWithHardcodedRefs) { - Exec(); -} +namespace reference_tests { +namespace ConversionOpsRefTestDefinitions { +namespace { INSTANTIATE_TEST_SUITE_P( - smoke_Convert_With_Hardcoded_Refs, ReferenceConvertLayerTest, + smoke_Conversion_With_Hardcoded_Refs, ReferenceConversionLayerTest, ::testing::Values( // destination boolean - ConvertParams(ngraph::PartialShape {2, 3}, ngraph::element::u8, ngraph::element::boolean, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 3}, ngraph::element::u8, ngraph::element::boolean, std::vector {0, 12, 23, 0, std::numeric_limits::lowest(), std::numeric_limits::max()}, std::vector {0, 1, 1, 0, 0, 1}), - ConvertParams(ngraph::PartialShape {2, 3}, ngraph::element::i32, ngraph::element::boolean, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 3}, ngraph::element::i32, ngraph::element::boolean, std::vector {0, -12, 23, 0, std::numeric_limits::lowest(), std::numeric_limits::max()}, std::vector {0, 1, 1, 0, 1, 1}), - ConvertParams(ngraph::PartialShape {3, 3}, ngraph::element::f32, ngraph::element::boolean, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {3, 3}, ngraph::element::f32, ngraph::element::boolean, std::vector {0.f, 1.5745f, 0.12352f, 0.f, std::numeric_limits::lowest(), std::numeric_limits::max(), 
std::numeric_limits::min(), std::numeric_limits::infinity(), -std::numeric_limits::infinity()}, std::vector {0, 1, 1, 0, 1, 1, 1, 1, 1}), - // destination bf16 - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::bf16, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::bf16, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), - ConvertParams(ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::bf16, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::bf16, std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), // destination f16 - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f16, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f16, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), - ConvertParams(ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::f16, std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, - std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::f16, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), // destination f32 - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u1, ngraph::element::f32, std::vector {0xA0}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u1, ngraph::element::f32, std::vector {0xA0}, std::vector {1.0f, 0.0f, 1.0f, 0.0f}, 4), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u4, ngraph::element::f32, std::vector {0xFB, 0x0A}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u4, ngraph::element::f32, std::vector {0xFB, 0x0A}, std::vector {15.0f, 11.0f, 0.0f, 10.0f}, 4), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u8, ngraph::element::f32, std::vector {255, 128, 32, 0}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u8, ngraph::element::f32, std::vector {255, 128, 32, 0}, std::vector {255.0f, 128.0f, 32.0f, 0.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u16, ngraph::element::f32, std::vector {64000, 32000, 128, 0}, - std::vector {64000.0f, 32000.0f, 128.0f, 0.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u32, ngraph::element::f32, std::vector {4000000, 2000000, 128, 0}, - std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u64, ngraph::element::f32, std::vector {4000000, 2000000, 128, 0}, - std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i4, ngraph::element::f32, std::vector {0xFE, 0xF2}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u16, ngraph::element::f32, + std::vector {64000, 32000, 128, 0}, std::vector {64000.0f, 32000.0f, 
128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u32, ngraph::element::f32, + std::vector {4000000, 2000000, 128, 0}, std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u64, ngraph::element::f32, + std::vector {4000000, 2000000, 128, 0}, std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i4, ngraph::element::f32, std::vector {0xFE, 0xF2}, std::vector {-1.0f, -2.0f, -1.0f, 2.0f}, 4), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i8, ngraph::element::f32, std::vector {-127, -0, 0, 127}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i8, ngraph::element::f32, std::vector {-127, -0, 0, 127}, std::vector {-127.0f, -0.0f, 0.0f, 127.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i16, ngraph::element::f32, std::vector {-32000, -0, 0, 32000}, - std::vector {-32000.0f, -0.0f, 0.0f, 32000.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i32, ngraph::element::f32, std::vector {-64000, -0, 0, 64000}, - std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i64, ngraph::element::f32, std::vector {-64000, -0, 0, 64000}, - std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::bf16, ngraph::element::f32, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i16, ngraph::element::f32, + std::vector {-32000, -0, 0, 32000}, std::vector {-32000.0f, -0.0f, 0.0f, 32000.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i32, ngraph::element::f32, + std::vector {-64000, -0, 0, 64000}, std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i64, ngraph::element::f32, + std::vector {-64000, -0, 0, 64000}, std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::bf16, ngraph::element::f32, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f16, ngraph::element::f32, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f16, ngraph::element::f32, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f32, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f32, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), // destination i4 - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::i4, std::vector {0xA0}, std::vector {0x10, 0x10}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, 
ngraph::element::u4, ngraph::element::i4, std::vector {0x12, 0x03}, std::vector {0x12, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::i4, std::vector {0xA0}, + std::vector {0x10, 0x10}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i4, std::vector {0x12, 0x03}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i4, std::vector {0xFE, 0x03}, std::vector {0xFE, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i4, std::vector {-1, -2, 2, 3}, std::vector {0xFE, 0x23}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i4, std::vector {-1, -2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i4, std::vector {0xFE, 0x03}, std::vector {0xFE, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i4, std::vector {-1, -2, 0, 3}, - std::vector {0xFE, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, std::vector {0xFE, 0x23}, - 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i4, 
std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), // destination i8 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i8, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i8, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i8, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i8, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i8, 
std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i8, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i8, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i8, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i8, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), // destination i16 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i16, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i16, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i16, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i16, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + 
ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i16, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i16, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i16, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i16, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), // destination i32 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i32, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i32, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i32, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i32, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, 
ngraph::element::i8, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i32, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i32, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i32, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i32, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), // destination i64 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i64, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i64, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i64, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, 
ngraph::element::i64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i64, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i64, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i64, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i64, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i64, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), // destination u1 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u1, std::vector {0xA0}, std::vector {0xA0}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u4, ngraph::element::u1, std::vector {0x10, 0x01, 0x00, 0x00}, - std::vector {0x90}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u8, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u16, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u32, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u64, ngraph::element::u1, 
std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i4, ngraph::element::u1, std::vector {0x10, 0x01, 0x00, 0x00}, - std::vector {0x90}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i8, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i16, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i32, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i64, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::f16, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::bf16, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::f32, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u1, std::vector {0xA0}, + std::vector {0xA0}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u4, ngraph::element::u1, + std::vector {0x10, 0x01, 0x00, 0x00}, std::vector {0x90}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u8, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u64, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i4, ngraph::element::u1, + std::vector {0x10, 0x01, 0x00, 0x00}, std::vector {0x90}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i8, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i64, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::f16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::bf16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, 
std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::f32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), // destination u4 - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::u4, std::vector {0xA0}, std::vector {0x10, 0x10}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u4, std::vector {0x12, 0x03}, std::vector {0x12, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::u4, std::vector {0xA0}, + std::vector {0x10, 0x10}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u4, std::vector {0x12, 0x03}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u4, std::vector {0xFE, 0x03}, std::vector {0xFE, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u4, std::vector {-1, -2, 2, 3}, std::vector {0xFE, 0x23}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u4, std::vector {-1, -2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u4, std::vector {0xFE, 0x03}, std::vector {0xFE, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u4, std::vector {-1, -2, 0, 3}, - std::vector {0xFE, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, std::vector {0xFE, 0x23}, - 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape 
{4}, ngraph::element::i16, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), // destination u8 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u8, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u8, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u8, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u8, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u8, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u8, std::vector {1, 2, 2, 3}, + 
ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u8, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u8, std::vector {1, 2, 0, 3}, - std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u8, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), // destination u16 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u16, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u16, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u16, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u16, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u16, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u16, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u16, std::vector {1, 2, 
2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u16, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u16, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u16, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u16, std::vector {1, 2, 0, 3}, - std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u16, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u16, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), // destination u32 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u32, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u32, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u32, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u32, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - 
ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u32, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u32, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u32, std::vector {1, 2, 0, 3}, - std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u32, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - // destination u64 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u64, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u64, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u64, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u64, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, 
ngraph::element::u32, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u64, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u64, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u64, std::vector {1, 2, 0, 3}, - std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u64, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3})), - ReferenceConvertLayerTest::getTestCaseName); + ReferenceConversionLayerTest::getTestCaseName); +} // namespace +} // namespace ConversionOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/convert_like.cpp b/docs/template_plugin/tests/functional/op_reference/convert_like.cpp new file mode 100644 index 00000000000..cd745c4040a --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/convert_like.cpp @@ -0,0 +1,413 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "conversion.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using ConversionTypes = ngraph::helpers::ConversionTypes; + +namespace reference_tests { +namespace ConversionOpsRefTestDefinitions { +namespace { + +INSTANTIATE_TEST_SUITE_P( + 
smoke_Conversion_With_Hardcoded_Refs, ReferenceConversionLayerTest, + ::testing::Values( + // destination boolean + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 3}, ngraph::element::u8, ngraph::element::boolean, + std::vector {0, 12, 23, 0, std::numeric_limits::lowest(), std::numeric_limits::max()}, + std::vector {0, 1, 1, 0, 0, 1}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 3}, ngraph::element::i32, ngraph::element::boolean, + std::vector {0, -12, 23, 0, std::numeric_limits::lowest(), std::numeric_limits::max()}, + std::vector {0, 1, 1, 0, 1, 1}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {3, 3}, ngraph::element::f32, ngraph::element::boolean, + std::vector {0.f, 1.5745f, 0.12352f, 0.f, std::numeric_limits::lowest(), std::numeric_limits::max(), + std::numeric_limits::min(), std::numeric_limits::infinity(), -std::numeric_limits::infinity()}, + std::vector {0, 1, 1, 0, 1, 1, 1, 1, 1}), + // destination bf16 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::bf16, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::bf16, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), + + // destination f16 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f16, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::f16, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), + + // destination f32 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u1, ngraph::element::f32, + std::vector {0xA0}, std::vector {1.0f, 0.0f, 1.0f, 0.0f}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u4, ngraph::element::f32, + std::vector {0xFB, 0x0A}, std::vector {15.0f, 11.0f, 0.0f, 10.0f}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u8, ngraph::element::f32, + std::vector {255, 128, 32, 0}, std::vector {255.0f, 128.0f, 32.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u16, ngraph::element::f32, + std::vector {64000, 32000, 128, 0}, std::vector {64000.0f, 32000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u32, ngraph::element::f32, + std::vector {4000000, 2000000, 128, 0}, std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u64, ngraph::element::f32, + std::vector {4000000, 2000000, 128, 0}, std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i4, ngraph::element::f32, + std::vector 
{0xFE, 0xF2}, std::vector {-1.0f, -2.0f, -1.0f, 2.0f}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i8, ngraph::element::f32, + std::vector {-127, -0, 0, 127}, std::vector {-127.0f, -0.0f, 0.0f, 127.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i16, ngraph::element::f32, + std::vector {-32000, -0, 0, 32000}, std::vector {-32000.0f, -0.0f, 0.0f, 32000.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i32, ngraph::element::f32, + std::vector {-64000, -0, 0, 64000}, std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i64, ngraph::element::f32, + std::vector {-64000, -0, 0, 64000}, std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::bf16, ngraph::element::f32, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f16, ngraph::element::f32, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f32, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + + // destination i4 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::i4, std::vector {0xA0}, + std::vector {0x10, 0x10}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i4, std::vector {0x12, 0x03}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i4, std::vector {0xFE, 0x03}, + std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, 
ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + // destination i8 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i8, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i8, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i8, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + // destination i16 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i16, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, 
ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i16, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i16, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + // destination i32 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i32, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, 
ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i32, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i32, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + // destination i64 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i64, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i64, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i64, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i64, 
std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + + // destination u1 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u1, std::vector {0xA0}, + std::vector {0xA0}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u4, ngraph::element::u1, + std::vector {0x10, 0x01, 0x00, 0x00}, std::vector {0x90}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u8, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u64, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i4, ngraph::element::u1, + std::vector {0x10, 0x01, 0x00, 0x00}, std::vector {0x90}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i8, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i64, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::f16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::bf16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::f32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + + // destination u4 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::u4, std::vector {0xA0}, + std::vector {0x10, 0x10}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u4, std::vector {0x12, 0x03}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + 
ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u4, std::vector {0xFE, 0x03}, + std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + + // destination u8 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u8, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + 
ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u8, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u8, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + + // destination u16 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u16, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u16, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u16, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + + // destination u32 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u32, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, 
ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u32, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u32, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + // destination u64 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u64, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u64, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u64, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u64, std::vector 
{1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u64, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u64, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u64, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u64, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3})), + ReferenceConversionLayerTest::getTestCaseName); +} // namespace +} // namespace ConversionOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/equal.cpp b/docs/template_plugin/tests/functional/op_reference/equal.cpp new file mode 100644 index 00000000000..d80ec3271fb --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/equal.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "comparison.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using ComparisonTypes = ngraph::helpers::ComparisonTypes; + + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { +namespace { + +TEST_P(ReferenceComparisonLayerTest, EqualCompareWithHardcodedRefs) { + Exec(); +} + +template +std::vector generateComparisonParams(const element::Type& type) { + using T = typename element_type_traits::value_type; + std::vector compParams { + // 1D // 2D // 3D // 4D + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .input2({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .expected({{2, 2}, element::boolean, std::vector {1, 1, 1, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}}) + .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}}) + .expected({{2, 3}, element::boolean, std::vector {0, 0, 0, 1, 0, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{1}, type, std::vector {53}}) + .input2({{1}, type, std::vector {53}}) + .expected({{1}, element::boolean, std::vector {1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}}) + .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}}) + .expected({{2, 4}, element::boolean, std::vector {1, 1, 1, 1, 0, 1, 1, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{3, 2, 2}, element::boolean, std::vector {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 1, 0, 1}})}; + return compParams; +} + +std::vector generateComparisonCombinedParams() { + const std::vector> compTypeParams { + generateComparisonParams(element::f32), + generateComparisonParams(element::f16), + generateComparisonParams(element::i32), + generateComparisonParams(element::u32), + generateComparisonParams(element::boolean)}; + 
std::vector combinedParams;
+
+    for (const auto& params : compTypeParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
+                         ReferenceComparisonLayerTest::getTestCaseName);
+} // namespace
+} // namespace ComparisonOpsRefTestDefinitions
+} // namespace reference_tests
\ No newline at end of file
diff --git a/docs/template_plugin/tests/functional/op_reference/erf.cpp b/docs/template_plugin/tests/functional/op_reference/erf.cpp
new file mode 100644
index 00000000000..bd888a8e03c
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/erf.cpp
@@ -0,0 +1,94 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "base_reference_test.hpp"
+
+using namespace reference_tests;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+struct ErfParams {
+    template
+    ErfParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector& iValues)
+        : pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)) {
+        std::vector oValues;
+        std::vector output;
+        for (auto element : iValues)
+            output.push_back(static_cast(element));
+
+        std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
+            return std::erf(input);
+        });
+
+        if (std::is_integral()) {
+            std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
+                return std::round(input);
+            });
+        }
+
+        for (auto element : output)
+            oValues.push_back(static_cast(element));
+        refData = CreateBlob(outType, oValues);
+    }
+    ngraph::PartialShape pshape;
+    ngraph::element::Type inType;
+    ngraph::element::Type outType;
+    InferenceEngine::Blob::Ptr inputData;
+    InferenceEngine::Blob::Ptr refData;
+};
+
+class ReferenceErfLayerTest : public testing::TestWithParam, public CommonReferenceTest {
+public:
+    void SetUp() override {
+        auto params = GetParam();
+        function = CreateFunction(params.pshape, params.inType, params.outType);
+        inputData = {params.inputData};
+        refOutData = {params.refData};
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo& obj) {
+        auto param = obj.param;
+        std::ostringstream result;
+        result << "shape=" << param.pshape << "_";
+        result << "iType=" << param.inType << "_";
+        result << "oType=" << param.outType;
+        return result.str();
+    }
+
+private:
+    static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type,
+                                          const element::Type& expected_output_type) {
+        const auto in = std::make_shared(input_type, input_shape);
+        const auto erf = std::make_shared(in);
+        return std::make_shared(NodeVector {erf}, ParameterVector {in});
+    }
+};
+
+TEST_P(ReferenceErfLayerTest, CompareWithRefs) {
+    Exec();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    smoke_Erf_With_Hardcoded_Refs, ReferenceErfLayerTest,
+    ::testing::Values(ErfParams(ngraph::PartialShape {2, 5}, ngraph::element::f32,
+                                std::vector {-INFINITY, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, INFINITY}),
+                      ErfParams(ngraph::PartialShape {2, 5}, ngraph::element::f16,
+                                std::vector {-INFINITY, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, INFINITY}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::i32,
+                                std::vector {std::numeric_limits::min(), -2, -1, 1, 2, std::numeric_limits::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::u32,
+                                std::vector {std::numeric_limits::min(), 0, 1, 2, 3, std::numeric_limits::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::i64,
+                                std::vector {std::numeric_limits::min(), -2, -1, 1, 2, std::numeric_limits::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::u64,
+                                std::vector {std::numeric_limits::min(), 0, 1, 2, 3, std::numeric_limits::max()})),
+    ReferenceErfLayerTest::getTestCaseName);
diff --git a/docs/template_plugin/tests/functional/op_reference/less.cpp b/docs/template_plugin/tests/functional/op_reference/less.cpp
new file mode 100644
index 00000000000..5d01cdfab64
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/less.cpp
@@ -0,0 +1,82 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "comparison.hpp"
+
+using namespace ngraph;
+using namespace InferenceEngine;
+using ComparisonTypes = ngraph::helpers::ComparisonTypes;
+
+namespace reference_tests {
+namespace ComparisonOpsRefTestDefinitions {
+namespace {
+TEST_P(ReferenceComparisonLayerTest, LessCompareWithHardcodedRefs) {
+    Exec();
+}
+
+template
+std::vector generateComparisonParams(const element::Type& type) {
+    using T = typename element_type_traits::value_type;
+    std::vector compParams {
+        // 1D // 2D // 3D // 4D
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{2, 2}, type, std::vector {0, 12, 23, 0}})
+            .input2({{2, 2}, type, std::vector {0, 12, 23, 0}})
+            .expected({{2, 2}, element::boolean, std::vector {0, 0, 0, 0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}})
+            .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}})
+            .expected({{2, 3}, element::boolean, std::vector {1, 1, 0, 0, 0, 0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{1}, type, std::vector {53}})
+            .input2({{1}, type, std::vector {53}})
+            .expected({{1}, element::boolean, std::vector {0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}})
+            .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}})
+            .expected({{2, 4}, element::boolean, std::vector {0, 0, 0, 0, 1, 0, 0, 0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}})
+            .input2({{1, 2, 1}, type, std::vector {1, 1}})
+            .expected({{3, 2, 2}, element::boolean, std::vector {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}})
+            .input2({{1, 2, 1}, type, std::vector {1, 1}})
+            .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 0, 0, 0}})};
+    return compParams;
+}
+
+std::vector generateComparisonCombinedParams() {
+    const std::vector> compTypeParams {
+        generateComparisonParams(element::f32),
+        generateComparisonParams(element::f16),
+        generateComparisonParams(element::i32),
+        generateComparisonParams(element::u32),
+        generateComparisonParams(element::boolean)};
+    std::vector combinedParams;
+
+    for (const auto& params : compTypeParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+} // namespace
+INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
+                         ReferenceComparisonLayerTest::getTestCaseName);
+} // namespace ComparisonOpsRefTestDefinitions
+} // namespace reference_tests
\ No newline at end of file
diff --git a/docs/template_plugin/tests/functional/op_reference/less_eq.cpp b/docs/template_plugin/tests/functional/op_reference/less_eq.cpp
new file mode 100644
index 00000000000..f530867f847
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/less_eq.cpp
@@ -0,0 +1,82 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "comparison.hpp"
+
+using namespace ngraph;
+using namespace InferenceEngine;
+using ComparisonTypes = ngraph::helpers::ComparisonTypes;
+
+namespace reference_tests {
+namespace ComparisonOpsRefTestDefinitions {
+namespace {
+TEST_P(ReferenceComparisonLayerTest, LessEqualCompareWithHardcodedRefs) {
+    Exec();
+}
+
+template
+std::vector generateComparisonParams(const element::Type& type) {
+    using T = typename element_type_traits::value_type;
+    std::vector compParams {
+        // 1D // 2D // 3D // 4D
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{2, 2}, type, std::vector {0, 12, 23, 0}})
+            .input2({{2, 2}, type, std::vector {0, 12, 23, 0}})
+            .expected({{2, 2}, element::boolean, std::vector {1, 1, 1, 1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}})
+            .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}})
+            .expected({{2, 3}, element::boolean, std::vector {1, 1, 0, 1, 0, 1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{1}, type, std::vector {53}})
+            .input2({{1}, type, std::vector {53}})
+            .expected({{1}, element::boolean, std::vector {1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}})
+            .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}})
+            .expected({{2, 4}, element::boolean, std::vector {1, 1, 1, 1, 1, 1, 1, 1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}})
+            .input2({{1, 2, 1}, type, std::vector {1, 1}})
+            .expected({{3, 2, 2}, element::boolean, std::vector {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}})
+            .input2({{1, 2, 1}, type, std::vector {1, 1}})
+            .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 1, 0, 1}})};
+    return compParams;
+}
+
+std::vector generateComparisonCombinedParams() {
+    const std::vector> compTypeParams {
+        generateComparisonParams(element::f32),
+        generateComparisonParams(element::f16),
+        generateComparisonParams(element::i32),
+        generateComparisonParams(element::u32),
+        generateComparisonParams(element::boolean)};
+    std::vector combinedParams;
+
+    for (const auto& params : compTypeParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+} // namespace
+INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
+                         ReferenceComparisonLayerTest::getTestCaseName);
+} // namespace ComparisonOpsRefTestDefinitions
+} // namespace reference_tests
\ No newline at end of file
diff --git a/docs/template_plugin/tests/functional/op_reference/logical_and.cpp b/docs/template_plugin/tests/functional/op_reference/logical_and.cpp
new file mode 100644
index
00000000000..0313874533e --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/logical_and.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + + +struct LogicalAndParams { + template + LogicalAndParams(const ngraph::PartialShape& input_shape1, const ngraph::PartialShape& input_shape2 , + const std::vector& iValues1, const std::vector& iValues2, const std::vector& oValues) + : pshape1(input_shape1), pshape2(input_shape2), inType(ngraph::element::boolean), outType(ngraph::element::boolean), + inputData1(CreateBlob(ngraph::element::boolean, iValues1)), inputData2(CreateBlob(ngraph::element::boolean, iValues2)), + refData(CreateBlob(ngraph::element::boolean, oValues)) {} + ngraph::PartialShape pshape1; + ngraph::PartialShape pshape2; + ngraph::element::Type inType; + ngraph::element::Type outType; + InferenceEngine::Blob::Ptr inputData1; + InferenceEngine::Blob::Ptr inputData2; + InferenceEngine::Blob::Ptr refData; +}; + +class ReferenceLogicalAndLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.pshape1, params.pshape2, params.inType); + inputData = {params.inputData1, params.inputData2}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "input_shape1=" << param.pshape1 << "_"; + result << "input_shape2=" << param.pshape2 << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const PartialShape& input_shape1, + const PartialShape& input_shape2, const element::Type& input_type) { + const auto in = std::make_shared(input_type, input_shape1); + const auto in2 = std::make_shared(input_type, input_shape2); + const auto logical_and = std::make_shared(in, in2); + return std::make_shared(NodeVector {logical_and}, ParameterVector {in, in2}); + } +}; + +TEST_P(ReferenceLogicalAndLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_LogicalAnd_With_Hardcoded_Refs, ReferenceLogicalAndLayerTest, + ::testing::Values( + LogicalAndParams(ngraph::PartialShape {2, 2}, ngraph::PartialShape {2, 2}, + std::vector {true, false, true, false}, + std::vector {false, true, true, false}, + std::vector {false, false, true, false}), + LogicalAndParams(ngraph::PartialShape {2, 1, 2, 1}, ngraph::PartialShape {1, 1, 2, 1}, + std::vector {true, false, true, false}, + std::vector {true, false}, + std::vector {true, false, true, false}), + LogicalAndParams(ngraph::PartialShape {3, 4}, ngraph::PartialShape {3, 4}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, true}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, false}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, false})), + ReferenceLogicalAndLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp b/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp new file mode 100644 index 00000000000..9baedeb3404 --- /dev/null +++ 
b/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp @@ -0,0 +1,226 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace reference_tests; + +struct ROIPoolingParams { + template + ROIPoolingParams(const size_t iH, const size_t iW, const size_t ch, const size_t rois, + const size_t oH, const size_t oW, const float sS, const std::string mode, + const ngraph::element::Type& type, const std::vector& inputValues, + const std::vector& proposalValues, const std::vector& outputValues) + : inputH(iH), inputW(iW), channelCount(ch), roiCount(rois), outputH(oH), outputW(oW), spatialScale(sS), + poolingMode(mode), dataType(type), featureMap(CreateBlob(type, inputValues)), + proposal(CreateBlob(type, proposalValues)), refData(CreateBlob(type, outputValues)) {} + size_t inputH; + size_t inputW; + size_t channelCount; + size_t roiCount; + size_t outputH; + size_t outputW; + float spatialScale; + std::string poolingMode; + ngraph::element::Type dataType; + InferenceEngine::Blob::Ptr featureMap; + InferenceEngine::Blob::Ptr proposal; + InferenceEngine::Blob::Ptr refData; + +public: + template + inline static std::vector increasinglyFilledBlob(size_t size) { + std::vector inputValues; + T one = 1; + for (size_t i = 0; i < size; i++) { + inputValues.push_back(one * i / 10); + } + return inputValues; + } + template + inline static std::vector equallyFilledBlob(size_t size, T value) { + std::vector inputValues; + for (size_t i = 0; i < size; i++) { + inputValues.push_back(value); + } + return inputValues; + } +}; + +class ReferenceRoiPoolingLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.inputH, params.inputW, params.channelCount, params.roiCount, + params.outputH, params.outputW, params.spatialScale, params.poolingMode, params.dataType); + inputData = {params.featureMap, params.proposal}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "IS=" << param.inputH << "," << param.inputW << "_"; + result << "OS=" << param.outputH << "," << param.outputW << "_"; + result << "Ch=" << param.channelCount << "_"; + result << "Rois=" << param.roiCount << "_"; + result << "Ss=" << param.spatialScale << "_"; + result << "Mode=" << param.poolingMode << "_"; + result << "Prec=" << param.dataType << "_"; + result << std::to_string(obj.index); + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const size_t i_h, const size_t i_w, const size_t ch, const size_t roi_count, + const size_t o_h, const size_t o_w, const float spat_scale, const std::string mode, + const ngraph::element::Type& type) { + Shape feat_map_shape{1, ch, i_h, i_w}; + Shape rois_shape{roi_count, 5}; + Shape pooled_shape{o_h, o_w}; + Shape output_shape{roi_count, ch, o_h, o_w}; + + const auto feat_map = std::make_shared(type, feat_map_shape); + const auto rois = std::make_shared(type, rois_shape); + const auto roi_pooling = std::make_shared(feat_map, rois, pooled_shape, spat_scale, mode); + return std::make_shared(roi_pooling, ParameterVector{feat_map, rois}); + } +}; + +TEST_P(ReferenceRoiPoolingLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + 
+INSTANTIATE_TEST_SUITE_P( + smoke_ROIPooling_With_Hardcoded_Refs, ReferenceRoiPoolingLayerTest, + ::testing::Values( + // fp32 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + std::vector {1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f}), + // roi_pooling_2x2_bilinear_border_proposal + ROIPoolingParams(50, 50, // iH, iW + 1, 1, // channels, rois + 4, 4, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::equallyFilledBlob(1 * 50 * 50, 1), + std::vector {0.f, 0.f, 0.248046786f, 0.471333951f, 1.f}, + std::vector(16, 1.f)), + + // bf16 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + std::vector {1.225f, 1.645f, 4.585f, 4.937f, + 1.225f, 1.645f, 4.585f, 4.937f, + 1.225f, 1.645f, 4.585f, 4.937f}), + // fp16 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::f16, 
ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + std::vector {1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f})), + ReferenceRoiPoolingLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/tan.cpp b/docs/template_plugin/tests/functional/op_reference/tan.cpp new file mode 100644 index 00000000000..5be7a7ad03c --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/tan.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace reference_tests; + +namespace { +struct TanParams { + template + TanParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector& iValues, + const std::vector& oValues) + :pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(iType, oValues)) {} + ngraph::PartialShape pshape; + ngraph::element::Type inType; + ngraph::element::Type outType; + InferenceEngine::Blob::Ptr inputData; + InferenceEngine::Blob::Ptr refData; +}; + +class ReferenceTanLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.pshape, params.inType); + inputData = {params.inputData}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.pshape << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type) { + const auto in = std::make_shared(input_type, input_shape); + const auto tan = std::make_shared(in); + return std::make_shared(tan, ParameterVector {in}); + } +}; + +TEST_P(ReferenceTanLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +std::vector generateTanCombinedParams() { + std::vector combinedParams { + TanParams(ngraph::PartialShape {5}, ngraph::element::i32, std::vector {-2, -1, 0, 1, 2}, + std::vector {2, -2, 0, 2, -2}), + 
TanParams(ngraph::PartialShape {5}, ngraph::element::i64, std::vector<int64_t> {-2, -1, 0, 1, 2}, + std::vector<int64_t> {2, -2, 0, 2, -2}), + TanParams(ngraph::PartialShape {5}, ngraph::element::u32, std::vector<uint32_t> {1, 2, 3, 4, 5}, + std::vector<uint32_t> {2, 0xFFFFFFFF - 1, 0, 1, 0xFFFFFFFF - 2}), + TanParams(ngraph::PartialShape {5}, ngraph::element::u64, std::vector<uint64_t> {1, 2, 3, 4, 5}, + std::vector<uint64_t> {2, 0xFFFFFFFFFFFFFFFF - 1, 0, 1, 0xFFFFFFFFFFFFFFFF - 2}), + TanParams(ngraph::PartialShape {11}, ngraph::element::f32, std::vector<float> {0.f, 0.25f, + -0.25f, 0.5f, -0.5f, 1.f, -1.f, 2.f, -2.f, 4.f, -4.f}, + std::vector<float> {0.00000000f, 0.25534192f, -0.25534192f, 0.54630249f, -0.54630249f, + 1.55740772f, -1.55740772f, -2.18503986f, 2.18503986f, 1.15782128f, -1.15782128f}), + TanParams(ngraph::PartialShape {11}, ngraph::element::f16, std::vector<ngraph::float16> {0.f, 0.25f, + -0.25f, 0.5f, -0.5f, 1.f, -1.f, 2.f, -2.f, 4.f, -4.f}, + std::vector<ngraph::float16> {0.00000000f, 0.25534192f, -0.25534192f, 0.54630249f, -0.54630249f, + 1.55740772f, -1.55740772f, -2.18503986f, 2.18503986f, 1.15782128f, -1.15782128f}) + }; + return combinedParams; +} + +INSTANTIATE_TEST_SUITE_P(smoke_TAN_With_Hardcoded_Refs, ReferenceTanLayerTest, ::testing::ValuesIn(generateTanCombinedParams()), + ReferenceTanLayerTest::getTestCaseName); +} // namespace diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake index d33a73a5fa7..eb844d25b76 100644 --- a/inference-engine/cmake/ie_parallel.cmake +++ b/inference-engine/cmake/ie_parallel.cmake @@ -29,6 +29,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE) if (NOT TBB_FOUND) + set(THREADING "SEQ" PARENT_SCOPE) ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\ SEQ method will be used.") endif () @@ -95,6 +96,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(IE_THREAD_DEFINE "IE_THREAD_TBB") ie_target_link_libraries(${TARGET_NAME} ${LINK_TYPE} ${TBB_IMPORTED_TARGETS}) else () + set(THREADING "SEQ" PARENT_SCOPE) ext_message(WARNING "TBB was not found by the configured TBB_DIR path.\ SEQ method will be used for ${TARGET_NAME}") endif () @@ -133,6 +135,7 @@ function(set_ie_threading_interface_for TARGET_NAME) if (NOT OMP_LIBRARIES_RELEASE) ext_message(WARNING "Intel OpenMP not found. Intel OpenMP support will be disabled. 
${IE_THREAD_DEFINE} is defined") + set(THREADING "SEQ" PARENT_SCOPE) else () set(IE_THREAD_DEFINE "IE_THREAD_OMP") diff --git a/inference-engine/ie_bridges/c/src/CMakeLists.txt b/inference-engine/ie_bridges/c/src/CMakeLists.txt index 69760a52de9..a0e1b3469c9 100644 --- a/inference-engine/ie_bridges/c/src/CMakeLists.txt +++ b/inference-engine/ie_bridges/c/src/CMakeLists.txt @@ -14,7 +14,7 @@ add_library(${TARGET_NAME} SHARED ${HEADERS} ${SOURCES}) target_link_libraries(${TARGET_NAME} PRIVATE inference_engine) target_include_directories(${TARGET_NAME} PUBLIC - $ + $ $) add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) @@ -40,5 +40,5 @@ install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core_c) install(DIRECTORY ${InferenceEngine_C_API_SOURCE_DIR}/include/ - DESTINATION ${IE_CPACK_IE_DIR}/include + DESTINATION ${IE_CPACK_IE_DIR}/include/ie COMPONENT core_c_dev) diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt index 15d248379d7..a88b1017a12 100644 --- a/inference-engine/ie_bridges/python/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/CMakeLists.txt @@ -58,6 +58,13 @@ else() endif() endif() +function(ov_python_disable_intel_warnings target) + if(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + # 1292: unknown attribute "fallthrough" + target_compile_options(${target} PRIVATE -diag-disable=1292) + endif() +endfunction() + set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory (src/openvino/inference_engine) add_subdirectory (src/openvino/offline_transformations) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index 059f335f5df..cfab4f2d907 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -20,13 +20,15 @@ set_source_files_properties(${PYX_SOURCES} PROPERTIES CYTHON_IS_CXX ON) # create target cython_add_module(${TARGET_NAME} ${SOURCES}) -set(INSTALLED_TARGETS ${TARGET_NAME}) +ov_python_disable_intel_warnings(${TARGET_NAME}) +set(INSTALLED_TARGETS ${TARGET_NAME}) list(REMOVE_ITEM PYX_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx") foreach(PYX_FILE IN LISTS PYX_SOURCES) get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE) cython_add_module(${PYX_NAME} ${PYX_FILE}) + ov_python_disable_intel_warnings(${PYX_NAME}) add_dependencies(${TARGET_NAME} ${PYX_NAME}) target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES}) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx index 66269fba630..5f7a0a02bcb 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx @@ -284,7 +284,9 @@ cdef class IECore: # If the parameter is not specified, the default configuration is handled automatically. 
# @return Instance of IECore class def __cinit__(self, xml_config_file: str = ""): - self.impl = C.IECore(xml_config_file.encode()) + cdef string c_xml_config_file = xml_config_file.encode() + with nogil: + self.impl = C.IECore(c_xml_config_file) ## Get a `namedtuple` object with versions of the plugin specified # @param device_name: Name of the the registered plugin @@ -326,12 +328,15 @@ cdef class IECore: cdef string weights_ cdef string model_ cdef IENetwork net = IENetwork() + cdef size_t bin_size if init_from_buffer: model_ = bytes(model) - net.impl = self.impl.readNetwork(model_, weights, len(weights)) + bin_buffer = weights + bin_size = len(weights) + with nogil: + net.impl = self.impl.readNetwork(model_, bin_buffer, bin_size) else: weights_ = "".encode() - model = os.fspath(model) if not os.path.isfile(model): raise Exception(f"Path to the model {model} doesn't exist or it's a directory") @@ -342,8 +347,8 @@ cdef class IECore: if not os.path.isfile(weights): raise Exception(f"Path to the weights {weights} doesn't exist or it's a directory") weights_ = weights.encode() - - net.impl = self.impl.readNetwork(model_, weights_) + with nogil: + net.impl = self.impl.readNetwork(model_, weights_) return net ## Loads a network that was read from the Intermediate Representation (IR) to the plugin with specified device name @@ -367,16 +372,22 @@ cdef class IECore: cpdef ExecutableNetwork load_network(self, network: [IENetwork, str], str device_name, config=None, int num_requests=1): cdef ExecutableNetwork exec_net = ExecutableNetwork() cdef map[string, string] c_config + cdef string c_device_name + cdef string c_network_path if num_requests < 0: raise ValueError(f"Incorrect number of requests specified: {num_requests}. Expected positive integer number " "or zero for auto detection") if config: c_config = dict_to_c_map(config) exec_net.ie_core_impl = self.impl + c_device_name = device_name.encode() if isinstance(network, str): - exec_net.impl = move(self.impl.loadNetworkFromFile((network).encode(), device_name.encode(), c_config, num_requests)) + c_network_path = network.encode() + with nogil: + exec_net.impl = move(self.impl.loadNetworkFromFile(c_network_path, c_device_name, c_config, num_requests)) else: - exec_net.impl = move(self.impl.loadNetwork((network).impl, device_name.encode(), c_config, num_requests)) + with nogil: + exec_net.impl = move(self.impl.loadNetwork((network).impl, c_device_name, c_config, num_requests)) return exec_net ## Creates an executable network from a previously exported network @@ -534,7 +545,9 @@ cdef class IECore: # If there are more than one device of a specific type, they all are listed followed by a dot and a number. @property def available_devices(self): - cdef vector[string] c_devices = self.impl.getAvailableDevices() + cdef vector[string] c_devices + with nogil: + c_devices = self.impl.getAvailableDevices() return [d.decode() for d in c_devices] ## This structure stores info about pre-processing of network inputs (scale, mean image, ...) 
@@ -897,15 +910,19 @@ cdef class ExecutableNetwork: ## A tuple of `InferRequest` instances @property def requests(self): + cdef size_t c_infer_requests_size + with nogil: + c_infer_requests_size = deref(self.impl).infer_requests.size() if len(self._infer_requests) == 0: - for i in range(deref(self.impl).infer_requests.size()): + for i in range(c_infer_requests_size): infer_request = InferRequest() - infer_request.impl = &(deref(self.impl).infer_requests[i]) + with nogil: + infer_request.impl = &(deref(self.impl).infer_requests[i]) infer_request._inputs_list = list(self.input_info.keys()) infer_request._outputs_list = list(self.outputs.keys()) self._infer_requests.append(infer_request) - if len(self._infer_requests) != deref(self.impl).infer_requests.size(): + if len(self._infer_requests) != c_infer_requests_size: raise Exception("Mismatch of infer requests number!") return self._infer_requests @@ -923,26 +940,6 @@ cdef class ExecutableNetwork: inputs[in_.first.decode()] = input_info_ptr return inputs - ## \note The property is deprecated. Please use the input_info property - # to get the map of inputs - # - ## A dictionary that maps input layer names to DataPtr objects - @property - def inputs(self): - warnings.warn("'inputs' property of ExecutableNetwork class is deprecated. " - "To access DataPtrs user need to use 'input_data' property " - "of InputInfoCPtr objects which can be accessed by 'input_info' property.", - DeprecationWarning) - cdef map[string, C.DataPtr] c_inputs = deref(self.impl).getInputs() - inputs = {} - cdef DataPtr data_ptr - for in_ in c_inputs: - data_ptr = DataPtr() - data_ptr._ptr = in_.second - data_ptr._ptr_plugin = deref(self.impl).getPluginLink() - inputs[in_.first.decode()] = data_ptr - return inputs - ## A dictionary that maps output layer names to CDataPtr objects @property def outputs(self): @@ -1022,16 +1019,26 @@ cdef class ExecutableNetwork: # If not specified, `timeout` value is set to -1 by default. # @return Request status code: OK or RESULT_NOT_READY cpdef wait(self, num_requests=None, timeout=None): + cdef int status_code + cdef int64_t c_timeout + cdef int c_num_requests if num_requests is None: num_requests = len(self.requests) + c_num_requests = num_requests if timeout is None: timeout = WaitMode.RESULT_READY - return deref(self.impl).wait( num_requests, timeout) + c_timeout = timeout + with nogil: + status_code = deref(self.impl).wait(c_num_requests, c_timeout) + return status_code ## Get idle request ID # @return Request index cpdef get_idle_request_id(self): - return deref(self.impl).getIdleRequestId() + cdef int request_id + with nogil: + request_id = deref(self.impl).getIdleRequestId() + return request_id ctypedef extern void (*cb_type)(void*, int) with gil @@ -1177,8 +1184,8 @@ cdef class InferRequest: cpdef infer(self, inputs=None): if inputs is not None: self._fill_inputs(inputs) - - deref(self.impl).infer() + with nogil: + deref(self.impl).infer() ## Starts asynchronous inference of the infer request and fill outputs array # @@ -1197,7 +1204,8 @@ cdef class InferRequest: self._fill_inputs(inputs) if self._py_callback_used: self._py_callback_called.clear() - deref(self.impl).infer_async() + with nogil: + deref(self.impl).infer_async() ## Waits for the result to become available. Blocks until specified timeout elapses or the result # becomes available, whichever comes first. @@ -1213,9 +1221,14 @@ cdef class InferRequest: # # Usage example: See `async_infer()` method of the the `InferRequest` class. 
cpdef wait(self, timeout=None): + cdef int status + cdef int64_t c_timeout + cdef int c_wait_mode if self._py_callback_used: # check request status to avoid blocking for idle requests - status = deref(self.impl).wait(WaitMode.STATUS_ONLY) + c_wait_mode = WaitMode.STATUS_ONLY + with nogil: + status = deref(self.impl).wait(c_wait_mode) if status != StatusCode.RESULT_NOT_READY: return status if not self._py_callback_called.is_set(): @@ -1230,8 +1243,10 @@ cdef class InferRequest: if timeout is None: timeout = WaitMode.RESULT_READY - - return deref(self.impl).wait( timeout) + c_timeout = timeout + with nogil: + status = deref(self.impl).wait(c_timeout) + return status ## Queries performance measures per layer to get feedback of what is the most time consuming layer. # @@ -1268,27 +1283,6 @@ cdef class InferRequest: "cpu_time": info.cpu_time, "execution_index": info.execution_index} return profile - ## A dictionary that maps input layer names to `numpy.ndarray` - # objects of proper shape with input data for the layer - @property - def inputs(self): - warnings.warn("'inputs' property of InferRequest is deprecated. Please instead use 'input_blobs' property.", - DeprecationWarning) - inputs = {} - for input in self._inputs_list: - inputs[input] = self._get_blob_buffer(input.encode()).to_numpy() - return inputs - - ## A dictionary that maps output layer names to `numpy.ndarray` objects with output data of the layer - @property - def outputs(self): - warnings.warn("'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.", - DeprecationWarning) - outputs = {} - for output in self._outputs_list: - outputs[output] = self._get_blob_buffer(output.encode()).to_numpy() - return deepcopy(outputs) - ## Current infer request inference time in milliseconds @property def latency(self): @@ -1333,68 +1327,25 @@ cdef class InferRequest: cdef class IENetwork: ## Class constructor # - # \note Reading networks using IENetwork constructor is deprecated. - # Please, use IECore.read_network() method instead. + # @param model: A PyCapsule containing smart pointer to nGraph function. # - # @param model: A `.xml` file of the IR or PyCapsule containing smart pointer to nGraph function. - # In case of passing a `.xml` file attribute value can be a string path or bytes with file content - # depending on `init_from_buffer` attribute value - # . - # @param weights: A `.bin` file of the IR. Depending on `init_from_buffer` value, can be a string path or - # bytes with file content. - # @param init_from_buffer: Defines the way of how `model` and `weights` attributes are interpreted. - # If `False`, attributes are interpreted as strings with paths to .xml and .bin files - # of IR. If `True`, they are interpreted as Python `bytes` object with .xml and .bin files content. - # Ignored in case of `IENetwork` object initialization from nGraph function. 
# @return Instance of IENetwork class # # Usage example:\n # Initializing `IENetwork` object from IR files: # ```python - # net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) + # func = Function([relu], [param], 'test') + # caps = Function.to_capsule(func) + # net = IENetwork(caps) # ``` - # - # Initializing `IENetwork` object bytes with content of IR files: - # ```python - # with open(path_to_bin_file, 'rb') as f: - # bin = f.read() - # with open(path_to_xml_file, 'rb') as f: - # xml = f.read() - # net = IENetwork(model=xml, weights=bin, init_from_buffer=True) - # ``` - - def __cinit__(self, model: [str, bytes] = "", weights: [str, bytes] = "", init_from_buffer: bool = False): + def __cinit__(self, model = None): # Try to create Inference Engine network from capsule - if model.__class__.__name__ == 'PyCapsule' and weights == '' and init_from_buffer is False: - self.impl = C.IENetwork(model) - return - cdef char*xml_buffer = malloc(len(model)+1) - cdef uint8_t*bin_buffer = malloc(len(weights)) - cdef string model_ - cdef string weights_ - if init_from_buffer: - warnings.warn("Reading network using constructor is deprecated. " - "Please, use IECore.read_network() method instead", DeprecationWarning) - memcpy(xml_buffer, model, len(model)) - memcpy(bin_buffer, weights, len(weights)) - xml_buffer[len(model)] = b'\0' - self.impl = C.IENetwork() - self.impl.load_from_buffer(xml_buffer, len(model), bin_buffer, len(weights)) + if model is not None: + with nogil: + self.impl = C.IENetwork(model) else: - if model and weights: - warnings.warn("Reading network using constructor is deprecated. " - "Please, use IECore.read_network() method instead", DeprecationWarning) - if not os.path.isfile(model): - raise Exception(f"Path to the model {model} doesn't exist or it's a directory") - if not os.path.isfile(weights): - raise Exception(f"Path to the weights {weights} doesn't exist or it's a directory") - model_ = model.encode() - weights_ = weights.encode() - self.impl = C.IENetwork(model_, weights_) - else: + with nogil: self.impl = C.IENetwork() - free(bin_buffer) - free(xml_buffer) ## Name of the loaded network @property @@ -1405,7 +1356,9 @@ cdef class IENetwork: ## A dictionary that maps input layer names to InputInfoPtr objects. @property def input_info(self): - cdef map[string, C.InputInfo.Ptr] c_inputs = self.impl.getInputsInfo() + cdef map[string, C.InputInfo.Ptr] c_inputs + with nogil: + c_inputs = self.impl.getInputsInfo() inputs = {} cdef InputInfoPtr input_info_ptr for input in c_inputs: @@ -1415,30 +1368,12 @@ cdef class IENetwork: inputs[input.first.decode()] = input_info_ptr return inputs - ## \note The property is deprecated. Please use the input_info property - # to get the map of inputs - # - ## A dictionary that maps input layer names to DataPtr objects - @property - def inputs(self): - warnings.warn("'inputs' property of IENetwork class is deprecated. 
" - "To access DataPtrs user need to use 'input_data' property " - "of InputInfoPtr objects which can be accessed by 'input_info' property.", - DeprecationWarning) - cdef map[string, C.DataPtr] c_inputs = self.impl.getInputs() - inputs = {} - cdef DataPtr data_ptr - for input in c_inputs: - data_ptr = DataPtr() - data_ptr._ptr_network = &self.impl - data_ptr._ptr = input.second - inputs[input.first.decode()] = data_ptr - return inputs - ## A dictionary that maps output layer names to DataPtr objects @property def outputs(self): - cdef map[string, C.DataPtr] c_outputs = self.impl.getOutputs() + cdef map[string, C.DataPtr] c_outputs + with nogil: + c_outputs = self.impl.getOutputs() outputs = {} cdef DataPtr data_ptr for output in c_outputs: diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index e9d4f7660ba..1a6ae4f57ed 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp @@ -200,14 +200,6 @@ InferenceEnginePython::IENetwork InferenceEnginePython::read_network(std::string return InferenceEnginePython::IENetwork(std::make_shared(net)); } -InferenceEnginePython::IENetwork::IENetwork(const std::string& model, const std::string& weights) { - InferenceEngine::Core reader; - auto net = reader.ReadNetwork(model, weights); - actual = std::make_shared(net); - name = actual->getName(); - batch_size = actual->getBatchSize(); -} - InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr& cnn_network): actual(cnn_network) { if (actual == nullptr) IE_THROW() << "IENetwork was not initialized."; @@ -228,16 +220,6 @@ InferenceEnginePython::IENetwork::IENetwork(PyObject* network) { batch_size = actual->getBatchSize(); } -void InferenceEnginePython::IENetwork::load_from_buffer(const char* xml, size_t xml_size, uint8_t* bin, size_t bin_size) { - InferenceEngine::Core reader; - InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, {bin_size}, InferenceEngine::Layout::C); - auto weights_blob = InferenceEngine::make_shared_blob(tensorDesc, bin, bin_size); - auto net = reader.ReadNetwork(std::string(xml, xml + xml_size), weights_blob); - name = net.getName(); - actual = std::make_shared(net); - batch_size = actual->getBatchSize(); -} - void InferenceEnginePython::IENetwork::serialize(const std::string& path_to_xml, const std::string& path_to_bin) { actual->serialize(path_to_xml, path_to_bin); } @@ -275,15 +257,6 @@ const std::map InferenceEnginePyth return inputs; } -const std::map InferenceEnginePython::IENetwork::getInputs() { - std::map inputs; - const InferenceEngine::InputsDataMap& inputsInfo = actual->getInputsInfo(); - for (auto& in : inputsInfo) { - inputs[in.first] = in.second->getInputData(); - } - return inputs; -} - const std::map InferenceEnginePython::IENetwork::getOutputs() { std::map outputs; const InferenceEngine::OutputsDataMap& outputsInfo = actual->getOutputsInfo(); @@ -338,15 +311,6 @@ void InferenceEnginePython::IEExecNetwork::exportNetwork(const std::string& mode actual->Export(model_file); } -std::map InferenceEnginePython::IEExecNetwork::getInputs() { - InferenceEngine::ConstInputsDataMap inputsDataMap = actual->GetInputsInfo(); - std::map pyInputs; - for (const auto& item : inputsDataMap) { - pyInputs[item.first] = item.second->getInputData(); - } - return pyInputs; -} - std::map 
InferenceEnginePython::IEExecNetwork::getInputsInfo() { InferenceEngine::ConstInputsDataMap inputsDataMap = actual->GetInputsInfo(); std::map pyInputs; diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp index cd3fa07c49a..23d27474aff 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp @@ -60,18 +60,12 @@ struct IENetwork { const std::map getInputsInfo(); - const std::map getInputs(); - const std::map getOutputs(); void reshape(const std::map>& input_shapes); void serialize(const std::string& path_to_xml, const std::string& path_to_bin); - void load_from_buffer(const char* xml, size_t xml_size, uint8_t* bin, size_t bin_size); - - IENetwork(const std::string& model, const std::string& weights); - IENetwork(const std::shared_ptr& cnn_network); IENetwork(PyObject* network); @@ -146,7 +140,6 @@ struct IEExecNetwork { void exportNetwork(const std::string& model_file); std::map getInputsInfo(); - std::map getInputs(); std::map getOutputs(); PyObject* getMetric(const std::string& metric_name); diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd index 6f7fd918089..103c8d77d53 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd @@ -14,7 +14,7 @@ cdef extern from "" namespace "InferenceEngine": ctypedef vector[size_t] SizeVector cdef cppclass CExecutableNetwork "InferenceEngine::ExecutableNetwork" - + cdef cppclass TBlob[T]: ctypedef shared_ptr[TBlob[T]] Ptr @@ -154,27 +154,24 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": cdef cppclass IEExecNetwork: vector[InferRequestWrap] infer_requests IENetwork GetExecGraphInfo() except + - map[string, DataPtr] getInputs() except + map[string, CDataPtr] getOutputs() except + map[string, InputInfo.CPtr] getInputsInfo() void exportNetwork(const string & model_file) except + object getMetric(const string & metric_name) except + object getConfig(const string & metric_name) except + - int wait(int num_requests, int64_t timeout) - int getIdleRequestId() + int wait(int num_requests, int64_t timeout) nogil + int getIdleRequestId() nogil shared_ptr[CExecutableNetwork] getPluginLink() except + cdef cppclass IENetwork: - IENetwork() except + - IENetwork(object) except + - IENetwork(const string &, const string &) except + + IENetwork() nogil except + + IENetwork(object) nogil except + string name size_t batch_size string precision map[string, vector[size_t]] inputs - const map[string, InputInfo.Ptr] getInputsInfo() except + - const map[string, DataPtr] getInputs() except + - map[string, DataPtr] getOutputs() except + + const map[string, InputInfo.Ptr] getInputsInfo() nogil except + + map[string, DataPtr] getOutputs() nogil except + void addOutput(string &, size_t) except + void setAffinity(map[string, string] & types_affinity_map, map[string, string] & layers_affinity_map) except + void setBatch(size_t size) except + @@ -182,7 +179,6 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void setLayerParams(map[string, map[string, string]] params_map) except + void serialize(const string& path_to_xml, const 
string& path_to_bin) except + void reshape(map[string, vector[size_t]] input_shapes) except + - void load_from_buffer(const char*xml, size_t xml_size, uint8_t*bin, size_t bin_size) except + object getFunction() except + void convertToOldRepresentation() except + string getOVNameForTensor(const string &) except + @@ -195,23 +191,23 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void setBlob(const string &blob_name, const CBlob.Ptr &blob_ptr, CPreProcessInfo& info) except + const CPreProcessInfo& getPreProcess(const string& blob_name) except + map[string, ProfileInfo] getPerformanceCounts() except + - void infer() except + - void infer_async() except + - int wait(int64_t timeout) except + + void infer() nogil except + + void infer_async() nogil except + + int wait(int64_t timeout) nogil except + void setBatch(int size) except + void setCyCallback(void (*)(void*, int), void *) except + vector[CVariableState] queryState() except + cdef cppclass IECore: - IECore() except + - IECore(const string & xml_config_file) except + + IECore() nogil except + + IECore(const string & xml_config_file) nogil except + map[string, Version] getVersions(const string & deviceName) except + - IENetwork readNetwork(const string& modelPath, const string& binPath) except + - IENetwork readNetwork(const string& modelPath,uint8_t*bin, size_t bin_size) except + + IENetwork readNetwork(const string& modelPath, const string& binPath) nogil except + + IENetwork readNetwork(const string& modelPath,uint8_t*bin, size_t bin_size) nogil except + unique_ptr[IEExecNetwork] loadNetwork(IENetwork network, const string deviceName, - const map[string, string] & config, int num_requests) except + + const map[string, string] & config, int num_requests) nogil except + unique_ptr[IEExecNetwork] loadNetworkFromFile(const string & modelPath, const string & deviceName, - const map[string, string] & config, int num_requests) except + + const map[string, string] & config, int num_requests) nogil except + unique_ptr[IEExecNetwork] importNetwork(const string & modelFIle, const string & deviceName, const map[string, string] & config, int num_requests) except + map[string, string] queryNetwork(IENetwork network, const string deviceName, @@ -221,7 +217,7 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void unregisterPlugin(const string & deviceName) except + void registerPlugins(const string & xmlConfigFile) except + void addExtension(const string & ext_lib_path, const string & deviceName) except + - vector[string] getAvailableDevices() except + + vector[string] getAvailableDevices() nogil except + object getMetric(const string & deviceName, const string & name) except + object getConfig(const string & deviceName, const string & name) except + diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt index ba526c3761d..512b1662be5 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt @@ -20,7 +20,9 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_ # create target cython_add_module(${TARGET_NAME} ${SOURCES}) + add_dependencies(${TARGET_NAME} ie_api) +ov_python_disable_intel_warnings(${TARGET_NAME}) if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} diff --git 
a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt index 8367f941d9f..9d3e1e0ffc0 100644 --- a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt @@ -20,7 +20,9 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api.pyx # create target cython_add_module(${TARGET_NAME} ${SOURCES}) + add_dependencies(${TARGET_NAME} ie_api) +ov_python_disable_intel_warnings(${TARGET_NAME}) if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} diff --git a/inference-engine/ie_bridges/python/tests/conftest.py b/inference-engine/ie_bridges/python/tests/conftest.py index fd327147c33..e697b58ee63 100644 --- a/inference-engine/ie_bridges/python/tests/conftest.py +++ b/inference-engine/ie_bridges/python/tests/conftest.py @@ -21,11 +21,6 @@ def model_onnx_path(): test_onnx = os.path.join(path_to_repo, "models", "test_model", 'test_model.onnx') return test_onnx -def model_prototxt_path(): - path_to_repo = os.environ["MODELS_PATH"] - test_prototxt = os.path.join(path_to_repo, "models", "test_model", 'test_model.prototxt') - return test_prototxt - def image_path(): path_to_repo = os.environ["DATA_PATH"] path_to_img = os.path.join(path_to_repo, 'validation_set', '224x224', 'dog.bmp') diff --git a/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py b/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py index 11e4b479dac..65811503d98 100644 --- a/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py +++ b/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py @@ -82,24 +82,6 @@ def test_input_info(device): del ie_core -def test_inputs_deprecated(device): - ie_core = ie.IECore() - net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) - exec_net = ie_core.load_network(net, device, num_requests=5) - with warnings.catch_warnings(record=True) as w: - assert len(exec_net.inputs) == 1 - assert "data" in exec_net.inputs - assert isinstance(exec_net.inputs['data'], ie.DataPtr) - assert len(w) == 3 - for i in range (len(w)): - assert "'inputs' property of ExecutableNetwork class is deprecated. " \ - "To access DataPtrs user need to use 'input_data' property " \ - "of InputInfoCPtr objects which " \ - "can be accessed by 'input_info' property." 
in str(w[i].message) - del exec_net - del ie_core - - def test_outputs(device): ie_core = ie.IECore() net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) diff --git a/inference-engine/ie_bridges/python/tests/test_IECore.py b/inference-engine/ie_bridges/python/tests/test_IECore.py index 41d28f1c41b..ed15b12d9b9 100644 --- a/inference-engine/ie_bridges/python/tests/test_IECore.py +++ b/inference-engine/ie_bridges/python/tests/test_IECore.py @@ -5,14 +5,16 @@ import os import pytest from sys import platform from pathlib import Path +from threading import Thread +from time import sleep, time +from queue import Queue from openvino.inference_engine import IENetwork, IECore, ExecutableNetwork -from conftest import model_path, plugins_path, model_onnx_path, model_prototxt_path +from conftest import model_path, plugins_path, model_onnx_path test_net_xml, test_net_bin = model_path() test_net_onnx = model_onnx_path() -test_net_prototxt = model_prototxt_path() plugins_xml, plugins_win_xml, plugins_osx_xml = plugins_path() @@ -201,18 +203,6 @@ def test_read_network_from_onnx_as_path(): assert isinstance(net, IENetwork) -def test_read_network_from_prototxt(): - ie = IECore() - net = ie.read_network(model=test_net_prototxt) - assert isinstance(net, IENetwork) - - -def test_read_network_from_prototxt_as_path(): - ie = IECore() - net = ie.read_network(model=Path(test_net_prototxt)) - assert isinstance(net, IENetwork) - - def test_incorrect_xml(): ie = IECore() with pytest.raises(Exception) as e: @@ -253,3 +243,37 @@ def test_net_from_buffer_valid(): o_net2 = ref_net.outputs assert ii_net.keys() == ii_net2.keys() assert o_net.keys() == o_net2.keys() + + +@pytest.mark.skipif(os.environ.get("TEST_DEVICE","CPU") != "GPU", reason=f"Device dependent test") +def test_load_network_release_gil(device): + running = True + message_queue = Queue() + def detect_long_gil_holds(): + sleep_time = 0.01 + latency_alert_threshold = 0.1 + # Send a message to indicate the thread is running and ready to detect GIL locks + message_queue.put("ready to detect") + while running: + start_sleep = time() + sleep(sleep_time) + elapsed = time() - start_sleep + if elapsed > latency_alert_threshold: + # Send a message to the testing thread that a long GIL lock occurred + message_queue.put(latency_alert_threshold) + ie = IECore() + net = ie.read_network(model=test_net_xml, weights=test_net_bin) + # Wait for the GIL lock detector to be up and running + gil_hold_detection_thread = Thread(daemon=True, target=detect_long_gil_holds) + gil_hold_detection_thread.start() + # Wait to make sure the thread is started and checking for GIL holds + sleep(0.1) + assert message_queue.get(timeout=5) == "ready to detect" + # Run the function that should unlock the GIL + exec_net = ie.load_network(net, device) + # Ensure resources are closed + running = False + gil_hold_detection_thread.join(timeout=5) + # Assert there were never any long gil locks + assert message_queue.qsize() == 0, \ + f"More than 0 GIL locks occured! 
Latency: {message_queue.get()})" diff --git a/inference-engine/ie_bridges/python/tests/test_IENetwork.py b/inference-engine/ie_bridges/python/tests/test_IENetwork.py index 1c3474e6891..d808e177234 100644 --- a/inference-engine/ie_bridges/python/tests/test_IENetwork.py +++ b/inference-engine/ie_bridges/python/tests/test_IENetwork.py @@ -12,60 +12,12 @@ from conftest import model_path test_net_xml, test_net_bin = model_path() -def test_create_ie_network_deprecated(): - with warnings.catch_warnings(record=True) as w: - net = IENetwork(model=test_net_xml, weights=test_net_bin) - assert isinstance(net, IENetwork) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "Reading network using constructor is deprecated. " \ - "Please, use IECore.read_network() method instead" in str(w[0].message) - - -def test_incorrect_xml_deprecated(): - with warnings.catch_warnings(record=True) as w: - with pytest.raises(Exception) as e: - IENetwork(model="./model.xml", weights=test_net_bin) - assert "Path to the model ./model.xml doesn't exist or it's a directory" in str(e.value) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "Reading network using constructor is deprecated. " \ - "Please, use IECore.read_network() method instead" in str(w[0].message) - - -def test_incorrect_bin_deprecated(): - with warnings.catch_warnings(record=True) as w: - with pytest.raises(Exception) as e: - IENetwork(model=test_net_xml, weights="./model.bin") - assert "Path to the weights ./model.bin doesn't exist or it's a directory" in str(e.value) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "Reading network using constructor is deprecated. " \ - "Please, use IECore.read_network() method instead" in str(w[0].message) - - def test_name(): ie = IECore() net = ie.read_network(model=test_net_xml, weights=test_net_bin) assert net.name == "test_model" -def test_inputs_deprecated(): - ie = IECore() - net = ie.read_network(model=test_net_xml, weights=test_net_bin) - with warnings.catch_warnings(record=True) as w: - inp = net.inputs - assert isinstance(inp['data'], DataPtr) - assert inp['data'].layout == "NCHW" - assert inp['data'].precision == "FP32" - assert inp['data'].shape == [1, 3, 32, 32] - assert len(w) == 1 - assert "'inputs' property of IENetwork class is deprecated. " \ - "To access DataPtrs user need to use 'input_data' property " \ - "of InputInfoPtr objects which " \ - "can be accessed by 'input_info' property." in str(w[-1].message) - - def test_input_info(): ie = IECore() net = ie.read_network(model=test_net_xml, weights=test_net_bin) @@ -208,21 +160,7 @@ def test_reshape(): net.reshape({"data": (2, 3, 32, 32)}) -def test_read_net_from_buffer_deprecated(): - with warnings.catch_warnings(record=True) as w: - with open(test_net_bin, 'rb') as f: - bin = f.read() - with open(test_net_xml, 'rb') as f: - xml = f.read() - net = IENetwork(model=xml, weights=bin, init_from_buffer=True) - assert isinstance(net, IENetwork) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "Reading network using constructor is deprecated. 
" \ - "Please, use IECore.read_network() method instead" in str(w[0].message) - - -def test_net_from_buffer_valid_deprecated(): +def test_net_from_buffer_valid(): ie = IECore() with open(test_net_bin, 'rb') as f: bin = f.read() diff --git a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py index af79c0ff155..44afdfa9b61 100644 --- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py +++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py @@ -66,32 +66,6 @@ def test_output_blobs(device): assert executable_network.requests[0].output_blobs['fc_out'].tensor_desc == td -def test_inputs_deprecated(device): - ie_core = ie.IECore() - net = ie_core.read_network(test_net_xml, test_net_bin) - executable_network = ie_core.load_network(net, device, num_requests=2) - with warnings.catch_warnings(record=True) as w: - inputs = executable_network.requests[0].inputs - assert "'inputs' property of InferRequest is deprecated. " \ - "Please instead use 'input_blobs' property." in str(w[-1].message) - del executable_network - del ie_core - del net - - -def test_outputs_deprecated(device): - ie_core = ie.IECore() - net = ie_core.read_network(test_net_xml, test_net_bin) - executable_network = ie_core.load_network(net, device, num_requests=2) - with warnings.catch_warnings(record=True) as w: - outputs = executable_network.requests[0].outputs - assert "'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property." in str( - w[-1].message) - del executable_network - del ie_core - del net - - def test_inputs_list(device): ie_core = ie.IECore() net = ie_core.read_network(test_net_xml, test_net_bin) @@ -552,11 +526,10 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode): pytest.skip("Can't run on ARM plugin") layout = ["C", "HW", "CHW", "NCHW"] - np_data_type = {"FP32": np.float32, "FP16": np.float16, "I32": np.int32} - from openvino.inference_engine import TensorDesc, Blob + from openvino.inference_engine import TensorDesc, Blob, format_map - net = ie.IENetwork(create_function_with_memory(input_shape, np_data_type[data_type])) + net = ie.IENetwork(create_function_with_memory(input_shape, format_map[data_type])) ie_core = ie.IECore() exec_net = ie_core.load_network(network=net, device_name=device, num_requests=1) request = exec_net.requests[0] @@ -572,23 +545,23 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode): if mode == "set_init_memory_state": # create initial value const_init = 5 - init_array = np.full(input_shape, const_init, dtype=np_data_type[mem_state.state.tensor_desc.precision]) + init_array = np.full(input_shape, const_init, dtype=format_map[mem_state.state.tensor_desc.precision]) tensor_desc = TensorDesc(mem_state.state.tensor_desc.precision, input_shape, layout[len(input_shape) - 1]) blob = Blob(tensor_desc, init_array) mem_state.state = blob - res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) - expected_res = np.full(input_shape, 1 + const_init, dtype=np_data_type[data_type]) + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=format_map[data_type])}) + expected_res = np.full(input_shape, 1 + const_init, dtype=format_map[data_type]) elif mode == "reset_memory_state": # reset initial state of ReadValue to zero mem_state.reset() - res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) + res = exec_net.infer({"input_data": 
np.full(input_shape, 1, dtype=format_map[data_type])}) # always ones - expected_res = np.full(input_shape, 1, dtype=np_data_type[data_type]) + expected_res = np.full(input_shape, 1, dtype=format_map[data_type]) else: - res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) - expected_res = np.full(input_shape, i, dtype=np_data_type[data_type]) + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=format_map[data_type])}) + expected_res = np.full(input_shape, i, dtype=format_map[data_type]) assert np.allclose(res['MemoryAdd'], expected_res, atol=1e-6), \ - "Expected values: {} \n Actual values: {} \n".format(expected_res, res) + "Expected values: {} \n Actual values: {} \n".format(expected_res, res) \ No newline at end of file diff --git a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt index 681954f2766..1b1931c08a4 100644 --- a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt @@ -56,13 +56,13 @@ endif() add_custom_command(TARGET ie_wheel PRE_BUILD - COMMAND ${CMAKE_COMMAND} -E rm -rf "${CMAKE_CURRENT_BINARY_DIR}/site-packages" + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages" COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel --dist-dir ${CMAKE_BINARY_DIR}/wheels --build=${WHEEL_BUILD} --plat-name=${WHEEL_PLATFORM} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E rm "${CMAKE_CURRENT_SOURCE_DIR}/.env" + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_SOURCE_DIR}/.env" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" COMMENT "Building Python wheel ${WHEEL_PACKAGE_NAME}" VERBATIM diff --git a/inference-engine/samples/CMakeLists.txt b/inference-engine/samples/CMakeLists.txt index aef11e16f47..bccc7be715b 100644 --- a/inference-engine/samples/CMakeLists.txt +++ b/inference-engine/samples/CMakeLists.txt @@ -56,35 +56,30 @@ set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER}) if (WIN32) set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") # no asynchronous structured exception handling set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE") if (TREAT_WARNING_AS_ERROR) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") #treating warnings as errors + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") # treating warnings as errors endif () if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-disable:177") endif() + # disable some noisy warnings if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267 /wd4819") #disable some warnings + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267 /wd4819") endif() else() + # treating warnings as errors if(TREAT_WARNING_AS_ERROR) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") #treating warnings as errors + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") endif() - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") - if (APPLE) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-Wno-error=unused-command-line-argument") - elseif(UNIX) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized -Winit-self") - if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wmaybe-uninitialized") - endif() + if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable:177") endif() endif() @@ -92,6 +87,15 @@ if(APPLE) set(CMAKE_MACOSX_RPATH ON) endif() +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)") + set(AARCH64 ON) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") + set(ARM ON) +endif() +if(ARM AND NOT CMAKE_CROSSCOMPILING) + add_compile_options(-march=armv7-a) +endif() + set(CMAKE_POLICY_DEFAULT_CMP0063 NEW) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_CXX_VISIBILITY_PRESET hidden) @@ -104,9 +108,6 @@ if(NOT DEFINED CMAKE_CXX_STANDARD) set (CMAKE_CXX_STANDARD 11) set (CMAKE_CXX_EXTENSIONS OFF) set (CMAKE_CXX_STANDARD_REQUIRED ON) - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") - endif() endif() #################################### @@ -135,10 +136,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cnpy") add_subdirectory(thirdparty/cnpy EXCLUDE_FROM_ALL) endif() -if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") -endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/utils") add_subdirectory(common/utils) endif() diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp index a369c2f1055..c21222f9a96 100644 --- a/inference-engine/samples/benchmark_app/benchmark_app.hpp +++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp @@ -147,6 +147,14 @@ static constexpr char iop_message[] = "Optional. Specifies precision for input a " Overwrites precision from ip and op options for " "specified layers."; +static constexpr char input_image_scale_message[] = "Optional. Scale values to be used for the input image per channel.\n" + "Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n" + "Example: -iscale data[255,255,255],info[255,255,255]\n"; + +static constexpr char input_image_mean_message[] = "Optional. Mean values to be used for the input image per channel.\n" + "Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n" + "Example: -imean data[255,255,255],info[255,255,255]\n"; + /// @brief Define flag for showing help message
 DEFINE_bool(h, false, help_message);
@@ -259,6 +267,12 @@ DEFINE_string(cache_dir, "", cache_dir_message);
 
 /// @brief Define flag for load network from model file by name without ReadNetwork
 DEFINE_bool(load_from_file, false, load_from_file_message);
 
+/// @brief Define flag for using input image scale
+DEFINE_string(iscale, "", input_image_scale_message);
+
+/// @brief Define flag for using input image mean
+DEFINE_string(imean, "", input_image_mean_message); + /** * @brief This function show a help message */ @@ -304,4 +318,6 @@ static void showUsage() { std::cout << " -ip " << inputs_precision_message << std::endl; std::cout << " -op " << outputs_precision_message << std::endl; std::cout << " -iop \"\" " << iop_message << std::endl; + std::cout << " -iscale " << input_image_scale_message << std::endl; + std::cout << " -imean " << input_image_mean_message << std::endl; } diff --git a/inference-engine/samples/benchmark_app/inputs_filling.cpp b/inference-engine/samples/benchmark_app/inputs_filling.cpp index ef8a045279a..eadd4eceeae 100644 --- a/inference-engine/samples/benchmark_app/inputs_filling.cpp +++ b/inference-engine/samples/benchmark_app/inputs_filling.cpp @@ -91,7 +91,9 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePat size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW")) ? (ch * width * height + h * width + w) : (h * width * numChannels + w * numChannels + ch)); - inputBlobData[offset] = static_cast(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]); + inputBlobData[offset] = + (static_cast(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) - static_cast(app_info.mean[ch])) / + static_cast(app_info.scale[ch]); } } } diff --git a/inference-engine/samples/benchmark_app/inputs_filling.hpp b/inference-engine/samples/benchmark_app/inputs_filling.hpp index 4410faae11e..000d613db59 100644 --- a/inference-engine/samples/benchmark_app/inputs_filling.hpp +++ b/inference-engine/samples/benchmark_app/inputs_filling.hpp @@ -12,4 +12,4 @@ #include "utils.hpp" void fillBlobs(const std::vector& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info, - std::vector requests); + std::vector requests); \ No newline at end of file diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp index 8df3bc2f8e4..da2b77a0ce9 100644 --- a/inference-engine/samples/benchmark_app/main.cpp +++ b/inference-engine/samples/benchmark_app/main.cpp @@ -380,7 +380,7 @@ int main(int argc, char* argv[]) { batchSize = cnnNetwork.getBatchSize(); // Parse input shapes if specified bool reshape = false; - app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, inputInfo, reshape); + app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, inputInfo, reshape); if (reshape) { InferenceEngine::ICNNNetwork::InputShapes shapes = {}; for (auto& item : app_inputs_info) @@ -441,7 +441,7 @@ int main(int argc, char* argv[]) { slog::info << "Import network took " << duration_ms << " ms" << slog::endl; if (statistics) statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"import network time (ms)", duration_ms}}); - app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, exeNetwork.GetInputsInfo()); + app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, exeNetwork.GetInputsInfo()); if (batchSize == 0) { batchSize = 1; } diff --git a/inference-engine/samples/benchmark_app/utils.cpp b/inference-engine/samples/benchmark_app/utils.cpp index 2b99c3b555c..66deb5bad31 100644 --- a/inference-engine/samples/benchmark_app/utils.cpp +++ b/inference-engine/samples/benchmark_app/utils.cpp @@ -88,6 +88,17 @@ std::vector split(const std::string& s, char delim) { return result; } +std::vector splitFloat(const std::string& s, 
char delim) { + std::vector result; + std::stringstream ss(s); + std::string item; + + while (getline(ss, item, delim)) { + result.push_back(std::stof(item)); + } + return result; +} + std::vector parseDevices(const std::string& device_string) { std::string comma_separated_devices = device_string; if (comma_separated_devices.find(":") != std::string::npos) { @@ -161,6 +172,44 @@ std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& sha return ss.str(); } +std::map> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info) { + // Format: data:[255,255,255],info[255,255,255] + std::map> return_value; + + std::string search_string = scale_mean; + auto start_pos = search_string.find_first_of('['); + while (start_pos != std::string::npos) { + auto end_pos = search_string.find_first_of(']'); + if (end_pos == std::string::npos) + break; + auto input_name = search_string.substr(0, start_pos); + auto input_value_string = search_string.substr(start_pos + 1, end_pos - start_pos - 1); + auto input_value = splitFloat(input_value_string, ','); + + if (!input_name.empty()) { + if (inputs_info.count(input_name)) { + return_value[input_name] = input_value; + } + // ignore wrong input name + } else { + for (auto& item : inputs_info) { + if (item.second.isImage()) + return_value[item.first] = input_value; + } + search_string.clear(); + break; + } + search_string = search_string.substr(end_pos + 1); + if (search_string.empty() || search_string.front() != ',') + break; + search_string = search_string.substr(1); + start_pos = search_string.find_first_of('['); + } + if (!search_string.empty()) + throw std::logic_error("Can't parse input parameter string: " + scale_mean); + return return_value; +} + #ifdef USE_OPENCV void dump_config(const std::string& filename, const std::map>& config) { cv::FileStorage fs(filename, cv::FileStorage::WRITE); diff --git a/inference-engine/samples/benchmark_app/utils.hpp b/inference-engine/samples/benchmark_app/utils.hpp index 0abebefe9e0..4452556b3c4 100644 --- a/inference-engine/samples/benchmark_app/utils.hpp +++ b/inference-engine/samples/benchmark_app/utils.hpp @@ -13,6 +13,8 @@ struct InputInfo { InferenceEngine::Precision precision; InferenceEngine::SizeVector shape; std::string layout; + std::vector scale; + std::vector mean; bool isImage() const; bool isImageInfo() const; size_t getDimentionByLayout(char character) const; @@ -31,6 +33,7 @@ std::map parseNStreamsValuePerDevice(const std::vector std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes); size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info); std::vector split(const std::string& s, char delim); +std::map> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info); template std::map parseInputParameters(const std::string parameter_string, const std::map& input_info) { @@ -65,9 +68,11 @@ std::map parseInputParameters(const std::string parame template benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size, - const std::map& input_info, bool& reshape_required) { + const std::string& scale_string, const std::string& mean_string, const std::map& input_info, + bool& reshape_required) { std::map shape_map = parseInputParameters(shape_string, input_info); std::map layout_map = parseInputParameters(layout_string, input_info); + reshape_required = false; benchmark_app::InputsInfo info_map; for (auto& item : 
input_info) { @@ -106,14 +111,33 @@ benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const s } info_map[name] = info; } + + // Update scale and mean + std::map> scale_map = parseScaleOrMean(scale_string, info_map); + std::map> mean_map = parseScaleOrMean(mean_string, info_map); + + for (auto& item : info_map) { + if (item.second.isImage()) { + item.second.scale.assign({1, 1, 1}); + item.second.mean.assign({0, 0, 0}); + + if (scale_map.count(item.first)) { + item.second.scale = scale_map.at(item.first); + } + if (mean_map.count(item.first)) { + item.second.mean = mean_map.at(item.first); + } + } + } + return info_map; } template benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size, - const std::map& input_info) { + const std::string& scale_string, const std::string& mean_string, const std::map& input_info) { bool reshape_required = false; - return getInputsInfo(shape_string, layout_string, batch_size, input_info, reshape_required); + return getInputsInfo(shape_string, layout_string, batch_size, scale_string, mean_string, input_info, reshape_required); } #ifdef USE_OPENCV diff --git a/inference-engine/samples/common/utils/include/samples/classification_results.h b/inference-engine/samples/common/utils/include/samples/classification_results.h index 7fd999d87b0..1a8ea4306c3 100644 --- a/inference-engine/samples/common/utils/include/samples/classification_results.h +++ b/inference-engine/samples/common/utils/include/samples/classification_results.h @@ -54,21 +54,27 @@ private: * @param output Vector of indexes for the top n places */ template - void topResults(unsigned int n, InferenceEngine::TBlob& input, std::vector& output) { - InferenceEngine::SizeVector dims = input.getTensorDesc().getDims(); + void topResults(unsigned int n, InferenceEngine::Blob::Ptr& input, std::vector& output) { + InferenceEngine::SizeVector dims = input->getTensorDesc().getDims(); size_t input_rank = dims.size(); if (!input_rank || !dims[0]) IE_THROW() << "Input blob has incorrect dimensions!"; size_t batchSize = dims[0]; - std::vector indexes(input.size() / batchSize); + std::vector indexes(input->size() / batchSize); - n = static_cast(std::min((size_t)n, input.size())); + n = static_cast(std::min((size_t)n, input->size())); output.resize(n * batchSize); + InferenceEngine::MemoryBlob::CPtr moutput = InferenceEngine::as(input); + if (!moutput) { + IE_THROW() << "Output blob should be inherited from MemoryBlob"; + } + // locked memory holder should be alive all time while access to its buffer happens + auto moutputHolder = moutput->rmap(); for (size_t i = 0; i < batchSize; i++) { - size_t offset = i * (input.size() / batchSize); - T* batchData = input.data(); + size_t offset = i * (input->size() / batchSize); + T* batchData = moutputHolder.as(); batchData += offset; std::iota(std::begin(indexes), std::end(indexes), 0); @@ -88,16 +94,15 @@ private: * @param input 1D blob that contains probabilities * @param output Vector of indexes for the top n places */ - void topResults(unsigned int n, InferenceEngine::Blob& input, std::vector& output) { + void topResults(unsigned int n, InferenceEngine::Blob::Ptr& input, std::vector& output) { #define TBLOB_TOP_RESULT(precision) \ case InferenceEngine::Precision::precision: { \ using myBlobType = InferenceEngine::PrecisionTrait::value_type; \ - InferenceEngine::TBlob& tblob = dynamic_cast&>(input); \ - topResults(n, tblob, output); \ + topResults(n, input, output); \ break; \ } - 
switch (input.getTensorDesc().getPrecision()) { + switch (input->getTensorDesc().getPrecision()) { TBLOB_TOP_RESULT(FP32); TBLOB_TOP_RESULT(FP64); TBLOB_TOP_RESULT(FP16); @@ -111,7 +116,7 @@ private: TBLOB_TOP_RESULT(U64); TBLOB_TOP_RESULT(I64); default: - IE_THROW() << "cannot locate blob for precision: " << input.getTensorDesc().getPrecision(); + IE_THROW() << "cannot locate blob for precision: " << input->getTensorDesc().getPrecision(); } #undef TBLOB_TOP_RESULT @@ -129,7 +134,7 @@ public: if (_imageNames.size() != _batchSize) { throw std::logic_error("Batch size should be equal to the number of images."); } - topResults(_nTop, *_outBlob, _results); + topResults(_nTop, _outBlob, _results); } /** @@ -146,18 +151,17 @@ public: std::wcout << std::endl << std::endl; printHeader(); + InferenceEngine::MemoryBlob::CPtr moutput = InferenceEngine::as(_outBlob); + auto moutputHolder = moutput->rmap(); for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) { std::cout.precision(7); /** Getting probability for resulting class **/ - InferenceEngine::MemoryBlob::CPtr moutput = InferenceEngine::as(_outBlob); if (!moutput) { throw std::logic_error("We expect _outBlob to be inherited from MemoryBlob in " "ClassificationResult::print, " "but by fact we were not able to cast _outBlob to MemoryBlob"); } // locked memory holder should be alive all time while access to its buffer happens - auto moutputHolder = moutput->rmap(); - const auto result = moutputHolder .as::value_type*>()[_results.at(id) + diff --git a/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp b/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp index ac92f7c2aa4..1e6ae59bf6f 100644 --- a/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp +++ b/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp @@ -109,7 +109,7 @@ public: return std::make_shared(new_args.at(0)); } - bool visit_attributes(ngraph::AttributeVisitor& visitor) override { + bool visit_attributes(ngraph::AttributeVisitor&) override { return true; } }; diff --git a/inference-engine/samples/ngraph_function_creation_sample/main.cpp b/inference-engine/samples/ngraph_function_creation_sample/main.cpp index 0855c961a63..6cb1e104305 100644 --- a/inference-engine/samples/ngraph_function_creation_sample/main.cpp +++ b/inference-engine/samples/ngraph_function_creation_sample/main.cpp @@ -108,7 +108,7 @@ TBlob::CPtr ReadWeights(std::string filepath) { std::shared_ptr createNgraphFunction() { TBlob::CPtr weightsPtr = ReadWeights(FLAGS_m); - if (weightsPtr->byteSize() != 1724336) + if (weightsPtr->byteSize() != 6897344) IE_THROW() << "Incorrect weights file. 
This sample works only with LeNet " "classification network."; diff --git a/inference-engine/src/auto_plugin/auto_plugin.cpp b/inference-engine/src/auto_plugin/auto_plugin.cpp index 94b6a8a8b71..75e80faa2b4 100644 --- a/inference-engine/src/auto_plugin/auto_plugin.cpp +++ b/inference-engine/src/auto_plugin/auto_plugin.cpp @@ -274,31 +274,108 @@ DeviceName AutoInferencePlugin::SelectDevice(const std::vector& meta } std::vector CPU; - std::vector GPU; + std::vector dGPU; + std::vector iGPU; + std::vector MYRIAD; + std::vector VPUX; for (auto& item : metaDevices) { if (item.find("CPU") == 0) { CPU.push_back(item); continue; } + if (item.find("MYRIAD") == 0) { + MYRIAD.push_back(item); + continue; + } + if (item.find("VPUX") == 0) { + VPUX.push_back(item); + continue; + } if (item.find("GPU") == 0) { - GPU.push_back(item); + auto gpuFullDeviceName = GetCore()->GetMetric(item, METRIC_KEY(FULL_DEVICE_NAME)).as(); + if (gpuFullDeviceName.find("iGPU") != std::string::npos) { + iGPU.push_back(item); + } else if (gpuFullDeviceName.find("dGPU") != std::string::npos) { + dGPU.push_back(item); + } continue; } } - if (CPU.empty() && GPU.empty()) { + if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) { IE_THROW(NotFound) << "No available device found"; } - // Sort GPU by name: GPU.2 > GPU.1 > GPU.0 > GPU, so we always choose the GPU[0] as best device - std::sort(GPU.begin(), GPU.end(), [](const DeviceName& a, const DeviceName& b)->bool{return b < a;}); + // Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU + if (!dGPU.empty()) { + for (auto&& item : dGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!VPUX.empty()) { + for (auto&& item : VPUX) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!iGPU.empty()) { + for (auto&& item : iGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!MYRIAD.empty()) { + for (auto&& item : MYRIAD) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } - for (auto&& item : GPU) { - std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto res = std::find(capability.begin(), capability.end(), networkPrecision); - if (res != capability.end()) { - return item; + // If network is FP32 but there is no device support FP32, offload FP32 network to device support FP16. 
+ if (networkPrecision == "FP32") { + if (!dGPU.empty()) { + for (auto&& item : dGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!VPUX.empty()) { + for (auto&& item : VPUX) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!iGPU.empty()) { + for (auto&& item : iGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!MYRIAD.empty()) { + for (auto&& item : MYRIAD) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } } } diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index 2b333a38ee9..53cefa30cf7 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -88,11 +88,11 @@ void CLDNNGraph::Build() { std::shared_ptr CLDNNGraph::BuildNetwork(std::shared_ptr program) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::BuildNetwork"); - auto network = std::make_shared(*program, m_stream_id); + auto network = std::make_shared(program, m_stream_id); if (!m_config.graph_dumps_dir.empty() && m_stream_id == 0) { static int net_id = 0; - auto steps_info = network->get_optimization_steps_info(); + auto steps_info = network->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { CNNNetwork net(GetExecGraphInfoByPrimitivesInfo(step.second, true)); diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.h b/inference-engine/src/cldnn_engine/cldnn_graph.h index 5ce64712fef..feae62a03c2 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.h +++ b/inference-engine/src/cldnn_engine/cldnn_graph.h @@ -51,8 +51,10 @@ public: InferenceEngine::SizeVector GetOutputSize(std::string outName) const; std::string MapOutputName(std::string outName) const; std::string getName() const { return m_networkName; } + std::mutex& get_mutex() { return m_infer_mutex; } protected: + std::mutex m_infer_mutex; std::string m_networkName; Config m_config; diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp index bb923f373b9..9a55217975c 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp @@ -17,122 +17,36 @@ using namespace InferenceEngine; -namespace CLDNNPlugin { +namespace { const char fp32_suffix[] = "_fp32"; -const char str_not_allocated[] = "Input data was not allocated."; const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing"; const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format"; -const char unsupported_batched_blob[] = "Batched input blob is expected to contain nv12 blobs"; +const char 
unsupported_batched_blob[] = "Batched input blob is expected to contain NV12 blobs"; +const char str_input_not_allocated[] = "Input data was not allocated."; +const char str_output_not_allocated[] = "Output data was not allocated."; -Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* mem_ptr) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createInputBlob"); - const Precision p = desc.getPrecision(); - - switch (p) { - case Precision::FP32: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::FP16: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I16: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::U16: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I32: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I64: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I8: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::U8: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::BOOL: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - default: - IE_THROW() << "The plugin does not support input " << p.name() << " precision"; +template +void copyToFloat(float* dst, const InferenceEngine::Blob* src) { + if (!dst) { + return; } -} - -Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* mem_ptr) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createOutputBlob"); - const Precision p = desc.getPrecision(); - - switch (p) { - case Precision::FP32: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::FP16: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I32: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I64: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - default: - IE_THROW() << "The plugin does not support output " << p.name() << " precision"; + auto t_blob = dynamic_cast*>(src); + if (!t_blob) { + IE_THROW() << "input type is " << src->getTensorDesc().getPrecision() << " but input is not " + << typeid(T).name(); } -} -void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach"); - auto impl = getContextImpl(m_graph->GetContext()); - impl->acquire_lock(); - - auto mem_itr = inputsMemory.find(name); - - if (mem_itr != inputsMemory.end()) - mem_itr->second = inputMem; - else - inputsMemory.insert({ name, inputMem }); - - 
impl->release_lock(); -} - -void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc"); - cldnn::memory::ptr input_mem = m_graph->GetEngine()->allocate_memory(layout); - input_attach(name, input_mem); + const T* srcPtr = t_blob->readOnly(); + if (!srcPtr) { + IE_THROW(NotAllocated) << str_input_not_allocated; + } + for (size_t i = 0; i < t_blob->size(); i++) + dst[i] = srcPtr[i]; } template -void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi, cldnn::stream& stream) { +void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, CLDNNPlugin::buf_info* bi, cldnn::stream& stream) { size_t n = (bi == nullptr) ? dst->size() : bi->buf_size; size_t offset = (bi == nullptr) ? 0 : bi->buf_offset; @@ -169,80 +83,15 @@ void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi, } } -void CLDNNInferRequest::copyOutputData(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData"); - auto& stream = m_graph->GetNetwork()->get_stream(); - switch (dst->getTensorDesc().getPrecision()) { - case Precision::FP32: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::FP16: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::I32: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::I64: copyResultToOutputBlob(src, dst, bi, stream); break; - default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision"; +inline void checkAlloc(const Blob::Ptr& blob, const std::string& err_str) { + bool not_allocated = false; + if (!blob->is()) { + not_allocated = (blob->buffer() == nullptr); + } else { + not_allocated = !CLDNNPlugin::getBlobImpl(blob->as())->is_allocated(); } -} - -void CLDNNInferRequest::copyInputData(std::shared_ptr network, - const cldnn::primitive_id &inputName, - const cldnn::layout& inputLayout, - const Blob &inputBlob, buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData"); - - size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; - - cldnn::primitive_id internalName = "parameter:" + inputName; - auto locked = inputBlob.cbuffer(); - switch (inputBlob.getTensorDesc().getPrecision()) { - case Precision::FP32: { - float* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I32: { - int32_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I64: { - int64_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::FP16: { - uint16_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I8: { - int8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::U8: { - uint8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::BOOL: { - uint8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - default: - IE_THROW() << "The plugin does not support input " << inputBlob.getTensorDesc().getPrecision() << " precision"; - } -} - -void checkInputBlobNV12(const NV12Blob *nv12_ptr) { - auto y_ptr = nv12_ptr->y()->as(); - - // if the blobs are not remote, check their size - if (!y_ptr) { - if (nv12_ptr->y()->buffer() == nullptr) IE_THROW(NotAllocated) << str_not_allocated; - } - - auto uv_ptr = nv12_ptr->uv()->as(); - if (!uv_ptr) { - if (nv12_ptr->uv()->buffer() == nullptr) IE_THROW(NotAllocated) << str_not_allocated; + if (not_allocated) { + IE_THROW(NotAllocated) << err_str; } } @@ -260,17 +109,19 @@ void checkInputBlob(const Blob::Ptr &blob, const std::string strNotMatched("The input blob size is not equal to the network input size"); if (!blob) { - IE_THROW() << str_not_allocated; + IE_THROW(NotAllocated) << str_input_not_allocated; } if (ColorFormat::NV12 == foundInput->getPreProcess().getColorFormat() && nv12_two_inputs) { if (auto nv12_ptr = blob->as()) { - checkInputBlobNV12(nv12_ptr); + checkAlloc(nv12_ptr->y(), str_input_not_allocated); + checkAlloc(nv12_ptr->uv(), str_input_not_allocated); } else if (auto batched_ptr = blob->as()) { for (auto i = 0; i < batched_ptr->size(); i++) { auto nv12_ptr = getNV12BlobOrException(batched_ptr, i); - checkInputBlobNV12(nv12_ptr); + checkAlloc(nv12_ptr->y(), str_input_not_allocated); + checkAlloc(nv12_ptr->uv(), str_input_not_allocated); } } else { IE_THROW(ParameterMismatch) << wrong_nv12_blob; @@ -287,20 +138,17 @@ void checkInputBlob(const Blob::Ptr &blob, IE_THROW() << strNotMatched + ": got " << blob->size() << " expecting " << refSize; } - if (!blob->is()) { - if (blob->buffer() == nullptr) IE_THROW() << str_not_allocated; - } + checkAlloc(blob, str_input_not_allocated); } } void checkOutputBlob(const Blob::Ptr &blob, const std::string &name, const DataPtr foundOutput) { - const std::string strNotAllocated("Output data was not allocated."); const std::string strNotMatched("The output blob size is not equal to the network 
output size"); if (!blob) { - IE_THROW() << strNotAllocated; + IE_THROW(NotAllocated) << str_output_not_allocated; } SizeVector dims = foundOutput->getTensorDesc().getDims(); size_t refSize = foundOutput->getTensorDesc().getLayout() != SCALAR @@ -311,43 +159,17 @@ void checkOutputBlob(const Blob::Ptr &blob, IE_THROW() << strNotMatched + ": got " << blob->size() << " expecting " << refSize; } - if (!blob->is()) { - if (blob->buffer() == nullptr) IE_THROW() << strNotAllocated; - } + checkAlloc(blob, str_output_not_allocated); } -void CLDNNInferRequest::checkBlobs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs"); - for (auto const &input : _inputs) { - InputInfo::Ptr foundInput = nullptr; - auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), - [&](const std::pair &pair) { - return pair.first == input.first; - }); - if (foundInputPair != std::end(_networkInputs)) { - foundInput = foundInputPair->second; - } else { - IE_THROW(NotFound) - << "Failed to find input with name: \'" << input.first << "\'"; - } - checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs); - } - for (auto const &output : _outputs) { - DataPtr foundOutput; - auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs), - [&](const std::pair &pair) { - return pair.first == output.first; - }); - if (foundOutputPair != std::end(_networkOutputs)) { - foundOutput = foundOutputPair->second; - } else { - IE_THROW(NotFound) - << "Failed to find output with name: \'" << output.first << "\'"; - } - checkOutputBlob(output.second, output.first, foundOutput); - } -} +} // namespace +namespace CLDNNPlugin { + + +// ----------------------------------------------------------------------------------------- // +// ---------------------------- IE API impl ------------------------------------------------ // +// ----------------------------------------------------------------------------------------- // Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetBlob"); Blob::Ptr data; @@ -371,7 +193,7 @@ Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) { return data; } -void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) { +void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBlob"); // perform all common checks first @@ -397,69 +219,73 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) : foundOutput->getTensorDesc(); if (desc.getPrecision() != blobDesc.getPrecision()) { - IE_THROW(ParameterMismatch) - << "Failed to set Blob with precision not corresponding to user " - << (is_input ? "input" : "output") << " precision"; + IE_THROW(ParameterMismatch) << "Failed to set Blob with precision not corresponding to user " + << (is_input ? 
"input" : "output") << " precision"; } auto remote_ptr = data->as(); bool is_remote = remote_ptr != nullptr; if (is_remote) { auto impl = getBlobImpl(remote_ptr); - impl->allocate_if_needed(); + impl->allocate(); } - if (is_input) { - cldnn::primitive_id internalName(name); - if (is_remote) { - auto inputMem = getBlobImpl(remote_ptr)->getMemory(); - input_attach(internalName, inputMem); + _deviceInputs[name] = data; _inputs[name] = data; - } else if (compoundBlobPassed) { + } else { + auto nv12_ptr = data->as(); + auto batched_ptr = data->as(); + bool is_batched = batched_ptr != nullptr; + bool is_nv12 = nv12_ptr != nullptr; + int expected_batch = is_batched ? desc.getDims()[0] : 1; if (ColorFormat::NV12 == foundInput->getPreProcess().getColorFormat() && m_graph->getConfig().nv12_two_inputs) { // try extracting Y and UV remote blobs from it // and put them into appropriate network inputs // that should then go into biplanar NV12 reorder - auto nv12_ptr = data->as(); - auto batched_ptr = data->as(); - if (nv12_ptr != nullptr || batched_ptr != nullptr) { - int num_blobs = batched_ptr != nullptr ? batched_ptr->size() : 1; - - for (auto i = 0; i < num_blobs; i++) { - if (batched_ptr != nullptr) - nv12_ptr = getNV12BlobOrException(batched_ptr, i); + if (is_nv12 || is_batched) { + int num_blobs = is_batched ? batched_ptr->size() : 1; + for (auto i = 0; i < expected_batch; i++) { + std::string y_name = name + "_Y" + std::to_string(i); + std::string uv_name = name + "_UV" + std::to_string(i); + if (is_batched) { + int idx = i < num_blobs ? i : num_blobs-1; + nv12_ptr = getNV12BlobOrException(batched_ptr, idx); + } auto y_ptr = nv12_ptr->y()->as(); if (y_ptr) { auto y_impl = getBlobImpl(y_ptr); - y_impl->allocate_if_needed(); - input_attach(internalName + "_Y" + std::to_string(i), y_impl->getMemory()); + y_impl->allocate(); + _deviceInputs[y_name] = nv12_ptr->y(); is_remote = true; } auto uv_ptr = nv12_ptr->uv()->as(); if (uv_ptr) { auto uv_impl = getBlobImpl(uv_ptr); - uv_impl->allocate_if_needed(); - input_attach(internalName + "_UV" + std::to_string(i), uv_impl->getMemory()); + uv_impl->allocate(); + _deviceInputs[uv_name] = nv12_ptr->uv(); is_remote = true; } } - } else { - IE_THROW(ParameterMismatch) << wrong_nv12_blob; } - - if (is_remote) _inputs[name] = data; } + if (is_remote) + _inputs[name] = data; } if (!is_remote) { if (preProcessingRequired(foundInput, data)) { // Stores the given blob as ROI blob. It will be used to fill in network input // during pre-processing + if (_inputs[name]->is()) { + Blob::Ptr inputHostBlob = create_input_host_blob(desc); + inputHostBlob->allocate(); + _inputs[name] = inputHostBlob; + } _preProcData[name] = CreatePreprocDataHelper(); _preProcData[name]->isApplicable(data, _inputs[name]); _preProcData[name]->setRoiBlob(data); @@ -467,17 +293,16 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) if (compoundBlobPassed) { IE_THROW(NotImplemented) << cannot_set_compound; } - size_t blobSize = desc.getLayout() != SCALAR ? 
details::product(desc.getDims()) : 1; if (dataSize != blobSize) { IE_THROW() << "Input blob size is not equal network input size (" - << dataSize << "!=" << blobSize << ")."; + << dataSize << "!=" << blobSize << ")."; } if (data->buffer() == nullptr) - IE_THROW() << str_not_allocated << " Input name: \'" << name << "\'"; + IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'"; _inputs[name] = data; } } @@ -487,148 +312,49 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) } if (is_remote) { - std::string outputID = m_graph->MapOutputName(name); - auto impl = getBlobImpl(remote_ptr); - m_graph->GetNetwork()->set_output_memory(outputID, impl->getMemory()); + _deviceOutputs[name] = data; } else { size_t outputSize = desc.getLayout() != SCALAR ? details::product(desc.getDims()) : 1; if (dataSize != outputSize) { IE_THROW() << "Output blob size is not equal network output size (" << dataSize - << "!=" << outputSize << ")."; + << "!=" << outputSize << ")."; } if (data->buffer() == nullptr) - IE_THROW() << str_not_allocated << " Input name: \'" << name << "\'"; + IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'"; } _outputs[name] = data; } } -void CLDNNInferRequest::AllocateInputs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs"); - auto inputLayouts = m_graph->GetInputLayouts(); - auto& stream = m_graph->GetNetwork()->get_stream(); - // allocate inputs - for (auto& ni : _networkInputs) { - std::string name = ni.first; - const TensorDesc& desc = ni.second->getTensorDesc(); - - if (ColorFormat::NV12 == ni.second->getPreProcess().getColorFormat() && - m_graph->getConfig().nv12_two_inputs) { - std::vector blobs; - for (auto i = 0; i < desc.getDims()[0]; i++) { - cldnn::primitive_id YName(name + "_Y" + std::to_string(i)); - cldnn::primitive_id UVName(name + "_UV" + std::to_string(i)); - - if (inputLayouts.find(YName) == inputLayouts.end()) { - IE_THROW(ParameterMismatch) << "Input layout for " << YName << " is not found"; - } - if (inputLayouts.find(UVName) == inputLayouts.end()) { - IE_THROW(ParameterMismatch) << "Input layout for " << YName << " is not found"; - } - input_alloc(YName, inputLayouts.at(YName)); - input_alloc(UVName, inputLayouts.at(UVName)); - - size_t height = desc.getDims()[2], width = desc.getDims()[3]; - cldnn::mem_lock input_mem_ptr_Y{inputsMemory.at(YName), stream}; - TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC); - auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data()); - - cldnn::mem_lock input_mem_ptr_UV{ inputsMemory.at(UVName), stream }; - TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC); - auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data()); - - blobs.push_back(make_shared_blob(blobY, blobUV)); - } - _inputs[name] = desc.getDims()[0] == 1 ? 
blobs[0] : make_shared_blob(blobs); +void CLDNNInferRequest::checkBlobs() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs"); + for (auto const &input : _inputs) { + InputInfo::Ptr foundInput = nullptr; + auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), + [&](const std::pair &pair) { + return pair.first == input.first; + }); + if (foundInputPair != std::end(_networkInputs)) { + foundInput = foundInputPair->second; } else { - if (inputLayouts.find(name) == inputLayouts.end()) { - IE_THROW() << "Input layout for " << name << " is not found"; - } - cldnn::layout layout = inputLayouts.at(name); - input_alloc(name, layout); - cldnn::mem_lock mem_ptr{inputsMemory.at(name), stream}; - _inputs[name] = createInputBlob(desc, mem_ptr.data()); - - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { - cldnn::layout layout_fp32 = layout; - layout_fp32.data_type = cldnn::data_types::f32; - input_alloc(name + fp32_suffix, layout_fp32); - } + IE_THROW(NotFound) << "Failed to find input with name: \'" << input.first << "\'"; } + checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs); } -} - -void CLDNNInferRequest::AllocateInputsDyn() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputsDyn"); - // allocate inputs - for (auto &input : m_graph->GetInputLayouts()) { - InputInfo::Ptr ni = _networkInputs.at(input.first); - TensorDesc desc = ni->getTensorDesc(); - SizeVector& dims = desc.getDims(); - - if (!dims.empty()) { - *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); + for (auto const &output : _outputs) { + DataPtr foundOutput = nullptr; + auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs), + [&](const std::pair &pair) { + return pair.first == output.first; + }); + if (foundOutputPair != std::end(_networkOutputs)) { + foundOutput = foundOutputPair->second; } else { - IE_THROW() << "Empty dimensions for input blob " << input.first; + IE_THROW(NotFound) << "Failed to find output with name: \'" << output.first << "\'"; } - - Blob::Ptr inputBlob = createInputBlob(desc); - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { - desc.setPrecision(Precision::FP32); - auto fp32inputBlob = InferenceEngine::make_shared_blob(desc); - fp32inputBlob->allocate(); - _inputs[input.first + fp32_suffix] = fp32inputBlob; - } - inputBlob->allocate(); - _inputs[input.first] = inputBlob; - } -} - -void CLDNNInferRequest::AllocateOutputs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateOutputs"); - // allocate outputs - bool can_reuse_internal_mem = !m_useStreams; - for (auto& no : _networkOutputs) { - std::string outputID = m_graph->MapOutputName(no.first); - cldnn::memory::ptr output_mem = m_graph->GetNetwork()->get_output_memory(outputID); - cldnn::mem_lock output_mem_ptr{output_mem, m_graph->GetNetwork()->get_stream()}; - if (output_mem_ptr.data() == nullptr) { - IE_THROW() << "Empty output memory for primitive " << outputID; - } - - DataPtr oi = no.second; - const TensorDesc& desc = oi->getTensorDesc(); - - if (can_reuse_internal_mem) { - _outputs[no.first] = createOutputBlob(desc, output_mem_ptr.data()); - } else { - Blob::Ptr outputBlob = createOutputBlob(desc); - outputBlob->allocate(); - _outputs[no.first] = outputBlob; - } - outputsMap[no.first] = outputID; - } -} - -void CLDNNInferRequest::AllocateOutputsDyn() { - 
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateOutputsDyn"); - // allocate outputs - for (auto& no : _networkOutputs) { - DataPtr oi = no.second; - TensorDesc desc = oi->getTensorDesc(); - SizeVector& dims = desc.getDims(); - - if (!dims.empty()) { - *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); - } else { - IE_THROW() << "Empty dimensions for output blob " << no.first; - } - - Blob::Ptr outputBlob = createOutputBlob(desc); - outputBlob->allocate(); - _outputs[no.first] = outputBlob; + checkOutputBlob(output.second, output.first, foundOutput); } } @@ -642,11 +368,11 @@ void CLDNNInferRequest::SetGraph(std::shared_ptr graph) if (m_graph->GetMaxDynamicBatchSize() > 1) { SetBatch(m_graph->GetMaxDynamicBatchSize()); - AllocateInputsDyn(); - AllocateOutputsDyn(); + allocate_inputs_dynamic(); + allocate_outputs_dynamic(); } else { - AllocateInputs(); - AllocateOutputs(); + allocate_inputs(); + allocate_outputs(); } } @@ -728,40 +454,272 @@ CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); } -void CLDNNInferRequest::execAndParse() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse"); - auto networkOutputs = m_graph->GetNetwork()->execute(); +// ----------------------------------------------------------------------------------------- // +// ---------------------------- internal utils --------- ----------------------------------- // +// ----------------------------------------------------------------------------------------- // + +Blob::Ptr CLDNNInferRequest::create_input_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::create_input_host_blob"); + const Precision& p = desc.getPrecision(); + + switch (p) { + case Precision::FP32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::FP16: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I16: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::U16: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I64: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I8: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::U8: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::BOOL: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + default: + IE_THROW(NotImplemented) << "The plugin does not support input " << p.name() << " precision"; + } +} + +Blob::Ptr CLDNNInferRequest::create_output_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::create_output_host_blob"); + const Precision& p = 
desc.getPrecision(); + + switch (p) { + case Precision::FP32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::FP16: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I64: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + default: + IE_THROW() << "The plugin does not support output " << p.name() << " precision"; + } +} + +void CLDNNInferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_output_data"); auto& stream = m_graph->GetNetwork()->get_stream(); + switch (dst->getTensorDesc().getPrecision()) { + case Precision::FP32: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::FP16: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::I32: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::I64: copyResultToOutputBlob(src, dst, bi, stream); break; + default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision"; + } +} + +void CLDNNInferRequest::copy_input_data(std::shared_ptr network, + const cldnn::primitive_id &inputName, + const cldnn::layout& inputLayout, + const Blob &inputBlob, buf_info* bi) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_input_data"); + + size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; + + cldnn::primitive_id internalName = "parameter:" + inputName; + auto locked = inputBlob.cbuffer(); + switch (inputBlob.getTensorDesc().getPrecision()) { + case Precision::FP32: { + float* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::I32: { + int32_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::I64: { + int64_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::FP16: { + uint16_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::I8: { + int8_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::U8: { + uint8_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::BOOL: { + uint8_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + default: + IE_THROW() << "The plugin does not support input " << inputBlob.getTensorDesc().getPrecision() << " precision"; + } +} + +void CLDNNInferRequest::allocate_inputs() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs"); + auto inputLayouts = m_graph->GetInputLayouts(); + // allocate inputs + for (auto& ni : _networkInputs) { + std::string name = ni.first; + const TensorDesc& desc = ni.second->getTensorDesc(); + + if (ColorFormat::NV12 == ni.second->getPreProcess().getColorFormat() && + m_graph->getConfig().nv12_two_inputs) { + } else { + auto litr = inputLayouts.find(name); + if (litr == inputLayouts.end()) { + IE_THROW() << "Input layout for " << name << " is not found"; + } + + if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { + TensorDesc desc_fp32 = desc; + desc_fp32.setPrecision(Precision::FP32); + auto blobPtr = create_device_blob(desc_fp32, litr->second); + _deviceInputs[name] = blobPtr; + Blob::Ptr inputBlob = create_input_host_blob(desc); + inputBlob->allocate(); + _inputs[name] = inputBlob; + } else { + auto blobPtr = create_device_blob(desc, litr->second); + _deviceInputs[name] = blobPtr; + _inputs[name] = blobPtr; + } + } + } +} + +void CLDNNInferRequest::allocate_inputs_dynamic() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs_dynamic"); + // allocate inputs + for (auto &input : m_graph->GetInputLayouts()) { + InputInfo::Ptr ni = _networkInputs.at(input.first); + TensorDesc desc = ni->getTensorDesc(); + SizeVector& dims = desc.getDims(); + + if (!dims.empty()) { + *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); + } else { + IE_THROW() << "Empty dimensions for input blob " << input.first; + } + + Blob::Ptr inputBlob = create_input_host_blob(desc); + if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { + desc.setPrecision(Precision::FP32); + auto fp32inputBlob = InferenceEngine::make_shared_blob(desc); 
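create_input_host_blob and create_output_host_blob above dispatch on the tensor precision to build a typed host blob, either wrapping caller-provided memory or allocating a fresh buffer. A minimal standalone sketch of that dispatch pattern, using simple stand-in types instead of the real InferenceEngine blob API and only a few of the precisions handled above:

#include <cstddef>
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <vector>

// Stand-ins for InferenceEngine::Precision / Blob, just enough to show the dispatch.
enum class Precision { FP32, I32, I64, U8 };

struct Blob {
    virtual ~Blob() = default;
    virtual size_t byte_size() const = 0;
};

template <typename T>
struct TypedBlob : Blob {
    explicit TypedBlob(size_t count) : owned(count), data(owned.data()), count(count) {}
    TypedBlob(size_t count, T* external) : data(external), count(count) {}
    std::vector<T> owned;  // empty when wrapping external, caller-owned memory
    T* data = nullptr;
    size_t count = 0;
    size_t byte_size() const override { return count * sizeof(T); }
};

template <typename T>
std::shared_ptr<Blob> make_typed_blob(size_t count, uint8_t* mem_ptr) {
    // Mirrors the "wrap external pointer if given, otherwise allocate" branches above.
    if (mem_ptr != nullptr)
        return std::make_shared<TypedBlob<T>>(count, reinterpret_cast<T*>(mem_ptr));
    return std::make_shared<TypedBlob<T>>(count);
}

std::shared_ptr<Blob> create_host_blob(Precision p, size_t count, uint8_t* mem_ptr = nullptr) {
    switch (p) {
    case Precision::FP32: return make_typed_blob<float>(count, mem_ptr);
    case Precision::I32:  return make_typed_blob<int32_t>(count, mem_ptr);
    case Precision::I64:  return make_typed_blob<int64_t>(count, mem_ptr);
    case Precision::U8:   return make_typed_blob<uint8_t>(count, mem_ptr);
    }
    throw std::runtime_error("unsupported precision");
}

int main() {
    auto allocated = create_host_blob(Precision::FP32, 16);                  // owns its buffer
    float external[16] = {};
    auto wrapped = create_host_blob(Precision::FP32, 16,
                                    reinterpret_cast<uint8_t*>(external));   // wraps user memory
    return allocated->byte_size() == wrapped->byte_size() ? 0 : 1;
}

The real helpers cover the full precision list shown above (FP32/FP16/I16/U16/I32/I64/I8/U8/BOOL for inputs, FP32/FP16/I32/I64 for outputs) and throw for anything else.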
+ fp32inputBlob->allocate(); + _inputs[input.first + fp32_suffix] = fp32inputBlob; + } + inputBlob->allocate(); + _inputs[input.first] = inputBlob; + } +} + +void CLDNNInferRequest::allocate_outputs() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs"); + // allocate outputs + for (auto& no : _networkOutputs) { + std::string outputID = m_graph->MapOutputName(no.first); + const cldnn::layout output_layout = m_graph->GetNetwork()->get_output_memory(outputID)->get_layout(); + const TensorDesc& desc = no.second->getTensorDesc(); + + auto blobPtr = create_device_blob(desc, output_layout); + _deviceOutputs[no.first] = blobPtr; + _outputs[no.first] = blobPtr; + outputsMap[no.first] = outputID; + } +} + +void CLDNNInferRequest::allocate_outputs_dynamic() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs_dynamic"); + // allocate outputs + for (auto& no : _networkOutputs) { + DataPtr oi = no.second; + TensorDesc desc = oi->getTensorDesc(); + SizeVector& dims = desc.getDims(); + + if (!dims.empty()) { + *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); + } else { + IE_THROW() << "Empty dimensions for output blob " << no.first; + } + + Blob::Ptr outputBlob = create_output_host_blob(desc); + outputBlob->allocate(); + _outputs[no.first] = outputBlob; + } +} + +void CLDNNInferRequest::exec_and_parse(const std::vector& dependencies) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse"); + auto networkOutputs = m_graph->GetNetwork()->execute(dependencies); // Collect outputs as requested by the model for (auto& no : _networkOutputs) { Blob::Ptr bptr = _outputs[no.first]; - - std::string outputID = outputsMap[no.first]; + std::string outputID = outputsMap.at(no.first); auto outputMemory = networkOutputs.at(outputID).get_memory(); // mapping remote blobs not needed - // let the user take care of them explicitly if (!bptr->is()) { - cldnn::mem_lock out_ptr{outputMemory, stream}; - auto blob_ptr = bptr->buffer().as(); - - // If Async API is used, copy of output blobs is not needed, unless SetBlob function was called. - // But in the case when old API is used we have to copy data to memory provided by user. 
- if (blob_ptr != out_ptr.data()) { - copyOutputData(outputMemory, bptr); - } + copy_output_data(outputMemory, bptr); } } - - // finally collect profiling info - if (m_useProfiling) { - m_graph->UpdatePerfStatistics(); - } } -void CLDNNInferRequest::execAndParseDyn() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParseDyn"); +void CLDNNInferRequest::exec_and_parse_dynamic() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::exec_and_parse_dynamic"); std::vector> networkOutputs(m_graph->GetNetworksCount()); // set up exection and put all graphs into driver queue @@ -769,6 +727,14 @@ void CLDNNInferRequest::execAndParseDyn() { unsigned int mask = 1 << nb; if (m_curBatch & mask) { + for (auto& item : _inputs) { + const cldnn::primitive_id& inputName = item.first; + const Blob::Ptr inputBlob = item.second; + + auto inputLayout = m_graph->GetInputLayouts().at(inputName); + inputLayout.size.batch[0] = mask; + copy_input_data(m_graph->GetNetwork(nb), inputName, inputLayout, *inputBlob, &batchInputs[inputName][nb]); + } networkOutputs[nb] = m_graph->GetNetwork(nb)->execute(); } } @@ -783,7 +749,7 @@ void CLDNNInferRequest::execAndParseDyn() { auto outputMemory = networkOutputs[nb].at(outputID).get_memory(); Blob::Ptr bptr = _outputs[no.first]; - copyOutputData(outputMemory, bptr, &batchOutputs[no.first][nb]); + copy_output_data(outputMemory, bptr, &batchOutputs[no.first][nb]); } } } @@ -799,38 +765,61 @@ void CLDNNInferRequest::InferImpl() { // execute input pre-processing. execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP - for (auto &item : _inputs) { - std::string name = item.first; - Blob::Ptr inputBlob = item.second; + if (m_graph->GetMaxDynamicBatchSize() > 1) { + exec_and_parse_dynamic(); + return; + } + + { + // try locking stream infer mutex + const std::lock_guard lock(m_graph->get_mutex()); + + // set input and output memory from request blob maps + // into the network object primitives + std::vector dependencies; + for (auto& item : _inputs) { + std::string inputName = item.first; + Blob::Ptr& inputBlob = item.second; - if (m_graph->GetMaxDynamicBatchSize() > 1) { - PrepareInputDyn(name, *inputBlob); - } else { auto nv12_ptr = inputBlob->as(); auto batched_ptr = inputBlob->as(); + bool is_batched = batched_ptr != nullptr; + bool is_nv12 = nv12_ptr != nullptr; - if (nv12_ptr != nullptr || batched_ptr != nullptr) { - // special case for NV12 input blob - int num_blobs = batched_ptr != nullptr ? batched_ptr->size() : 1; - for (auto i = 0; i < num_blobs; i++) { - if (batched_ptr != nullptr) - nv12_ptr = getNV12BlobOrException(batched_ptr, i); - - PrepareInput(name + "_Y" + std::to_string(i), *nv12_ptr->y()); - PrepareInput(name + "_UV" + std::to_string(i), *nv12_ptr->uv()); + if (is_nv12 || is_batched) { + int num_blobs = is_batched ? batched_ptr->size() : 1; + int expected_batch = is_batched + ? _networkInputs.at(inputName)->getTensorDesc().getDims()[0] + : 1; + for (auto i = 0; i < expected_batch; i++) { + std::string y_name = inputName + "_Y" + std::to_string(i); + std::string uv_name = inputName + "_UV" + std::to_string(i); + if (is_batched) { + int idx = i < num_blobs ? 
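exec_and_parse_dynamic() above selects which pre-built network to run by testing bits of m_curBatch: network nb handles a chunk of 1 << nb samples, so a requested batch appears to be covered by its power-of-two components. A small standalone sketch of that scheduling, with an assumed network count and batch value:

#include <cstdio>

int main() {
    const unsigned networks_count = 4;  // networks built for batch chunks 1, 2, 4, 8
    const int cur_batch = 5;            // requested dynamic batch (illustrative)

    for (unsigned nb = 0; nb < networks_count; ++nb) {
        const unsigned mask = 1u << nb;
        if (cur_batch & mask) {
            // In the plugin this is where inputs are copied with
            // inputLayout.size.batch[0] = mask and GetNetwork(nb)->execute() is queued.
            std::printf("network %u runs a chunk of %u samples\n", nb, mask);
        }
    }
    // For cur_batch = 5 the chunks are 1 + 4 samples, i.e. networks 0 and 2 run.
    return 0;
}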
i : num_blobs - 1; + nv12_ptr = getNV12BlobOrException(batched_ptr, idx); + } + prepare_input(y_name, nv12_ptr->y(), dependencies); + prepare_input(uv_name, nv12_ptr->uv(), dependencies); } } else { // regular blob - PrepareInput(name, *inputBlob); + prepare_input(inputName, inputBlob, dependencies); } } - } - // The actual inference - if (m_graph->GetMaxDynamicBatchSize() > 1) { - execAndParseDyn(); - } else { - execAndParse(); + for (auto& item : _outputs) { + std::string outputName = item.first; + Blob::Ptr& outputBlob = item.second; + prepare_output(outputName, outputBlob); + } + + // The actual inference + exec_and_parse(dependencies); + + // finally collect profiling info + if (m_useProfiling) { + m_graph->UpdatePerfStatistics(); + } } } @@ -843,101 +832,83 @@ std::map CLDNNInferRequest::GetPerforma } } -namespace { - -template -void copyToFloat(float* dst, const InferenceEngine::Blob* src) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "copyToFloat"); - if (!dst) { - return; - } - const InferenceEngine::TBlob* t_blob = dynamic_cast*>(src); - if (t_blob == nullptr) { - IE_THROW() << "input type is " << src->getTensorDesc().getPrecision() << " but input is not " - << typeid(T).name(); - } - - const T* srcPtr = t_blob->readOnly(); - if (srcPtr == nullptr) { - IE_THROW() << "Input data was not allocated."; - } - for (size_t i = 0; i < t_blob->size(); i++) dst[i] = srcPtr[i]; -} - -} // namespace - -void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const Blob &inputBlob) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::PrepareInput"); - // Get input layout - if (m_graph->GetInputLayouts().find(inputName) == m_graph->GetInputLayouts().end()) { +void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob, + std::vector& dependencies) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_input"); + auto inputLayoutItr = m_graph->GetInputLayouts().find(inputName); + if (inputLayoutItr == m_graph->GetInputLayouts().end()) { IE_THROW() << "Input name mismatch."; } - auto inputLayout = m_graph->GetInputLayouts().at(inputName); - auto is_same_buffer = [&](const Blob& blob, cldnn::memory::ptr memory) -> bool { - const std::string str_not_allocated("Input data was not allocated."); - cldnn::mem_lock ptr{memory, m_graph->GetNetwork()->get_stream()}; - const uint8_t* blob_ptr = blob.cbuffer().as(); - const uint8_t* mem_ptr = ptr.data(); - if (blob_ptr == nullptr || mem_ptr == nullptr) { - IE_THROW() << str_not_allocated; - } - return (blob_ptr == mem_ptr) && (blob.byteSize() == memory->size()); - }; - - cldnn::primitive_id internalName = "parameter:" + inputName; - cldnn::memory::ptr memory = inputsMemory.at(inputName); - auto& stream = m_graph->GetNetwork()->get_stream(); + Blob::Ptr reqBlob = _deviceInputs.at(inputName); auto _nw_ptr = m_graph->GetNetwork(); - auto prec = inputBlob.getTensorDesc().getPrecision(); - - if (inputBlob.is()) { - // no need to check for reuse - _nw_ptr->set_input_data(internalName, memory); - } else if (prec == Precision::I16 || prec == Precision::U16) { - // clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision - cldnn::memory::ptr fp32_mem = inputsMemory.at(inputName+fp32_suffix); - cldnn::mem_lock ptr {fp32_mem, stream}; - if (prec == Precision::I16) { - copyToFloat(ptr.data(), &inputBlob); - } else { - copyToFloat(ptr.data(), &inputBlob); - } - - _nw_ptr->set_input_data(internalName, fp32_mem); - } else if 
(is_same_buffer(inputBlob, memory)) { - // If input memory was allocated by cldnn engine and wasn't overwritten by user set_input_data method won't copy input data. - switch (prec) { - case Precision::FP32: - case Precision::FP16: - case Precision::I8: - case Precision::U8: - case Precision::BOOL: - case Precision::I32: - case Precision::I64: { - _nw_ptr->set_input_data(internalName, memory); - break; + cldnn::primitive_id internalName = "parameter:" + inputName; + const auto& prec = inputBlob->getTensorDesc().getPrecision(); + auto remote_ptr = inputBlob->as(); + auto& stream = m_graph->GetNetwork()->get_stream(); + bool is_dev_input = remote_ptr != nullptr; + switch (prec) { + case Precision::FP32: + case Precision::FP16: + case Precision::I8: + case Precision::U8: + case Precision::BOOL: + case Precision::I16: + case Precision::U16: + case Precision::I32: + case Precision::I64: { + auto impl = getBlobImpl(is_dev_input ? + remote_ptr : + reqBlob->as()); + if (!impl->is_allocated()) { + IE_THROW() << str_input_not_allocated; } - default: - IE_THROW() << "Unsupported input precision " << prec; + auto inputMem = impl->getMemory(); + + if (!is_dev_input) { + if (prec == Precision::I16 || prec == Precision::U16) { + // clDNN doesn't support I16 input precision, + // so have to convert input data to fp32 precision + cldnn::mem_lock ptr{ inputMem, stream }; + if (prec == Precision::I16) { + copyToFloat(ptr.data(), inputBlob.get()); + } else { + copyToFloat(ptr.data(), inputBlob.get()); + } + } else { + auto src_lock = inputBlob->cbuffer(); + auto ev = inputMem->copy_from(stream, src_lock.as()); + dependencies.push_back(ev); + } + } + _nw_ptr->set_input_data(internalName, inputMem); + break; } - } else { - // Otherwise, we have to attach to user memory and then copy the data. - copyInputData(_nw_ptr, inputName, inputLayout, inputBlob); + default: + IE_THROW() << "Unsupported input precision " << prec; } } -void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, const Blob &inputBlob) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::PrepareInputDyn"); - // now try to get execution results - for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) { - unsigned int mask = 1 << nb; - - if (m_curBatch & mask) { - auto inputLayout = m_graph->GetInputLayouts().at(inputName); - inputLayout.size.batch[0] = mask; - copyInputData(m_graph->GetNetwork(nb), inputName, inputLayout, inputBlob, &batchInputs[inputName][nb]); - } +void CLDNNInferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_output"); + Blob::Ptr reqBlob = _deviceOutputs.at(outputName); + cldnn::primitive_id internalName = outputsMap[outputName]; + auto _nw_ptr = m_graph->GetNetwork(); + auto remote_ptr = outputBlob->as(); + auto output_blob_ptr = (reqBlob != outputBlob && remote_ptr != nullptr) + ? 
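prepare_input() above routes I16 and U16 blobs through copyToFloat because clDNN does not accept 16-bit integer input precision, widening each element into the FP32 staging memory before set_input_data. A standalone sketch of that conversion step (buffer contents are illustrative):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Element-wise widening of a 16-bit integer buffer into float, as copyToFloat does.
template <typename T>
void copy_to_float(float* dst, const T* src, size_t count) {
    for (size_t i = 0; i < count; ++i)
        dst[i] = static_cast<float>(src[i]);
}

int main() {
    std::vector<int16_t> i16_input = {-3, 0, 7, 32767};
    std::vector<float> fp32_staging(i16_input.size());
    copy_to_float(fp32_staging.data(), i16_input.data(), i16_input.size());
    for (float v : fp32_staging)
        std::printf("%.1f ", v);  // -3.0 0.0 7.0 32767.0
    std::printf("\n");
    return 0;
}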
remote_ptr + : reqBlob->as(); + auto impl = getBlobImpl(output_blob_ptr); + if (!impl->is_allocated()) { + IE_THROW(NotAllocated) << str_output_not_allocated; } + auto outputMem = impl->getMemory(); + _nw_ptr->set_output_memory(internalName, outputMem); +} + +InferenceEngine::Blob::Ptr CLDNNInferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) { + auto blobPtr = std::make_shared(m_graph->GetContext(), m_graph->GetNetwork()->get_stream(), desc, layout); + getBlobImpl(blobPtr.get())->allocate(); + return blobPtr; } } // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.h b/inference-engine/src/cldnn_engine/cldnn_infer_request.h index a988438e8d6..43a40eea1bc 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.h +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.h @@ -23,6 +23,7 @@ class CLDNNExecNetwork; class CLDNNInferRequest : public InferenceEngine::IInferRequestInternal { public: + using Ptr = std::shared_ptr; // make sure all blobs and cldnn::memory objects // are in place and valid void checkBlobs() override; @@ -45,8 +46,9 @@ public: void EnableProfiling() { m_useProfiling = true; } void EnableStreams() { m_useStreams = true; } -protected: - std::map inputsMemory; +private: + InferenceEngine::BlobMap _deviceOutputs; + std::map inputsMap; std::map outputsMap; bool m_useProfiling; @@ -58,24 +60,25 @@ protected: std::map> batchOutputs; InferenceEngine::IStreamsExecutor* streamExecutor = nullptr; - InferenceEngine::Blob::Ptr createInputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); - InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); - void copyOutputData(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr); - void copyInputData(std::shared_ptr network, const cldnn::primitive_id &inputName, - const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, - buf_info* bi = nullptr); + void prepare_input(const cldnn::primitive_id &inputName, InferenceEngine::Blob::Ptr &inputBlob, + std::vector& dependencies); + void prepare_output(const cldnn::primitive_id& outputName, InferenceEngine::Blob::Ptr& outputBlob); - void input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem); - void input_alloc(cldnn::primitive_id name, const cldnn::layout& layout); - void AllocateInputs(); - void AllocateOutputs(); - void AllocateInputsDyn(); - void AllocateOutputsDyn(); - void execAndParse(); - void execAndParseDyn(); + InferenceEngine::Blob::Ptr create_input_host_blob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); + InferenceEngine::Blob::Ptr create_output_host_blob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); + InferenceEngine::Blob::Ptr create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout); - void PrepareInput(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob); - void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob); + void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr); + void copy_input_data(std::shared_ptr network, const cldnn::primitive_id &inputName, + const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, + buf_info* bi = nullptr); + + void allocate_inputs(); + void allocate_outputs(); + void 
allocate_inputs_dynamic(); + void allocate_outputs_dynamic(); + void exec_and_parse(const std::vector& dependencies); + void exec_and_parse_dynamic(); }; }; // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp index 81a055a4a09..0c0ddf7e637 100644 --- a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp +++ b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp @@ -204,6 +204,7 @@ REGISTER_FACTORY(v5, Loop); // ------------------------------ Supported v6 ops ------------------------------ // REGISTER_FACTORY(v6, CTCGreedyDecoderSeqLen); REGISTER_FACTORY(v6, MVN); +REGISTER_FACTORY(v6, GatherElements); // ------------------------------ Supported v7 ops ------------------------------ // REGISTER_FACTORY(v7, Gather); diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp index 275aeca31ca..7386501f0b1 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -6,7 +6,6 @@ #include "ngraph/ops.hpp" #include "ngraph_ops/nms_ie_internal.hpp" #include "cldnn_itt.h" -#include "cldnn/runtime/debug_configuration.hpp" using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -178,16 +177,11 @@ std::shared_ptr Program::BuildProgram(const std::vectordump_graphs.empty()) { - options.set_option(cldnn::build_option::graph_dumps_dir(debug_config->dump_graphs)); - } - options.set_option(cldnn::build_option::optimize_data(true)); options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig)); @@ -199,7 +193,7 @@ std::shared_ptr Program::BuildProgram(const std::vector(*m_engine, *m_topology, options); + auto program = cldnn::program::build_program(*m_engine, *m_topology, options); CleanupBuild(); return program; diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp index ce52a5eea07..34c3ae30d29 100644 --- a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp @@ -81,22 +81,13 @@ bool CLDNNRemoteBlobImpl::is_locked() const noexcept { return lockedHolder != nullptr; } -void CLDNNRemoteBlobImpl::allocate_if_needed() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::AllocateIfNeeded"); - auto _impl = getContextImpl(m_context.lock()); - _impl->acquire_lock(); - - if (m_memObject == nullptr) { - allocate(); - } - - _impl->release_lock(); -} - void CLDNNRemoteBlobImpl::allocate() noexcept { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::Allocate"); assert(m_memObject == nullptr); - std::shared_ptr eng = getContextImpl(m_context.lock())->GetEngine(); + auto _impl = getContextImpl(m_context.lock()); + _impl->acquire_lock(); + std::shared_ptr eng = _impl->GetEngine(); switch (m_mem_type) { case BlobType::BT_BUF_INTERNAL: { @@ -129,6 +120,7 @@ void CLDNNRemoteBlobImpl::allocate() noexcept { default: m_memObject.reset(); } + _impl->release_lock(); } const std::shared_ptr& CLDNNRemoteBlobImpl::getAllocator() const noexcept { @@ -154,7 +146,7 @@ void CLDNNRemoteBlobImpl::lock() const { } void CLDNNRemoteBlobImpl::unlock() const { - lockedHolder.release(); + lockedHolder.reset(); } LockedMemory CLDNNRemoteBlobImpl::buffer() noexcept { diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.h 
b/inference-engine/src/cldnn_engine/cldnn_remote_context.h index f6a92e82c48..a68612df041 100644 --- a/inference-engine/src/cldnn_engine/cldnn_remote_context.h +++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.h @@ -44,8 +44,8 @@ public: explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, cldnn::stream& stream, const cldnn::layout& layout, - cldnn::shared_handle mem, - cldnn::shared_surface surf, + cldnn::shared_handle mem = nullptr, + cldnn::shared_surface surf = 0, uint32_t plane = 0, BlobType mem_type = BT_BUF_INTERNAL); @@ -64,7 +64,6 @@ public: bool is_allocated() const noexcept; bool is_locked() const noexcept; - void allocate_if_needed(); cldnn::memory::ptr getMemory() { return m_memObject; } protected: @@ -99,10 +98,10 @@ public: cldnn::stream& stream, const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, - cldnn::shared_handle mem, - cldnn::shared_surface surf, - uint32_t plane, - CLDNNRemoteBlobImpl::BlobType mem_type) + cldnn::shared_handle mem = nullptr, + cldnn::shared_surface surf = 0, + uint32_t plane = 0, + CLDNNRemoteBlobImpl::BlobType mem_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL) : _impl(context, stream, layout, mem, surf, plane, mem_type) , TpublicAPI(desc) {} @@ -184,7 +183,7 @@ public: * @brief Maps handle to heap memory accessible by any memory manipulation routines. * @return Generic pointer to memory */ - void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return nullptr; }; + void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return handle; }; /** * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. * The multiple sequential mappings of the same handle are suppose to get the same diff --git a/inference-engine/src/cldnn_engine/ops/concat.cpp b/inference-engine/src/cldnn_engine/ops/concat.cpp index 9d37f959f03..453e9996530 100644 --- a/inference-engine/src/cldnn_engine/ops/concat.cpp +++ b/inference-engine/src/cldnn_engine/ops/concat.cpp @@ -12,14 +12,14 @@ namespace CLDNNPlugin { static cldnn::concatenation::concatenation_axis GetConcatAxis(int32_t axis, size_t rank) { - if (axis >= rank) + unsigned cldnn_axis = axis >= 0 ? axis : axis + static_cast(rank); + if (cldnn_axis >= rank) IE_THROW() << "Concatenation axis exceeds number of dimensions"; // Difference in dimension ordering between IE and clDNN, // reverse spatial dimensions after batch and feature. 
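The concat change above, and the new gather_elements.cpp further below, use the same IE-to-clDNN axis remapping: negative axes are normalized first, batch and feature keep indices 0 and 1, and the spatial axes are reversed to match bfyx/bfzyx ordering. A standalone sketch of the mapping with a few worked values:

#include <algorithm>
#include <cstdio>

// Same arithmetic as GetConcatAxis / GetGatherAxis: reverse the spatial dims
// after batch and feature, padding the rank up to clDNN's 4-dim minimum.
unsigned remap_axis(int axis, unsigned rank) {
    if (axis < 0)
        axis += static_cast<int>(rank);          // normalize negative axes
    unsigned cldnn_axis = static_cast<unsigned>(axis);
    if (cldnn_axis >= 2) {
        unsigned spatial_axis = cldnn_axis - 2;
        unsigned spatial_size = std::max(rank, 4u) - 2;
        cldnn_axis = spatial_size - spatial_axis - 1 + 2;
    }
    return cldnn_axis;
}

int main() {
    // rank-4 NCHW: H (axis 2) -> index 3 (along_y), W (axis 3) -> index 2 (along_x)
    std::printf("rank 4: axis 2 -> %u, axis 3 -> %u\n", remap_axis(2, 4), remap_axis(3, 4));
    // rank-5 NCDHW: D -> along_z, H -> along_y, W -> along_x
    std::printf("rank 5: axis 2 -> %u, axis 3 -> %u, axis 4 -> %u\n",
                remap_axis(2, 5), remap_axis(3, 5), remap_axis(4, 5));
    // negative axis: -1 on rank 4 is W, which also maps to index 2
    std::printf("rank 4: axis -1 -> %u\n", remap_axis(-1, 4));
    return 0;
}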
- unsigned cldnn_axis = axis; - if (axis >= 2) { - auto spatial_axis = axis - 2; + if (cldnn_axis >= 2) { + auto spatial_axis = cldnn_axis - 2; // Default and minimum number of dimensions is 4 auto spatial_size = std::max(rank, 4) - 2; cldnn_axis = spatial_size - spatial_axis - 1 + 2; diff --git a/inference-engine/src/cldnn_engine/ops/gather_elements.cpp b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp new file mode 100644 index 00000000000..d6138280750 --- /dev/null +++ b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn_program.h" +#include "cldnn_common_utils.h" + +#include "ngraph/op/gather_elements.hpp" +#include "ngraph/op/constant.hpp" + +#include "cldnn/primitives/gather_elements.hpp" + +namespace CLDNNPlugin { + +static cldnn::gather_elements::gather_elements_axis GetGatherAxis(int axis, unsigned rank) { + if (axis < 0) + axis += rank; + if (axis < 0 || axis >= rank) + IE_THROW() << "GatherElements axis is not correspond to number of dimensions"; + + // Difference in dimension ordering between IE and clDNN, + // reverse spatial dimensions after batch and feature. + unsigned cldnn_axis = axis; + if (axis >= 2) { + auto spatial_axis = axis - 2; + // Default and minimum number of dimensions is 4 + auto spatial_size = std::max(rank, 4u) - 2; + cldnn_axis = spatial_size - spatial_axis - 1 + 2; + } + + switch (cldnn_axis) { + case 0: return cldnn::gather_elements::gather_elements_axis::along_b; + case 1: return cldnn::gather_elements::gather_elements_axis::along_f; + case 2: return cldnn::gather_elements::gather_elements_axis::along_x; + case 3: return cldnn::gather_elements::gather_elements_axis::along_y; + case 4: return cldnn::gather_elements::gather_elements_axis::along_z; + case 5: return cldnn::gather_elements::gather_elements_axis::along_w; + default: IE_THROW() << "Unsupported GatherElements axis: " << axis; + } + return cldnn::gather_elements::gather_elements_axis::along_f; // shouldn't get here +} + +void CreateGatherElementsOp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {2}); + auto inputPrimitives = p.GetInputPrimitiveIDs(op); + std::string layerName = layer_type_name_ID(op); + + size_t rank = op->get_input_shape(0).size(); + int32_t axis = static_cast(op->get_axis()); + + auto outLayout = DefaultFormatForDims(op->get_output_shape(0).size()); + + auto primitive = cldnn::gather_elements(layerName, + inputPrimitives[0], + inputPrimitives[1], + outLayout, + CldnnTensorFromIEDims(op->get_output_shape(0)), + GetGatherAxis(axis, rank)); + + p.AddPrimitive(primitive); + p.AddPrimitiveToProfiler(op); +} + +REGISTER_FACTORY_IMPL(v6, GatherElements); + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/gna_plugin/backend/dnn.hpp b/inference-engine/src/gna_plugin/backend/dnn.hpp index 4cb0b74a54b..a005468c118 100644 --- a/inference-engine/src/gna_plugin/backend/dnn.hpp +++ b/inference-engine/src/gna_plugin/backend/dnn.hpp @@ -58,7 +58,8 @@ void AdvanceCnnOperationIfAllApplied(const std::vector& c template void AdvancePwlOperationIfAllApplied(const std::vector& component, int i, T*& operation) { - if (i == component.size() - 1 || (component[i + 1].operation != kDnnMaxPoolOp)) { + if (i == component.size() - 1 || ((component[i + 1].operation != kDnnMaxPoolOp) + && (component[i + 1].operation != kDnnPiecewiselinearOp))) { operation++; } } diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h 
b/inference-engine/src/gna_plugin/backend/dnn_types.h index d08d9346d35..0b00b41ec83 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_types.h +++ b/inference-engine/src/gna_plugin/backend/dnn_types.h @@ -227,7 +227,7 @@ OvGnaType OvGnaTypeIntFromBytes(T bytesPerElement) { return r->second; } -static std::string OvGnaTypeToString(OvGnaType type) { +inline std::string OvGnaTypeToString(OvGnaType type) { static const std::map typeToString = { {OvGnaTypeInt8, "OvGnaTypeInt8"}, {OvGnaTypeInt16, "OvGnaTypeInt16"}, @@ -241,7 +241,7 @@ static std::string OvGnaTypeToString(OvGnaType type) { return r->second; } -static std::string OvGnaModeToString(OvGnaMode mode) { +inline std::string OvGnaModeToString(OvGnaMode mode) { static const std::map modeToString = { {OvGnaModeDefault, "OvGnaModeDefault"}, {OvGnaModeDisabled, "OvGnaModeDisabled"}, diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp index 90af0451929..6a3af8e428b 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp @@ -24,6 +24,10 @@ constexpr uint32_t noOfInputsLowPrecDivisor = 16; constexpr uint32_t affineMaxBatchSize = 8; +constexpr uint32_t maxPoolMaxWindowSize = 6; + +constexpr uint32_t copyMaxGrouping = 8; + namespace Cnn2D { struct RangeLimit { uint32_t min; @@ -87,6 +91,8 @@ class Validator { static void ThrowIfNotEmpty(const std::string prefix, const std::string error); public: + Validator() = default; + void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision) const; diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 3c91f18dc3b..bb3451c0aa7 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -63,6 +63,7 @@ #include "transformations/swap_input_matmul_gna.hpp" #include "transformations/convert_matmul_to_pointwise_convolution.hpp" #include "transformations/split_convolution_with_large_buffer_size.hpp" +#include "transformations/decompose_2d_conv.hpp" #include "transformations/convert_padded2valid_conv.hpp" #include @@ -673,6 +674,11 @@ void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer, void GNAPlugin::LoadNetwork(CNNNetwork & _network) { OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "LoadNetwork"); std::shared_ptr convertedNetwork; + + if (!gnaFlags->sw_fp32) { + InitGNADevice(); + } + if (_network.getFunction()) { CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network); const auto& graph = clonedNetwork.getFunction(); @@ -682,6 +688,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } // TODO enable this transformation for networks with convolutions if (!ngraph::op::util::has_op_with_type(graph)) { manager.register_pass(); @@ -870,15 +881,16 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // fill in extra storage with memory layers graphCompiler.fillMemoryConnections(memoryPairs); - if (!graphCompiler.memory_connection.empty()) { + if 
(!graphCompiler.memory_connection.empty() && gnaFlags->gna_lib_async_threads_num != 1) { + // TODO: check if updating the number of threads is needed for sw_fp32 gnaFlags->gna_lib_async_threads_num = 1; + if (!gnaFlags->sw_fp32) + InitGNADevice(); } if (gnaFlags->sw_fp32) { gnamem.reset(new gna_memory_type(memory::make_polymorph>())); graphCompiler.setGNAMemoryPtr(gnamem); - } else { - InitGNADevice(); } // keep inputs information and create input primitives diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp index e18847e851c..f5e28e10aed 100644 --- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp @@ -90,8 +90,8 @@ void Config::UpdateFromMap(const std::map& config) { } } auto scale_factor = InferenceEngine::CNNLayer::ie_parse_float(value); - if (fp32eq(scale_factor, 0.0f)) { - THROW_GNA_EXCEPTION << "input scale factor of 0.0f not supported"; + if (fp32eq(scale_factor, 0.0f) || std::isinf(scale_factor)) { + THROW_GNA_EXCEPTION << "input scale factor of 0.0f or +-inf not supported"; } // missing scale factors are set to be 1.0f if (inputScaleFactors.size() <= input_index) { diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index f4e5fc7a931..fb7a673ca1b 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -2173,7 +2173,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { // Find all output layers connected to FQ auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip); if (nextLayers.empty()) { - return; + continue; } if (isFQFuseAllowed) { diff --git a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp index 1701993f387..82f8ccc5ead 100644 --- a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp +++ b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp @@ -14,72 +14,26 @@ #include #include #include +#include "utils/transformation_helper.hpp" using namespace GNAPluginNS; NGRAPH_RTTI_DEFINITION(ConvertPadded2ValidConv, "ConvertPadded2ValidConv", 0); -struct ConvData { - size_t input_height; - size_t input_width; - size_t input_channel_count; - size_t filter_count; - size_t pads_begin_width; - size_t pads_begin_height; - size_t pads_end_width; - size_t pads_end_height; - ngraph::op::PadType padding_type; - ngraph::element::Type element_type; -}; - -static bool VerifyAndGetConvParams(std::shared_ptr conv, ConvData& conv_data) { +static bool VerifyAndGetConvData(std::shared_ptr conv, ConvData& conv_data) { const auto& input = conv->input_value(0); - // We support only 2D conv batch 1 - if (conv->get_dilations().size() != 2 || - conv->get_strides().size() != 2 || - input.get_shape()[0] != 1) { + // We support only batch 1 + if (input.get_shape()[0] != 1) { return false; } - conv_data.padding_type = conv->get_auto_pad(); - conv_data.input_channel_count = conv->input_value(0).get_shape()[1]; - conv_data.input_height = conv->input_value(0).get_shape()[2]; - conv_data.input_width = conv->input_value(0).get_shape()[3]; - conv_data.filter_count = conv->input_value(1).get_shape()[0]; - conv_data.pads_begin_height = conv->get_pads_begin()[0]; - conv_data.pads_begin_width = conv->get_pads_begin()[1]; 
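The gna_plugin_config.cpp change above tightens scale-factor validation: besides an exact zero, a parsed value of +/-inf is now rejected as well. A minimal sketch of that check (the fp32eq tolerance used here is an assumption, not the plugin's exact helper):

#include <cmath>
#include <cstdio>
#include <stdexcept>
#include <string>

float parse_scale_factor(const std::string& value) {
    float sf = std::stof(value);  // std::stof accepts "inf"/"infinity" and returns +/-INFINITY
    auto fp32eq = [](float a, float b) { return std::fabs(a - b) <= 1e-7f; };
    if (fp32eq(sf, 0.0f) || std::isinf(sf))
        throw std::invalid_argument("input scale factor of 0.0f or +-inf not supported");
    return sf;
}

int main() {
    std::printf("%.2f\n", parse_scale_factor("2.50"));   // accepted
    try {
        parse_scale_factor("inf");                        // rejected
    } catch (const std::invalid_argument& e) {
        std::printf("%s\n", e.what());
    }
    return 0;
}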
- conv_data.pads_end_height = conv->get_pads_end()[0]; - conv_data.pads_end_width = conv->get_pads_end()[1]; - conv_data.element_type = conv->get_element_type(); + GetConvData(conv, conv_data); return conv_data.pads_begin_height || conv_data.pads_end_height || conv_data.pads_begin_width || conv_data.pads_end_width; } -static bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order) { - if (!transpose) - return false; - const ngraph::Output& transpose_order = transpose->input_value(1); - auto transpose_order_dim = transpose_order.get_shape().size(); - - if (transpose_order_dim != 1 || transpose_order.get_shape()[0] != order.size()) - return false; - - auto const_with_order_values = std::dynamic_pointer_cast(transpose_order.get_node_shared_ptr()); - if (!const_with_order_values) - return false; - - const auto data = const_with_order_values->cast_vector(); - if (data.empty()) - return false; - - if (!std::equal(order.begin(), order.end(), data.begin())) - return false; - - return true; -} - static bool VerifyBias(std::shared_ptr bias, const size_t& filter_count) { auto add_const = std::dynamic_pointer_cast(bias->input_value(0).get_node_shared_ptr()); @@ -91,16 +45,6 @@ static bool VerifyBias(std::shared_ptr bias, const size_t& return (add_const && shape_size(add_const->get_shape()) == filter_count); } -static std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size) { - return std::make_shared( - input, // data - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset}), // begin sice index - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset + size}), // end slice index - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)1, (size_t)1}), // strides - std::vector{1, 0}, // begin mask - std::vector{1, 0}); // end mask -} - static void InsertPadding(ngraph::OutputVector& input_rows_to_concat, size_t size, const std::shared_ptr& conv, const std::shared_ptr padding_const, size_t biggest_padding) { @@ -226,7 +170,7 @@ static bool Convert(std::shared_ptr leading_transpose, ConvData conv_data; - if (!VerifyAndGetConvParams(std::dynamic_pointer_cast(conv), conv_data)) + if (!VerifyAndGetConvData(std::dynamic_pointer_cast(conv), conv_data)) return false; // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) @@ -246,7 +190,7 @@ static bool Convert(std::shared_ptr leading_transpose, return true; } -std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) { +static std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) { return [=](ngraph::Output output) -> bool { return ngraph::pattern::consumers_count(expected_count) && ngraph::pattern::rank_equals(expected_rank); }; @@ -263,34 +207,35 @@ ConvertPadded2ValidConv::ConvertPadded2ValidConv() { ngraph::pattern::consumers_count(1)); auto bias = ngraph::pattern::wrap_type({conv, const_input}, ngraph::pattern::consumers_count(1)); - auto fq = ngraph::pattern::wrap_type({bias, const_input, const_input, const_input, const_input}, + auto fq_bias = ngraph::pattern::wrap_type({bias, const_input, const_input, const_input, const_input}, ngraph::pattern::consumers_count(1)); auto max_pool1 = ngraph::pattern::wrap_type({bias}, ngraph::pattern::consumers_count(1)); - auto max_pool2 = ngraph::pattern::wrap_type({fq}, + auto max_pool2 = ngraph::pattern::wrap_type({fq_bias}, ngraph::pattern::consumers_count(1)); auto 
af1 = ngraph::pattern::wrap_type({bias}, ngraph::pattern::consumers_count(1)); auto af2 = ngraph::pattern::wrap_type({fq}, ngraph::pattern::consumers_count(1)); + ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1)); auto af3 = ngraph::pattern::wrap_type({max_pool1}, ngraph::pattern::consumers_count(1)); auto af4 = ngraph::pattern::wrap_type({max_pool2}, ngraph::pattern::consumers_count(1)); - auto transpose_input = std::make_shared(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq, af1, af2, af3, af4}); + auto fq_af = ngraph::pattern::wrap_type({af4, const_input, const_input, const_input, const_input}, + ngraph::pattern::consumers_count(1)); + auto transpose_input = + std::make_shared(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af}); auto trailing_transpose = ngraph::pattern::wrap_type({transpose_input, const_input}, consumers_and_rank(1, 4)); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); - auto conv_output = conv->output(0).get_node_shared_ptr(); - IE_ASSERT(conv_output != nullptr); - - auto bias_node = std::dynamic_pointer_cast(conv_output); + auto bias_it = pattern_map.find(bias); + auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr()); return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node); diff --git a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp index 9d8a0f10477..55bef912b9c 100644 --- a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp +++ b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp @@ -13,11 +13,11 @@ namespace GNAPluginNS { * wrapped with transposes, to a valid convolution with padding added before the leading transpose, * POT precessed models are supported (fake quantized layers omitted below for clarity): * - * Padding - * | + * Padding + * | * Transpose (NHWC -> NCHW) Transpose (NHWC -> NCHW) * | | - * Convolution with padding Convolution with padding + * Convolution with padding Valid convolution * | | * Broadcast Bias (optional) Broadcast Bias (optional) * | | diff --git a/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp new file mode 100644 index 00000000000..4b313ce8bb0 --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp @@ -0,0 +1,667 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "transformations/decompose_2d_conv.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include "utils/transformation_helper.hpp" +#include "backend/gna_limitations.hpp" +#include "layers/gna_convolution_layer.hpp" + + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(Decompose2DConv, "Decompose2DConv", 0); +NGRAPH_RTTI_DEFINITION(Decompose2DConvTransposedWithBias, "Decompose2DConvTransposedWithBias", 0); +NGRAPH_RTTI_DEFINITION(Decompose2DConvTransposedWithBiasAF, "Decompose2DConvTransposedWithBiasAF", 0); + +struct GraphData { + std::shared_ptrleading_transpose; + std::shared_ptrfq_conv; + std::shared_ptrconv; + 
std::shared_ptrtrailing_transpose; + std::shared_ptrfq_bias; + std::shared_ptrmax_pool; + std::shared_ptraf; + std::shared_ptrfq_af; + std::shared_ptrlast_op_in_sequence_for_replacement; + std::shared_ptrbias_const; + size_t conv_count; + size_t pool_size_width; + size_t pool_stride_width; + // TODO: currently 2D max pool is not supported + //size_t pool_size_height; + //size_t pool_stride_height; +}; + +static bool VerifyAndGetConvData(std::shared_ptr conv, ConvData& conv_data) { + const auto& input = conv->input_value(0); + const auto& filters = conv->input_value(1); + + // We support only batch == 1 + if (input.get_shape()[0] != 1) { + return false; + } + + size_t filter_height = filters.get_shape()[2]; + size_t filter_width = filters.get_shape()[3]; + + if (filter_width > GNALimitations::copyMaxGrouping || filter_height > GNALimitations::copyMaxGrouping) { + return false; + } + + GetConvData(conv, conv_data); + + IE_ASSERT(conv_data.output_channel_count == conv->get_output_shape(0)[1]); + + return true; +} + +static std::shared_ptr VerifyBiasAndReshapeConst(std::shared_ptr conv_bias, const ConvData& conv_data) { + auto add_const = std::dynamic_pointer_cast(conv_bias->input_value(1).get_node_shared_ptr()); + + if (add_const) { + auto bias_size = shape_size(add_const->get_shape()); + + // The add may be a normal add not conv bias, then we just go further + if (bias_size == conv_data.filter_count) { + return ngraph::op::util::make_try_fold(add_const, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{1, bias_size, 1, 1}), false); + } + } + // Bias size does not match (or dynamic bias), can't decompose such convolution + return nullptr; +} + +static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr max_pool) { + auto pool_filter = max_pool->get_kernel(); + auto pool_strides = max_pool->get_strides(); + + // Check Max Pool padding and limitations + if ((max_pool->get_auto_pad() != ngraph::op::PadType::VALID && + (max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT || + max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) || + pool_filter.size() != 2 || pool_strides.size() != 2 || + pool_filter[0] > GNALimitations::maxPoolMaxWindowSize) + return false; + + graph_data.pool_size_width = pool_filter[1]; + graph_data.pool_stride_width = pool_strides[1]; + return true; +} + +static size_t CalculateConvCount(const ConvData& conv_data) { + // Check if split of plane due to GNA HW limitations of 768 filter elements is possible + size_t conv_count = 1; + size_t total_factorized_conv_channel_count = (conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width); + while (total_factorized_conv_channel_count / conv_count > GNALimitations::convFilterMaxSize || + total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0) + conv_count++; + + return conv_count; +} + +static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) { + // Calculate the number of splits required + graph_data.conv_count = CalculateConvCount(conv_data); + + // Concat (copy) layer limitation allows to split up to a certain limit + // Currently we are able to split only convolutions without pooling in horizontal dimension + if (graph_data.conv_count > GNALimitations::copyMaxGrouping || + ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) + return false; + + // GNA supported features or handled 
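CalculateConvCount() above looks for the smallest number of sub-convolutions such that each part stays within the 768-filter-element GNA limit and both channel counts divide evenly. A standalone sketch of that search with illustrative shapes:

#include <cstddef>
#include <cstdio>

// Mirrors the split search: grow conv_count until the factorized filter volume
// fits the GNA limit and the channel counts split without remainder.
size_t calc_conv_count(size_t in_channels, size_t kernel_h, size_t kernel_w, size_t filter_channels) {
    const size_t conv_filter_max_size = 768;  // the 768-element HW limit mentioned above
    const size_t total = in_channels * kernel_h * kernel_w;
    size_t conv_count = 1;
    while (total / conv_count > conv_filter_max_size ||
           total % conv_count != 0 ||
           filter_channels % conv_count != 0)
        ++conv_count;
    return conv_count;
}

int main() {
    // 64 input channels, 3x3 kernel -> 576 elements, fits in a single convolution
    std::printf("64ch 3x3  -> %zu split(s)\n", calc_conv_count(64, 3, 3, 64));
    // 128 input channels, 3x3 kernel -> 1152 elements, needs two splits of 576
    std::printf("128ch 3x3 -> %zu split(s)\n", calc_conv_count(128, 3, 3, 128));
    return 0;
}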
otherwise - there is no need to decompose such convolution + if (graph_data.conv_count == 1 && (((conv_data.input_height == 1 || conv_data.input_width == 1) && + conv_data.filter_dilation_width == 1 && conv_data.filter_dilation_height == 1) || + GNAConvolutionLayer::isMappableFrom2DTo1D(conv_data.input_height, conv_data.input_width, conv_data.filter_width, conv_data.filter_stride_width))) + return false; + + return true; +} + +static std::vector> Split2DConvFilters(std::shared_ptr& filters, + const bool& vertical_permute, const bool& horizontal_permute, const size_t& split_channels) { + + if (!horizontal_permute && !vertical_permute && split_channels == 1) + return {filters}; + + std::vector > result; + ngraph::Shape reshape_shape; + auto flat_filters = filters->outputs(); + const auto filter_shape = filters->get_output_shape(0); + IE_ASSERT(filter_shape.size() == 4); + + if (split_channels > 1) { + const auto axis_node = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); + const auto split = std::make_shared(filters, axis_node, split_channels); + flat_filters = split->outputs(); + } + + for (size_t split_index = 0; split_index < split_channels; split_index++) { + ngraph::Output& flat_filter = flat_filters[split_index]; + if (horizontal_permute && !vertical_permute) { + result.push_back(std::make_shared(flat_filter, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 1, 3, 2}))); + } else { + result.push_back(flat_filter.get_node_shared_ptr()); + } + } + + if (vertical_permute && horizontal_permute) { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[2] * filter_shape[3] / split_channels, 1, 1}; + } else if (vertical_permute && !horizontal_permute) { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[2] / split_channels, 1, filter_shape[3]}; + } else if (!vertical_permute && horizontal_permute) { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[3] / split_channels, filter_shape[2], 1}; + } else { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] / split_channels, filter_shape[2], filter_shape[3]}; + } + + for (auto &new_filter : result) + new_filter = ngraph::op::util::make_try_fold(new_filter, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, reshape_shape), false); + + return result; +} + +static ngraph::OutputVector SplitInput(const GraphData& graph_data, ConvData& conv_data) { + // We need to have proper input shape first + ngraph::OutputVector split_planes; + auto padded_input_plane = std::make_shared(graph_data.leading_transpose->input_value(0), + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + ngraph::Shape{1, shape_size(graph_data.leading_transpose->input_value(0).get_shape())}), false); + copy_runtime_info(graph_data.conv, padded_input_plane); + + if (graph_data.conv_count > 1) { + // If we have split input plane and convolutions due to GNA limitation - we must sum their results at the end + conv_data.input_channel_count /= graph_data.conv_count; + + auto reshape_before_transpose = std::make_shared(padded_input_plane, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + {shape_size(padded_input_plane->get_shape()) / graph_data.conv_count, graph_data.conv_count}), false); + + auto transpose_before_channel_wise_split = std::make_shared(reshape_before_transpose, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 
0})->output(0)); + + const auto axis_node = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); + const auto split = std::make_shared(transpose_before_channel_wise_split, axis_node, graph_data.conv_count); + split_planes = split->outputs(); + } else { + split_planes.push_back(padded_input_plane); + } + + return split_planes; +} + +static std::vector> SplitFilters(const GraphData& graph_data, ConvData& conv_data) { + // If the input plane exceeds GNA limits and we have split into several convolutions, then we need to split filter data as well; + // we also need to take filter height and potential dilation into account when modifying the filters + + // Take account of fake quantize when getting filter values + auto filter_values = std::dynamic_pointer_cast(graph_data.fq_conv == nullptr ? + graph_data.conv->input_value(1).get_node_shared_ptr() : graph_data.fq_conv->input_value(0).get_node_shared_ptr()); + bool vertical_permute = (conv_data.filter_height > 1); + bool horizontal_permute = (conv_data.filter_dilation_width > 1); + std::vector> h_1_filters{}; + + h_1_filters = Split2DConvFilters(filter_values, vertical_permute, horizontal_permute, graph_data.conv_count); + + for (auto filter : h_1_filters) + copy_runtime_info(graph_data.conv, filter); + + return h_1_filters; +} + +static void TransformInput(const GraphData& graph_data, const ConvData& conv_data, ngraph::Output& split_input_plane) { + /* + * Padded row - NHWC order + * | + * Split in vertical dim (filter height) + * / | \ + * Concat + * | + * Transpose + */ + + // First we need to prepare flat (height = 1) slices of input data proper for flattened (height = 1) filters created later on; + // the input datat is overlapping (duplicated) + ngraph::OutputVector dilated_input_planes; + for (size_t filter_height = 0; filter_height < conv_data.filter_height; filter_height++) { + size_t offset; + + if (conv_data.filter_stride_height > 1) { + // Prepare strided slices of input data + for (size_t output_height = 0; output_height < conv_data.output_height; output_height++) { + offset = (filter_height * conv_data.filter_dilation_height + output_height * conv_data.filter_stride_height) * + conv_data.input_width * conv_data.input_channel_count; + auto slice = FlatCrop(split_input_plane, offset, conv_data.input_width * conv_data.input_channel_count); + copy_runtime_info(graph_data.conv, slice); + dilated_input_planes.push_back(slice); + } + } else { + offset = filter_height * conv_data.filter_dilation_height * conv_data.input_width * conv_data.input_channel_count; + auto slice = FlatCrop(split_input_plane, offset, conv_data.input_width * conv_data.input_channel_count * conv_data.output_height); + copy_runtime_info(graph_data.conv, slice); + dilated_input_planes.push_back(slice); + } + } + + // Interleaving dilated input planes + std::shared_ptr dilated_chunks_concat = std::make_shared(dilated_input_planes, 0); + + // Additional reshape is required for strided slices of input intended for each filter row + if (conv_data.filter_stride_height > 1) { + dilated_chunks_concat = std::make_shared(dilated_chunks_concat, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + {conv_data.filter_height, conv_data.input_width * conv_data.input_channel_count * conv_data.output_height}), false); + } + + auto transposed_dilated_chunks = std::make_shared(dilated_chunks_concat, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0})->output(0)); + + // Flattening of interleaved input planes + 
auto flattened_dilated_transposed_input = std::make_shared(transposed_dilated_chunks, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + {(size_t)1, conv_data.input_width * conv_data.input_channel_count * conv_data.output_height * conv_data.filter_height}), false); + + copy_runtime_info(graph_data.conv, {dilated_chunks_concat, flattened_dilated_transposed_input, transposed_dilated_chunks }); + split_input_plane = flattened_dilated_transposed_input; +} + +static void InsertFQLayer(const std::shared_ptr fqLayer, + std::shared_ptr lastNode) { + if (fqLayer != nullptr) { + lastNode = fqLayer->clone_with_new_inputs({lastNode, + fqLayer->input_value(1), fqLayer->input_value(2), + fqLayer->input_value(3), fqLayer->input_value(4)}); + ngraph::copy_runtime_info(fqLayer, lastNode); + } +} + +// Valid 1D (decomposed 2D) convolution wrapped with transposes NHWC => NCHW => conv => NCHW => NHWC +static std::shared_ptr Create1DConv(const GraphData& graph_data, const ConvData& conv_data, const ngraph::Output& input, + std::shared_ptr filters, const size_t conv_index, const size_t h_index) { + // Transpose NHWC => NCHW + std::shared_ptr nchw_input = std::make_shared(input, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 1, 2})->output(0)); + + // Fake quantize + InsertFQLayer(graph_data.fq_conv, filters); + + // 1D Convolution + auto conv = std::make_shared(nchw_input, filters, + ngraph::Strides{1, conv_data.filter_stride_width}, ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, + ngraph::Strides{1, 1}, ngraph::op::PadType::VALID); + std::string conv_name = graph_data.conv->get_friendly_name() + "_H_" + std::to_string(h_index) + "_CH_" + std::to_string(0); + conv->set_friendly_name(conv_name); + + // Bias & fake quantize + std::shared_ptr last_conv_block_op = conv; + if (graph_data.bias_const && conv_index == 0) { + last_conv_block_op = std::make_shared(conv, graph_data.bias_const); + copy_runtime_info(graph_data.conv, last_conv_block_op); + InsertFQLayer(graph_data.fq_bias, last_conv_block_op); + } + + // Max pooling + if ((graph_data.max_pool && graph_data.pool_size_width > 1) || graph_data.pool_stride_width > 1) { + last_conv_block_op = std::make_shared(last_conv_block_op, + ngraph::Strides{1, graph_data.pool_stride_width}, ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, + ngraph::Shape{1, graph_data.pool_size_width}, graph_data.max_pool->get_rounding_type(), ngraph::op::PadType::VALID); + } + + // Activation function & fake quantize + if (graph_data.af && graph_data.conv_count == 1) { + last_conv_block_op = graph_data.af->copy_with_new_inputs({last_conv_block_op}); + copy_runtime_info(conv, last_conv_block_op); + InsertFQLayer(graph_data.fq_af, last_conv_block_op); + } + + // Transpose NCHW => NHWC + auto nhwc_output = std::make_shared(last_conv_block_op, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 2, 3, 1})->output(0)); + copy_runtime_info(graph_data.conv, {nchw_input, conv, nhwc_output}); + return nhwc_output; +} + +static std::shared_ptr CreateDecomposedConv(const GraphData& graph_data, ConvData& conv_data, + ngraph::Output& reduced_input_plane, const std::vector>& h_1_filters, const size_t conv_index) { + ngraph::OutputVector result_chunks; + std::shared_ptr last_op; + bool horizontal_permute = (conv_data.filter_dilation_width > 1); + size_t h_1_filter_channel_count = (conv_data.input_channel_count * conv_data.filter_height); + + for (size_t output_height = 0; output_height < conv_data.output_height; 
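TransformInput() above slices the flattened NHWC plane once per (filter row, output row) pair, using offset = (filter_row * dilation_h + output_row * stride_h) * input_width * input_channels and a chunk length of one input row. A small sketch that prints those crop ranges for illustrative shapes:

#include <cstddef>
#include <cstdio>

int main() {
    // Illustrative shapes: 7x8 NHWC input, 3 channels, 3-row filter, vertical stride 2.
    const size_t input_width = 8, input_channels = 3;
    const size_t filter_height = 3, dilation_h = 1, stride_h = 2;
    const size_t output_height = 3;  // (7 - 3) / 2 + 1 for a valid convolution

    const size_t row_elems = input_width * input_channels;
    for (size_t filter_row = 0; filter_row < filter_height; ++filter_row) {
        for (size_t out_row = 0; out_row < output_height; ++out_row) {
            const size_t offset = (filter_row * dilation_h + out_row * stride_h) * row_elems;
            // Each crop feeds one flattened (height = 1) filter row, as FlatCrop does above.
            std::printf("filter row %zu, output row %zu -> elements [%zu, %zu)\n",
                        filter_row, out_row, offset, offset + row_elems);
        }
    }
    return 0;
}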
output_height++) {
+        size_t offset = output_height * conv_data.input_width * h_1_filter_channel_count;
+        auto row = (conv_data.output_height == 1) ? reduced_input_plane :
+            FlatCrop(reduced_input_plane, offset, conv_data.input_width * h_1_filter_channel_count);
+        /*
+         *              Padded row
+         *                  |
+         *             ???      ???
+         *                  |
+         *          Split in vertical dim
+         *              /   |   \
+         *                Concat
+         *                  |
+         *               Permute
+         *                  |
+         *        Transpose (NHWC => NCHW)
+         *                  |
+         *       1D Conv (Bias | MaxPooling)
+         *                  |
+         *        Transpose (NCHW => NHWC)
+         */
+        auto nhwc_conv_y_input = row;
+
+        if (horizontal_permute) {
+            // Horizontal split - transform input accordingly
+            ngraph::OutputVector dilated_chunks;
+            std::shared_ptr dilated_chunks_concat = nhwc_conv_y_input.get_node_shared_ptr();
+
+            // When the horizontal stride is > 1 we recompute these parameters instead of reusing the ones from the original convolution,
+            // because the horizontal stride is handled by the newly created 1D convolution and not by the decomposition itself
+            size_t filter_dilation_width = conv_data.filter_width > 1 ? conv_data.filter_dilation_width : 1;
+            size_t output_width = (conv_data.input_width - (conv_data.filter_width + filter_dilation_width - 2));
+
+            if (conv_data.filter_width > 1) {
+                for (size_t filter_width = 0; filter_width < conv_data.filter_width; filter_width++) {
+                    size_t offset = filter_width * conv_data.filter_dilation_width * h_1_filter_channel_count;
+                    auto slice = FlatCrop(row, offset, h_1_filter_channel_count * output_width);
+                    copy_runtime_info(graph_data.conv, slice);
+                    dilated_chunks.push_back(slice);
+                }
+
+                dilated_chunks_concat = std::make_shared(dilated_chunks, 0);
+            }
+
+            auto transposed_dilated_chunks = std::make_shared(dilated_chunks_concat,
+                ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0})->output(0));
+
+            auto flattened_dilated_conv_input = std::make_shared(transposed_dilated_chunks,
+                ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4},
+                    ngraph::Shape{1, 1, output_width, h_1_filter_channel_count * conv_data.filter_width}), false);
+
+            copy_runtime_info(graph_data.conv, ngraph::NodeVector{flattened_dilated_conv_input, transposed_dilated_chunks, dilated_chunks_concat});
+
+            nhwc_conv_y_input = flattened_dilated_conv_input;
+        } else {
+            // If no horizontal split is done, only a reshape is required before the decomposed convolution
+            nhwc_conv_y_input = std::make_shared(nhwc_conv_y_input,
+                ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4},
+                    ngraph::Shape{1, 1, conv_data.input_width, h_1_filter_channel_count}), false);
+        }
+
+        // Pointwise convolutions
+        // Valid 1D convolution wrapped with transposes NHWC => NCHW => Conv => NCHW => NHWC
+        // Activation function can be fused with convolution only if it isn't split
+        auto nhwc_y_output = Create1DConv(graph_data, conv_data, nhwc_conv_y_input, h_1_filters[conv_index], conv_index, output_height);
+        result_chunks.push_back(nhwc_y_output);
+        last_op = nhwc_y_output;
+    }
+
+    // Horizontal dimension greater than 1
+    if (result_chunks.size() > 1) {
+        // Concat in horizontal dimension
+        // In NHWC layout the index of H is 1
+        auto concatenated_sub_results = std::make_shared(result_chunks, 1);
+        copy_runtime_info(graph_data.conv, concatenated_sub_results);
+        last_op = concatenated_sub_results;
+    }
+    return last_op;
+}
+
+static void Decompose(const GraphData& graph_data, ConvData& conv_data) {
+    std::vector> partial_conv_results;
+
+    // Split input due to GNA filter element count limit
+    auto split_planes =
SplitInput(graph_data, conv_data); + // Split filters due to GNA filter element count limit, 2D convolution shape, or dilations + auto h_1_filters = SplitFilters(graph_data, conv_data); + + // Do transformations in each of the splits created above + for (size_t conv_index = 0; conv_index < graph_data.conv_count; conv_index++) { + ngraph::Output& split_input_plane = split_planes[conv_index]; + + // Input data needs to be prepared before 2D convolution decomposition + if (conv_data.filter_height > 1 || conv_data.filter_stride_height > 1) { + TransformInput(graph_data, conv_data, split_input_plane); + } + + auto flat_conv = CreateDecomposedConv(graph_data, conv_data, split_input_plane, h_1_filters, conv_index); + partial_conv_results.push_back(flat_conv); + } + + std::shared_ptr conv_result = partial_conv_results.front(); + for (size_t i = 1; i < partial_conv_results.size(); i++) { + auto add_result = std::make_shared(partial_conv_results[i], conv_result); + copy_runtime_info(graph_data.conv, add_result); + conv_result = add_result; + } + + // TODO: Max Pool 2D case + //if (graph_data.max_pool && (graph_data.pool_size_height > 1 || graph_data.pool_stride_height > 1)) { + //} + + // Activation function after trailing Transpose NCHW->NHWC + if (graph_data.af && graph_data.conv_count > 1) { + auto af_result = graph_data.af->copy_with_new_inputs({conv_result}); + copy_runtime_info(graph_data.conv, af_result); + conv_result = af_result; + } + // We need to put the same name as before for the Convolution layer, so its output can be used as network result + std::string conv_result_name = graph_data.last_op_in_sequence_for_replacement->get_friendly_name(); + replace_node(graph_data.last_op_in_sequence_for_replacement, conv_result); + conv_result->set_friendly_name(conv_result_name); +} + +static bool Convert(std::shared_ptr leading_transpose, + std::shared_ptr fq_conv, + std::shared_ptr conv, + std::shared_ptr trailing_transpose, + std::shared_ptr bias, + std::shared_ptr fq_bias, + std::shared_ptr max_pool, + std::shared_ptr af, + std::shared_ptr fq_af, + std::shared_ptr last_op_for_replacement) { + + GraphData graph_data{std::dynamic_pointer_cast(leading_transpose), + std::dynamic_pointer_cast(fq_conv), + std::dynamic_pointer_cast(conv), + std::dynamic_pointer_cast(trailing_transpose), + std::dynamic_pointer_cast(fq_bias), + std::dynamic_pointer_cast(max_pool), + std::dynamic_pointer_cast(af), + std::dynamic_pointer_cast(fq_af), + last_op_for_replacement, nullptr, 1, 1, 1}; + ConvData conv_data; + + if (!VerifyAndGetConvData(std::dynamic_pointer_cast(conv), conv_data)) + return false; + + // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) + // or similar cases, so required network must be in NHWC order like in TF + if (!TransposeOrderMatches(std::dynamic_pointer_cast(leading_transpose), {0, 3, 1, 2})) + return false; + + if (!TransposeOrderMatches(std::dynamic_pointer_cast(trailing_transpose), {0, 2, 3, 1})) + return false; + + if (bias && !(graph_data.bias_const = VerifyBiasAndReshapeConst(std::dynamic_pointer_cast(bias), conv_data))) + return false; + + if (max_pool && !VerifyMaxPool(graph_data, std::dynamic_pointer_cast(max_pool))) + return false; + + if (!ShouldDecompose(graph_data, conv_data)) + return false; + + // All checks applied - now we may start decomposition + Decompose(graph_data, conv_data); + + return true; +} + +static bool VerifyBias(std::shared_ptr conv, std::shared_ptr bias) { + auto add_const = 
std::dynamic_pointer_cast(bias->input_value(1).get_node_shared_ptr());
+
+    if (!add_const) {
+        add_const = std::dynamic_pointer_cast(bias->input_value(0).get_node_shared_ptr());
+    }
+
+    // A bias constant must be found on one of the Add inputs and its element count must match the filter (output channel) count
+    if (add_const) {
+        auto bias_size = shape_size(add_const->get_shape());
+        auto conv_filter_count = conv->input_value(1).get_shape()[0];
+        if (bias_size == conv_filter_count)
+            return true;
+    }
+    return false;
+}
+
+Decompose2DConv::Decompose2DConv() {
+    MATCHER_SCOPE(Decompose2DConv);
+
+    auto const_input = ngraph::pattern::wrap_type();
+    auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input},
+        consumers_and_rank(1, 4));
+    auto filters_const = ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4));
+    auto fq_conv = ngraph::pattern::wrap_type({const_input, const_input, const_input, const_input, const_input},
+        consumers_and_rank(1, 4));
+    auto filters = std::make_shared(ngraph::OutputVector{filters_const, fq_conv});
+    auto conv = ngraph::pattern::wrap_type({leading_transpose, filters},
+        consumers_and_rank(1, 4));
+    auto bias = ngraph::pattern::wrap_type({conv, const_input},
+        ngraph::pattern::consumers_count(1));
+    auto fq_bias = ngraph::pattern::wrap_type({bias, const_input, const_input, const_input, const_input},
+        ngraph::pattern::consumers_count(1));
+    auto max_pool1 = ngraph::pattern::wrap_type({bias},
+        ngraph::pattern::consumers_count(1));
+    auto max_pool2 = ngraph::pattern::wrap_type({fq_bias},
+        ngraph::pattern::consumers_count(1));
+    auto af1 = ngraph::pattern::wrap_type({bias}, ngraph::pattern::consumers_count(1));
+    auto af2 = ngraph::pattern::wrap_type({fq_bias}, ngraph::pattern::consumers_count(1));
+    auto af3 = ngraph::pattern::wrap_type({max_pool1}, ngraph::pattern::consumers_count(1));
+    auto af4 = ngraph::pattern::wrap_type({max_pool2}, ngraph::pattern::consumers_count(1));
+    auto fq_af = ngraph::pattern::wrap_type({af4, const_input, const_input, const_input, const_input},
+        ngraph::pattern::consumers_count(1));
+    auto transpose_input =
+        std::make_shared(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af});
+    auto trailing_transpose = ngraph::pattern::wrap_type({transpose_input, const_input},
+        consumers_and_rank(1, 4));
+
+    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        auto fq_conv_it = pattern_map.find(fq_conv);
+        auto fq_conv_node = (fq_conv_it == std::end(pattern_map) ? nullptr : fq_conv_it->second.get_node_shared_ptr());
+        auto bias_it = pattern_map.find(bias);
+        auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr());
+        auto fq_bias_it = pattern_map.find(fq_bias);
+        auto fq_bias_node = (fq_bias_it == std::end(pattern_map) ? nullptr : fq_bias_it->second.get_node_shared_ptr());
+        auto fq_af_it = pattern_map.find(fq_af);
+        auto fq_af_node = (fq_af_it == std::end(pattern_map) ? nullptr : fq_af_it->second.get_node_shared_ptr());
+        auto max_pool1_it = pattern_map.find(max_pool1);
+        auto max_pool2_it = pattern_map.find(max_pool2);
+        auto max_pool_node = (max_pool1_it == std::end(pattern_map) ?
+            ((max_pool2_it == std::end(pattern_map) ?
nullptr : max_pool2_it->second.get_node_shared_ptr())) : max_pool1_it->second.get_node_shared_ptr()); + std::shared_ptr af_node = nullptr; + std::vector af_it + {pattern_map.find(af1), pattern_map.find(af2), pattern_map.find(af3), pattern_map.find(af4)}; + + for (auto const& af : af_it) { + if (af != std::end(pattern_map)) { + af_node = af->second.get_node_shared_ptr(); + break; + } + } + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), fq_conv_node, pattern_map.at(conv).get_node_shared_ptr(), + pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node, fq_bias_node, max_pool_node, af_node, fq_af_node, + pattern_map.at(trailing_transpose).get_node_shared_ptr()); + }; + + auto m = std::make_shared(trailing_transpose, matcher_name); + this->register_matcher(m, callback); +} + +Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() { + MATCHER_SCOPE(Decompose2DConvTransposedWithBias); + + auto const_input_i64 = ngraph::pattern::wrap_type(ngraph::pattern::type_matches(ngraph::element::i64)); + auto const_input = ngraph::pattern::wrap_type(); + auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input_i64}, + consumers_and_rank(1, 4)); + auto conv = ngraph::pattern::wrap_type( + {leading_transpose, ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4))}, + consumers_and_rank(1, 4)); + auto trailing_transpose = ngraph::pattern::wrap_type({conv, const_input_i64}, + consumers_and_rank(1, 4)); + auto bias = ngraph::pattern::wrap_type({trailing_transpose, const_input}, + ngraph::pattern::consumers_count(1)); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (!VerifyBias(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())) + return false; + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(), + pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), nullptr, nullptr, + nullptr, nullptr, pattern_map.at(bias).get_node_shared_ptr()); + }; + + auto m = std::make_shared(bias, matcher_name); + this->register_matcher(m, callback); +} + +Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF() { + MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF); + + auto const_input_i64 = ngraph::pattern::wrap_type(ngraph::pattern::type_matches(ngraph::element::i64)); + auto const_input = ngraph::pattern::wrap_type(); + auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input_i64}, + consumers_and_rank(1, 4)); + auto conv = ngraph::pattern::wrap_type( + {leading_transpose, ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4))}, + consumers_and_rank(1, 4)); + auto trailing_transpose = ngraph::pattern::wrap_type({conv, const_input_i64}, + consumers_and_rank(1, 4)); + auto bias = ngraph::pattern::wrap_type({trailing_transpose, const_input}, + ngraph::pattern::consumers_count(1)); + auto af = ngraph::pattern::wrap_type({bias}, + ngraph::pattern::consumers_count(1)); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (!VerifyBias(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())) + return false; + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, 
pattern_map.at(conv).get_node_shared_ptr(),
+            pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), nullptr,
+            nullptr, pattern_map.at(af).get_node_shared_ptr(), nullptr, pattern_map.at(af).get_node_shared_ptr());
+    };
+
+    auto m = std::make_shared(af, matcher_name);
+    this->register_matcher(m, callback);
+}
diff --git a/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp
new file mode 100644
index 00000000000..4fbaf47ff72
--- /dev/null
+++ b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp
@@ -0,0 +1,80 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+
+namespace GNAPluginNS {
+
+/**
+ * @brief Decompose a 2D convolution, wrapped with transposes,
+ * to a set of valid 1D convolutions with padding added in front of the set:
+ *
+ *                                        Padding
+ *                                           |
+ *   Transpose (NHWC -> NCHW)            Transpose (NHWC -> NCHW)
+ *              |                                 |
+ *   Convolution with padding            Valid convolution
+ *              |                                 |
+ *   Broadcast Bias (optional)           Broadcast Bias (optional)
+ *              |                                 |
+ *   Max Pooling (optional)              Max Pooling (optional)
+ *              |                                 |
+ *   Activation Function (optional)      Activation Function (optional)
+ *              |                                 |
+ *   Transpose (NCHW -> NHWC)            Transpose (NCHW -> NHWC)
+ *
+ */
+class Decompose2DConv : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    Decompose2DConv();
+};
+
+/**
+ * @brief Decompose a 2D convolution wrapped with transposes, with bias after trailing transpose,
+ * to a set of valid 1D convolutions with padding added in front of the set:
+ *
+ *                                        Padding
+ *                                           |
+ *   Transpose (NHWC -> NCHW)            Transpose (NHWC -> NCHW)
+ *              |                                 |
+ *   Convolution with padding            Valid convolution
+ *              |                                 |
+ *   Transpose (NCHW -> NHWC)            Transpose (NCHW -> NHWC)
+ *              |                                 |
+ *   Broadcast Bias                      Broadcast Bias
+ *
+ */
+class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    Decompose2DConvTransposedWithBias();
+};
+
+/**
+ * @brief Decompose a 2D convolution wrapped with transposes, with bias
+ * to a set of valid 1D convolutions with padding added in front of the set:
+ *
+ *                                        Padding
+ *                                           |
+ *   Transpose (NHWC -> NCHW)            Transpose (NHWC -> NCHW)
+ *              |                                 |
+ *   Convolution with padding            Valid convolution
+ *              |                                 |
+ *   Transpose (NCHW -> NHWC)            Transpose (NCHW -> NHWC)
+ *              |                                 |
+ *   Broadcast Bias                      Broadcast Bias
+ *              |                                 |
+ *   Activation Function                 Activation Function
+ *
+ */
+class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    Decompose2DConvTransposedWithBiasAF();
+};
+
+} // namespace GNAPluginNS
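The three matcher passes declared above are ordinary ngraph MatcherPass classes, so they are meant to be run through a pass manager by the GNA plugin. The snippet below is only an illustrative sketch of how such passes are typically registered; the actual plugin pipeline, pass order and surrounding passes may differ:

#include <ngraph/pass/manager.hpp>
#include "transformations/decompose_2d_conv.hpp"

// Hypothetical helper (not part of the patch): registers the decomposition passes on a pass manager.
void RegisterDecompose2DConvPasses(ngraph::pass::Manager& manager) {
    manager.register_pass<GNAPluginNS::Decompose2DConv>();
    manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBias>();
    manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBiasAF>();
}
// Usage sketch: ngraph::pass::Manager m; RegisterDecompose2DConvPasses(m); m.run_passes(function);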
diff --git a/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp
new file mode 100644
index 00000000000..79fe863a18f
--- /dev/null
+++ b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp
@@ -0,0 +1,75 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include
+#include
+#include "transformation_helper.hpp"
+
+
+namespace GNAPluginNS {
+
+void GetConvData(std::shared_ptr conv, ConvData& conv_data) {
+    conv_data.output_height = conv->get_output_shape(0)[2];
+    conv_data.output_width = conv->get_output_shape(0)[3];
+    conv_data.input_channel_count = conv->input_value(0).get_shape()[1];
+    conv_data.input_height = conv->input_value(0).get_shape()[2];
+    conv_data.input_width = conv->input_value(0).get_shape()[3];
+    conv_data.filter_count = conv->input_value(1).get_shape()[0];
+    conv_data.filter_channel_count = conv->input_value(1).get_shape()[1];
+    conv_data.filter_height = conv->input_value(1).get_shape()[2];
+    conv_data.filter_width = conv->input_value(1).get_shape()[3];
+    conv_data.filter_dilation_height = conv->get_dilations()[0];
+    conv_data.filter_dilation_width = conv->get_dilations()[1];
+    conv_data.filter_stride_height = conv->get_strides()[0];
+    conv_data.filter_stride_width = conv->get_strides()[1];
+    conv_data.output_channel_count = conv_data.filter_count;
+    conv_data.pads_begin_height = conv->get_pads_begin()[0];
+    conv_data.pads_begin_width = conv->get_pads_begin()[1];
+    conv_data.pads_end_height = conv->get_pads_end()[0];
+    conv_data.pads_end_width = conv->get_pads_end()[1];
+    conv_data.padding_type = conv->get_auto_pad();
+    conv_data.element_type = conv->get_element_type();
+}
+
+std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) {
+    return [=](ngraph::Output output) -> bool {
+        return ngraph::pattern::consumers_count(expected_count)(output) && ngraph::pattern::rank_equals(expected_rank)(output);
+    };
+}
+
+bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order) {
+    if (!transpose)
+        return false;
+    const ngraph::Output& transpose_order = transpose->input_value(1);
+    auto transpose_order_dim = transpose_order.get_shape().size();
+
+    if (transpose_order_dim != 1 || transpose_order.get_shape()[0] != order.size())
+        return false;
+
+    auto const_with_order_values = std::dynamic_pointer_cast(transpose_order.get_node_shared_ptr());
+    if (!const_with_order_values)
+        return false;
+
+    const auto data = const_with_order_values->cast_vector();
+    if (data.empty())
+        return false;
+
+    if (!std::equal(order.begin(), order.end(), data.begin()))
+        return false;
+
+    return true;
+}
+
+std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size) {
+    return std::make_shared(
+        input,  // data
+        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset}),  // begin slice index
+        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset + size}),  // end slice index
+        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)1, (size_t)1}),  // strides
+        std::vector{1, 0},  // begin mask
+        std::vector{1, 0});  // end mask
+}
+
+} // namespace GNAPluginNS
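FlatCrop above keeps the data in a flattened 1 x N row and slices only the second dimension: the begin/end masks {1, 0} tell StridedSlice to ignore the bounds for dimension 0. A minimal usage sketch with illustrative shapes (not taken from the plugin or its tests):

#include <ngraph/opsets/opset7.hpp>
#include "transformation_helper.hpp"

// Crop 4 elements starting at offset 4 from a flattened 1 x 12 row.
// The resulting StridedSlice gets begin = {0, 4}, end = {0, 8}, strides = {1, 1},
// so the output shape is {1, 4}.
std::shared_ptr<ngraph::Node> MakeExampleCrop() {
    auto row = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{1, 12});
    return GNAPluginNS::FlatCrop(row->output(0), 4 /* offset */, 4 /* size */);
}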
diff --git a/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp
new file mode 100644
index 00000000000..14fca200f7b
--- /dev/null
+++ b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+namespace GNAPluginNS {
+
+struct ConvData {
+    size_t input_height;
+    size_t input_width;
+    size_t input_channel_count;
+    size_t filter_height;
+    size_t filter_width;
+    size_t filter_count;
+    size_t filter_channel_count;
+    size_t filter_dilation_height;
+    size_t filter_dilation_width;
+    size_t filter_stride_height;
+    size_t filter_stride_width;
+    size_t output_height;
+    size_t output_width;
+    size_t output_channel_count;
+    size_t pads_begin_width;
+    size_t pads_begin_height;
+    size_t pads_end_width;
+    size_t pads_end_height;
+    ngraph::op::PadType padding_type;
+    ngraph::element::Type element_type;
+};
+
+/**
+ * @brief gets all convolution related data into a struct for further processing
+ * @param conv convolution node to get data of
+ * @param conv_data convolution data structure to put data into
+ * @return void
+ */
+void GetConvData(std::shared_ptr conv, ConvData& conv_data);
+
+/**
+ * @brief ngraph matcher predicate fusing the existing predicates for consumer count and rank of a layer
+ * @param expected_count expected consumer count of the node
+ * @param expected_rank expected node rank
+ * @return predicate function wrapper
+ */
+std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank);
+
+/**
+ * @brief checks whether a transpose matches a given order
+ * @param transpose transpose layer
+ * @param order order of transposition to be compared with
+ * @return true if the order matches, false otherwise
+ */
+bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order);
+
+/**
+ * @brief performs a crop of a flattened input tensor
+ * @param input input layer
+ * @param offset offset to start the crop at
+ * @param size size of the crop
+ * @return pointer to the newly created slice
+ */
+std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size);
+} // namespace GNAPluginNS
diff --git a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
index 7171363e783..2b8d2f4f261 100644
--- a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
+++ b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
@@ -77,7 +77,7 @@ void HeteroInferRequest::SetBlob(const std::string& name, const InferenceEngine:
 if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
 r->SetBlob(name, data, foundInput->getPreProcess());
 }
- } catch (const InferenceEngine::NotFound& ex) {}
+ } catch (const InferenceEngine::NotFound&) {}
 }
 }
diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index bf3acd4d466..1152c12392e 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -12,11 +12,11 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
 endif()
 file (GLOB LIBRARY_SRC
- ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/cpp/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/threading/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/cpp/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/interface/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/threading/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp_interfaces/interface/*.cpp
 )
 # TODO: WA for OneHot pass usage in reshape
@@ -30,38 +30,38 @@ set(LEGACY_LIBRARY_SHARED_SRCS
 set_source_files_properties(${LEGACY_LIBRARY_SHARED_SRCS} PROPERTIES COMPILE_DEFINITIONS "USE_STATIC_IE")
-set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp)
+set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/file_utils.cpp)
 list(REMOVE_ITEM LIBRARY_SRC ${IE_STATIC_DEPENDENT_FILES})
 file (GLOB LIBRARY_HEADERS
${LIBRARY_HEADERS} - ${CMAKE_CURRENT_SOURCE_DIR}/os/lin/*.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/os/lin/*.hpp) elseif (UNIX) list (APPEND LIBRARY_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/os/lin/lin_shared_object_loader.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/os/lin/lin_shared_object_loader.cpp) endif() if (WIN32) file (GLOB LIBRARY_SRC ${LIBRARY_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/os/win/*.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/os/win/*.cpp) file (GLOB LIBRARY_HEADERS ${LIBRARY_HEADERS} - ${CMAKE_CURRENT_SOURCE_DIR}/os/win/*.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/os/win/*.hpp) endif() if(ENABLE_SSE42) - file(GLOB SSE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/*.cpp) - file(GLOB SSE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/*.hpp) + file(GLOB SSE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/cpu_x86_sse42/*.cpp) + file(GLOB SSE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src/cpu_x86_sse42/*.hpp) list(APPEND LIBRARY_HEADERS ${SSE_HEADERS}) list(APPEND LIBRARY_SRC ${SSE_SRC}) @@ -75,9 +75,9 @@ if(ENABLE_SSE42) endif() endif() -addVersionDefines(ie_version.cpp CI_BUILD_NUMBER) +addVersionDefines(src/ie_version.cpp CI_BUILD_NUMBER) -set (PUBLIC_HEADERS_DIR "${IE_MAIN_SOURCE_DIR}/include") +set (PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include/ie") file (GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/*.hpp @@ -116,7 +116,7 @@ add_library(${TARGET_NAME}_obj OBJECT ${PUBLIC_HEADERS}) ie_faster_build(${TARGET_NAME}_obj - UNITY PCH PRIVATE "precomp.hpp" + UNITY PCH PRIVATE "src/precomp.hpp" ) target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API @@ -128,7 +128,7 @@ target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $) -target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" +target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src" "${IE_MAIN_SOURCE_DIR}/src/readers/ir_reader" # for ie_ir_version.hpp $ $ @@ -162,11 +162,12 @@ if (TBBBIND_2_4_FOUND) endif() target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static openvino::itt ${CMAKE_DL_LIBS} Threads::Threads - ngraph ngraph::frontend_manager::static inference_engine_transformations) + ngraph::frontend_manager::static inference_engine_transformations + PUBLIC ngraph) target_include_directories(${TARGET_NAME} INTERFACE $ - $ + $ PRIVATE $ $) @@ -194,7 +195,7 @@ if (TBBBIND_2_4_FOUND) target_link_libraries(${TARGET_NAME}_s PRIVATE ${TBBBIND_2_4_IMPORTED_TARGETS}) endif() -target_include_directories(${TARGET_NAME}_s PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" +target_include_directories(${TARGET_NAME}_s PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/src" $ "${IE_MAIN_SOURCE_DIR}/src/legacy_api/src") @@ -216,7 +217,7 @@ set_target_properties(${TARGET_NAME} ${TARGET_NAME}_obj ${TARGET_NAME}_s # Export for build tree -export(TARGETS ${TARGET_NAME} NAMESPACE IE:: +export(TARGETS ngraph ${TARGET_NAME} NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake") # Export for developer package @@ -229,7 +230,14 @@ list(APPEND core_components ngraph) list(APPEND PATH_VARS "IE_INCLUDE_DIR" "IE_NGRAPH_DIR" "IE_PARALLEL_CMAKE") -if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCHES ${TEMP}) +# define variables for InferenceEngineConfig.cmake +if(THREADING MATCHES "^(TBB|TBB_AUTO)$") + set(IE_TBB_DIR "${TBB_DIR}") + list(APPEND PATH_VARS "IE_TBB_DIR") +endif() + +# install only downloaded TBB, system one is not installed +if(THREADING MATCHES "^(TBB|TBB_AUTO)$" AND TBBROOT MATCHES ${TEMP}) ie_cpack_add_component(tbb REQUIRED) list(APPEND 
core_components tbb) @@ -249,8 +257,6 @@ if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCH COMPONENT tbb) set(IE_TBB_DIR_INSTALL "external/tbb/cmake") - set(IE_TBB_DIR "${TBB_DIR}") - list(APPEND PATH_VARS "IE_TBB_DIR") install(FILES "${TBB}/cmake/TBBConfig.cmake" "${TBB}/cmake/TBBConfigVersion.cmake" @@ -263,7 +269,7 @@ endif() ie_cpack_add_component(core REQUIRED DEPENDS ${core_components}) ie_cpack_add_component(core_dev REQUIRED core ngraph_dev) -install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR} +install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR}/include COMPONENT core_dev) install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets @@ -301,7 +307,7 @@ configure_package_config_file("${OpenVINO_SOURCE_DIR}/cmake/templates/InferenceE INSTALL_DESTINATION "${CMAKE_INSTALL_PREFIX}" PATH_VARS ${PATH_VARS}) -set(IE_INCLUDE_DIR "include") +set(IE_INCLUDE_DIR "include/ie") set(IE_NGRAPH_DIR "../ngraph/cmake") set(IE_TBB_DIR "${IE_TBB_DIR_INSTALL}") set(IE_PARALLEL_CMAKE "share/ie_parallel.cmake") diff --git a/inference-engine/include/cldnn/cldnn_config.hpp b/inference-engine/src/inference_engine/include/ie/cldnn/cldnn_config.hpp similarity index 100% rename from inference-engine/include/cldnn/cldnn_config.hpp rename to inference-engine/src/inference_engine/include/ie/cldnn/cldnn_config.hpp diff --git a/inference-engine/include/cpp/ie_cnn_network.h b/inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h similarity index 99% rename from inference-engine/include/cpp/ie_cnn_network.h rename to inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h index 1fe5d2173f2..ef73b67e504 100644 --- a/inference-engine/include/cpp/ie_cnn_network.h +++ b/inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h @@ -20,12 +20,7 @@ #include "ie_common.h" #include "ie_data.h" #include "ie_extension.h" - -namespace ngraph { - -class Function; - -} // namespace ngraph +#include namespace InferenceEngine { diff --git a/inference-engine/include/cpp/ie_executable_network.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp similarity index 100% rename from inference-engine/include/cpp/ie_executable_network.hpp rename to inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp diff --git a/inference-engine/include/cpp/ie_infer_request.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_infer_request.hpp similarity index 100% rename from inference-engine/include/cpp/ie_infer_request.hpp rename to inference-engine/src/inference_engine/include/ie/cpp/ie_infer_request.hpp diff --git a/inference-engine/include/cpp/ie_memory_state.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_memory_state.hpp similarity index 100% rename from inference-engine/include/cpp/ie_memory_state.hpp rename to inference-engine/src/inference_engine/include/ie/cpp/ie_memory_state.hpp diff --git a/inference-engine/include/details/ie_blob_iterator.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_blob_iterator.hpp similarity index 100% rename from inference-engine/include/details/ie_blob_iterator.hpp rename to inference-engine/src/inference_engine/include/ie/details/ie_blob_iterator.hpp diff --git a/inference-engine/include/details/ie_exception.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_exception.hpp similarity index 100% rename from inference-engine/include/details/ie_exception.hpp rename to 
inference-engine/src/inference_engine/include/ie/details/ie_exception.hpp diff --git a/inference-engine/include/details/ie_pre_allocator.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_pre_allocator.hpp similarity index 100% rename from inference-engine/include/details/ie_pre_allocator.hpp rename to inference-engine/src/inference_engine/include/ie/details/ie_pre_allocator.hpp diff --git a/inference-engine/include/details/ie_so_loader.h b/inference-engine/src/inference_engine/include/ie/details/ie_so_loader.h similarity index 100% rename from inference-engine/include/details/ie_so_loader.h rename to inference-engine/src/inference_engine/include/ie/details/ie_so_loader.h diff --git a/inference-engine/include/details/ie_so_pointer.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_so_pointer.hpp similarity index 100% rename from inference-engine/include/details/ie_so_pointer.hpp rename to inference-engine/src/inference_engine/include/ie/details/ie_so_pointer.hpp diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/src/inference_engine/include/ie/gna/gna_config.hpp similarity index 100% rename from inference-engine/include/gna/gna_config.hpp rename to inference-engine/src/inference_engine/include/ie/gna/gna_config.hpp diff --git a/inference-engine/include/gpu/details/gpu_context_helpers.hpp b/inference-engine/src/inference_engine/include/ie/gpu/details/gpu_context_helpers.hpp similarity index 100% rename from inference-engine/include/gpu/details/gpu_context_helpers.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/details/gpu_context_helpers.hpp diff --git a/inference-engine/include/gpu/gpu_config.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_config.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_dx.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_dx.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_dx.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_dx.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_ocl.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_ocl.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_ocl.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_ocl.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_va.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_va.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_va.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_va.hpp diff --git a/inference-engine/include/gpu/gpu_ocl_wrapper.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_ocl_wrapper.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_ocl_wrapper.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_ocl_wrapper.hpp diff --git a/inference-engine/include/gpu/gpu_params.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_params.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_params.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_params.hpp diff --git a/inference-engine/include/hetero/hetero_plugin_config.hpp 
b/inference-engine/src/inference_engine/include/ie/hetero/hetero_plugin_config.hpp similarity index 100% rename from inference-engine/include/hetero/hetero_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/hetero/hetero_plugin_config.hpp diff --git a/inference-engine/include/ie_allocator.hpp b/inference-engine/src/inference_engine/include/ie/ie_allocator.hpp similarity index 100% rename from inference-engine/include/ie_allocator.hpp rename to inference-engine/src/inference_engine/include/ie/ie_allocator.hpp diff --git a/inference-engine/include/ie_api.h b/inference-engine/src/inference_engine/include/ie/ie_api.h similarity index 100% rename from inference-engine/include/ie_api.h rename to inference-engine/src/inference_engine/include/ie/ie_api.h diff --git a/inference-engine/include/ie_blob.h b/inference-engine/src/inference_engine/include/ie/ie_blob.h similarity index 100% rename from inference-engine/include/ie_blob.h rename to inference-engine/src/inference_engine/include/ie/ie_blob.h diff --git a/inference-engine/include/ie_common.h b/inference-engine/src/inference_engine/include/ie/ie_common.h similarity index 100% rename from inference-engine/include/ie_common.h rename to inference-engine/src/inference_engine/include/ie/ie_common.h diff --git a/inference-engine/include/ie_compound_blob.h b/inference-engine/src/inference_engine/include/ie/ie_compound_blob.h similarity index 100% rename from inference-engine/include/ie_compound_blob.h rename to inference-engine/src/inference_engine/include/ie/ie_compound_blob.h diff --git a/inference-engine/include/ie_core.hpp b/inference-engine/src/inference_engine/include/ie/ie_core.hpp similarity index 100% rename from inference-engine/include/ie_core.hpp rename to inference-engine/src/inference_engine/include/ie/ie_core.hpp diff --git a/inference-engine/include/ie_data.h b/inference-engine/src/inference_engine/include/ie/ie_data.h similarity index 100% rename from inference-engine/include/ie_data.h rename to inference-engine/src/inference_engine/include/ie/ie_data.h diff --git a/inference-engine/include/ie_extension.h b/inference-engine/src/inference_engine/include/ie/ie_extension.h similarity index 99% rename from inference-engine/include/ie_extension.h rename to inference-engine/src/inference_engine/include/ie/ie_extension.h index 8014d658d80..97184fd5ba4 100644 --- a/inference-engine/include/ie_extension.h +++ b/inference-engine/src/inference_engine/include/ie/ie_extension.h @@ -14,6 +14,7 @@ #include #include +#include #include "ie_iextension.h" #include "details/ie_so_pointer.hpp" diff --git a/inference-engine/include/ie_icnn_network.hpp b/inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp similarity index 99% rename from inference-engine/include/ie_icnn_network.hpp rename to inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp index ec640691ecc..62ef93824ee 100644 --- a/inference-engine/include/ie_icnn_network.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp @@ -18,11 +18,7 @@ #include "ie_data.h" #include "ie_input_info.hpp" -namespace ngraph { - -class Function; - -} // namespace ngraph +#include namespace InferenceEngine { diff --git a/inference-engine/include/ie_iexecutable_network.hpp b/inference-engine/src/inference_engine/include/ie/ie_iexecutable_network.hpp similarity index 100% rename from inference-engine/include/ie_iexecutable_network.hpp rename to inference-engine/src/inference_engine/include/ie/ie_iexecutable_network.hpp diff --git 
a/inference-engine/include/ie_iextension.h b/inference-engine/src/inference_engine/include/ie/ie_iextension.h similarity index 98% rename from inference-engine/include/ie_iextension.h rename to inference-engine/src/inference_engine/include/ie/ie_iextension.h index d001b999081..be327c15376 100644 --- a/inference-engine/include/ie_iextension.h +++ b/inference-engine/src/inference_engine/include/ie/ie_iextension.h @@ -19,6 +19,7 @@ #include "ie_layouts.h" #include "ie_blob.h" #include "ie_version.hpp" +#include /** * @def INFERENCE_EXTENSION_API(TYPE) @@ -30,13 +31,6 @@ #define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE) #endif -namespace ngraph { - -class OpSet; -class Node; - -} // namespace ngraph - namespace InferenceEngine { /** diff --git a/inference-engine/include/ie_iinfer_request.hpp b/inference-engine/src/inference_engine/include/ie/ie_iinfer_request.hpp similarity index 100% rename from inference-engine/include/ie_iinfer_request.hpp rename to inference-engine/src/inference_engine/include/ie/ie_iinfer_request.hpp diff --git a/inference-engine/include/ie_input_info.hpp b/inference-engine/src/inference_engine/include/ie/ie_input_info.hpp similarity index 100% rename from inference-engine/include/ie_input_info.hpp rename to inference-engine/src/inference_engine/include/ie/ie_input_info.hpp diff --git a/inference-engine/include/ie_layouts.h b/inference-engine/src/inference_engine/include/ie/ie_layouts.h similarity index 97% rename from inference-engine/include/ie_layouts.h rename to inference-engine/src/inference_engine/include/ie/ie_layouts.h index 31c42e1d02a..42fe8fbca2c 100644 --- a/inference-engine/include/ie_layouts.h +++ b/inference-engine/src/inference_engine/include/ie/ie_layouts.h @@ -304,6 +304,14 @@ public: */ static Layout getLayoutByDims(const SizeVector& dims); + /** + * @brief Returns the standard layout for the specified tensor rank + * + * @param rank of the requested layout + * @return the standard memory layout + */ + static Layout getLayoutByRank(size_t rank); + private: /** * Memory layout diff --git a/inference-engine/include/ie_locked_memory.hpp b/inference-engine/src/inference_engine/include/ie/ie_locked_memory.hpp similarity index 100% rename from inference-engine/include/ie_locked_memory.hpp rename to inference-engine/src/inference_engine/include/ie/ie_locked_memory.hpp diff --git a/inference-engine/include/ie_parallel.hpp b/inference-engine/src/inference_engine/include/ie/ie_parallel.hpp similarity index 100% rename from inference-engine/include/ie_parallel.hpp rename to inference-engine/src/inference_engine/include/ie/ie_parallel.hpp diff --git a/inference-engine/include/ie_parameter.hpp b/inference-engine/src/inference_engine/include/ie/ie_parameter.hpp similarity index 99% rename from inference-engine/include/ie_parameter.hpp rename to inference-engine/src/inference_engine/include/ie/ie_parameter.hpp index 4aa6760d474..425673f45b0 100644 --- a/inference-engine/include/ie_parameter.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_parameter.hpp @@ -21,12 +21,6 @@ #include "ie_blob.h" -namespace ngraph { - -class Variant; - -} // namespace ngraph - namespace InferenceEngine { /** diff --git a/inference-engine/include/ie_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp similarity index 100% rename from inference-engine/include/ie_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp diff --git a/inference-engine/include/ie_precision.hpp 
b/inference-engine/src/inference_engine/include/ie/ie_precision.hpp similarity index 100% rename from inference-engine/include/ie_precision.hpp rename to inference-engine/src/inference_engine/include/ie/ie_precision.hpp diff --git a/inference-engine/include/ie_preprocess.hpp b/inference-engine/src/inference_engine/include/ie/ie_preprocess.hpp similarity index 100% rename from inference-engine/include/ie_preprocess.hpp rename to inference-engine/src/inference_engine/include/ie/ie_preprocess.hpp diff --git a/inference-engine/include/ie_remote_context.hpp b/inference-engine/src/inference_engine/include/ie/ie_remote_context.hpp similarity index 100% rename from inference-engine/include/ie_remote_context.hpp rename to inference-engine/src/inference_engine/include/ie/ie_remote_context.hpp diff --git a/inference-engine/include/ie_transformations.hpp b/inference-engine/src/inference_engine/include/ie/ie_transformations.hpp similarity index 100% rename from inference-engine/include/ie_transformations.hpp rename to inference-engine/src/inference_engine/include/ie/ie_transformations.hpp diff --git a/inference-engine/include/ie_version.hpp b/inference-engine/src/inference_engine/include/ie/ie_version.hpp similarity index 100% rename from inference-engine/include/ie_version.hpp rename to inference-engine/src/inference_engine/include/ie/ie_version.hpp diff --git a/inference-engine/include/inference_engine.hpp b/inference-engine/src/inference_engine/include/ie/inference_engine.hpp similarity index 100% rename from inference-engine/include/inference_engine.hpp rename to inference-engine/src/inference_engine/include/ie/inference_engine.hpp diff --git a/inference-engine/include/multi-device/multi_device_config.hpp b/inference-engine/src/inference_engine/include/ie/multi-device/multi_device_config.hpp similarity index 100% rename from inference-engine/include/multi-device/multi_device_config.hpp rename to inference-engine/src/inference_engine/include/ie/multi-device/multi_device_config.hpp diff --git a/inference-engine/include/vpu/hddl_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/hddl_config.hpp similarity index 100% rename from inference-engine/include/vpu/hddl_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/hddl_config.hpp diff --git a/inference-engine/include/vpu/hddl_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/hddl_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/hddl_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/hddl_plugin_config.hpp diff --git a/inference-engine/include/vpu/myriad_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/myriad_config.hpp similarity index 100% rename from inference-engine/include/vpu/myriad_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/myriad_config.hpp diff --git a/inference-engine/include/vpu/myriad_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/myriad_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/myriad_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/myriad_plugin_config.hpp diff --git a/inference-engine/include/vpu/vpu_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp similarity index 100% rename from inference-engine/include/vpu/vpu_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp diff --git 
a/inference-engine/include/vpu/vpu_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/vpu_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp diff --git a/inference-engine/src/inference_engine/blob_factory.cpp b/inference-engine/src/inference_engine/src/blob_factory.cpp similarity index 100% rename from inference-engine/src/inference_engine/blob_factory.cpp rename to inference-engine/src/inference_engine/src/blob_factory.cpp diff --git a/inference-engine/src/inference_engine/blob_transform.cpp b/inference-engine/src/inference_engine/src/blob_transform.cpp similarity index 100% rename from inference-engine/src/inference_engine/blob_transform.cpp rename to inference-engine/src/inference_engine/src/blob_transform.cpp diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp similarity index 96% rename from inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp rename to inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp index 1f05ca0098c..f53894e7d2d 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp @@ -35,6 +35,9 @@ #include +#include +#include + #include "ie_ngraph_utils.hpp" #include "exec_graph_info.hpp" #include "ie_itt.hpp" @@ -88,12 +91,12 @@ void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph:: void CNNNetworkNGraphImpl::validateFunctionNames() const { // nGraph function parameters and pre-Results operations should have unique names - std::unordered_set unique_names; + std::unordered_map> unique_names; for (const auto& param : _ngraph_function->get_parameters()) { if (unique_names.count(param->get_friendly_name())) { IE_THROW() << "Function contains several inputs with one friendly name!"; } - unique_names.insert(param->get_friendly_name()); + unique_names.insert({param->get_friendly_name(), param}); } for (const auto& result : _ngraph_function->get_results()) { const auto& parent = result->get_input_node_shared_ptr(0); @@ -101,10 +104,10 @@ void CNNNetworkNGraphImpl::validateFunctionNames() const { if (parent->get_output_size() > 1) { name += "." 
+ std::to_string(result->get_input_source_output(0).get_index()); } - if (unique_names.count(name) && !ngraph::op::is_parameter(parent)) { - IE_THROW() << "Function contains several inputs and outputs with one friendly name!"; + if (unique_names.count(name) && !ngraph::op::is_parameter(parent) && parent != unique_names.at(name)) { + IE_THROW() << "Function contains several inputs and outputs with one friendly name: " << name; } - unique_names.insert(name); + unique_names.insert({name, parent}); } } @@ -364,13 +367,10 @@ CNNNetworkNGraphImpl::reshape(const std::map& bool parameter_replaced = false; for (size_t i = 0; i < params.size(); i++) { - const auto& param = params[i]; + auto& param = params[i]; if (inputShapes.find(param->get_friendly_name()) == inputShapes.end()) continue; - ::ngraph::PartialShape shape(inputShapes.at(param->get_friendly_name())); - auto newParam = std::make_shared<::ngraph::op::Parameter>(param->get_element_type(), shape); - newParam->set_friendly_name(param->get_friendly_name()); - _ngraph_function->replace_parameter(i, newParam); + param->set_partial_shape(inputShapes.at(param->get_friendly_name())); parameter_replaced = true; } if (parameter_replaced) @@ -392,6 +392,8 @@ CNNNetworkNGraphImpl::reshape(const std::map& ::ngraph::pass::Manager manager; // resolves dynamism by replacing dynamic operation with static version manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false); + manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(); + manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(); manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>(); manager.register_pass<::ngraph::pass::ConstantFolding>(); // OneHotToLegacy changes output precision diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp similarity index 100% rename from inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp rename to inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp diff --git a/inference-engine/src/inference_engine/compilation_context.cpp b/inference-engine/src/inference_engine/src/compilation_context.cpp similarity index 100% rename from inference-engine/src/inference_engine/compilation_context.cpp rename to inference-engine/src/inference_engine/src/compilation_context.cpp diff --git a/inference-engine/src/inference_engine/compilation_context.hpp b/inference-engine/src/inference_engine/src/compilation_context.hpp similarity index 100% rename from inference-engine/src/inference_engine/compilation_context.hpp rename to inference-engine/src/inference_engine/src/compilation_context.hpp diff --git a/inference-engine/src/inference_engine/cpp/exception2status.hpp b/inference-engine/src/inference_engine/src/cpp/exception2status.hpp similarity index 100% rename from inference-engine/src/inference_engine/cpp/exception2status.hpp rename to inference-engine/src/inference_engine/src/cpp/exception2status.hpp diff --git a/inference-engine/src/inference_engine/cpp/ie_cnn_network.cpp b/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp similarity index 100% rename from inference-engine/src/inference_engine/cpp/ie_cnn_network.cpp rename to inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp diff --git a/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp b/inference-engine/src/inference_engine/src/cpp/ie_executable_network.cpp similarity index 100% 
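The CNNNetworkNGraphImpl::reshape change above stops rebuilding Parameter nodes and instead updates each matched Parameter in place via set_partial_shape. A minimal sketch of that flow on a bare ngraph::Function; the helper name and signature below are illustrative, not the Inference Engine API:

#include <ngraph/function.hpp>

// Update the shape of the parameter with the given friendly name and re-run shape inference.
void ReshapeInput(const std::shared_ptr<ngraph::Function>& function,
                  const std::string& input_name,
                  const ngraph::PartialShape& new_shape) {
    for (const auto& param : function->get_parameters()) {
        if (param->get_friendly_name() == input_name) {
            param->set_partial_shape(new_shape);  // keeps the node, its name and its consumers intact
        }
    }
    function->validate_nodes_and_infer_types();   // propagate the new shape through the graph
}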
rename from inference-engine/src/inference_engine/cpp/ie_executable_network.cpp rename to inference-engine/src/inference_engine/src/cpp/ie_executable_network.cpp diff --git a/inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp b/inference-engine/src/inference_engine/src/cpp/ie_executable_network_base.hpp similarity index 100% rename from inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp rename to inference-engine/src/inference_engine/src/cpp/ie_executable_network_base.hpp diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp b/inference-engine/src/inference_engine/src/cpp/ie_infer_async_request_base.hpp similarity index 100% rename from inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp rename to inference-engine/src/inference_engine/src/cpp/ie_infer_async_request_base.hpp diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp b/inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp similarity index 99% rename from inference-engine/src/inference_engine/cpp/ie_infer_request.cpp rename to inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp index 9e68666b7a3..f94a3b6ba1c 100644 --- a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp +++ b/inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp @@ -127,7 +127,7 @@ void InferRequest::SetCompletionCallbackImpl(std::function { plugin.ImportNetwork(networkStream, config); networkIsImported = true; }); - } catch (const HeaderException& ex) { + } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work cacheManager->removeCacheEntry(blobId); networkIsImported = false; diff --git a/inference-engine/src/inference_engine/ie_data.cpp b/inference-engine/src/inference_engine/src/ie_data.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_data.cpp rename to inference-engine/src/inference_engine/src/ie_data.cpp diff --git a/inference-engine/src/inference_engine/ie_itt.hpp b/inference-engine/src/inference_engine/src/ie_itt.hpp similarity index 100% rename from inference-engine/src/inference_engine/ie_itt.hpp rename to inference-engine/src/inference_engine/src/ie_itt.hpp diff --git a/inference-engine/src/inference_engine/ie_layouts.cpp b/inference-engine/src/inference_engine/src/ie_layouts.cpp similarity index 99% rename from inference-engine/src/inference_engine/ie_layouts.cpp rename to inference-engine/src/inference_engine/src/ie_layouts.cpp index b566693c155..a9308877e7d 100644 --- a/inference-engine/src/inference_engine/ie_layouts.cpp +++ b/inference-engine/src/inference_engine/src/ie_layouts.cpp @@ -161,8 +161,8 @@ bool TensorDesc::operator!=(const TensorDesc& rhs) const { return !(*this == rhs); } -Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { - switch (dims.size()) { +Layout TensorDesc::getLayoutByRank(size_t rank) { + switch (rank) { case 0: return Layout::SCALAR; case 1: @@ -180,6 +180,10 @@ Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { } } +Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { + return getLayoutByRank(dims.size()); +} + size_t TensorDesc::offset(const SizeVector& v) const { if (layout == Layout::ANY) IE_THROW() << "Cannot calculate offset for any format!"; diff --git a/inference-engine/src/inference_engine/ie_memcpy.cpp b/inference-engine/src/inference_engine/src/ie_memcpy.cpp similarity index 100% rename from 
inference-engine/src/inference_engine/ie_memcpy.cpp rename to inference-engine/src/inference_engine/src/ie_memcpy.cpp diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/src/ie_network_reader.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_network_reader.cpp rename to inference-engine/src/inference_engine/src/ie_network_reader.cpp diff --git a/inference-engine/src/inference_engine/ie_network_reader.hpp b/inference-engine/src/inference_engine/src/ie_network_reader.hpp similarity index 100% rename from inference-engine/src/inference_engine/ie_network_reader.hpp rename to inference-engine/src/inference_engine/src/ie_network_reader.hpp diff --git a/inference-engine/src/inference_engine/ie_ngraph_utils.cpp b/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_ngraph_utils.cpp rename to inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp diff --git a/inference-engine/src/inference_engine/ie_system_conf.cpp b/inference-engine/src/inference_engine/src/ie_system_conf.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_system_conf.cpp rename to inference-engine/src/inference_engine/src/ie_system_conf.cpp diff --git a/inference-engine/src/inference_engine/ie_transformations.cpp b/inference-engine/src/inference_engine/src/ie_transformations.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_transformations.cpp rename to inference-engine/src/inference_engine/src/ie_transformations.cpp diff --git a/inference-engine/src/inference_engine/ie_version.cpp b/inference-engine/src/inference_engine/src/ie_version.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_version.cpp rename to inference-engine/src/inference_engine/src/ie_version.cpp diff --git a/inference-engine/src/inference_engine/os/lin/lin_shared_object_loader.cpp b/inference-engine/src/inference_engine/src/os/lin/lin_shared_object_loader.cpp similarity index 100% rename from inference-engine/src/inference_engine/os/lin/lin_shared_object_loader.cpp rename to inference-engine/src/inference_engine/src/os/lin/lin_shared_object_loader.cpp diff --git a/inference-engine/src/inference_engine/os/lin/lin_system_conf.cpp b/inference-engine/src/inference_engine/src/os/lin/lin_system_conf.cpp similarity index 100% rename from inference-engine/src/inference_engine/os/lin/lin_system_conf.cpp rename to inference-engine/src/inference_engine/src/os/lin/lin_system_conf.cpp diff --git a/inference-engine/src/inference_engine/os/win/win_shared_object_loader.cpp b/inference-engine/src/inference_engine/src/os/win/win_shared_object_loader.cpp similarity index 100% rename from inference-engine/src/inference_engine/os/win/win_shared_object_loader.cpp rename to inference-engine/src/inference_engine/src/os/win/win_shared_object_loader.cpp diff --git a/inference-engine/src/inference_engine/os/win/win_system_conf.cpp b/inference-engine/src/inference_engine/src/os/win/win_system_conf.cpp similarity index 100% rename from inference-engine/src/inference_engine/os/win/win_system_conf.cpp rename to inference-engine/src/inference_engine/src/os/win/win_system_conf.cpp diff --git a/inference-engine/src/inference_engine/precision_utils.cpp b/inference-engine/src/inference_engine/src/precision_utils.cpp similarity index 100% rename from inference-engine/src/inference_engine/precision_utils.cpp rename to 
inference-engine/src/inference_engine/src/precision_utils.cpp diff --git a/inference-engine/src/inference_engine/precomp.hpp b/inference-engine/src/inference_engine/src/precomp.hpp similarity index 100% rename from inference-engine/src/inference_engine/precomp.hpp rename to inference-engine/src/inference_engine/src/precomp.hpp diff --git a/inference-engine/src/inference_engine/system_allocator.cpp b/inference-engine/src/inference_engine/src/system_allocator.cpp similarity index 100% rename from inference-engine/src/inference_engine/system_allocator.cpp rename to inference-engine/src/inference_engine/src/system_allocator.cpp diff --git a/inference-engine/src/inference_engine/system_allocator.hpp b/inference-engine/src/inference_engine/src/system_allocator.hpp similarity index 100% rename from inference-engine/src/inference_engine/system_allocator.hpp rename to inference-engine/src/inference_engine/src/system_allocator.hpp diff --git a/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_cpu_streams_executor.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp rename to inference-engine/src/inference_engine/src/threading/ie_cpu_streams_executor.cpp diff --git a/inference-engine/src/inference_engine/threading/ie_executor_manager.cpp b/inference-engine/src/inference_engine/src/threading/ie_executor_manager.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_executor_manager.cpp rename to inference-engine/src/inference_engine/src/threading/ie_executor_manager.cpp diff --git a/inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp similarity index 98% rename from inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp rename to inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp index 1a2993f3365..702a0beecee 100644 --- a/inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp +++ b/inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp @@ -123,11 +123,11 @@ Parameter IStreamsExecutor::Config::GetConfig(const std::string& key) { break; } } else if (key == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) { - return {_streams}; + return {std::to_string(_streams)}; } else if (key == CONFIG_KEY(CPU_THREADS_NUM)) { - return {_threads}; + return {std::to_string(_threads)}; } else if (key == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) { - return {_threadsPerStream}; + return {std::to_string(_threadsPerStream)}; } else { IE_THROW() << "Wrong value for property key " << key; } diff --git a/inference-engine/src/inference_engine/threading/ie_itask_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_itask_executor.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_itask_executor.cpp rename to inference-engine/src/inference_engine/src/threading/ie_itask_executor.cpp diff --git a/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.cpp b/inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.cpp rename to inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp diff --git 
a/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.hpp b/inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.hpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.hpp rename to inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.hpp diff --git a/inference-engine/src/inference_engine/threading/ie_thread_affinity.cpp b/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_thread_affinity.cpp rename to inference-engine/src/inference_engine/src/threading/ie_thread_affinity.cpp diff --git a/inference-engine/src/inference_engine/threading/ie_thread_affinity.hpp b/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.hpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_thread_affinity.hpp rename to inference-engine/src/inference_engine/src/threading/ie_thread_affinity.hpp diff --git a/inference-engine/src/inference_engine/xml_parse_utils.cpp b/inference-engine/src/inference_engine/src/xml_parse_utils.cpp similarity index 100% rename from inference-engine/src/inference_engine/xml_parse_utils.cpp rename to inference-engine/src/inference_engine/src/xml_parse_utils.cpp diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt index 28afe337e38..9de8bf16910 100644 --- a/inference-engine/src/legacy_api/CMakeLists.txt +++ b/inference-engine/src/legacy_api/CMakeLists.txt @@ -39,7 +39,7 @@ target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE target_include_directories(${TARGET_NAME}_obj PRIVATE ${PUBLIC_HEADERS_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src - ${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl + ${IE_MAIN_SOURCE_DIR}/src/inference_engine/src # For CNNNetworkNGraphImpl $ $ $ diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp index 7057fc1f597..e59ec61c8f4 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp @@ -19,7 +19,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class LP_TRANSFORMATIONS_API Exception : std::exception { +class LP_TRANSFORMATIONS_API Exception : public std::exception { std::shared_ptr buffer; mutable std::string buffer_str; public: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp index da226fe263b..fee17230569 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp @@ -16,6 +16,7 @@ public: NGRAPH_RTTI_DECLARATION; MultiplyTransformation(const Params& params = Params()); bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; } // namespace low_precision diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index 77218320dba..3229c9814f0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -151,7 +151,7 @@ public: static bool isQuantizeSupported(const std::shared_ptr& fakeQuantize); - static FakeQuantizeDequantization getDequantization(const std::shared_ptr& node, const size_t parentIndex = 0ul, const bool inPlace = false); + static FakeQuantizeDequantization getDequantization(const std::shared_ptr& node, const size_t parentIndex = 0ul, const bool inPlace = false); static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr& node, const bool convertIsMandatory = false); @@ -186,7 +186,7 @@ public: static size_t getParentOutputIndex(const std::shared_ptr& parent, const std::shared_ptr& child); - static FakeQuantizeDequantizationValues createEmptyValues(const FakeQuantizeDequantization& dequantization); + static FakeQuantizeDequantizationValues createEmptyValues(const FakeQuantizeDequantization& dequantization, const element::Type precision); static bool isZeroConst(const std::shared_ptr& node); static bool checkZeroPoint(const std::shared_ptr& node, const DataPrecision& dataPrecision = DataPrecision()); diff --git a/inference-engine/src/low_precision_transformations/src/add.cpp b/inference-engine/src/low_precision_transformations/src/add.cpp index 4ecd8464370..55a101c101f 100644 --- a/inference-engine/src/low_precision_transformations/src/add.cpp +++ b/inference-engine/src/low_precision_transformations/src/add.cpp @@ -152,28 +152,25 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter newAddOrSubtract = newMultiply; } } else { - // dequantizations are on both branches + // low precision with dequantization operations on at least one branch const int emptyPathIndex = fullPathIndex == 0 ? 
1 : 0; - FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::getDequantization(add, emptyPathIndex); - if (updatePrecisions && !dequantizationEmptyPath.empty() && !dequantizationEmptyPath.isLowPrecision()) { - return false; + if (updatePrecisions) { + const FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::getDequantization(add, emptyPathIndex); + if (!dequantizationEmptyPath.empty() && !dequantizationEmptyPath.isLowPrecision()) { + return false; + } } - FakeQuantizeDequantization dequantizationFullPath = NetworkHelper::getDequantization(add, fullPathIndex); - if (updatePrecisions && !dequantizationFullPath.empty() && !dequantizationFullPath.isLowPrecision()) { - return false; - } - - dequantizationEmptyPath = NetworkHelper::foldDequantization(addNode, emptyPathIndex); + const FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::foldDequantization(addNode, emptyPathIndex); std::shared_ptr subtractEmptyPathValues; std::shared_ptr multiplyEmptyPathValues; - std::tie(subtractEmptyPathValues, multiplyEmptyPathValues) = NetworkHelper::createEmptyValues(dequantizationEmptyPath); + std::tie(subtractEmptyPathValues, multiplyEmptyPathValues) = NetworkHelper::createEmptyValues(dequantizationEmptyPath, deqPrecision); - dequantizationFullPath = NetworkHelper::foldDequantization(addNode, fullPathIndex); + const FakeQuantizeDequantization dequantizationFullPath = NetworkHelper::foldDequantization(addNode, fullPathIndex); std::shared_ptr subtractFullPathValues; std::shared_ptr multiplyFullPathValues; - std::tie(subtractFullPathValues, multiplyFullPathValues) = NetworkHelper::createEmptyValues(dequantizationFullPath); + std::tie(subtractFullPathValues, multiplyFullPathValues) = NetworkHelper::createEmptyValues(dequantizationFullPath, deqPrecision); // calculation // before: Y = (SC1 * (X1 - SH1)) + (SC2 * (X2 - SH2)) @@ -196,11 +193,24 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter OutputVector inputs{ {}, {} }; auto fullPathInput = dequantizationFullPath.convert == nullptr ? dequantizationFullPath.data : dequantizationFullPath.convert; + // inputs[0] inputs[1] + // \ / + // \ / + // newAddOrSubtract + // | + // newMultiply + inputs[emptyPathIndex] = dequantizationEmptyPath.data; inputs[fullPathIndex] = std::make_shared( newSubtractFullPathValues == nullptr ? fullPathInput : - std::make_shared(fullPathInput, newSubtractFullPathValues), + std::make_shared( + // precision on branch with dequantization operations can be different with dequantization precision, + // for example: FP16 model with FP32 dequantization + fullPathInput.get_element_type() != newSubtractFullPathValues->get_element_type() ? 
+ std::make_shared(fullPathInput, newSubtractFullPathValues->get_element_type()) : + fullPathInput, + newSubtractFullPathValues), newMultiplyFullPathValues); newAddOrSubtract = std::make_shared>( diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp index 0863dcb3f09..6adeb1f413c 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -5,9 +5,7 @@ #include "low_precision/concat.hpp" #include -#include #include -#include #include #include @@ -189,7 +187,6 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context const auto outPShape = concat->get_output_partial_shape(0); const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outPShape.rank()); - // TODO: LPT: to support current flow: #58269 if (normalizedAxis != 1ul) { return false; } @@ -198,8 +195,6 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context return false; } - const bool perTensorQuantizationIsRequired = normalizedAxis != 1ul; - element::Type precision; for (size_t i = 0ul; i < concat->get_input_size(); i++) { const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, i); @@ -212,12 +207,6 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context } else if (precision != dequantization.data.get_element_type()) { return false; } - - if (perTensorQuantizationIsRequired && - (((dequantization.subtractConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.subtractConstant)) || - ((dequantization.multiplyConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) { - return false; - } } return true; } diff --git a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp index b1b7674631b..54e87798a64 100644 --- a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp @@ -54,15 +54,10 @@ bool EltwiseBaseTransformation::canBeTransformed(const TransformationContext& co return false; } + // at least one branch quantization is mandatory if ((dequantization1.data.get_node() == nullptr) || - (dequantization1.empty() && !is_type(dequantization1.data.get_node_shared_ptr()) && - !is_type(dequantization2.data.get_node_shared_ptr()))) { - return false; - } - - if ((dequantization2.data.get_node() == nullptr) || - (dequantization2.empty() && !is_type(dequantization2.data.get_node_shared_ptr()) && - !is_type(dequantization1.data.get_node_shared_ptr()))) { + (dequantization2.data.get_node() == nullptr) || + (dequantization1.empty() && dequantization2.empty())) { return false; } @@ -101,15 +96,39 @@ static bool isBranchHaveMultipleConsumers(const std::shared_ptr branchData // return branch index with FP32 precision after eltwise transformation int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr& eltwise) const { const FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(eltwise, 0ul); - if (dequantization1.empty() || as_type(dequantization1.data.get_node())) { + if (as_type(dequantization1.data.get_node())) { return -1; } const FakeQuantizeDequantization dequantization2 = 
pass::low_precision::NetworkHelper::getDequantization(eltwise, 1ul); - if (dequantization2.empty() || as_type(dequantization2.data.get_node())) { + if (as_type(dequantization2.data.get_node())) { return -1; } + if (!dequantization1.empty() && dequantization1.isLowPrecision() && (dequantization2.empty() || !dequantization2.isLowPrecision())) { + return 1; + } + + if ((dequantization1.empty() || !dequantization1.isLowPrecision()) && !dequantization2.empty() && dequantization2.isLowPrecision()) { + return 0; + } + + if (!updatePrecisions) { + // If result is still not defined, then handle special cases for updatePrecisions == false, assumption for one branch quantization: + // 1. branch with dequantization operations is quantized, + // 2. empty branch is not quantized. + // As result: move dequantization operations to empty branch. + // Note: keep comparisions uppper as is: low precision can be used in updatePrecisions == false case + // if FakeQuantize operations were decomposed before LPT. + if (!dequantization1.empty() && dequantization2.empty()) { + return 1; + } + + if (dequantization1.empty() || !dequantization2.empty()) { + return 0; + } + } + const std::shared_ptr fakeQuantize1 = as_type_ptr(dequantization1.data.get_node_shared_ptr()); const std::shared_ptr fakeQuantize2 = diff --git a/inference-engine/src/low_precision_transformations/src/multiply.cpp b/inference-engine/src/low_precision_transformations/src/multiply.cpp index d95fe2812c3..923f77a7d20 100644 --- a/inference-engine/src/low_precision_transformations/src/multiply.cpp +++ b/inference-engine/src/low_precision_transformations/src/multiply.cpp @@ -41,7 +41,7 @@ MultiplyTransformation::MultiplyTransformation(const Params& params) : EltwiseBa bool MultiplyTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { auto multiply = m.get_match_root(); - if (!LayerTransformation::canBeTransformed(context, multiply)) { + if (!canBeTransformed(context, multiply)) { return false; } @@ -116,7 +116,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p dequantizationEmptyPath = NetworkHelper::foldDequantization(multiply, emptyPathIndex); std::shared_ptr subtractValuesEmptyPath; std::shared_ptr multiplyValuesEmptyPath; - std::tie(subtractValuesEmptyPath, multiplyValuesEmptyPath) = NetworkHelper::createEmptyValues(dequantizationEmptyPath); + std::tie(subtractValuesEmptyPath, multiplyValuesEmptyPath) = NetworkHelper::createEmptyValues(dequantizationEmptyPath, deqPrecision); // check if empty path shifts are not zero if (!NetworkHelper::isZeroConst(subtractValuesEmptyPath)) { @@ -126,7 +126,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p dequantizationFullPath = NetworkHelper::foldDequantization(multiply, fullPathIndex); std::shared_ptr subtractValuesFullPath; std::shared_ptr multiplyValuesFullPath; - std::tie(subtractValuesFullPath, multiplyValuesFullPath) = NetworkHelper::createEmptyValues(dequantizationFullPath); + std::tie(subtractValuesFullPath, multiplyValuesFullPath) = NetworkHelper::createEmptyValues(dequantizationFullPath, deqPrecision); // before: Y = (SC1 * (X1 - SH1)) * (SC2 * X2) @@ -160,6 +160,24 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p return true; } +bool MultiplyTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { + FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(layer, 0ul); + 
FakeQuantizeDequantization dequantization2 = pass::low_precision::NetworkHelper::getDequantization(layer, 1ul); + + if ((dequantization1.data.get_node() == nullptr) || + (dequantization1.empty() && !is_type(dequantization1.data.get_node_shared_ptr()) && + !is_type(dequantization2.data.get_node_shared_ptr()))) { + return false; + } + + if ((dequantization2.data.get_node() == nullptr) || + (dequantization2.empty() && !is_type(dequantization2.data.get_node_shared_ptr()) && + !is_type(dequantization1.data.get_node_shared_ptr()))) { + return false; + } + return EltwiseBaseTransformation::canBeTransformed(context, layer); +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 3f49e8b327c..879bd24dc04 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -1268,7 +1268,7 @@ bool NetworkHelper::isQuantizeSupported(const std::shared_ptrget_levels()); } -FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_ptr& node, const size_t parentIndex, const bool inPlace) { +FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_ptr& node, const size_t parentIndex, const bool inPlace) { auto getDataIndex = [](const std::shared_ptr& node) { if (is_type(node->get_input_node_ptr(1))) { return 0ul; @@ -1285,7 +1285,7 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt return 1ul; }; - Output dataNode = inPlace ? node->output(0) : node->input_value(parentIndex); + Output dataNode = inPlace ? std::const_pointer_cast(node)->output(0) : node->input_value(parentIndex); const std::shared_ptr multiply = as_type_ptr(dataNode.get_node_shared_ptr()); std::shared_ptr multiplyConstant; @@ -1440,22 +1440,20 @@ std::shared_ptr NetworkHelper::normalizeDequantizationShape(co return normalizedConstant; } -FakeQuantizeDequantizationValues NetworkHelper::createEmptyValues(const FakeQuantizeDequantization& dequantization) { - std::shared_ptr parent = dequantization.convert ? dequantization.convert : dequantization.data.get_node_shared_ptr(); +FakeQuantizeDequantizationValues NetworkHelper::createEmptyValues(const FakeQuantizeDequantization& dequantization, const element::Type precision) { + const std::shared_ptr multiplyConstant = dequantization.multiply ? + dequantization.multiplyConstant->get_element_type() != precision ? + foldConvert(dequantization.multiplyConstant, precision) : + dequantization.multiplyConstant : + std::make_shared(precision, Shape({}), std::vector({ 1.f })); - std::shared_ptr multiply1Const = dequantization.multiply ? - dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) : - std::make_shared(parent->get_output_element_type(0), Shape({}), std::vector({ 1.f })); + const std::shared_ptr subtractConstant = dequantization.subtract ? + dequantization.subtractConstant->get_element_type() != precision ? + foldConvert(dequantization.subtractConstant, precision) : + dequantization.subtractConstant : + std::make_shared(precision, Shape({}), std::vector({ 0.f })); - std::shared_ptr subtract1Const = dequantization.subtract ? - (dequantization.subtractConvert == nullptr ? 
- dequantization.subtractConstant->clone_with_new_inputs({}) : - foldConvert(dequantization.subtractConstant, dequantization.subtractConvert->get_element_type())) : - std::make_shared(parent->get_output_element_type(0), Shape({}), std::vector({ 0.f })); - - subtract1Const->set_output_type(0, multiply1Const->get_output_element_type(0), subtract1Const->get_output_partial_shape(0)); - - return FakeQuantizeDequantizationValues(subtract1Const, multiply1Const); + return FakeQuantizeDequantizationValues(subtractConstant, multiplyConstant); } bool NetworkHelper::isZeroConst(const std::shared_ptr& node) { diff --git a/inference-engine/src/low_precision_transformations/src/reshape.cpp b/inference-engine/src/low_precision_transformations/src/reshape.cpp index f478928537e..b94e62320e4 100644 --- a/inference-engine/src/low_precision_transformations/src/reshape.cpp +++ b/inference-engine/src/low_precision_transformations/src/reshape.cpp @@ -38,131 +38,80 @@ ReshapeTransformation::ReshapeTransformation(const Params& params) : LayerTransf } void reshapeDequantizationConstant(const std::shared_ptr& reshape) { + // Reshape dequantization operation Constant. + // 1. Calculate result dequantization Constant shape for broadcast based on original dequantization Constant shape and Reshape output. + // For example: dequantization shape {1, 3, 1, 1}, output Reshape shape {1, 12, 3, 3}, result for broadcast: {1, 3, 4, 1}, + // where '4' calculated for temporary broadcast before reshape. + // 2. Broadcast dequantization Constant, if channels are changed + // 3. Reshape and replace + auto replaceConstant = [](const std::shared_ptr& reshape, const std::shared_ptr& originalConstant) { + // reshape for element-wise constant is not required + auto constantShape = originalConstant->get_shape(); + if (shape_size(constantShape) == 1ul) { + if (!constantShape.empty()) { + const auto newConstant = NetworkHelper::toScalar(originalConstant); + replace_node(originalConstant, newConstant); + } + return; + } + + auto const reshapeInputRank = reshape->get_input_partial_shape(0).rank(); + assert(reshapeInputRank.is_static()); + if (constantShape.size() > 1ul) { + while (constantShape.size() < static_cast(reshapeInputRank.get_length())) { + constantShape.insert(constantShape.begin(), 1ul); + } + } + + const auto reshapeOutputPShape = reshape->output(0).get_partial_shape(); + const auto reshapeOutputRank = reshapeOutputPShape.rank(); + assert(reshapeOutputRank.is_static()); + assert(reshapeOutputRank.get_length() >= 2); + assert(reshapeOutputPShape[1].is_static()); + assert(static_cast(reshapeOutputPShape[1].get_length()) >= constantShape[1]); + assert(reshapeOutputPShape[1].get_length() % constantShape[1] == 0); + const size_t dimensionsToBroadcast = reshapeOutputPShape[1].get_length() / constantShape[1]; + if (dimensionsToBroadcast == 0ul) { + return; + } + + Shape newOperationConstantBroadcastedShape = originalConstant->output(0).get_shape(); + // add dimensions to broadcast values + if (newOperationConstantBroadcastedShape.size() == 2ul) { + newOperationConstantBroadcastedShape.push_back(dimensionsToBroadcast); + } else { + newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast; + } + const std::shared_ptr broadcastedConstant = fold( + originalConstant, + std::make_shared( + element::i32, + Shape({ newOperationConstantBroadcastedShape.size() }), + newOperationConstantBroadcastedShape)); + + std::vector newReshapeConstValues(reshapeOutputRank.get_length(), 1ul); + newReshapeConstValues[1] = 
reshapeOutputPShape[1].get_length(); + const std::shared_ptr newReshapeConstant = std::make_shared( + element::i32, + Shape({ newReshapeConstValues.size() }), + newReshapeConstValues); + + const std::shared_ptr resultConstant = fold( + broadcastedConstant, + newReshapeConstant, + reshape->get_special_zero()); + + replace_node(originalConstant, resultConstant); + }; + const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(reshape, 0); - if (dequantization.multiplyConstant->get_shape().size() > 1ul) { - // Reshape Subtract or Multiply operation Constant. - // 1. modify reshape parameters to avoid reshape by spatial dimensions - // 2. broadcast element-wise constant if channels are changed - // 3. reshape element-wise constant with modified reshape parameters - auto replaceConstant = [](const std::shared_ptr& reshape, const std::shared_ptr& op) { - const size_t constantIndex = as_type(op->get_input_node_ptr(1)) ? 1 : 0; - const auto originalConstant = as_type_ptr(op->get_input_node_shared_ptr(constantIndex)); - const auto constantShape = originalConstant->get_shape(); - // reshape for element-wise constant is not required - if (shape_size(constantShape) == 1ul) { - if (constantShape.size() > 1ul) { - const Shape newConstShape = Shape(reshape->get_output_partial_shape(0).rank().get_length(), 1ul); - const auto newConstant = opset1::Constant::create( - originalConstant->get_element_type(), newConstShape, originalConstant->cast_vector()); - replace_node(op->get_input_node_shared_ptr(constantIndex), newConstant); - } + if (dequantization.subtract != nullptr) { + replaceConstant(reshape, dequantization.subtractConstant); + } - return; - } - - // simple broadcast operation Constant shape to shape on activations - auto newOperationConstantShape = constantShape; - auto const reshapeInputPShape = reshape->get_input_partial_shape(0); - PartialShape newOperationConstantBroadcastedShape(reshapeInputPShape); - newOperationConstantBroadcastedShape[0] = 1ul; - - if ((reshapeInputPShape.rank().get_length() - newOperationConstantShape.size()) == 1ul) { - newOperationConstantShape.insert(newOperationConstantShape.begin(), 1ul); - } - const std::shared_ptr newOperationConstant = std::make_shared( - op->input(constantIndex).get_element_type(), - newOperationConstantShape, - originalConstant->cast_vector()); - - // reshape -1 value handling - auto getOverallValue = [](const Shape& shape, const std::vector& reshapeValues, const bool specialZero) -> size_t { - size_t overallValue = shape_size(shape); - for (size_t i = 0; i < reshapeValues.size(); ++i) { - auto reshapeValue = reshapeValues[i]; - if ((reshapeValue == 1ul) || (reshapeValue == -1) || ((reshapeValue == 0ul) && !specialZero)) { - continue; - } - - if ((reshapeValue == 0ul) && specialZero) { - reshapeValue = shape[i]; - } - - overallValue = overallValue / reshapeValue; - } - return overallValue; - }; - - // modify reshape constant for element-wise constant reshape - // element-wise constant doesn't have spatial dimensions, as result we should remove spatial dimensions from reshape parameters - const std::vector reshapeConstValues = as_type_ptr(reshape->get_input_node_shared_ptr(1))->cast_vector(); - - size_t overallValue = 0; - for (size_t i = 0; i < reshapeConstValues.size(); ++i) { - if (reshapeConstValues[i] == -1) { - overallValue = getOverallValue( - reshapeInputPShape.to_shape(), - reshapeConstValues, - as_type_ptr(reshape)->get_special_zero()); - break; - } - } - - std::vector newReshapeConstValues(reshapeConstValues); - 
for (int i = static_cast(newReshapeConstValues.size() - 1); i >= 0; --i) { - if (static_cast(newOperationConstantShape.size()) <= i) { - // new dimension was added - newReshapeConstValues[i] = 1; - } else if (newOperationConstantShape[i] == 1ul) { - // keep the same - newReshapeConstValues[i] = 1; - } else if (newReshapeConstValues[i] == -1) { - // modified reshape parameters are different, but value instead '-1' has to be equal as original reshape - newReshapeConstValues[i] = overallValue; - } - } - - const std::shared_ptr newReshapeConstant = std::make_shared( - reshape->input(1).get_element_type(), - Shape({ newReshapeConstValues.size() }), - newReshapeConstValues); - - // if channels are different then broadcast spatial dimensions to reshape channels correctly - // limitation which has to be covered by canBeTransformed: - // 1. spatial dimensions have to be absent or equal to 1 after reshape - // 2. only second dimension can be changed - - const bool shouldBroadcast = (shape_size(newReshapeConstValues) != 1ul) && (reshapeConstValues[1] != 0) && - (((reshapeConstValues[1] != -1) && - (static_cast(newOperationConstantShape[1]) != reshapeConstValues[1])) || - ((reshapeConstValues[1] == -1) && - (newOperationConstantShape[1] != overallValue))); - - const std::shared_ptr broadcastedConstant = shouldBroadcast ? - fold( - newOperationConstant, - std::make_shared( - element::i32, - Shape({static_cast(newOperationConstantBroadcastedShape.rank().get_length())}), - // TODO: investigate behaviour - newOperationConstantBroadcastedShape.to_shape())) : - newOperationConstant; - - const std::shared_ptr resultConstant = fold( - broadcastedConstant, - newReshapeConstant, - reshape->get_special_zero()); - - replace_node(op->get_input_node_shared_ptr(constantIndex), resultConstant); - }; - - if (dequantization.subtract != nullptr) { - replaceConstant(reshape, dequantization.subtract); - } - - if (dequantization.multiply != nullptr) { - replaceConstant(reshape, dequantization.multiply); - } + if (dequantization.multiply != nullptr) { + replaceConstant(reshape, dequantization.multiplyConstant); } } @@ -186,7 +135,7 @@ bool ReshapeTransformation::isPrecisionPreserved(std::shared_ptr op) const return true; } -size_t getLastNotBroadcastedChannel(const Shape& shape) { +size_t getLastNotBroadcastedDimension(const Shape& shape) { for (int i = static_cast(shape.size()) - 1; i >= 0; --i) { if (shape[i] != 1ul) { return i; @@ -195,7 +144,7 @@ size_t getLastNotBroadcastedChannel(const Shape& shape) { return 0; } -size_t getFirstChangedChannel(const PartialShape& shape1, const PartialShape& shape2) { +size_t getFirstChangedDimension(const PartialShape& shape1, const PartialShape& shape2) { const size_t minSize = std::min(shape1.rank().get_length(), shape2.rank().get_length()); size_t i = 0; for (; i < minSize; ++i) { @@ -216,11 +165,15 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex return false; } - // TODO: LPT: to support current flow: #58269 - //if (((dequantization.subtractConstant != nullptr) && NetworkHelper::isScalarLike(dequantization.subtractConstant)) || - // ((dequantization.multiplyConstant != nullptr) && NetworkHelper::isScalarLike(dequantization.multiplyConstant))) { - // return true; - //} + if (((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) && + ((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant))) { + return true; + } + + const PartialShape outputPShape = 
op->get_output_partial_shape(0); + if (outputPShape[1].is_dynamic()) { + return false; + } const Shape subtractShape = dequantization.subtract == nullptr ? Shape{} : dequantization.subtractConstant->get_shape(); Shape subtractShapeWithBatch = subtractShape; @@ -245,26 +198,23 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex multiplyShapeWithBatch.insert(multiplyShapeWithBatch.begin(), 1ul); } - const PartialShape outputPShape = op->get_output_partial_shape(0); - // if we have per-channel dq, dynamic shape, and "-1" reshape value - don't transform - if (outputPShape.is_dynamic() && (shape_size(subtractShape) > 1ul || shape_size(multiplyShape) > 1ul)) { - const auto reshapeConstant = as_type_ptr(op->get_input_node_shared_ptr(1))->cast_vector(); - if (std::any_of(reshapeConstant.cbegin(), reshapeConstant.cend(), [](const int value) { return value == -1; })) { - return false; - } + const size_t outputChannel = static_cast(outputPShape[1].get_length()); + if (!subtractShapeWithBatch.empty() && (outputChannel < subtractShapeWithBatch[1])) { + return false; + } + if (!multiplyShapeWithBatch.empty() && (outputChannel < multiplyShapeWithBatch[1])) { + return false; + } + + if (outputPShape.is_static() && + ((!subtractShapeWithBatch.empty() && ((outputChannel % subtractShapeWithBatch[1]) != 0)) || + (!multiplyShapeWithBatch.empty() && (outputChannel % multiplyShapeWithBatch[1] != 0)))) { + return false; } return canBeTransformed(subtractShapeWithBatch, multiplyShapeWithBatch, inputPShape, outputPShape); } -size_t getChannelVolume(const PartialShape& shape) { - size_t volume = 1ul; - for (int i = 2; i < shape.rank().get_length(); ++i) { - volume = volume * shape[i].get_length(); - } - return volume; -} - bool ReshapeTransformation::canBeTransformed( const ngraph::Shape& subtractShape, const ngraph::Shape& multiplyShape, @@ -277,68 +227,15 @@ bool ReshapeTransformation::canBeTransformed( return false; } - // TODO: story 38439 - if ((inputRank == 4ul) && (outputRank == 2ul)) { - auto checkSpatialDimensions = [](const Shape& dequantizationConstShape) { - for (size_t i = (dequantizationConstShape.size() - 2); i < dequantizationConstShape.size(); ++i) { - if (dequantizationConstShape[i] != 1ul) { - return false; - } - } - return true; - }; + const size_t lastNotBroadcastedDimension = std::max(getLastNotBroadcastedDimension(subtractShape), getLastNotBroadcastedDimension(multiplyShape)); + const size_t firstChangedDimension = getFirstChangedDimension(inputShape, outputShape); + // LPT supports channel on the second dimension natively <= reshape transformation supports more shapes for this case + if ((lastNotBroadcastedDimension == 1ul) && (firstChangedDimension == 1ul)) { + return true; + } - if (((subtractShape.size() >= 3ul) && (!checkSpatialDimensions(subtractShape))) || - ((multiplyShape.size() >= 3ul) && (!checkSpatialDimensions(multiplyShape)))) { - return false; - } - - if (inputRank > 1ul) { - if (inputShape[1].is_dynamic()) { - return false; - } - } else { - if (inputShape[0].is_dynamic()) { - return false; - } - } - - if (outputRank > 1ul) { - if (outputShape[1].is_dynamic()) { - return false; - } - } else { - if (outputShape[0].is_dynamic()) { - return false; - } - } - - // custom validation for Layout::NCHW => Layout::NC - const size_t inputChannelsCount = inputRank > 1ul ? inputShape[1].get_length() : inputShape[0].get_length(); - const size_t outputChannelsCount = outputRank > 1ul ? 
outputShape[1].get_length() : outputShape[0].get_length(); - for (size_t i = 2; i < inputRank; ++i) { - if (inputShape[i].is_dynamic()) { - return false; - } - } - - if ((inputShape[0] != outputShape[0]) || ((inputChannelsCount * getChannelVolume(inputShape)) != outputChannelsCount)) { - return false; - } - } else { - if (ngraph::shape_size(subtractShape) > 1 || ngraph::shape_size(multiplyShape) > 1) { - for (size_t i = 0; i < 2ul; ++i) { - if (inputShape[i] != outputShape[i]) { - return false; - } - } - } - - const size_t lastNotBroadcastedChannel = std::max(getLastNotBroadcastedChannel(subtractShape), getLastNotBroadcastedChannel(multiplyShape)); - const size_t firstChangedChannel = getFirstChangedChannel(inputShape, outputShape); - if (lastNotBroadcastedChannel >= firstChangedChannel) { - return false; - } + if (lastNotBroadcastedDimension >= firstChangedDimension) { + return false; } return true; diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp index e20fed518e4..cb786a8af36 100644 --- a/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp +++ b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp @@ -161,8 +161,8 @@ void VariantWrapper::merge( resultSharedValue->preferablePrecisions.insert(sharedValue->preferablePrecisions.begin(), sharedValue->preferablePrecisions.end()); - const auto resultSize = abs(resultSharedValue->minInterval.high - resultSharedValue->minInterval.low); - const auto size = abs(sharedValue->minInterval.high - sharedValue->minInterval.low); + const auto resultSize = std::abs(resultSharedValue->minInterval.high - resultSharedValue->minInterval.low); + const auto size = std::abs(sharedValue->minInterval.high - sharedValue->minInterval.low); if (resultSize > size) { resultSharedValue->minInterval = sharedValue->minInterval; diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp new file mode 100644 index 00000000000..6041e1f3f7b --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp @@ -0,0 +1,247 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_blocked_memory_desc.h" +#include "mkldnn_memory.h" +#include "utils/cpu_utils.hpp" + +using namespace MKLDNNPlugin; + +BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims) : MemoryDesc(dims, Blocked) , precision(prc) { + order.resize(dims.size()); + std::iota(order.begin(), order.end(), 0); + blockedDims = dims; + offsetPadding = 0; + offsetPaddingToData.resize(dims.size(), 0); + strides.resize(order.size()); + strides[strides.size() - 1] = 1; + for (size_t i = 2; i <= order.size(); i++) { + strides[strides.size() - i] = strides[strides.size() - (i - 1)] * blockedDims[blockedDims.size() - (i - 1)]; + } +} + +BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims, const std::vector& blockedDims, + const std::vector& order, size_t offsetPadding, const std::vector& offsetPaddingToData, + const std::vector& strides) : MemoryDesc(dims, Blocked), precision(prc) { + if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "BlockedMemoryDesc do not support undefined order."; + } + + if 
(std::any_of(blockedDims.begin() + dims.size(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "BlockedMemoryDesc doesn't support undefined blockedDims."; + } + + this->order = order; + this->blockedDims = blockedDims; + this->offsetPadding = offsetPadding; + + if (offsetPaddingToData.empty() && !order.empty()) { + this->offsetPaddingToData.resize(order.size()); + this->offsetPaddingToData[order.size() - 1] = 0; + for (size_t i = 2; i <= order.size(); i++) { + this->offsetPaddingToData[order.size() - i] = 0; + } + } else { + this->offsetPaddingToData = offsetPaddingToData; + } + + if (strides.empty() && !order.empty()) { + if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + this->strides.resize(order.size(), Shape::UNDEFINED_DIM); + } else { + this->strides.resize(order.size()); + this->strides[order.size() - 1] = 1; + for (size_t i = 2; i <= order.size(); i++) { + this->strides[order.size() - i] = this->strides[order.size() - (i - 1)] * this->blockedDims[blockedDims.size() - (i - 1)]; + } + } + } else { + this->strides = strides; + } + + if (!everyone_is(this->order.size(), this->blockedDims.size(), this->offsetPaddingToData.size(), this->strides.size())) { + IE_THROW() << "Order, blocked dims, offset padding to data and strides must have equals size"; + } +} + +bool BlockedMemoryDesc::isDefined() const { + bool defined = true; + defined = defined && std::none_of(blockedDims.cbegin(), blockedDims.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(strides.cbegin(), strides.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(order.cbegin(), order.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(offsetPaddingToData.cbegin(), offsetPaddingToData.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && offsetPadding != Shape::UNDEFINED_DIM; + + return defined; +} + +bool BlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { + const MemoryDesc* pRhs = &rhs; + if (auto blockingDesc = dynamic_cast(pRhs)) { + return isCompatible(*blockingDesc); + } else if (auto mkldnnDesc = dynamic_cast(pRhs)) { + return mkldnnDesc->isCompatible(*this); + } else { + return false; + } +} + +bool BlockedMemoryDesc::isCompatible(const BlockedMemoryDesc& rhs) const { + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) + return false; + + if (!dimsEqualWeak(this->getBlockDims(), rhs.getBlockDims())) { + return false; + } + + if (!dimsEqualWeak(this->getOffsetPaddingToData(), rhs.getOffsetPaddingToData())) { + return false; + } + + // this check needed to avoid inserting unnecessary reorders if the memory is used in place and the batch size is equal to 1 + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 
0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(this->getStrides(), rhs.getStrides(), skipAxis)) { + return false; + } + + if (!dimsEqualWeak(this->getOrder(), rhs.getOrder())) { + return false; + } + + return dimsEqualWeak(this->getOffsetPadding(), rhs.getOffsetPadding()); +} + +bool BlockedMemoryDesc::isCompatible(const MKLDNNMemoryDesc& rhs) const { + return rhs.isCompatible(*this); +} + +size_t BlockedMemoryDesc::getMemSizeImp() const { + int64_t e_size = getOffsetPadding() + 1; // size in bytes (from begin of data to last element) + for (int j = 0; j < getBlockDims().size(); j++) + e_size += (getBlockDims()[j] - 1) * getStrides()[j]; + + + e_size *= getPrecision() == InferenceEngine::Precision::BIN ? 1 : getPrecision().size(); + + return e_size; +} + +size_t BlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const { + InferenceEngine::SizeVector off_v = v; + + size_t n_blocked_dims = order.size(); + if (blockedDims.size() != n_blocked_dims || strides.size() != n_blocked_dims) { + IE_THROW() << "Cannot calculate offset. Incorrect primitive descriptor!"; + } + InferenceEngine::SizeVector blockedShift(n_blocked_dims); + for (size_t i = 1; i <= n_blocked_dims; i++) { + blockedShift[n_blocked_dims - i] = off_v[order[n_blocked_dims - i]] % blockedDims[n_blocked_dims - i]; + off_v[order[n_blocked_dims - i]] /= blockedDims[n_blocked_dims - i]; + } + size_t offset = getOffsetPadding(); + for (size_t d = 0; d < n_blocked_dims; ++d) { + const size_t p = blockedShift[d] + getOffsetPaddingToData()[d]; + offset += p * strides[d]; + } + return offset; +} + +size_t BlockedMemoryDesc::getElementOffset(size_t elemNumber) const { + // TODO [DS]: rewrite to support dynamic shapes + auto& dims = shape.getStaticDims(); + size_t n_dims = dims.size(); + InferenceEngine::SizeVector pos(n_dims); + for (size_t rd = 1; rd <= n_dims; ++rd) { + const size_t d = n_dims - rd; + const size_t cur_dim = dims[d]; + pos[d] = elemNumber % cur_dim; + elemNumber /= cur_dim; + } + return getOffset(pos); +} + +bool BlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { + switch (layoutType) { + case LayoutType::ncsp: + return isPlainFormat(); + case LayoutType::nspc: + return isTailCFormat(); + case LayoutType::nCsp8c: + return isBlockedCFormat(8); + case LayoutType::nCsp16c: + return isBlockedCFormat(16); + default: + return false; + } +} + +bool BlockedMemoryDesc::isPlainFormat() const { + if (shape.getRank() != order.size()) { + return false; + } + for (size_t i = 0; i < order.size(); ++i) { + if (order[i] != i) { + return false; + } + } + return true; +} + +bool BlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { + if ((order.size() - shape.getRank()) != 1) { + return false; + } + for (size_t i = 0; i < order.size() - 1; ++i) { + if (order[i] != i) { + return false; + } + } + if (order.back() != 1) { + return false; + } + if (blockedDims.back() != blk_size) { + return false; + } + return true; +} + +bool BlockedMemoryDesc::isTailCFormat() const { + if (shape.getRank() < 3) { + return false; + } + if (shape.getRank() != order.size()) { + return false; + } + if (!std::is_sorted(order.begin(), --order.end())) { + return false; + } + if (order.back() != 1) { + return false; + } + return true; +} + +std::string BlockedMemoryDesc::serializeFormat() const { + std::stringstream result; + char startLetter = 'a'; + std::unordered_map mapAxisBlockSize; + for (size_t i = shape.getRank(); i < order.size(); ++i) { + mapAxisBlockSize.insert({order[i], 
blockedDims[i]}); + } + + for (size_t i = 0; i < shape.getRank(); ++i) { + char nextLetter = startLetter + order[i]; + if (mapAxisBlockSize.count(i)) { + nextLetter = toupper(nextLetter); + } + result << nextLetter; + } + + for (auto& item : mapAxisBlockSize) { + result << item.second << char(startLetter + item.first); + } + + return result.str(); +} diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h new file mode 100644 index 00000000000..2c5b8a7d53c --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h @@ -0,0 +1,100 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_memory_desc.h" + +namespace MKLDNNPlugin { + +class MKLDNNMemoryDesc; + +class BlockedMemoryDesc : public MemoryDesc { +public: + BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims); + + BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims, const std::vector& blockedDims, + const std::vector& order, size_t offsetPadding = 0, const std::vector& offsetPaddingToData = {}, + const std::vector& strides = {}); + + MemoryDescPtr clone() const override { + return MKLDNNPlugin::make_unique(*this); + } + + bool isDefined() const override; + + bool isCompatible(const MemoryDesc& rhs) const override; + + bool isCompatible(const BlockedMemoryDesc& rhs) const; + + bool isCompatible(const MKLDNNMemoryDesc& rhs) const; + + InferenceEngine::Precision getPrecision() const override { + return precision; + } + + void setPrecision(InferenceEngine::Precision prc) override { + precision = std::move(prc); + } + + const std::vector& getBlockDims() const { + return blockedDims; + } + + /** + * @brief Returns the vector of order + * + * @return order + */ + const std::vector& getOrder() const { + return order; + } + + /** + * @brief Returns the per-dimension offset vector + * + * @return offsets + */ + const std::vector& getOffsetPaddingToData() const { + return offsetPaddingToData; + } + /** + * @brief Returns the offset to the current memory block + * + * @return offset + */ + size_t getOffsetPadding() const { + return offsetPadding; + } + + /** + * @brief Returns strides for each dimension + * + * @return strides + */ + const std::vector& getStrides() const { + return strides; + } + + bool hasLayoutType(LayoutType layoutType) const override; + + std::string serializeFormat() const override; + +private: + size_t getElementOffset(size_t elemNumber) const override; + size_t getMemSizeImp() const override; + size_t getOffset(const InferenceEngine::SizeVector& v) const; + bool isPlainFormat() const; + bool isBlockedCFormat(size_t blk_size) const; + bool isTailCFormat() const; + +private: + InferenceEngine::Precision precision; + std::vector blockedDims; + std::vector strides; + std::vector order; + std::vector offsetPaddingToData; + size_t offsetPadding; +}; +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h new file mode 100644 index 00000000000..31d2b4b2091 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h @@ -0,0 +1,110 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "cpu_shape.h" +#include "utils/general_utils.h" + +namespace MKLDNNPlugin { + +enum MemoryDescType { + Blocked, + Mkldnn +}; + +enum class LayoutType : 
unsigned { + nspc, // general per channels format + ncsp, // general planar + nCsp8c, // general channels blocked by 8 + nCsp16c // general channels blocked by 16 +}; + +class MemoryDesc { +public: + MemoryDescType getType() const { + return type; + } + + const Shape& getShape() const { + return shape; + } + + virtual ~MemoryDesc() = default; + + virtual InferenceEngine::Precision getPrecision() const = 0; + + virtual void setPrecision(InferenceEngine::Precision prc) = 0; + + virtual std::unique_ptr clone() const = 0; + + virtual bool isCompatible(const MemoryDesc& rhs) const = 0; + + // Checks that all dimensions, offsets, strides, etc are defined (!= UNDEFINED_DIM) + virtual bool isDefined() const = 0; + + virtual bool hasLayoutType(LayoutType layoutType) const = 0; + + virtual std::string serializeFormat() const = 0; + + /** + * @brief Get minimal required memory size in bytes. + * @return return minimal required memory size in bytes or UNDEFINED_SIZE in case undefined descriptor + */ + size_t getCurrentSize() const { + size_t retVal = UNDEFINED_SIZE; + if (isDefined()) { + retVal = getMemSizeImp(); + } + return retVal; + } + + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + T* as() { + T* casted = dynamic_cast(this); + if (!casted) + IE_THROW() << "Cannot dynamically cast MemoryDesc"; + return casted; + } + + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + const T* as() const { + const T* casted = dynamic_cast(this); + if (!casted) + IE_THROW() << "Cannot dynamically cast MemoryDesc"; + return casted; + } + + static constexpr size_t UNDEFINED_SIZE = std::numeric_limits::max(); + +protected: + MemoryDesc(const Shape& shape, MemoryDescType type) + : shape(shape), type(type) {} + + MemoryDesc(const std::vector& dims, MemoryDescType type) + : shape(dims), type(type) {} + + virtual size_t getMemSizeImp() const = 0; + + // Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc. + virtual size_t getElementOffset(size_t elemNumber) const = 0; + + MemoryDescType type; + Shape shape; + + friend class BlobDumper; + // WA: optimizedNspc2Ncsp used getElementOffset inside implementation + friend class MKLDNNSplitNode; +}; + +using MemoryDescPtr = std::unique_ptr; +using MemoryDescConstPtr = std::unique_ptr; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp new file mode 100644 index 00000000000..cc04db7f26f --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp @@ -0,0 +1,395 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_memory_desc.h" +#include "cpu_memory_desc_utils.h" +#include "mkldnn_memory.h" +#include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" +#include +#include +#include +#include + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +namespace MKLDNNPlugin { + +/** + * Convert to BlockedDescriptor + * + * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} + * strides // the order of outer dims is encoded here + * inner_blks 4 16 4 + * inner_idxs 1 0 1 + * + * IE tensor desc has more expressive ability. Any oneDNN blocked tensor can be covreted. + * How to convert into IE representation: + * 0. 
Detect a new_outer_order of outer_dims via descending strides. + * 1. IE strides : concatenate strides in new_outer_order and inner strides. + * 2. IE dims : concatenate outer dims in new_outer_order with auto padding and inner blocks + * 3. IE order : concatenate new_outer_order and inner_idxs + */ +BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc) { + mkldnn::memory::desc desc = inpDesc; + const auto dims = desc.dims(); + + if (desc.data.format_kind != dnnl_blocked) + IE_THROW() << "Conversion is not possible"; + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} + std::vector inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + } + + // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} + std::vector total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::iota(outer_order.begin(), outer_order.end(), 0); + std::sort(outer_order.begin(), outer_order.end(), + [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + + // IE blocked order + // [new_outer_order] U [inner_idxs] + SizeVector ie_blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), ie_blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, ie_blk_order.begin() + dims.size()); + + // IE blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + SizeVector ie_blk_strides(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), ie_blk_strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), ie_blk_strides.begin(), + [&] (size_t i) { return blk_desc.strides[i]; }); + + // IE blocked dims + // [dims via new_outer_order with auto pad] U [inner_blk_dims] + SizeVector ie_blk_dims(total_ndims, 0); + std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, + ie_blk_dims.end() - blk_desc.inner_nblks); + std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), + [&] (size_t i) { return outer_block_dims[i]; }); + + // IE offset padded to data. Same as for oneDNN + SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; + size_t ie_blk_offset0 = desc.data.offset0; + + // TODO: The tensor desc implementation allow to specify offset_to_data for inner blocked dims. + // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will + // fill it with zero. 
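To make the assembly below easier to check, here is a minimal worked sketch of the values this helper should produce for the IOhw_4i16o4i example quoted at the top of the function; the numbers are derived from that example rather than taken from the patch itself:

    // Worked example (assumed input: dims {32, 64, 128, 128}, inner_blks {4, 16, 4}, inner_idxs {1, 0, 1}):
    //   outer_block_dims = {2, 4, 128, 128}               // dims divided by per-dim block sizes {16, 16, 1, 1}
    //   outer_order      = {1, 0, 2, 3}                    // I has the largest stride, then O, h, w
    //   ie_blk_order     = {1, 0, 2, 3, 1, 0, 1}           // outer_order followed by inner_idxs
    //   ie_blk_dims      = {4, 2, 128, 128, 4, 16, 4}      // outer dims taken in outer_order, then inner blocks
    //   ie_blk_strides   = {8388608, 4194304, 32768, 256, 64, 4, 1}

The same blockedDims/strides/order triple appears in the "Construct from IE::TensorDesc" comment further below, so the forward and reverse conversions can be cross-checked against each other.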
+ ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); + + BlockedMemoryDesc res(MKLDNNMemory::convertToIePrec(desc.data_type()), SizeVector {begin(dims), end(dims)}, ie_blk_dims, + ie_blk_order, ie_blk_offset0, ie_blk_offset_to_data, ie_blk_strides); + return res; +} + + +InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) { + if (auto blockingDesc = dynamic_cast(&desc)) { + return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), + {blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(), + blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()}); + } else if (auto mkldnnDesc = dynamic_cast(&desc)) { + auto blockingDesc = convertToBlockedDescriptor(*mkldnnDesc); + return InferenceEngine::TensorDesc(blockingDesc.getPrecision(), blockingDesc.getShape().getStaticDims(), + {blockingDesc.getBlockDims(), blockingDesc.getOrder(), blockingDesc.getOffsetPadding(), + blockingDesc.getOffsetPaddingToData(), blockingDesc.getStrides()}); + } + + IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc"; + + return InferenceEngine::TensorDesc(); +} + +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const MemoryDesc& desc) { + if (MemoryDescType::Blocked == desc.getType()) { + return convertToMKLDNNMemoryDesc(*(desc.as())); + } else if (MemoryDescType::Mkldnn == desc.getType()) { + return *(desc.as()); + } else { + IE_THROW() << "Cannot convert MemoryDesc to MKLDNNMemoryDesc"; + } +} + +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc) { + dnnl_memory_desc_t mkldnnDesc; + + // scalar case + if (desc.getShape().getRank() == 0) { + mkldnn::memory::desc convertedDesc; + convertedDesc.data.format_kind = dnnl_blocked; + convertedDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); + convertedDesc.data.ndims = 1; + convertedDesc.data.dims[0] = 1; + convertedDesc.data.padded_dims[0] = 1; + convertedDesc.data.format_desc.blocking.strides[0] = 1; + convertedDesc.data.padded_offsets[0] = 0; + convertedDesc.data.offset0 = desc.getOffsetPadding(); + return MKLDNNMemoryDesc(convertedDesc); + } + + auto dims = desc.getShape().getStaticDims(); + + auto ie_blkdDims = desc.getBlockDims(); + auto ie_order = desc.getOrder(); + auto ie_offsetsToData = desc.getOffsetPaddingToData(); + auto ie_strides = desc.getStrides(); + + size_t outer_ndims = dims.size(); + size_t inner_ndims = ie_order.size() - dims.size(); + + bool is_descending_strides = true; + for (int i = 1; i < ie_strides.size(); i++) { + is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); + } + + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims + // and may be we can achieve correct "descending strides" form which allow conversion. 
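// A minimal illustration of the constraint checked below (example values are assumed,
// matching the 4i16o4i layout described elsewhere in this file):
//   ie_strides = {8388608, 4194304, 32768, 256, 64, 4, 1}  -> non-increasing, convertible
//   ie_strides = {4194304, 8388608, 32768, ...}            -> contains an increasing pair,
//                                                             rejected with "Unsupported case for conversion"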
+ if (!is_descending_strides) + IE_THROW() << "Unsupported case for conversion"; + + std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension + for (size_t i = 0; i < outer_ndims; i++) { + outer_order[ie_order[i]] = i; + } + bool outer_is_correct_permutation_of_n = + std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); + + if (!outer_is_correct_permutation_of_n) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted + for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { + inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); + } + + if (!inner_block_are_dense) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), + [](size_t pad) { return pad == 0; }); + + if (!inner_pad_offsets_is_zero) + IE_THROW() << "Unsupported case for conversion"; + + // Fill general memory desc fields + mkldnnDesc.format_kind = dnnl_blocked; + mkldnnDesc.extra.flags = 0; + mkldnnDesc.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); + mkldnnDesc.ndims = dims.size(); + mkldnnDesc.offset0 = desc.getOffsetPadding(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.dims); + std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.padded_offsets); + std::fill(mkldnnDesc.padded_dims, mkldnnDesc.padded_dims + outer_ndims, 1); + for (size_t i = 0; i < ie_order.size(); i++) { + auto idx = ie_order[i]; + mkldnnDesc.padded_dims[idx] *= ie_blkdDims[i]; + } + + // Fill blocking desc + auto &dnn_blk_desc = mkldnnDesc.format_desc.blocking; + dnn_blk_desc.inner_nblks = inner_ndims; + std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); + std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); + for (size_t i = 0; i < outer_ndims; i++) { + dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; + } + + return MKLDNNMemoryDesc(mkldnnDesc); +} + + +/** + * Construct from IE::TensorDesc + * @param tDesc + * + * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} + * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. + * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence + * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims + * + * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) + * + * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of + * real dims spliting. + * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] + * but not [0]<=>[4] because it breacke spliting original dims into internal blocked dims + * Normalization of representation: Make strides growing but keep layout same as original. Not all + * layout allow us to meet normalize form of tensor desc. + * + * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... 
N] + */ +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& tDesc) { + mkldnn::memory::desc mkldnnDesc({}, mkldnn::memory::data_type::undef, mkldnn::memory::format_tag::undef); + auto dims = tDesc.getDims(); + + // TODO: implicit conversion of dims is no good... + if (tDesc.getLayout() == Layout::SCALAR) { + mkldnnDesc.data.format_kind = dnnl_blocked; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = 1; + mkldnnDesc.data.dims[0] = 1; + mkldnnDesc.data.padded_dims[0] = 1; + mkldnnDesc.data.format_desc.blocking.strides[0] = 1; + mkldnnDesc.data.padded_offsets[0] = 0; + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + return MKLDNNMemoryDesc(mkldnnDesc); + } + + if (tDesc.getLayout() == Layout::ANY) { + mkldnnDesc.data.format_kind = dnnl_format_kind_any; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = dims.size(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.padded_dims); + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + std::fill(mkldnnDesc.data.padded_offsets, mkldnnDesc.data.padded_offsets + dims.size(), 0); + return MKLDNNMemoryDesc(mkldnnDesc); + } + + auto ie_blkdDims = tDesc.getBlockingDesc().getBlockDims(); + auto ie_order = tDesc.getBlockingDesc().getOrder(); + auto ie_offsetsToData = tDesc.getBlockingDesc().getOffsetPaddingToData(); + auto ie_strides = tDesc.getBlockingDesc().getStrides(); + + size_t outer_ndims = dims.size(); + size_t inner_ndims = ie_order.size() - dims.size(); + + bool is_descending_strides = true; + for (int i = 1; i < ie_strides.size(); i++) { + is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); + } + + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims + // and may be we can achieve correct "descending strides" form which allow conversion. 
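// Worked illustration of the outer_order bookkeeping a few lines below (values taken from
// the header example above; the malformed variant is hypothetical):
//   ie_order = {1, 0, 2, 3, 1, 0, 1}, outer_ndims = 4, sentinel = outer_ndims + 1 = 5
//   outer_order starts as {5, 5, 5, 5}; after outer_order[ie_order[i]] = i it becomes {1, 0, 2, 3},
//   no sentinel remains, so the first 4 entries of ie_order form a valid permutation of 0..3.
//   A malformed order such as {1, 1, 2, 3, ...} would leave outer_order = {5, 1, 2, 3};
//   the surviving sentinel marks dimension 0 as never referenced and the conversion is rejected.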
+ if (!is_descending_strides) + IE_THROW() << "Unsupported case for conversion"; + + std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension + for (size_t i = 0; i < outer_ndims; i++) { + outer_order[ie_order[i]] = i; + } + bool outer_is_correct_permutation_of_n = + std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); + + if (!outer_is_correct_permutation_of_n) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted + for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { + inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); + } + + if (!inner_block_are_dense) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), + [](size_t pad) { return pad == 0; }); + + if (!inner_pad_offsets_is_zero) + IE_THROW() << "Unsupported case for conversion"; + + // Fill general memory desc fields + mkldnnDesc.data.format_kind = dnnl_blocked; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = dims.size(); + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); + std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.data.padded_offsets); + std::fill(mkldnnDesc.data.padded_dims, mkldnnDesc.data.padded_dims + outer_ndims, 1); + for (size_t i = 0; i < ie_order.size(); i++) { + auto idx = ie_order[i]; + mkldnnDesc.data.padded_dims[idx] *= ie_blkdDims[i]; + } + + // Fill blocking desc + auto &dnn_blk_desc = mkldnnDesc.data.format_desc.blocking; + dnn_blk_desc.inner_nblks = inner_ndims; + std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); + std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); + for (size_t i = 0; i < outer_ndims; i++) { + dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; + } + + return MKLDNNMemoryDesc(mkldnnDesc); +} + +BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MemoryDesc &desc) { + if (desc.getType() == MemoryDescType::Blocked) { + return *(desc.as()); + } else if (desc.getType() == MemoryDescType::Mkldnn) { + return MemoryDescUtils::convertToBlockedDescriptor(*(desc.as())); + } else { + IE_THROW() << "Cannot convert to blocked memory descriptor. 
Unsupported memory desc type"; + } +} + +MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const MKLDNNMemoryDesc& desc) { + if (desc.getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + IE_THROW() << "applyUndefinedOffset doesn't support not dnnl_blocked MKLDNNMemoryDesc"; + + mkldnn::memory::desc retDesc = desc; + retDesc.data.offset0 = Shape::UNDEFINED_DIM; + return MKLDNNPlugin::make_unique(retDesc); +} + +MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const BlockedMemoryDesc &desc) { + std::vector strides; + std::vector offsetPaddingToData; + + strides.resize(desc.getBlockDims().size(), Shape::UNDEFINED_DIM); + offsetPaddingToData.resize(desc.getBlockDims().size(), 0); + size_t offsetPadding = Shape::UNDEFINED_DIM; + + return MKLDNNPlugin::make_unique(desc.getPrecision(), desc.getShape().getDims(), desc.getBlockDims(), + desc.getOrder(), offsetPadding, offsetPaddingToData, strides); +} + +MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc) { + if (MemoryDescType::Blocked == desc->getType()) { + auto blockedDesc = desc->as(); + return MKLDNNPlugin::make_unique(blockedDesc->getPrecision(), blockedDesc->getShape().getDims(), + blockedDesc->getBlockDims(), blockedDesc->getOrder()); + } else if (MemoryDescType::Mkldnn == desc->getType()) { + auto mkldnnDesc = desc->as(); + mkldnn::memory::desc retDesc = *mkldnnDesc; + retDesc.data.offset0 = 0; + return MKLDNNPlugin::make_unique(retDesc); + } else { + IE_THROW() << "resetOffset support Blocked and Mkldnn descpriptors only"; + } +} + +InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) { + // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor + auto& memDesc = mem.GetDesc(); + InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); + + desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); + return MKLDNNPlugin::isEmptyTensorDesc(desc) ? 
make_blob_with_precision(desc) : make_blob_with_precision(desc, mem.GetData()); +} + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h new file mode 100644 index 00000000000..5cc6b0fc103 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h @@ -0,0 +1,88 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace MKLDNNPlugin { +class MKLDNNMemoryDesc; +class BlockedMemoryDesc; +class MKLDNNMemory; + +class MemoryDescUtils { +public: + /** + * @brief Converts MemoryDesc to InferenceEngine::TensorDesc + * @param desc MemoryDesc to be converted + * @return converted InferenceEngine::TensorDesc + */ + static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc); + + /** + * @brief Converts MemoryDesc to MKLDNNMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const MemoryDesc& desc); + + /** + * @brief Converts BlockedMemoryDesc to MKLDNNMemoryDesc + * @param desc BlockedMemoryDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc); + + /** + * @brief Converts InferenceEngine::TensorDesc to MKLDNNMemoryDesc + * @param desc InferenceEngine::TensorDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& desc); + + /** + * @brief Converts MemoryDesc to BlockedMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted BlockedMemoryDesc + */ + static BlockedMemoryDesc convertToBlockedDescriptor(const MemoryDesc& desc); + + /** + * @brief Converts MKLDNNMemoryDesc to BlockedMemoryDesc + * @param desc MKLDNNMemoryDesc to be converted + * @return converted BlockedMemoryDesc + */ + static BlockedMemoryDesc convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc); + + /** + * @brief Creates MKLDNNMemoryDesc with offset0 of UNDEFINED_DIM size + * @param desc modifiable MKLDNNMemoryDesc + * @return pointer to MKLDNNMemoryDesc + */ + static MemoryDescPtr applyUndefinedOffset(const MKLDNNMemoryDesc& desc); + + /** + * @brief Creates BlockedMemoryDesc with offsetPadding, strides of UNDEFINED_DIM size and offsetPaddingToData of 0 size + * @param desc modifiable BlockedMemoryDesc + * @return pointer to BlockedMemoryDesc + */ + static MemoryDescPtr applyUndefinedOffset(const BlockedMemoryDesc& desc); + + /** + * @brief Creates MemoryDesc with offsetPadding of 0 size + * @param desc modifiable MemoryDesc + * @return pointer to MemoryDesc + */ + static MemoryDescPtr resetOffset(const MemoryDesc* desc); + + /** + * @brief Creates InferenceEngine::Blob from MKLDNNMemory + * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob + * @return pointer to InferenceEngine::Blob + */ + static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_shape.h b/inference-engine/src/mkldnn_plugin/cpu_shape.h new file mode 100644 index 00000000000..fd063c2dc18 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_shape.h @@ -0,0 +1,159 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "perf_count.h" 
+#include +#include +#include +#include +#include "mkldnn_dims.h" + +namespace MKLDNNPlugin { + +class Shape { +public: + Shape() = default; + + explicit Shape(const ngraph::PartialShape& shape) { + minDims = shape.get_min_shape(); + maxDims = shape.get_max_shape(); + type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic; + + initDims(); + } + + explicit Shape(const InferenceEngine::SizeVector& shape) { + minDims = shape; + maxDims = shape; + type = ShapeType::Static; + + initDims(); + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return lower bound of shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [6, 6, 6, 6] + * minDims = [1, 1, 1, 1] + * dims = [UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return lower bound of shape = [1, 1, 1, 1] + */ + const std::vector& getMinDims() const { + return minDims; + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return upper bound of shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [6, 6, 6, 6] + * minDims = [1, 1, 1, 1] + * dims = [UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return upper bound of shape = [6, 6, 6, 6] + */ + const std::vector& getMaxDims() const { + return maxDims; + } + + /** + * @brief return defined shape or throw exception for dynamic case + * @return return shape + */ + const std::vector& getStaticDims() const { + if (type != ShapeType::Static) { + IE_THROW() << "Cannot get dims for non static shape"; + } + + return minDims; + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return defined shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [2, 3, 6, 6] + * minDims = [2, 3, 1, 1] + * dims = [2, 3, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return shape with defined and undefined dims = [2, 3, UNDEFINED_DIM, UNDEFINED_DIM] + */ + const std::vector& getDims() const { + return dims; + } + bool isStatic() const { + return type == ShapeType::Static; + } + + size_t getRank() const { + return minDims.size(); + } + + size_t getElementsCount() const { + if (type != ShapeType::Static) { + IE_THROW() << "Cannot get elements count for non static shape"; + } + + size_t size = 1; + + for (int i = 0; i < minDims.size(); i++) { + size *= minDims[i]; + } + + return size; + } + + ngraph::PartialShape toPartialShape() const { + std::vector nGraphDims; + nGraphDims.reserve(minDims.size()); + for (int i = 0; i < minDims.size(); i++) { + nGraphDims.emplace_back(minDims[i], maxDims[i]); + } + return ngraph::PartialShape(nGraphDims); + } + + bool operator == (const Shape& rhs) const { + return minDims == rhs.minDims && maxDims == rhs.maxDims; + } + + bool operator != (const Shape& rhs) const { + return !(*this == rhs); + } + + enum : size_t { + UNDEFINED_DIM = 0xffffffffffffffff + }; + +private: + void initDims() { + dims.resize(minDims.size()); + for (int i = 0; i < minDims.size(); i++) { + dims[i] = minDims[i] == maxDims[i] ? 
minDims[i] : UNDEFINED_DIM; + } + } + + enum class ShapeType { + Static, + Dynamic + } type {ShapeType::Static}; + + std::vector minDims; + std::vector maxDims; + std::vector dims; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.h b/inference-engine/src/mkldnn_plugin/cpu_types.h index e5bc8af0b5c..7c820c4db50 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.h +++ b/inference-engine/src/mkldnn_plugin/cpu_types.h @@ -16,6 +16,7 @@ enum Type { Deconvolution, Lrn, Pooling, + AdaptivePooling, FullyConnected, Softmax, Split, @@ -85,7 +86,9 @@ enum Type { ExperimentalDetectronPriorGridGenerator, ExperimentalDetectronGenerateProposalsSingleImage, ExtractImagePatches, - NonMaxSuppression + NonMaxSuppression, + MatrixNms, + MulticlassNms }; enum Algorithm { @@ -95,6 +98,10 @@ enum Algorithm { PoolingMax, PoolingAvg, + // Adaptive pooling algorithms + AdaptivePoolingMax, + AdaptivePoolingAvg, + // Convolution algorithms ConvolutionCommon, ConvolutionGrouped, diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index 1415dc1ae95..34261b1ac87 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -6,7 +6,6 @@ #include "mkldnn_node.h" #include "mkldnn_extension_utils.h" #include -#include "utils/cpu_utils.hpp" using namespace mkldnn; namespace MKLDNNPlugin { @@ -29,7 +28,7 @@ const MKLDNNNodePtr MKLDNNEdge::getChild() const { } bool MKLDNNEdge::isUseExternalMemory() const { - return externalMemoryPtr; + return useExternalMemory; } bool MKLDNNEdge::isDropped() const { @@ -77,7 +76,7 @@ bool MKLDNNEdge::needReorder() { int inNumber = getInputNum(); bool in_place = inPlace(); bool childCanChangeMem = childSPD->getConfig().outConfs.empty(); - for (const auto conf : childSPD->getConfig().outConfs) { + for (const auto& conf : childSPD->getConfig().outConfs) { if (conf.inPlace == outNumber && outNumber >= 0) childCanChangeMem = true; } @@ -89,7 +88,7 @@ bool MKLDNNEdge::needReorder() { int outNumber = edge->getOutputNum(); if (childSPD->getConfig().outConfs.empty()) count++; - for (const auto conf : childSPD->getConfig().outConfs) { + for (const auto& conf : childSPD->getConfig().outConfs) { if (conf.inPlace == outNumber) count++; } @@ -114,7 +113,7 @@ bool MKLDNNEdge::needReorder() { outNumber >= 0 && outNumber < childSPD->getConfig().inConfs.size() && childSPD->getConfig().inConfs[outNumber].inPlace >= 0) canBeInPlaceConflicts = true; } - return canBeInPlaceConflicts || !MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc()); + return canBeInPlaceConflicts || !getInputDesc().isCompatible(getOutputDesc()); } void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { @@ -124,35 +123,6 @@ void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { status = Status::Allocated; } -const InferenceEngine::TensorDesc& MKLDNNEdge::getInputDescRO() const { - return inputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getInputDesc() { - if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) { - inputDesc = getSpecifiedInputDesc({}); - } - return inputDesc; -} - -const InferenceEngine::TensorDesc& MKLDNNEdge::getOutputDescRO() const { - return outputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getOutputDesc() { - if (outputDesc.getLayout() == InferenceEngine::Layout::ANY) { - outputDesc = getSpecifiedOutputDesc({}); - } - return outputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getDesc() { - if 
(!MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc())) - IE_THROW() << "Cannot get descriptor for edge: " << getParent()->getName() << "->" - << getChild()->getName(); - return getInputDesc(); -} - int MKLDNNEdge::getInputNum() const { return parent_port; } @@ -168,45 +138,29 @@ void MKLDNNEdge::allocate(const void* mem_ptr) { if (memoryPtr) IE_THROW() << "Unexpected behaviour: status == NeedAllocation but memory is already allocated."; - auto inputDesc = getInputDesc(); - auto outputDesc = getOutputDesc(); - if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) || - (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && - (inputDesc.getPrecision() != outputDesc.getPrecision() || - inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc()))) - IE_THROW() << "Cannot allocate memory. Nodes have primitive descriptors with different formats."; - if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) - IE_THROW() << "Cannot get input descriptor!"; + auto& inputDesc = getInputDesc(); + auto& outputDesc = getOutputDesc(); + if (!inputDesc.isDefined() || !outputDesc.isDefined()) + IE_THROW() << "Cannot allocate memory for undefined descriptors."; + if (!inputDesc.isCompatible(outputDesc)) + IE_THROW() << "Cannot allocate memory for incompatible descriptors."; auto parentPtr = getParent(); memoryPtr.reset(new MKLDNNMemory(parentPtr->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(inputDesc), mem_ptr, false); // no pads zeroing + + memoryPtr->Create(inputDesc, mem_ptr, false); // no pads zeroing status = Status::Allocated; } -std::string MKLDNNEdge::name() { - auto tensorDescToStr = [](InferenceEngine::TensorDesc const & desc) { - std::string name = desc.getPrecision().name(); - - auto blockingDesc = desc.getBlockingDesc(); - auto dims = blockingDesc.getBlockDims(); - - if (!dims.empty()) { - name += "["; - for (size_t i = 1; i < dims.size(); ++i) { - name += std::to_string(dims[i - 1]) + ","; - } - name += std::to_string(dims.back()) + "]"; - } - - return name; - }; - +std::string MKLDNNEdge::name() const { auto parentPtr = getParent(); auto childPtr = getChild(); - return parentPtr->getName() + std::to_string(parent_port) + tensorDescToStr(getInputDesc()) - + "<->" + childPtr->getName() + std::to_string(child_port); + std::stringstream result; + + result << parentPtr->getName() << " port " << parent_port << " <-> " << childPtr->getName() << " port " << child_port; + + return result.str(); } void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) { @@ -221,7 +175,7 @@ void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) { auto ptr = weightsCache->findOrCreate(name(), alloc, false); memoryPtr = *ptr; - externalMemoryPtr = true; + useExternalMemory = true; status = Status::Allocated; } else { allocate(); @@ -242,10 +196,13 @@ void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) { status = state; } -const MKLDNNDims& MKLDNNEdge::getDims() { - if (!dims.ndims()) { - MKLDNNDims outDims; - MKLDNNDims inDims; +// TODO [DS]: remove while DynamicShapes migration +// TODO [DS]: How should we validate shape compatibility? +// TODO [DS]: Why do we allow uninitialized shape? 
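// Illustrative usage of the Shape semantics getShape() now returns (a sketch with assumed
// dimension values; see cpu_shape.h for the actual contract):
//   Shape staticShape(InferenceEngine::SizeVector{2, 3, 4, 5});
//   staticShape.isStatic();        // true
//   staticShape.getStaticDims();   // {2, 3, 4, 5}
//
//   Shape dynamicShape(ngraph::PartialShape{2, 3, ngraph::Dimension(1, 6), ngraph::Dimension(1, 6)});
//   dynamicShape.isStatic();       // false
//   dynamicShape.getDims();        // {2, 3, Shape::UNDEFINED_DIM, Shape::UNDEFINED_DIM}
//   dynamicShape.getStaticDims();  // throws: "Cannot get dims for non static shape"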
+const Shape& MKLDNNEdge::getShape() { + if (!shape.getRank()) { + Shape inShape; + Shape outShape; auto childPtr = getChild(); auto parentPtr = getParent(); @@ -254,8 +211,8 @@ const MKLDNNDims& MKLDNNEdge::getDims() { IE_THROW() << "Error cannot find input data for " << child.lock()->getName() << " from " << parent.lock()->getName(); } - if (inNum < childPtr->inDims.size()) { - outDims = childPtr->inDims[inNum]; + if (inNum < childPtr->inputShapes.size()) { + outShape = childPtr->inputShapes[inNum]; } int outNum = getInputNum(); @@ -263,84 +220,34 @@ const MKLDNNDims& MKLDNNEdge::getDims() { IE_THROW() << "Error cannot find output data for " << parent.lock()->getName() << " to " << child.lock()->getName(); } - if (outNum >= parentPtr->outDims.size()) + if (outNum >= parentPtr->outputShapes.size()) outNum = 0; - if (outNum < parentPtr->outDims.size()) { - inDims = parentPtr->outDims[outNum]; + if (outNum < parentPtr->outputShapes.size()) { + inShape = parentPtr->outputShapes[outNum]; } - if (inDims.ndims() && outDims.ndims() && inDims.ndims() != outDims.ndims() && inDims.size() != outDims.size()) + if (inShape.getRank() && outShape.getRank() && inShape.getRank() != outShape.getRank() && inShape.getElementsCount() != outShape.getElementsCount()) IE_THROW() << "Nodes " << getParent()->getName() << " and " << getChild()->getName() << " have incompatible dimensions!"; - if (outDims.ndims() != 0) { - dims = outDims; - } else if (inDims.ndims() != 0) { - dims = inDims; + if (outShape.getRank() != 0) { + shape = outShape; + } else if (inShape.getRank() != 0) { + shape = inShape; } else { - dims = MKLDNNDims({(size_t)1}); + shape = Shape(InferenceEngine::SizeVector({1})); } - if (!(outDims.ndims() == 0 && inDims.ndims() == 0) && !dims.ndims()) + if (!(outShape.getRank() == 0 && inShape.getRank() == 0) && !shape.getRank()) IE_THROW() << "Cannot detect right dims for nodes " << getParent()->getName() << " and " << getChild()->getName(); } - return dims; + + return shape; } -bool MKLDNNEdge::nodeCanChangeDesc(const MKLDNNNodePtr &node) const { - PrimitiveDescInfo * selectedPd = node->getSelectedPrimitiveDescriptor(); - if (selectedPd == nullptr) - IE_THROW() << "Primitive descriptor for node " << node->getName() << " is not selected."; - - for (auto &inputDesc : selectedPd->getConfig().inConfs) { - if (inputDesc.desc.getLayout() != InferenceEngine::Layout::ANY) { - return true; - } - } - - for (auto &outDesc : selectedPd->getConfig().outConfs) { - if (outDesc.desc.getLayout() != InferenceEngine::Layout::ANY) { - return true; - } - } - - MKLDNNDims inputDims; - for (size_t i = 0; i < node->getParentEdges().size(); i++) { - if (inputDims.size() == 1 && inputDims.ndims() == 0) { - inputDims = node->getParentEdgeAt(i)->getDims(); - continue; - } - - if (inputDims.ndims() != node->getParentEdgeAt(i)->getDims().ndims()) { - return true; - } - } - for (size_t i = 0; i < node->getChildEdges().size(); i++) { - if (inputDims.size() == 1 && inputDims.ndims() == 0) { - inputDims = node->getChildEdgeAt(i)->getDims(); - continue; - } - - if (inputDims.ndims() != node->getChildEdgeAt(i)->getDims().ndims()) { - return true; - } - } - - return false; -} - -/// In we have {any, any, any} -> {any} or {any} -> {any, any, any} or {any} -> {any} it means that -/// layer doesn't change memory format -/// We don't support {any, any, nchw} -> {any} -InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedInputDesc(std::map formats, size_t enterCountUp, size_t enterCountDown) { - InferenceEngine::TensorDesc inDesc; - - if 
(inputDesc.getLayout() != InferenceEngine::Layout::ANY) { - return inputDesc; - } - +const MemoryDesc& MKLDNNEdge::getInputDesc() const { auto parentPtr = getParent(); if (parentPtr->getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Primitive descriptor for node " << parentPtr->getName() << " is not selected."; @@ -349,248 +256,48 @@ InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedInputDesc(std::mapgetName() << "."; - if (inputIdx >= parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()) + auto& outConfs = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs; + if (outConfs.empty()) + IE_THROW() << "Node " << parentPtr->getName() << " has empty output config list."; + + if (inputIdx >= outConfs.size()) inputIdx = 0; - inDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc; - if (inDesc.getLayout() != InferenceEngine::Layout::ANY) { - return inDesc; - } - - bool isFormatChanging = nodeCanChangeDesc(parentPtr); - - if (!isFormatChanging && inputIdx < parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc.getLayout() != InferenceEngine::Layout::ANY) { - inDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc; - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc = inDesc; - return inDesc; - } - - for (size_t i = 0; i < parentPtr->getChildEdges().size(); i++) { - auto childEdge = parentPtr->getChildEdgeAt(i); - auto child = childEdge->getChild(); - int childIdx = childEdge->getOutputNum(); - if (!child->getSelectedPrimitiveDescriptor() || childIdx < 0 || - childEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() <= childIdx) - childIdx = 0; - memory::format_tag childInDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[childIdx].desc).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - continue; - } - if (nodeCanChangeDesc(child)) - continue; - - if (enterCountUp < 2) { - childInDesc = MKLDNNMemoryDesc(childEdge->getSpecifiedOutputDesc(formats, enterCountUp, ++enterCountDown)).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - } - } - } - - if (!isFormatChanging) { - for (size_t i = 0; i < parentPtr->getParentEdges().size(); i++) { - auto parentEdge = parentPtr->getParentEdgeAt(i); - auto parent = parentEdge->getParent(); - int parentIdx = parentEdge->getInputNum(); - if (!parent->getSelectedPrimitiveDescriptor() || parentIdx < 0 || - parentEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() <= parentIdx) { - parentIdx = 0; - } - memory::format_tag parentOutDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[parentIdx].desc).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - continue; - } - if 
(nodeCanChangeDesc(parent)) - continue; - - if (enterCountUp < 2) { - parentOutDesc = MKLDNNMemoryDesc(parentEdge->getSpecifiedInputDesc(formats, ++enterCountUp, enterCountDown)).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - } - } - } - } - - size_t maxFormatCount = 0; - memory::format_tag desc = MKLDNNMemory::GetPlainFormat(getDims()); - for (auto &it : formats) { - if (maxFormatCount < it.second && MKLDNNMemory::isConsistant(getDims(), it.first)) { - maxFormatCount = it.second; - desc = it.first; - } - } - - auto inDataType = MKLDNNMemoryDesc(parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc).getDataType(); - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc = MKLDNNMemoryDesc(getDims(), inDataType, desc); - if (!isFormatChanging && inputIdx < parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc.getLayout() == InferenceEngine::Layout::ANY) { - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc = - MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(getDims(), inDataType, desc)); - } - - return MKLDNNMemoryDesc(getDims(), inDataType, desc); + return *(outConfs[inputIdx].desc); } -InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedOutputDesc(std::map formats, size_t enterCountUp, size_t enterCountDown) { - InferenceEngine::TensorDesc outDesc; - - if (outputDesc.getLayout() != InferenceEngine::Layout::ANY) { - return outputDesc; - } - +const MemoryDesc& MKLDNNEdge::getOutputDesc() const { auto childPtr = getChild(); - auto parentPtr = getParent(); if (childPtr->getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Primitive descriptor for node " << childPtr->getName() << " is not selected."; int outputIdx = getOutputNum(); - int inputIdx = getInputNum(); if (outputIdx < 0) { IE_THROW() << "Edge cannot be found for node" << childPtr->getName() << "."; } - if (outputIdx >= childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size()) + auto& inConfs = childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs; + if (inConfs.empty()) + IE_THROW() << "Node " << childPtr->getName() << " has empty input config list."; + + if (outputIdx >= inConfs.size()) outputIdx = 0; - outDesc = childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc; - if (outDesc.getLayout() != InferenceEngine::Layout::ANY) { - return outDesc; - } + return *(inConfs[outputIdx].desc); +} - if (inputIdx >= parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()) - inputIdx = 0; +const MemoryDesc& MKLDNNEdge::getDesc() const { + if (!getInputDesc().isCompatible(getOutputDesc())) + IE_THROW() << "Cannot get descriptor for edge: " << getParent()->getName() << "->" + << getChild()->getName(); - bool isFormatChanging = nodeCanChangeDesc(childPtr); - - if ((!isFormatChanging && outputIdx < childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() && - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc.getLayout() != InferenceEngine::Layout::ANY) || - (isFormatChanging && inputIdx >= 0 && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc.getLayout() != InferenceEngine::Layout::ANY)) { - auto inputDataType = 
childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc.getPrecision(); - if (!isFormatChanging) - outDesc = childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc; - else - outDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc; - childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc = InferenceEngine::TensorDesc(inputDataType, getDims().ToSizeVector(), - {outDesc.getBlockingDesc().getBlockDims(), - outDesc.getBlockingDesc().getOrder()}); - return childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc; - } - - for (size_t i = 0; i < childPtr->getParentEdges().size(); i++) { - auto parentEdge = childPtr->getParentEdgeAt(i); - auto parent = parentEdge->getParent(); - int parentIdx = parentEdge->getInputNum(); - if (!parent->getSelectedPrimitiveDescriptor() || parentIdx < 0 || - parentEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() <= parentIdx) { - parentIdx = 0; - } - memory::format_tag parentOutDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[parentIdx].desc).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - continue; - } - if (nodeCanChangeDesc(parent)) - continue; - - if (enterCountDown < 2) { - parentOutDesc = MKLDNNMemoryDesc(parentEdge->getSpecifiedInputDesc(formats, ++enterCountUp, enterCountDown)).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - } - } - } - - if (!isFormatChanging) { - for (size_t i = 0; i < childPtr->getChildEdges().size(); i++) { - auto childEdge = childPtr->getChildEdgeAt(i); - auto child = childEdge->getChild(); - int childIdx = childEdge->getOutputNum(); - if (!child->getSelectedPrimitiveDescriptor() || childIdx < 0 || - childEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() <= childIdx) { - childIdx = 0; - } - memory::format_tag childInDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[childIdx].desc).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - continue; - } - if (nodeCanChangeDesc(child)) - continue; - - if (enterCountDown < 2) { - childInDesc = MKLDNNMemoryDesc(childEdge->getSpecifiedOutputDesc(formats, enterCountUp, ++enterCountDown)).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - } - } - } - } - - size_t maxFormatCount = 0; - memory::format_tag format = MKLDNNMemory::GetPlainFormat(getDims()); - for (auto &it : formats) { - if (maxFormatCount < it.second && MKLDNNMemory::isConsistant(getDims(), it.first)) { - maxFormatCount = it.second; - format = it.first; - } - } - - auto inDataType = 
MKLDNNMemoryDesc(childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[getOutputNum()].desc).getDataType(); - childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc = MKLDNNMemoryDesc(getDims(), inDataType, format); - if (!isFormatChanging && outputIdx < childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() && - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc.getLayout() == InferenceEngine::Layout::ANY) { - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc = - MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(getDims(), inDataType, format)); - } - - return childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc; + return getInputDesc(); } const MKLDNNMemory &MKLDNNEdge::getMemory() { if (status == Status::NotAllocated) { memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(getDesc()), getSharedEdge()->getMemoryPtr()->GetData()); + memoryPtr->Create(getDesc(), getSharedEdge()->getMemoryPtr()->GetData()); memoryFromEdge.reset(); changeStatus(Status::Allocated); } @@ -601,7 +308,7 @@ const MKLDNNMemory &MKLDNNEdge::getMemory() { MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { if (status == Status::NotAllocated) { memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(getDesc()), getSharedEdge()->getMemoryPtr()->GetData()); + memoryPtr->Create(getDesc(), getSharedEdge()->getMemoryPtr()->GetData()); memoryFromEdge.reset(); changeStatus(Status::Allocated); } @@ -609,19 +316,6 @@ MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { return memoryPtr; } -InferenceEngine::Blob::Ptr MKLDNNEdge::getBlob() { - if (!memoryPtr) - IE_THROW() << "Cannot get blob! Edge isn't initialized."; - InferenceEngine::TensorDesc desc = getDesc(); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) - desc = InferenceEngine::TensorDesc(desc.getPrecision(), dims.ToSizeVector(), desc.getLayout()); - else - desc = InferenceEngine::TensorDesc(desc.getPrecision(), dims.ToSizeVector(), desc.getBlockingDesc()); - - return isEmptyTensorDesc(desc) ? make_blob_with_precision(desc) : make_blob_with_precision(desc, memoryPtr->GetData()); -} - void MKLDNNEdge::sharedMemFrom(const MKLDNNEdgePtr &edge) { memoryFromEdge = edge; status = Status::NotAllocated; @@ -633,7 +327,7 @@ void MKLDNNEdge::validate() { getMemory(); getParent(); getChild(); - getDims(); + getShape(); if (status != Status::Allocated) { IE_THROW() << "Error memory is not allocated!"; @@ -644,8 +338,7 @@ void MKLDNNEdge::validate() { MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const { auto memoryFromEdgePtr = memoryFromEdge.lock(); if (!memoryFromEdgePtr) { - IE_THROW() << "Cannot get memory ptr for edge(" << getParent()->getName() << "->" - << getChild()->getName() << "). The pointer on the edge with memory is empty!"; + IE_THROW() << "Cannot get memory ptr for edge( " << name() << " ). 
The pointer on the edge with memory is empty!"; } return memoryFromEdgePtr; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h index 63e2a16414d..5e6f4d23542 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h @@ -5,11 +5,9 @@ #pragma once #include -#include -#include "mkldnn_memory.h" -#include "mkldnn_dims.h" +#include "cpu_shape.h" +#include "cpu_memory_desc.h" #include "mkldnn_weights_cache.hpp" -#include "mkldnn/ie_mkldnn.h" #include #include @@ -53,10 +51,7 @@ public: const std::shared_ptr getParent() const; const std::shared_ptr getChild() const; - InferenceEngine::Blob::Ptr getBlob(); - InferenceEngine::TensorDesc getDesc(); - - const MKLDNNDims &getDims(); + const Shape &getShape(); const MKLDNNMemory& getMemory(); MKLDNNMemoryPtr& getMemoryPtr(); @@ -73,34 +68,23 @@ public: MKLDNNEdgePtr getSharedEdge() const; MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const; - const InferenceEngine::TensorDesc& getInputDescRO() const; - const InferenceEngine::TensorDesc& getOutputDescRO() const; - private: - std::string name(); + std::string name() const; std::weak_ptr parent; std::weak_ptr child; int parent_port; int child_port; - bool externalMemoryPtr = false; + bool useExternalMemory = false; MKLDNNEdgeWeakPtr memoryFromEdge; - MKLDNNDims dims; + Shape shape; MKLDNNMemoryPtr memoryPtr; Status status = Status::Uninitialized; - InferenceEngine::TensorDesc getInputDesc(); - InferenceEngine::TensorDesc getOutputDesc(); - InferenceEngine::TensorDesc getSpecifiedInputDesc(std::map formats, - size_t enterCountUp = 1, size_t enterCountDown = 0); - InferenceEngine::TensorDesc getSpecifiedOutputDesc(std::map formats, - size_t enterCountUp = 0, size_t enterCountDown = 1); - - InferenceEngine::TensorDesc inputDesc; - InferenceEngine::TensorDesc outputDesc; - - bool nodeCanChangeDesc(const std::shared_ptr& node) const; + const MemoryDesc& getInputDesc() const; + const MemoryDesc& getOutputDesc() const; + const MemoryDesc& getDesc() const; enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN, LOOK_NO_RECURRENT = 4 }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp index 2d7d4e5e6b6..d1c851645b1 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp @@ -32,7 +32,7 @@ uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType) } } -memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision prec) { +memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) { switch (prec) { case InferenceEngine::Precision::FP32: return memory::data_type::f32; @@ -47,6 +47,8 @@ memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::P return memory::data_type::u8; case InferenceEngine::Precision::BIN: return memory::data_type::bin; + case InferenceEngine::Precision::UNSPECIFIED: + return memory::data_type::undef; default: { IE_THROW() << "The plugin does not support " << prec.name(); } @@ -67,155 +69,18 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d return InferenceEngine::Precision::U8; case memory::data_type::bin: return InferenceEngine::Precision::BIN; + case memory::data_type::undef: + return InferenceEngine::Precision::UNSPECIFIED; default: { 
IE_THROW() << "Unsupported data type."; } } } -InferenceEngine::TensorDesc MKLDNNExtensionUtils::getUninitTensorDesc(const InferenceEngine::TensorDesc &desc) { - std::vector notInitArr; - std::vector zeroArr; - for (size_t i = 0; i < desc.getBlockingDesc().getBlockDims().size(); i++) { - notInitArr.push_back(std::numeric_limits::max()); - zeroArr.push_back(0); - } - // MKLDNN doesn't support offset_padding_to_data[i] != 0 (assert(src_d_blk.offset_padding_to_data[d] == 0);) - return desc.getLayout() == InferenceEngine::Layout::ANY ? desc : - InferenceEngine::TensorDesc(desc.getPrecision(), desc.getDims(), - {desc.getBlockingDesc().getBlockDims(), desc.getBlockingDesc().getOrder(), - std::numeric_limits::max(), zeroArr, notInitArr}); +InferenceEngine::SizeVector MKLDNNExtensionUtils::convertToSizeVector(const mkldnn::memory::dims& dims) { + return InferenceEngine::SizeVector(dims.begin(), dims.end()); } -bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2) { - if (desc1.getDims() != desc2.getDims() || desc1.getPrecision() != desc2.getPrecision()) - return false; - if (desc1.getLayout() == InferenceEngine::Layout::SCALAR && desc2.getLayout() == InferenceEngine::Layout::SCALAR) - return true; - if (desc1.getLayout() == InferenceEngine::Layout::ANY || desc2.getLayout() == InferenceEngine::Layout::ANY) - return true; - bool batch1 = desc1.getDims()[0] == 1; - const auto& in1Block = desc1.getBlockingDesc(); - const auto& in2Block = desc2.getBlockingDesc(); - size_t uninitNum = std::numeric_limits::max(); - if (in1Block.getBlockDims().size() != in2Block.getBlockDims().size()) - return false; - for (size_t i = 0; i < in1Block.getBlockDims().size(); i++) { - if (in1Block.getBlockDims()[i] != in2Block.getBlockDims()[i] && - in1Block.getBlockDims()[i] != uninitNum && in2Block.getBlockDims()[i] != uninitNum) - return false; - if (in1Block.getOffsetPaddingToData()[i] != in2Block.getOffsetPaddingToData()[i] && - in1Block.getOffsetPaddingToData()[i] != uninitNum && in2Block.getOffsetPaddingToData()[i] != uninitNum) - return false; - if (i >= batch1 && in1Block.getStrides()[i] != in2Block.getStrides()[i] && - in1Block.getStrides()[i] != uninitNum && in2Block.getStrides()[i] != uninitNum) - return false; - if (in1Block.getOrder()[i] != in2Block.getOrder()[i] && - in1Block.getOrder()[i] != uninitNum && in2Block.getOrder()[i] != uninitNum) - return false; - } - return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() && - in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum); -} - -PartialBlkDesc PartialBlkDesc::makePlain(const InferenceEngine::SizeVector &dims) { - PartialBlkDesc res; - res.outer_order.resize(dims.size()); - std::iota(res.outer_order.begin(), res.outer_order.end(), 0); - return res; -} - -PartialBlkDesc PartialBlkDesc::makeCBlocked(const InferenceEngine::SizeVector &dims, size_t block_size) { - PartialBlkDesc res; - res.outer_order.resize(dims.size()); - std::iota(res.outer_order.begin(), res.outer_order.end(), 0); - res.inner_blk_size = {block_size}; - res.inner_blk_idxes = {1}; - return res; -} - - -PartialBlkDesc PartialBlkDesc::makeTailC(const InferenceEngine::SizeVector &dims) { - PartialBlkDesc res = makePlain(dims); - if (dims.size() > 2) { - auto itr = res.outer_order.begin() + 1; - std::rotate(itr, itr + 1, res.outer_order.end()); - } - return res; -} - -PartialBlkDesc PartialBlkDesc::extractFrom(const InferenceEngine::TensorDesc &desc) { - if (desc.getLayout() == 
InferenceEngine::ANY) - IE_THROW() << "Cannot extract partial blocked descriptor for `ANY` layout"; - - const auto &dims = desc.getDims(); - const auto &blk = desc.getBlockingDesc(); - const auto &blk_dims = blk.getBlockDims(); - const auto &blk_order = blk.getOrder(); - - PartialBlkDesc res; - res.outer_order = {blk_order.begin(), blk_order.begin() + dims.size()}; - res.inner_blk_idxes = {blk_order.begin() + dims.size(), blk_order.end()}; - res.inner_blk_size = {blk_dims.begin() + dims.size(), blk_dims.end()}; - - return res; -} - -bool PartialBlkDesc::isAutoExtendedWith(const InferenceEngine::SizeVector &dims) const { - auto tmp_dims = dims; - for (int i = 0; i < inner_blk_size.size(); i++) { - auto idx = inner_blk_idxes[i]; - auto blk = inner_blk_size[i]; - if (tmp_dims[idx] % blk == 0) - tmp_dims[idx] /= blk; - else - return true; - } - return false; -} - -bool PartialBlkDesc::operator == (const PartialBlkDesc& it) const { - return std::tie(this->inner_blk_idxes, - this->inner_blk_size, - this->outer_order) == - std::tie(it.inner_blk_idxes, - it.inner_blk_size, - it.outer_order); -} - -// Lexicographical compare of content -bool PartialBlkDesc::operator < (const PartialBlkDesc& it) const { - return std::tie(this->inner_blk_idxes, - this->inner_blk_size, - this->outer_order) < - std::tie(it.inner_blk_idxes, - it.inner_blk_size, - it.outer_order); -} - -std::string MKLDNNExtensionUtils::getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) { - std::string inArgs, outArgs; - if (parentDesc.getPrecision() != childDesc.getPrecision()) { - inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name()); - outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name()); - } - auto fmt_tag_src = MKLDNNMemoryDesc(parentDesc).getFormat(); - auto fmt_tag_dst = MKLDNNMemoryDesc(childDesc).getFormat(); - if (fmt_tag_src != fmt_tag_dst || one_of(mkldnn::memory::format_tag::undef, fmt_tag_src, fmt_tag_dst)) { - inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(fmt_tag_src); - outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(fmt_tag_dst); - } - return inArgs + "_" + outArgs; -} - -InferenceEngine::Precision MKLDNNExtensionUtils::getMaxPrecision(std::vector precisions) { - if (!precisions.empty()) { - std::sort(precisions.begin(), precisions.end(), - [](const InferenceEngine::Precision &lhs, const InferenceEngine::Precision &rhs) { - return lhs.size() > rhs.size(); - }); - return precisions[0]; - } - - return InferenceEngine::Precision::UNSPECIFIED; +std::vector MKLDNNExtensionUtils::convertToDnnlDims(const InferenceEngine::SizeVector& dims) { + return std::vector(dims.begin(), dims.end());; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h index 95e14a7afa2..8e7f9a1b374 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h @@ -11,77 +11,17 @@ #include #include "mkldnn.hpp" -#include "mkldnn_memory.h" +#include "cpu_memory_desc.h" namespace MKLDNNPlugin { - -/** - * Partial tensor descriptor - * - * Represent a classes of layout. As example Plain, TailC, CBlocked and other. - * - * The tensor are in one layout family if they have same PartialBlkDesc. - * - * Any tensor will have same PartialBlkDesc as it subview tensor. 
- * - * PartialBlkDesc plus Dims allow to reconstruct real tensorDesc (dense representation). - */ -class PartialBlkDesc { -public: - /** - * Check if this partial blocking desc will lead to additional zero padding - * for real tensor with provided dims - * - * Example: dims [2, 3, 8, 8] with blocking by 16 for second dim. Will lead - * to effective dims [2, 16, 8, 8] with zeroing all values - * [:, 3:16, :, :] - * - * @param dims to check on zero auto padding - * @return true if provided dims will use auto padding. Otherwise false. - */ - bool isAutoExtendedWith(const InferenceEngine::SizeVector &dims) const; - - /** - * Construct PartialBlkDesc from provided TensorDesc - * - * PartialBlkDesc has less expressiveness power so some information from TensorDesc will be dropped. - * The different TensorDesc object will has equal PartialBlkDesc. - * - * @param desc to extract PartialBlkDesc information about kind of layout - * @return PartialBlkDesc object corresponds layout described in desc - */ - static PartialBlkDesc extractFrom(const InferenceEngine::TensorDesc &desc); - - /** Construct plain PartialBlkDesc based on dims information */ - static PartialBlkDesc makePlain(const InferenceEngine::SizeVector &dims); - - /** Construct blocked Channel PartialBlkDesc based on dims information */ - static PartialBlkDesc makeCBlocked(const InferenceEngine::SizeVector &dims, size_t block_size); - - /** Construct per Channel PartialBlkDesc based on dims information */ - static PartialBlkDesc makeTailC(const InferenceEngine::SizeVector &dims); - - /** Compare operators. Allow to use it as key for std::map */ - bool operator == (const PartialBlkDesc& it) const; - bool operator < (const PartialBlkDesc& it) const; - -private: - PartialBlkDesc() = default; - InferenceEngine::SizeVector outer_order; - InferenceEngine::SizeVector inner_blk_size; - InferenceEngine::SizeVector inner_blk_idxes; -}; - class MKLDNNExtensionUtils { public: static uint8_t sizeOfDataType(mkldnn::memory::data_type dataType); - static mkldnn::memory::data_type IEPrecisionToDataType(InferenceEngine::Precision prec); + static mkldnn::memory::data_type IEPrecisionToDataType(const InferenceEngine::Precision& prec); static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType); - static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc); - static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2); - static std::string getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc); - static InferenceEngine::Precision getMaxPrecision(std::vector precisions); + static InferenceEngine::SizeVector convertToSizeVector(const mkldnn::memory::dims& dims); + static std::vector convertToDnnlDims(const InferenceEngine::SizeVector& dims); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 4e3fba2d2b1..e97912762e7 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -39,6 +39,7 @@ #include "utils/node_dumper.h" #include "utils/ngraph_utils.hpp" #include "utils/cpu_utils.hpp" +#include "cpu_memory_desc_utils.h" #include #include @@ -47,15 +48,6 @@ #include #include -/***************************************************** - * Debug capability - * - PRINT_GRAPH_INFO : Define it to enable printing - * additional 
information to std output. - * - * @todo Align with CPU_DEBUG_CAPS implementation - *****************************************************/ -// #define PRINT_GRAPH_INFO - using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -168,7 +160,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgr auto parentNode = portInfo.first; auto port = portInfo.second; const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outputShapes[port], parentNode->getOriginalOutputPrecisionAtPort(port), nodeName, "Result", getEngine(), weightsCache); MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); @@ -269,7 +261,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana auto parentNode = op2node[unusedOutput.get_node_shared_ptr()]; const auto port = unusedOutput.get_index(); const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outputShapes[port], parentNode->getOriginalOutputPrecisionAtPort(port), nodeName, "Result", getEngine(), weightsCache); MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); @@ -306,15 +298,15 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana // Loading mean images for (const auto& input : inputsInfo) { - MKLDNNDims outDims; - if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims()) { - outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1)); + Shape outShape; + if (!inputNodesMap[input.first]->outputShapes.front().getRank()) { + outShape = Shape(SizeVector({1, 1})); } else { - outDims = inputNodesMap[input.first]->getChildEdgeAt(0)->getDims(); + outShape = inputNodesMap[input.first]->outputShapes.front(); } InputInfo::Ptr ii = inputsInfo[input.first]; if (ii && ii->getPreProcess().getNumberOfChannels()) { - _normalizePreprocMap[input.first].Load(outDims, ii); + _normalizePreprocMap[input.first].Load(outShape, ii); } } } @@ -347,6 +339,8 @@ void MKLDNNGraph::InitGraph() { graphNode->cleanup(); } #endif + ExtractConstantNodes(); + ExecuteConstantNodesOnly(); } @@ -390,6 +384,16 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() { } } +void MKLDNNGraph::ExtractConstantNodes() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExtractConstantNodes"); + for (auto& graphNode : graphNodes) { + if (graphNode->isConstant()) + constantGraphNodes.emplace_back(graphNode); + else + mutableGraphNodes.emplace_back(graphNode); + } +} + void MKLDNNGraph::ExecuteConstantNodesOnly() { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly"); mkldnn::stream stream(eng); @@ -418,10 +422,7 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { return std::make_tuple(hasExternalInvalidEdges, hasLocalAllocatedEdges, outputs); }; - for (auto &graphNode : graphNodes) { - if (!graphNode->isConstant()) - continue; - + for (auto &graphNode : constantGraphNodes) { if (weightsCache) { auto sharedOutputs = acquireSharedOutputs(graphNode); @@ -437,9 +438,9 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { } } -static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& childDesc, 
const mkldnn::engine& eng) { - memory::desc dstMemDesc = MKLDNNMemoryDesc(childDesc); - memory::desc srcMemDesc = MKLDNNMemoryDesc(parentDesc); +static bool isReorderAvailable(const MemoryDesc& parentDesc, const MemoryDesc& childDesc, const mkldnn::engine& eng) { + memory::desc dstMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(childDesc); + memory::desc srcMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(parentDesc);; mkldnn::primitive_attr attr; dnnl_primitive_desc_t result = nullptr; @@ -471,14 +472,14 @@ void MKLDNNGraph::InitEdges() { if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && !isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) { // If we are here, then we need to insert Convert, because there are no reorders that support such type conversion - const auto inDesc = edge->getInputDesc(); - const auto outDesc = edge->getOutputDesc(); + const auto& inDesc = edge->getInputDesc(); + const auto& outDesc = edge->getOutputDesc(); std::string convertName = edge->getParent()->getName() + "_" + inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name(); - auto convertNode = std::make_shared(inDesc.getDims(), inDesc.getPrecision(), outDesc.getPrecision(), convertName, - this->getEngine(), this->weightsCache); + auto convertNode = std::make_shared(inDesc.getShape().getStaticDims(), inDesc.getPrecision(), outDesc.getPrecision(), + convertName, this->getEngine(), this->weightsCache); convertNode->setDescs(inDesc, outDesc); InsertNode(edge, convertNode, true); @@ -492,7 +493,7 @@ void MKLDNNGraph::InitEdges() { if (insertReorder) { std::string basicLayerName = edge->getParent()->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + + MKLDNNReorderNode::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + edge->getChild()->getName(); std::string layerName = basicLayerName; int idx = 0; @@ -601,22 +602,10 @@ void MKLDNNGraph::AllocateWithReuse() { int e_start = edge->getParent()->execIndex; int e_finish = edge->getChild()->execIndex; - const BlockingDesc block_desk = edge->getDesc().getBlockingDesc(); - - int64_t e_size = block_desk.getOffsetPadding() + 1; // size in bytes (from begin of data to last element) - for (int j = 0; j < block_desk.getBlockDims().size(); j++) - e_size += (block_desk.getBlockDims()[j] - 1) * block_desk.getStrides()[j]; - - // In some cases computational formula above doesn't work properly (e.g. for OhIw8o4i layout). - // This WA allows to limit the size of allocated memory from below. - // TODO: need to properly investigate the root cause of incorrect computations - int64_t min_size = 1; - for (int64_t dim : block_desk.getBlockDims()) { - min_size *= dim; + int64_t e_size = edge->getDesc().getCurrentSize(); // size in bytes (from the beginning of data to the last element) + if (e_size == MemoryDesc::UNDEFINED_SIZE) { + IE_THROW() << "Can not allocate memory since the size is undefined."; } - e_size = std::max(e_size, min_size); - - e_size *= edge->getDesc().getPrecision() == Precision::BIN ? 
1 : edge->getDesc().getPrecision().size(); box.start = std::min(e_start, box.start); box.finish = std::max(e_finish, box.finish); @@ -650,7 +639,7 @@ void MKLDNNGraph::AllocateWithReuse() { size_t total_size = static_cast(memSolver.solve()) * alignment; memWorkspace = std::make_shared(eng); - memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::I8, {total_size}, Layout::C))); + memWorkspace->Create(MKLDNNMemoryDesc({total_size}, mkldnn::memory::data_type::s8)); if (edge_clusters.empty()) return; @@ -710,13 +699,11 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: auto input = inputNodesMap.find(name); if (input != inputNodesMap.end()) { - MKLDNNDims outDims = input->second->getChildEdgeAt(0)->getDims(); - const void *ext_data_ptr = in->cbuffer(); void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData(); if (ext_data_ptr != inter_data_ptr) { - auto ext_tdesc = MKLDNNMemoryDesc {in->getTensorDesc()}; + auto ext_tdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(in->getTensorDesc()); auto ext_mem = MKLDNNMemory(eng); ext_mem.Create(ext_tdesc, ext_data_ptr, false); @@ -727,7 +714,8 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: // todo: make sure 'name' exists in this map... if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) { if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast(inter_data_ptr), + _normalizePreprocMap[name].NormalizeImage(input->second->getChildEdgeAt(0)->getShape(), + reinterpret_cast(inter_data_ptr), in->getTensorDesc().getLayout()); } else { IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported"; @@ -775,7 +763,7 @@ void MKLDNNGraph::PullOutputData(const BlobMap &out) { MB_to_process = std::min(config.batchLimit, MB_to_process); size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB; - const auto actualDesc = node->getParentEdgeAt(0)->getDesc(); + const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getDesc()); const auto expectedDesc = ext_blob->getTensorDesc(); // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it @@ -790,7 +778,7 @@ void MKLDNNGraph::PullOutputData(const BlobMap &out) { } if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) { - auto outBlobDesc = MKLDNNMemoryDesc{expectedDesc}; + auto outBlobDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(expectedDesc); auto outBloMem = MKLDNNMemory(eng); outBloMem.Create(outBlobDesc, ext_blob_ptr, false); @@ -810,24 +798,27 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { ENABLE_CPU_DEBUG_CAP(NodeDumper nd(config.debugCaps, infer_count)); - for (int i = 0; i < graphNodes.size(); i++) { - if (request != nullptr) { +#ifdef CPU_DEBUG_CAPS + for (const auto& node : constantGraphNodes) { + if (request != nullptr) request->ThrowIfCanceled(); - } - PERF(graphNodes[i]); + ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node)); + ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node)); + } +#endif - if (batch > 0) - graphNodes[i]->setDynamicBatchLim(batch); + for (const auto& node : mutableGraphNodes) { + PERF(config.collectPerfCounters, node); + if (request != nullptr) + request->ThrowIfCanceled(); - ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(graphNodes[i])); + ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node)); - if (!graphNodes[i]->isConstant()) 
{ - OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, graphNodes[i]->profiling.execute); - graphNodes[i]->execute(stream); - } + OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute); + node->execute(stream); - ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(graphNodes[i])); + ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node)); } if (infer_count != -1) infer_count++; @@ -889,7 +880,7 @@ void MKLDNNGraph::SortTopologically() { // Make first N (N == port_num) edge indexes are matched with port index for (auto &node : graphNodes) { { - int port_num = node->inDims.size(); + int port_num = node->inputShapes.size(); std::vector res(port_num); for (int i = 0; i < node->parentEdges.size(); i++) { @@ -903,7 +894,7 @@ void MKLDNNGraph::SortTopologically() { node->parentEdges = {res.begin(), res.end()}; } { - int port_num = node->outDims.size(); + int port_num = node->outputShapes.size(); std::vector res(port_num); for (int i = 0; i < node->childEdges.size(); i++) { @@ -965,16 +956,20 @@ Config MKLDNNGraph::getProperty() const { return config; } -void MKLDNNGraph::getInputBlobs(InferenceEngine::BlobMap &resp) { - for (auto &it : inputNodesMap) { - resp[it.first] = it.second->getChildEdgeAt(0)->getBlob(); +Blob::Ptr MKLDNNGraph::getInputBlob(const std::string& name) { + auto itr = inputNodesMap.find(name); + if (itr != inputNodesMap.end()) { + return MemoryDescUtils::interpretAsBlob(itr->second->getChildEdgeAt(0)->getMemory()); } + return nullptr; } -void MKLDNNGraph::getOutputBlobs(InferenceEngine::BlobMap &resp) { - for (auto &it : outputNodesMap) { - resp[it.first] = it.second->getParentEdgeAt(0)->getBlob(); +Blob::Ptr MKLDNNGraph::getOutputBlob(const std::string& name) { + auto itr = outputNodesMap.find(name); + if (itr != outputNodesMap.end()) { + return MemoryDescUtils::interpretAsBlob(itr->second->getParentEdgeAt(0)->getMemory()); } + return nullptr; } void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) { @@ -1084,7 +1079,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentConv, inNum, outNum)); graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentConv->inDims.push_back(newEdge->getDims()); + parentConv->inputShapes.push_back(Shape(newEdge->getShape())); } } @@ -1116,15 +1111,14 @@ void MKLDNNGraph::RemoveDroppedEdges() { } } -MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc, - bool isOptimized, InferenceEngine::Blob::Ptr scales) { +MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc, + bool isOptimized) { MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layerName, getEngine(), weightsCache)); auto *reorderPtr = dynamic_cast(newReorder.get()); if (reorderPtr == nullptr) { IE_THROW() << "MKLDNNGraph::InsertReorder: Cannot cast to MKLDNNReorderNode"; } reorderPtr->setDescs(inDesc, outDesc); - reorderPtr->_scales = scales; reorderPtr->setOptimized(isOptimized); InsertNode(edge, newReorder, true); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 213fb3b0d54..50ccd0be4f0 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -44,8 +44,8 @@ public: void setProperty(const std::map &properties); Config getProperty() const; - void getInputBlobs(InferenceEngine::BlobMap &in_map); - void 
getOutputBlobs(InferenceEngine::BlobMap &out_map); + InferenceEngine::Blob::Ptr getInputBlob(const std::string& name); + InferenceEngine::Blob::Ptr getOutputBlob(const std::string& name); template void CreateGraph(NET &network, @@ -115,17 +115,17 @@ public: * @param layerName * Reorder layer name * @param inDesc - * input tensor descriptor + * input memory descriptor * @param outDesc - * output tensor descriptor + * output memory descriptor * @param isOptimized * optimization flag; if isOptimized is true then Reorder node does nothing * @param scales * pointer to the blob containing scales * @return pointer to the new Reorder node. */ - MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc, - const InferenceEngine::TensorDesc& outDesc, bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr); + MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, + const MemoryDesc& outDesc, bool isOptimized = false); /** * @brief Insert MKLDNNNode at the edge-specified location. @@ -218,6 +218,7 @@ protected: void Allocate(); void AllocateWithReuse(); void CreatePrimitives(); + void ExtractConstantNodes(); void ExecuteConstantNodesOnly(); friend class MKLDNNInferRequest; @@ -225,6 +226,11 @@ protected: friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); private: + // these node pointers (from graphNodes) are to avoid regular checking for + // constant node in ExecuteConstantNodesOnly and Infer methods + std::vector constantGraphNodes; + std::vector mutableGraphNodes; + void EnforceBF16(); }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index ac4bfff6b6d..909a5083f71 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -10,7 +10,6 @@ #include #include "ngraph/ngraph.hpp" #include "utils/debug_capabilities.h" - #include #include #include @@ -46,11 +45,11 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no std::string outputPrecisionsStr; if (!node->getChildEdges().empty()) { - outputPrecisionsStr = node->getChildEdgeAt(0)->getDesc().getPrecision().name(); + outputPrecisionsStr = node->getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); bool isAllEqual = true; for (size_t i = 1; i < node->getChildEdges().size(); i++) { - if (node->getChildEdgeAt(i-1)->getDesc().getPrecision() != node->getChildEdgeAt(i)->getDesc().getPrecision()) { + if (node->getChildEdgeAt(i - 1)->getMemory().GetDesc().getPrecision() != node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision()) { isAllEqual = false; break; } @@ -59,12 +58,12 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no // If all output precisions are the same, we store the name only once if (!isAllEqual) { for (size_t i = 1; i < node->getChildEdges().size(); i++) - outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getDesc().getPrecision().name()); + outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision().name()); } } else { // Branch to correctly handle output nodes if (!node->getParentEdges().empty()) { - outputPrecisionsStr = node->getParentEdgeAt(0)->getDesc().getPrecision().name(); + outputPrecisionsStr = node->getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); } } serialization_info[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = 
outputPrecisionsStr; @@ -73,12 +72,11 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no auto outDescs = node->getSelectedPrimitiveDescriptor()->getConfig().outConfs; if (!outDescs.empty()) { - auto fmt0 = MKLDNNMemoryDesc(outDescs[0].desc).getFormat(); - outputLayoutsStr = mkldnn::utils::fmt2str(fmt0); + outputLayoutsStr = outDescs[0].desc->serializeFormat(); bool isAllEqual = true; for (size_t i = 1; i < outDescs.size(); i++) { - if (MKLDNNMemoryDesc(outDescs[i - 1].desc).getFormat() != MKLDNNMemoryDesc(outDescs[i].desc).getFormat()) { + if (outDescs[i - 1].desc->serializeFormat() != outDescs[i].desc->serializeFormat()) { isAllEqual = false; break; } @@ -87,8 +85,7 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no // If all output layouts are the same, we store the name only once if (!isAllEqual) { for (size_t i = 1; i < outDescs.size(); i++) { - auto fmt = MKLDNNMemoryDesc(outDescs[i].desc).getFormat(); - outputLayoutsStr += "," + std::string(mkldnn::utils::fmt2str(fmt)); + outputLayoutsStr += "," + outDescs[i].desc->serializeFormat(); } } } else { @@ -163,10 +160,8 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph auto meta_data = extract_node_metadata(node); std::shared_ptr return_node; if (is_input) { - auto desc = node->getChildEdgeAt(0)->getDesc(); - auto param = std::make_shared( - details::convertPrecision(desc.getPrecision()), - ngraph::PartialShape(desc.getDims())); + auto& desc = node->getChildEdgeAt(0)->getMemory().GetDesc(); + auto param = std::make_shared(details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); return_node = param; params.push_back(param); } else if (is_output) { @@ -177,10 +172,8 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()); for (size_t port = 0; port < return_node->get_output_size(); ++port) { - auto desc = node->getChildEdgeAt(port)->getDesc(); - return_node->set_output_type(port, - details::convertPrecision(desc.getPrecision()), - ngraph::PartialShape(desc.getDims())); + auto& desc = node->getChildEdgeAt(port)->getMemory().GetDesc(); + return_node->set_output_type(port, details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); } } @@ -237,18 +230,19 @@ void serializeToXML(const MKLDNNGraph &graph, const std::string& path) { void serializeToCout(const MKLDNNGraph &graph) { for (const auto& node : graph.GetNodes()) { std::cout << "name: " << node->getName() << " [ "; - if (!node->getParentEdges().empty()) { - const auto& parentEdge = *(node->getParentEdges()[0].lock()); - const auto& prnt_out_desc = parentEdge.getOutputDescRO(); - std::cout << "in: " << prnt_out_desc.getPrecision().name() - << "/l=" << prnt_out_desc.getLayout() - << "; "; - } - if (!node->getChildEdges().empty()) { - const auto& childEdge = *(node->getChildEdges()[0].lock()); - const auto& chld_in_desc = childEdge.getInputDescRO(); - std::cout << "out: " << chld_in_desc.getPrecision().name() - << "/l=" << chld_in_desc.getLayout(); + auto nodeDesc = node->getSelectedPrimitiveDescriptor(); + if (nodeDesc) { + auto& inConfs = nodeDesc->getConfig().inConfs; + if (!inConfs.empty()) { + std::cout << "in: " << inConfs.front().desc->getPrecision().name() + << "/l=" << inConfs.front().desc->serializeFormat() + << "; "; + } + auto& outConfs = nodeDesc->getConfig().outConfs; + if (!outConfs.empty()) { + std::cout << "out: " << outConfs.front().desc->getPrecision().name() + << 
"/l=" << outConfs.front().desc->serializeFormat(); + } } std::cout << " ]" << std::endl; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 9811b683ad1..9cbc9b79aeb 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -43,6 +43,7 @@ #include #include "mkldnn_itt.h" +#include "cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -165,15 +166,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1) return false; - auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); - auto biasDims = getNormalizedDimsBySize(biasNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(), + auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getShape().getDims(); + auto biasDims = getNormalizedDimsBySize(biasNode->getChildEdgesAtPort(0)[0]->getShape().getDims(), convOutDims.size()); // TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasing) and per-channel cases. // Most of the real models contain per-channel bias, so we need to reavaluate the need to support per-tensor variant. if (convOutDims.size() != biasDims.size() || biasDims.size() < 2) return false; - if (biasDims[0] != 1 || biasDims[1] != convOutDims[1]) + if (biasDims[0] != 1 || !dimsEqualStrong(biasDims[1], convOutDims[1])) return false; for (int i = 2; i < biasDims.size(); i++) { @@ -249,8 +250,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parent->outDims[inNum] = MKLDNNDims({parentEltwise->outDims[0][1]}); - parentEltwise->inDims.push_back(parent->outDims[0]); + parent->outputShapes[inNum] = Shape(SizeVector{parentEltwise->outputShapes[0].getStaticDims()[1]}); + parentEltwise->inputShapes.push_back(parent->outputShapes[0]); } } @@ -299,17 +300,17 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableSecondInput = [](MKLDNNNodePtr node, MKLDNNDims dataDims) { + auto isSutableSecondInput = [](MKLDNNNodePtr node, SizeVector dataDims) { if (node->getType() != Input || !node->isConstant()) return false; - auto secondInputDims = node->outDims[0]; - if (secondInputDims.ndims() != dataDims.ndims() || secondInputDims.ndims() < 2) + auto secondInputDims = node->outputShapes[0].getDims(); + if (secondInputDims.size() != dataDims.size() || secondInputDims.size() < 2) return false; - if (secondInputDims[0] != 1 || secondInputDims[1] != dataDims[1]) + if (secondInputDims[0] != 1 || !dimsEqualStrong(secondInputDims[1], dataDims[1])) return false; - for (size_t i = 2; i < secondInputDims.ndims(); i++) { + for (size_t i = 2; i < secondInputDims.size(); i++) { if (secondInputDims[i] != 1) return false; } @@ -322,14 +323,14 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) return false; - return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getDims()); + return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), 
node->getParentEdgesAtPort(0)[0]->getShape().getDims()); }; auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) return false; - return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getDims()); + return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getShape().getDims()); }; auto parent = graphNodes.begin(); @@ -397,7 +398,7 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentEltwise->inDims.push_back(parent->outDims[0]); + parentEltwise->inputShapes.push_back(parent->outputShapes[0]); } } @@ -416,9 +417,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { bool retVal = false; if (node->getType() == Convolution) { if (auto convNode = std::dynamic_pointer_cast(node)) { - auto ndims = convNode->getParentEdgeAt(0)->getDims().ndims(); + auto rank = convNode->getParentEdgeAt(0)->getShape().getRank(); // int8 depthwise convolution does not support fusing zero points in 3D case - if (implication(convNode->isDepthWise(), ndims == 4)) { + if (implication(convNode->isDepthWise(), rank == 4)) { retVal = true; } } @@ -431,8 +432,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (convNode == nullptr) IE_THROW() << "Cannot get convolution node " << node->getName(); - int IC = node->getParentEdgesAtPort(0)[0]->getDims()[1]; - int OC = node->getChildEdgesAtPort(0)[0]->getDims()[1]; + int IC = node->getParentEdgesAtPort(0)[0]->getShape().getDims()[1]; + int OC = node->getChildEdgesAtPort(0)[0]->getShape().getDims()[1]; + + if (Shape::UNDEFINED_DIM == IC || Shape::UNDEFINED_DIM == OC) { + return false; + } if (parent0->getType() == Eltwise) { if (!parent0->getFusedWith().empty() || !parent1->getFusedWith().empty()) @@ -456,15 +461,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (arg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8) return false; - if (parent0->getParentEdgesAtPort(1)[0]->getDims().size() < 2) { + if (parent0->getParentEdgesAtPort(1)[0]->getShape().getRank() < 2) { return false; } - auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getDims(); - if (zpDims[0] != 1 || zpDims[1] != IC) + auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims(); + if (zpDims[0] != 1 || !dimsEqualStrong(zpDims[1], IC)) return false; - for (int i = 2; i < zpDims.ndims(); i++) { + for (int i = 2; i < zpDims.size(); i++) { if (zpDims[i] != 1) return false; } @@ -485,7 +490,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (zeroPointsData == nullptr) IE_THROW() << "zeroPointsBlob has not allocated buffer"; - for (int j = 0; j < parent0->getParentEdgesAtPort(1)[0]->getDims()[1]; j++) { + auto zeroPointDataSize = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims()[1]; + if (Shape::UNDEFINED_DIM == zeroPointDataSize) { + return false; + } + + for (int j = 0; j < zeroPointDataSize; j++) { convNode->inputZeroPoints.push_back(zeroPointsData[j]); } } else { @@ -524,11 +534,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { ptrdiff_t G = convNode->getGroupNum(); const int groupOffset = convNode->getAlgorithm() == ConvolutionGrouped ? 
1 : 0; - ptrdiff_t OC = weightsConstant->outDims[0][0 + groupOffset]; - ptrdiff_t IC = weightsConstant->outDims[0][1 + groupOffset]; - ptrdiff_t KD = weightsConstant->outDims[0].ndims() == (5 + groupOffset) ? weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 3] : 1; - ptrdiff_t KH = weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 2]; - ptrdiff_t KW = weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 1]; + auto& weightsConstantDims = weightsConstant->outputShapes[0].getStaticDims(); + + ptrdiff_t OC = weightsConstantDims[0 + groupOffset]; + ptrdiff_t IC = weightsConstantDims[1 + groupOffset]; + ptrdiff_t KD = weightsConstantDims.size() == (5 + groupOffset) ? weightsConstantDims[weightsConstantDims.size() - 3] : 1; + ptrdiff_t KH = weightsConstantDims[weightsConstantDims.size() - 2]; + ptrdiff_t KW = weightsConstantDims[weightsConstantDims.size() - 1]; for (size_t g = 0; g < G; g++) { for (size_t oc = 0; oc < OC; oc++) { @@ -588,7 +600,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getDims().ndims() != 3; + return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getShape().getRank() != 3; }; auto parent = graphNodes.begin(); @@ -653,12 +665,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { const auto &strides = conv->getStride(); const auto &paddings = conv->getPaddingL(); - const auto &inDims = node->getParentEdgeAt(0)->getDims(); - const auto &outDims = node->getChildEdgeAt(0)->getDims(); + const auto &inDims = node->getParentEdgeAt(0)->getShape().getDims(); + const auto &outDims = node->getChildEdgeAt(0)->getShape().getDims(); bool isSupportedParams = conv->getGroupNum() == 1 && - inDims.ndims() == 4 && - inDims[inDims.ndims() - 1] == outDims[outDims.ndims() - 1] && - inDims[inDims.ndims() - 2] == outDims[outDims.ndims() - 2] && + inDims.size() == 4 && + dimsEqualStrong(inDims[inDims.size() - 1], outDims[outDims.size() - 1]) && + dimsEqualStrong(inDims[inDims.size() - 2], outDims[outDims.size() - 2]) && is1x1Convolution(conv) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions everyone_is(1, strides[strides.size() - 1], strides[strides.size() - 2]) && everyone_is(0, paddings[paddings.size() - 1], paddings[paddings.size() - 2]) && @@ -702,8 +714,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { const auto weightRank = convChild->getWeightDims().size(); const auto stridesSize = convChild->getStride().size(); - bool isSupportedParams = convChild->outDims[0][1] == convChild->getGroupNum() && - convChild->outDims[0][1] != 1 && + bool isSupportedParams = dimsEqualStrong(convChild->outputShapes[0].getDims()[1], convChild->getGroupNum()) && + convChild->outputShapes[0].getDims()[1] != 1 && everyone_is(3, convChild->getWeightDims()[weightRank - 1], convChild->getWeightDims()[weightRank - 2]) && everyone_is(1, convChild->getPaddingL()[stridesSize - 1], convChild->getPaddingL()[stridesSize - 2]) && everyone_is(1, convChild->getPaddingR()[stridesSize - 1], convChild->getPaddingR()[stridesSize - 2]) && @@ -711,14 +723,18 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { convChild->getStride()[stridesSize - 1] == convChild->getStride()[stridesSize - 
2] && withBias && one_of(convChild->getStride()[stridesSize - 1], 1, 2) && - childNode->getChildEdgeAt(0)->getDims().ndims() == 4; + childNode->getChildEdgeAt(0)->getShape().getRank() == 4; return isSupportedParams; }; auto isFusingWorthwhile = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { - auto inDims = childNode->inDims[0]; - auto outDims = childNode->outDims[0]; + if (!childNode->inputShapes[0].isStatic() || !childNode->outputShapes[0].isStatic()) { + return false; + } + + auto inDims = childNode->inputShapes[0].getStaticDims(); + auto outDims = childNode->outputShapes[0].getStaticDims(); int elemSize = childNode->getOriginalOutputPrecisionAtPort(0).size(); int L3_cache_size = utils::get_cache_size(3, false); @@ -1076,9 +1092,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG if (mergedConv->fusedWith.size() > 0 && (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) { // Merged with DW_conv. Shape may change - mergedConv->inDims.push_back(mergedConv->fusedWith[0]->outDims[0]); + mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->outputShapes[0]); } else { - mergedConv->inDims.push_back(mergedConv->outDims[0]); + mergedConv->inputShapes.push_back(mergedConv->outputShapes[0]); } size_t childIdx = 0lu; @@ -1352,7 +1368,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parent->outDims[inNum] = child->inDims[outNum]; + parent->outputShapes[inNum] = child->inputShapes[outNum]; } } else { MKLDNNEdgePtr &remEdge = p_edge; @@ -1373,7 +1389,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentNode->inDims.push_back(parent->outDims[0]); + parentNode->inputShapes.push_back(parent->outputShapes[0]); } } @@ -1400,16 +1416,6 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { if (nn == nullptr) IE_THROW() << "Cannot get reorder layer " << nextNode->getName(); - auto scales = n->_scales; - - if (n->_scales != nullptr && nn->_scales != nullptr) { - IE_THROW() << "Merging scales of two subsequent reorders is unsupported yet"; - } else { - if (scales == nullptr) { - scales = nn->_scales; - } - } - MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent(); MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild(); @@ -1430,7 +1436,7 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName(); - graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false, scales); + graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false); graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end()); } } @@ -1448,8 +1454,8 @@ void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { MKLDNNNodePtr& broadcastNode = graphNode; MKLDNNNodePtr eltwiseNode = broadcastNode->getChildEdgeAt(0)->getChild(); - eltwiseNode->inDims[broadcastNode->getChildEdgeAt(0)->getOutputNum()] - = broadcastNode->getParentEdgeAt(0)->getDims(); + eltwiseNode->inputShapes[broadcastNode->getChildEdgeAt(0)->getOutputNum()] + = broadcastNode->getParentEdgeAt(0)->getShape(); auto& edges = graph.GetEdges(); for (size_t i = 1lu; i < broadcastNode->getParentEdges().size(); i++) { @@ -1673,9 +1679,14 @@ void 
MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { } auto& transposeOrder = transposeNode->getOrder(); - auto& layoutOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); - auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder(); - auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); + auto layoutOrder = MemoryDescUtils::convertToBlockedDescriptor( + *transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc).getOrder(); + + auto inBlockedDesc = MemoryDescUtils::convertToBlockedDescriptor(*reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc); + auto outBlockedDesc = MemoryDescUtils::convertToBlockedDescriptor(*reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc); + + auto& inOrder = inBlockedDesc.getOrder(); + auto& outOrder = outBlockedDesc.getOrder(); if (transposeOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { return false; @@ -1751,18 +1762,18 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { graph.DropNode(parentNode); graph.DropNode(childNode); - auto inDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; - auto outDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; + auto& inDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; + auto& outDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; - auto inPrec = inDesc.getPrecision(); - auto outPrec = outDesc.getPrecision(); + auto inPrec = inDesc->getPrecision(); + auto outPrec = outDesc->getPrecision(); - auto reorderInDesc = TensorDesc(inDesc); - auto reorderOutDesc = TensorDesc(outDesc); - reorderOutDesc.setPrecision(inPrec); + auto reorderInDesc = inDesc->clone(); + auto reorderOutDesc = outDesc->clone(); + reorderOutDesc->setPrecision(inPrec); std::string reorderlayerName = parentParentNode->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(reorderInDesc, reorderOutDesc) + "_" + "fake"; + MKLDNNReorderNode::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake"; MKLDNNEdgePtr edge; for (auto &childEdge : parentParentNode->getChildEdges()) { @@ -1775,17 +1786,17 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { IE_THROW() << "Transpose node '" << parentNode->getName() << "' has invalid edges."; } - auto reorderNode = graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true); + auto reorderNode = graph.InsertReorder(edge, reorderlayerName, *reorderInDesc, *reorderOutDesc, true); // case 2 if (inPrec != outPrec) { - auto reorderInDesc2 = TensorDesc(reorderOutDesc); - auto reorderOutDesc2 = TensorDesc(outDesc); + auto reorderInDesc2 = reorderOutDesc->clone(); + auto reorderOutDesc2 = outDesc->clone(); std::string reorderLayerName2 = reorderNode->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(reorderInDesc2, reorderOutDesc2) + "_" + childChildNode->getName(); + MKLDNNReorderNode::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + childChildNode->getName(); - graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, reorderInDesc2, reorderOutDesc2, false); + graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, *reorderInDesc2, *reorderOutDesc2, 
false); } }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 738604a6f0a..77dbe3e1215 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -213,8 +213,6 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: InferenceEngine::Blob::Ptr data; if (graph->hasInputWithName(name)) { - InferenceEngine::BlobMap blobs; - graph->getInputBlobs(blobs); // ROI blob is returned only if it was set previously. auto it = _preProcData.find(name); if (it != _preProcData.end()) { @@ -223,7 +221,12 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } if (_inputs.find(name) == _inputs.end()) { - InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc(); + auto pBlob = graph->getInputBlob(name); + if (!pBlob) { + IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; + } + + InferenceEngine::TensorDesc desc = pBlob->getTensorDesc(); if (_networkInputs.find(name) != _networkInputs.end()) { InferenceEngine::Layout l = _networkInputs[name]->getLayout(); @@ -235,7 +238,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: _inputs[name] = make_blob_with_precision(desc); _inputs[name]->allocate(); - if (blobs[name]->getTensorDesc() == desc && + if (pBlob->getTensorDesc() == desc && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { externalPtr[name] = _inputs[name]->buffer(); } @@ -258,9 +261,12 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } if (graph->hasOutputWithName(name)) { - InferenceEngine::BlobMap blobs; - graph->getOutputBlobs(blobs); if (_outputs.find(name) == _outputs.end()) { + auto pBlob = graph->getOutputBlob(name); + if (!pBlob) { + IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; + } + if (!data) { InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc(); desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision())); @@ -275,7 +281,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: data = make_blob_with_precision(desc); data->allocate(); } else { - const auto& expectedTensorDesc = blobs[name]->getTensorDesc(); + const auto& expectedTensorDesc = pBlob->getTensorDesc(); if (expectedTensorDesc.getPrecision() != data->getTensorDesc().getPrecision()) { IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different precision: " @@ -295,7 +301,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } _outputs[name] = data; - if (!externalPtr.count(name) && data->getTensorDesc() == blobs[name]->getTensorDesc() && !graph->getProperty().batchLimit) { + if (!externalPtr.count(name) && data->getTensorDesc() == pBlob->getTensorDesc() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } } @@ -366,12 +372,12 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In IE_THROW(ParameterMismatch) << "Failed to set input blob. 
Blocking descriptor mismatch."; } - InferenceEngine::BlobMap blobs; - graph->getInputBlobs(blobs); - if (blobs.find(name) == blobs.end()) + auto pBlob = graph->getInputBlob(name); + if (!pBlob) { IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; + } - if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() && + if (data->getTensorDesc() == pBlob->getTensorDesc() && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -404,12 +410,11 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In IE_THROW(ParameterMismatch) << "Failed to set output blob. Blocking descriptor mismatch."; } - InferenceEngine::BlobMap blobs; - graph->getOutputBlobs(blobs); - if (blobs.find(name) == blobs.end()) + auto pBlob = graph->getOutputBlob(name); + if (!pBlob) IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; - if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() && + if (data->getTensorDesc() == pBlob->getTensorDesc() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -435,6 +440,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { auto& child = input->second->getChildEdgeAt(i)->getChild(); if (child->isConstant()) canBeInPlace = false; + auto* concat = dynamic_cast(child.get()); if (canBeInPlace && concat && concat->isOptimized()) canBeInPlace = false; @@ -506,6 +512,10 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBatch(int new_batch) { } m_curBatch = new_batch; + + for (const auto& node : graph->GetNodes()) { + node->setDynamicBatchLim(new_batch); + } } std::vector MKLDNNPlugin::MKLDNNInferRequest::QueryState() { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index 6d82ccf3e22..a6a64120f00 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -2,23 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include -#include #include #include #include -#include #include "utils/general_utils.h" #include #include +#include #include "mkldnn_memory.h" #include "mkldnn_extension_utils.h" #include "nodes/common/cpu_memcpy.h" #include "nodes/common/cpu_convert.h" #include "mkldnn/ie_mkldnn.h" +#include "cpu_shape.h" +#include "cpu_memory_desc_utils.h" +#include "mkldnn_extension_utils.h" using namespace InferenceEngine; using namespace mkldnn; @@ -54,7 +55,7 @@ void MKLDNNMemory::Create(const memory::dims& dims, memory::data_type data_type, format = memory::format_tag::any; } - memory::desc desc = MKLDNNMemoryDesc({dims}, data_type, format); + memory::desc desc = MKLDNNMemoryDesc(MKLDNNExtensionUtils::convertToSizeVector(dims), data_type, format); Create(desc, data); } @@ -89,10 +90,16 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo } } +void MKLDNNMemory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { + pMemDesc = desc.clone(); + Create(mkldnn::memory::desc(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc)), data, pads_zeroing); +} + + void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) { if (size != 0) IE_ASSERT(size <= output.GetDescriptor().get_size()); - if (input.GetDesc() == output.GetDesc()) { + if 
(input.GetDescriptor() == output.GetDescriptor()) { auto srcPtr = static_cast(input.GetPtr()); auto dstPtr = static_cast(output.GetPtr()); @@ -118,7 +125,7 @@ void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &ou MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()), input.GetElementsCount()); MKLDNNMemory tmpMem(output.eng); - tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetDesc().getFormat(), tmpBuff.data()); + tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetMKLDNNDesc().getFormat(), tmpBuff.data()); pReorder = std::unique_ptr(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive())); srcMemoryPtr = tmpMem.prim; @@ -189,8 +196,8 @@ void MKLDNNMemory::FillZero() { memset(dataPtr, 0, GetSize()); } -memory::format_tag MKLDNNMemory::GetPlainFormat(const memory::dims& dims) { - switch (dims.size()) { +memory::format_tag MKLDNNMemory::GetPlainFormatByRank(size_t rank) { + switch (rank) { case 0: case 1: return memory::format_tag::a; @@ -222,11 +229,6 @@ InferenceEngine::Layout MKLDNNMemory::GetPlainLayout(const memory::dims& dims) { } } -bool MKLDNNMemory::isConsistant(const mkldnn::memory::dims& dims, mkldnn::memory::format_tag format) { - memory::desc attempt(dims, memory::data_type::f32, format, true); - return static_cast(attempt); -} - Precision MKLDNNMemory::convertToIePrec(memory::data_type dataType) { return MKLDNNExtensionUtils::DataTypeToIEPrecision(dataType); } @@ -262,6 +264,42 @@ std::string MKLDNNMemory::formatToString(memory::format_tag fmt) { return mkldnn::utils::fmt2str(fmt); } +void *MKLDNNMemory::GetPtr() const { + auto ptr = static_cast(GetData()); + auto md = GetDescriptor().data; + mkldnn::impl::memory_desc_wrapper wrapper(md); + ptr += wrapper.offset0() * wrapper.data_type_size(); + return ptr; +} + +template<> +MKLDNNMemoryDesc MKLDNNMemory::GetDescWithType() const { + if (auto descPtr = dynamic_cast(pMemDesc.get())) { + return *descPtr; + } else { + switch (pMemDesc->getType()) { + case (MemoryDescType::Blocked): + return MemoryDescUtils::convertToMKLDNNMemoryDesc(*(pMemDesc->as())); + default: + IE_THROW() << "Can not convert unsupported memory descriptor"; + } + } +} + +template<> +BlockedMemoryDesc MKLDNNMemory::GetDescWithType() const { + if (auto descPtr = dynamic_cast(pMemDesc.get())) { + return *descPtr; + } else { + switch (pMemDesc->getType()) { + case (MemoryDescType::Mkldnn): + return MemoryDescUtils::convertToBlockedDescriptor(*(pMemDesc->as())); + default: + IE_THROW() << "Can not convert unsupported memory descriptor"; + } + } +} + bool MKLDNNMemoryDesc::operator==(const MKLDNNMemoryDesc &rhs) const { return this->desc == rhs.desc; } @@ -274,51 +312,42 @@ MKLDNNMemoryDesc::operator mkldnn::memory::desc() const { return desc; } -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType, - mkldnn::memory::format_tag format): desc(dims, dataType, mkldnn::memory::format_tag::any) { +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::desc& desc) : + MemoryDesc(Shape(MKLDNNExtensionUtils::convertToSizeVector(desc.dims())), Mkldnn), desc(desc) { + if (desc.data.format_kind == dnnl::impl::format_kind::any) + IE_THROW(Unexpected) << "Memory format any is prohibited!"; +} + +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) + : MemoryDesc(Shape(_dims), Mkldnn) { + if (format == memory::format_tag::any) + IE_THROW(Unexpected) << "Memory format any is 
prohibited!"; if (format != memory::format_tag::undef) { - if (format == memory::format_tag::x && dims.size() == 0) { + if (format == memory::format_tag::x && _dims.size() == 0) { desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); } else { - desc = mkldnn::memory::desc(dims, dataType, format); + desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, format); } } else { // Trying to create plain descriptor // This WA is needed since memory::format_tag doesn't contain plain tag for tensors with rank > 6D - mkldnn::memory::dims strides(dims.size(), 1); - for (int d = dims.size() - 2; d >= 0; d--) { - strides[d] = strides[d + 1] * dims[d + 1]; + mkldnn::memory::dims strides(_dims.size(), 1); + for (int d = _dims.size() - 2; d >= 0; d--) { + strides[d] = strides[d + 1] * _dims[d + 1]; } - desc = mkldnn::memory::desc(dims, dataType, strides); + desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, strides); } } -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType) : desc() { - const auto ndims = dims.size(); +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType) + : MemoryDesc(Shape(_dims), Mkldnn), desc() { + const auto ndims = _dims.size(); mkldnn::memory::dims plain_strides(ndims, 1); for (size_t i = 1; i < ndims; i++) { - plain_strides[ndims - i -1] = plain_strides[ndims - i] * dims[ndims - i]; - } - desc = {dims, dataType, plain_strides}; -} - -size_t MKLDNNMemoryDesc::GetElementSize() const { - const auto type = desc.data_type(); - switch (type) { - case memory::data_type::f16 : - case memory::data_type::bf16 : - return 2; - case memory::data_type::f32 : - case memory::data_type::s32 : - return 4; - case memory::data_type::s8 : - case memory::data_type::u8 : - case memory::data_type::bin : - return 1; - default: - IE_THROW() << "Unknown data type"; + plain_strides[ndims - i -1] = plain_strides[ndims - i] * _dims[ndims - i]; } + desc = {MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, plain_strides}; } static const std::map> form_tags_by_ndims { @@ -677,32 +706,92 @@ bool MKLDNNMemoryDesc::isTailCFormat() const { return is_tailc_strides; } +bool MKLDNNMemoryDesc::blocksExtended() const { + for (int i = 0; i < desc.data.ndims; i++) { + if (desc.data.dims[i] != desc.data.padded_dims[i]) + return true; + } + return false; +} + +size_t MKLDNNMemoryDesc::getMemSizeImp() const { + return desc.get_size(); +} + +size_t MKLDNNMemoryDesc::getElementOffset(size_t elemNumber) const { + mkldnn::impl::memory_desc_wrapper wrapped(desc.data); + return wrapped.off_l(elemNumber); +} + +bool MKLDNNMemoryDesc::isCompatible(const MemoryDesc &rhs) const { + if (MemoryDescType::Blocked == rhs.getType()) { + return isCompatible(*(rhs.as())); + } else if (MemoryDescType::Mkldnn == rhs.getType()) { + return isCompatible(*(rhs.as())); + } else { + return false; + } +} + +bool MKLDNNMemoryDesc::isCompatible(const MKLDNNMemoryDesc &rhs) const { + using namespace dnnl; + using namespace impl; + using namespace dnnl::impl::utils; + if (this->desc == rhs.desc) { + return true; + } + mkldnn::impl::memory_desc_wrapper wrappedThis(this->desc.data); + mkldnn::impl::memory_desc_wrapper wrappedRhs(rhs.desc.data); + if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) + return false; + if (wrappedThis.is_wino_desc() || wrappedThis.is_rnn_packed_desc()) return false; + + const auto &blk = wrappedThis.blocking_desc(); + 
const auto &r_blk = wrappedRhs.blocking_desc(); + + int stride_start = wrappedThis.ndims() >0 && wrappedThis.dims()[0] == 1 ? 1 : 0; //ignore batch axis stride if batch size == 1 + + // Here is a slightly modified version of mkldnn::impl::memory_desc_wrapper::similar_to() call able to skip specific strides check. + return wrappedThis.ndims() == wrappedRhs.ndims() + && wrappedThis.format_kind() == wrappedRhs.format_kind() + && wrappedThis.data_type() == wrappedRhs.data_type() + && array_cmp(wrappedThis.dims(), wrappedRhs.dims(), wrappedThis.ndims()) + && array_cmp(blk.strides + stride_start, r_blk.strides + stride_start, wrappedThis.ndims() - stride_start) + && blk.inner_nblks == r_blk.inner_nblks + && array_cmp(blk.inner_blks, r_blk.inner_blks, blk.inner_nblks) + && array_cmp(blk.inner_idxs, r_blk.inner_idxs, blk.inner_nblks) + && array_cmp(wrappedThis.padded_dims(), wrappedRhs.padded_dims(), wrappedRhs.ndims()) + && array_cmp(wrappedThis.padded_offsets(), wrappedRhs.padded_offsets(), wrappedThis.ndims()) + && dimsEqualWeak(wrappedThis.offset0(), wrappedRhs.offset0()); +} + + /** - * Convert to IE::TensorDesc + * Check compatibility with BlockedMemoryDesc * * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} * strides // the order of outer dims is encoded here * inner_blks 4 16 4 * inner_idxs 1 0 1 * - * IE tensor desc has more expressive ability. Any oneDNN blocked tensor can be covreted. - * How to convert into IE representation: + * BlockedMemoryDesc desc has more expressive ability. + * How to check compatibility with BlockedMemoryDesc representation: * 0. Detect a new_outer_order of outer_dims via descending strides. - * 1. IE strides : concatenate strides in new_outer_order and inner strides. - * 2. IE dims : concatenate outer dims in new_outer_order with auto padding and inner blocks - * 3. IE order : concatenate new_outer_order and inner_idxs + * 1. BlockedMemoryDesc strides : concatenate strides in new_outer_order and inner strides. + * 2. BlockedMemoryDesc dims : concatenate outer dims in new_outer_order with auto padding and inner blocks + * 3. 
BlockedMemoryDesc order : concatenate new_outer_order and inner_idxs */ -MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { + +bool MKLDNNMemoryDesc::isCompatible(const BlockedMemoryDesc &rhs) const { + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { + return false; + } + const auto dims = desc.dims(); - if (desc.data.format_kind == dnnl_format_kind_any) - return TensorDesc { - MKLDNNMemory::convertToIePrec(desc.data_type()), - SizeVector {begin(dims), end(dims)}, - Layout::ANY}; - - if (desc.data.format_kind != dnnl_blocked) - IE_THROW() << "Conversion is not possible"; + if (desc.data.format_kind != dnnl_blocked) { + return false; + } const auto &blk_desc = desc.data.format_desc.blocking; @@ -731,174 +820,99 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { std::iota(outer_order.begin(), outer_order.end(), 0); std::sort(outer_order.begin(), outer_order.end(), [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || - (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); - // IE blocked order + // blocked order // [new_outer_order] U [inner_idxs] - SizeVector ie_blk_order(total_ndims, 0); - std::copy(outer_order.begin(), outer_order.end(), ie_blk_order.begin()); - std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, ie_blk_order.begin() + dims.size()); + SizeVector blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); - // IE blocked strides - // [outer_strides via new_outer_order] U [inner_strides] - SizeVector ie_blk_strides(total_ndims, 0); - std::copy(inner_strides.rbegin(), inner_strides.rend(), ie_blk_strides.rbegin()); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_strides.begin(), - [&] (size_t i) { return blk_desc.strides[i]; }); + if (!dimsEqualWeak(blk_order, rhs.getOrder())) { + return false; + } - // IE blocked dims + //TODO [DS]: undefined offset is also used now as an indicator of undefined strides + if (desc.data.offset0 != Shape::UNDEFINED_DIM) { + // blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + SizeVector blk_strides(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), blk_strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), blk_strides.begin(), + [&](size_t i) { return blk_desc.strides[i]; }); + + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 
0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(blk_strides, rhs.getStrides(), skipAxis)) { + return false; + } + } + + // blocked dims // [dims via new_outer_order with auto pad] U [inner_blk_dims] - SizeVector ie_blk_dims(total_ndims, 0); + SizeVector blk_dims(total_ndims, 0); std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, - ie_blk_dims.end() - blk_desc.inner_nblks); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), + blk_dims.end() - blk_desc.inner_nblks); + std::transform(outer_order.begin(), outer_order.end(), blk_dims.begin(), [&] (size_t i) { return outer_block_dims[i]; }); - // IE offset padded to data. Same as for oneDNN - SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; - size_t ie_blk_offset0 = desc.data.offset0; + if (!dimsEqualWeak(blk_dims, rhs.getBlockDims())) { + return false; + } - // TODO: The tensor desc implementation allow to specify offset_to_data for inner blocked dims. + // offset padded to data. Same as for oneDNN + SizeVector blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; + // TODO: The BlockedMemoryDesc implementation allow to specify offset_to_data for inner blocked dims. // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will // fill it with zero. - ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); + blk_offset_to_data.insert(blk_offset_to_data.end(), inner_ndims, 0); + if (!dimsEqualWeak(blk_offset_to_data, rhs.getOffsetPaddingToData())) { + return false; + } - - BlockingDesc ie_blk_desc { ie_blk_dims, - ie_blk_order, - ie_blk_offset0, - ie_blk_offset_to_data, - ie_blk_strides }; - TensorDesc res { - MKLDNNMemory::convertToIePrec(desc.data_type()), - SizeVector {begin(dims), end(dims)}, - ie_blk_desc }; - // TODO: BLOCKED is the most common layout which covers all other permute layout like NHWC. - // But for some cases we have to specify it more correctly.. may be.. or just keep - // auto detected layout in constructor of TensorDesc. - return res; + return dimsEqualWeak(desc.data.offset0, rhs.getOffsetPadding()); } -/** - * Construct from IE::TensorDesc - * @param tDesc - * - * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} - * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. - * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence - * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims - * - * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) - * - * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of - * real dims spliting. - * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] - * but not [0]<=>[4] because it breacke spliting original dims into internal blocked dims - * Normalization of representation: Make strides growing but keep layout same as original. Not all - * layout allow us to meet normalize form of tensor desc. - * - * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... N] - */ -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc): - desc({}, mkldnn::memory::data_type::undef, mkldnn::memory::format_tag::undef) { - auto dims = tDesc.getDims(); - - // TODO: implicit conversion of dims is no good... 
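// --- Editorial sketch (not part of the patch): a worked example of the blocked
// representation that isCompatible() above compares. Assuming a dense nChw16c
// tensor with logical dims {1, 64, 56, 56}:
//   order     = {0, 1, 2, 3, 1}              // outer dims by descending stride U inner_idxs
//   blockDims = {1, 4, 56, 56, 16}           // padded outer dims / block size U inner block
//   strides   = {200704, 50176, 896, 16, 1}  // dense strides over blockDims
// Because the batch size is 1, skipAxis == 0 and the batch stride is excluded
// from the comparison, so two otherwise identical descriptors that differ only
// in the (meaningless) batch stride are still reported as compatible.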
- if (tDesc.getLayout() == Layout::SCALAR) { - desc.data.format_kind = dnnl_blocked; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = 1; - desc.data.dims[0] = 1; - desc.data.padded_dims[0] = 1; - desc.data.format_desc.blocking.strides[0] = 1; - desc.data.padded_offsets[0] = 0; - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - return; - } - - if (tDesc.getLayout() == Layout::ANY) { - desc.data.format_kind = dnnl_format_kind_any; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = dims.size(); - std::copy(dims.begin(), dims.end(), desc.data.dims); - std::copy(dims.begin(), dims.end(), desc.data.padded_dims); - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::fill(desc.data.padded_offsets, desc.data.padded_offsets + dims.size(), 0); - return; - } - - auto ie_blkdDims = tDesc.getBlockingDesc().getBlockDims(); - auto ie_order = tDesc.getBlockingDesc().getOrder(); - auto ie_offsetsToData = tDesc.getBlockingDesc().getOffsetPaddingToData(); - auto ie_strides = tDesc.getBlockingDesc().getStrides(); - - size_t outer_ndims = dims.size(); - size_t inner_ndims = ie_order.size() - dims.size(); - - bool is_descending_strides = true; - for (int i = 1; i < ie_strides.size(); i++) { - is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); - } - - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims - // and may be we can achieve correct "descending strides" form which allow conversion. - if (!is_descending_strides) - IE_THROW() << "Unsupported case for conversion"; - - std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension - for (size_t i = 0; i < outer_ndims; i++) { - outer_order[ie_order[i]] = i; - } - bool outer_is_correct_permutation_of_n = - std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); - - if (!outer_is_correct_permutation_of_n) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted - for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { - inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); - } - - if (!inner_block_are_dense) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), - [](size_t pad) { return pad == 0; }); - - if (!inner_pad_offsets_is_zero) - IE_THROW() << "Unsupported case for conversion"; - - // Fill general memory desc fields - desc.data.format_kind = dnnl_blocked; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = dims.size(); - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::copy(dims.begin(), dims.end(), desc.data.dims); - std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, desc.data.padded_offsets); - std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1); - for (size_t i = 0; i < ie_order.size(); i++) { - auto idx = ie_order[i]; - desc.data.padded_dims[idx] *= ie_blkdDims[i]; - } - - // Fill blocking desc - auto &dnn_blk_desc = desc.data.format_desc.blocking; - dnn_blk_desc.inner_nblks = inner_ndims; - std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), 
dnn_blk_desc.inner_blks); - std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); - for (size_t i = 0; i < outer_ndims; i++) { - dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; +bool MKLDNNMemoryDesc::hasLayoutType(LayoutType layoutType) const { + switch (layoutType) { + case LayoutType::ncsp: + return isPlainFormat(); + case LayoutType::nspc: + return isTailCFormat(); + case LayoutType::nCsp8c: + return isBlockedCFormat(8); + case LayoutType::nCsp16c: + return isBlockedCFormat(16); + default: + return false; } } -bool MKLDNNMemoryDesc::blocksExtended() const { - for (int i = 0; i < desc.data.ndims; i++) { - if (desc.data.dims[i] != desc.data.padded_dims[i]) - return true; +std::string MKLDNNMemoryDesc::serializeFormat() const { + if (desc.data.format_kind == dnnl_format_kind_wino) { + switch (desc.data.format_desc.wino_desc.wino_format) { + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOIoi: return "wino_aaOIoi"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOio: return "wino_aaOio"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOBiOo: return "wino_aaOBiOo"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_OBaaIBOIio: return "wino_OBaaIBOIio"; + default: return "wino_undef"; + } } - return false; + auto fmt = getFormat(); + return mkldnn::utils::fmt2str(fmt); +} + +bool MKLDNNMemoryDesc::isDefined() const { + return desc.data.offset0 != Shape::UNDEFINED_DIM; +} + +InferenceEngine::Precision MKLDNNMemoryDesc::getPrecision() const { + return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type()); +} + +void MKLDNNMemoryDesc::setPrecision(InferenceEngine::Precision prc) { + desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); } } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h index 5de42240dba..d4cf4fc634b 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h @@ -6,13 +6,18 @@ #include "ie_layouts.h" #include "mkldnn_dims.h" +#include "cpu_memory_desc.h" +#include "mkldnn_extension_utils.h" #include #include +#include +#include #include #include #include #include +#include /** * @file contains a concept classes to work with memory/tensor/blob abstractions on plugin level. @@ -34,20 +39,15 @@ namespace MKLDNNPlugin { * Represent internal plugin abstraction of tensor description * */ -class MKLDNNMemoryDesc { +class MKLDNNMemoryDesc : public MemoryDesc { public: - /** Empty constructor - doesn't define any tensor representation */ - MKLDNNMemoryDesc(): desc() {} - /** Construct a tensor desc with plain layout format (like ND C array) */ - MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType); + MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType); /** Construct a tensor desc with specified layout format tag. 
Any and Undef is not supported */ - MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); - - explicit MKLDNNMemoryDesc(const InferenceEngine::TensorDesc& tDesc); - explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc): desc(desc) {} + MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); + explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc); /** * Try to define original format tag use on creation @@ -60,8 +60,6 @@ public: return static_cast(desc.data.data_type); } - size_t GetElementSize() const; - MKLDNNDims getDims() const { return MKLDNNDims(desc.data.dims, desc.data.ndims); } @@ -75,15 +73,38 @@ public: bool operator != (const MKLDNNMemoryDesc& rhs) const; operator mkldnn::memory::desc() const; - operator InferenceEngine::TensorDesc() const; + bool isSame(mkldnn::memory::format_tag fmt) const; + dnnl_format_kind_t getFormatKind() const { + return desc.data.format_kind; + } + + std::unique_ptr clone() const override { + return MKLDNNPlugin::make_unique(*this); + } + + bool hasLayoutType(LayoutType layoutType) const override; + + std::string serializeFormat() const override; + + bool isDefined() const override; + + InferenceEngine::Precision getPrecision() const override; + + void setPrecision(InferenceEngine::Precision prc) override; + + bool isCompatible(const MemoryDesc& rhs) const override; + bool isCompatible(const BlockedMemoryDesc& rhs) const; + bool isCompatible(const MKLDNNMemoryDesc& rhs) const; + +private: + size_t getElementOffset(size_t elemNumber) const override; + size_t getMemSizeImp() const override; bool isPlainFormat() const; bool isBlockedCFormat(size_t blk_size = UNREACHABLE_DIM) const; bool isTailCFormat() const; - bool isSame(mkldnn::memory::format_tag fmt) const; - private: static constexpr size_t UNREACHABLE_DIM = std::numeric_limits::max(); mkldnn::memory::desc desc; @@ -94,6 +115,12 @@ class MKLDNNMemory { public: explicit MKLDNNMemory(const mkldnn::engine& eng); + MKLDNNMemory(const MKLDNNMemory&) = delete; + MKLDNNMemory& operator= (const MKLDNNMemory&) = delete; + + MKLDNNMemory(MKLDNNMemory&&) = default; + MKLDNNMemory& operator= (MKLDNNMemory&&) = default; + const mkldnn::memory& GetPrimitive() const { return *prim; } @@ -106,10 +133,15 @@ public: return prim->get_desc(); } - const MKLDNNMemoryDesc GetDesc() const { - return MKLDNNMemoryDesc {prim->get_desc()}; + const MemoryDesc& GetDesc() const { + return *pMemDesc; } + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + T GetDescWithType() const; + /** * Return handler of buffer. Real data may starts from some other offset * @return @@ -126,12 +158,7 @@ public: * Like a GetData() but offset is applied. 
* @return */ - void* GetPtr() const { - auto ptr = static_cast(GetData()); - ptr += GetDescriptor().data.offset0 * GetDesc().GetElementSize(); - return ptr; - } - + void* GetPtr() const; mkldnn::memory::data_type GetDataType() const { return static_cast(GetDescriptor().data.data_type); @@ -145,19 +172,15 @@ public: return {std::begin(data.dims), std::begin(data.dims) + data.ndims}; } - void Create(const mkldnn::memory::dims& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format_tag format, - const void* data = nullptr); - - void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); + void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); // Like a plain format void SetData(mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format, const void* data, size_t size, bool ftz = true) const; void SetData(const MKLDNNMemory& memory, size_t size = 0, bool ftz = true) const; void FillZero(); - static mkldnn::memory::format_tag GetPlainFormat(const mkldnn::memory::dims& dims); + static mkldnn::memory::format_tag GetPlainFormatByRank(size_t rank); static InferenceEngine::Layout GetPlainLayout(const mkldnn::memory::dims& dims); - static bool isConsistant(const mkldnn::memory::dims& dims, mkldnn::memory::format_tag format); static mkldnn::memory::format_tag Convert(const InferenceEngine::Layout layout); static InferenceEngine::Precision convertToIePrec(mkldnn::memory::data_type dataType); static mkldnn::memory::data_type convertToDataType(const InferenceEngine::Precision &precision); @@ -167,6 +190,17 @@ public: static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0); private: + void Create(const mkldnn::memory::dims& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format_tag format, + const void* data = nullptr); + + void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); + + const MKLDNNMemoryDesc GetMKLDNNDesc() const { + return MKLDNNMemoryDesc(prim->get_desc()); + } + +private: + MemoryDescPtr pMemDesc; std::shared_ptr prim; mkldnn::engine eng; }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h index aaddd7e4575..3cbe768370c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h @@ -8,6 +8,7 @@ #include "blob_factory.hpp" #include "mkldnn_memory.h" #include "nodes/common/cpu_memcpy.h" +#include "cpu_memory_desc_utils.h" #include @@ -17,7 +18,7 @@ class MKLDNNVariableState : public InferenceEngine::IVariableStateInternal { public: MKLDNNVariableState(std::string name, MKLDNNMemoryPtr storage) : InferenceEngine::IVariableStateInternal{name} { - state = make_blob_with_precision(MKLDNNMemoryDesc(storage->GetDescriptor())); + state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->GetDesc())); state->allocate(); cpu_memcpy(state->buffer(), storage->GetData(), storage->GetSize()); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index e46c7a7b0bd..7e29589caf9 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -55,6 +55,7 @@ #include "utils/general_utils.h" #include "utils/cpu_utils.hpp" #include "nodes/common/cpu_convert.h" +#include "cpu_memory_desc_utils.h" using namespace mkldnn; using 
namespace MKLDNNPlugin; @@ -72,6 +73,8 @@ static const InferenceEngine::details::caseless_unordered_map { "FullyConnected", FullyConnected }, { "MaxPool", Pooling }, { "AvgPool", Pooling }, + { "AdaptiveMaxPool", AdaptivePooling}, + { "AdaptiveAvgPool", AdaptivePooling}, { "Add", Eltwise }, { "Subtract", Eltwise }, { "Multiply", Eltwise }, @@ -223,7 +226,9 @@ static const InferenceEngine::details::caseless_unordered_map { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator}, { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage}, { "ExtractImagePatches", ExtractImagePatches}, - { "NonMaxSuppressionIEInternal", NonMaxSuppression} + { "NonMaxSuppressionIEInternal", NonMaxSuppression}, + { "MatrixNms", MatrixNms}, + { "MulticlassNms", MulticlassNms} }; Type TypeFromName(const std::string type) { @@ -248,20 +253,16 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { algorithm = Algorithm::Undefined; fusingPort = -1; - const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name(); - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).is_dynamic()) - IE_THROW() << errorPrefix << " has dynamic input shape on " << i << " port, but CPU plug-in supports only static shape"; - } - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - IE_THROW() << errorPrefix << " has dynamic output shape on " << i << " port, but CPU plug-in supports only static shape"; - } for (size_t i = 0; i < op->get_input_size(); i++) { - const auto &shape = op->get_input_shape(i); - inDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape); + const auto &shape = op->get_input_partial_shape(i); + + bool isScalar = false; + if (shape.rank().is_static()) { + isScalar = shape.rank().get_length() == 0; + } + inputShapes.emplace_back(isScalar ? ngraph::PartialShape{1} : shape); originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i))); } @@ -270,8 +271,13 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en IE_THROW() << "Node with type '" << typeStr << "' and name '" << name << "' does not have any outputs."; } for (size_t i = 0; i < op->get_output_size(); i++) { - const auto &shape = op->get_output_shape(i); - outDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape); + const auto &shape = op->get_output_partial_shape(i); + + bool isScalar = false; + if (shape.rank().is_static()) { + isScalar = shape.rank().get_length() == 0; + } + outputShapes.emplace_back(isScalar ? 
ngraph::PartialShape{1} : shape); originalOutputPrecisions.emplace_back(details::convertPrecision(op->get_output_element_type(i))); } } @@ -418,9 +424,10 @@ void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vector= parent_spd->getConfig().outConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual( - getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[j].desc, - parent_spd->getConfig().outConfs[inNum].desc)) { + auto& curDesc = getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[j].desc; + auto& parentDesc = parent_spd->getConfig().outConfs[inNum].desc; + + if (curDesc->isCompatible(*parentDesc)) { equalsLocalFormatCount++; } } @@ -455,9 +462,9 @@ bool MKLDNNNode::canBeInPlace() const { return false; } - MKLDNNDims dims = getParentEdgeAt(0)->getDims(); + auto inShape = getParentEdgeAt(0)->getShape(); for (size_t cIdx = 0; cIdx < getChildEdges().size(); cIdx++) { - if (getChildEdgeAt(cIdx)->getDims() != dims) { + if (getChildEdgeAt(cIdx)->getShape() != inShape) { return false; } } @@ -465,7 +472,7 @@ bool MKLDNNNode::canBeInPlace() const { } void MKLDNNNode::resolveNotAllocatedEdges() { - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (!selected_pd) IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { @@ -476,7 +483,7 @@ void MKLDNNNode::resolveNotAllocatedEdges() { auto * memPtr = reinterpret_cast(parentEdge->getMemory().GetData()); parentEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine())); - parentEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().inConfs[i].desc), memPtr); + parentEdge->getMemoryPtr()->Create(*selected_pd->getConfig().inConfs[i].desc, memPtr); parentEdge->changeStatus(MKLDNNEdge::Status::Allocated); } @@ -488,7 +495,7 @@ void MKLDNNNode::resolveNotAllocatedEdges() { auto * memPtr = reinterpret_cast(childEdge->getMemory().GetData()); childEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine())); - childEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().outConfs[i].desc), memPtr); + childEdge->getMemoryPtr()->Create(*selected_pd->getConfig().outConfs[i].desc, memPtr); childEdge->changeStatus(MKLDNNEdge::Status::Allocated); } @@ -543,14 +550,14 @@ std::string MKLDNNNode::getPrimitiveDescriptorType() { // it is mixed precision. 
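// --- Editorial example (hypothetical values): for a selected descriptor whose
// first input config is FP32 the reported type string becomes e.g.
// "jit_avx512_FP32", while a U8 input keeps the historical "_I8" suffix,
// e.g. "jit_avx512_I8". The only change in this hunk is that the precision is
// now read through the polymorphic descriptor pointer
// (inConfs[0].desc->getPrecision() instead of inConfs[0].desc.getPrecision()).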
if (selectedPrimitiveDesc) { if (!selectedPrimitiveDesc->getConfig().inConfs.empty()) { - if (selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) { - str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision().name()); + if (selectedPrimitiveDesc->getConfig().inConfs[0].desc->getPrecision() != InferenceEngine::Precision::U8) { + str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().inConfs[0].desc->getPrecision().name()); } else { str_type += "_I8"; } } else { - if (selectedPrimitiveDesc->getConfig().outConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) { - str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].desc.getPrecision().name()); + if (selectedPrimitiveDesc->getConfig().outConfs[0].desc->getPrecision() != InferenceEngine::Precision::U8) { + str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].desc->getPrecision().name()); } else { str_type += "_I8"; } @@ -579,7 +586,7 @@ const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const { } const std::vector MKLDNNNode::getParentEdgesAtPort(size_t idx) const { - if (idx >= inDims.size()) + if (idx >= inputShapes.size()) IE_THROW() << "Node " << getName() << " contains less input ports than " << idx; std::vector res; @@ -593,7 +600,7 @@ const std::vector MKLDNNNode::getParentEdgesAtPort(size_t idx) co } const std::vector MKLDNNNode::getChildEdgesAtPort(size_t idx) const { - if (idx >= outDims.size()) + if (idx >= outputShapes.size()) IE_THROW() << "Node " << getName() << " contains less output ports than " << idx; std::vector res; @@ -607,18 +614,18 @@ const std::vector MKLDNNNode::getChildEdgesAtPort(size_t idx) con } -std::vector MKLDNNNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if (dims.getRank() == 3) return {memory::format_tag::tnc, memory::format_tag::ntc}; - else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nchw, memory::format_tag::nChw8c, memory::format_tag::nChw16c}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::ncdhw, memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c}; return {memory::format_tag::any}; } @@ -637,22 +644,22 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { auto itpd = desc.createPrimitiveDescriptorIterator(engine); while (static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(itpd, i)); - config.inConfs.push_back(dataConfig); + PortConfig portConfig; + portConfig.inPlace = -1; + portConfig.constant = false; + portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); + config.inConfs.push_back(portConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = canBeInPlace() ? 
0 : -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(itpd, i)); - config.outConfs.push_back(dataConfig); + PortConfig portConfig; + portConfig.inPlace = canBeInPlace() ? 0 : -1; + portConfig.constant = false; + portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); + config.outConfs.push_back(portConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -665,15 +672,12 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { void MKLDNNNode::filterSupportedPrimitiveDescriptors() { // Compare by partial layout descriptor (without particular strides values) - auto areCompatible = [](const TensorDesc& tdesc, mkldnn::memory::format_tag fmt) { - TensorDesc fmt_tdesc = MKLDNNMemoryDesc{ - MKLDNNDims(tdesc.getDims()), - MKLDNNExtensionUtils::IEPrecisionToDataType(tdesc.getPrecision()), - fmt}; + auto areCompatible = [](const MemoryDesc& desc, mkldnn::memory::format_tag fmt) -> bool { + MKLDNNMemoryDesc fmt_tdesc = MKLDNNMemoryDesc{desc.getShape().getStaticDims(), + MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + fmt}; - auto tmp_partial_tdesc = PartialBlkDesc::extractFrom(fmt_tdesc); - auto actual_partial_tdesc = PartialBlkDesc::extractFrom(tdesc); - return tmp_partial_tdesc == actual_partial_tdesc; + return desc.isCompatible(fmt_tdesc); }; if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) { @@ -685,11 +689,11 @@ void MKLDNNNode::filterSupportedPrimitiveDescriptors() { bool isSuitableDesc = true; for (int i = 0; i < inputMemoryFormatsFilter.size(); i++) { - const bool matched = areCompatible(config.inConfs[i].desc, inputMemoryFormatsFilter[i]); + const bool matched = areCompatible(*config.inConfs[i].desc, inputMemoryFormatsFilter[i]); isSuitableDesc &= matched; } for (int i = 0; i < outputMemoryFormatsFilter.size(); i++) { - const bool matched = areCompatible(config.outConfs[i].desc, outputMemoryFormatsFilter[i]); + const bool matched = areCompatible(*config.outConfs[i].desc, outputMemoryFormatsFilter[i]); isSuitableDesc &= matched; } if (!isSuitableDesc) { @@ -701,22 +705,22 @@ void MKLDNNNode::filterSupportedPrimitiveDescriptors() { } } -void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { +void MKLDNNNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; } - std::vector inDescs; + std::vector inDescs; for (const auto& inConf : config.inConfs) - inDescs.push_back(inConf.desc); - std::vector outDescs; + inDescs.push_back(inConf.desc.get()); + std::vector outDescs; for (const auto& outConf : config.outConfs) - outDescs.push_back(outConf.desc); - createDescriptor({inDescs}, {outDescs}); + outDescs.push_back(outConf.desc.get()); + createDescriptor(inDescs, outDescs); std::shared_ptr attr = initPrimitiveAttr(); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + NodeConfig rightConfig = selectedPD->getConfig(); size_t selected_count = 0; for (size_t j = 0; j < descs.size(); j++) { const auto &desc = descs[j]; @@ -727,10 +731,10 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { itpd = desc.createPrimitiveDescriptorIterator(engine, *(attr.get())); } while (static_cast(itpd)) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = 
canBeInPlace() ? 0 : -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, i); @@ -738,7 +742,7 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, i); @@ -768,23 +772,21 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { return; for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { - if (selectedConfig.inConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.inConfs[i].desc, config.inConfs[i].desc)) + if (!selectedConfig.inConfs[i].desc->isCompatible(*config.inConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { - if (selectedConfig.outConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.outConfs[i].desc, config.outConfs[i].desc)) + if (!selectedConfig.outConfs[i].desc->isCompatible(*config.outConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } rightConfig = config; } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } -void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) { +void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) { for (size_t i = 0; i < getChildEdges().size(); i++) { auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) @@ -806,7 +808,8 @@ void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::pri const auto &internalBlob = internalBlobs[i]; auto create = [&] () { - auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc()); + // TODO [DS]: internal blobs should be removed or rewritten using Memory object + auto newDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(internalBlob->getTensorDesc()); MKLDNNMemory memory{ engine }; memory.Create(newDesc, internalBlob->buffer()); @@ -947,119 +950,60 @@ const std::vector& MKLDNNNode::getPrimitivesPriority() { return implPriorities; } -bool MKLDNNNode::isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const { - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return true; - - if (desc.getBlockingDesc().getOffsetPadding() == std::numeric_limits::max()) - return true; - - for (size_t i = 0; i < desc.getBlockingDesc().getOrder().size(); i++) { - if (desc.getBlockingDesc().getOffsetPaddingToData()[i] == std::numeric_limits::max() || - desc.getBlockingDesc().getStrides()[i] == std::numeric_limits::max()) - return true; - } - - return false; -} - -InferenceEngine::TensorDesc MKLDNNNode::getConfiguredInputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const { - if (!isUninitTensorDesc(config.inConfs[idx].desc)) - return config.inConfs[idx].desc; - +std::unique_ptr MKLDNNNode::getDefinedInputDesc(const NodeConfig &config, size_t idx) const { int num = getParentEdgeAt(idx)->getInputNum(); auto *selectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "Cannot get selected primitive descriptor for node: " << getParentEdgeAt(idx)->getParent()->getName(); - if (selectedPD->getConfig().outConfs.size() <= num) - num = 0; + 
if (config.inConfs[idx].desc->isDefined()) { + return config.inConfs[idx].desc->clone(); + } if (config.inConfs[idx].inPlace >= 0) { - return getConfiguredOutputDesc(config, static_cast(config.inConfs[idx].inPlace)); + return getDefinedOutputDesc(config, static_cast(config.inConfs[idx].inPlace)); } if (num >= 0) { auto parentConf = selectedPD->getConfig().outConfs[num]; - parentConf.desc.setPrecision(config.inConfs[idx].desc.getPrecision()); - if (isUninitTensorDesc(parentConf.desc) && parentConf.inPlace >= 0) + parentConf.desc->setPrecision(config.inConfs[idx].desc->getPrecision()); + if (!parentConf.desc->isDefined() && parentConf.inPlace >= 0) getParentEdgeAt(idx)->getParent()->initOptimalPrimitiveDescriptor(); parentConf = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; - if (!isUninitTensorDesc(parentConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(parentConf.desc, config.inConfs[idx].desc)) { - return parentConf.desc; - } - - if (config.inConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY && - parentConf.desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(parentConf.desc.getPrecision(), - parentConf.desc.getDims(), { - parentConf.desc.getBlockingDesc().getBlockDims(), - parentConf.desc.getBlockingDesc().getOrder() - }); + if (parentConf.desc->isDefined() && parentConf.desc->isCompatible(*config.inConfs[idx].desc)) { + return parentConf.desc->clone(); } } - if (config.inConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(), - config.inConfs[idx].desc.getDims(), { - config.inConfs[idx].desc.getBlockingDesc().getBlockDims(), - config.inConfs[idx].desc.getBlockingDesc().getOrder() - }); - } - - return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(), - config.inConfs[idx].desc.getDims(), - InferenceEngine::TensorDesc::getLayoutByDims(config.inConfs[idx].desc.getDims())); + return MemoryDescUtils::resetOffset(config.inConfs[idx].desc.get()); } -InferenceEngine::TensorDesc MKLDNNNode::getConfiguredOutputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const { - if (!isUninitTensorDesc(config.outConfs[idx].desc)) - return config.outConfs[idx].desc; - +std::unique_ptr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t idx) const { int num = getChildEdgeAt(idx)->getOutputNum(); auto *selectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "Cannot get selected primitive descriptor for node: " << getChildEdgeAt(idx)->getChild()->getName(); - if (selectedPD->getConfig().inConfs.size() <= num) - num = 0; + if (config.outConfs[idx].desc->isDefined()) { + return config.outConfs[idx].desc->clone(); + } if (config.outConfs[idx].inPlace >= 0) { - return getConfiguredInputDesc(config, static_cast(config.outConfs[idx].inPlace)); + return getDefinedInputDesc(config, static_cast(config.outConfs[idx].inPlace)); } if (num >= 0) { auto childConf = selectedPD->getConfig().inConfs[num]; - childConf.desc.setPrecision(config.outConfs[idx].desc.getPrecision()); - if (isUninitTensorDesc(childConf.desc) && childConf.inPlace >= 0) + childConf.desc->setPrecision(config.outConfs[idx].desc->getPrecision()); + if (!childConf.desc->isDefined() && childConf.inPlace >= 0) getChildEdgeAt(idx)->getChild()->initOptimalPrimitiveDescriptor(); childConf = 
getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - if (!isUninitTensorDesc(childConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(childConf.desc, config.outConfs[idx].desc)) { - return childConf.desc; - } - if (config.outConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY && - childConf.desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(childConf.desc.getPrecision(), - childConf.desc.getDims(), { - childConf.desc.getBlockingDesc().getBlockDims(), - childConf.desc.getBlockingDesc().getOrder() - }); + if (childConf.desc->isDefined() && childConf.desc->isCompatible(*config.outConfs[idx].desc)) { + return childConf.desc->clone(); } } - if (config.outConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(), - config.outConfs[idx].desc.getDims(), { - config.outConfs[idx].desc.getBlockingDesc().getBlockDims(), - config.outConfs[idx].desc.getBlockingDesc().getOrder() - }); - } - - return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(), - config.outConfs[idx].desc.getDims(), - InferenceEngine::TensorDesc::getLayoutByDims(config.outConfs[idx].desc.getDims())); + return MemoryDescUtils::resetOffset(config.outConfs[idx].desc.get()); } void MKLDNNNode::initOptimalPrimitiveDescriptor() { @@ -1067,17 +1011,13 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - // TensorDescriptor constructor which is called inside getConfiguredInputDesc incorrectly computes offset field. - // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values. - config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i)); + config.inConfs[i].desc = getDefinedInputDesc(config, i); } for (size_t i = 0; i < config.outConfs.size(); i++) { - // TensorDescriptor constructor which is called inside getConfiguredOutputDesc incorrectly computes offset field. - // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values. 
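// --- Editorial note: resolution order implemented by the new getDefinedInputDesc /
// getDefinedOutputDesc above (which replace getConfiguredInputDesc/-OutputDesc):
//   1. the port's own desc, when it is already fully defined        -> clone()
//   2. an in-place peer port (inPlace >= 0)                         -> resolve that port instead
//   3. the connected node's selected output/input desc, when it is
//      defined and compatible with the requested one                -> clone()
//   4. otherwise fall back to the original desc with its offset
//      reset via MemoryDescUtils::resetOffset().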
- config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i)); + config.outConfs[i].desc = getDefinedOutputDesc(config, i); } initDescriptor(config); @@ -1086,38 +1026,22 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() { } } -bool MKLDNNNode::isInitConfig(const InferenceEngine::LayerConfig& config) const { +bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const { for (const auto& configs : {config.inConfs, config.outConfs}) { for (const auto &dc : configs) { - if (isUninitTensorDesc(dc.desc)) + if (!dc.desc->isDefined()) return false; } } return true; } -MKLDNNMemoryDesc MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNPlugin::make_unique(primitive_desc_it.src_desc(idx)); } -MKLDNNMemoryDesc MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)); } int MKLDNNNode::batchToProcess() { @@ -1126,15 +1050,15 @@ int MKLDNNNode::batchToProcess() { int MKLDNNNode::getMaxBatch() { // FIXME: batch != 0 dims number - if (!inDims.empty()) { - if (inDims[0].ndims()) - return inDims[0][0]; + if (!inputShapes.empty()) { + if (inputShapes[0].getRank()) + return static_cast(inputShapes[0].getStaticDims()[0]); else return 1; } - if (!outDims.empty() && outDims[0].ndims()) { - if (outDims[0].ndims()) - return outDims[0][0]; + if (!outputShapes.empty()) { + if (outputShapes[0].getRank()) + return static_cast(outputShapes[0].getStaticDims()[0]); else return 1; } @@ -1323,12 +1247,12 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const } const auto isBroadcastableToDataInput = [&]() { - const auto dataShape = getParentEdgeAt(fusingPort)->getDims().ToSizeVector(); + const auto dataShape = getParentEdgeAt(fusingPort)->getShape().getStaticDims(); for (size_t i = 0; i < getParentEdges().size(); i++) { if (i == fusingPort) continue; - auto weightShape = getParentEdgeAt(i)->getDims().ToSizeVector(); - if (!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape)) + auto weightShape = getParentEdgeAt(i)->getShape().getStaticDims(); + if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape)) return false; } return true; @@ -1351,7 +1275,11 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) 
const bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const { if (node->getType() == FakeQuantize) { - return node->getAlgorithm() != FQBinarization; + bool ret = node->getAlgorithm() != FQBinarization; + for (size_t i = 1; i < node->getParentEdges().size(); i++) { + ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1; + } + return ret; } else if (node->getType() == Eltwise) { return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, @@ -1396,7 +1324,7 @@ void MKLDNNNode::fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector(outDims[0][outDims[0].ndims() > 1 ? 1 : 0]); + const size_t bufferSize = static_cast(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]); if (align == -1) { align = bufferSize; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 29618d51fdb..77dab59e904 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -26,8 +26,10 @@ #include #include #include -#include +#include #include "cpu_types.h" +#include "cpu_shape.h" +#include "cpu_memory_desc.h" namespace MKLDNNPlugin { @@ -54,6 +56,8 @@ static std::string NameFromType(Type type) { return "Lrn"; case Pooling: return "Pooling"; + case AdaptivePooling: + return "AdaptivePooling"; case FullyConnected: return "FullyConnected"; case MatMul: @@ -192,89 +196,101 @@ static std::string NameFromType(Type type) { return "ExtractImagePatches"; case NonMaxSuppression: return "NonMaxSuppression"; + case MatrixNms: + return "MatrixNms"; + case MulticlassNms: + return "MulticlassNms"; default: return "Unknown"; } } -class PrimitiveDescInfo { +class PortConfigurator { public: - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type): config(conf) { + PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape, + bool constant = false, int inPlace = -1) : + blockedDescCreator(getBlockedDescCreator(blockedDescType)), prc(prc), shape(shape), constant(constant), inPlace(inPlace) {} + + PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED, + bool constant = false, int inPlace = -1) : + blockedDescCreator(getBlockedDescCreator(blockedDescType)), prc(prc), constant(constant), inPlace(inPlace) {} + + MKLDNNPlugin::BlockedDescCreator::CreatorConstPtr blockedDescCreator; + const InferenceEngine::Precision prc; + const Shape shape; + bool constant = false; + int inPlace = -1; + +private: + static MKLDNNPlugin::BlockedDescCreator::CreatorConstPtr getBlockedDescCreator(MKLDNNPlugin::LayoutType blockedDescType) { + auto& creators = MKLDNNPlugin::BlockedDescCreator::getCommonCreators(); + if (creators.find(blockedDescType) == creators.end()) { + IE_THROW() << "Cannot find tensor descriptor creator"; + } + return creators.at(blockedDescType); + } +}; + +struct PortConfig { + PortConfig() = default; + + PortConfig(const PortConfig& rhs) { + this->constant = rhs.constant; + this->inPlace = rhs.inPlace; + if (rhs.desc) { + this->desc = rhs.desc->clone(); + } + } + + PortConfig& operator=(const PortConfig& rhs) { + this->constant = rhs.constant; + this->inPlace = rhs.inPlace; + if (rhs.desc) { + this->desc = rhs.desc->clone(); + } + return *this; + } + + 
PortConfig(PortConfig&& rhs) = default; + PortConfig& operator=(PortConfig&& rhs) = default; + + // TODO [DS]: better to make private and const + bool constant = false; + int inPlace = -1; + std::unique_ptr desc; +}; + +struct NodeConfig { + bool dynBatchSupport = false; + std::vector inConfs; + std::vector outConfs; +}; + +class NodeDesc { +public: + NodeDesc(const NodeConfig& conf, impl_desc_type type): config(conf) { implementationType = type; } - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type, const std::vector& outFmts): config(conf) { - implementationType = type; - outputLayouts = outFmts; - } - - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type, mkldnn::memory::format_tag outFmt): config(conf) { - implementationType = type; - - setOutputLayouts(outFmt); - } - - PrimitiveDescInfo(const PrimitiveDescInfo &descInfo) = default; - PrimitiveDescInfo(PrimitiveDescInfo &&descInfo) = default; - - PrimitiveDescInfo &operator=(const PrimitiveDescInfo &descInfo) = default; - - const InferenceEngine::LayerConfig getConfig() const { + const NodeConfig& getConfig() const { return config; } - InferenceEngine::LayerConfig& getConfig() { - return config; + + void setConfig(const NodeConfig& config) { + this->config = config; } impl_desc_type getImplementationType() const { return implementationType; } - const std::vector& getOutputLayouts() const { - return outputLayouts; - } - void setImplementationType(impl_desc_type type) { implementationType = type; } - void setOutputLayouts(mkldnn::memory::format_tag outFmt) { - outputLayouts.clear(); - - for (int i = 0; i < config.outConfs.size(); i++) { - outputLayouts.push_back(outFmt); - } - } - private: - InferenceEngine::LayerConfig config; + NodeConfig config; impl_desc_type implementationType; - std::vector outputLayouts; -}; - -class DataConfigurator { -public: - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc, const InferenceEngine::SizeVector& shape, - bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape(shape), constant(constant), inplace(inplace) {} - - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED, - bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape({}), constant(constant), inplace(inplace) {} - - const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator; - const InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED; - const InferenceEngine::SizeVector shape; - const bool constant = false; - const int inplace = -1; -private: - static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) { - auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); - if (creators.find(tensorDescType) == creators.end()) { - IE_THROW() << "Cannot find tensor descriptor creator"; - } - return creators.at(tensorDescType); - } }; class MKLDNNNode { @@ -420,18 +436,18 @@ public: return type; } - const std::vector& getSupportedPrimitiveDescriptors() const { + const std::vector& getSupportedPrimitiveDescriptors() const { return supportedPrimitiveDescriptors; } - inline const PrimitiveDescInfo* getSelectedPrimitiveDescriptor() const { + inline const NodeDesc* getSelectedPrimitiveDescriptor() const { if 
(selectedPrimitiveDescriptorIndex < 0 || selectedPrimitiveDescriptorIndex >= supportedPrimitiveDescriptors.size()) return nullptr; return &supportedPrimitiveDescriptors[selectedPrimitiveDescriptorIndex]; } - inline PrimitiveDescInfo* getSelectedPrimitiveDescriptor() { + inline NodeDesc* getSelectedPrimitiveDescriptor() { if (selectedPrimitiveDescriptorIndex < 0 || selectedPrimitiveDescriptorIndex >= supportedPrimitiveDescriptors.size()) return nullptr; @@ -467,9 +483,10 @@ public: virtual void initOptimalPrimitiveDescriptor(); virtual void getSupportedDescriptors() = 0; - virtual void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) {} - virtual void initDescriptor(const InferenceEngine::LayerConfig& config); + // TODO [DS]: Should be moved into Node derivative class + virtual void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) {} + virtual void initDescriptor(const NodeConfig& config); virtual bool created() const = 0; virtual bool created(const MKLDNNExtensionManager::Ptr& extMgr) { return created(); @@ -483,23 +500,19 @@ public: template PD createPrimitiveDescriptor(const mkldnn::primitive_attr &attr = mkldnn::primitive_attr()) { - auto descsEqual = [](const std::vector& srcDescs, - const std::vector& selectedDescs) { + auto descsCompatible = [](const std::vector& srcDescs, + const std::vector& selectedDescs) { if (srcDescs.empty() && selectedDescs.empty()) return true; if (srcDescs.empty() || selectedDescs.empty()) return false; for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) { - if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() && - srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() && - srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) && - srcDescs[i].getLayout() != InferenceEngine::Layout::ANY) - return false; + return srcDescs[i]->isCompatible(*selectedDescs[i].desc); } return true; }; - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; @@ -507,19 +520,19 @@ public: auto itpd = desc.createPrimitiveDescriptorIterator(engine, attr); while (static_cast(itpd)) { - std::vector srcDescs; + std::vector srcDescs; for (size_t i = 0; i < descInputNumbers(desc); i++) srcDescs.push_back(getSrcMemDesc(itpd, i)); - std::vector dstDescs; + std::vector dstDescs; for (size_t i = 0; i < descOutputNumbers(desc); i++) dstDescs.push_back(getDstMemDesc(itpd, i)); impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); if (impl_type == selected_pd->getImplementationType() && - descsEqual(srcDescs, selected_pd->getConfig().inConfs) && - descsEqual(dstDescs, selected_pd->getConfig().outConfs)) { + descsCompatible(srcDescs, selected_pd->getConfig().inConfs) && + descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) { prepareMemory(selected_pd, itpd); PD prim_desc = createPd(desc); return {itpd.get()}; @@ -646,10 +659,10 @@ protected: virtual int getMaxBatch(); - virtual InferenceEngine::TensorDesc getConfiguredInputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const; - virtual InferenceEngine::TensorDesc getConfiguredOutputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const; - virtual MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); - virtual MKLDNNMemoryDesc 
getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual std::unique_ptr getDefinedInputDesc(const NodeConfig &config, size_t idx) const; + virtual std::unique_ptr getDefinedOutputDesc(const NodeConfig &config, size_t idx) const; + virtual std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); /** * @brief Appends new item into ops list with the information on how the node should be executed as post operation. @@ -663,8 +676,8 @@ protected: GetPrimitiveMemoryFormatFunc; std::vector internalBlobDesc; - std::vector inDims; - std::vector outDims; + std::vector inputShapes; + std::vector outputShapes; std::vector fusedWith; std::vector mergedWith; @@ -689,12 +702,11 @@ protected: ConstantType constant = ConstantType::Unknown; std::vector internalBlobs; std::vector internalBlobMemory; - std::vector supportedPrimitiveDescriptors; + std::vector supportedPrimitiveDescriptors; std::unordered_map primArgs; MKLDNNPrimitive prim; std::vector descs; - InferenceEngine::Blob::Ptr ext_scales; MKLDNNWeightsSharing::Ptr weightCache; Algorithm algorithm = Algorithm::Undefined; @@ -706,14 +718,13 @@ protected: friend class MKLDNNGraphOptimizer; friend class NodeDumper; - bool isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const; - bool isInitConfig(const InferenceEngine::LayerConfig& config) const; void selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs); + bool isConfigDefined(const NodeConfig &config) const; virtual bool canBeInPlace() const; virtual const std::vector& getPrimitivesPriority(); - virtual std::vector getAvailableFormatsForDims(const MKLDNNDims& dims) const; + virtual std::vector getAvailableFormatsForDims(const Shape& dims) const; int batchToProcess(); InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped); @@ -730,42 +741,39 @@ protected: */ virtual std::vector getOutputPrecisions() const; - void addSupportedPrimDesc(const std::vector& inDataConfigurators, - const std::vector& outDataConfigurators, + void addSupportedPrimDesc(const std::vector& inPortConfigs, + const std::vector& outPortConfigs, impl_desc_type implType, bool dynBatchSupport = false) { - auto fill_port = [] (const DataConfigurator& dataConfigurator, const InferenceEngine::SizeVector& dims, - InferenceEngine::Precision prc, std::vector& port) -> bool { - // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by tensorDescCreator. - // This should be suitable for major of scenarios since almost all nodes add `ncsp` tensorDescCreator which supports any shape rank. - if (dims.size() < dataConfigurator.tensorDescCreator->getMinimalRank()) + auto fill_port = [] (const PortConfigurator& portConfigurator, const Shape& shape, + InferenceEngine::Precision prc, std::vector& port) -> bool { + // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by blockedDescCreator. + // This should be suitable for major of scenarios since almost all nodes add `ncsp` blockedDescCreator which supports any shape rank. 
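// --- Editorial usage sketch (hypothetical node, assumed layout/precision values):
// with this refactoring a node implementation is expected to declare its ports
// through the PortConfigurator-based overload roughly like
//   addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32},
//                         {LayoutType::ncsp, Precision::I32}},
//                        {{LayoutType::ncsp, Precision::FP32}},
//                        impl_desc_type::ref_any);
// Port shapes are optional: a default-constructed (rank 0) Shape makes the
// loops below fall back to the corresponding parent/child edge shape.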
+ if (shape.getRank() < portConfigurator.blockedDescCreator->getMinimalRank()) return false; - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = dataConfigurator.inplace; - dataConfig.constant = dataConfigurator.constant; + PortConfig portConfig; + portConfig.inPlace = portConfigurator.inPlace; + portConfig.constant = portConfigurator.constant; + portConfig.desc = portConfigurator.blockedDescCreator->createUniqueDesc(prc, shape.getStaticDims()); - dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(prc, dims); - - port.push_back(dataConfig); + port.push_back(std::move(portConfig)); return true; }; - InferenceEngine::LayerConfig config; - for (size_t i = 0; i < inDataConfigurators.size(); i++) { - auto dims = inDataConfigurators[i].shape.empty() ? getParentEdgesAtPort(i)[0]->getDims().ToSizeVector() : inDataConfigurators[i].shape; - auto prc = inDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i) - : inDataConfigurators[i].prc; - if (!fill_port(inDataConfigurators[i], dims, prc, config.inConfs)) + NodeConfig config; + for (size_t i = 0; i < inPortConfigs.size(); i++) { + auto shape = inPortConfigs[i].shape.getRank() == 0 ? getParentEdgesAtPort(i)[0]->getShape() : inPortConfigs[i].shape; + auto prc = inPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i) : inPortConfigs[i].prc; + if (!fill_port(inPortConfigs[i], shape, prc, config.inConfs)) return; } - for (size_t i = 0; i < outDataConfigurators.size(); i++) { - auto dims = outDataConfigurators[i].shape.empty() ? getChildEdgesAtPort(i)[0]->getDims().ToSizeVector() : outDataConfigurators[i].shape; - auto prc = outDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalOutputPrecisionAtPort(i) - : outDataConfigurators[i].prc; - if (!fill_port(outDataConfigurators[i], dims, prc, config.outConfs)) + for (size_t i = 0; i < outPortConfigs.size(); i++) { + auto dims = outPortConfigs[i].shape.getRank() == 0 ? getChildEdgesAtPort(i)[0]->getShape() : outPortConfigs[i].shape; + auto prc = outPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? 
getOriginalOutputPrecisionAtPort(i) : outPortConfigs[i].prc; + if (!fill_port(outPortConfigs[i], dims, prc, config.outConfs)) return; } @@ -811,7 +819,7 @@ private: return PD(*selected_desc_ptr, engine); } - void prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd); + void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd); enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 }; ConstantType checkConstant(LOOK look, std::vector& checkNodes); }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 59a29ebf40a..c7907aa5569 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -57,7 +57,10 @@ #include #include #include +#include +#include #include +#include #include #include #include @@ -167,6 +170,9 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); if (useLpt) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/base.hpp b/inference-engine/src/mkldnn_plugin/nodes/base.hpp deleted file mode 100644 index b611c8eb0a4..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/base.hpp +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include "nodes/list.hpp" -#include "common/tensor_desc_creator.h" -#include "ngraph/descriptor/tensor.hpp" -#include -#include "cpu_types.h" - -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class ExtLayerBase: public ILayerExecImpl { -public: - StatusCode getSupportedConfigurations(std::vector& conf, ResponseDesc *resp) noexcept override { - if (!errorMsg.empty()) { - if (resp) { - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - conf = confs; - return OK; - } - - StatusCode init(LayerConfig& config, ResponseDesc *resp) noexcept override { - for (auto& input : config.inConfs) { - for (auto& offset : input.desc.getBlockingDesc().getOffsetPaddingToData()) { - if (offset) { - return GENERAL_ERROR; - } - } - if (input.desc.getBlockingDesc().getOffsetPadding()) { - return GENERAL_ERROR; - } - } - for (auto& output : config.outConfs) { - for (auto& offset : output.desc.getBlockingDesc().getOffsetPaddingToData()) { - if (offset) { - return GENERAL_ERROR; - } - } - if (output.desc.getBlockingDesc().getOffsetPadding()) { - return GENERAL_ERROR; - } - } - return OK; - } - -protected: - MKLDNNPlugin::Algorithm getAlgorithm() const { - return algorithm; - } - MKLDNNPlugin::Algorithm algorithm; - - class DataConfigurator { - public: - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, Precision prc = Precision::UNSPECIFIED, bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), constant(constant), inplace(inplace) {} - - DataConfigurator(const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr& tensorDescCreator, Precision prc = Precision::UNSPECIFIED, - bool constant = false, int inplace = -1) : tensorDescCreator(tensorDescCreator), prc(prc), constant(constant), inplace(inplace) {} - - const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator; - const bool constant = false; - const int inplace = -1; - 
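Where a node needs a concrete blocked descriptor rather than the addSupportedPrimDesc shorthand, the creator registry that replaces this removed helper is used directly; a rough sketch of that pattern, mirroring the mkldnn_bin_conv_node.cpp call sites further down (the function name and shape are made up, and the usual `using namespace MKLDNNPlugin; using namespace InferenceEngine;` is assumed):

    #include "common/blocked_desc_creator.h"

    void exampleBlockedDescSetup() {
        // LayoutType replaces TensorDescCreatorTypes as the registry key.
        const auto& creators = BlockedDescCreator::getCommonCreators();
        const auto& creator = creators.at(LayoutType::nCsp16c);
        const SizeVector dims = {1, 32, 7, 7};   // illustrative static shape
        // createUniqueDesc wraps createDesc; the result can be moved into PortConfig::desc.
        auto desc = creator->createUniqueDesc(Precision::FP32, dims);
        (void)desc;
    }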
const Precision prc = Precision::UNSPECIFIED; // By default ngraph node precision is used - private: - static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) { - auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); - if (creators.find(tensorDescType) == creators.end()) { - IE_THROW() << "Cannot find tensor descriptor creator"; - } - return creators.at(tensorDescType); - } - }; - - void addConfig(const std::shared_ptr& op, - const std::vector& inDataConfigurators, - const std::vector& outDataConfigurators, - bool dynBatchSupport = false) { - LayerConfig config; - - if (inDataConfigurators.size() != op->get_input_size()) - IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of inputs: " << - "expected: " << op->get_input_size() << ", provided: " << inDataConfigurators.size(); - if (outDataConfigurators.size() != op->get_output_size()) - IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of outputs: " << - "expected: " << op->get_output_size() << ", provided: " << outDataConfigurators.size(); - - auto fill_port = [] (const DataConfigurator& dataConfigurator, const ngraph::descriptor::Tensor& tensor, std::vector& port) -> bool { - // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by tensorDescCreator. - // This should be suitable for major of scenarios since almost all nodes add `ncsp` tensorDescCreator which supports any shape rank. - if (tensor.get_shape().size() < dataConfigurator.tensorDescCreator->getMinimalRank()) - return false; - - auto precision = dataConfigurator.prc != Precision::UNSPECIFIED ? dataConfigurator.prc : details::convertPrecision(tensor.get_element_type()); - - DataConfig dataConfig; - dataConfig.inPlace = dataConfigurator.inplace; - dataConfig.constant = dataConfigurator.constant; - dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(precision, tensor.get_shape()); - - port.push_back(dataConfig); - - return true; - }; - - for (size_t i = 0; i < inDataConfigurators.size(); i++) - if (!fill_port(inDataConfigurators[i], op->get_input_tensor(i), config.inConfs)) - return; - - for (size_t i = 0; i < outDataConfigurators.size(); i++) - if (!fill_port(outDataConfigurators[i], op->get_output_tensor(i), config.outConfs)) - return; - - config.dynBatchSupport = dynBatchSupport; - confs.push_back(config); - } - - std::string errorMsg; - std::vector confs; -}; - -template -class ImplFactory : public ILayerImplFactory { -public: - explicit ImplFactory(const std::shared_ptr& op) : ngraphOp(op) {} - - // First implementation has more priority than next - StatusCode getImplementations(std::vector& impls, ResponseDesc *resp) noexcept override { - try { - impls.push_back(ILayerImpl::Ptr(new IMPL(ngraphOp))); - } catch (const InferenceEngine::Exception& ex) { - strncpy(resp->msg, ex.what(), sizeof(resp->msg) - 1); - IE_SUPPRESS_DEPRECATED_START - return ex.getStatus() != OK ? 
ex.getStatus() : GENERAL_ERROR; - IE_SUPPRESS_DEPRECATED_END - } - return OK; - } -protected: - const std::shared_ptr ngraphOp; -}; - -#define REG_FACTORY_FOR(__prim, __type) \ - void __prim ## __type(MKLDNNExtensions * extInstance) { \ - using namespace MKLDNNPlugin; \ - extInstance->layersFactory.registerNodeIfRequired(MKLDNNPlugin, __type, OV_PP_TOSTRING(__type), ImplFactory<__prim>); \ - } - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp similarity index 60% rename from inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp rename to inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp index 18d48383162..85566b3833a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "tensor_desc_creator.h" +#include "blocked_desc_creator.h" #include using namespace InferenceEngine; @@ -11,19 +11,19 @@ using namespace MKLDNNPlugin; namespace { constexpr size_t channelsPos = 1lu; -class PlainFormatCreator : public TensorDescCreator { +class PlainFormatCreator : public BlockedDescCreator { public: - InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { SizeVector order(srcDims.size()); std::iota(order.begin(), order.end(), 0); - return TensorDesc(precision, srcDims, {srcDims, order}); + return BlockedMemoryDesc(precision, srcDims, srcDims, order); } size_t getMinimalRank() const override { return 0lu; } }; -class PerChannelCreator : public TensorDescCreator { +class PerChannelCreator : public BlockedDescCreator { public: - InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const override { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const override { SizeVector order(srcDims.size()); std::iota(order.begin(), order.end(), 0); SizeVector blkDims = srcDims; @@ -37,15 +37,15 @@ public: moveElementBack(blkDims, channelsPos); } - return TensorDesc(precision, srcDims, {blkDims, order}); + return BlockedMemoryDesc(precision, srcDims, blkDims, order); } size_t getMinimalRank() const override { return 3lu; } }; -class ChannelBlockedCreator : public TensorDescCreator { +class ChannelBlockedCreator : public BlockedDescCreator { public: ChannelBlockedCreator(size_t blockSize) : _blockSize(blockSize) {} - InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { if (srcDims.size() < 2) { IE_THROW() << "Can't create blocked tensor descriptor!"; } @@ -55,10 +55,12 @@ public: order.push_back(channelsPos); SizeVector blkDims = srcDims; - blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 
1 : 0); + if (Shape::UNDEFINED_DIM != blkDims[channelsPos]) { + blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 1 : 0); + } blkDims.push_back(_blockSize); - return TensorDesc(precision, srcDims, {blkDims, order}); + return BlockedMemoryDesc(precision, srcDims, blkDims, order); } size_t getMinimalRank() const override { return 3lu; } @@ -67,16 +69,16 @@ private: }; } // namespace -const TensorDescCreator::CreatorsMap& TensorDescCreator::getCommonCreators() { - static const CreatorsMap map{ { TensorDescCreatorTypes::nspc, CreatorConstPtr(new PerChannelCreator) }, - { TensorDescCreatorTypes::nCsp8c, CreatorConstPtr(new ChannelBlockedCreator(8)) }, - { TensorDescCreatorTypes::nCsp16c, CreatorConstPtr(new ChannelBlockedCreator(16)) }, - { TensorDescCreatorTypes::ncsp, CreatorConstPtr(new PlainFormatCreator) } }; +const BlockedDescCreator::CreatorsMap& BlockedDescCreator::getCommonCreators() { + static const CreatorsMap map{ { LayoutType::nspc, CreatorConstPtr(new PerChannelCreator) }, + { LayoutType::nCsp8c, CreatorConstPtr(new ChannelBlockedCreator(8)) }, + { LayoutType::nCsp16c, CreatorConstPtr(new ChannelBlockedCreator(16)) }, + { LayoutType::ncsp, CreatorConstPtr(new PlainFormatCreator) } }; return map; } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) { auto rankFilter = [rank](const CreatorsMap::value_type& item) { if (item.second->getMinimalRank() > rank) { return false; @@ -90,7 +92,7 @@ TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes) { unsigned bitMask = 0ul; for (auto& item : supportedTypes) { bitMask |= 1 << static_cast(item); @@ -112,7 +114,7 @@ TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, cons } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap &map, TensorDescCreator::Predicate predicate) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap &map, BlockedDescCreator::Predicate predicate) { auto first = CreatorsMapFilterConstIterator(std::move(predicate), map.begin(), map.end()); auto last = first.end(); return std::make_pair(first, last); diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h similarity index 74% rename from inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h rename to inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h index 4fda57fcb2f..f53524288e4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h @@ -4,25 +4,19 @@ #pragma once -#include #include +#include "cpu_shape.h" +#include "cpu_blocked_memory_desc.h" namespace MKLDNNPlugin { -enum class TensorDescCreatorTypes : unsigned { - nspc, // general per channels format - ncsp, // general planar - nCsp8c, // general channels blocked by 8 - nCsp16c // general channels blocked by 16 -}; - class CreatorsMapFilterConstIterator; -class TensorDescCreator { +class BlockedDescCreator { public: - typedef std::shared_ptr CreatorPtr; - typedef std::shared_ptr CreatorConstPtr; - typedef std::map 
CreatorsMap; + typedef std::shared_ptr CreatorPtr; + typedef std::shared_ptr CreatorConstPtr; + typedef std::map CreatorsMap; typedef std::function Predicate; public: @@ -30,17 +24,20 @@ public: static std::pair makeFilteredRange(const CreatorsMap &map, unsigned rank); static std::pair - makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes); + makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes); static std::pair makeFilteredRange(const CreatorsMap& map, Predicate predicate); - virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0; + virtual BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0; + std::unique_ptr createUniqueDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { + return MKLDNNPlugin::make_unique(createDesc(precision, srcDims)); + } virtual size_t getMinimalRank() const = 0; - virtual ~TensorDescCreator() = default; + virtual ~BlockedDescCreator() = default; }; class CreatorsMapFilterConstIterator { public: - typedef TensorDescCreator::CreatorsMap::const_iterator Iterator; + typedef BlockedDescCreator::CreatorsMap::const_iterator Iterator; typedef std::iterator_traits::value_type value_type; typedef std::iterator_traits::reference reference; typedef std::iterator_traits::pointer pointer; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp new file mode 100644 index 00000000000..4bf60d6eb21 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp @@ -0,0 +1,264 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_adaptive_pooling.h" +#include "ie_parallel.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using namespace mkldnn; +using namespace mkldnn::impl::cpu::x64; + +bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::type_info)) { + auto adaPool = std::dynamic_pointer_cast(op); + if (!adaPool) { + errorMessage = "Only opset8 AdaptiveAvgPooling operation is supported"; + return false; + } + } else if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveMaxPool::type_info)) { + auto adaPool = std::dynamic_pointer_cast(op); + if (!adaPool) { + errorMessage = "Only opset8 AdaptiveMaxPooling operation is supported"; + return false; + } + } else { + errorMessage = "Unsupported Adaptive pooling mode"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNAdaptivePoolingNode::MKLDNNAdaptivePoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Adaptive Pooling layer with name '" + getName() + "' "; + } else { + IE_THROW(NotImplemented) << errorMessage; + } + if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::type_info)) { + algorithm = Algorithm::AdaptivePoolingAvg; + } else if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveMaxPool::type_info)) { + algorithm = Algorithm::AdaptivePoolingMax; + } +} + +void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() { + if (!descs.empty()) + return; + + if (getParentEdges().size() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size(); + if (getChildEdges().size() != (algorithm == AdaptivePoolingMax ? 2 : 1)) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getParentEdges().size(); + + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto childDims = getChildEdgeAt(0)->getShape().getStaticDims(); + + spatialDimsCount = parentDims.size() - 2; + if (!one_of(spatialDimsCount, 1, 2, 3)) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); + } + + if (getParentEdgeAt(1)->getShape().getRank() != 1) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); + } + + if (getChildEdgeAt(0)->getShape().getRank() != getParentEdgeAt(0)->getShape().getRank()) { + IE_THROW() << errorPrefix << "must keep data rank"; + } +} + +void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + // we supports only fp32 currently + precision = Precision::FP32; + + InferenceEngine::LayerConfig config; + config.dynBatchSupport = false; + config.inConfs.resize(2); + config.outConfs.resize((algorithm == Algorithm::AdaptivePoolingAvg ? 
1 : 2)); + + std::vector dataFormats{ LayoutType::ncsp }; + if (getParentEdgeAt(0)->getShape().getStaticDims()[1] != 1) { + dataFormats.push_back(LayoutType::nspc); + dataFormats.push_back(LayoutType::nCsp16c); + dataFormats.push_back(LayoutType::nCsp8c); + } + for (const auto &df : dataFormats) { + if (algorithm == Algorithm::AdaptivePoolingAvg) { + addSupportedPrimDesc({{df, precision}, {LayoutType::ncsp, Precision::I32}}, + {{df, precision}}, + impl_desc_type::unknown); + } else { + addSupportedPrimDesc({{df, precision}, {LayoutType::ncsp, Precision::I32}}, + {{df, precision}, {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::unknown); + } + } +} + +void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { + auto inputPrec = getParentEdgeAt(0)->getMemory().GetDescriptor().data.data_type; + auto outputPrec = getChildEdgeAt(0)->getMemory().GetDescriptor().data.data_type; + if (!(inputPrec == mkldnn_f32 && outputPrec == mkldnn_f32)) + IE_THROW() << errorPrefix << "doesn't support demanded precisions"; + + auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); + auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); + int *indexDst = nullptr; + + if (algorithm == Algorithm::AdaptivePoolingMax) { + indexDst = reinterpret_cast(getChildEdgeAt(1)->getMemoryPtr()->GetPtr()); + } + + auto srcBlockDesc = srcMemory0.GetDescriptor().data.format_desc.blocking; + + int blockSize = srcBlockDesc.inner_nblks > 0 ? srcBlockDesc.inner_blks[0] : 1; + auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); + auto isTailCFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); + + const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + if (srcMemory1.GetElementsCount() != spatialDimsCount) + IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetElementsCount() + << ") inconsistent with pooling vector size (" << spatialDimsCount << ")"; + + auto inputDimVector = srcMemory0.GetDims(); + const int N = static_cast(inputDimVector[0]); + const int C = static_cast(inputDimVector[1]); + const int ID = static_cast(spatialDimsCount == 3 ? inputDimVector[2] : 1); + const int IH = static_cast(spatialDimsCount >= 2 ? inputDimVector[spatialDimsCount] : 1); + const int IW = static_cast(inputDimVector[spatialDimsCount + 1]); + + const int OD = static_cast(spatialDimsCount == 3 ? srcPooledSpatialShapes[0] : 1); + const int OH = static_cast(spatialDimsCount >= 2 ? srcPooledSpatialShapes[spatialDimsCount - 2] : 1); + const int OW = static_cast(srcPooledSpatialShapes[spatialDimsCount - 1]); + + const int iHW = IH * IW; + const int oDHW = OD * OH * OW, oHW = OH * OW; + + const int chPadding = srcMemory0.GetDescriptor().data.padded_dims[1]; + const int blockCount = (isTailCFmt ? 1 : chPadding / blockSize); + auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); + if (!selectedPrimitiveDescriptor) + IE_THROW() << errorPrefix << "doesn't have primitive descriptors."; + auto config = selectedPrimitiveDescriptor->getConfig(); + auto srcStrides = getParentEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + + // unified strides array + const size_t tailDimsOffset = (isTailCFmt ? -1 : 0); + const size_t inStrides[5] = { + srcStrides[0], + (isTailCFmt ? 
1 : srcStrides[1]), + (spatialDimsCount == 3 ? srcStrides[2 + tailDimsOffset] : 0), + (spatialDimsCount >= 2 ? srcStrides[spatialDimsCount + tailDimsOffset] : 0), + srcStrides[spatialDimsCount + 1 + tailDimsOffset] }; + const size_t outStrides[5] = { + dstStrides[0], + (isTailCFmt ? 1 : dstStrides[1]), + (spatialDimsCount == 3 ? dstStrides[2 + tailDimsOffset] : 0), + (spatialDimsCount >= 2 ? dstStrides[spatialDimsCount + tailDimsOffset] : 0), + dstStrides[spatialDimsCount + 1 + tailDimsOffset] }; + + std::function pool; + auto poolMax = [&] (const float *srcData, float *dstData, int od, int oh, int ow, size_t spatIndOff) { + size_t dStart, dEnd, hStart, hEnd, wStart, wEnd; + setBinBorders(&dStart, &dEnd, od, ID, OD); + setBinBorders(&hStart, &hEnd, oh, IH, OH); + setBinBorders(&wStart, &wEnd, ow, IW, OW); + float res = srcData[dStart * inStrides[2] + hStart * inStrides[3] + wStart * inStrides[4]]; // initial max value + int resIndex = dStart * iHW + hStart * IW + wStart; // initial max index + for (size_t pixD = dStart; pixD < dEnd; pixD++) { + for (size_t pixH = hStart; pixH < hEnd; pixH++) { + for (size_t pixW = wStart; pixW < wEnd; pixW++) { + float curr = srcData[pixD * inStrides[2] + pixH * inStrides[3] + pixW * inStrides[4]]; + resIndex = (res < curr ? pixD * iHW + pixH * IW + pixW : resIndex); + res = std::max(res, curr); + } + } + } + *dstData = res; + indexDst[spatIndOff * oDHW + od * oHW + oh * OW + ow] = resIndex; + }; + auto poolAvg = [&] (const float *srcData, float *dstData, int od, int oh, int ow, size_t spatIndOff) { + size_t dStart, dEnd, hStart, hEnd, wStart, wEnd; + setBinBorders(&dStart, &dEnd, od, ID, OD); + setBinBorders(&hStart, &hEnd, oh, IH, OH); + setBinBorders(&wStart, &wEnd, ow, IW, OW); + auto binSize = (dEnd - dStart) * (hEnd - hStart) * (wEnd - wStart); + if (binSize == 0) + IE_THROW() << errorPrefix << "has empty bin"; + float sum = 0; + for (size_t pixD = dStart; pixD < dEnd; pixD++) { + for (size_t pixH = hStart; pixH < hEnd; pixH++) { + for (size_t pixW = wStart; pixW < wEnd; pixW++) { + float curr = srcData[pixD * inStrides[2] + pixH * inStrides[3] + pixW * inStrides[4]]; + sum = sum + curr; + } + } + } + *dstData = sum / binSize; + }; + + if (algorithm == Algorithm::AdaptivePoolingMax) { + pool = poolMax; + } else { + pool = poolAvg; + } + + parallel_for5d(N, blockCount, OD, OH, OW, + [&](int n, int blkIdx, int od, int oh, int ow) { + auto srcData = src + n * inStrides[0] + blkIdx * inStrides[1]; + auto dstData = dst + n * outStrides[0] + blkIdx * outStrides[1] + + od * outStrides[2] + oh * outStrides[3] + ow * outStrides[4]; + int cStart = 0, cEnd = C, inResidual = 0, outResidual = 0; + if (!isTailCFmt) { + cStart = blkIdx * blockSize; + cEnd = (blkIdx == blockCount - 1 ? 
C : cStart + blockSize); + } + for (int c = cStart; c < cEnd; c++) { + if (isTailCFmt) { + inResidual = c * inStrides[1]; + outResidual = c * outStrides[1]; + } else if (!isPlainFmt) { + inResidual = outResidual = c % blockSize; + } + pool(srcData + inResidual, dstData + outResidual, od, oh, ow, n * C + c); + }}); +} + +bool MKLDNNAdaptivePoolingNode::created() const { + return getType() == AdaptivePooling; +} + +void MKLDNNAdaptivePoolingNode::createPrimitive() {} + +inline void MKLDNNAdaptivePoolingNode::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) { + *(startPtr) = idx * inputLength / outputLength; + *(endPtr) = ceil(static_cast((idx + 1) * inputLength) / outputLength); +} + +REG_MKLDNN_PRIM_FOR(MKLDNNAdaptivePoolingNode, AdaptivePooling) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h new file mode 100644 index 00000000000..386e57f4dcf --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h @@ -0,0 +1,35 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNAdaptivePoolingNode : public MKLDNNNode { +public: + MKLDNNAdaptivePoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + int spatialDimsCount; + InferenceEngine::Precision precision = InferenceEngine::Precision::FP32; + inline void setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength); + + std::string errorPrefix; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp index e2616f43c99..8700a70c5b6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_batch_to_space_node.h" -#include +#include #include using namespace MKLDNNPlugin; @@ -67,32 +67,32 @@ void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() { if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nspc, precision}}, + addSupportedPrimDesc({{LayoutType::nspc, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp}, + 
{LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] % 8 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] % 16 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } } @@ -112,15 +112,16 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto layout = getParentEdgeAt(0)->getDesc().getLayout(); - const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + + const bool blocked = srcDesc.hasLayoutType(LayoutType::nCsp8c) || srcDesc.hasLayoutType(LayoutType::nCsp16c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(inDims); auto outShape5D = getShape5D(outDims); auto blockShape = getShape5D(blockShapeIn); - if (layout == NHWC || layout == NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -129,9 +130,11 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { blockShape.erase(blockShape.begin() + 1); } - const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu; - const size_t blockCountInput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; - const size_t blockCountOutput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + + const size_t blockSize = blocked ? dstDesc.getBlockDims().back() : 1lu; + const size_t blockCountInput = srcDesc.getBlockDims()[1]; + const size_t blockCountOutput = dstDesc.getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -166,7 +169,7 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - cropsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? 
bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - cropsBeginIn[1]; - if (layout == NHWC || layout == NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -221,12 +224,13 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { } void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { case 1: batchToSpaceKernel::value_type>(); break; case 2: batchToSpaceKernel::value_type>(); break; case 4: batchToSpaceKernel::value_type>(); break; default: - IE_THROW() << "BatchToSpace layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'"; + IE_THROW() << "BatchToSpace layer does not support precision '" << + std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) << "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h index cab89df7dc6..353ea634511 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h @@ -24,6 +24,10 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; +private: + template + void batchToSpaceKernel(); + private: InferenceEngine::SizeVector inDims; InferenceEngine::SizeVector outDims; @@ -31,9 +35,6 @@ private: std::vector cropsBeginIn; std::string errorPrefix; - - template - void batchToSpaceKernel(); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 517066d6f32..183bc158ff2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -942,16 +942,16 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } } @@ -961,7 +961,7 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { setPostOps(attr); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.inConfs[0].constant = false; @@ 
-975,26 +975,38 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { if (implType != impl_desc_type::ref) { // optimzed implementation - auto outputDataType = withBinarization ? memory::data_type::bin : memory::data_type::f32; - auto weiFormat = implType == impl_desc_type::jit_avx512 ? memory::format_tag::OIhw16o32i : memory::format_tag::OIhw8o32i; // auto weiFormat = implType == impl_desc_type::jit_avx512 ? memory::format_tag::OhIw16o32i : memory::format_tag::OhIw8o32i; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::bin, memory::format_tag::nhwc); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::bin, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); + //activation + auto nspcCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); + config.inConfs[0].desc = nspcCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(0)->getShape().getStaticDims()); + + //weights + size_t weiFirstDimBlockSize = implType == impl_desc_type::jit_avx512 ? 16 : 8; //memory::format_tag::OIhw16o32i : memory::format_tag::OIhw8o32i; + auto weiDims = getParentEdgeAt(1)->getShape().getStaticDims(); + std::vector weiBlockDims = {div_up(weiDims[0], weiFirstDimBlockSize), div_up(weiDims[1], 32), + weiDims[2], weiDims[3], weiFirstDimBlockSize, 32}; + std::vector weiOrder = {0, 1, 2, 3, 0, 1}; + + config.inConfs[1].desc = MKLDNNPlugin::make_unique(Precision::BIN, weiDims, weiBlockDims, weiOrder); + + //result + auto outputPrecision = withBinarization ? Precision::BIN : Precision::FP32; + config.outConfs[0].desc = nspcCreator->createUniqueDesc(outputPrecision, getChildEdgeAt(0)->getShape().getStaticDims()); if (withSum) { config.inConfs.push_back(config.outConfs[0]); config.outConfs[0].inPlace = 2; } - supportedPrimitiveDescriptors.push_back({config, implType, memory::format_tag::nhwc}); + supportedPrimitiveDescriptors.push_back({config, implType}); } else { // reference implementation - auto weiFormat = group > 1 ? 
memory::format_tag::goihw : memory::format_tag::oihw; + auto weiCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + auto nspcCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::bin, memory::format_tag::nhwc); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::bin, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nhwc); - supportedPrimitiveDescriptors.push_back({config, implType, memory::format_tag::nhwc}); + config.inConfs[0].desc = nspcCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(0)->getShape().getStaticDims()); + config.inConfs[1].desc = weiCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(1)->getShape().getStaticDims()); + config.outConfs[0].desc = nspcCreator->createUniqueDesc(Precision::FP32, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.push_back({config, implType}); } } @@ -1003,11 +1015,9 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; - auto config = selectedPrimitiveDescriptor->getConfig(); - - auto srcDims = config.inConfs[0].desc.getDims(); - auto weiDims = config.inConfs[1].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto weiDims = getParentEdgeAt(1)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); auto implType = selectedPrimitiveDescriptor->getImplementationType(); @@ -1061,9 +1071,12 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { jcp.nb_oc_blocking = nstl::min(implType == impl_desc_type::jit_sse42 ? 2 : implType == impl_desc_type::jit_avx2 ? 4 : 6, jcp.nb_oc); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(config.outConfs[0].desc.getPrecision()); - jcp.typesize_in = config.inConfs[0].desc.getPrecision() == Precision::BIN ? 1 : config.inConfs[0].desc.getPrecision().size(); - jcp.typesize_out = config.outConfs[0].desc.getPrecision() == Precision::BIN ? 1 : config.outConfs[0].desc.getPrecision().size(); + auto srcPrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); + auto dstPrecision = getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(); + + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(dstPrecision); + jcp.typesize_in = srcPrecision == Precision::BIN ? 1 : srcPrecision.size(); + jcp.typesize_out = dstPrecision == Precision::BIN ? 
1 : dstPrecision.size(); int r_pad_no_tail = nstl::max(0, (jcp.ow - jcp.ur_w_tail - 1) * jcp.stride_w + (jcp.kw - 1) * (jcp.dilate_w + 1) - (jcp.iw + jcp.l_pad - 1)); @@ -1093,7 +1106,11 @@ bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { return false; if (node->getType() == FakeQuantize) { - return node->getAlgorithm() == FQBinarization; + bool ret = node->getAlgorithm() == FQBinarization; + for (size_t i = 1; i < node->getParentEdges().size(); i++) { + ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1; + } + return ret; } else { return canFuseSimpleOperation(node); } @@ -1277,30 +1294,28 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) { auto weights = reinterpret_cast(weightsMemory->GetPtr()); auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + std::vector srcStride(srcDesc.getStrides().size()); + for (int i = 0; i < srcStride.size(); i++) { + srcStride[srcDesc.getOrder()[i]] = srcDesc.getStrides()[i]; + } + + auto weiDesc = getParentEdgeAt(1)->getMemory().GetDescWithType(); + std::vector weightsStride(weiDesc.getShape().getRank()); + for (int i = 0; i < weightsStride.size(); i++) { + weightsStride[weiDesc.getOrder()[i]] = weiDesc.getStrides()[i]; + } + + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + std::vector dstStride(dstDesc.getStrides().size()); + for (int i = 0; i < dstStride.size(); i++) { + dstStride[dstDesc.getOrder()[i]] = dstDesc.getStrides()[i]; + } + auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; - auto config = selectedPrimitiveDescriptor->getConfig(); - - auto srcBlockDesc = config.inConfs[0].desc.getBlockingDesc(); - std::vector srcStride(srcBlockDesc.getStrides().size()); - for (int i = 0; i < srcStride.size(); i++) { - srcStride[srcBlockDesc.getOrder()[i]] = srcBlockDesc.getStrides()[i]; - } - - auto weiBlockDesc = config.inConfs[1].desc.getBlockingDesc(); - std::vector weightsStride(config.inConfs[1].desc.getDims().size()); - for (int i = 0; i < weightsStride.size(); i++) { - weightsStride[weiBlockDesc.getOrder()[i]] = weiBlockDesc.getStrides()[i]; - } - - auto dstBlockDesc = config.outConfs[0].desc.getBlockingDesc(); - std::vector dstStride(dstBlockDesc.getStrides().size()); - for (int i = 0; i < dstStride.size(); i++) { - dstStride[dstBlockDesc.getOrder()[i]] = dstBlockDesc.getStrides()[i]; - } - auto implType = selectedPrimitiveDescriptor->getImplementationType(); if (implType != impl_desc_type::ref) { executeOptimized(src, weights, dst, srcStride, weightsStride, dstStride); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp index 3d9815d48c1..ef9c14ad0d4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_broadcast_node.h" -#include +#include #include #include "common/cpu_memcpy.h" @@ -60,18 +60,20 @@ void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() { Precision prec = getOriginalInputPrecisionAtPort(BROADCAST_INPUT); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, prec}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - 
{{TensorDescCreatorTypes::ncsp, prec}}, + addSupportedPrimDesc({{LayoutType::ncsp, prec}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, prec}}, impl_desc_type::ref_any); } void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { - size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getDesc().getDims())[0]; - SizeVector dst_dims = getChildEdgeAt(0)->getDesc().getDims(); - SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getDims(); - SizeVector srcStrides = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getBlockingDesc().getStrides(); - size_t data_size = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getPrecision().size(); + size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getMemory().GetDesc().getShape().getStaticDims())[0]; + SizeVector dst_dims = getChildEdgeAt(0)->getMemory().GetDesc().getShape().getStaticDims(); + SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDesc().getShape().getStaticDims(); + + auto srcDesc = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDescWithType(); + SizeVector srcStrides = srcDesc.getStrides(); + size_t data_size = srcDesc.getPrecision().size(); if (!src_dims.size()) src_dims = SizeVector(1, 1); @@ -86,7 +88,8 @@ void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { IE_THROW() << "Output tensor dimension is smaller then input tensor dimension"; } - InferenceEngine::SizeVector dstStrides = getChildEdgeAt(0)->getDesc().getBlockingDesc().getStrides(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + InferenceEngine::SizeVector dstStrides = dstDesc.getStrides(); InferenceEngine::SizeVector src_aligned(dst_dims.size()); InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); size_t prefix_size = dst_dims.size() - src_dims.size(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp index c6c327a1993..602f4954c3b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -82,9 +80,9 @@ void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() { output_precision = Precision::I32; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, boundaries_precision}}, - {{TensorDescCreatorTypes::ncsp, output_precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, boundaries_precision}}, + {{LayoutType::ncsp, output_precision}}, impl_desc_type::ref_any); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp index 4990a658d61..2907a035788 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp @@ -21,7 +21,8 @@ #include "mkldnn_eltwise_node.h" #include #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -61,19 +62,19 @@ MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr& op, cons } void MKLDNNConcatNode::getSupportedDescriptors() { - auto& firstParentDims = getParentEdgeAt(0)->getDims(); + auto& firstParentDims = getParentEdgeAt(0)->getShape().getStaticDims(); for (size_t i = 1; i < 
getParentEdges().size(); i++) { - auto& dims = getParentEdgeAt(i)->getDims(); + auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); bool incorrectDims = false; - for (size_t j = 0; j < firstParentDims.ndims(); j++) { + for (size_t j = 0; j < firstParentDims.size(); j++) { if (j == axis) continue; - if (dims.ndims() != firstParentDims.ndims() || firstParentDims[j] != dims[j]) { + if (dims.size() != firstParentDims.size() || firstParentDims[j] != dims[j]) { incorrectDims = true; break; } } - if (incorrectDims || firstParentDims.ndims() == 0) { + if (incorrectDims || firstParentDims.size() == 0) { IE_THROW() << "Incorrect input dimensions for concat node " << getName(); } } @@ -100,19 +101,19 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { // Concat supports only equal precisions for inputs and output outputPrecision = inputPrecision; - auto& dstDims = getChildEdgeAt(0)->getDims(); - std::vector tdCreatorTypes = {TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::nspc}; + auto& dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + std::vector tdCreatorTypes = {LayoutType::ncsp, LayoutType::nspc}; // check if blocked layouts are available the channels size should be evenly divided by the block size to avoid slow oneDNN ref implementation - if (dstDims.ndims() > channelAxis) { - for (auto item : { std::make_pair(8lu, TensorDescCreatorTypes::nCsp8c), std::make_pair(16lu, TensorDescCreatorTypes::nCsp16c)}) { - SizeVector blkDims = dstDims.ToSizeVector(); + if (dstDims.size() > channelAxis) { + for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c)}) { + SizeVector blkDims = dstDims; if (blkDims[channelAxis] % item.first) continue; bool blocked = true; for (size_t i = 0; i < getParentEdges().size(); i++) { - auto& srcDims = getParentEdgeAt(i)->getDims(); + auto& srcDims = getParentEdgeAt(i)->getShape().getStaticDims(); if (srcDims[channelAxis] % item.first) { blocked = false; break; @@ -126,28 +127,27 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { std::vector pdIndexesToReuse; - auto& creatorsMap = TensorDescCreator::getCommonCreators(); - auto itrRange = TensorDescCreator::makeFilteredRange(creatorsMap, static_cast(dstDims.ndims()), tdCreatorTypes); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto itrRange = BlockedDescCreator::makeFilteredRange(creatorsMap, static_cast(dstDims.size()), tdCreatorTypes); for (auto itr = itrRange.first; itr != itrRange.second; ++itr) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.outConfs.resize(1); config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - config.outConfs[0].desc = itr->second->createDesc(outputPrecision, dstDims.ToSizeVector()); - memory::format_tag outFmt = MKLDNNMemoryDesc(config.outConfs[0].desc).getFormat(); + config.outConfs[0].desc = itr->second->createUniqueDesc(outputPrecision, dstDims); config.inConfs.resize(getParentEdges().size()); for (size_t i = 0; i < getParentEdges().size(); ++i) { config.inConfs[i].inPlace = -1; config.inConfs[i].constant = false; - config.inConfs[i].desc = MKLDNNExtensionUtils::getUninitTensorDesc( - itr->second->createDesc(inputPrecision, getParentEdgeAt(i)->getDims().ToSizeVector())); + config.inConfs[i].desc = MemoryDescUtils::applyUndefinedOffset( + itr->second->createDesc(inputPrecision, getParentEdgeAt(i)->getShape().getStaticDims())); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFmt); - if 
(itr->first != TensorDescCreatorTypes::nspc) { + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); + if (itr->first != LayoutType::nspc) { pdIndexesToReuse.push_back(supportedPrimitiveDescriptors.size() - 1); } } @@ -161,8 +161,8 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - const auto& order = refConfig.outConfs[0].desc.getBlockingDesc().getOrder(); - const auto& blkDims = refConfig.outConfs[0].desc.getBlockingDesc().getBlockDims(); + const auto &order = refConfig.outConfs[0].desc->as()->getOrder(); + const auto &blkDims = refConfig.outConfs[0].desc->as()->getBlockDims(); auto numOfDim = blkDims.size(); SizeVector offsets(numOfDim, 0lu); @@ -178,17 +178,16 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { } } - config.outConfs[0].desc = TensorDesc(outputPrecision, dstDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); - memory::format_tag outFmt = MKLDNNMemoryDesc(config.outConfs[0].desc).getFormat(); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(outputPrecision, dstDims, blkDims, order, offset, offsets, strides); for (size_t i = 0; i < getParentEdges().size(); i++) { - const auto& srcBlkDims = refConfig.inConfs[i].desc.getBlockingDesc().getBlockDims(); - const auto& dims = refConfig.inConfs[i].desc.getDims(); + const auto& srcBlkDims = refConfig.inConfs[i].desc->as()->getBlockDims(); + const auto& dims = refConfig.inConfs[i].desc->getShape().getStaticDims(); config.inConfs[i].inPlace = 0; - config.inConfs[i].desc = TensorDesc(inputPrecision, dims, {srcBlkDims, order, offset, offsets, strides}); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(inputPrecision, dims, srcBlkDims, order, offset, offsets, strides); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFmt); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } } @@ -210,7 +209,9 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { canOptimize = false; } - std::map formatFrequency; + std::map formatFrequency; + std::vector supportedLayouts = {LayoutType::ncsp, LayoutType::nspc, LayoutType::nCsp8c, LayoutType::nCsp16c}; + for (size_t i = 0; i < getParentEdges().size(); i++) { auto parentEdge = getParentEdgeAt(i); auto parent = parentEdge->getParent(); @@ -224,10 +225,11 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { if (outputIndex < 0 || outputIndex >= parent_config.outConfs.size()) IE_THROW() << "Cannot find index of output node"; const auto &port_desc = parent_config.outConfs[outputIndex].desc; - if (port_desc.getLayout() == Layout::ANY) - continue; - auto partial_format_desc = PartialBlkDesc::extractFrom(port_desc); - formatFrequency[partial_format_desc] += 1; + for (auto& item : supportedLayouts) { + if (port_desc->hasLayoutType(item)) { + formatFrequency[item] += 1; + } + } } for (size_t i = 0; i < getChildEdges().size(); i++) { auto childEdge = getChildEdgeAt(i); @@ -241,37 +243,47 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { if (inputIndex < 0 || inputIndex >= config.inConfs.size()) IE_THROW() << "Cannot find index of output node"; const auto &port_desc = config.inConfs[inputIndex].desc; - if (port_desc.getLayout() == Layout::ANY) - continue; - auto partial_format_desc = PartialBlkDesc::extractFrom(port_desc); - formatFrequency[partial_format_desc] += 1; + for (auto& item : supportedLayouts) { + if (port_desc->hasLayoutType(item)) { + 
formatFrequency[item] += 1; + } + } } size_t maxCount = 0; - auto outDims = getChildEdgeAt(0)->getDims().ToSizeVector(); - auto convertTo = PartialBlkDesc::makePlain(outDims); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); + LayoutType convertTo = LayoutType::ncsp; for (auto &it : formatFrequency) { if (it.second > maxCount) { maxCount = it.second; convertTo = it.first; } else if (it.second == maxCount) { - if (isInQuantizedGraph && it.first == PartialBlkDesc::makeTailC(outDims)) { + if (isInQuantizedGraph && it.first == LayoutType::nspc) { convertTo = it.first; - } else if (it.first == PartialBlkDesc::makeCBlocked(outDims, 8) || it.first == PartialBlkDesc::makeCBlocked(outDims, 16)) { + } else if (it.first == LayoutType::nCsp8c || it.first == LayoutType::nCsp16c) { convertTo = it.first; } } } - if (convertTo.isAutoExtendedWith(outDims)) - convertTo = PartialBlkDesc::makePlain(outDims); - for (size_t i = 0; i < getParentEdges().size(); i++) { - if (convertTo.isAutoExtendedWith(getParentEdgeAt(i)->getDims().ToSizeVector())) - convertTo = PartialBlkDesc::makePlain(outDims); + for (auto& item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) { + if (convertTo == item.second) { + if (outDims[1] % item.first != 0) { + convertTo = LayoutType::ncsp; + break; + } + for (size_t i = 0; i < getParentEdges().size(); i++) { + auto& inpDims = getParentEdgeAt(i)->getShape().getStaticDims(); + if (inpDims[1] % item.first != 0) { + convertTo = LayoutType::ncsp; + break; + } + } + } } for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); ++i) { - if (PartialBlkDesc::extractFrom(supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc) == convertTo) { + if (supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc->hasLayoutType(convertTo)) { if (IMPLICATION(supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown, canOptimize)) { canSelectPrimitive.push_back(i); } @@ -283,7 +295,7 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { return; } - // if there are more then one PD with similar data layouts - select the optimized one + // if there are more than one PD with similar data layouts - select the optimized one for (auto indx : canSelectPrimitive) { if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::unknown) { selectPrimitiveDescriptorByIndex(static_cast(indx)); @@ -321,7 +333,7 @@ void MKLDNNConcatNode::createPrimitive() { IE_THROW() << "Preferable primitive descriptor is not set."; //check if selected Tensor descriptor has nspc layout and concat axis is C - if (axis == channelAxis && getChildEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + if (axis == channelAxis && getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { canOptimizeNspc = true; return; } @@ -337,8 +349,8 @@ void MKLDNNConcatNode::createPrimitive() { } auto desc = srcMemPtr->GetDescriptor(); - auto dims = getParentEdgeAt(i)->getDims(); - for (size_t j = 0; j < dims.ndims(); j++) { + auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); + for (size_t j = 0; j < dims.size(); j++) { desc.data.dims[j] = dims[j]; } @@ -346,8 +358,8 @@ void MKLDNNConcatNode::createPrimitive() { } auto desc = getChildEdgeAt(0)->getMemory().GetDescriptor(); - auto dims = getChildEdgeAt(0)->getDims(); - for (size_t i = 0; i < dims.ndims(); i++) { + auto& dims = getChildEdgeAt(0)->getShape().getStaticDims(); + for (size_t i = 0; i < dims.size(); i++) { desc.data.dims[i] = dims[i]; 
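The layout selection in the selectOptimalPrimitiveDescriptor hunk above reduces to a channel-divisibility rule once the per-port LayoutType votes are counted; a condensed, standalone sketch of that rule with hypothetical helper and parameter names (not taken from the patch):

    #include <cstddef>
    #include <vector>

    // A blocked layout (nCsp8c / nCsp16c) is only kept when the concat output and
    // every input have a channel count divisible by the block size; otherwise the
    // node falls back to plain ncsp, which is what the loop over the
    // {8, nCsp8c} / {16, nCsp16c} pairs above implements.
    static bool blockedLayoutUsable(const std::vector<std::vector<std::size_t>>& inputDims,
                                    const std::vector<std::size_t>& outputDims,
                                    std::size_t blockSize) {
        if (outputDims.size() < 2 || outputDims[1] % blockSize != 0)
            return false;
        for (const auto& dims : inputDims)
            if (dims.size() < 2 || dims[1] % blockSize != 0)
                return false;
        return true;
    }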
desc.data.padded_dims[i] = dims[i]; } @@ -370,79 +382,77 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (!isOptimized()) { + if (!isOptimized()) { + MKLDNNNode::initOptimalPrimitiveDescriptor(); auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = getConfiguredInputDesc(config, i); + config.inConfs[i].desc = getDefinedInputDesc(config, i); // Concat doesn't support different precision on inputs - config.inConfs[i].desc.setPrecision(inputPrecision); + config.inConfs[i].desc->setPrecision(inputPrecision); } for (size_t i = 0; i < config.outConfs.size(); i++) { - config.outConfs[i].desc = getConfiguredOutputDesc(config, i); - config.outConfs[i].desc.setPrecision(outputPrecision); + config.outConfs[i].desc = getDefinedOutputDesc(config, i); + config.outConfs[i].desc->setPrecision(outputPrecision); } initDescriptor(config); } - - return; } auto config = selected_pd->getConfig(); - if (isInitConfig(config)) + if (isConfigDefined(config)) return; for (size_t i = 0; i < config.outConfs.size(); i++) { - if (!isUninitTensorDesc(config.outConfs[i].desc)) + if (config.outConfs[i].desc->isDefined()) continue; int num = getChildEdgeAt(i)->getOutputNum(); if (num >= 0) { auto childConf = getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - childConf.desc.setPrecision(config.outConfs[i].desc.getPrecision()); + childConf.desc->setPrecision(config.outConfs[i].desc->getPrecision()); if (getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()) { - if (isUninitTensorDesc(childConf.desc) && childConf.inPlace >= 0) + if (!childConf.desc->isDefined() && childConf.inPlace >= 0) getChildEdgeAt(i)->getChild()->initOptimalPrimitiveDescriptor(); - if (!isUninitTensorDesc(childConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(childConf.desc, config.outConfs[i].desc)) { - config.outConfs[i].desc = childConf.desc; + if (childConf.desc->isDefined() && childConf.desc->isCompatible(*config.outConfs[i].desc)) { + config.outConfs[i].desc = childConf.desc->clone(); continue; } } } - config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), - config.outConfs[i].desc.getDims(), { - config.outConfs[i].desc.getBlockingDesc().getBlockDims(), - config.outConfs[i].desc.getBlockingDesc().getOrder() - }); + + // reset undefined offsets + config.outConfs[i].desc = MemoryDescUtils::resetOffset(config.outConfs[i].desc.get()); } + auto firstOutBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[0].desc); size_t offset = 0; for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(), - config.inConfs[i].desc.getDims(), { - config.inConfs[i].desc.getBlockingDesc().getBlockDims(), - config.inConfs[i].desc.getBlockingDesc().getOrder(), - config.outConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset, - config.outConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(), - config.outConfs[0].desc.getBlockingDesc().getStrides() - }); + auto inpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[i].desc); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(inpBlockingDesc.getPrecision(), + inpBlockingDesc.getShape().getStaticDims(), + inpBlockingDesc.getBlockDims(), + 
inpBlockingDesc.getOrder(), + firstOutBlockingDesc.getOffsetPadding() + offset, + firstOutBlockingDesc.getOffsetPaddingToData(), + firstOutBlockingDesc.getStrides()); size_t axisSize = 1; - if (config.inConfs[0].desc.getLayout() == Layout::NHWC) { - // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for nchw8c - size_t realAxis = inverseOrder(config.inConfs[0].desc.getBlockingDesc().getOrder(), axis); - for (size_t j = realAxis; j < config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - size_t jj = config.inConfs[0].desc.getBlockingDesc().getOrder()[j]; - axisSize *= config.inConfs[i].desc.getBlockingDesc().getBlockDims()[jj]; + auto firstInpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); + if (firstInpBlockingDesc.hasLayoutType(LayoutType::nspc)) { + // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for blocked + size_t realAxis = inverseOrder(firstInpBlockingDesc.getOrder(), axis); + for (size_t j = realAxis; j < inpBlockingDesc.getBlockDims().size(); j++) { + size_t jj = firstInpBlockingDesc.getOrder()[j]; + axisSize *= inpBlockingDesc.getBlockDims()[jj]; } } else { // This works for nchw and nchw8c/nchw16c - for (size_t j = axis; j < config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - axisSize *= config.inConfs[i].desc.getBlockingDesc().getBlockDims()[j]; + for (size_t j = axis; j < inpBlockingDesc.getBlockDims().size(); j++) { + axisSize *= inpBlockingDesc.getBlockDims()[j]; } } offset += axisSize; @@ -470,7 +480,7 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) { } InferenceEngine::Precision MKLDNNConcatNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } void MKLDNNConcatNode::execNspcSpecCase() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index 36de12e94d9..4bff8260c79 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -18,6 +18,7 @@ #include #include #include "common/cpu_convert.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -68,7 +69,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr biasesDims = { groupOC }; for (int i = 0; i < convolutionOp->get_strides().size(); i++) { - stride.push_back(static_cast(convolutionOp->get_strides()[i])); + stride.push_back(convolutionOp->get_strides()[i]); } for (int i = 0; i < convolutionOp->get_dilations().size(); i++) { dilation.push_back(static_cast(convolutionOp->get_dilations()[i]) - 1); @@ -90,7 +91,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr biasesDims = {groupOC * groupNum}; for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) { - stride.push_back(static_cast(groupConvolutionOp->get_strides()[i])); + stride.push_back(groupConvolutionOp->get_strides()[i]); } for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) { dilation.push_back(static_cast(groupConvolutionOp->get_dilations()[i]) - 1); @@ -142,17 +143,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { (withBiases ? 
(getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true); } - if (isWinograd()) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); - }); - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - if (!withBiases) - return MKLDNNMemoryDesc(); - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1)); - }); - } - withSum = false; int expectedInputEdgesNum = static_cast(getOriginalInputsNumber()); for (int i = 0; i < fusedWith.size(); i++) { @@ -169,36 +159,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } - // we can't convert winograd memory descriptor to TensorDesc, so we removed weight and bias edges and put data into internalBlobs - if (isWinograd()) { - std::vector edgesToRemove; - internalBlobs.push_back(createInternalBlob(weightDims, 1, isGrouped)); - edgesToRemove.push_back(getParentEdgeAt(1)); - - if (withBiases) { - internalBlobs.push_back(createInternalBlob(biasesDims, 2)); - edgesToRemove.push_back(getParentEdgeAt(2)); - } - - if (expectedInputEdgesNum - getOriginalInputsNumber() > 0) { - size_t reconnectPort = 1; - for (size_t startPort = 2 + (withBiases ? 1 : 0); startPort < expectedInputEdgesNum; startPort++) { - getParentEdgeAt(startPort)->setChildPort(reconnectPort); - reconnectPort++; - } - } - - for (size_t i = 0; i < edgesToRemove.size(); i++) { - removeEdge(edgesToRemove[i]); - } - - expectedInputEdgesNum -= getOriginalInputsNumber() - 1; - if (withBiases) { - inDims.erase(inDims.begin() + 2); - } - inDims.erase(inDims.begin() + 1); - } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; @@ -229,11 +189,12 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } if (getParentEdges().size() != expectedInputEdgesNum) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); + IE_THROW() << "Incorrect number of input edges for layer " << getName() << ", expected: " << expectedInputEdgesNum + << " actual: " << getParentEdges().size(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - int ndims = getParentEdgesAtPort(0)[0]->getDims().ndims(); + int ndims = getParentEdgesAtPort(0)[0]->getShape().getRank(); MKLDNNDims weightsDims = MKLDNNDims(weightDims); withDWConv = isFusedWith(Convolution); @@ -241,10 +202,14 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { for (int i = 0; i < fusedWith.size(); i++) { auto *convolutionNode = dynamic_cast(fusedWith[i].get()); if (convolutionNode) { - dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2]; - dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1]; - dw_conv_oc = convolutionNode->outDims[0][1]; - const auto &dwWeightsDims = convolutionNode->inDims[1].ToSizeVector(); + auto& inActivationDims = convolutionNode->inputShapes[0].getStaticDims(); + dw_conv_ih = inActivationDims[convolutionNode->inputShapes[0].getRank() - 2]; + dw_conv_iw = inActivationDims[convolutionNode->inputShapes[0].getRank() - 1]; + + auto& outDims = convolutionNode->outputShapes[0].getStaticDims(); + dw_conv_oc = outDims[1]; + + const auto &dwWeightsDims = convolutionNode->inputShapes[1].getStaticDims(); 
dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 1]); dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 2]); dw_conv_strides = convolutionNode->getStride(); @@ -262,8 +227,8 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { for (int j = 0; j < paddingR.size(); j++) { int with_group = isGrouped ? 1 : 0; int krn = weightsDims[with_group + 2 + j]; - int src = getParentEdgeAt(0)->getDims()[2 + j]; - int dst = getChildEdgeAt(0)->getDims()[2 + j]; + int src = getParentEdgeAt(0)->getShape().getStaticDims()[2 + j]; + int dst = getChildEdgeAt(0)->getShape().getStaticDims()[2 + j]; krn = (krn - 1)*(dilation[j] + 1) + 1; int calc_dst = (src - krn + paddingL[j]) / stride[j] + 1; @@ -272,18 +237,18 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } - MKLDNNMemoryDesc in_candidate, out_candidate; + MemoryDescPtr in_candidate, out_candidate; if (canBeExecutedInInt8()) { // We have to extend convolution_x8s8s32x from oneDNN to support BF16 output data type if (outputDataType == memory::data_type::bf16) outputDataType = memory::data_type::f32; if (eltwisePrecision == Precision::BF16) eltwisePrecision = Precision::FP32; - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ndims == 5 ? memory::format_tag::ndhwc - : memory::format_tag::nhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ndims == 5 ? memory::format_tag::ndhwc - : memory::format_tag::nhwc); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + inputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); + out_candidate = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + outputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isDepthWise() && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32; @@ -320,33 +285,36 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { memory::format_tag nCsp16c = ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c; memory::format_tag nCsp8c = ndims == 4 ? 
memory::format_tag::nChw8c : memory::format_tag::nCdhw8c; + auto inputDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto outputDims = getChildEdgeAt(0)->getShape().getStaticDims(); + if (IC == 1 && groupOC == 1) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ncsp); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else if (IC < 4) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp16c); - createDescriptor({in_candidate}, {out_candidate}); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp8c); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nCsp16c); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp16c); - createDescriptor({in_candidate}, {out_candidate}); - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nCsp8c); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp8c); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp16c); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp8c); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ncsp); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); if (inputDataType != memory::data_type::bf16 && isNspcAvailable()) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nspc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nspc); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nspc); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nspc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } } } @@ -421,15 +389,18 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { continue; auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while 
(static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = getSrcMemDesc(itpd, i); - if (!isGrouped) - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc); + auto srcDesc = getSrcMemDesc(itpd, i); + if (isGrouped || srcDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + dataConfig.desc = std::move(srcDesc); + else + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*srcDesc); + config.inConfs.push_back(dataConfig); } @@ -437,34 +408,38 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - MKLDNNDims dwBiasesDims({dw_conv_oc}); + std::vector dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); + std::vector dwBiasesDims({dw_conv_oc}); - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); config.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; if (withSum) { dataConfig.inPlace = getParentEdges().size() - 1; } dataConfig.constant = false; - dataConfig.desc = getDstMemDesc(itpd, i); - if (!isGrouped) - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc); + + auto dstDesc = getDstMemDesc(itpd, i); + if (isGrouped || dstDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + dataConfig.desc = std::move(dstDesc); + else + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*dstDesc); + config.outConfs.push_back(dataConfig); if (withSum) { dataConfig.inPlace = -1; - dataConfig.desc.setPrecision(eltwisePrecision); + dataConfig.desc->setPrecision(eltwisePrecision); config.inConfs.push_back(dataConfig); } } @@ -505,9 +480,10 @@ bool MKLDNNConvolutionNode::created() const { return getType() == Convolution; } -void MKLDNNConvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; +void MKLDNNConvolutionNode::createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) { + auto inDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + auto outDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); memory::data_type bdt = memory::data_type::f32; @@ -516,12 +492,9 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector algorithms; @@ -533,17 +506,17 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector conv_desc; if (withBiases) { - 
MKLDNNMemoryDesc bias_candidate{blocked_biasesDims, bdt, memory::format_tag::any}; + mkldnn::memory::desc bias_candidate(blocked_biasesDims, bdt, memory::format_tag::any); conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg, - in_candidate, wgh_candidate, bias_candidate, out_candidate, + inDesc, wgh_candidate, bias_candidate, outDesc, mkldnn::memory::dims(stride.begin(), stride.end()), mkldnn::memory::dims(dilation.begin(), dilation.end()), mkldnn::memory::dims(paddingL.begin(), paddingL.end()), mkldnn::memory::dims(paddingR.begin(), paddingR.end()))); } else { conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg, - in_candidate, wgh_candidate, out_candidate, + inDesc, wgh_candidate, outDesc, mkldnn::memory::dims(stride.begin(), stride.end()), mkldnn::memory::dims(dilation.begin(), dilation.end()), mkldnn::memory::dims(paddingL.begin(), paddingL.end()), @@ -569,7 +542,7 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) const { } } -void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) { +void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; @@ -589,14 +562,14 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c // } if (isStridedBlobsSupported) { - createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc}); + createDescriptor({config.inConfs[0].desc.get()}, {config.outConfs[0].desc.get()}); } mkldnn::primitive_attr attr; addZeroPoints(attr); setPostOps(attr); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + auto rightConfig = selectedPD->getConfig(); size_t selected_count = 0; bool containJitImpl = false; @@ -607,10 +580,10 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c continue; auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (static_cast(itpd)) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t j = 0; j < descInputNumbers(desc); j++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, j); @@ -621,27 +594,27 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? 
Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - MKLDNNDims dwBiasesDims({dw_conv_oc}); + std::vector dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); + std::vector dwBiasesDims({dw_conv_oc}); - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); cfg.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); cfg.inConfs.push_back(dataConfig); } for (size_t j = 0; j < descOutputNumbers(desc); j++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, j); if (withSum) { auto eltwiseConfig = dataConfig; - eltwiseConfig.desc.setPrecision(eltwisePrecision); + eltwiseConfig.desc->setPrecision(eltwisePrecision); cfg.inConfs.push_back(eltwiseConfig); dataConfig.inPlace = getParentEdges().size() - 1; } @@ -668,7 +641,7 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c break; } } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } void MKLDNNConvolutionNode::filterSupportedPrimitiveDescriptors() { @@ -729,44 +702,17 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c auto dstMemDesc = MKLDNNMemoryDesc {convDesc->data.dst_desc}; auto srcDataType = convDesc->data.src_desc.data_type; auto dstDataType = convDesc->data.dst_desc.data_type; - bool isPlanarFloatConv = srcMemDesc.isPlainFormat() - && dstMemDesc.isPlainFormat() + bool isPlanarFloatConv = srcMemDesc.hasLayoutType(LayoutType::ncsp) + && dstMemDesc.hasLayoutType(LayoutType::ncsp) && srcDataType == memory::data_type::f32 && dstDataType == memory::data_type::f32; return !isPossibleJitPlanar && isPlanarFloatConv; } -MKLDNNMemoryDesc MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = idx > 0 ? 
MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else { - if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(), - desc.getBlockingDesc().getOrder().end()) + 1) { - auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector(); - auto new_dims = InferenceEngine::SizeVector({groupNum, div_up(old_dims[0], groupNum)}); - for (int i = 1; i < old_dims.size(); i++) { - new_dims.push_back(old_dims[i]); - } - - auto td = InferenceEngine::TensorDesc(desc.getPrecision(), - new_dims, - desc.getBlockingDesc()); - if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) { - td.setLayout(BLOCKED); - } - return MKLDNNMemoryDesc(td); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); - } - } +std::unique_ptr MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); + return MKLDNNPlugin::make_unique(std::move(desc)); } bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { @@ -774,11 +720,11 @@ bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { } const mkldnn::memory& MKLDNNConvolutionNode::getWeights() const { - return isWinograd() ? internalBlobMemory[0]->GetPrimitive() : getParentEdgeAt(1)->getMemory().GetPrimitive(); + return getParentEdgeAt(1)->getMemory().GetPrimitive(); } const mkldnn::memory& MKLDNNConvolutionNode::getBias() const { - return isWinograd() ? 
internalBlobMemory[1]->GetPrimitive() : getParentEdgeAt(2)->getMemory().GetPrimitive(); + return getParentEdgeAt(2)->getMemory().GetPrimitive(); } InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { @@ -792,7 +738,7 @@ InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } bool MKLDNNConvolutionNode::isNspcAvailable() const { @@ -809,8 +755,8 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const { } // A bunch of heuristics are designed to cut off not optimal nspc convolution applications - auto inpDims = getParentEdgeAt(0)->getDims().ToSizeVector(); - auto outDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + auto inpDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); auto ndims = inpDims.size(); if (isDepthWise()) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 79b4aef029e..7fa5ed80bb8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -20,9 +20,9 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; - void initDescriptor(const InferenceEngine::LayerConfig& config) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; + void initDescriptor(const NodeConfig& config) override; void createPrimitive() override; void selectOptimalPrimitiveDescriptor() override; void initSupportedPrimitiveDescriptors() override; @@ -32,13 +32,13 @@ public: return false; } InferenceEngine::Precision getRuntimePrecision() const override; - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; const mkldnn::memory& getWeights() const; const mkldnn::memory& getBias() const; size_t descInputNumbers(MKLDNNDescriptor desc) override { - return static_cast(isWinograd() ? 
1 : getOriginalInputsNumber()); + return getOriginalInputsNumber(); } bool canBeExecutedInInt8() const; @@ -49,7 +49,7 @@ public: std::vector outputCompensation; const InferenceEngine::SizeVector &getWeightDims() { return weightDims; } - const std::vector &getStride() { return stride; } + const std::vector &getStride() { return stride; } const std::vector &getDilation() { return dilation; } const std::vector &getPaddingL() { return paddingL; } const std::vector &getPaddingR() { return paddingR; } @@ -77,18 +77,18 @@ private: bool withDWConv; bool isGrouped; bool isPrimitivesPriorityDefined = false; - std::vector stride; + std::vector stride; std::vector dilation; std::vector paddingL; std::vector paddingR; InferenceEngine::SizeVector weightDims; InferenceEngine::SizeVector biasesDims; - ptrdiff_t dw_conv_oc; - ptrdiff_t dw_conv_ih; - ptrdiff_t dw_conv_iw; - std::vector dw_conv_kernel; - std::vector dw_conv_strides; + size_t dw_conv_oc; + size_t dw_conv_ih; + size_t dw_conv_iw; + std::vector dw_conv_kernel; + std::vector dw_conv_strides; mkldnn::memory::data_type dw_conv_in_dt; size_t groupNum; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 678922f3a4b..00a403c8bb6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -5,7 +5,7 @@ #include #include "mkldnn_convert_node.h" #include "common/cpu_convert.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include using namespace mkldnn; @@ -38,9 +38,9 @@ MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr& op, co MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode("Convert", nodeName, eng, cache) { - inDims.emplace_back(dims); + inputShapes.emplace_back(dims); addOriginalInputPrecision(inPrc); - outDims.emplace_back(dims); + outputShapes.emplace_back(dims); addOriginalOutputPrecision(outPrc); errorPrefix = "Convert node with name '" + getName() + "'"; @@ -49,10 +49,10 @@ MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, co void MKLDNNConvertNode::getSupportedDescriptors() { // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data // from correspond tensor descriptors. 
- if (outDims.empty() && output && output->getLayout() != InferenceEngine::Layout::ANY) - outDims.push_back(MKLDNNDims(output->getDims())); - if (inDims.empty() && input && input->getLayout() != InferenceEngine::Layout::ANY) - inDims.push_back(MKLDNNDims(input->getDims())); + if (outputShapes.empty()) + outputShapes.push_back(output->getShape()); + if (inputShapes.empty()) + inputShapes.push_back(input->getShape()); if (getParentEdges().size() != 1) IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) @@ -63,39 +63,40 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - LayerConfig config; - DataConfig dataIn; - DataConfig dataConfigOut; + NodeConfig config; + PortConfig dataIn; + PortConfig dataConfigOut; config.dynBatchSupport = false; // if input and output pointers are not null, then the inp/output tensor descriptors were set using setDescs method, so // they should be used as the actual descriptors. - if (input && input->getLayout() != InferenceEngine::Layout::ANY && output && output->getLayout() != InferenceEngine::Layout::ANY) { - dataIn.desc = *input; + if (input && output) { + dataIn.desc = input->clone(); config.inConfs.push_back(dataIn); - const auto& blockingDesc = config.inConfs[0].desc.getBlockingDesc(); // inp/out layouts must be the same - dataConfigOut.desc = TensorDesc(output->getPrecision(), input->getDims(), blockingDesc); + // inp/out layouts must be the same + dataConfigOut.desc = config.inConfs[0].desc->clone(); + dataConfigOut.desc->setPrecision(output->getPrecision()); config.outConfs.push_back(dataConfigOut); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } else if (getOriginalInputsNumber() == 1 && getOriginalOutputsNumber() == 1) { - const SizeVector& insDims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const Shape& insShape = getParentEdgeAt(0)->getShape(); auto insPrecision = getOriginalInputPrecisionAtPort(0); - const SizeVector& outputDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + const Shape& outputShape = getChildEdgeAt(0)->getShape(); auto outPrecision = getOriginalOutputPrecisionAtPort(0); config.inConfs.push_back(dataIn); config.outConfs.push_back(dataConfigOut); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, insDims.size()); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, insShape.getRank()); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(insPrecision, insDims); - config.outConfs[0].desc = itr->second->createDesc(outPrecision, outputDims); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(insPrecision, insShape.getDims())); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(outPrecision, outputShape.getDims())); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } } else { IE_THROW() << errorPrefix << " has incorrect number of input/output edges"; @@ -121,7 +122,7 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) { void* srcPtr = 
parentMem.GetPtr(); void* dstPtr = childMem.GetPtr(); - cpu_convert(srcPtr, dstPtr, getParentEdgeAt(0)->getDesc().getPrecision(), getChildEdgeAt(0)->getDesc().getPrecision(), parentMem.GetElementsCount()); + cpu_convert(srcPtr, dstPtr, parentMem.GetDesc().getPrecision(), childMem.GetDesc().getPrecision(), parentMem.GetElementsCount()); } bool MKLDNNConvertNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index ca43bb3db5c..38707385f7a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -30,19 +30,19 @@ public: // In that case the Convert node is instantiated with default CNNLayer and inp/out tensor descriptors are set via this method. // This is useful if the Convert node is added to the graph as an auxiliary operation at the MKLDNNGraph // initialization stage. - void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) { - this->input.reset(new InferenceEngine::TensorDesc(input)); - this->output.reset(new InferenceEngine::TensorDesc(output)); + void setDescs(const MemoryDesc& input, const MemoryDesc& output) { + this->input = input.clone(); + this->output = output.clone(); } - std::shared_ptr getInput() const { return input; } - std::shared_ptr getOutput() const { return output; } + const MemoryDesc& getInput() const { return *input; } + const MemoryDesc& getOutput() const { return *output; } static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: - std::shared_ptr input; - std::shared_ptr output; + std::unique_ptr input; + std::unique_ptr output; std::string errorPrefix; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp index 34c9aaf191e..2bf514fffda 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -59,9 +58,9 @@ void MKLDNNCTCGreedyDecoderNode::initSupportedPrimitiveDescriptors() { if (seqLenPrecision != Precision::FP32 && seqLenPrecision != Precision::BF16) IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -70,9 +69,9 @@ void MKLDNNCTCGreedyDecoderNode::execute(mkldnn::stream strm) { const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const size_t T = getParentEdgeAt(DATA_INDEX)->getDims()[0]; - const size_t B = getParentEdgeAt(DATA_INDEX)->getDims()[1]; - const int C = getParentEdgeAt(DATA_INDEX)->getDims()[2]; + const size_t T = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[0]; + const size_t B = 
getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[1]; + const int C = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[2]; const size_t BC = B * C; const size_t CB1 = C * (B - 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp index 0eccdbfa1b5..acd273a9ad9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -58,15 +57,15 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::initSupportedPrimitiveDescriptors() { if (seqLenPrecision != Precision::I32 && seqLenPrecision != Precision::I64) IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, impl_desc_type::ref_any); } @@ -76,13 +75,13 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { int* decodedClasses = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetPtr()); int* decodedClassesLength = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_LENGTH_INDEX)[0]->getMemoryPtr()->GetPtr()); - const size_t B = getParentEdgeAt(DATA_INDEX)->getDims()[0];; - const size_t T = getParentEdgeAt(DATA_INDEX)->getDims()[1];; - const int C = getParentEdgeAt(DATA_INDEX)->getDims()[2];; + const size_t B = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[0];; + const size_t T = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[1];; + const int C = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[2];; const size_t TC = T * C; int blankIndex = C - 1; - if (inDims.size() > BLANK_INDEX) + if (inputShapes.size() > BLANK_INDEX) blankIndex = (reinterpret_cast(getParentEdgeAt(BLANK_INDEX)->getMemoryPtr()->GetPtr()))[0]; size_t workAmount = 0; @@ -91,7 +90,7 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { std::string errorMsg = errorPrefix + ". 
Sequence length " + std::to_string(sequenceLengths[b]) + " cannot be greater than according decoded classes dimension size " - + std::to_string(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getDims()[1]); + + std::to_string(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getShape().getStaticDims()[1]); IE_THROW() << errorMsg; } workAmount += sequenceLengths[b]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp index b355dcaefcd..47da0501486 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp @@ -46,14 +46,14 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -66,12 +66,12 @@ void MKLDNNCTCLossNode::execute(mkldnn::stream strm) { const int* labelsLength = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr()); float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const size_t batchNum = getParentEdgeAt(0)->getDims()[0]; - const size_t maxTime = getParentEdgeAt(0)->getDims()[1]; - const size_t classesNum = getParentEdgeAt(0)->getDims()[2]; + const size_t batchNum = getParentEdgeAt(0)->getShape().getStaticDims()[0]; + const size_t maxTime = getParentEdgeAt(0)->getShape().getStaticDims()[1]; + const size_t classesNum = getParentEdgeAt(0)->getShape().getStaticDims()[2]; int blankIndex = classesNum - 1; - if (inDims.size() > 4) { + if (inputShapes.size() > 4) { blankIndex = reinterpret_cast(getParentEdgeAt(4)->getMemoryPtr()->GetPtr())[0]; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp index 3f6c8f90348..5124409cf8b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // #include "list.hpp" -#include "base.hpp" #include #include @@ -78,20 +77,20 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has unsupported 'axis' input precision: " << axisTensorPrec.name(); } - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, dataPrecision); + inDataConf.emplace_back(LayoutType::ncsp, dataPrecision); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + {{LayoutType::ncsp, dataPrecision}}, impl_desc_type::ref_any); } void MKLDNNCumSumNode::execute(mkldnn::stream strm) { - if (inDims.size() == numOfInputs) - axis = 
getAxis(getParentEdgeAt(AXIS)->getBlob(), getParentEdgeAt(CUM_SUM_DATA)->getBlob()); + if (inputShapes.size() == numOfInputs) + axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory()); switch (dataPrecision) { case Precision::I8 : { @@ -134,7 +133,7 @@ template void MKLDNNCumSumNode::exec() { const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getDesc().getBlockingDesc().getStrides(); + const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType().getStrides(); if (reverse) { if (exclusive) { @@ -248,18 +247,18 @@ inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector &forSta return startOffset; } -size_t MKLDNNCumSumNode::getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) const { - const auto& axisPrecision = _axis->getTensorDesc().getPrecision(); - const int64_t dataShapeSize = static_cast(_data->getTensorDesc().getDims().size()); +size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const { + const auto& axisPrecision = _axis.GetDesc().getPrecision(); + const int64_t dataShapeSize = static_cast(_data.GetDesc().getShape().getRank()); int64_t axisValueFromBlob; switch (axisPrecision) { case Precision::I32 : { - const auto *axisPtr = _axis->cbuffer().as(); + const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); axisValueFromBlob = static_cast(axisPtr[0]); break; } case Precision::I64 : { - const auto *axisPtr = _axis->cbuffer().as(); + const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); axisValueFromBlob = axisPtr[0]; break; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h index 794d6bc73f1..bbe180f5544 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h @@ -34,7 +34,7 @@ private: inline size_t getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const; - size_t getAxis(const InferenceEngine::Blob::CPtr& _axis, const InferenceEngine::Blob::CPtr& _data) const; + size_t getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const; enum { CUM_SUM_DATA, AXIS, numOfInputs }; bool exclusive; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index ba44392e66f..62c173c72f5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -16,6 +16,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -108,10 +109,10 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE // So we disconnect constant node containing weights from the graph and then don't use it. 
if (getParentEdges().size() == 3) { removeEdge(getParentEdgeAt(2)); - inDims.erase(inDims.begin() + 2); + inputShapes.erase(inputShapes.begin() + 2); } removeEdge(getParentEdgeAt(1)); - inDims.erase(inDims.begin() + 1); + inputShapes.erase(inputShapes.begin() + 1); InferenceEngine::SizeVector dimsForBlockedDesc{dims}; std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]); @@ -151,7 +152,7 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { if (!withGroups && stride.back() > 3) return false; if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) { - auto inDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + auto inDims = getChildEdgeAt(0)->getShape().getStaticDims(); // heuristicConst = 2^26 // heuristicParam = IC^2 * SP auto heuristicConst = 67108864; @@ -230,8 +231,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { for (int i = 0; i < paddingR.size(); i++) { int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0; int krn = weightDims[with_group + 2 + i]; - int src = getChildEdgeAt(0)->getDims()[2 + i]; - int dst = getParentEdgeAt(0)->getDims()[2 + i]; + int src = getChildEdgeAt(0)->getShape().getStaticDims()[2 + i]; + int dst = getParentEdgeAt(0)->getShape().getStaticDims()[2 + i]; krn = (krn - 1)*(dilation[i] + 1) + 1; int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; @@ -242,15 +243,15 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { // WA: if int8 deconvolution is supported, we create internal weights blob in IO format std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]); internalBlobs.push_back(createWeiBlobAsIO(weightDims)); - auto format = getParentEdgeAt(0)->getDims().ndims() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); - createDescriptor({in_candidate}, {out_candidate}); + auto format = getParentEdgeAt(0)->getShape().getRank() == 5 ? 
dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; + MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + createDescriptor({&in_candidate}, {&out_candidate}); } else { - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); - createDescriptor({in_candidate}, {out_candidate}); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + createDescriptor({&in_candidate}, {&out_candidate}); } } setPostOps(attr); @@ -346,10 +347,10 @@ void MKLDNNDeconvolutionNode::createPrimitive() { } } -void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); - MKLDNNMemoryDesc out_candidate(outputDesc[0]); +void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + const MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + const MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); // grouping and autoblicking is not compatible if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) @@ -361,7 +362,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector deconv_desc; deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, in_candidate, wgh_candidate, out_candidate, @@ -370,7 +371,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector conv_desc; conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg, @@ -399,52 +400,21 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx == 2) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(getOriginalInputPrecisionAtPort(2), - getParentEdgeAt(2)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(2)->getDims().ToSizeVector()))); + auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2)); + return MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), dataType, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(2)->getShape().getRank())); } - InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) + MKLDNNMemoryDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : isInt8 ? 
MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)) : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else { - if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(), - desc.getBlockingDesc().getOrder().end()) + 1) { - auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector(); - auto new_dims = weightDims; - - auto td = InferenceEngine::TensorDesc(desc.getPrecision(), - new_dims, - desc.getBlockingDesc()); - if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) { - td.setLayout(BLOCKED); - } - return MKLDNNMemoryDesc(td); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); - } - } + return MKLDNNPlugin::make_unique(std::move(desc)); } -MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = isInt8 ? MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)) - : MKLDNNMemoryDesc(primitive_desc_it.diff_src_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return isInt8 ? 
MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)) : + MKLDNNPlugin::make_unique(primitive_desc_it.diff_src_desc(idx)); } InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const { @@ -458,7 +428,7 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index f3f47c83a9f..15ee71d6af7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -17,8 +17,8 @@ public: MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void createPrimitive() override; void filterSupportedPrimitiveDescriptors() override; void filterSupportedDescriptors(); @@ -31,8 +31,8 @@ public: return static_cast(getParentEdges().size()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp index a2fae182a52..4d29550eda0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp @@ -785,20 +785,20 @@ void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getParentEdgeAt(0)->getDims().ndims() != 4) { + if (getParentEdgeAt(0)->getShape().getRank() != 4) { IE_THROW() << "Deformable convolution layer. Unsupported mode. 
Only 4D blobs are supported as input."; } - if (getParentEdgeAt(1)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getParentEdgeAt(2)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getDims().ndims(); + if (getParentEdgeAt(2)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } } @@ -806,7 +806,7 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(3); config.inConfs[0].constant = false; @@ -838,20 +838,26 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o : mayiuse(avx512_common) ? memory::format_tag::OIhw16i16o : memory::format_tag::OIhw8i8o; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::f32, dataFormat); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, offFormat); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::f32, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, dataFormat); - supportedPrimitiveDescriptors.push_back({config, impl_type, dataFormat}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + memory::data_type::f32, dataFormat); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), + memory::data_type::f32, offFormat); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), + memory::data_type::f32, weiFormat); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + memory::data_type::f32, dataFormat); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } else { // reference implementation - auto weiFormat = group > 1 ? 
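// The deformable-convolution checks above move from edge->getDims().ndims() to
// edge->getShape().getRank(). A stand-alone sketch of that style of rank validation follows;
// Shape and checkRankIs4 are illustrative stand-ins, not the plugin's shape class.
#include <cstddef>
#include <sstream>
#include <stdexcept>
#include <vector>

class Shape {
public:
    explicit Shape(std::vector<size_t> dims) : dims_(std::move(dims)) {}
    size_t getRank() const { return dims_.size(); }
    const std::vector<size_t>& getStaticDims() const { return dims_; }
private:
    std::vector<size_t> dims_;
};

void checkRankIs4(const Shape& shape, const char* what) {
    if (shape.getRank() != 4) {
        std::ostringstream msg;
        msg << what << " is expected to be 4D but has rank " << shape.getRank();
        throw std::runtime_error(msg.str());
    }
}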
memory::format_tag::goihw : memory::format_tag::oihw; - - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::f32, memory::format_tag::oihw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - supportedPrimitiveDescriptors.push_back({config, impl_type, weiFormat}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::oihw); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } } @@ -861,9 +867,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); - auto weiDims = config.inConfs[2].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto weiDims = getParentEdgeAt(2)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); jcp.dg = deformable_group; @@ -1062,25 +1068,20 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { const auto *weights = reinterpret_cast(srcMemory2.GetPtr()); float *dst = reinterpret_cast(dstMemory.GetPtr()); - auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) - IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors."; - auto config = selectedPrimitiveDescriptor->getConfig(); - - auto src_block_desc = config.inConfs[0].desc.getBlockingDesc(); + auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType(); std::vector src_strides(src_block_desc.getStrides().size()); for (int i = 0; i < src_strides.size(); i++) { src_strides[src_block_desc.getOrder()[i]] = src_block_desc.getStrides()[i]; } - auto dst_block_desc = config.outConfs[0].desc.getBlockingDesc(); + auto dst_block_desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector dst_strides(dst_block_desc.getStrides().size()); for (int i = 0; i < dst_strides.size(); i++) { dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i]; } - auto off_strides = config.inConfs[1].desc.getBlockingDesc().getStrides(); - auto wei_strides = config.inConfs[2].desc.getBlockingDesc().getStrides(); + auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType().getStrides(); + auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType().getStrides(); if (def_conv_kernel) { executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides); @@ -1094,7 +1095,7 @@ bool 
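// In the execute() hunk above, per-block strides are scattered back into logical-dimension
// order through the blocking order (src_strides[order[i]] = strides[i]). A self-contained
// sketch of that reordering, assuming order and blockStrides have equal length and order is
// a plain permutation of 0..N-1 (the real blocked case can carry extra block dimensions).
#include <cstddef>
#include <vector>

std::vector<size_t> stridesInLogicalOrder(const std::vector<size_t>& order,
                                          const std::vector<size_t>& blockStrides) {
    std::vector<size_t> logical(order.size(), 0);
    for (size_t i = 0; i < order.size(); ++i) {
        logical[order[i]] = blockStrides[i];  // i-th stored stride belongs to logical dim order[i]
    }
    return logical;
}
// Example: order {0, 2, 3, 1} with stored strides {S0, S1, S2, S3} yields
// {S0, S3, S1, S2}, i.e. strides indexed by the logical dims N, C, H, W.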
MKLDNNDeformableConvolutionNode::created() const { } InferenceEngine::Precision MKLDNNDeformableConvolutionNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp index 38bebcd5271..a117d3acbdc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp @@ -6,7 +6,7 @@ #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include @@ -58,7 +58,7 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr(std::pow(blockSize, nSpatialDims)); } else { IE_THROW(NotImplemented) << errorMessage; @@ -66,13 +66,13 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr 5) THROW_ERROR << "doesn't support dimensions with rank greater than 5"; - SizeVector dstDims = outDims[0].ToSizeVector(); + SizeVector dstDims = outputShapes[0].getStaticDims(); if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; @@ -99,8 +99,8 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto srcDims = getParentEdgeAt(0)->getDims(); - const size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = srcDims.size(); impl_desc_type impl_type; if (mayiuse(impl::cpu::x64::avx512_common)) { @@ -113,7 +113,7 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -122,27 +122,27 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2) { auto canUseBlocked = [=](const size_t block) { return srcDims[1] % block == 0 && (srcDims[1] / block) % blockStep == 0 && (mode == Mode::DEPTH_FIRST ? 
block % blockStep == 0 : true); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(precision, getParentEdgeAt(0)->getDims().ToSizeVector()); - config.outConfs[0].desc = itr->second->createDesc(precision, getChildEdgeAt(0)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_type, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.inConfs[0].desc = itr->second->createUniqueDesc(precision, getParentEdgeAt(0)->getShape().getStaticDims()); + config.outConfs[0].desc = itr->second->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_type); } } @@ -156,18 +156,19 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor"; - SizeVector srcDims = getParentEdgeAt(0)->getBlob()->getTensorDesc().getDims(); - SizeVector dstDims = getChildEdgeAt(0)->getBlob()->getTensorDesc().getDims(); + SizeVector srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -193,8 +194,8 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -223,7 +224,7 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { } 
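// The depth-to-space hunk above selects candidate layouts (nspc, nCsp8c, nCsp16c, ncsp) based
// on whether the channel count divides evenly by the block size and the depth-to-space block
// step. A simplified stand-alone sketch of that selection, assuming blockStep >= 1; Layout and
// candidateLayouts are illustrative stand-ins, not the plugin's LayoutType machinery.
#include <cstddef>
#include <vector>

enum class Layout { Planar, ChannelsLast, Blocked8, Blocked16 };

std::vector<Layout> candidateLayouts(size_t channels, size_t blockStep) {
    auto blockedUsable = [&](size_t block) {
        return channels % block == 0 && (channels / block) % blockStep == 0;
    };
    std::vector<Layout> layouts{Layout::ChannelsLast};
    if (blockedUsable(8))  layouts.push_back(Layout::Blocked8);
    if (blockedUsable(16)) layouts.push_back(Layout::Blocked16);
    layouts.push_back(Layout::Planar);  // planar always works as a fallback
    return layouts;
}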
reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, srcBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp index 0a2f4fc8140..1d44dd3f747 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -116,13 +115,13 @@ void MKLDNNDetectionOutputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -132,12 +131,12 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { const float *loc_data = reinterpret_cast(getParentEdgeAt(idx_location)->getMemoryPtr()->GetPtr()); const float *conf_data = reinterpret_cast(getParentEdgeAt(idx_confidence)->getMemoryPtr()->GetPtr()); const float *prior_data = reinterpret_cast(getParentEdgeAt(idx_priors)->getMemoryPtr()->GetPtr()); - const float *arm_conf_data = inDims.size() > 3 ? + const float *arm_conf_data = inputShapes.size() > 3 ? reinterpret_cast(getParentEdgeAt(idx_arm_confidence)->getMemoryPtr()->GetPtr()) : nullptr; - const float *arm_loc_data = inDims.size() > 4 ? + const float *arm_loc_data = inputShapes.size() > 4 ? 
reinterpret_cast(getParentEdgeAt(idx_arm_location)->getMemoryPtr()->GetPtr()) : nullptr; - const int N = getParentEdgeAt(idx_confidence)->getDims()[0]; + const int N = getParentEdgeAt(idx_confidence)->getShape().getStaticDims()[0]; float *decoded_bboxes_data = _decoded_bboxes.data(); float *reordered_conf_data = _reordered_conf.data(); @@ -286,8 +285,8 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { } } - const int num_results = getChildEdgesAtPort(0)[0]->getDims()[2]; - const int DETECTION_SIZE = getChildEdgesAtPort(0)[0]->getDims()[3]; + const int num_results = getChildEdgesAtPort(0)[0]->getShape().getStaticDims()[2]; + const int DETECTION_SIZE = getChildEdgesAtPort(0)[0]->getShape().getStaticDims()[3]; if (DETECTION_SIZE != 7) { IE_THROW() << NOT_IMPLEMENTED; } @@ -300,7 +299,7 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { else dst_data_size = N * _num_classes * _num_priors * DETECTION_SIZE * sizeof(float); - if (dst_data_size > getChildEdgesAtPort(0)[0]->getBlob()->byteSize()) { + if (dst_data_size > getChildEdgesAtPort(0)[0]->getMemory().GetSize()) { IE_THROW() << OUT_OF_BOUNDS; } memset(dst_data, 0, dst_data_size); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp index b9ef511d010..1796d49989e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp @@ -48,20 +48,20 @@ MKLDNNDFTNode::MKLDNNDFTNode(const std::shared_ptr& op, const mkld } /* Data */ - inputShape = inDims[DATA_INDEX].ToSizeVector(); + inputShape = inputShapes[DATA_INDEX].getStaticDims(); if (inputShape.size() < 2) { IE_THROW() << layerErrorPrefix << " has invalid 'data' input tensor with rank: " << inputShape.size(); } /* Axes */ - const auto axesRank = inDims[AXES_INDEX].ndims(); + const auto axesRank = inputShapes[AXES_INDEX].getRank(); if (axesRank != 1) { IE_THROW() << layerErrorPrefix << " has invalid 'axes' input tensor with rank: " << axesRank; } /* Signal size */ if (inputsNumber > SIGNAL_SIZE_INDEX) { - const auto signalSizeRank = inDims[SIGNAL_SIZE_INDEX].ndims(); + const auto signalSizeRank = inputShapes[SIGNAL_SIZE_INDEX].getRank(); if (signalSizeRank != 1) { IE_THROW() << layerErrorPrefix << " has invalid 'signal_size' input tensor with rank: " << signalSizeRank; } @@ -93,12 +93,12 @@ void MKLDNNDFTNode::initSupportedPrimitiveDescriptors() { } } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > SIGNAL_SIZE_INDEX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } namespace { @@ -225,7 +225,7 @@ void copyDataToOutputWithSignalSize(const float* input, const std::vector(axesEdge->getMemoryPtr()->GetPtr()); - axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getDims()[0]); + axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getShape().getStaticDims()[0]); for (auto& axis : axes) { if (axis < 0) { axis += inputShape.size() - 1; @@ -233,7 
+233,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } std::sort(axes.begin(), axes.end()); - outputShape = getChildEdgeAt(0)->getDims().ToSizeVector(); + outputShape = getChildEdgeAt(0)->getShape().getStaticDims(); for (size_t axis : axes) { size_t nComplex = outputShape[axis]; // FFT uses different twiddle factors @@ -247,8 +247,8 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { const auto *input = reinterpret_cast(inputDataEdge->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(outputDataEdge->getMemoryPtr()->GetPtr()); - auto inputStrides = inputDataEdge->getDesc().getBlockingDesc().getStrides(); - auto outputStrides = outputDataEdge->getDesc().getBlockingDesc().getStrides(); + auto inputStrides = inputDataEdge->getMemory().GetDescWithType().getStrides(); + auto outputStrides = outputDataEdge->getMemory().GetDescWithType().getStrides(); if (inputShape != outputShape) { copyDataToOutputWithSignalSize(input, inputShape, inputStrides, output, outputShape, outputStrides); } else { @@ -257,7 +257,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } // 1d case - if (inputDataEdge->getDesc().getDims().size() == 2) { + if (inputDataEdge->getShape().getRank() == 2) { size_t nComplex = outputShape[0]; if (IsPowerOfTwo(nComplex)) { fft(output, nComplex * 2, true); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 9dd250d7b96..d777e22210f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -125,11 +125,11 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu if (eltwiseNode.getFusedWith()[i].get()->getType() == Eltwise) { post_op_emitters.push_back(create_eltwise_emitter(*eltwiseNode.getFusedWith()[i].get(), exec_prc)); } else if (eltwiseNode.getFusedWith()[i].get()->getType() == FakeQuantize) { - auto fakeQuantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); - fakeQuantizeNode->appendPostOps(post_ops); + auto fakeQuantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); + fakeQuantizeNode->appendPostOps(post_ops); - quantization_injectors.push_back(std::make_shared>( - this, post_ops.get()->entry_[post_ops.len() - 1], vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias)); + quantization_injectors.push_back(std::make_shared>( + this, post_ops.get()->entry_[post_ops.len() - 1], vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias)); } } @@ -965,9 +965,9 @@ size_t MKLDNNEltwiseNode::getOpInputsNum() const { } bool MKLDNNEltwiseNode::isWithBroadcast() { - auto oDims = outDims[0].ToSizeVector(); - for (size_t i = 0; i < inDims.size(); i++) { - auto iDims = inDims[i].ToSizeVector(); + auto oDims = outputShapes[0].getStaticDims(); + for (size_t i = 0; i < inputShapes.size(); i++) { + auto iDims = inputShapes[i].getStaticDims(); if (iDims != oDims) return true; } @@ -1080,10 +1080,10 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { Blocked }; - auto initDesc = [&] (LayoutType lt) -> PrimitiveDescInfo { - auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> TensorDesc { - if (lt == ChannelsFirst && edge->getDims().ndims() != 1) { - auto dims = edge->getDims().ToSizeVector(); + auto initDesc = [&] (LayoutType lt) -> NodeDesc { + auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> std::unique_ptr { + if (lt == ChannelsFirst && edge->getShape().getRank() != 1) 
{ + auto dims = edge->getShape().getStaticDims(); auto ndims = dims.size(); std::vector order(ndims); std::iota(order.begin(), order.end(), 0); @@ -1097,11 +1097,11 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks[i] = dims[order[i]]; } - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); - } else if (lt == Blocked && edge->getDims().ndims() != 1 && edge->getDims()[1] != 1) { + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); + } else if (lt == Blocked && edge->getShape().getRank() != 1 && edge->getShape().getStaticDims()[1] != 1) { size_t blockSize = mayiuse(x64::avx512_common) ? 16 : 8; - std::vector blocks = edge->getDims().ToSizeVector(); + std::vector blocks = edge->getShape().getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); @@ -1109,37 +1109,38 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks.push_back(blockSize); order.push_back(1); - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); } else { - std::vector blocks = edge->getDims().ToSizeVector(); + std::vector blocks = edge->getShape().getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); } }; size_t offset = std::numeric_limits::max(); - InferenceEngine::LayerConfig config; - config.dynBatchSupport = getChildEdgeAt(0)->getDims().ndims() > 1 && getChildEdgeAt(0)->getDims() == getParentEdgeAt(0)->getDims(); + NodeConfig config; + config.dynBatchSupport = getChildEdgeAt(0)->getShape().getRank() > 1 && getChildEdgeAt(0)->getShape() == + getParentEdgeAt(0)->getShape(); for (size_t i = 0; i < getParentEdges().size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 0 : -1; - dataConfig.constant = false; + PortConfig portConfig; + portConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 
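// The eltwise createMemoryDesc lambda above derives a blocked layout description from a plain
// dims vector: the channels-first branch moves the channel dimension to the innermost position
// of the order, the blocked branch additionally appends a channel block. A self-contained sketch
// of the channels-first case; BlockedLayout and channelsFirstLayout are illustrative stand-ins,
// not the plugin's descriptor types.
#include <cstddef>
#include <numeric>
#include <vector>

struct BlockedLayout {
    std::vector<size_t> blockDims;  // dims listed in memory (outer-to-inner) order
    std::vector<size_t> order;      // order[i] = logical dim stored at position i
};

BlockedLayout channelsFirstLayout(const std::vector<size_t>& dims) {
    const size_t ndims = dims.size();
    std::vector<size_t> order(ndims);
    std::iota(order.begin(), order.end(), 0);
    if (ndims > 1) {
        // move the channel dimension (logical index 1) to the innermost position
        order.erase(order.begin() + 1);
        order.push_back(1);
    }
    std::vector<size_t> blocks(ndims);
    for (size_t i = 0; i < ndims; ++i)
        blocks[i] = dims[order[i]];
    return {blocks, order};
}
// For NCHW dims {N, C, H, W} this produces order {0, 2, 3, 1} and blockDims {N, H, W, C},
// i.e. an NHWC (channels-last in memory) description.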
0 : -1; + portConfig.constant = false; - dataConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); + portConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); - config.inConfs.push_back(dataConfig); + config.inConfs.push_back(portConfig); } - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; + PortConfig portConfig; + portConfig.inPlace = -1; + portConfig.constant = false; - dataConfig.desc = createMemoryDesc(getChildEdgeAt(0), outputPrecision, offset); + portConfig.desc = createMemoryDesc(getChildEdgeAt(0), outputPrecision, offset); - config.outConfs.push_back(dataConfig); + config.outConfs.push_back(portConfig); impl_desc_type impl_type; if (mayiuse(x64::avx512_common)) { @@ -1155,18 +1156,20 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { return {config, impl_type}; }; - bool isChannelsFirstApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 1, 2, 4, 5); + bool isChannelsFirstApplicable = one_of(getChildEdgeAt(0)->getShape().getRank(), 1, 2, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 1, 2, 4, 5); - isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getParentEdgeAt(i)->getDims().ndims() != 1, - getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims()); + isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getParentEdgeAt(i)->getShape().getRank(), 1, 2, 4, 5); + isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getParentEdgeAt(i)->getShape().getRank() != 1, + getChildEdgeAt(0)->getShape().getRank() == + getParentEdgeAt(i)->getShape().getRank()); } - bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 1, 4, 5); + bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getShape().getRank(), 1, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 1, 4, 5); - isBlockedApplicable = isBlockedApplicable && implication(getParentEdgeAt(i)->getDims().ndims() != 1, - getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims()); + isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getShape().getRank(), 1, 4, 5); + isBlockedApplicable = isBlockedApplicable && implication(getParentEdgeAt(i)->getShape().getRank() != 1, + getChildEdgeAt(0)->getShape().getRank() == + getParentEdgeAt(i)->getShape().getRank()); } if (isChannelsFirstApplicable) @@ -1177,9 +1180,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { } void MKLDNNEltwiseNode::createPrimitive() { - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - - auto initDims = [this, config](size_t maxInputSize) { + auto initDims = [this](size_t maxInputSize) { size_t inputNum = getParentEdges().size(); dims_in.resize(inputNum); @@ -1189,8 +1190,9 @@ void MKLDNNEltwiseNode::createPrimitive() { dims_out.resize(maxInputSize, 1); + auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector order(maxInputSize); - auto outOrder = config.outConfs[0].desc.getBlockingDesc().getOrder(); + auto outOrder = outBlockingDesc.getOrder(); for (size_t i = 0; i < order.size(); i++) { if (i < order.size() - outOrder.size()) order[i] = i; @@ -1198,17 +1200,18 @@ void MKLDNNEltwiseNode::createPrimitive() { order[i] = outOrder[i - (order.size() - 
outOrder.size())] + (order.size() - outOrder.size()); } - size_t outRank = config.outConfs[0].desc.getBlockingDesc().getBlockDims().size(); + size_t outRank = outBlockingDesc.getBlockDims().size(); for (int i = 0; i < outRank; i++) { - dims_out[dims_out.size() - 1 - i] = config.outConfs[0].desc.getBlockingDesc().getBlockDims()[outRank - 1 - i]; + dims_out[dims_out.size() - 1 - i] = outBlockingDesc.getBlockDims()[outRank - 1 - i]; } for (int i = 0; i < inputNum; i++) { - size_t inRank = config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); + auto inBlockingDesc = getParentEdgeAt(i)->getMemory().GetDescWithType(); + size_t inRank = inBlockingDesc.getBlockDims().size(); // WA to normalize blocked and planar layouts - auto inOrder = config.inConfs[i].desc.getBlockingDesc().getOrder(); - size_t startOff = outOrder.size() != config.outConfs[0].desc.getDims().size() && + auto inOrder = inBlockingDesc.getOrder(); + size_t startOff = outOrder.size() != outBlockingDesc.getShape().getRank() && outOrder[outOrder.size() - 1] != inOrder[inOrder.size() - 1] ? 1 : 0; // WA to handle nspc layout with 1D tensors @@ -1217,7 +1220,7 @@ void MKLDNNEltwiseNode::createPrimitive() { } for (int j = 0; j < inRank; j++) { - dims_in[i][dims_in[i].size() - 1 - j - startOff] = config.inConfs[i].desc.getBlockingDesc().getBlockDims()[inRank - 1 - j]; + dims_in[i][dims_in[i].size() - 1 - j - startOff] = inBlockingDesc.getBlockDims()[inRank - 1 - j]; } } @@ -1229,13 +1232,13 @@ void MKLDNNEltwiseNode::createPrimitive() { } }; - auto initOffsets = [this, config](size_t maxInputSize) { + auto initOffsets = [this](size_t maxInputSize) { size_t inputNum = getParentEdges().size(); offsets_out.resize(maxInputSize, 1); offset_out_calc(offsets_out, dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_out[j] *= config.outConfs[0].desc.getPrecision().size(); + offsets_out[j] *= getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); } offsets_in.resize(inputNum); @@ -1243,7 +1246,7 @@ void MKLDNNEltwiseNode::createPrimitive() { offsets_in[i].resize(maxInputSize, 1); offset_in_calc(offsets_in[i], dims_in[i], dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_in[i][j] *= config.inConfs[i].desc.getPrecision().size(); + offsets_in[i][j] *= getParentEdgeAt(i)->getMemory().GetDesc().getPrecision().size(); } } @@ -1287,10 +1290,11 @@ void MKLDNNEltwiseNode::createPrimitive() { } }; - tensorRank = std::max(static_cast(optimalTensorRank), config.outConfs[0].desc.getBlockingDesc().getBlockDims().size()); + auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + tensorRank = std::max(static_cast(optimalTensorRank), outBlockingDesc.getBlockDims().size()); initDims(tensorRank); - auto outOrder = config.outConfs[0].desc.getBlockingDesc().getOrder(); + auto outOrder = outBlockingDesc.getOrder(); size_t oc_size = 0; offsets_oc.resize(tensorRank, 0); if (isFusedWith(FakeQuantize)) { @@ -1310,7 +1314,7 @@ void MKLDNNEltwiseNode::createPrimitive() { fullWorkAmount *= dims_out[i]; } - isDynBatchEnabled = config.dynBatchSupport; + isDynBatchEnabled = getSelectedPrimitiveDescriptor()->getConfig().dynBatchSupport; size_t minimalConcurrency = parallel_get_max_threads(); size_t minimalJitWorkAmount = 256; @@ -1320,7 +1324,7 @@ void MKLDNNEltwiseNode::createPrimitive() { bool hasDifferentDims = false; while (currentJitWorkAmount < minimalJitWorkAmount && currentJitWorkAmount < fullWorkAmount && // we shouldn't collapse batch dimension in case dynamic batch is enabled - (!isDynBatchEnabled || 
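// initOffsets above converts per-dimension element offsets into byte offsets by multiplying
// them with the precision size taken from the memory descriptor. A minimal sketch of that
// computation for a dense row-major tensor (an assumption; the real kernel also handles
// collapsed and broadcast dimensions).
#include <cstddef>
#include <vector>

std::vector<size_t> byteOffsets(const std::vector<size_t>& dims, size_t elementSize) {
    std::vector<size_t> offsets(dims.size(), 1);
    // row-major element strides: the innermost dimension changes fastest
    for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i)
        offsets[i] = offsets[i + 1] * dims[i + 1];
    for (auto& o : offsets)
        o *= elementSize;  // element stride -> byte stride
    return offsets;
}
// dims {2, 3, 4} with 4-byte elements gives element strides {12, 4, 1} and byte strides {48, 16, 4}.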
(config.outConfs[0].desc.getBlockingDesc().getBlockDims().size() - collapsedDims > 2))) { + (!isDynBatchEnabled || (outBlockingDesc.getBlockDims().size() - collapsedDims > 2))) { if (dims_out.size() - collapsedDims - 2 < 0) break; @@ -1372,22 +1376,24 @@ void MKLDNNEltwiseNode::createPrimitive() { } } - batchDimIdx = tensorRank - config.outConfs[0].desc.getBlockingDesc().getBlockDims().size() + collapsedDims; + batchDimIdx = tensorRank - outBlockingDesc.getBlockDims().size() + collapsedDims; schedulerWorkAmount = fullWorkAmount / dims_out[dims_out.size() - 1]; initOffsets(tensorRank); - jep.inputs_number = config.inConfs.size(); + const size_t inpuPortsCount = getSelectedPrimitiveDescriptor()->getConfig().inConfs.size(); + + jep.inputs_number = inpuPortsCount; jep.input_size = tensorRank; - for (int i = 0; i < config.inConfs.size(); i++) { + for (int i = 0; i < inpuPortsCount; i++) { jep.src_size[i] = dims_in[i][dims_in[i].size() - 1]; - jep.src_prc[i] = config.inConfs[i].desc.getPrecision(); + jep.src_prc[i] = getParentEdgesAtPort(i).front()->getMemory().GetDesc().getPrecision(); } jep.dst_size = dims_out[dims_out.size() - 1]; - jep.dst_prc = config.outConfs[0].desc.getPrecision(); + jep.dst_prc = getChildEdgesAtPort(0).front()->getMemory().GetDesc().getPrecision(); - for (int i = 0; i < config.inConfs.size(); i++) { + for (int i = 0; i < inpuPortsCount; i++) { jep.src_offsets[i] = offsets_in[i]; } jep.dst_offsets = offsets_out; @@ -1415,13 +1421,13 @@ void MKLDNNEltwiseNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = getConfiguredInputDesc(config, i); + config.inConfs[i].desc = std::move(getDefinedInputDesc(config, i)); } for (size_t i = 0; i < config.outConfs.size(); i++) { - config.outConfs[i].desc = getConfiguredOutputDesc(config, i); + config.outConfs[i].desc = std::move(getDefinedOutputDesc(config, i)); } initDescriptor(config); @@ -1641,13 +1647,13 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { } } - return getParentEdgesAtPort(0)[0].get()->getDims() == getChildEdgesAtPort(0)[0].get()->getDims(); + return getParentEdgesAtPort(0)[0].get()->getShape() == getChildEdgesAtPort(0)[0].get()->getShape(); } void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) { // Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API. 
specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd && - getParentEdgesAtPort(0)[0]->getDims().ToSizeVector() == getParentEdgesAtPort(1)[0]->getDims().ToSizeVector(); + getParentEdgesAtPort(0)[0]->getShape() == getParentEdgesAtPort(1)[0]->getShape(); if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) { fillScalesAndShifts(parentNode.get(), scales, shifts, 16); } @@ -1770,7 +1776,7 @@ InferenceEngine::Precision MKLDNNEltwiseNode::getRuntimePrecision() const { } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNEltwiseNode, Eltwise); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp index f59b69b023d..4499e91dacb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp @@ -62,15 +62,15 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } void MKLDNNEmbeddingBagOffsetSumNode::initFromInputs() { @@ -122,7 +122,8 @@ void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingBagOffsetSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp index 3318e1089fa..f185d085881 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp @@ -58,12 +58,12 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); } 
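// The embedding-bag hunks above assemble the node's input configuration as a vector of
// {layout, precision} pairs and append optional entries only when the node actually has the
// default-index or per-sample-weights inputs. A stand-alone sketch of that pattern; LayoutKind,
// PrecisionKind, PortConf and buildInputConf are illustrative stand-ins, not the plugin's
// configurator types.
#include <cstddef>
#include <vector>

enum class LayoutKind { Planar };
enum class PrecisionKind { FP32, I32 };

struct PortConf {
    LayoutKind layout;
    PrecisionKind precision;
};

std::vector<PortConf> buildInputConf(size_t inputCount, PrecisionKind dataPrecision,
                                     size_t defaultIndexIdx, size_t perSampleWeightsIdx) {
    std::vector<PortConf> conf{{LayoutKind::Planar, dataPrecision},
                               {LayoutKind::Planar, PrecisionKind::I32},
                               {LayoutKind::Planar, PrecisionKind::I32}};
    if (inputCount > defaultIndexIdx)
        conf.push_back({LayoutKind::Planar, PrecisionKind::I32});
    if (inputCount > perSampleWeightsIdx)
        conf.push_back({LayoutKind::Planar, dataPrecision});
    return conf;
}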
- std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } void MKLDNNEmbeddingBagPackedSumNode::initFromInputs() { @@ -89,7 +89,8 @@ void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingBagPackedSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp index 8abeee76d76..853da79accf 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp @@ -44,13 +44,12 @@ MKLDNNEmbeddingBagSumNode::MKLDNNEmbeddingBagSumNode( template void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsData, T* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc) { + const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims) { std::string msgPrefix = std::string("Node EmbeddingBagSum with name '") + _layerName + "' "; initFromInputs(); - const auto& inDataDims = srcDesc.getDims(); - const size_t outputBagsNum = dstDesc.getDims()[0]; + const size_t outputBagsNum = outDataDims[0]; auto threadBody = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); @@ -115,27 +114,27 @@ void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsDa parallel_nt(0, threadBody); } -void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc) { - switch (srcDesc.getPrecision()) { +void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, + const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims) { + switch (srcPrc) { case Precision::FP32: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), reinterpret_cast(dstData), inDims, outDims); } case Precision::I8: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), reinterpret_cast(dstData), inDims, outDims); 
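// EmbeddingBagSum::execute above now dispatches on the source precision and forwards plain dims
// vectors into the templated processData<T>() instead of passing tensor descriptors. A reduced,
// self-contained sketch of that dispatch; Prec, processDataSketch and executeSketch are
// illustrative names, and the type list and loop body are placeholders rather than the node's
// real reduction logic.
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

enum class Prec { FP32, I32, U8 };

template <typename T>
void processDataSketch(const uint8_t* src, uint8_t* dst, const std::vector<size_t>& dims) {
    const T* typedSrc = reinterpret_cast<const T*>(src);
    T* typedDst = reinterpret_cast<T*>(dst);
    size_t total = 1;
    for (size_t d : dims) total *= d;
    for (size_t i = 0; i < total; ++i) typedDst[i] = typedSrc[i];  // placeholder element-wise body
}

void executeSketch(Prec prc, const uint8_t* src, uint8_t* dst, const std::vector<size_t>& dims) {
    switch (prc) {
        case Prec::FP32: return processDataSketch<float>(src, dst, dims);
        case Prec::I32:  return processDataSketch<int32_t>(src, dst, dims);
        case Prec::U8:   return processDataSketch<uint8_t>(src, dst, dims);
        default: throw std::runtime_error("unsupported precision");
    }
}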
} case Precision::U8: { - return processData::value_type>(srcData, weightsData, dstData, srcDesc, dstDesc); + return processData::value_type>(srcData, weightsData, dstData, inDims, outDims); } case Precision::I32: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), reinterpret_cast(dstData), inDims, outDims); } default: { IE_THROW() << "EmbeddingBagSum layer does not support precision '" - + std::string(srcDesc.getPrecision().name()) + "'"; + + std::string(srcPrc.name()) + "'"; } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h index f3513501b5c..ef5e7ed9a2f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h @@ -21,8 +21,8 @@ public: size_t perSampleWeightsIdx, size_t defaultIndexIdx); - void execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + void execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, + const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims); ~MKLDNNEmbeddingBagSumNode() = default; @@ -37,7 +37,7 @@ protected: template void processData(const T* srcData, const T* weightsData, T* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims); const size_t EMB_TABLE_IDX = 0lu; const size_t INDICES_IDX; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp index 82eae04dcc2..1cea74dc5fb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp @@ -62,21 +62,21 @@ void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } void MKLDNNEmbeddingSegmentsSumNode::initFromInputs() { indices_ = 
reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); - indicesSize_ = getParentEdgeAt(INDICES_IDX)->getBlob()->size(); + indicesSize_ = getParentEdgeAt(INDICES_IDX)->getShape().getElementsCount(); segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->GetPtr()); @@ -124,7 +124,8 @@ void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingSegmentsSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp index fe2362003f3..d04b80b0086 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -253,22 +252,22 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm) { - const int rois_num = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; - assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getDims()[1])); - assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getDims()[1])); + const int rois_num = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; + assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[1])); + assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[1])); const auto* boxes = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); const auto* deltas = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp index 255f8443765..8bd70dd2a6e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -313,36 +311,36 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::initSupportedP if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn::stream strm) { try { - if (inDims.size() != 4 || outDims.size() != 2) { + if (inputShapes.size() != 4 || outputShapes.size() != 2) { IE_THROW() << "Incorrect number of input or output edges!"; } size_t anchor_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_ANCHORS)->getDims().ToSizeVector().size(); i++) { - anchor_dims_size *= getParentEdgeAt(INPUT_ANCHORS)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_ANCHORS)->getShape().getRank(); i++) { + anchor_dims_size *= getParentEdgeAt(INPUT_ANCHORS)->getShape().getStaticDims()[i]; } size_t deltas_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_DELTAS)->getDims().ToSizeVector().size(); i++) { - deltas_dims_size *= getParentEdgeAt(INPUT_DELTAS)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_DELTAS)->getShape().getRank(); i++) { + deltas_dims_size *= getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[i]; } if (anchor_dims_size != deltas_dims_size) IE_THROW() << "'Anchors' blob size for ONNXProposal is incompatible with 'deltas' blob size!"; size_t score_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_SCORES)->getDims().ToSizeVector().size(); i++) { - score_dims_size *= getParentEdgeAt(INPUT_SCORES)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_SCORES)->getShape().getRank(); i++) { + score_dims_size *= getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[i]; } if (deltas_dims_size != (4 * score_dims_size)) IE_THROW() << "'Deltas' blob size for ONNXProposal is incompatible with 'scores' blob size!"; @@ -356,11 +354,11 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); - const int anchors_num = getParentEdgeAt(INPUT_SCORES)->getDims()[0]; + const int anchors_num = getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[0]; // bottom shape: (num_anchors) x H x W - const int bottom_H = getParentEdgeAt(INPUT_DELTAS)->getDims()[1]; - const int bottom_W = getParentEdgeAt(INPUT_DELTAS)->getDims()[2]; + const int bottom_H = getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[1]; + const int bottom_W = getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[2]; // input image height & width const float img_H = 
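// The proposal-generation hunk above now derives flat element counts by multiplying the static
// dims of each input shape and cross-checks them (anchors vs. deltas vs. scores). A small
// stand-alone sketch of that consistency check; elementCount and checkProposalInputSizes are
// illustrative helpers, not the node's actual functions.
#include <cstddef>
#include <stdexcept>
#include <vector>

size_t elementCount(const std::vector<size_t>& dims) {
    size_t total = 1;
    for (size_t d : dims) total *= d;
    return total;
}

void checkProposalInputSizes(const std::vector<size_t>& anchorDims,
                             const std::vector<size_t>& deltaDims,
                             const std::vector<size_t>& scoreDims) {
    if (elementCount(anchorDims) != elementCount(deltaDims))
        throw std::runtime_error("'anchors' blob size is incompatible with 'deltas' blob size");
    if (elementCount(deltaDims) != 4 * elementCount(scoreDims))
        throw std::runtime_error("'deltas' blob size is incompatible with 'scores' blob size");
}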
p_img_info_cpu[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp index b5d073a0b35..001257c443d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -55,22 +53,22 @@ void MKLDNNExperimentalDetectronPriorGridGeneratorNode::initSupportedPrimitiveDe if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronPriorGridGeneratorNode::execute(mkldnn::stream strm) { - const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getDims()[0]; - assert(getParentEdgeAt(INPUT_PRIORS)->getDims()[1] == 4); + const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getShape().getStaticDims()[0]; + assert(getParentEdgeAt(INPUT_PRIORS)->getShape().getStaticDims()[1] == 4); // Execute - const int layer_width = grid_w_ ? grid_w_ : getParentEdgeAt(INPUT_FEATUREMAP)->getDims()[3]; - const int layer_height = grid_h_ ? grid_h_ : getParentEdgeAt(INPUT_FEATUREMAP)->getDims()[2]; - const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getDims()[3]) / layer_width; - const float step_h = stride_h_ ? stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getDims()[2]) / layer_height; + const int layer_width = grid_w_ ? grid_w_ : getParentEdgeAt(INPUT_FEATUREMAP)->getShape().getStaticDims()[3]; + const int layer_height = grid_h_ ? grid_h_ : getParentEdgeAt(INPUT_FEATUREMAP)->getShape().getStaticDims()[2]; + const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getShape().getStaticDims()[3]) / layer_width; + const float step_h = stride_h_ ? 
stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getShape().getStaticDims()[2]) / layer_height; const auto *bottom_data_0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *top_data_0 = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp index 94e7f033a95..09313e30bd6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -341,27 +339,27 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::initSupportedPrimitiveD if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream strm) { - const int levels_num = inDims.size() - INPUT_FEATURES_START; - const int num_rois = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; - const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getDims()[1]; + const int levels_num = inputShapes.size() - INPUT_FEATURES_START; + const int num_rois = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; + const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getShape().getStaticDims()[1]; const int feaxels_per_roi = pooled_height_ * pooled_width_ * channels_num; auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); auto *output_rois_features = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_FEATURES)[0]->getMemoryPtr()->GetPtr()); float *output_rois = nullptr; - if (OUTPUT_ROIS < outDims.size()) { + if (OUTPUT_ROIS < outputShapes.size()) { output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); } @@ -381,8 +379,8 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream const int level_rois_num = rois_per_level[i + 1] - level_rois_offset; if (level_rois_num > 0) { auto *featuremap = reinterpret_cast(getParentEdgeAt(INPUT_FEATURES_START + i)->getMemoryPtr()->GetPtr()); - const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getDims()[2]; - const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getDims()[3]; + const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getShape().getStaticDims()[2]; + const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getShape().getStaticDims()[3]; ROIAlignForward_cpu_kernel(feaxels_per_roi * level_rois_num, featuremap, 1.0f / pyramid_scales_[i], diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp index d543658f78e..f77c3fcb2b0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -51,14 +49,14 @@ void MKLDNNExperimentalDetectronTopKROIsNode::initSupportedPrimitiveDescriptors( if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronTopKROIsNode::execute(mkldnn::stream strm) { - const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; + const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; const int top_rois_num = (std::min)(max_rois_num_, input_rois_num); auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp index d4c5d303796..13ada3cf81d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -411,8 +409,8 @@ void MKLDNNExtractImagePatchesNode::initSupportedPrimitiveDescriptors() { if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end()) IE_THROW() << errorPrefix << "has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); } @@ -421,12 +419,12 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { char *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); const size_t dtype_size = getOriginalInputPrecisionAtPort(0).size(); - const auto& inDims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const auto& inDims = getParentEdgeAt(0)->getShape().getStaticDims(); const size_t IC = inDims[1]; const size_t IH = inDims[2]; const size_t IW = inDims[3]; - const auto& outDims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + const auto& outDims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); const size_t OB = outDims[0]; const size_t OH = outDims[2]; const size_t OW = outDims[3]; @@ -436,8 +434,8 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { const size_t RH = _rates[0], RW = _rates[1]; const size_t PT = _pad_top, PL = _pad_left; - const std::vector istrides = getParentEdgeAt(0)->getDesc().getBlockingDesc().getStrides(); - const std::vector ostrides = getChildEdgesAtPort(0)[0]->getDesc().getBlockingDesc().getStrides(); + const std::vector istrides = getParentEdgeAt(0)->getMemory().GetDescWithType().getStrides(); + const std::vector ostrides = 
getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); const std::vector ostrides_partial = {ostrides[0], KW * IC * ostrides[1], IC * ostrides[1], ostrides[1]}; if (extract_image_patches_kernel) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp index b12bed6a476..b08ebae30f4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp @@ -19,6 +19,7 @@ #include "ie_parallel.hpp" #include +#include // Quantization ranges validation is switched off by default in order to avoid regressions on user side // #define VALIDATE_QUANTIZATION_RANGES @@ -219,7 +220,7 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ this->preamble(); - if (jqp_.src_layout == Layout::CHW || jqp_.src_layout == Layout::NCHW || jqp_.src_layout == Layout::NCDHW) + if (jqp_.is_planar) compute_planar(); else compute_generic(); @@ -1090,31 +1091,23 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr MKLDNNFakeQuantizeNode::getDataFormats() const { +std::vector MKLDNNFakeQuantizeNode::getDataFormats() const { // Special case for first FQ in the network - if (getParentEdgesAtPort(0)[0]->getDims()[getAxis()] == 3) { - return { MKLDNNMemory::GetPlainFormat(getParentEdgesAtPort(0)[0]->getDims()) }; + if (getParentEdgesAtPort(0)[0]->getShape().getStaticDims()[getAxis()] == 3) { + return { LayoutType::ncsp }; } else { if (isBinarization()) { - return {memory::format_tag::nhwc}; + return { LayoutType::nspc }; } else { - switch (getParentEdgesAtPort(0)[0]->getDims().ndims()) { - case 4: - if (getAxis() == 1) { - auto blkFormat = mayiuse(cpu::x64::avx512_common) ? memory::format_tag::nChw16c : memory::format_tag::nChw8c; - return {blkFormat, memory::format_tag::nhwc, memory::format_tag::nchw}; - } else { - return {memory::format_tag::nchw}; - } - case 5: - if (getAxis() == 1) { - auto blkFormat = mayiuse(cpu::x64::avx512_common) ? memory::format_tag::nCdhw16c : memory::format_tag::nCdhw8c; - return {blkFormat, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; - } else { - return {memory::format_tag::ncdhw}; - } - default: - return {MKLDNNMemory::GetPlainFormat(getParentEdgesAtPort(0)[0]->getDims())}; + if (one_of(getParentEdgesAtPort(0)[0]->getShape().getRank(), 4, 5)) { + if (getAxis() == 1) { + auto blkFormat = mayiuse(cpu::x64::avx512_common) ? 
LayoutType::nCsp16c : LayoutType::nCsp8c; + return { blkFormat, LayoutType::nspc, LayoutType::ncsp }; + } else { + return { LayoutType::ncsp }; + } + } else { + return { LayoutType::ncsp }; } } } @@ -1147,12 +1140,12 @@ void MKLDNNFakeQuantizeNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << "has unsupported number of parent edges at port " << i; } - if (getParentEdgesAtPort(0)[0]->getDims().ndims() != getChildEdgesAtPort(0)[0]->getDims().ndims()) { + if (getParentEdgesAtPort(0)[0]->getShape().getRank() != getChildEdgesAtPort(0)[0]->getShape().getRank()) { IE_THROW() << errorPrefix << "has different ranks for input and output tensors"; } if (isBinarization()) { - if (getParentEdgesAtPort(0)[0]->getDims().ndims() != 4ul) { + if (getParentEdgesAtPort(0)[0]->getShape().getRank() != 4ul) { IE_THROW() << errorPrefix << "doesn't support input/output rank != 4"; } } @@ -1189,47 +1182,52 @@ void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() { } } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getInputPrecision()); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOutputPrecision()); - for (auto& fmt : getDataFormats()) { - LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < getParentEdges().size(); i++) { - DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; if (i == 0) { - dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt); + auto descCreator = BlockedDescCreator::getCommonCreators().at(fmt); + dataConfig.desc = descCreator->createUniqueDesc(getInputPrecision(), getParentEdgeAt(i)->getShape().getStaticDims()); } else { - dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), memory::data_type::f32, - MKLDNNMemory::GetPlainFormat(getParentEdgeAt(i)->getDims())); + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + dataConfig.desc = descCreator->createUniqueDesc(Precision::FP32, getParentEdgeAt(i)->getShape().getStaticDims()); } config.inConfs.push_back(dataConfig); } - DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt); + auto descCreator = BlockedDescCreator::getCommonCreators().at(fmt); + dataConfig.desc = descCreator->createUniqueDesc(getOutputPrecision(), getChildEdgeAt(0)->getShape().getStaticDims()); config.outConfs.push_back(dataConfig); - supportedPrimitiveDescriptors.push_back({config, impl_type, fmt}); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } } void MKLDNNFakeQuantizeNode::createPrimitive() { auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto inDims = config.inConfs[0].desc.getDims(); + auto inDims = config.inConfs[0].desc->getShape().getStaticDims(); jqp.c = inDims.size() > 1 ? 
inDims[1] : 1; - jqp.src_prc = config.inConfs[0].desc.getPrecision(); + jqp.src_prc = config.inConfs[0].desc->getPrecision(); jqp.wei_prc = Precision::FP32; - jqp.dst_prc = config.outConfs[0].desc.getPrecision(); + jqp.dst_prc = config.outConfs[0].desc->getPrecision(); - jqp.src_layout = config.inConfs[0].desc.getLayout(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + jqp.s_str = srcDesc.getStrides(); + + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + jqp.d_str = dstDesc.getStrides(); + + jqp.is_planar = srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5); jqp.op_type = getAlgorithm(); @@ -1258,7 +1256,7 @@ void MKLDNNFakeQuantizeNode::createPrimitive() { if (quantize_kernel) quantize_kernel->create_ker(); - size_t axisSize = getParentEdgeAt(0)->getDims()[getAxis()]; + size_t axisSize = getParentEdgeAt(0)->getShape().getStaticDims()[getAxis()]; size_t axisPaddedSize = rnd_up(axisSize, 16); MKLDNNMemoryDesc weightsDataDesc = {{(uint32_t)axisPaddedSize}, memory::data_type::f32, memory::format_tag::x}; @@ -1297,12 +1295,11 @@ void MKLDNNFakeQuantizeNode::executeReference() { auto src = reinterpret_cast(srcMemory->GetPtr()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = srcMemory->GetDesc().getShape().getStaticDims(); + auto dstDims = dstMemory->GetDesc().getShape().getStaticDims(); - auto s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); - auto d_str = config.outConfs[0].desc.getBlockingDesc().getStrides(); + auto s_str = jqp.s_str; + auto d_str = jqp.d_str; const int N = srcDims[0]; const int C = srcDims.size() > 1 ? srcDims[1] : 1; @@ -1419,10 +1416,9 @@ void MKLDNNFakeQuantizeNode::executeBinarization() { auto thresholds = reinterpret_cast(internalBlobMemory[0]->GetData()); auto output_mask = reinterpret_cast(internalBlobMemory[1]->GetData()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto src_dims = config.inConfs[0].desc.getDims(); + auto src_dims = srcMemory->GetDesc().getShape().getStaticDims(); - std::vector s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); + std::vector s_str = jqp.s_str; size_t tmp = s_str[s_str.size() - 1]; for (int i = s_str.size() - 1; i > 1; i--) { s_str[i] = s_str[i - 1]; @@ -1463,24 +1459,23 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { auto output_scale = reinterpret_cast(internalBlobMemory[4]->GetData()); auto output_shift = reinterpret_cast(internalBlobMemory[5]->GetData()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); + auto& srcDesc = srcMemory->GetDesc(); + auto srcDims = srcDesc.getShape().getStaticDims(); - bool is_blk_format = jqp.src_layout != Layout::NHWC && jqp.src_layout != Layout::NDHWC; - int blk_size = (jqp.src_layout == Layout::CHW || - jqp.src_layout == Layout::NCHW || - jqp.src_layout == Layout::NCDHW) ? 1 : mayiuse(cpu::x64::avx512_common) ? 16 : 8; + bool is_blk_format = !srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5); + int blk_size = (srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5)) + ? 1 : mayiuse(cpu::x64::avx512_common) ? 
16 : 8; auto src_type_size = jqp.src_prc.size(); auto dst_type_size = jqp.dst_prc.size(); - std::vector s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); + auto s_str = jqp.s_str; - if (jqp.src_layout == BLOCKED) { + if (is_blk_format) { s_str[1] /= blk_size; } - if (jqp.src_layout == Layout::NHWC || jqp.src_layout == Layout::NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { size_t tmp = s_str[s_str.size() - 1]; for (int i = s_str.size() - 1; i > 1; i--) { s_str[i] = s_str[i - 1]; @@ -1495,7 +1490,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { const int H = srcDims.size() == 3 ? srcDims[2] : srcDims.size() > 3 ? srcDims[srcDims.size() - 2] : 1; const int W = srcDims.size() > 3 ? srcDims[srcDims.size() - 1] : 1; - if (jqp.src_layout == Layout::CHW) { + if (srcDesc.hasLayoutType(LayoutType::ncsp) && srcDesc.getShape().getRank() == 3) { parallel_nd(N, CB, D, [&](int n, int cb, int d) { auto arg = jit_quantize_call_args(); @@ -1542,7 +1537,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { arg.src_step = is_blk_format ? (size_t) blk_size * src_type_size : (size_t) C * src_type_size; arg.dst_step = is_blk_format ? (size_t) blk_size * dst_type_size : (size_t) C * dst_type_size; - arg.block_size = (is_blk_format && jqp.src_layout != Layout::NC) ? (size_t) blk_size : nstl::min(blk_size, C - c); + arg.block_size = (is_blk_format && srcDims.size() != 2) ? (size_t) blk_size : nstl::min(blk_size, C - c); arg.work_amount = (size_t) W; (*quantize_kernel)(&arg); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h index 4430acac9ba..eb6a49b1210 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h @@ -17,12 +17,14 @@ namespace MKLDNNPlugin { struct jit_quantize_params { int c; + bool is_planar; InferenceEngine::Precision src_prc; InferenceEngine::Precision wei_prc; InferenceEngine::Precision dst_prc; - InferenceEngine::Layout src_layout; + std::vector s_str; + std::vector d_str; Algorithm op_type; }; @@ -109,7 +111,7 @@ public: private: void init() override; - std::vector getDataFormats() const; + std::vector getDataFormats() const; void executeReference(); void executeBinarization(); void executeQuantization(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index e5b9ade8567..ee8dc1b730b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -12,6 +12,7 @@ #include #include #include "utils/general_utils.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -50,18 +51,18 @@ MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const std::shared_ptr MKLDNNFullyConnectedNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNFullyConnectedNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if (dims.getRank() == 3) return {memory::format_tag::tnc}; - 
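In the FakeQuantize changes above, the kernel parameters no longer read strides and layout from the selected config's TensorDesc; instead createPrimitive caches them from the blocked memory descriptors, and a single is_planar flag replaces the old Layout comparisons. A hedged sketch of that idiom (the BlockedMemoryDesc template argument is an assumption, since the diff text elides template parameters):

// Sketch only: caching strides and the planar flag at primitive creation time.
void ExampleQuantizeNode::createPrimitive() {
    // BlockedMemoryDesc is assumed as the blocked-descriptor type; the diff elides the template argument.
    auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
    jqp.s_str = srcDesc.getStrides();   // reused later by executeReference()/executeQuantization()
    auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
    jqp.d_str = dstDesc.getStrides();
    // planar means plain ncsp layout with rank 3, 4 or 5 (the old CHW/NCHW/NCDHW checks)
    jqp.is_planar = srcDesc.hasLayoutType(LayoutType::ncsp) &&
                    one_of(srcDesc.getShape().getRank(), 3, 4, 5);
}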
else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; return {memory::format_tag::any}; } @@ -100,23 +101,23 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { outputDataType = memory::data_type::bf16; } - MKLDNNDims inDims = getParentEdgeAt(0)->getDims(); - MKLDNNDims outDims = getChildEdgeAt(0)->getDims(); + const auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); - if (inDims.ndims() == 3) { + if (inDims.size() == 3) { weightsDims = InferenceEngine::SizeVector({static_cast(outDims[2]), static_cast(inDims[2])}); } else { weightsDims.push_back(outDims[1]); - for (int i = 1; i < inDims.ndims(); i++) + for (int i = 1; i < inDims.size(); i++) weightsDims.push_back(inDims[i]); } biasesDims.push_back(weightsDims[0]); - for (auto format : getAvailableFormatsForDims(inDims)) { - MKLDNNMemoryDesc in_candidate(inDims, inputDataType, format); - MKLDNNMemoryDesc out_candidate(outDims, outputDataType, memory::format_tag::any); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + auto in_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(inDims), inputDataType, format); + auto out_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(outDims), outputDataType, mkldnn::memory::format_tag::any); - createDescriptor({in_candidate}, {out_candidate}); + createDescriptorInternal(in_candidate, out_candidate); } } @@ -236,35 +237,40 @@ std::shared_ptr MKLDNNFullyConnectedNode::initPrimitiveA return attr; } -void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; +// WA: creation MKLDNNMemoryDesc with format == any is prohibited +// so we create mkldnn::memory::desc directly +// we need specific method and can't remove createDescriptor from base class because its used into initDescriptor +void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::desc &inputDesc, + const mkldnn::memory::desc &outputDesc) { + auto in_candidate = inputDesc; + auto out_candidate = outputDesc; - mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); - mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); - if (inDesc.getPrecision() == Precision::BF16) { + mkldnn::memory::data_type wdt = in_candidate.data_type(); + mkldnn::memory::data_type bdt = out_candidate.data_type(); + if (in_candidate.data_type() == mkldnn::memory::data_type::bf16) { bdt = mkldnn::memory::data_type::f32; - } else if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { + } else if (in_candidate.data_type() == mkldnn::memory::data_type::u8 || in_candidate.data_type() == mkldnn::memory::data_type::s8) { wdt = memory::data_type::s8; if (withBiases) bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(BIAS_ID)); } - if (inDesc.getDims().size() == 3) { - auto inDims = inDesc.getDims(); - auto outDims = outDesc.getDims(); - InferenceEngine::SizeVector normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; - 
InferenceEngine::SizeVector normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; - inDesc = InferenceEngine::TensorDesc(inDesc.getPrecision(), normalizedInDims, TensorDesc::getLayoutByDims(normalizedInDims)); - outDesc = InferenceEngine::TensorDesc(outDesc.getPrecision(), normalizedOutDims, TensorDesc::getLayoutByDims(normalizedOutDims)); + if (in_candidate.dims().size() == 3) { + auto inDims = in_candidate.dims(); + auto outDims = out_candidate.dims(); + auto normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; + auto normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; + in_candidate = mkldnn::memory::desc(normalizedInDims, in_candidate.data_type(), + MKLDNNMemory::GetPlainFormatByRank(normalizedInDims.size())); + out_candidate = mkldnn::memory::desc(normalizedOutDims, out_candidate.data_type(), + MKLDNNMemory::GetPlainFormatByRank(normalizedOutDims.size())); } - MKLDNNMemoryDesc in_candidate(inDesc); - MKLDNNMemoryDesc out_candidate(outDesc); - MKLDNNMemoryDesc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); if (withBiases) { - MKLDNNMemoryDesc bias_candidate(MKLDNNDims(inDims[BIAS_ID]), bdt, memory::format_tag::any); + mkldnn::memory::desc bias_candidate(MKLDNNExtensionUtils::convertToDnnlDims(inputShapes[BIAS_ID].getStaticDims()), bdt, + mkldnn::memory::format_tag::any); MKLDNNDescriptor desc(std::shared_ptr( new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, bias_candidate, out_candidate))); @@ -277,40 +283,28 @@ void MKLDNNFullyConnectedNode::createDescriptor(const std::vector 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else if (getParentEdgeAt(idx)->getDims().ndims() == 3) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(idx)->getDims().ToSizeVector()))); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); - } +void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + createDescriptorInternal(MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0])); } -MKLDNNMemoryDesc MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else if (getChildEdgeAt(idx)->getDims().ndims() == 3) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getChildEdgeAt(idx)->getDims().ToSizeVector()))); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - 
desc.getBlockingDesc())); +std::unique_ptr MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); + + if (getParentEdgeAt(idx)->getShape().getRank() == 3) { + desc = MKLDNNMemoryDesc(getParentEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); } + return MKLDNNPlugin::make_unique(std::move(desc)); +} + +std::unique_ptr MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); + if (getChildEdgeAt(idx)->getShape().getRank() == 3) { + desc = MKLDNNMemoryDesc(getChildEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(idx)->getShape().getRank())); + } + return MKLDNNPlugin::make_unique(std::move(desc)); } InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const { @@ -324,7 +318,7 @@ InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNFullyConnectedNode, FullyConnected); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h index 63b1e88ae6f..01820fdfcc3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h @@ -16,7 +16,7 @@ class MKLDNNFullyConnectedNode : public MKLDNNNode { public: MKLDNNFullyConnectedNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - std::vector getAvailableFormatsForDims(const MKLDNNDims &dims) const override; + std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; void createPrimitive() override; void execute(mkldnn::stream strm) override; @@ -27,15 +27,15 @@ public: } const std::vector& getPrimitivesPriority() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; @@ -47,6 +47,9 @@ protected: std::shared_ptr initPrimitiveAttr(); private: + void createDescriptorInternal(const mkldnn::memory::desc &inputDesc, + const mkldnn::memory::desc &outputDesc); + InferenceEngine::SizeVector weightsDims; InferenceEngine::SizeVector biasesDims; diff --git 
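One detail worth calling out in the FullyConnected changes: rank-3 inputs are still collapsed to 2D before the inner_product descriptor is built, only now directly on mkldnn::memory::desc dims. For an input of shape [N, T, C] the descriptor describes an [N*T, C] matrix. A standalone restatement of that normalization (illustrative helper, not part of the patch):

// {N, T, C} -> {N * T, C}, matching the normalizedInDims/normalizedOutDims computation above.
// mkldnn::memory::dims comes from the mkldnn/oneDNN headers already used by these files.
static mkldnn::memory::dims normalize3dTo2d(const mkldnn::memory::dims& d) {
    return {d[0] * d[1], d[2]};
}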
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp index e3e14e35691..e4da50abe8a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp @@ -86,9 +86,9 @@ void MKLDNNGatherElementsNode::initSupportedPrimitiveDescriptors() { dataTypeSize_ = inDataPrecision.size(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -98,7 +98,7 @@ void MKLDNNGatherElementsNode::directExecution() { const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const int outSize = getChildEdgeAt(0)->getBlob()->size(); + const int outSize = getChildEdgeAt(0)->getShape().getElementsCount(); auto threadBody = [&](const int ithr, const int nthr) { int start(0lu), end(0lu); splitter(outSize, nthr, ithr, start, end); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp index ee7623f9b48..75ee34dbda5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp @@ -89,9 +89,9 @@ void MKLDNNGatherNDNode::initSupportedPrimitiveDescriptors() { _dataTypeSize = inDataPrecision.size(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -101,10 +101,11 @@ void MKLDNNGatherNDNode::gatherElementwise() { const auto *indices = reinterpret_cast(getParentEdgeAt(_indicesIndex)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto strides = getParentEdgeAt(_dataIndex)->getDesc().getBlockingDesc().getStrides(); + auto strides = getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides(); const size_t* srcMultipliers = strides.data() + _batchDims; - const size_t cycles = getChildEdgeAt(0)->getBlob()->byteSize() / (sizeof(dataType) * _batchNum); + const size_t cycles = getChildEdgeAt(0)->getShape().getElementsCount() * + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size() / (sizeof(dataType) * _batchNum); const size_t CS = cycles * _sliceRank; const size_t CB = cycles * _blockSize; const size_t workAmount = _batchNum * cycles; @@ -149,11 +150,11 @@ void MKLDNNGatherNDNode::gatherBlocks() { std::vector srcMultipliers(_sliceRank); for (size_t i = 0; i < _sliceRank ; i++) - srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getDesc().getBlockingDesc().getStrides()[i + _batchDims]; + srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides()[i + _batchDims]; const size_t batchStep = _batchStep * _dataTypeSize; const size_t dataStep = _blockSize * _dataTypeSize; - const size_t cycles = 
getChildEdgeAt(0)->getBlob()->byteSize() / (dataStep * _batchNum); + const size_t cycles = getChildEdgeAt(0)->getMemory().GetSize() / (dataStep * _batchNum); const size_t CS = cycles * _sliceRank; const size_t CB = cycles * dataStep; const size_t workAmount = _batchNum * cycles; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp index ade92f6a4a0..f41a57730a5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp @@ -75,10 +75,10 @@ void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() { return; Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_desc_type::ref_any); } @@ -92,10 +92,10 @@ void MKLDNNGatherNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix_ << " has unidentified preferable primitive descriptor."; - const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getDims().ToSizeVector(); - const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getDims().ToSizeVector(); - const SizeVector dstDims = getChildEdgeAt(0)->getDims().ToSizeVector(); - dataSize = getParentEdgeAt(GATHER_DATA)->getDesc().getPrecision().size(); + const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getShape().getStaticDims(); + const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getShape().getStaticDims(); + const SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + dataSize = getParentEdgeAt(GATHER_DATA)->getMemory().GetDesc().getPrecision().size(); indexRange = srcDims[axis]; batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, 1, std::multiplies()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp index ce396446df2..89fb6c08167 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -66,11 +65,11 @@ void MKLDNNGatherTreeNode::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has incorrect input/output data precision. 
Must be the same."; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); } @@ -85,16 +84,16 @@ template void MKLDNNGatherTreeNode::gatherTreeKernel() noexcept { const auto *step_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_STEP_IDX)->getMemoryPtr()->GetPtr()); const auto * const parent_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemoryPtr()->GetPtr()); - const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDims().size() - - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDesc().getBlockingDesc().getOffsetPadding(); + const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getElementsCount() + - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetDescWithType().getOffsetPadding(); const auto *max_seq_len = reinterpret_cast(getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getMemoryPtr()->GetPtr()); auto end_token = (reinterpret_cast(getParentEdgeAt(GATHER_TREE_END_TOKEN)->getMemoryPtr()->GetPtr()))[0]; auto * final_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - SizeVector step_idx_dims = getParentEdgeAt(GATHER_TREE_STEP_IDX)->getDims().ToSizeVector(); - SizeVector parent_idx_dims = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDims().ToSizeVector(); - SizeVector max_seq_len_dims = getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getDims().ToSizeVector(); - SizeVector final_idx_dims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + SizeVector step_idx_dims = getParentEdgeAt(GATHER_TREE_STEP_IDX)->getShape().getStaticDims(); + SizeVector parent_idx_dims = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getStaticDims(); + SizeVector max_seq_len_dims = getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getShape().getStaticDims(); + SizeVector final_idx_dims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); int32_t max_time = step_idx_dims[0]; const size_t batch_size = step_idx_dims[1]; const size_t beam_width = step_idx_dims[2]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp index 86f89ccea7c..ef87345daae 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -22,6 +23,42 @@ void MKLDNNGenericNode::getSupportedDescriptors() { } } +NodeConfig MKLDNNGenericNode::convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig) { + NodeConfig config; + config.dynBatchSupport = layerConfig.dynBatchSupport; + config.inConfs.resize(layerConfig.inConfs.size()); + for (size_t i = 0; i < layerConfig.inConfs.size(); i++) { + config.inConfs[i].inPlace = layerConfig.inConfs[i].inPlace; + config.inConfs[i].constant = layerConfig.inConfs[i].constant; + config.inConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.inConfs[i].desc).clone(); + } + config.outConfs.resize(layerConfig.outConfs.size()); + for (size_t i = 0; i < layerConfig.outConfs.size(); i++) 
{ + config.outConfs[i].inPlace = layerConfig.outConfs[i].inPlace; + config.outConfs[i].constant = layerConfig.outConfs[i].constant; + config.outConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.outConfs[i].desc).clone(); + } + return config; +} + +InferenceEngine::LayerConfig MKLDNNGenericNode::convertNodeToLayerConfig(const NodeConfig &nodeConfig) { + InferenceEngine::LayerConfig config; + config.dynBatchSupport = nodeConfig.dynBatchSupport; + config.inConfs.resize(nodeConfig.inConfs.size()); + for (size_t i = 0; i < nodeConfig.inConfs.size(); i++) { + config.inConfs[i].inPlace = nodeConfig.inConfs[i].inPlace; + config.inConfs[i].constant = nodeConfig.inConfs[i].constant; + config.inConfs[i].desc = MemoryDescUtils::convertToTensorDesc(*nodeConfig.inConfs[i].desc); + } + config.outConfs.resize(nodeConfig.outConfs.size()); + for (size_t i = 0; i < nodeConfig.outConfs.size(); i++) { + config.outConfs[i].inPlace = nodeConfig.outConfs[i].inPlace; + config.outConfs[i].constant = nodeConfig.outConfs[i].constant; + config.outConfs[i].desc = MemoryDescUtils::convertToTensorDesc(*nodeConfig.outConfs[i].desc); + } + return config; +} + void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -35,7 +72,7 @@ void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { } for (auto& config : configs) { - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); + supportedPrimitiveDescriptors.emplace_back(convertLayerToNodeConfig(config), impl_desc_type::unknown); } } if (impls.empty()) { @@ -109,9 +146,9 @@ void MKLDNNGenericNode::execLayer() { std::vector inputs; std::vector constInputs; std::vector inputDescs; - std::vector outputShapes; + std::vector execOutputShapes; for (size_t i = 0; i < getParentEdges().size(); i++) { - auto inputBlob = getParentEdgeAt(i)->getBlob(); + auto inputBlob = MemoryDescUtils::interpretAsBlob(getParentEdgeAt(i)->getMemory()); inputs.push_back(inputBlob); constInputs.push_back(inputBlob); if (isDynBatch && dynBatchLim >= inputs[inputs.size() - 1]->getTensorDesc().getDims()[0]) { @@ -137,14 +174,14 @@ void MKLDNNGenericNode::execLayer() { } } std::vector outputs; - for (size_t i = 0; i < outDims.size(); i++) { + for (size_t i = 0; i < outputShapes.size(); i++) { if (isDynBatch) { auto out_edge = getChildEdgesAtPort(i)[0]; - auto td = out_edge->getBlob()->getTensorDesc(); - td.setDims(outputShapes[i]); + auto td = MemoryDescUtils::convertToTensorDesc(out_edge->getMemory().GetDesc()); + td.setDims(execOutputShapes[i]); outputs.push_back(make_blob_with_precision(td, out_edge->getMemory().GetData())); } else { - outputs.push_back(getChildEdgesAtPort(i)[0]->getBlob()); + outputs.push_back(MemoryDescUtils::interpretAsBlob(getChildEdgesAtPort(i)[0]->getMemory())); } } InferenceEngine::ResponseDesc resp; @@ -154,8 +191,8 @@ void MKLDNNGenericNode::execLayer() { } } -void MKLDNNGenericNode::initDescriptor(const InferenceEngine::LayerConfig &config) { - InferenceEngine::LayerConfig rightConfig = config; +void MKLDNNGenericNode::initDescriptor(const NodeConfig &config) { + NodeConfig rightConfig = config; InferenceEngine::StatusCode rc; InferenceEngine::ResponseDesc resp; @@ -190,14 +227,15 @@ void MKLDNNGenericNode::initDescriptor(const InferenceEngine::LayerConfig &confi impls.clear(); impls.emplace_back(selectedImpl); - rc = impls[0]->init(rightConfig, &resp); + auto ieConfig = convertNodeToLayerConfig(rightConfig); + rc = impls[0]->init(ieConfig, &resp); if (rc != 
InferenceEngine::OK) { IE_THROW() << resp.msg; } - + rightConfig = convertLayerToNodeConfig(ieConfig); auto descriptor = getSelectedPrimitiveDescriptor(); if (descriptor != nullptr) { - descriptor->getConfig() = rightConfig; + descriptor->setConfig(rightConfig); } bool isConst = !rightConfig.inConfs.empty() || !rightConfig.outConfs.empty(); for (const auto &inConf : rightConfig.inConfs) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h index f93b79c7852..63d0d5e20f0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h @@ -29,12 +29,15 @@ public: return false; } - void initDescriptor(const InferenceEngine::LayerConfig& config) override; + void initDescriptor(const NodeConfig& config) override; void execLayer(); void cleanup() override; protected: + NodeConfig convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig); + InferenceEngine::LayerConfig convertNodeToLayerConfig(const NodeConfig &nodeConfig); + InferenceEngine::ILayerImplFactory::Ptr extFactory; std::vector impls; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp index 0dbe8dee59e..605aa2d6af5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -46,8 +44,8 @@ void MKLDNNGRNNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32, false, 0}}, + {{LayoutType::ncsp, Precision::FP32, false, 0}}, impl_desc_type::ref_any); } @@ -55,7 +53,7 @@ void MKLDNNGRNNode::execute(mkldnn::stream strm) { const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - SizeVector dims = getParentEdgeAt(0)->getDims().ToSizeVector(); + SizeVector dims = getParentEdgeAt(0)->getShape().getStaticDims(); int N = static_cast((dims.size() > 0) ? dims[0] : 1); int C = static_cast((dims.size() > 1) ? dims[1] : 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp index cdb553309b8..33b6fdab4f4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp @@ -246,7 +246,7 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const } void MKLDNNInputNode::cloneBlobIfRequired() { - MKLDNNDims dims(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape()); + std::vector dims(constOp->get_shape().empty() ? 
ngraph::Shape(1, 1) : constOp->get_shape()); const auto prec = convertPrecision(constOp->get_element_type()); const size_t size = dims.size(); MKLDNNMemoryDesc memDesc(dims, MKLDNNExtensionUtils::IEPrecisionToDataType(prec)); @@ -349,15 +349,15 @@ void MKLDNNInputNode::cloneBlobIfRequired() { } } -MKLDNNInputNode::MKLDNNInputNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &prc, const std::string &name, +MKLDNNInputNode::MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(type, name, eng, cache) { constant = ConstantType::NoConst; if (getType() == Input) { - outDims.emplace_back(dims); + outputShapes.emplace_back(shape); addOriginalOutputPrecision(prc); } else if (getType() == Output) { - inDims.emplace_back(dims); + inputShapes.emplace_back(shape); addOriginalInputPrecision(prc); } } @@ -388,42 +388,29 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - LayerConfig config; - config.dynBatchSupport = true; + std::vector inPortConfs; + std::vector outPortConfs; + if (getType() == Input || getType() == MemoryInput) { precision = getOriginalOutputPrecisionAtPort(0); if (precision == Precision::U16 || isMeanImage) { precision = Precision::FP32; } - DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto mem_tdesc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); - dataConfig.desc = mem_tdesc; - config.outConfs.push_back(dataConfig); - // ReadValue operation expects constant input + outPortConfs.push_back({LayoutType::ncsp, precision}); if (!getParentEdges().empty()) { - DataConfig inConfig; - inConfig.inPlace = -1; - inConfig.constant = true; - inConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); - config.inConfs.push_back(inConfig); + inPortConfs.push_back({LayoutType::ncsp, precision, true}); } } else if (getType() == Output) { precision = getOriginalInputPrecisionAtPort(0); if (precision == Precision::U16) precision = Precision::FP32; - DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto mem_tdesc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); - dataConfig.desc = mem_tdesc; - config.inConfs.push_back(dataConfig); + inPortConfs.push_back({LayoutType::ncsp, precision}); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); + + addSupportedPrimDesc(inPortConfs, + outPortConfs, + impl_desc_type::unknown); } void MKLDNNInputNode::createPrimitive() { @@ -440,7 +427,7 @@ void MKLDNNInputNode::createPrimitive() { << " from node " << getParentEdgeAt(i)->getParent()->getName() << "."; } - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h index 872f8e14f8e..8c57ac88730 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h @@ 
-14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNInputNode : public MKLDNNNode { public: MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - MKLDNNInputNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &prc, const std::string &name, + MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index caedec83ee8..df6e4930b54 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -1829,7 +1829,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - srcDim = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); + srcDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); int dataRank = srcDim.size(); // get pad @@ -1868,7 +1868,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { } else { srcDimPad = srcDim; } - dstDim = getChildEdgeAt(0)->getDims().ToSizeVector(); + dstDim = getChildEdgeAt(0)->getShape().getStaticDims(); } void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { @@ -1902,7 +1902,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { inputPrec = inputPrecision; outputPrec = outputPrecision; - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; if (isAxesSpecified) { config.inConfs.resize(4); @@ -1916,22 +1916,26 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { auto axesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); auto pushDesc = [&](memory::format_tag dataFormat, impl_desc_type implDetail) { - config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), inputDataType, dataFormat); - config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(TARGET_SHAPE_ID)->getDims(), targetShapeType, memory::format_tag::x); - config.inConfs[SCALES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(SCALES_ID)->getDims(), scalesType, memory::format_tag::x); + config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), + inputDataType, dataFormat); + config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(TARGET_SHAPE_ID)->getShape().getStaticDims(), + targetShapeType, memory::format_tag::x); + config.inConfs[SCALES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(SCALES_ID)->getShape().getStaticDims(), scalesType, + memory::format_tag::x); if (isAxesSpecified) - config.inConfs[AXES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES_ID)->getDims(), axesType, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, dataFormat); - supportedPrimitiveDescriptors.push_back({config, implDetail, dataFormat}); + config.inConfs[AXES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES_ID)->getShape().getStaticDims(), axesType, + memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, dataFormat); + 
supportedPrimitiveDescriptors.push_back({config, implDetail}); }; - auto channels = getParentEdgeAt(DATA_ID)->getDims().ndims() > 1 ? getParentEdgeAt(DATA_ID)->getDims()[1] : 1; + auto channels = getParentEdgeAt(DATA_ID)->getShape().getRank() > 1 ? getParentEdgeAt(DATA_ID)->getShape().getStaticDims()[1] : 1; if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), ref); } else { // blk and by_channel JIT kernel on sse41 or above machine - if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 4) { + if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 4) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nhwc, jit_avx512); if (channels != 1) @@ -1945,7 +1949,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { if (channels != 1) pushDesc(memory::format_tag::nChw8c, jit_sse42); } - } else if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 5 && mode != InterpolateMode::cubic) { + } else if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 5 && mode != InterpolateMode::cubic) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::ndhwc, jit_avx512); if (channels != 1) @@ -1963,7 +1967,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), jit_avx2); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), jit_avx2); } } } @@ -1989,11 +1993,10 @@ void MKLDNNInterpolateNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; - auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_interpolate_config_params(); jcp.mode = mode; - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = getParentEdgeAt(0)->getMemory().GetDataType(); + jcp.dst_dt = getChildEdgeAt(0)->getMemory().GetDataType(); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.indices_size = sizeof(int); @@ -2008,9 +2011,10 @@ void MKLDNNInterpolateNode::createPrimitive() { jcp.ID = srcDimPad5d[2]; jcp.spatial_dim_size = spatialDimSize; - if (getChildEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { + if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { jcp.layout = InterpolateLayoutType::planar; - } else if (getChildEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + } else if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c)) { jcp.layout = InterpolateLayoutType::block; } else { jcp.layout = InterpolateLayoutType::by_channel; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp index 5750f8517b0..702d5b42f0c 100644 --- 
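The Interpolate createPrimitive change above replaces the old isPlainFormat()/isBlockedCFormat() checks with hasLayoutType() queries when picking the kernel layout. Restated in isolation (a sketch; the free function is hypothetical and the MemoryDesc parameter type is an assumption, as the diff elides template parameters):

// Maps the output memory descriptor to the kernel layout, as in MKLDNNInterpolateNode::createPrimitive.
InterpolateLayoutType selectLayout(const MemoryDesc& desc) {
    if (desc.hasLayoutType(LayoutType::ncsp))
        return InterpolateLayoutType::planar;       // plain NCHW-like layout
    if (desc.hasLayoutType(LayoutType::nCsp8c) || desc.hasLayoutType(LayoutType::nCsp16c))
        return InterpolateLayoutType::block;        // channel-blocked layouts
    return InterpolateLayoutType::by_channel;       // NHWC/NDHWC-like
}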
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp @@ -64,8 +64,8 @@ void MKLDNNLogSoftmaxNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp index a3460091ecf..b107fca7834 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp @@ -6,6 +6,7 @@ #include #include #include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -88,19 +89,20 @@ void MKLDNNLrnNode::getSupportedDescriptors() { precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto parentDims = getParentEdgeAt(0)->getDims(); + const auto &parentShape = getParentEdgeAt(0)->getShape(); + const auto parentStaticDims = parentShape.getStaticDims(); - for (auto format : getAvailableFormatsForDims(parentDims)) { - MKLDNNMemoryDesc in_candidate(parentDims, inputDataType, format); - createDescriptor({in_candidate}, {}); + for (auto format : getAvailableFormatsForDims(parentShape)) { + auto in_candidate = MKLDNNPlugin::make_unique(parentStaticDims, inputDataType, format); + createDescriptor({in_candidate.get()}, {}); } } -MKLDNNMemoryDesc MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::unique_ptr MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx > 0) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(getOriginalInputPrecisions()[idx], - getParentEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(idx)->getDims().ToSizeVector()))); + return MKLDNNPlugin::make_unique(getParentEdgeAt(idx)->getShape().getStaticDims(), + MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[idx]), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); } else { return MKLDNNNode::getSrcMemDesc(primitive_desc_it, idx); } @@ -123,12 +125,12 @@ bool MKLDNNLrnNode::created() const { return getType() == Lrn; } -void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { +void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { mkldnn::algorithm alg = isAcrossMaps ? 
mkldnn::algorithm::lrn_across_channels : mkldnn::algorithm::lrn_within_channel; - MKLDNNMemoryDesc in_candidate(inputDesc[0]); MKLDNNDescriptor desc(std::shared_ptr( - new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, in_candidate, size, alpha, beta, k))); + new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), + size, alpha, beta, k))); descs.push_back(desc); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h index 53cfaa79682..295d16b369c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h @@ -17,12 +17,12 @@ public: MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp index 908686bf6df..fed1158f97e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp @@ -49,18 +49,18 @@ void MKLDNNMathNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNMathNode::execute(mkldnn::stream strm) { - size_t dataSize = getChildEdgeAt(0)->getBlob()->size(); + size_t dataSize = getChildEdgeAt(0)->getShape().getElementsCount(); const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index b7f2c0a4277..a0a7f7eafa4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -55,8 +55,8 @@ MKLDNNMatMulNode::MKLDNNMatMulNode(const std::shared_ptr& op, cons errorPrefix = "Gemm node with name '" + getName() + "'"; const auto matMul = std::dynamic_pointer_cast(op); - alpha = 1; - beta = 1; + alpha = 1.f; + beta = 0.f; transposeA = matMul->get_transpose_a(); transposeB = matMul->get_transpose_b(); } else { @@ -70,14 +70,14 @@ void MKLDNNMatMulNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() 
<< errorPrefix << " has incorrect number of output edges for layer " << getName(); - auto inDims0 = getParentEdgeAt(0)->getDims(); - auto inDims1 = getParentEdgeAt(1)->getDims(); - auto outDims = getChildEdgeAt(0)->getDims(); + auto inDims0 = getParentEdgeAt(0)->getShape().getStaticDims(); + auto inDims1 = getParentEdgeAt(1)->getShape().getStaticDims(); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); - if (inDims0.ndims() != inDims1.ndims() || inDims0.ndims() != outDims.ndims()) + if (inDims0.size() != inDims1.size() || inDims0.size() != outDims.size()) IE_THROW() << errorPrefix << " has invalid dims count"; - int nDims = inDims0.ndims(); + int nDims = inDims0.size(); xAxis = nDims - 1; yAxis = nDims - 2; auto xAxis0 = transposeA ? yAxis : xAxis; @@ -135,22 +135,22 @@ void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { auto inputDataType1 = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrec1); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; - auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig { - InferenceEngine::DataConfig dataConfig; + auto createDataConfig = [](const std::vector& dims, memory::data_type dataType) -> PortConfig { + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims)); + dataConfig.desc = MKLDNNPlugin::make_unique(dims, dataType, MKLDNNMemory::GetPlainFormatByRank(dims.size())); return dataConfig; }; - config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), inputDataType0)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), inputDataType1)); - config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), outputDataType)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType0)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getShape().getStaticDims(), inputDataType1)); + config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType)); - supportedPrimitiveDescriptors.push_back(PrimitiveDescInfo(config, impl_desc_type::gemm_any, MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()))); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::gemm_any); } void MKLDNNMatMulNode::initOptimalPrimitiveDescriptor() { @@ -158,8 +158,9 @@ void MKLDNNMatMulNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; + + if (isConfigDefined(config)) + return; MKLDNNNode::initOptimalPrimitiveDescriptor(); @@ -179,6 +180,34 @@ void MKLDNNMatMulNode::createPrimitive() { IE_THROW() << errorPrefix << " did not allocate input memory"; if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; + + auto inDims0 = src0MemPtr->GetDims(); + auto outDims = dstMemPtr->GetDims(); + + params.src0_mem_ptr = src0MemPtr; + params.src1_mem_ptr = src1MemPtr; + params.dst_mem_ptr = dstMemPtr; + + params.ndims = outDims.size(); + + params.MB1 = 1; + params.MB2 = outDims.size() > 3 ? 
outDims[params.ndims - 3] : 1; + + params.M = outDims[yAxis]; + params.N = outDims[xAxis]; + params.K = transposeA ? inDims0[yAxis] : inDims0[xAxis]; + + params.transa = transposeA ? 'T' : 'N'; + params.transb = transposeB ? 'T' : 'N'; + + params.lda = transposeA ? params.M : params.K; + params.ldb = transposeB ? params.K : params.N; + params.ldc = params.N; + + params.shift1 = params.M * params.N * params.MB2; + params.shift2 = params.M * params.N; + + runtimePrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); } inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const float *A, int lda, @@ -212,67 +241,57 @@ inline void process_gemm(char transa, char transb, int M, int N, int K, float al } template -void MKLDNNMatMulNode::process_data() { - auto inDims0 = getParentEdgeAt(0)->getDims(); - auto inDims1 = getParentEdgeAt(1)->getDims(); - auto outDims = getChildEdgeAt(0)->getDims(); +inline void MKLDNNMatMulNode::process_data() { + const T0* src0_ptr = reinterpret_cast(params.src0_mem_ptr->GetPtr()); + const T1* src1_ptr = reinterpret_cast(params.src1_mem_ptr->GetPtr()); + float* dst_ptr = reinterpret_cast(params.dst_mem_ptr->GetPtr()); - auto& srcMemory0 = getParentEdgeAt(0)->getMemory(); - auto& srcMemory1 = getParentEdgeAt(1)->getMemory(); - auto& dstMemory0 = getChildEdgeAt(0)->getMemory(); + const int MB = batchToProcess(); + if (params.ndims == 4) { + params.MB1 = MB; + } else if (params.ndims == 3) { + params.shift1 = params.shift1 * MB / params.MB2; + params.MB2 = MB; + } - const T0 *src0_ptr = reinterpret_cast(srcMemory0.GetPtr()); - const T1 *src1_ptr = reinterpret_cast(srcMemory1.GetData()); - float *dst_ptr = reinterpret_cast(dstMemory0.GetData()); - - int MB1 = outDims.ndims() == 4 ? batchToProcess() : 1; - int MB2 = outDims.ndims() == 3 ? batchToProcess() : outDims.ndims() > 3 ? outDims[outDims.ndims() - 3] : 1; - int M = outDims[yAxis]; - int N = outDims[xAxis]; - int K = transposeA ? inDims0[yAxis] : inDims0[xAxis]; - - const char transa = transposeA ? 'T' : 'N'; - const char transb = transposeB ? 'T' : 'N'; - - int lda = transposeA ? M : K; - int ldb = transposeB ? 
K : N; - int ldc = N; - - beta = 0.f; - - for (int b1 = 0; b1 < MB1; b1++) { + for (int b1 = 0; b1 < params.MB1; ++b1) { const T0 *a_ptr = src0_ptr; const T1 *b_ptr = src1_ptr; float *d_ptr = dst_ptr; - for (int b2 = 0; b2 < MB2; b2++) { - process_gemm(transa, transb, M, N, K, alpha, a_ptr, lda, b_ptr, ldb, beta, d_ptr, ldc); + for (int b2 = 0; b2 < params.MB2; ++b2) { + process_gemm(params.transa, params.transb, params.M, params.N, params.K, + alpha, a_ptr, params.lda, b_ptr, params.ldb, beta, d_ptr, params.ldc); a_ptr += aOffsets[0]; b_ptr += bOffsets[0]; - d_ptr += M * N; + d_ptr += params.shift2; } src0_ptr += aOffsets[1]; src1_ptr += bOffsets[1]; - dst_ptr += MB2 * M * N; + dst_ptr += params.shift1; } } void MKLDNNMatMulNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision()) { - case Precision::FP32: + switch (runtimePrecision) { + case Precision::FP32: { process_data(); break; - case Precision::BF16: + } + case Precision::BF16: { process_data(); break; - case Precision::I8: + } + case Precision::I8: { process_data(); break; - case Precision::U8: + } + case Precision::U8: { process_data(); break; + } default: IE_THROW() << errorPrefix << " has incorrect precision on first input"; } @@ -283,13 +302,13 @@ bool MKLDNNMatMulNode::created() const { } int MKLDNNMatMulNode::getMaxBatch() { - if (!outDims.empty()) - return outDims[0][0]; + if (!outputShapes.empty()) + return outputShapes[0].getStaticDims()[0]; return 0; } InferenceEngine::Precision MKLDNNMatMulNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } REG_MKLDNN_PRIM_FOR(MKLDNNMatMulNode, MatMul); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h index 6196665aabc..3f056cc9953 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h @@ -28,8 +28,8 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: - float alpha = 1.0f; - float beta = 1.0f; + float alpha = 1.f; + float beta = 0.f; bool transposeA = false; bool transposeB = false; @@ -40,9 +40,36 @@ private: std::vector bOffsets; std::vector cOffsets; - template void process_data(); + InferenceEngine::Precision runtimePrecision; + + template inline void process_data(); std::string errorPrefix; + + struct { + MKLDNNMemoryPtr src0_mem_ptr = nullptr; + MKLDNNMemoryPtr src1_mem_ptr = nullptr; + MKLDNNMemoryPtr dst_mem_ptr = nullptr; + + char transa = 'N'; + char transb = 'N'; + + int MB1 = 1; + int MB2 = 1; + + int M = 0; + int N = 0; + int K = 0; + + int lda = 0; + int ldb = 0; + int ldc = 0; + + int shift1 = 0; + int shift2 = 0; + + size_t ndims = 0; + } params; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp new file mode 100644 index 00000000000..ade776e8ce3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp @@ -0,0 +1,383 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_matrix_nms_node.h" + +#include +#include +#include +#include +#include + +#include "ie_parallel.hpp" +#include "ngraph/opsets/opset8.hpp" +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include 
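
For reference, the cached GEMM parameters above follow the usual row-major convention C[M x N] = alpha * op(A) * op(B) + beta * C, where op() optionally transposes its operand and lda/ldb/ldc are leading dimensions. A minimal standalone sketch of one inner-batch step, with naive loops standing in for the mkldnn/BLAS call (names are illustrative, not part of the patch):

// Naive reference GEMM: C[M x N] = alpha * op(A) * op(B) + beta * C, row-major.
// op(A) is A transposed when transa == 'T'; leading dimensions are chosen as in
// the node above: lda = transposeA ? M : K, ldb = transposeB ? K : N, ldc = N.
static void naive_gemm(char transa, char transb, int M, int N, int K, float alpha,
                       const float* A, int lda, const float* B, int ldb,
                       float beta, float* C, int ldc) {
    for (int m = 0; m < M; ++m) {
        for (int n = 0; n < N; ++n) {
            float acc = 0.f;
            for (int k = 0; k < K; ++k) {
                const float a = (transa == 'T') ? A[k * lda + m] : A[m * lda + k];
                const float b = (transb == 'T') ? B[n * ldb + k] : B[k * ldb + n];
                acc += a * b;
            }
            // beta == 0 (as this node now sets) means previous contents of C are ignored
            C[m * ldc + n] = alpha * acc + (beta != 0.f ? beta * C[m * ldc + n] : 0.f);
        }
    }
}

Between the two batch loops the source pointers advance by aOffsets/bOffsets, while the destination advances by shift2 = M * N per inner iteration and shift1 = MB2 * M * N per outer iteration, which is exactly what the cached params express.
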
"utils/general_utils.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using MatrixNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE; + +using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; +using ngNmseDcayFunction = ngraph::op::v8::MatrixNms::DecayFunction; + +bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto nms = std::dynamic_pointer_cast(op); + if (!nms) { + errorMessage = "Only internal MatrixNms operation is supported"; + return false; + } + const auto& attrs = nms->get_attrs(); + const auto& sortType = attrs.sort_result_type; + if (!one_of(sortType, ngNmsSortResultType::NONE, ngNmsSortResultType::SCORE, ngNmsSortResultType::CLASSID)) { + errorMessage = "Does not support SortResultType mode: " + ngraph::as_string(sortType); + return false; + } + const auto& decayType = attrs.decay_function; + if (!one_of(decayType, ngNmseDcayFunction::LINEAR, ngNmseDcayFunction::GAUSSIAN)) { + errorMessage = "Does not support DcayFunction " + ngraph::as_string(decayType); + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "MatrixNMS layer with name '" + getName() + "' "; + const auto matrix_nms = std::dynamic_pointer_cast(op); + + if (getOriginalInputsNumber() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + + if (getOriginalOutputsNumber() != 3) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + + const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims(); + const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims(); + if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) { + IE_THROW() << errorPrefix << "has incompatible 'boxes' and 'scores' input dmensions"; + } + + m_numBatches = boxes_dims[0]; + m_numBoxes = boxes_dims[1]; + if (boxes_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); + if (boxes_dims[2] != 4) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; + + m_numClasses = scores_dims[1]; + if (scores_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); + + if (m_numBatches != scores_dims[0]) + IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; + if (m_numBoxes != scores_dims[2]) + IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; + auto& attrs = matrix_nms->get_attrs(); + if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::CLASSID) + m_sortResultType = MatrixNmsSortResultType::CLASSID; + else if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::SCORE) + m_sortResultType = MatrixNmsSortResultType::SCORE; + else if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::NONE) + m_sortResultType = MatrixNmsSortResultType::NONE; + + if (attrs.decay_function == ngraph::op::v8::MatrixNms::DecayFunction::GAUSSIAN) + m_decayFunction = GAUSSIAN; + else if (attrs.decay_function == 
ngraph::op::v8::MatrixNms::DecayFunction::LINEAR) + m_decayFunction = LINEAR; + + m_sortResultAcrossBatch = attrs.sort_result_across_batch; + m_scoreThreshold = attrs.score_threshold; + m_nmsTopk = attrs.nms_top_k; + m_keepTopk = attrs.keep_top_k; + m_backgroundClass = attrs.background_class; + + m_gaussianSigma = attrs.gaussian_sigma; + m_postThreshold = attrs.post_threshold; + m_normalized = attrs.normalized; + int64_t max_output_boxes_per_class = 0; + size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1; + if (m_nmsTopk >= 0) + max_output_boxes_per_class = std::min(m_numBoxes, static_cast(m_nmsTopk)); + else + max_output_boxes_per_class = m_numBoxes; + + m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes; + if (m_keepTopk >= 0) + m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast(m_keepTopk)); +} + +void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + m_realNumClasses = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1; + m_realNumBoxes = m_nmsTopk == -1 ? m_numBoxes : std::min(m_nmsTopk, static_cast(m_numBoxes)); + m_numPerBatch.resize(m_numBatches); + m_filteredBoxes.resize(m_numBatches * m_realNumClasses * m_realNumBoxes); + m_numPerBatchClass.resize(m_numBatches, std::vector(m_numClasses, 0)); + m_classOffset.resize(m_numClasses, 0); + + for (size_t i = 0, count = 0; i < m_numClasses; i++) { + if (i == m_backgroundClass) + continue; + m_classOffset[i] = (count++) * m_realNumBoxes; + } + + if (m_decayFunction == MatrixNmsDecayFunction::LINEAR) { + m_decay_fn = [](float iou, float max_iou, float sigma) -> float { + return (1. - iou) / (1. - max_iou + 1e-10f); + }; + } else { + m_decay_fn = [](float iou, float max_iou, float sigma) -> float { + return std::exp((max_iou * max_iou - iou * iou) * sigma); + }; + } + + const std::vector supportedFloatPrecision = {Precision::FP32}; + const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); + + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_INDICES), supportedIntOutputPrecision, "selected_indices", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_OUTPUTS), supportedFloatPrecision, "selected_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALID_OUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType); + + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::ref_any); +} + +bool MKLDNNMatrixNmsNode::created() const { + return getType() == MatrixNms; +} + +namespace { + +static inline float boxArea(const float* bbox, const bool normalized) { + if (bbox[2] < bbox[0] || bbox[3] < bbox[1]) { + return static_cast(0.); + } else { + const float width = bbox[2] - bbox[0]; + const float height = bbox[3] - bbox[1]; + if (normalized) { + return width * height; + } else { + return (width + 1) * (height + 1); + } + } +} + +static inline float intersectionOverUnion(const float* bbox1, const float* bbox2, const bool normalized) { + if (bbox2[0] > bbox1[2] || bbox2[2] < bbox1[0] || bbox2[1] > bbox1[3] || bbox2[3] < bbox1[1]) { + return 
static_cast(0.); + } else { + const float xMin = std::max(bbox1[0], bbox2[0]); + const float yMin = std::max(bbox1[1], bbox2[1]); + const float xMax = std::min(bbox1[2], bbox2[2]); + const float yMax = std::min(bbox1[3], bbox2[3]); + float norm = normalized ? static_cast(0.) : static_cast(1.); + float width = xMax - xMin + norm; + float height = yMax - yMin + norm; + const float interArea = width * height; + const float bbox1Area = boxArea(bbox1, normalized); + const float bbox2Area = boxArea(bbox2, normalized); + return interArea / (bbox1Area + bbox2Area - interArea); + } +} +} // namespace + +size_t MKLDNNMatrixNmsNode::nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx) { + std::vector candidateIndex(m_numBoxes); + std::iota(candidateIndex.begin(), candidateIndex.end(), 0); + auto end = std::remove_if(candidateIndex.begin(), candidateIndex.end(), [&scoresData, this](int32_t idx) { + return scoresData[idx] <= m_scoreThreshold; + }); + int64_t numDet = 0; + int64_t originalSize = std::distance(candidateIndex.begin(), end); + if (originalSize <= 0) { + return 0; + } + if (m_nmsTopk > -1 && originalSize > m_nmsTopk) { + originalSize = m_nmsTopk; + } + + std::partial_sort(candidateIndex.begin(), candidateIndex.begin() + originalSize, end, [&scoresData](int32_t a, int32_t b) { + return scoresData[a] > scoresData[b]; + }); + + std::vector iouMatrix((originalSize * (originalSize - 1)) >> 1); + std::vector iouMax(originalSize); + + iouMax[0] = 0.; + InferenceEngine::parallel_for(originalSize - 1, [&](size_t i) { + float max_iou = 0.; + size_t actual_index = i + 1; + auto idx_a = candidateIndex[actual_index]; + for (int64_t j = 0; j < actual_index; j++) { + auto idx_b = candidateIndex[j]; + auto iou = intersectionOverUnion(boxesData + idx_a * 4, boxesData + idx_b * 4, m_normalized); + max_iou = std::max(max_iou, iou); + iouMatrix[actual_index * (actual_index - 1) / 2 + j] = iou; + } + iouMax[actual_index] = max_iou; + }); + + if (scoresData[candidateIndex[0]] > m_postThreshold) { + auto box_index = candidateIndex[0]; + auto box = boxesData + box_index * 4; + filterBoxes[0].box.x1 = box[0]; + filterBoxes[0].box.y1 = box[1]; + filterBoxes[0].box.x2 = box[2]; + filterBoxes[0].box.y2 = box[3]; + filterBoxes[0].index = batchIdx * m_numBoxes + box_index; + filterBoxes[0].score = scoresData[candidateIndex[0]]; + filterBoxes[0].batchIndex = batchIdx; + filterBoxes[0].classIndex = classIdx; + numDet++; + } + + for (int64_t i = 1; i < originalSize; i++) { + float minDecay = 1.; + for (int64_t j = 0; j < i; j++) { + auto maxIou = iouMax[j]; + auto iou = iouMatrix[i * (i - 1) / 2 + j]; + auto decay = m_decay_fn(iou, maxIou, m_gaussianSigma); + minDecay = std::min(minDecay, decay); + } + auto ds = minDecay * scoresData[candidateIndex[i]]; + if (ds <= m_postThreshold) + continue; + auto boxIndex = candidateIndex[i]; + auto box = boxesData + boxIndex * 4; + filterBoxes[numDet].box.x1 = box[0]; + filterBoxes[numDet].box.y1 = box[1]; + filterBoxes[numDet].box.x2 = box[2]; + filterBoxes[numDet].box.y2 = box[3]; + filterBoxes[numDet].index = batchIdx * m_numBoxes + boxIndex; + filterBoxes[numDet].score = ds; + filterBoxes[numDet].batchIndex = batchIdx; + filterBoxes[numDet].classIndex = classIdx; + numDet++; + } + return numDet; +} + +void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) { + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); + const float* scores = 
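
The decay lambdas above implement the Matrix NMS re-scoring rule: instead of discarding overlapping boxes outright, each candidate keeps the most pessimistic (minimum) decay induced by any higher-scoring candidate. A minimal standalone sketch of that re-scoring, assuming candidates are already sorted by descending score and pairwise IoUs are precomputed (helper names are illustrative):

#include <algorithm>
#include <cmath>
#include <vector>

enum class Decay { Linear, Gaussian };

// Same formulas as the lambdas above: linear decay (1 - iou) / (1 - max_iou + 1e-10f),
// Gaussian decay exp((max_iou^2 - iou^2) * sigma).
static float decay(float iou, float max_iou, float sigma, Decay kind) {
    return kind == Decay::Linear ? (1.f - iou) / (1.f - max_iou + 1e-10f)
                                 : std::exp((max_iou * max_iou - iou * iou) * sigma);
}

// scores: sorted descending; iou[i][j] (j < i): IoU of candidates i and j;
// max_iou[j]: largest IoU of candidate j with any earlier candidate.
static std::vector<float> matrix_nms_rescore(const std::vector<float>& scores,
                                             const std::vector<std::vector<float>>& iou,
                                             const std::vector<float>& max_iou,
                                             float sigma, Decay kind) {
    std::vector<float> rescored(scores);
    for (size_t i = 1; i < scores.size(); ++i) {
        float min_decay = 1.f;
        for (size_t j = 0; j < i; ++j)
            min_decay = std::min(min_decay, decay(iou[i][j], max_iou[j], sigma, kind));
        rescored[i] = min_decay * scores[i];
    }
    return rescored;
}

Candidates whose re-scored value falls to post_threshold or below are then dropped, which is the ds <= m_postThreshold check in nmsMatrix.
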
reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + + InferenceEngine::parallel_for2d(m_numBatches, m_numClasses, [&](size_t batchIdx, size_t classIdx) { + if (classIdx == m_backgroundClass) { + m_numPerBatchClass[batchIdx][classIdx] = 0; + return; + } + const float* boxesPtr = boxes + batchIdx * m_numBoxes * 4; + const float* scoresPtr = scores + batchIdx * (m_numClasses * m_numBoxes) + classIdx * m_numBoxes; + size_t classNumDet = 0; + size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; + classNumDet = nmsMatrix(boxesPtr, scoresPtr, m_filteredBoxes.data() + batchOffset + m_classOffset[classIdx], batchIdx, classIdx); + m_numPerBatchClass[batchIdx][classIdx] = classNumDet; + }); + + InferenceEngine::parallel_for(m_numBatches, [&](size_t batchIdx) { + size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; + BoxInfo* batchFilteredBox = m_filteredBoxes.data() + batchOffset; + auto& numPerClass = m_numPerBatchClass[batchIdx]; + auto numDet = std::accumulate(numPerClass.begin(), numPerClass.end(), 0); + auto start_offset = numPerClass[0]; + + for (size_t i = 1; i < numPerClass.size(); i++) { + auto offset_class = m_classOffset[i]; + for (size_t j = 0; j < numPerClass[i]; j++) { + batchFilteredBox[start_offset + j] = batchFilteredBox[offset_class + j]; + } + start_offset += numPerClass[i]; + } + auto keepNum = numDet; + if (m_keepTopk > -1) { + auto k = static_cast(m_keepTopk); + if (keepNum > k) + keepNum = k; + } + + std::partial_sort(batchFilteredBox, batchFilteredBox + keepNum, batchFilteredBox + numDet, [](const BoxInfo& lhs, const BoxInfo rhs) { + return lhs.score > rhs.score || (lhs.score == rhs.score && lhs.classIndex < rhs.classIndex) || + (lhs.score == rhs.score && lhs.classIndex == rhs.classIndex && lhs.index < rhs.index); + }); + m_numPerBatch[batchIdx] = keepNum; + }); + + auto startOffset = m_numPerBatch[0]; + for (size_t i = 1; i < m_numPerBatch.size(); i++) { + auto offset_batch = i * m_realNumClasses * m_realNumBoxes; + for (size_t j = 0; j < m_numPerBatch[i]; j++) { + m_filteredBoxes[startOffset + j] = m_filteredBoxes[offset_batch + j]; + } + startOffset += m_numPerBatch[i]; + } + + if (m_sortResultAcrossBatch) { /* sort across batch */ + if (m_sortResultType == MatrixNmsSortResultType::SCORE) { + parallel_sort(m_filteredBoxes.begin(), m_filteredBoxes.begin() + startOffset, [](const BoxInfo& l, const BoxInfo& r) { + return (l.score > r.score) || (l.score == r.score && l.batchIndex < r.batchIndex) || + (l.score == r.score && l.batchIndex == r.batchIndex && l.classIndex < r.classIndex) || + (l.score == r.score && l.batchIndex == r.batchIndex && l.classIndex == r.classIndex && l.index < r.index); + }); + } else if (m_sortResultType == MatrixNmsSortResultType::CLASSID) { + parallel_sort(m_filteredBoxes.begin(), m_filteredBoxes.begin() + startOffset, [](const BoxInfo& l, const BoxInfo& r) { + return (l.classIndex < r.classIndex) || (l.classIndex == r.classIndex && l.batchIndex < r.batchIndex) || + (l.classIndex == r.classIndex && l.batchIndex == r.batchIndex && l.score > r.score) || + (l.classIndex == r.classIndex && l.batchIndex == r.batchIndex && l.score == r.score && l.index < r.index); + }); + } + } + + float* selectedOutputs = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr()->GetPtr()); + int* selectedIndices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr()->GetPtr()); + int* validOutputs = 
reinterpret_cast(getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->GetPtr()); + std::copy(m_numPerBatch.begin(), m_numPerBatch.end(), validOutputs); + + int64_t outputOffset = 0; + int64_t originalOffset = 0; + for (size_t i = 0; i < m_numBatches; i++) { + auto real_boxes = m_numPerBatch[i]; + for (size_t j = 0; j < real_boxes; j++) { + auto originalIndex = originalOffset + j; + selectedIndices[j + outputOffset] = static_cast(m_filteredBoxes[originalIndex].index); + auto selectedBase = selectedOutputs + (outputOffset + j) * 6; + selectedBase[0] = m_filteredBoxes[originalIndex].classIndex; + selectedBase[1] = m_filteredBoxes[originalIndex].score; + selectedBase[2] = m_filteredBoxes[originalIndex].box.x1; + selectedBase[3] = m_filteredBoxes[originalIndex].box.y1; + selectedBase[4] = m_filteredBoxes[originalIndex].box.x2; + selectedBase[5] = m_filteredBoxes[originalIndex].box.y2; + } + std::fill_n(selectedOutputs + (outputOffset + real_boxes) * 6, (m_maxBoxesPerBatch - real_boxes) * 6, -1); + std::fill_n(selectedIndices + (outputOffset + real_boxes), m_maxBoxesPerBatch - real_boxes, -1); + outputOffset += m_maxBoxesPerBatch; + originalOffset += real_boxes; + } +} + +void MKLDNNMatrixNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNMatrixNmsNode, MatrixNms); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h new file mode 100644 index 00000000000..5d85a366952 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h @@ -0,0 +1,100 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include + +namespace MKLDNNPlugin { + +enum MatrixNmsSortResultType { + CLASSID, // sort selected boxes by class id (ascending) in each batch element + SCORE, // sort selected boxes by score (descending) in each batch element + NONE // do not guarantee the order in each batch element +}; + +enum MatrixNmsDecayFunction { GAUSSIAN, LINEAR }; + +class MKLDNNMatrixNmsNode : public MKLDNNNode { +public: + MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + // input + static const size_t NMS_BOXES = 0; + static const size_t NMS_SCORES = 1; + + // output + static const size_t NMS_SELECTED_OUTPUTS = 0; + static const size_t NMS_SELECTED_INDICES = 1; + static const size_t NMS_VALID_OUTPUTS = 2; + + size_t m_numBatches; + size_t m_numBoxes; + size_t m_numClasses; + size_t m_maxBoxesPerBatch; + + MatrixNmsSortResultType m_sortResultType; + bool m_sortResultAcrossBatch; + float m_scoreThreshold; + int m_nmsTopk; + int m_keepTopk; + int m_backgroundClass; + MatrixNmsDecayFunction m_decayFunction; + float m_gaussianSigma; + float m_postThreshold; + bool m_normalized; + + struct Rectangle { + Rectangle(float x_left, float y_left, float 
x_right, float y_right) : x1 {x_left}, y1 {y_left}, x2 {x_right}, y2 {y_right} {} + + Rectangle() = default; + + float x1 = 0.0f; + float y1 = 0.0f; + float x2 = 0.0f; + float y2 = 0.0f; + }; + + struct BoxInfo { + BoxInfo(const Rectangle& r, int64_t idx, float sc, int64_t batch_idx, int64_t class_idx) + : box {r}, index {idx}, batchIndex {batch_idx}, classIndex {class_idx}, score {sc} {} + + BoxInfo() = default; + + Rectangle box; + int64_t index = -1; + int64_t batchIndex = -1; + int64_t classIndex = -1; + float score = 0.0f; + }; + std::string errorPrefix; + const std::string inType = "input", outType = "output"; + std::vector m_numPerBatch; + std::vector> m_numPerBatchClass; + std::vector m_filteredBoxes; + std::vector m_classOffset; + size_t m_realNumClasses; + size_t m_realNumBoxes; + float (*m_decay_fn)(float, float, float); + void checkPrecision(const InferenceEngine::Precision prec, const std::vector precList, const std::string name, + const std::string type); + + size_t nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp index 4aa0281a114..3218bc54eb0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp @@ -60,13 +60,14 @@ void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() { InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims())); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, memory::format_tag::any); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) { @@ -105,8 +106,7 @@ MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const std::shared_ptr void MKLDNNMemoryInputNode::createPrimitive() { MKLDNNInputNode::createPrimitive(); - auto mem_desc = getChildEdgeAt(0)->getMemoryPtr()->GetDescriptor(); - dataStore->Create(mem_desc); + dataStore->Create(getChildEdgeAt(0)->getMemory().GetDesc()); // default memory state is zero filled dataStore->FillZero(); @@ -119,7 +119,7 @@ void MKLDNNMemoryInputNode::createPrimitive() { * @param src source memory object */ inline -static void simple_copy(MKLDNNMemory& dst, const MKLDNNMemory& src) { +static void simple_copy(const MKLDNNMemory& dst, const MKLDNNMemory& src) { auto srcPtr = static_cast(src.GetPtr()); auto dstPtr = static_cast(dst.GetPtr()); auto srcSizeInByte = src.GetSize(); @@ -146,11 +146,10 @@ void MKLDNNMemoryInputNode::storeState(const MKLDNNMemory &new_state) { } void MKLDNNMemoryInputNode::execute(mkldnn::stream strm) { - auto dst_mem = getChildEdgeAt(0)->getMemory(); // TODO: Should be simple call of: // dst_mem.SetData(dataStore, false); // 
But because of performance reason we use simple manual copy - simple_copy(dst_mem, *dataStore); + simple_copy(getChildEdgeAt(0)->getMemory(), *dataStore); } MKLDNNMemoryNodeVirtualEdge::Holder* MKLDNNMemoryNodeVirtualEdge::registerInput(MKLDNNMemoryInputNode * node) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp new file mode 100644 index 00000000000..64dccbdaeab --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp @@ -0,0 +1,413 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_multiclass_nms.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ie_parallel.hpp" +#include "utils/general_utils.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; +using MulticlassNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE<ngraph::op::v8::MulticlassNms>; + +bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept { + try { + const auto nms = std::dynamic_pointer_cast<MulticlassNmsIEInternal>(op); + if (!nms) { + errorMessage = "Only internal MultiClassNonMaxSuppression operation is supported"; + return false; + } + const auto& attrs = nms->get_attrs(); + const auto& sortType = attrs.sort_result_type; + if (!one_of(sortType, ngNmsSortResultType::NONE, ngNmsSortResultType::SCORE, ngNmsSortResultType::CLASSID)) { + errorMessage = "Does not support SortResultType mode: " + ngraph::as_string(sortType); + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNMultiClassNmsNode::MKLDNNMultiClassNmsNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + errorPrefix = "MultiClassNms layer with name '" + getName() + "' "; + const auto nms = std::dynamic_pointer_cast<MulticlassNmsIEInternal>(op); + + if (getOriginalInputsNumber() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + + if (getOriginalOutputsNumber() != 3) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + + auto& attrs = nms->get_attrs(); + sort_result_across_batch = attrs.sort_result_across_batch; + max_output_boxes_per_class = attrs.nms_top_k; + iou_threshold = attrs.iou_threshold; + score_threshold = attrs.score_threshold; + background_class = attrs.background_class; + keep_top_k = attrs.keep_top_k; + if (attrs.sort_result_type == ngNmsSortResultType::CLASSID) + sort_result_type = MulticlassNmsSortResultType::CLASSID; + else if (attrs.sort_result_type == ngNmsSortResultType::SCORE) + sort_result_type = MulticlassNmsSortResultType::SCORE; + else if (attrs.sort_result_type == ngNmsSortResultType::NONE) + sort_result_type = MulticlassNmsSortResultType::NONE; + nms_eta = attrs.nms_eta; + normalized = attrs.normalized; + + const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims(); + if (boxes_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); + if (boxes_dims[2] != 4) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; + + const SizeVector& scores_dims = 
inputShapes[NMS_SCORES].getStaticDims(); + if (scores_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); + + if (boxes_dims[0] != scores_dims[0]) + IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; + if (boxes_dims[1] != scores_dims[2]) + IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; + + const SizeVector& valid_outputs_dims = outputShapes[NMS_SELECTEDNUM].getStaticDims(); + if (valid_outputs_dims.size() != 1) + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size(); + if (valid_outputs_dims[0] != boxes_dims[0]) // valid_outputs_dims[0] != num_batches + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[0]; +} + +void MKLDNNMultiClassNmsNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims(); + num_batches = boxes_dims[0]; + num_boxes = boxes_dims[1]; + const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims(); + num_classes = scores_dims[1]; + numFiltBox.resize(num_batches, std::vector(num_classes)); // batches + numBoxOffset.resize(num_batches); + + if (max_output_boxes_per_class) { + max_output_boxes_per_class = (max_output_boxes_per_class == -1) ? num_boxes : max_output_boxes_per_class; + filtBoxes.resize(max_output_boxes_per_class * num_batches * num_classes); + } + + const std::vector<Precision> supportedFloatPrecision = {Precision::FP32, Precision::BF16}; + const std::vector<Precision> supportedIntOutputPrecision = {Precision::I32, Precision::I64}; + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); + + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDOUTPUTS), supportedFloatPrecision, "selected_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDNUM), supportedIntOutputPrecision, "selected_num", outType); + + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::ref_any); +} + +void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { + const float* boxes = reinterpret_cast<const float*>(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); + const float* scores = reinterpret_cast<const float*>(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + + auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().GetDesc().getShape().getStaticDims(); + + if (max_output_boxes_per_class == 0) + return; + + int* selected_indices = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr()); + + float* selected_outputs = reinterpret_cast<float*>(getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); + + int* selected_num = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr()); + + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); + 
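
The pointer arithmetic that follows relies on the assumed dense layouts boxes = [num_batches, num_boxes, 4] and scores = [num_batches, num_classes, num_boxes], so batch- and class-level slices are reached purely through the outer strides. A small standalone sketch of that indexing (helper names are illustrative):

#include <cstddef>

// boxes laid out as [num_batches, num_boxes, 4]: one batch is num_boxes * 4 floats,
// matching boxesStrides[0].
inline const float* boxes_for_batch(const float* boxes, size_t batch, size_t num_boxes) {
    return boxes + batch * num_boxes * 4;
}

// scores laid out as [num_batches, num_classes, num_boxes]: one (batch, class) row
// is a contiguous run of num_boxes scores, matching scoresStrides[0] and scoresStrides[1].
inline const float* scores_for_class(const float* scores, size_t batch, size_t cls,
                                     size_t num_classes, size_t num_boxes) {
    return scores + batch * num_classes * num_boxes + cls * num_boxes;
}
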
if ((nms_eta >= 0) && (nms_eta < 1)) { + nmsWithEta(boxes, scores, boxesStrides, scoresStrides); + } else { + nmsWithoutEta(boxes, scores, boxesStrides, scoresStrides); + } + + size_t startOffset = numFiltBox[0][0]; + numBoxOffset[0] = 0; + for (size_t b = 0; b < numFiltBox.size(); b++) { + size_t batchOffsetNew = 0; + size_t batchOffset = b * num_classes * max_output_boxes_per_class; + for (size_t c = (b == 0 ? 1 : 0); c < numFiltBox[b].size(); c++) { + size_t offset = batchOffset + c * max_output_boxes_per_class; + for (size_t i = 0; i < numFiltBox[b][c]; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += numFiltBox[b][c]; + batchOffsetNew += numFiltBox[b][c]; + } + numBoxOffset[b] = batchOffsetNew; + if (b == 0) + numBoxOffset[b] += numFiltBox[0][0]; + } + // sort element before go through keep_top_k + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return ((l.batch_index < r.batch_index) || + ((l.batch_index == r.batch_index) && ((l.score > r.score) || ((std::fabs(l.score - r.score) < 1e-6) && l.class_index < r.class_index) || + ((std::fabs(l.score - r.score) < 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index)))); + }); + + if (keep_top_k > -1) { + startOffset = 0; + size_t offset = 0; + for (size_t b = 0; b < numFiltBox.size(); b++) { + if (numBoxOffset[b] > keep_top_k) { + if (startOffset == offset) { + startOffset += keep_top_k; + offset += numBoxOffset[b]; + } else { + for (size_t i = 0; i < keep_top_k; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += keep_top_k; + offset += numBoxOffset[b]; + } + } else { + if (startOffset == offset) { + startOffset += numBoxOffset[b]; + offset += numBoxOffset[b]; + } else { + for (size_t i = 0; i < numBoxOffset[b]; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += numBoxOffset[b]; + offset += numBoxOffset[b]; + } + } + } + } + + if (sort_result_across_batch) { + if (sort_result_type == SCORE) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return (l.score > r.score) || (l.score == r.score && l.batch_index < r.batch_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index); + }); + } else if (sort_result_type == CLASSID) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return (l.class_index < r.class_index) || (l.class_index == r.class_index && l.batch_index < r.batch_index) || + (l.class_index == r.class_index && l.batch_index == r.batch_index && l.score > r.score) || + (l.class_index == r.class_index && l.batch_index == r.batch_index && l.score == r.score && l.box_index < r.box_index); + }); + } + } else if (sort_result_type == CLASSID) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return ((l.batch_index < r.batch_index) || + ((l.batch_index == r.batch_index) && + ((l.class_index < r.class_index) || ((l.class_index == r.class_index) && l.score > r.score) || + ((std::fabs(l.score - r.score) <= 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index)))); + }); + } + + const size_t selectedBoxesNum = 
getChildEdgeAt(NMS_SELECTEDINDICES)->getMemory().GetDesc().getShape().getStaticDims()[0]; + const size_t validOutputs = std::min(startOffset, selectedBoxesNum); + + std::vector m_selected_num; + m_selected_num.resize(dims_boxes[0]); + + const size_t selectedBoxesNum_perBatch = selectedBoxesNum / dims_boxes[0]; + + for (size_t idx = 0lu; idx < validOutputs; idx++) { + m_selected_num[filtBoxes[idx].batch_index]++; + } + + int64_t output_offset = 0; + int64_t original_offset = 0; + for (size_t i = 0; i < dims_boxes[0]; i++) { + auto real_boxes = m_selected_num[i]; + selected_num[i] = static_cast(real_boxes); + + for (size_t j = 0; j < real_boxes; j++) { + auto original_index = original_offset + j; + selected_indices[j + output_offset] = filtBoxes[original_index].batch_index * dims_boxes[1] + filtBoxes[original_index].box_index; + auto selected_base = selected_outputs + (output_offset + j) * 6; + selected_base[0] = filtBoxes[original_index].class_index; + selected_base[1] = filtBoxes[original_index].score; + selected_base[2] = boxes[selected_indices[j + output_offset] * 4]; + selected_base[3] = boxes[selected_indices[j + output_offset] * 4 + 1]; + selected_base[4] = boxes[selected_indices[j + output_offset] * 4 + 2]; + selected_base[5] = boxes[selected_indices[j + output_offset] * 4 + 3]; + } + std::fill_n(selected_outputs + (output_offset + real_boxes) * 6, (selectedBoxesNum_perBatch - real_boxes) * 6, -1); + std::fill_n(selected_indices + (output_offset + real_boxes), selectedBoxesNum_perBatch - real_boxes, -1); + output_offset += selectedBoxesNum_perBatch; + original_offset += real_boxes; + } +} + +bool MKLDNNMultiClassNmsNode::created() const { + return getType() == MulticlassNms; +} + +float MKLDNNMultiClassNmsNode::intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized) { + float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; + const float norm = static_cast(normalized == false); + + // to align with reference + yminI = boxesI[0]; + xminI = boxesI[1]; + ymaxI = boxesI[2]; + xmaxI = boxesI[3]; + yminJ = boxesJ[0]; + xminJ = boxesJ[1]; + ymaxJ = boxesJ[2]; + xmaxJ = boxesJ[3]; + + float areaI = (ymaxI - yminI + norm) * (xmaxI - xminI + norm); + float areaJ = (ymaxJ - yminJ + norm) * (xmaxJ - xminJ + norm); + if (areaI <= 0.f || areaJ <= 0.f) + return 0.f; + + float intersection_area = (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ) + norm, 0.f) * + (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ) + norm, 0.f); + return intersection_area / (areaI + areaJ - intersection_area); +} + +void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { + auto less = [](const boxInfo& l, const boxInfo& r) { + return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx)); + }; + + auto func = [](float iou, float adaptive_threshold) { + return iou <= adaptive_threshold ? 
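
The output convention used above packs one detection per row of selected_outputs as [class_id, score, x1, y1, x2, y2], writes the matching flat box index into selected_indices, and pads every unused row of the statically shaped outputs with -1. A compact standalone sketch of that convention for one batch (struct and helper names are illustrative):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Detection {
    int class_id;
    float score, x1, y1, x2, y2;
    int flat_box_index;  // batch_index * num_boxes + box_index
};

// Write the valid detections of one batch, then fill the remaining rows
// (up to rowsPerBatch) of both outputs with -1, as the nodes above do.
static void write_batch(const std::vector<Detection>& dets, size_t rowsPerBatch,
                        float* selected_outputs, int* selected_indices) {
    size_t row = 0;
    for (; row < dets.size() && row < rowsPerBatch; ++row) {
        float* r = selected_outputs + row * 6;
        r[0] = static_cast<float>(dets[row].class_id);
        r[1] = dets[row].score;
        r[2] = dets[row].x1;
        r[3] = dets[row].y1;
        r[4] = dets[row].x2;
        r[5] = dets[row].y2;
        selected_indices[row] = dets[row].flat_box_index;
    }
    std::fill_n(selected_outputs + row * 6, (rowsPerBatch - row) * 6, -1.f);
    std::fill_n(selected_indices + row, rowsPerBatch - row, -1);
}
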
1.0f : 0.0f; + }; + + parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { + if (class_idx != background_class) { + std::vector fb; + const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; + const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; + + std::priority_queue, decltype(less)> sorted_boxes(less); + for (int box_idx = 0; box_idx < num_boxes; box_idx++) { + if (scoresPtr[box_idx] >= score_threshold) // algin with ref + sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0})); + } + fb.reserve(sorted_boxes.size()); + if (sorted_boxes.size() > 0) { + auto adaptive_threshold = iou_threshold; + int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? sorted_boxes.size() : max_output_boxes_per_class; + while (max_out_box && !sorted_boxes.empty()) { + boxInfo currBox = sorted_boxes.top(); + float origScore = currBox.score; + sorted_boxes.pop(); + max_out_box--; + + bool box_is_selected = true; + for (int idx = static_cast(fb.size()) - 1; idx >= currBox.suppress_begin_index; idx--) { + float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4], normalized); + currBox.score *= func(iou, adaptive_threshold); + if (iou >= adaptive_threshold) { + box_is_selected = false; + break; + } + if (currBox.score <= score_threshold) + break; + } + + currBox.suppress_begin_index = fb.size(); + if (box_is_selected) { + if (nms_eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= nms_eta; + } + if (currBox.score == origScore) { + fb.push_back({currBox.score, batch_idx, class_idx, currBox.idx}); + continue; + } + if (currBox.score > score_threshold) { + sorted_boxes.push(currBox); + } + } + } + } + numFiltBox[batch_idx][class_idx] = fb.size(); + size_t offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class; + for (size_t i = 0; i < fb.size(); i++) { + filtBoxes[offset + i] = fb[i]; + } + } + }); +} + +void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { + parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { + if (class_idx != background_class) { + const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; + const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; + + std::vector> sorted_boxes; + for (int box_idx = 0; box_idx < num_boxes; box_idx++) { + if (scoresPtr[box_idx] >= score_threshold) // algin with ref + sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx)); + } + + int io_selection_size = 0; + if (sorted_boxes.size() > 0) { + parallel_sort(sorted_boxes.begin(), sorted_boxes.end(), [](const std::pair& l, const std::pair& r) { + return (l.first > r.first || ((l.first == r.first) && (l.second < r.second))); + }); + int offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class; + filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second); + io_selection_size++; + int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? 
sorted_boxes.size() : max_output_boxes_per_class; + for (size_t box_idx = 1; box_idx < max_out_box; box_idx++) { + bool box_is_selected = true; + for (int idx = io_selection_size - 1; idx >= 0; idx--) { + float iou = + intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4], &boxesPtr[filtBoxes[offset + idx].box_index * 4], normalized); + if (iou >= iou_threshold) { + box_is_selected = false; + break; + } + } + + if (box_is_selected) { + filtBoxes[offset + io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx, sorted_boxes[box_idx].second); + io_selection_size++; + } + } + } + numFiltBox[batch_idx][class_idx] = io_selection_size; + } + }); +} + +void MKLDNNMultiClassNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms) \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp new file mode 100644 index 00000000000..0627f72cea0 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp @@ -0,0 +1,93 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +namespace MKLDNNPlugin { + +enum MulticlassNmsSortResultType { + CLASSID, // sort selected boxes by class id (ascending) in each batch element + SCORE, // sort selected boxes by score (descending) in each batch element + NONE // do not guarantee the order in each batch element +}; + +class MKLDNNMultiClassNmsNode : public MKLDNNNode { +public: + MKLDNNMultiClassNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + // input (port Num) + const size_t NMS_BOXES = 0; + const size_t NMS_SCORES = 1; + + // output (port Num) + const size_t NMS_SELECTEDOUTPUTS = 0; + const size_t NMS_SELECTEDINDICES = 1; + const size_t NMS_SELECTEDNUM = 2; + + bool sort_result_across_batch = false; + MulticlassNmsSortResultType sort_result_type = NONE; + + size_t num_batches; + size_t num_boxes; + size_t num_classes; + + int max_output_boxes_per_class = 0; + float iou_threshold = 0.0f; + float score_threshold = 0.0f; + + int32_t background_class = 0; + int32_t keep_top_k = 0; + float nms_eta = 0.0f; + bool normalized = true; + + std::string errorPrefix; + + std::vector> numFiltBox; + std::vector numBoxOffset; + const std::string inType = "input", outType = "output"; + + struct filteredBoxes { + float score; + int batch_index; + int class_index; + int box_index; + filteredBoxes() = default; + filteredBoxes(float _score, int _batch_index, int _class_index, int _box_index) + : score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {} + }; + + struct boxInfo { + float score; + int idx; + int suppress_begin_index; + }; + + std::vector filtBoxes; + + void checkPrecision(const 
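
nmsWithEta above is ordinary greedy NMS with one twist: once the adaptive IoU threshold exceeds 0.5, it is multiplied by nms_eta after every kept box, so suppression becomes progressively stricter. A minimal standalone sketch of that loop, assuming candidates are pre-sorted by descending score and iou() returns a value in [0, 1] (names are illustrative):

#include <cstddef>
#include <functional>
#include <vector>

struct Box { float x1, y1, x2, y2; };

static std::vector<size_t> greedy_nms_with_eta(const std::vector<Box>& sorted_boxes,
                                               float iou_threshold, float eta,
                                               const std::function<float(const Box&, const Box&)>& iou) {
    std::vector<size_t> kept;
    float adaptive = iou_threshold;
    for (size_t i = 0; i < sorted_boxes.size(); ++i) {
        bool selected = true;
        for (size_t k : kept) {
            if (iou(sorted_boxes[i], sorted_boxes[k]) >= adaptive) {
                selected = false;  // suppressed by an already-kept box
                break;
            }
        }
        if (selected) {
            kept.push_back(i);
            if (eta < 1.f && adaptive > 0.5f)
                adaptive *= eta;  // tighten the threshold for later candidates
        }
    }
    return kept;
}

With eta == 1 this degenerates to the fixed-threshold path implemented by nmsWithoutEta.
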
InferenceEngine::Precision prec, const std::vector precList, const std::string name, + const std::string type); + + float intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized); + + void nmsWithEta(const float* boxes, const float* scores, const InferenceEngine::SizeVector& boxesStrides, const InferenceEngine::SizeVector& scoresStrides); + + void nmsWithoutEta(const float* boxes, const float* scores, const InferenceEngine::SizeVector& boxesStrides, + const InferenceEngine::SizeVector& scoresStrides); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index baff79e5d75..f476aa8dec5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -733,7 +733,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { !getParentEdgeAt(0)->getParent()->isConstant(); const size_t inputsNum = getParentEdges().size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(inputsNum); config.outConfs.resize(1); @@ -742,17 +742,15 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 0 : -1; if (inputsNum == 2) { - const auto dims = getParentEdgeAt(1)->getDims().ToSizeVector(); - config.inConfs[1].desc = TensorDesc(Precision::I32, - dims, - TensorDesc::getLayoutByDims(dims)); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::s32, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(1)->getShape().getRank())); config.inConfs[1].constant = true; } auto pushDesc = [&](memory::format_tag format, impl_desc_type impl_type) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, format); - config.outConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), outputDataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_type, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_type}); }; impl_desc_type impl_type; @@ -768,22 +766,22 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { if (mayiuse(cpu::x64::sse41)) { // nspc - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { pushDesc(memory::format_tag::nhwc, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::ndhwc, impl_type); } // blk if (impl_desc_type::jit_avx512 == impl_type) { - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { pushDesc(memory::format_tag::nChw16c, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::nCdhw16c, impl_type); } } else if (impl_desc_type::jit_avx2 == impl_type || impl_desc_type::jit_sse42 == impl_type) { - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { 
pushDesc(memory::format_tag::nChw8c, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::nCdhw8c, impl_type); } } @@ -792,7 +790,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { // planar if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()), impl_type); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), impl_type); } void MKLDNNMVNNode::createPrimitive() { @@ -805,15 +803,15 @@ void MKLDNNMVNNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - const SizeVector in_dims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const SizeVector in_dims = getParentEdgeAt(0)->getShape().getStaticDims(); transformTo5DCase(in_dims); auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_mvn_config_params(); - jcp.src_prc = selectedPD->getConfig().inConfs[0].desc.getPrecision(); - jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc.getPrecision(); + jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision(); + jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision(); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc)); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc)); - jcp.planar_layout = MKLDNNMemory::GetPlainLayout(getChildEdgeAt(0)->getDims()) == selectedPD->getConfig().inConfs[0].desc.getLayout(); + jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp); jcp.normalize_variance = normalizeVariance_; jcp.across_channels = acrossChannels_; int N = 0; @@ -913,13 +911,12 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dim = getParentEdgeAt(0)->getDesc().getDims(); + auto dim = getParentEdgeAt(0)->getShape().getStaticDims(); if (mayiuse(cpu::x64::sse41)) { if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) { IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform."; } - Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); - if (layout == C || layout == NC || layout == CHW || layout == NCHW || layout == NCDHW) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { mvn_pln(src_data, dst_data, dim); } else { mvn_blk(src_data, dst_data, dim); @@ -1173,10 +1170,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si size_t N = 1; size_t C = 1; size_t D = 1; size_t H = 1; size_t W = 1; std::tie(N, C, D, H, W) = shape5D; - bool is_nhwc = false; - Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); - if (layout == NHWC || layout == NDHWC) - is_nhwc = true; + bool is_nhwc = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); size_t CB = div_up(C, blk_size); @@ -1407,7 +1401,7 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const { } // limit post ops to unary when shape transformed on channel // 1D only fused with unary - int inputRank = getParentEdgeAt(0)->getDims().ndims(); + int inputRank = getParentEdgeAt(0)->getShape().getRank(); bool unaryEltwise = one_of(node->getAlgorithm(), 
EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp index 093127eada5..a6c0bc07b28 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp @@ -114,18 +114,18 @@ void MKLDNNNonMaxSuppressionNode::initSupportedPrimitiveDescriptors() { checkOutput(outputShape_SELECTEDINDICES, supportedIntOutputPrecision, "selected_indices", NMS_SELECTEDINDICES); checkOutput(outputShape_SELECTEDSCORES, supportedFloatPrecision, "selected_scores", NMS_SELECTEDSCORES); - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) { Precision inPrecision = i == NMS_MAXOUTPUTBOXESPERCLASS ? Precision::I32 : Precision::FP32; - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, inPrecision); + inDataConf.emplace_back(LayoutType::ncsp, inPrecision); } - std::vector outDataConf; + std::vector outDataConf; outDataConf.reserve(getOriginalOutputsNumber()); for (int i = 0; i < getOriginalOutputsNumber(); ++i) { Precision outPrecision = i == NMS_SELECTEDSCORES ? Precision::FP32 : Precision::I32; - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, outPrecision); + outDataConf.emplace_back(LayoutType::ncsp, outPrecision); } addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); @@ -135,24 +135,24 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); - max_output_boxes_per_class = outDims.size() > NMS_SELECTEDSCORES ? 0 : num_boxes; - if (inDims.size() > NMS_MAXOUTPUTBOXESPERCLASS) { + max_output_boxes_per_class = outputShapes.size() > NMS_SELECTEDSCORES ? 0 : num_boxes; + if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) { max_output_boxes_per_class = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0]; } if (max_output_boxes_per_class == 0) return; - iou_threshold = outDims.size() > NMS_SELECTEDSCORES ? 0.0f : 1.0f; - if (inDims.size() > NMS_IOUTHRESHOLD) + iou_threshold = outputShapes.size() > NMS_SELECTEDSCORES ? 
0.0f : 1.0f; + if (inputShapes.size() > NMS_IOUTHRESHOLD) iou_threshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->GetPtr())[0]; score_threshold = 0.0f; - if (inDims.size() > NMS_SCORETHRESHOLD) + if (inputShapes.size() > NMS_SCORETHRESHOLD) score_threshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->GetPtr())[0]; soft_nms_sigma = 0.0f; - if (inDims.size() > NMS_SOFTNMSSIGMA) + if (inputShapes.size() > NMS_SOFTNMSSIGMA) soft_nms_sigma = reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->GetPtr())[0]; scale = 0.0f; if (soft_nms_sigma > 0.0) { @@ -162,15 +162,15 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { int *selected_indices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr()); float *selected_scores = nullptr; - if (outDims.size() > NMS_SELECTEDSCORES) + if (outputShapes.size() > NMS_SELECTEDSCORES) selected_scores = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr()->GetPtr()); int *valid_outputs = nullptr; - if (outDims.size() > NMS_VALIDOUTPUTS) + if (outputShapes.size() > NMS_VALIDOUTPUTS) valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getDesc().getBlockingDesc().getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getDesc().getBlockingDesc().getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); std::vector filtBoxes(max_output_boxes_per_class * num_batches * num_classes); @@ -205,10 +205,10 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { }); } - const size_t selectedBoxesNum = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getDims()[0]; + const size_t selectedBoxesNum = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getShape().getStaticDims()[0]; const size_t validOutputs = std::min(filtBoxes.size(), selectedBoxesNum); - int selectedIndicesStride = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getDesc().getBlockingDesc().getStrides()[0]; + int selectedIndicesStride = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemory().GetDescWithType().getStrides()[0]; int *selectedIndicesPtr = selected_indices; float *selectedScoresPtr = selected_scores; @@ -218,7 +218,7 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { selectedIndicesPtr[1] = filtBoxes[idx].class_index; selectedIndicesPtr[2] = filtBoxes[idx].box_index; selectedIndicesPtr += selectedIndicesStride; - if (outDims.size() > NMS_SELECTEDSCORES) { + if (outputShapes.size() > NMS_SELECTEDSCORES) { selectedScoresPtr[0] = static_cast(filtBoxes[idx].batch_index); selectedScoresPtr[1] = static_cast(filtBoxes[idx].class_index); selectedScoresPtr[2] = static_cast(filtBoxes[idx].score); @@ -226,10 +226,10 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { } } std::fill(selectedIndicesPtr, selectedIndicesPtr + (selectedBoxesNum - idx) * selectedIndicesStride, -1); - if (outDims.size() > NMS_SELECTEDSCORES) { + if (outputShapes.size() > NMS_SELECTEDSCORES) { std::fill(selectedScoresPtr, selectedScoresPtr + (selectedBoxesNum - idx) * selectedIndicesStride, -1.f); } - if (outDims.size() > NMS_VALIDOUTPUTS) + if (outputShapes.size() > NMS_VALIDOUTPUTS) *valid_outputs = static_cast(validOutputs); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index 2da3ae8f330..10b1be0dac5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -682,8 +682,10 @@ bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptrgetDims().ndims() > 4 || getParentEdgeAt(0)->getDims().ndims() < 2) { + if (getParentEdgeAt(0)->getShape().getRank() > 4 || getParentEdgeAt(0)->getShape().getRank() < 2) { IE_THROW() << errorPrefix << "has invalid input shape. Normalize supports from 2D to 4D blobs."; } } @@ -757,21 +759,22 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { bool canBeInplace = src_data_size == dst_data_size && getParentEdgeAt(DATA)->getParent()->getChildEdges().size() == 1; - LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.outConfs.resize(1); config.outConfs[0].inPlace = canBeInplace ? 0 : -1; auto pushDesc = [&](memory::format_tag format) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA)->getDims(), inputDataType, format); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA)->getDims(), outputDataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), inputDataType, format); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES)->getShape().getStaticDims(), memory::data_type::s32, + memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), outputDataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; // only plain layout support when w/o sse42 - if (getParentEdgeAt(DATA)->getDims().ndims() == 4 && !cornerCase) { + if (getParentEdgeAt(DATA)->getShape().getRank() == 4 && !cornerCase) { if (mayiuse(cpu::x64::sse41)) { pushDesc(memory::format_tag::nhwc); if (mayiuse(cpu::x64::avx512_common)) { @@ -783,7 +786,7 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { } if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(DATA)->getDims())); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(DATA)->getShape().getRank())); } bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const { @@ -824,22 +827,23 @@ void MKLDNNNormalizeL2Node::createPrimitive() { if (!cornerCase) { auto selectedPD = getSelectedPrimitiveDescriptor(); - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc->getPrecision()); + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc->getPrecision()); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.is_nchw = jcp.is_nhwc = jcp.is_blk = false; - if (getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { + if 
(getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { jcp.is_nchw = true; - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { jcp.is_blk = true; } else { jcp.is_nhwc = true; } jcp.across_spatial = across_spatial; - auto dims = getParentEdgeAt(0)->getDesc().getDims(); + auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); size_t dims_size = dims.size(); jcp.n = (dims_size > 0) ? dims[0] : 1lu; jcp.c = (dims_size > 1) ? dims[1] : 1lu; @@ -905,7 +909,7 @@ void MKLDNNNormalizeL2Node::execute(mkldnn::stream strm) { const uint8_t *src_ptr = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t *dst_ptr = reinterpret_cast(dstMemPtr->GetPtr()); - auto dims = getParentEdgeAt(DATA)->getDesc().getDims(); + auto dims = getParentEdgeAt(DATA)->getShape().getStaticDims(); NormalizeContext ctx = { *this, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp index 8f164c33c18..350e86e556e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_one_hot_node.h" -#include +#include #include #include "common/cpu_memcpy.h" @@ -89,11 +89,11 @@ void MKLDNNOneHotNode::initSupportedPrimitiveDescriptors() { } output_precision = getOriginalOutputPrecisionAtPort(0); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, output_precision}, - {TensorDescCreatorTypes::ncsp, output_precision}}, - {{TensorDescCreatorTypes::ncsp, output_precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, output_precision}, + {LayoutType::ncsp, output_precision}}, + {{LayoutType::ncsp, output_precision}}, impl_desc_type::ref_any); } @@ -125,13 +125,13 @@ void MKLDNNOneHotNode::one_hot(size_t prefix_size, size_t suffix_size) { void MKLDNNOneHotNode::execute(mkldnn::stream strm) { std::size_t prefix_size = 1; - auto input_dims = getParentEdgeAt(0)->getDesc().getDims(); + auto input_dims = getParentEdgeAt(0)->getShape().getStaticDims(); std::size_t actual_axis = (axis == -1) ? 
src_dims.size() : axis; for (size_t i = 0; i < actual_axis; ++i) prefix_size *= input_dims[i]; - std::size_t suffix_size = getParentEdgeAt(0)->getBlob()->size() / prefix_size; + std::size_t suffix_size = getParentEdgeAt(0)->getShape().getElementsCount() / prefix_size; OneHotContext ctx = {this, prefix_size, suffix_size}; OV_SWITCH(MKLDNNPlugin, OneHotExecute, ctx, output_precision.size(), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp index b4ef82481ca..584eb4bce79 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp @@ -92,8 +92,8 @@ void MKLDNNPadNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "Incorrect number of output edges"; - const SizeVector srcDims = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); - const SizeVector dstDims = getChildEdgeAt(DATA_ID)->getDims().ToSizeVector(); + const SizeVector srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + const SizeVector dstDims = getChildEdgeAt(DATA_ID)->getShape().getStaticDims(); if (srcDims.size() != dstDims.size() || padsBegin.size() != srcDims.size() || padsEnd.size() != srcDims.size()) IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!"; @@ -122,22 +122,26 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { precision = precision.is_float() ? InferenceEngine::Precision::FP32 : InferenceEngine::Precision::I32; auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - int numOfDims = srcDims.ToSizeVector().size(); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + int numOfDims = srcDims.size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(isPadValueSpecified ? 
4 : 3); config.outConfs.resize(1); auto pushSupportedPrimitiveDescriptor = [&](memory::format_tag memoryFormat) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), dataType, memoryFormat); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(PADS_BEGIN_ID)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(PADS_END_ID)->getDims(), memory::data_type::s32, memory::format_tag::x); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, + memoryFormat); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_BEGIN_ID)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_END_ID)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); if (isPadValueSpecified) - config.inConfs[3].desc = MKLDNNMemoryDesc(getParentEdgeAt(PAD_VALUE_ID)->getDims(), memory::data_type::f32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(DATA_ID)->getDims(), dataType, memoryFormat); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, memoryFormat}); + config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PAD_VALUE_ID)->getShape().getStaticDims(), + memory::data_type::f32, memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, memoryFormat); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); }; if (numOfDims == 4) @@ -145,7 +149,7 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { else if (numOfDims == 5) pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::ndhwc); - pushSupportedPrimitiveDescriptor(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims())); + pushSupportedPrimitiveDescriptor(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); auto canUseBlocked = [=](const size_t blockSize) { return (padMode == CONSTANT && padsBegin[1] % blockSize == 0 && padsEnd[1] % blockSize == 0) || @@ -175,10 +179,11 @@ void MKLDNNPadNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor for Pad " << getName() << " is not set."; - params.sizeData = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.sizeData = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); - params.srcDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - params.dstDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + const auto inBlkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + params.srcDims = inBlkDesc.getBlockDims(); + params.dstDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t nDims = params.srcDims.size(); params.srcStrides.resize(nDims, 1); @@ -188,13 +193,14 @@ void MKLDNNPadNode::createPrimitive() { params.dstStrides[i] = params.dstStrides[i + 1] * params.dstDims[i + 1]; } - if (getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { padsBegin[1] /= params.srcDims[params.srcDims.size() - 1]; padsEnd[1] /= 
params.srcDims[params.srcDims.size() - 1]; padsBegin.push_back(0); padsEnd.push_back(0); } else { - auto order = getParentEdgeAt(0)->getDesc().getBlockingDesc().getOrder(); + auto order = inBlkDesc.getOrder(); std::vector newPadsBegin(padsBegin.size(), 0), newPadsEnd(padsEnd.size(), 0); for (size_t i = 0; i < padsBegin.size(); ++i) { newPadsBegin[i] = padsBegin[order[i]]; @@ -304,7 +310,7 @@ void MKLDNNPadNode::padConstant() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU Pad node with name '" << getName() << "' doesn't have primitive descriptors."; - InferenceEngine::Precision precision = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc.getPrecision(); + InferenceEngine::Precision precision = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc->getPrecision(); OV_SWITCH(MKLDNNPlugin, PadConstantEmitter, this, precision, OV_CASE(InferenceEngine::Precision::FP32, float), OV_CASE(InferenceEngine::Precision::I32, int32_t), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index c7a007d0c6f..5d6e900d75d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -13,6 +13,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -60,18 +61,18 @@ MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, co } } -std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if (dims.getRank() == 3) return {memory::format_tag::tnc, memory::format_tag::ntc}; - else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; return {memory::format_tag::any}; } @@ -112,15 +113,17 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { effective_pad_begin = data_pad_begin; effective_pad_end.resize(data_pad_end.size()); - auto parentDims = getParentEdgeAt(0)->getDims(); - auto childDims = getChildEdgeAt(0)->getDims(); - if ((parentDims.ndims() < 4) || (parentDims.ndims() > 5)) + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto childDims = getChildEdgeAt(0)->getShape().getStaticDims(); + const size_t inputRank = getParentEdgeAt(0)->getShape().getRank(); + + if ((inputRank < 4) || (inputRank > 5)) IE_THROW() << "Pooling layer. Unsupported mode. 
Only 4D and 5D blobs are supported as input."; for (int i = 0; i < effective_pad_end.size(); i++) { int krn = kernel[i]; - int src = getParentEdgeAt(0)->getDims()[2 + i]; - int dst = getChildEdgeAt(0)->getDims()[2 + i]; + int src = getParentEdgeAt(0)->getShape().getStaticDims()[2 + i]; + int dst = getChildEdgeAt(0)->getShape().getStaticDims()[2 + i]; int calc_dst = (src - krn + data_pad_begin[i]) / stride[i] + 1; effective_pad_end[i] = (dst - calc_dst) * stride[i]; @@ -130,24 +133,28 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { if (outputDataType == memory::data_type::bf16) outputDataType = memory::data_type::f32; // i8 layers supports only ndhwc and nhwc layouts - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, parentDims.ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc}; - createDescriptor({ in_candidate }, { out_candidate }); - } else if ((parentDims.ndims() == 4 || parentDims.ndims() == 5) && parentDims[1] == 1) { + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? + memory::format_tag::ndhwc : memory::format_tag::nhwc); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? + memory::format_tag::ndhwc : memory::format_tag::nhwc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + } else if ((inputRank == 4 || inputRank == 5) && parentDims[1] == 1) { // WA. We should force planar layout since it provides better performance - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, parentDims.ndims() == 5 ? memory::format_tag::ncdhw : memory::format_tag::nchw}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? memory::format_tag::ncdhw : memory::format_tag::nchw}; - createDescriptor({ in_candidate }, { out_candidate }); + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? + memory::format_tag::ncdhw : memory::format_tag::nchw); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? 
+ memory::format_tag::ncdhw : memory::format_tag::nchw); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { if (inputDataType != memory::data_type::bf16) { inputDataType = memory::data_type::f32; outputDataType = memory::data_type::f32; } // It doesn't support any format - for (auto format : getAvailableFormatsForDims(parentDims)) { - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, format}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, format}; - createDescriptor({in_candidate}, {out_candidate}); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, format); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, format); + createDescriptor({in_candidate.get()}, {out_candidate.get()}); } } } @@ -172,10 +179,10 @@ bool MKLDNNPoolingNode::created() const { return getType() == Pooling; } -void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); - MKLDNNMemoryDesc out_candidate(outputDesc[0]); +void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); mkldnn::algorithm alg; if (algorithm == PoolingAvg) { @@ -240,21 +247,23 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { for (auto& desc : descs) { auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(itpd, i)); + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); + dataConfig.desc = getSrcMemDesc(itpd, i); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = canBeInPlace() ? 
0 : -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(itpd, i)); + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); + dataConfig.desc = getDstMemDesc(itpd, i); config.outConfs.push_back(dataConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -266,23 +275,23 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &config) { +void MKLDNNPoolingNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; } - std::vector inDescs; + std::vector inDescs; for (const auto& inConf : config.inConfs) - inDescs.push_back(inConf.desc); - std::vector outDescs; + inDescs.push_back(inConf.desc.get()); + std::vector outDescs; for (const auto& outConf : config.outConfs) - outDescs.push_back(outConf.desc); + outDescs.push_back(outConf.desc.get()); createDescriptor({inDescs}, {outDescs}); mkldnn::primitive_attr attr; setPostOps(attr); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + NodeConfig rightConfig = selectedPD->getConfig(); size_t selected_count = 0; for (size_t j = 0; j < descs.size(); j++) { const auto &desc = descs[j]; @@ -291,10 +300,10 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (itpd) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = canBeInPlace() ? 0 : -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, i); @@ -302,7 +311,7 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, i); @@ -332,20 +341,18 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi return; for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { - if (selectedConfig.inConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.inConfs[i].desc, config.inConfs[i].desc)) + if (!selectedConfig.inConfs[i].desc->isCompatible(*config.inConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { - if (selectedConfig.outConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.outConfs[i].desc, config.outConfs[i].desc)) + if (!selectedConfig.outConfs[i].desc->isCompatible(*config.outConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } rightConfig = config; } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } void MKLDNNPoolingNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h index 1f6acf58b78..a594e774e47 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h @@ 
-16,12 +16,12 @@ class MKLDNNPoolingNode : public MKLDNNNode { public: MKLDNNPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; - std::vector getAvailableFormatsForDims(const MKLDNNDims &dims) const override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; + std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; - void initDescriptor(const InferenceEngine::LayerConfig &config) override; + void initDescriptor(const NodeConfig& config) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp index 584960373ae..e7421d82f12 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -141,17 +140,17 @@ void MKLDNNProposalNode::initSupportedPrimitiveDescriptors() { return; if (store_prob) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } else { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } } @@ -166,8 +165,8 @@ void MKLDNNProposalNode::execute(mkldnn::stream strm) { if (store_prob) outProbData = reinterpret_cast (getChildEdgesAtPort(PROBABILITIES_OUT_IDX)[0]->getMemoryPtr()->GetPtr()); - auto inProbDims = getParentEdgeAt(0)->getDims().ToSizeVector(); - const size_t imgInfoSize = getParentEdgeAt(2)->getDims()[0]; + auto inProbDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t imgInfoSize = getParentEdgeAt(2)->getShape().getStaticDims()[0]; // input image height & width const float imgHeight = imgInfoData[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp index 393ef27921a..e56d6d2c245 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp @@ -12,7 +12,7 @@ #include #include "mkldnn_psroi_pooling_node.h" #include -#include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -139,27 +139,27 @@ void MKLDNNPSROIPoolingNode::initSupportedPrimitiveDescriptors() { 
auto dataPrecision = getOriginalInputPrecisionAtPort(0) == Precision::BF16 ? Precision::BF16 : Precision::FP32; if (getAlgorithm() == Algorithm::PSROIPoolingAverage || getAlgorithm() == Algorithm::PSROIPoolingBilinear) { - std::vector> dataFomats{ - {TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::nspc, TensorDescCreatorTypes::nspc}, - {TensorDescCreatorTypes::nCsp16c, TensorDescCreatorTypes::nCsp16c}, - {TensorDescCreatorTypes::nCsp8c, TensorDescCreatorTypes::nCsp8c} + std::vector> dataFomats{ + {LayoutType::ncsp, LayoutType::ncsp}, + {LayoutType::nspc, LayoutType::nspc}, + {LayoutType::nCsp16c, LayoutType::nCsp16c}, + {LayoutType::nCsp8c, LayoutType::nCsp8c} }; for (const auto &df : dataFomats) { - addSupportedPrimDesc({{df.first, dataPrecision}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{df.first, dataPrecision}, {LayoutType::ncsp, Precision::FP32}}, {{df.second, dataPrecision}}, impl_type); } } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable && noTrans) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_type); } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_type); } } @@ -182,19 +182,18 @@ inline float bilinearInterp(const inputType* data, const float x, const float y, return value; } -void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const TensorDesc& dstDesc, +void MKLDNNPSROIPoolingNode::unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, int& hInputStride, int& wInputStride, int& hOutputStride, int& wOutputStride, - Layout& inFmt, Layout& outFmt, int& inBlockSize, int& outBlockSize, int& outBlockCount, unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding) { - inFmt = srcDesc.getLayout(); - outFmt = dstDesc.getLayout(); - int expectedInBlockDimsSize = (inFmt == Layout::BLOCKED ? 5 : 4); - int expectedOutBlockDimsSize = (outFmt == Layout::BLOCKED ? 5 : 4); - auto inBlkDims = srcDesc.getBlockingDesc().getBlockDims(); - auto outBlkDims = dstDesc.getBlockingDesc().getBlockDims(); + const bool inpIsBlk = srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c); + const bool outIsBlk = dstDesc.hasLayoutType(LayoutType::nCsp16c) || dstDesc.hasLayoutType(LayoutType::nCsp8c); + int expectedInBlockDimsSize = (inpIsBlk ? 5 : 4); + int expectedOutBlockDimsSize = (outIsBlk ? 
5 : 4); + auto inBlkDims = srcDesc.getBlockDims(); + auto outBlkDims = dstDesc.getBlockDims(); if (inBlkDims.size() != expectedInBlockDimsSize) IE_THROW() << errorPrefix << " has unexpected size of blocking dims in input (given " << inBlkDims.size() << ", expected " << expectedInBlockDimsSize << ")"; @@ -202,15 +201,15 @@ void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const Tenso IE_THROW() << errorPrefix << " has unexpected size of blocking dims in output (given " << outBlkDims.size() << ", expected " << expectedOutBlockDimsSize << ")"; - inBlockSize = (inFmt == Layout::BLOCKED ? srcDesc.getBlockingDesc().getBlockDims()[4] : 1); - outBlockSize = (outFmt == Layout::BLOCKED ? dstDesc.getBlockingDesc().getBlockDims()[4] : 1); - inputChannelsPadding = srcDesc.getBlockingDesc().getBlockDims()[1] * inBlockSize; - outputChannelsPadding = dstDesc.getBlockingDesc().getBlockDims()[1] * outBlockSize; + inBlockSize = (inpIsBlk ? srcDesc.getBlockDims()[4] : 1); + outBlockSize = (outIsBlk ? dstDesc.getBlockDims()[4] : 1); + inputChannelsPadding = srcDesc.getBlockDims()[1] * inBlockSize; + outputChannelsPadding = dstDesc.getBlockDims()[1] * outBlockSize; outBlockCount = outputChannelsPadding / outBlockSize; int hOutStrIndex = 0, wOutStrIndex = 0, hInStrIndex = 0, wInStrIndex = 0; - const auto& outOrder = dstDesc.getBlockingDesc().getOrder(); - const auto& inOrder = srcDesc.getBlockingDesc().getOrder(); + const auto& outOrder = dstDesc.getOrder(); + const auto& inOrder = srcDesc.getOrder(); for (int i = 0; i < outOrder.size(); i++) { if (outOrder[i] == 2) hOutStrIndex = i; if (outOrder[i] == 3) wOutStrIndex = i; @@ -219,21 +218,20 @@ void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const Tenso if (inOrder[i] == 2) hInStrIndex = i; if (inOrder[i] == 3) wInStrIndex = i; } - hInputStride = srcDesc.getBlockingDesc().getStrides()[hInStrIndex]; - wInputStride = srcDesc.getBlockingDesc().getStrides()[wInStrIndex]; - hOutputStride = dstDesc.getBlockingDesc().getStrides()[hOutStrIndex]; - wOutputStride = dstDesc.getBlockingDesc().getStrides()[wOutStrIndex]; + hInputStride = srcDesc.getStrides()[hInStrIndex]; + wInputStride = srcDesc.getStrides()[wInStrIndex]; + hOutputStride = dstDesc.getStrides()[hOutStrIndex]; + wOutputStride = dstDesc.getStrides()[wOutStrIndex]; } template void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, - const TensorDesc& srcDesc, const TensorDesc& dstDesc) { - Layout inFmt, outFmt; + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; unsigned long inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, - inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); + inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); const float roiStartW = static_cast(round(bottomRois[1])) * spatialScale; const float roiStartH = static_cast(round(bottomRois[2])) * spatialScale; const float roiEndW = static_cast(round(bottomRois[3] + 1.0f)) * spatialScale; @@ -273,7 +271,7 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType dstData[dstIndex] = outSum / binArea; } }; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { 
parallel_for2d(nh, nw, [&](int h, int w) { const int binOffsetOutput = n * nc * nh * nw; const int binOffsetInput = roiBatchInd * channels * height * width; @@ -282,10 +280,10 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType avgPsroi(c, h, w, 0, 0, binOffsetInput + gc, binOffsetOutput + c); } }); - } else if (inFmt == Layout::NCHW) { + } else if (srcDesc.hasLayoutType(LayoutType::ncsp)) { parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { const int gc = (c * groupSize + h) * groupSize + w; - const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); + const int outputBlockResidual = (dstDesc.hasLayoutType(LayoutType::ncsp) ? 0 : c % inBlockSize); const int outputBlockIdx = (c / outBlockSize) * outBlockSize; const int binOffsetInput = (roiBatchInd * inputChannelsPadding + gc) * height * width; const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw; @@ -297,8 +295,8 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize); for (int c = cStart; c < cEnd; c++) { const int gc = (c * groupSize + h) * groupSize + w; - const int inputBlockResidual = (inFmt == Layout::NCHW ? 0 : gc % inBlockSize); - const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); + const int inputBlockResidual = (srcDesc.hasLayoutType(LayoutType::ncsp) ? 0 : gc % inBlockSize); + const int outputBlockResidual = (dstDesc.hasLayoutType(LayoutType::ncsp) ? 0 : c % inBlockSize); const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; const int outputBlockIdx = (c / outBlockSize) * outBlockSize; const int binOffsetInput = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; @@ -312,12 +310,11 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType template void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, const int currentRoi, const int roiBatchInd, - const TensorDesc& srcDesc, const TensorDesc& dstDesc) { - Layout inFmt, outFmt; + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; unsigned long inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, - inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); + inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); const float roiStartW = bottomRois[1] * spatialScale; const float roiStartH = bottomRois[2] * spatialScale; const float roiEndW = bottomRois[3] * spatialScale; @@ -340,13 +337,14 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp const float inY = nh > 1 ? (h * heightScale + boxYmin * (height - 1)) : 0.5f * (boxYmin + boxYmax) * (height - 1); for (size_t binX = 0; binX < spatialBinsX; binX++) { size_t gc = c + (binY * spatialBinsX + binX) * nc; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { binOffIn = roiBatchInd * channels * height * width + gc; inBlkRes = 0; } else { // nchw, nChw16c, nChw8c const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; binOffIn = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; - inBlkRes = (inFmt == Layout::BLOCKED ? 
gc % inBlockSize : 0); + inBlkRes = ((srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c)) + ? gc % inBlockSize : 0); } const auto *bottomData = srcData + binOffIn; @@ -386,14 +384,14 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp dstData[dstIndex] = accum; }; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { const int binOffsetOutput = currentRoi * nc * nh * nw; parallel_for2d(nh, nw, [&](int h, int w) { for (int c = 0; c < nc; c++) { bilinearPsroi(c, h, w, 0, binOffsetOutput + c); } }); - } else if (inFmt == Layout::NCHW) { + } else if (srcDesc.hasLayoutType(LayoutType::ncsp)) { parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { bilinearPsroi(c, h, w, 0, (currentRoi * outputChannelsPadding + c) * binCount); }); @@ -404,7 +402,8 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp for (int c = cStart; c < cEnd; c++) { const int outputBlockIdx = (c / inBlockSize) * inBlockSize; const int binOffsetOutput = (currentRoi * outputChannelsPadding + outputBlockIdx) * binCount; - const int outputBlockResidual = (inFmt == Layout::BLOCKED ? c % inBlockSize : 0); + const int outputBlockResidual = ((srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c)) + ? c % inBlockSize : 0); bilinearPsroi(c, h, w, outputBlockResidual, binOffsetOutput); } }); @@ -480,8 +479,8 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto srcDesc = getParentEdgeAt(0)->getDesc(); - auto dstDesc = getChildEdgeAt(0)->getDesc(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); int realRois = 0; for (; realRois < nn; realRois++) { @@ -497,7 +496,7 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { int channelsEachClass = outputDim; if (!noTrans) { bottomTrans = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - numClasses = static_cast(getParentEdgeAt(2)->getDesc().getDims()[1]) / 2; + numClasses = static_cast(getParentEdgeAt(2)->getShape().getStaticDims()[1]) / 2; channelsEachClass /= numClasses; } @@ -534,8 +533,8 @@ struct MKLDNNPSROIPoolingNode::PSROIPoolingExecute { }; void MKLDNNPSROIPoolingNode::execute(mkldnn::stream strm) { - auto inputPrec = getParentEdgesAtPort(0)[0]->getDesc().getPrecision(); - auto outputPrec = getChildEdgesAtPort(0)[0]->getDesc().getPrecision(); + auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); + auto outputPrec = getChildEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); if (!((inputPrec == Precision::BF16 && outputPrec == Precision::BF16) || (inputPrec == Precision::FP32 && outputPrec == Precision::FP32))) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h index 24e015d3a6d..45f275fe1dd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h @@ -50,10 +50,9 @@ private: std::string errorPrefix; - void unpackParams(const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc, + void unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, int& hInputStride, 
int& wInputStride, int& hOutputStride, int& wOutputStride, - InferenceEngine::Layout& inFmt, InferenceEngine::Layout& outFmt, int& inBlockSize, int& outBlockSize, int& outBlockCount, unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding); @@ -61,12 +60,12 @@ private: template <typename inputType, typename outputType> void executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc); template <typename inputType, typename outputType> void executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, const int currentRoi, const int roiBatchInd, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc); template <typename inputType, typename outputType> void executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp index 33e625fce6f..86818d36140 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -65,8 +63,8 @@ void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector<DataConfigurator> inDataConf; - std::vector<DataConfigurator> outDataConf; + std::vector<PortConfigurator> inDataConf; + std::vector<PortConfigurator> outDataConf; if (!(getOriginalInputPrecisionAtPort(RANGE_START) == Precision::I32 && getOriginalInputPrecisionAtPort(RANGE_LIMIT) == Precision::I32 && @@ -78,23 +76,23 @@ getOriginalOutputPrecisionAtPort(0) == Precision::FP32)) { inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); outDataConf.reserve(1); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); } else { inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp); + inDataConf.emplace_back(LayoutType::ncsp); outDataConf.reserve(1); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp); + outDataConf.emplace_back(LayoutType::ncsp); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); } } void MKLDNNRangeNode::execute(mkldnn::stream strm) { StatusCode retcode = OK; - switch (getParentEdgeAt(0)->getDesc().getPrecision()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision()) { case Precision::FP32: retcode = rangeKernel<float>(); break; @@ -112,7 +110,7 @@ void MKLDNNRangeNode::execute(mkldnn::stream strm) { template <typename data_t> InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() noexcept { - size_t dst_size = (getChildEdgesAtPort(0)[0]->getDims())[0]; + size_t dst_size = (getChildEdgesAtPort(0)[0]->getShape().getStaticDims())[0]; data_t* dst_data = reinterpret_cast<data_t *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); data_t start =
reinterpret_cast(getParentEdgeAt(RANGE_START)->getMemoryPtr()->GetPtr())[0]; data_t limit = reinterpret_cast(getParentEdgeAt(RANGE_LIMIT)->getMemoryPtr()->GetPtr())[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp index 7828bc55f27..c76156ec4ae 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp @@ -1405,18 +1405,18 @@ void MKLDNNReduceNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " gets incorrect number of output edges!"; - if (getParentEdgeAt(REDUCE_INDEXES)->getDims().ndims() != 1) { + if (getParentEdgeAt(REDUCE_INDEXES)->getShape().getRank() != 1) { IE_THROW() << errorPrefix << " gets incorrect index vector dimension! Index vector should be 1 dimension."; } if (keep_dims) { - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() != getChildEdgeAt(0)->getDims().ndims()) + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() != getChildEdgeAt(0)->getShape().getRank()) IE_THROW() << errorPrefix << " gets incorrect number of input/output dimensions!"; } else { // In fact, after the Reduce operation, the shape must be a scalar if the previous one was 1d. // But for now, 0d tensor (scalar) is emulated as 1d tensor. Skip checking in such cases. - bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 1 && getChildEdgeAt(0)->getDims().ndims() == 1; - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= getChildEdgeAt(0)->getDims().ndims() && !is_emulated_0d_as_1d) + bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 1 && getChildEdgeAt(0)->getShape().getRank() == 1; + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() <= getChildEdgeAt(0)->getShape().getRank() && !is_emulated_0d_as_1d) IE_THROW() << errorPrefix << "gets incorrect number of input/output dimensions!"; } } @@ -1436,7 +1436,7 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { Precision inputPrecision = getOriginalInputPrecisionAtPort(REDUCE_DATA); Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); - jit_mode = (mayiuse(cpu::x64::sse41)) && getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= 5 && + jit_mode = (mayiuse(cpu::x64::sse41)) && getParentEdgeAt(REDUCE_DATA)->getShape().getRank() <= 5 && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), inputPrecision) != std::end(supportedPrecisions) && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), outputPrecision) != std::end(supportedPrecisions); @@ -1461,7 +1461,7 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { src_data_size = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.outConfs.resize(1); @@ -1474,10 +1474,12 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag outFormat, memory::data_type inDataType, memory::data_type outDataType, impl_desc_type impl_type) { - config.inConfs[REDUCE_DATA].desc = MKLDNNMemoryDesc(getParentEdgeAt(REDUCE_DATA)->getDims(), inDataType, inFormat); - config.inConfs[REDUCE_INDEXES].desc = MKLDNNMemoryDesc(getParentEdgeAt(REDUCE_INDEXES)->getDims(), memory::data_type::s32, memory::format_tag::x); 
- config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outDataType, outFormat); - supportedPrimitiveDescriptors.push_back({config, impl_type, outFormat}); + config.inConfs[REDUCE_DATA].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(), + inDataType, inFormat); + config.inConfs[REDUCE_INDEXES].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outDataType, outFormat); + supportedPrimitiveDescriptors.push_back({config, impl_type}); }; if (jit_mode) { @@ -1488,16 +1490,16 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::jit_avx2; } - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(REDUCE_DATA)->getDims().ndims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims().ndims())), inputDataType, outputDataType, impl_type); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), inputDataType, outputDataType, impl_type); if (keep_dims) { - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 4 && getParentEdgeAt(REDUCE_DATA)->getDims().ToSizeVector()[1] > 1) { + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 4 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nChw16c, memory::format_tag::nChw16c, inputDataType, outputDataType, impl_type); } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) { pushDesc(memory::format_tag::nChw8c, memory::format_tag::nChw8c, inputDataType, outputDataType, impl_type); } - } else if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 5 && getParentEdgeAt(REDUCE_DATA)->getDims().ToSizeVector()[1] > 1) { + } else if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 5 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nCdhw16c, memory::format_tag::nCdhw16c, inputDataType, outputDataType, impl_type); } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) { @@ -1506,8 +1508,8 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { } } } else { - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(REDUCE_DATA)->getDims().ndims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims().ndims())), + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), memory::data_type::f32, memory::data_type::f32, impl_desc_type::ref); } } @@ -1524,11 +1526,11 @@ void MKLDNNReduceNode::createPrimitive() { IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor"; auto selectedPD = getSelectedPrimitiveDescriptor(); - planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().isPlainFormat(); + planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp); auto jcp = jit_reduce_config_params(); - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].desc.getPrecision()); - jcp.dst_dt = 
MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].desc->getPrecision()); + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc->getPrecision()); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.planar_layout = planar_layout; @@ -1564,8 +1566,8 @@ void MKLDNNReduceNode::execute(mkldnn::stream strm) { const auto idx_data = reinterpret_cast<const int32_t *>(srcIndexesMemPtr->GetData()); size_t dst_size = dstMemPtr->GetSize(); - src_dims = getParentEdgeAt(REDUCE_DATA)->getDesc().getDims(); - src_strides = getParentEdgeAt(REDUCE_DATA)->getDesc().getBlockingDesc().getStrides(); + src_dims = getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(); + src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides(); dims_size = src_dims.size(); calc_process_dst_dims(idx_data); @@ -1930,9 +1932,9 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { inline void MKLDNNReduceNode::calc_process_dst_dims(const int32_t *idx_data) { SizeVector out_dims; - SizeVector dst_dims = getChildEdgeAt(0)->getDesc().getDims(); + SizeVector dst_dims = getChildEdgeAt(0)->getShape().getStaticDims(); std::set axes; - for (size_t i = 0; i < getParentEdgeAt(REDUCE_INDEXES)->getDims()[0]; i++) { + for (size_t i = 0; i < getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims()[0]; i++) { int32_t axis = idx_data[i]; if (axis < 0) axis += src_dims.size(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp index e4950732ab6..f7ddad8b679 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "common/blocked_desc_creator.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -28,45 +29,32 @@ void MKLDNNReferenceNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::LayerConfig config; - for (size_t i = 0; i < inDims.size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - - dataConfig.desc = MKLDNNMemoryDesc(inDims[i], - MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_input_element_type(i))), - MKLDNNMemory::GetPlainFormat(inDims[i])); - - config.inConfs.push_back(dataConfig); + std::vector<PortConfigurator> inputConfigurators; + inputConfigurators.reserve(inputShapes.size()); + for (size_t i = 0; i < inputShapes.size(); i++) { + inputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ngraphOp->get_input_element_type(i)), inputShapes[i]); } - for (size_t i = 0; i < outDims.size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - - dataConfig.desc = MKLDNNMemoryDesc(outDims[i], - MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_output_element_type(i))), - MKLDNNMemory::GetPlainFormat(outDims[i])); - - config.outConfs.push_back(dataConfig); + std::vector<PortConfigurator> outputConfigurators; + outputConfigurators.reserve(inputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { + outputConfigurators.emplace_back(LayoutType::ncsp,
convertPrecision(ngraphOp->get_output_element_type(i)), outputShapes[i]); } - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, memory::format_tag::undef}); + addSupportedPrimDesc(inputConfigurators, outputConfigurators, impl_desc_type::ref); } void MKLDNNReferenceNode::createPrimitive() {} void MKLDNNReferenceNode::execute(mkldnn::stream strm) { ngraph::HostTensorVector inputs; - for (size_t i = 0; i < inDims.size(); i++) { + for (size_t i = 0; i < inputShapes.size(); i++) { void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); inputs.push_back(std::make_shared(ngraphOp->get_input_element_type(i), ngraphOp->get_input_shape(i), srcDataPtr)); } ngraph::HostTensorVector outputs; - for (size_t i = 0; i < outDims.size(); i++) { + for (size_t i = 0; i < outputShapes.size(); i++) { void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetPtr(); outputs.push_back(std::make_shared(ngraphOp->get_output_element_type(i), ngraphOp->get_output_shape(i), dstDataPtr)); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp index af1159bb07d..c140baa88c5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp @@ -8,7 +8,7 @@ #include #include "ie_parallel.hpp" #include "mkldnn_region_yolo_node.h" -#include +#include #include #include "common/cpu_convert.h" #include @@ -291,8 +291,8 @@ void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_prec}}, - {{TensorDescCreatorTypes::ncsp, output_prec}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_prec}}, + {{LayoutType::ncsp, output_prec}}, impl_type); } @@ -367,13 +367,10 @@ inline void MKLDNNRegionYoloNode::calculate_logistic(size_t start_index, int cou } void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { - auto inputDesc = getParentEdgeAt(0)->getDesc(); - auto outputDesc = getChildEdgeAt(0)->getDesc(); - - size_t B = (inputDesc.getDims().size() > 0) ? inputDesc.getDims()[0] : 1; - size_t IC = (inputDesc.getDims().size() > 1) ? inputDesc.getDims()[1] : 1; - size_t IH = (inputDesc.getDims().size() > 2) ? inputDesc.getDims()[2] : 1; - size_t IW = (inputDesc.getDims().size() > 3) ? inputDesc.getDims()[3] : 1; + size_t B = (getParentEdgeAt(0)->getShape().getRank() > 0) ? getParentEdgeAt(0)->getShape().getStaticDims()[0] : 1; + size_t IC = (getParentEdgeAt(0)->getShape().getRank() > 1) ? getParentEdgeAt(0)->getShape().getStaticDims()[1] : 1; + size_t IH = (getParentEdgeAt(0)->getShape().getRank() > 2) ? getParentEdgeAt(0)->getShape().getStaticDims()[2] : 1; + size_t IW = (getParentEdgeAt(0)->getShape().getRank() > 3) ? 
getParentEdgeAt(0)->getShape().getStaticDims()[3] : 1; size_t mask_size = mask.size(); int end_index = 0; @@ -400,7 +397,8 @@ void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - cpu_convert(src_data, dst_data, inputDesc.getPrecision(), outputDesc.getPrecision(), output_size); + cpu_convert(src_data, dst_data, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(), output_size); for (int b = 0; b < B; b++) { for (int n = 0; n < num_; n++) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index c318468ef1e..99bd606a9a0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -25,10 +25,6 @@ MKLDNNReorderNode::MKLDNNReorderNode(const std::string& name, const mkldnn::engi MKLDNNNode("Reorder", name, eng, w_cache) { } void MKLDNNReorderNode::getSupportedDescriptors() { - if (outDims.empty() && output.getLayout() != InferenceEngine::Layout::ANY) - outDims.push_back(MKLDNNDims(output.getDims())); - if (inDims.empty() && input.getLayout() != InferenceEngine::Layout::ANY) - inDims.push_back(MKLDNNDims(input.getDims())); if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) @@ -39,13 +35,10 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto inputDataType = MKLDNNMemoryDesc(input).getDataType(); - auto outputDataType = MKLDNNMemoryDesc(output).getDataType(); - auto parent = getParentEdgeAt(0)->getParent(); auto child = getChildEdgeAt(0)->getChild(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -57,19 +50,18 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = 0; config.outConfs[0].inPlace = 0; } - if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) { - config.inConfs[0].desc = input; - config.outConfs[0].desc = output; + if (input && output) { + config.inConfs[0].desc = input->clone(); + config.outConfs[0].desc = output->clone(); } else if (parent->getSelectedPrimitiveDescriptor() != nullptr && child->getSelectedPrimitiveDescriptor() != nullptr) { - config.inConfs[0].desc = parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; - config.outConfs[0].desc = child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; + config.inConfs[0].desc = parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc->clone(); + config.outConfs[0].desc = child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->clone(); } else { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::any); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::any); + IE_THROW() << "Cannot initialize supported PDs for Reorder node with name `" << getName() << "`"; } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::reorder, MKLDNNMemory::Convert(config.outConfs[0].desc.getLayout())); + 
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::reorder); } void MKLDNNReorderNode::createPrimitive() { @@ -82,21 +74,23 @@ void MKLDNNReorderNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; + auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); + if (!isOptimized) { - if (MKLDNNPlugin::one_of(getParentEdgeAt(0)->getDims().ndims(), 4, 5) && - getParentEdgeAt(0)->getDims()[1] <= 64 && - getParentEdgeAt(0)->getDims()[1] >= 16 && - (getParentEdgeAt(0)->getMemory().GetElementsCount() / getParentEdgeAt(0)->getDims()[1]) >= 128 && - getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat() && - getChildEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && - getParentEdgeAt(0)->getMemory().GetDesc().getDataType() == memory::data_type::f32 && - getChildEdgeAt(0)->getMemory().GetDesc().getDataType() == memory::data_type::f32) { + if (MKLDNNPlugin::one_of(inDims.size(), 4, 5) && + inDims[1] <= 64 && + inDims[1] >= 16 && + (getParentEdgeAt(0)->getMemory().GetElementsCount() / inDims[1]) >= 128 && + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && + getParentEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32 && + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32) { // oneDNN JIT reorder shows bad perf for nspc to ncsp reorder case so we fallback on simple c++ implementation canUseOptimizedNspc2Ncsp = true; } else if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) && - MKLDNNPlugin::one_of(getParentEdgeAt(0)->getDims().ndims(), 4, 5) && - getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && - getChildEdgeAt(0)->getMemory().GetDesc().isTailCFormat() && + MKLDNNPlugin::one_of(inDims.size(), 4, 5) && + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && getParentEdgeAt(0)->getMemory().GetDataType() == getChildEdgeAt(0)->getMemory().GetDataType() && MKLDNNExtensionUtils::sizeOfDataType(getParentEdgeAt(0)->getMemory().GetDataType()) == 1) { // oneDNN doesn't provide JIT reorder impl for non-avx2 targets so we fallback on simple c++ implementation which shows better perf @@ -110,29 +104,12 @@ void MKLDNNReorderNode::createPrimitive() { void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr) { src_blocked = std::make_shared(getEngine()); - src_blocked->Create(srcDesc, srcPtr, false); + src_blocked->Create(MKLDNNMemoryDesc(srcDesc), srcPtr, false); dst_blocked = std::make_shared(getEngine()); - dst_blocked->Create(dstDesc, dstPtr, false); + dst_blocked->Create(MKLDNNMemoryDesc(dstDesc), dstPtr, false); mkldnn::primitive_attr attr; - - if (_scales) { - std::vector scales; - - float* scaleData = static_cast(_scales->buffer()); - - for (size_t i = 0; i < _scales->size(); i++) { - scales.push_back(scaleData[i]); - } - - int mask = 0; - int oc_dim_id = 1; - mask = 1 << oc_dim_id; - - attr.set_output_scales(mask, scales); - } - auto createReorder = [&]() -> bool { // No autoblocking. 
Reorder can be applied as is reorder::primitive_desc pd = mkldnn::reorder::primitive_desc(src_blocked->GetPrimitive(), dst_blocked->GetPrimitive(), attr, true); @@ -159,13 +136,13 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe // MKLDNN doesn't support direct reorders from planar data formats to grouped weights formats. // Code block below tries to detect such cases and reinterpret data planar formats (e.g. nchw) // as grouped weights planar formats (e.g. goihw) since they have same physical memory layout. - if (src_blocked->GetDesc().isPlainFormat() && + if (src_blocked->GetDesc().hasLayoutType(LayoutType::ncsp) && src_blocked->GetDims().size() + 1 == dst_blocked->GetDims().size()) { const auto newDims = dst_blocked->GetDims(); - const auto newFormat = MKLDNNMemory::GetPlainFormat(newDims); + const auto newFormat = MKLDNNMemory::GetPlainFormatByRank(newDims.size()); auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat); - src_blocked->Create(newDesc, srcPtr, false); + src_blocked->Create(MKLDNNMemoryDesc(newDesc), srcPtr, false); success = createReorder(); } @@ -192,12 +169,14 @@ bool MKLDNNReorderNode::created() const { void MKLDNNReorderNode::optimizedNcsp2Nspc() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t DIM0 = parentEdge->getDims()[0]; - const size_t DIM1 = parentEdge->getDims()[1]; - const size_t DIM2 = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t DIM3 = parentEdge->getDims()[ndims - 2]; - const size_t DIM4 = parentEdge->getDims()[ndims - 1]; + + auto inDims = parentEdge->getShape().getStaticDims(); + const size_t ndims = inDims.size(); + const size_t DIM0 = inDims[0]; + const size_t DIM1 = inDims[1]; + const size_t DIM2 = ndims == 5 ? inDims[ndims - 3] : 1; + const size_t DIM3 = inDims[ndims - 2]; + const size_t DIM4 = inDims[ndims - 1]; auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); @@ -221,12 +200,14 @@ void MKLDNNReorderNode::optimizedNcsp2Nspc() { void MKLDNNReorderNode::optimizedNspc2Ncsp() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t DIM0 = parentEdge->getDims()[0]; - const size_t DIM1 = parentEdge->getDims()[1]; - const size_t DIM2 = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t DIM3 = parentEdge->getDims()[ndims - 2]; - const size_t DIM4 = parentEdge->getDims()[ndims - 1]; + + auto inDims = parentEdge->getShape().getStaticDims(); + const size_t ndims = inDims.size(); + const size_t DIM0 = inDims[0]; + const size_t DIM1 = inDims[1]; + const size_t DIM2 = ndims == 5 ? inDims[ndims - 3] : 1; + const size_t DIM3 = inDims[ndims - 2]; + const size_t DIM4 = inDims[ndims - 1]; auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); @@ -279,4 +260,20 @@ void MKLDNNReorderNode::setDynamicBatchLim(int lim) { createReorderPrimitive(src_d, src_data_hdl, dst_d, dst_data_hdl); } } + +std::string MKLDNNReorderNode::getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc) { + std::string inArgs, outArgs; + if (parentDesc.getPrecision() != childDesc.getPrecision()) { + inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name()); + outArgs += (outArgs.empty() ? 
"" : "_") + std::string(childDesc.getPrecision().name()); + } + auto formatSrc = parentDesc.serializeFormat(); + auto formatDst = childDesc.serializeFormat(); + if (formatSrc != formatDst || one_of(std::string("undef"), formatSrc, formatDst)) { + inArgs += (inArgs.empty() ? "" : "_") + formatSrc; + outArgs += (outArgs.empty() ? "" : "_") + formatDst; + } + return inArgs + "_" + outArgs; +} + REG_MKLDNN_PRIM_FOR(MKLDNNReorderNode, Reorder); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h index 729097453fb..da821878035 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace MKLDNNPlugin { @@ -24,9 +25,14 @@ public: bool created() const override; const std::vector& getPrimitivesPriority() override; - void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) { - this->input = input; - this->output = output; + void setDescs(const MemoryDesc& input, const MemoryDesc& output) { + this->input = input.clone(); + inputShapes.clear(); + inputShapes.push_back(this->input->getShape()); + + this->output = output.clone(); + outputShapes.clear(); + outputShapes.push_back(this->output->getShape()); } void setOptimized(bool isOptimized) { @@ -39,17 +45,14 @@ public: return false; } - const InferenceEngine::TensorDesc& getInput() { return input; } - const InferenceEngine::TensorDesc& getOutput() { return output; } + const MemoryDesc& getInput() { return *input; } + const MemoryDesc& getOutput() { return *output; } - /** - * @brief A pointer to a scales blob - */ - InferenceEngine::Blob::Ptr _scales; + static std::string getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc); private: - InferenceEngine::TensorDesc input; - InferenceEngine::TensorDesc output; + std::unique_ptr input; + std::unique_ptr output; MKLDNNMemoryPtr dst_blocked; MKLDNNMemoryPtr src_blocked; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp index 3db7470e92f..48e2eaf9992 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -48,8 +46,8 @@ void MKLDNNReorgYoloNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -57,10 +55,10 @@ void MKLDNNReorgYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - int IW = (getParentEdgeAt(0)->getDesc().getDims().size() > 3) ? getParentEdgeAt(0)->getDims()[3] : 1; - int IH = (getParentEdgeAt(0)->getDesc().getDims().size() > 2) ? getParentEdgeAt(0)->getDims()[2] : 1; - int IC = (getParentEdgeAt(0)->getDesc().getDims().size() > 1) ? getParentEdgeAt(0)->getDims()[1] : 1; - int B = (getParentEdgeAt(0)->getDesc().getDims().size() > 0) ? 
getParentEdgeAt(0)->getDims()[0] : 1; + int IW = (getParentEdgeAt(0)->getShape().getRank() > 3) ? getParentEdgeAt(0)->getShape().getStaticDims()[3] : 1; + int IH = (getParentEdgeAt(0)->getShape().getRank() > 2) ? getParentEdgeAt(0)->getShape().getStaticDims()[2] : 1; + int IC = (getParentEdgeAt(0)->getShape().getRank() > 1) ? getParentEdgeAt(0)->getShape().getStaticDims()[1] : 1; + int B = (getParentEdgeAt(0)->getShape().getRank() > 0) ? getParentEdgeAt(0)->getShape().getStaticDims()[0] : 1; int ic_off = IC / (stride * stride); int ih_off = IH * stride; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp index 543e0a86bcb..81175dcaf41 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp @@ -35,18 +35,18 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { if (inputDataType != outputDataType) inputDataType = outputDataType; - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(getParentEdges().size()); for (size_t i = 0; i getDims(), inputDataType); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(i)->getShape().getStaticDims(), inputDataType); } config.outConfs.resize(1); config.outConfs[0].inPlace = 0; config.outConfs[0].constant = false; - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp index 5f6e6083e90..ffa831a670d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -85,9 +83,9 @@ void MKLDNNReverseSequenceNode::initSupportedPrimitiveDescriptors() { if (lengthsPrecision != Precision::I32 && lengthsPrecision != Precision::FP32) lengthsPrecision = Precision::I32; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, lengthsPrecision}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, lengthsPrecision}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -96,7 +94,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { const float *src_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_DATA)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getDesc().getPrecision()) { + switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision()) { case Precision::FP32: { float *seq_lengths_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemoryPtr()->GetPtr()); for (i = 0; i < src_dims[batch_axis]; i++) { @@ -171,7 +169,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { break; default: IE_THROW() << "ReverseSequence layer does not support " - << 
getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getDesc().getPrecision() << " precision"; + << getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision() << " precision"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp index a85544e9e96..91201da8592 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp @@ -260,19 +260,19 @@ void MKLDNNRNN::initCell(const std::shared_ptr& op) { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; // Expected shapes - MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; + std::vector D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; - if (in_data_dims != D_shape.ToSizeVector() - || in_h_state_dims != S_shape.ToSizeVector() - || out_h_state_dims != S_shape.ToSizeVector()) + if (in_data_dims != D_shape + || in_h_state_dims != S_shape + || out_h_state_dims != S_shape) IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); if (S == 2) { auto in_c_state_dims = op->get_input_shape(2); auto out_c_state_dims = op->get_output_shape(1); - if (in_c_state_dims != S_shape.ToSizeVector() - || out_c_state_dims != S_shape.ToSizeVector()) + if (in_c_state_dims != S_shape + || out_c_state_dims != S_shape) IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); } } @@ -281,52 +281,57 @@ void MKLDNNRNN::fillCellDesc() { runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - MKLDNNDims S_4D_shape {L, D, N, SC}; + std::vector S_4D_shape {L, D, N, SC}; // layer input plus states - in_data_d.resize(S + 1); - out_data_d.resize(S + 1); + in_data_d.reserve(S + 1); + out_data_d.reserve(S + 1); // Shapes and Attributes are correct. Can start internal stuff initialization. 
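// Minimal standalone sketch of the container change applied in the RNN hunks nearby:
// the descriptor vectors switch from resize() + indexed assignment to reserve() +
// emplace_back(), so elements are constructed in place instead of being
// default-constructed and then overwritten. The Desc type and the dims/tags below are
// hypothetical stand-ins, not the plugin's MKLDNNMemoryDesc or its real shapes.
#include <cstddef>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

struct Desc {
    std::vector<std::int64_t> dims;
    std::string tag;
    Desc(std::vector<std::int64_t> d, std::string t) : dims(std::move(d)), tag(std::move(t)) {}
};

int main() {
    const std::size_t numStates = 2;                  // e.g. hidden + cell state
    std::vector<Desc> in_d;
    in_d.reserve(numStates + 1);                      // layer input plus states
    in_d.emplace_back(std::vector<std::int64_t>{10, 1, 16}, "tnc");    // layer data
    in_d.emplace_back(std::vector<std::int64_t>{1, 1, 1, 32}, "ldnc"); // hidden state
    in_d.emplace_back(std::vector<std::int64_t>{1, 1, 1, 32}, "ldnc"); // cell state
    return in_d.size() == numStates + 1 ? 0 : 1;
}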
- in_data_d[RNNInOutKind::Layer] = {MKLDNNDims{T, N, DC}, dataType, memory::format_tag::tnc}; - out_data_d[RNNInOutKind::Layer] = {MKLDNNDims{T, N, SC}, dataType, memory::format_tag::tnc}; + in_data_d.emplace_back(std::vector{T, N, DC}, dataType, memory::format_tag::tnc); + out_data_d.emplace_back(std::vector{T, N, SC}, dataType, memory::format_tag::tnc); - in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; + in_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); + out_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); if (haveCellState(cell_type)) { - in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; + in_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); + out_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; - w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; + w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); + w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); // Add 5th input - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); // Expected shapes - MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; - std::vector in_candidate, out_candidate; + std::vector D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; + std::vector in_candidate, out_candidate; in_candidate.reserve(6); - in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); - out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); + in_candidate.emplace_back(D_shape, dataType, memory::format_tag::nc); + in_candidate.emplace_back(S_shape, dataType, memory::format_tag::nc); + out_candidate.emplace_back(S_shape, dataType, memory::format_tag::nc); if (haveCellState(cell_type)) { - in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc}); - out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(S_shape, memory::data_type::f32, memory::format_tag::nc); + out_candidate.emplace_back(S_shape, memory::data_type::f32, memory::format_tag::nc); } if (one_of(cell_type, mkldnn::algorithm::vanilla_rnn, mkldnn::algorithm::vanilla_gru, mkldnn::algorithm::lbr_gru, mkldnn::algorithm::vanilla_lstm)) { - in_candidate.emplace_back(MKLDNNMemoryDesc {WShape, memory::data_type::f32, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {RShape, memory::data_type::f32, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {BShape, memory::data_type::f32, memory::format_tag::x}); + in_candidate.emplace_back(WShape, memory::data_type::f32, memory::format_tag::nc); + in_candidate.emplace_back(RShape, 
memory::data_type::f32, memory::format_tag::nc); + in_candidate.emplace_back(BShape, memory::data_type::f32, memory::format_tag::x); } - createDescriptor(in_candidate, out_candidate); + std::vector in_candidate_ptrs(in_candidate.size()); + std::vector out_candidate_ptrs(out_candidate.size()); + std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + + createDescriptor(in_candidate_ptrs, out_candidate_ptrs); } void MKLDNNRNN::initSeq(const std::shared_ptr& op) { @@ -373,64 +378,71 @@ void MKLDNNRNN::initSeq(const std::shared_ptr& op) { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; // layer input plus states - in_data_d.resize(S + 1); - out_data_d.resize(S + 1); + in_data_d.reserve(S + 1); + out_data_d.reserve(S + 1); } void MKLDNNRNN::fillSeqDesc() { runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - MKLDNNDims S_4D_shape {L, D, N, SC}; + std::vector S_4D_shape {L, D, N, SC}; // Try to create descriptor and corresponding configuration - in_data_d[RNNInOutKind::Layer] = {MKLDNNDims{in_data_dims}, dataType, memory::format_tag::tnc}; - out_data_d[RNNInOutKind::Layer] = {MKLDNNDims{out_data_dims}, dataType, memory::format_tag::tnc}; + in_data_d.emplace_back(std::vector{in_data_dims}, dataType, memory::format_tag::tnc); + out_data_d.emplace_back(std::vector{out_data_dims}, dataType, memory::format_tag::tnc); - in_data_d[RNNInOutKind::HiddenState] = {MKLDNNDims{S_4D_shape}, dataType, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::HiddenState] = {MKLDNNDims{S_4D_shape}, dataType, memory::format_tag::ldnc}; + in_data_d.emplace_back(std::vector{S_4D_shape}, dataType, memory::format_tag::ldnc); + out_data_d.emplace_back(std::vector{S_4D_shape}, dataType, memory::format_tag::ldnc); if (haveCellState(cell_type)) { - in_data_d[RNNInOutKind::CellState] = {MKLDNNDims{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::CellState] = {MKLDNNDims{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc}; + in_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); + out_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; - w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; + w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); + w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); - std::vector in_candidate; + std::vector in_candidate; + in_candidate.reserve(7); if (nativeOrder) - in_candidate.push_back(MKLDNNMemoryDesc{inDims[RNNInOutKind::Layer], dataType, memory::format_tag::tnc}); + in_candidate.emplace_back(inputShapes[RNNInOutKind::Layer].getStaticDims(), dataType, memory::format_tag::tnc); else - in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc}); + in_candidate.emplace_back(std::vector{N, T, DC}, dataType, 
memory::format_tag::ntc); - in_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, dataType, memory::format_tag::ntc}); // initial hidden state + in_candidate.emplace_back(std::vector{N, D, SC}, dataType, memory::format_tag::ntc); // initial hidden state if (haveCellState(cell_type)) - in_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc}); // initial cell state - in_candidate.push_back(MKLDNNMemoryDesc{{N}, memory::data_type::s32, memory::format_tag::x}); // sequence lengths - in_candidate.push_back(MKLDNNMemoryDesc{{D, G * SC, DC}, memory::data_type::f32, memory::format_tag::ntc}); // W - in_candidate.push_back(MKLDNNMemoryDesc{{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc}); // R - in_candidate.push_back(MKLDNNMemoryDesc{{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc}); // B + in_candidate.emplace_back(std::vector{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc); // initial cell state + in_candidate.emplace_back(std::vector{N}, memory::data_type::s32, memory::format_tag::x); // sequence lengths + in_candidate.emplace_back(std::vector{D, G * SC, DC}, memory::data_type::f32, memory::format_tag::ntc); // W + in_candidate.emplace_back(std::vector{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc); // R + in_candidate.emplace_back(std::vector{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc); // B - std::vector out_candidate; + std::vector out_candidate; + out_candidate.reserve(3); if (nativeOrder) { - out_candidate.push_back(out_data_d[RNNInOutKind::Layer]); + out_candidate.emplace_back(out_data_d[RNNInOutKind::Layer]); } else { // TODO reorder ntc -> ndtc does not work, thus use tnc(plain) + transformation reshape-transpose-reshape for now. 
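// Minimal standalone sketch of the pointer-conversion pattern used just below, where
// createDescriptor() now takes vectors of raw const pointers instead of descriptor
// objects: the candidates stay owned by the local *_candidate vectors and std::transform
// fills non-owning pointer vectors for the call. The Desc type and the free
// createDescriptor() function are hypothetical placeholders for the plugin's
// MKLDNNMemoryDesc and member function.
#include <algorithm>
#include <iostream>
#include <vector>

struct Desc { int rank; };

static void createDescriptor(const std::vector<const Desc*>& in,
                             const std::vector<const Desc*>& out) {
    std::cout << "in=" << in.size() << " out=" << out.size() << '\n';
}

int main() {
    std::vector<Desc> in_candidate  = {{3}, {3}, {1}};
    std::vector<Desc> out_candidate = {{3}, {3}};

    // Keep ownership in the candidate vectors; pass non-owning pointers to the interface.
    std::vector<const Desc*> in_ptrs(in_candidate.size());
    std::vector<const Desc*> out_ptrs(out_candidate.size());
    std::transform(in_candidate.begin(), in_candidate.end(), in_ptrs.begin(),
                   [](const Desc& item) { return &item; });
    std::transform(out_candidate.begin(), out_candidate.end(), out_ptrs.begin(),
                   [](const Desc& item) { return &item; });

    createDescriptor(in_ptrs, out_ptrs);
    return 0;
}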
- out_candidate.push_back(MKLDNNMemoryDesc{{T, N, SC}, dataType, memory::format_tag::tnc}); + out_candidate.emplace_back(std::vector{T, N, SC}, dataType, memory::format_tag::tnc); } - out_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, dataType, memory::format_tag::ntc}); + out_candidate.emplace_back(std::vector{N, D, SC}, dataType, memory::format_tag::ntc); if (haveCellState(cell_type)) - out_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc}); + out_candidate.emplace_back(std::vector{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc); - createDescriptor(in_candidate, out_candidate); + std::vector in_candidate_ptrs(in_candidate.size()); + std::vector out_candidate_ptrs(out_candidate.size()); + std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + + createDescriptor(in_candidate_ptrs, out_candidate_ptrs); } bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) { @@ -447,14 +459,14 @@ void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t } // create weight blobs (data and state part) auto w_data_mem = std::make_shared(getEngine()); - w_data_mem->Create(w_data_d); + w_data_mem->Create(*w_data_d); internalBlobMemory.push_back(w_data_mem); auto w_state_mem = std::make_shared(getEngine()); - w_state_mem->Create(w_state_d); + w_state_mem->Create(*w_state_d); internalBlobMemory.push_back(w_state_mem); - const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getDims().size(); - const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getDims().size(); + const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getShape().getElementsCount(); + const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getShape().getElementsCount(); auto *wInputNode = dynamic_cast(getParentEdgesAtPort(wIdx)[0]->getParent().get()); auto wConstBlob = wInputNode->getMemoryPtr(); @@ -504,7 +516,7 @@ void MKLDNNRNN::fillBiases(const int *gate_map) { } auto w_bias_mem = std::make_shared(getEngine()); - w_bias_mem->Create(w_bias_d); + w_bias_mem->Create(*w_bias_d); internalBlobMemory.push_back(w_bias_mem); auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); @@ -590,18 +602,17 @@ void MKLDNNRNN::copyWeightsData() { if (runtimePrecision == Precision::BF16 || runtimePrecision == Precision::FP32) fillBiases(gate_map); } - -void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { +void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { switch (cell_type) { case mkldnn::algorithm::vanilla_rnn: { MKLDNNDescriptor desc(std::shared_ptr( new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -611,9 +622,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, new gru_forward::desc(prop_kind::forward_scoring, direction, /* In 
Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -623,9 +634,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, new lbr_gru_forward::desc(prop_kind::forward_scoring, direction, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -636,9 +647,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], /* In State C */ in_data_d[RNNInOutKind::CellState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState], /* Out State C */ out_data_d[RNNInOutKind::CellState]))); @@ -649,21 +660,21 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, } // Fill supported config - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; for (size_t i = 0; i < inputDesc.size(); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = inputDesc[i]; + dataConfig.desc = inputDesc[i]->clone(); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < outputDesc.size(); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = outputDesc[i]; + dataConfig.desc = outputDesc[i]->clone(); config.outConfs.push_back(dataConfig); } @@ -705,9 +716,9 @@ void MKLDNNRNN::execute(mkldnn::stream strm) { args[state_o_tags[s]] = getChildEdgesAtPort(s)[0]->getMemoryPtr()->GetPrimitive(); } } else { - ptrdiff_t n_ports_with_init_states = outDims.size() - 1; // first is a sequence data + size_t n_ports_with_init_states = outputShapes.size() - 1; // first is a sequence data for (size_t s = 0; s < std::min(S, n_ports_with_init_states); s++) { - if (s < inDims.size()) { + if (s < outputShapes.size()) { args[state_o_tags[s]] = getChildEdgesAtPort(s+1)[0]->getMemoryPtr()->GetPrimitive(); } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index 7b42760a425..0a2bd93d3d9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -19,8 +19,8 @@ public: void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void execute(mkldnn::stream strm) override; @@ -40,6 +40,8 @@ private: void 
copyWeightsData(); private: + using MKLDNNMemoryDescPtr = std::unique_ptr; + InferenceEngine::Precision runtimePrecision; /** Specify mode Cell or Seq. true - Cell, false - Seq */ bool is_cell = false; @@ -57,15 +59,15 @@ private: mkldnn::algorithm cell_act = mkldnn::algorithm::eltwise_tanh; // Internal attributes - ptrdiff_t N = 0; /**< Batch value */ - ptrdiff_t T = 0; /**< Sequence value */ - ptrdiff_t DC = 0; /**< Input data channel size */ - ptrdiff_t SC = 0; /**< State channel size value */ - ptrdiff_t G = 0; /**< Gate size. LSTM - 4, GRU - 3, RNN - 1 */ - ptrdiff_t Gb = 0; /**< Gate size for biases. Gb = GRU_lbr ? G+1 : G */ - ptrdiff_t S = 2; /**< Num of state. LSTM - 2, GRU & RNN - 1 */ - const ptrdiff_t L = 1; /**< What is it??. Constant for mkldnn impl */ - const ptrdiff_t D = 1; /**< Num of direction. 1 or 2 */ + size_t N = 0; /**< Batch value */ + size_t T = 0; /**< Sequence value */ + size_t DC = 0; /**< Input data channel size */ + size_t SC = 0; /**< State channel size value */ + size_t G = 0; /**< Gate size. LSTM - 4, GRU - 3, RNN - 1 */ + size_t Gb = 0; /**< Gate size for biases. Gb = GRU_lbr ? G+1 : G */ + size_t S = 2; /**< Num of state. LSTM - 2, GRU & RNN - 1 */ + const size_t L = 1; /**< What is it??. Constant for mkldnn impl */ + const size_t D = 1; /**< Num of direction. 1 or 2 */ std::vector in_data_d; std::vector out_data_d; @@ -76,9 +78,9 @@ private: CellState = 2 }; - MKLDNNMemoryDesc w_data_d; - MKLDNNMemoryDesc w_state_d; - MKLDNNMemoryDesc w_bias_d; + MKLDNNMemoryDescPtr w_data_d; + MKLDNNMemoryDescPtr w_state_d; + MKLDNNMemoryDescPtr w_bias_d; std::vector in_data_dims; std::vector out_data_dims; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp index 1aa7752f456..0517350e09c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp @@ -73,31 +73,31 @@ void MKLDNNROIAlignNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 2) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 2) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getParentEdgeAt(2)->getDims().ndims() != 1) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getDims().ndims(); + if (getParentEdgeAt(2)->getShape().getRank() != 1) { + IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } - if 
(getParentEdgeAt(1)->getDims()[1] != 4) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[1] != 4) { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" - << getParentEdgeAt(1)->getDims()[0] << "," << getParentEdgeAt(1)->getDims()[1] << "]"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << "," << getParentEdgeAt(1)->getShape().getStaticDims()[1] << "]"; } - if (getParentEdgeAt(1)->getDims()[0] != getParentEdgeAt(2)->getDims()[0]) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[0] != getParentEdgeAt(2)->getShape().getStaticDims()[0]) { IE_THROW() << errorPrefix << "has different sizes of inputs for proposals (" - << getParentEdgeAt(1)->getDims()[0] << ") and indexes (" - << getParentEdgeAt(2)->getDims()[0] << ")"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << ") and indexes (" + << getParentEdgeAt(2)->getShape().getStaticDims()[0] << ")"; } } @@ -116,7 +116,7 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrec0); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrec); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(3); config.outConfs.resize(1); @@ -129,11 +129,13 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { }; for (auto fmts : supportedFormats) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, fmts.first); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, memory::format_tag::nc); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmts.second); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, fmts.second}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, fmts.first); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nc); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::s32, + memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, fmts.second); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } @@ -178,8 +180,8 @@ void MKLDNNROIAlignNode::executeSpecified() { auto dstBlockDesc = dstMemory.GetDescriptor().data.format_desc.blocking; int blockSize = srcBlockDesc.inner_nblks > 0 ? 
srcBlockDesc.inner_blks[0] : 1; - auto isPlainFmt = srcMemory0.GetDesc().isPlainFormat(); - auto isNhwcFmt = srcMemory0.GetDesc().isTailCFormat(); + auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); + auto isNhwcFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index a1a7f8329a5..23fd252ae2b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -354,21 +354,21 @@ void MKLDNNROIPoolingNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 2) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 2) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims()[1] != 5) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[1] != 5) { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" - << getParentEdgeAt(1)->getDims()[0] << "," << getParentEdgeAt(1)->getDims()[1] << "]"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << "," << getParentEdgeAt(1)->getShape().getStaticDims()[1] << "]"; } } @@ -388,7 +388,7 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { src_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.inConfs[0].constant = false; @@ -400,7 +400,7 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].constant = false; config.outConfs[0].inPlace = -1; - auto parentDims = getParentEdgeAt(0)->getDims(); + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); auto format = mayiuse(avx512_common) ? 
memory::format_tag::nChw16c : memory::format_tag::nChw8c; impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_common)) { @@ -413,10 +413,10 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), dataType, format); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), dataType, memory::format_tag::nc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_type, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), dataType, format); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), dataType, memory::format_tag::nc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } void MKLDNNROIPoolingNode::createPrimitive() { @@ -428,8 +428,8 @@ void MKLDNNROIPoolingNode::createPrimitive() { const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8; jpp.c_block = simd_w; - auto inDims = config.inConfs[0].desc.getDims(); - auto outDims = config.outConfs[0].desc.getDims(); + auto inDims = config.inConfs[0].desc->getShape().getStaticDims(); + auto outDims = config.outConfs[0].desc->getShape().getStaticDims(); jpp.mb = outDims[0]; jpp.c = rnd_up(inDims[1], simd_w); @@ -447,8 +447,8 @@ void MKLDNNROIPoolingNode::createPrimitive() { jpp.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 15 : 7; auto selectedPD = getSelectedPrimitiveDescriptor(); - jpp.src_prc = selectedPD->getConfig().inConfs[0].desc.getPrecision(); - jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc.getPrecision(); + jpp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision(); + jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision(); jpp.src_data_size = jpp.src_prc.size(); jpp.dst_data_size = jpp.dst_prc.size(); @@ -481,9 +481,9 @@ void MKLDNNROIPoolingNode::execute() { IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto src_strides = config.inConfs[0].desc.getBlockingDesc().getStrides(); - auto dst_strides = config.outConfs[0].desc.getBlockingDesc().getStrides(); - size_t src_roi_step = config.inConfs[1].desc.getBlockingDesc().getStrides()[0]; + auto src_strides = srcMemory0.GetDescWithType().getStrides(); + auto dst_strides = dstMemory.GetDescWithType().getStrides(); + size_t src_roi_step = srcMemory1.GetDescWithType().getStrides()[0]; int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking); int MB = jpp.mb; @@ -512,13 +512,18 @@ void MKLDNNROIPoolingNode::execute() { if (roi_pooling_kernel) { arg.bin_area = 0; arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; + (*roi_pooling_kernel)(&arg); } else { - for (int c = 0; c < c_block; c++) { - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + } } } 
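// Minimal standalone sketch of the channel-block loop the ROI pooling reference
// branches below now repeat in several places: each work item walks the channel
// blocks assigned to it (cbb * cb_num + cbb_cur) and stops early once the index runs
// past the real block count, since the last work item may own fewer than cb_num blocks.
// The names nb_c, cb_num and c_block mirror the fields used in the patch, but the
// buffer layout here is a simplified stand-in, not the node's real strides.
#include <cstddef>
#include <vector>

int main() {
    const std::size_t nb_c    = 10;  // total channel blocks
    const std::size_t cb_num  = 4;   // channel blocks handled per work item
    const std::size_t c_block = 8;   // channels per block
    std::vector<float> dst(nb_c * c_block, -1.0f);

    const std::size_t work_items = (nb_c + cb_num - 1) / cb_num;
    for (std::size_t cbb = 0; cbb < work_items; ++cbb) {
        for (std::size_t cbb_cur = 0; cbb_cur < cb_num; ++cbb_cur) {
            const std::size_t ch_blk_cur = cbb * cb_num + cbb_cur;
            if (ch_blk_cur >= nb_c)
                break;  // current work item owns fewer than cb_num blocks
            for (std::size_t c = 0; c < c_block; ++c)
                dst[ch_blk_cur * c_block + c] = 0.0f;  // e.g. zero-fill the output bin
        }
    }

    // Every element is visited exactly once, with no out-of-range block index.
    for (float v : dst)
        if (v != 0.0f) return 1;
    return 0;
}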
- - (*roi_pooling_kernel)(&arg); } else { size_t roi_off = n * src_roi_step; const auto *src_roi_ptr = &src_roi[roi_off]; @@ -568,18 +573,23 @@ void MKLDNNROIPoolingNode::execute() { arg.kh = hend - hstart; arg.kw = wend - wstart; } else { - for (int c = 0; c < c_block; c++) { - const size_t pool_index = n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c; - if ((hend <= hstart) || (wend <= wstart)) { - dst[pool_index] = 0; - } else { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - float batch_data = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - h * src_strides[2] + w * src_strides[3] + c]; - - if (batch_data > dst[pool_index]) { - dst[pool_index] = batch_data; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + const size_t pool_index = n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c; + if ((hend <= hstart) || (wend <= wstart)) { + dst[pool_index] = 0; + } else { + dst[pool_index] = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + hstart * src_strides[2] + wstart * src_strides[3] + c]; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + float batch_data = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + h * src_strides[2] + w * src_strides[3] + c]; + dst[pool_index] = std::fmax(batch_data, dst[pool_index]); } } } @@ -595,18 +605,35 @@ void MKLDNNROIPoolingNode::execute() { float height_scale = (jpp.pooled_h > 1 ? ((roi_end_h_ - roi_start_h_) * (jpp.ih - 1)) / (jpp.pooled_h - 1) : 0); float width_scale = (jpp.pooled_w > 1 ? ((roi_end_w_ - roi_start_w_) * (jpp.iw - 1)) / (jpp.pooled_w - 1) : 0); - float in_y = (jpp.pooled_h > 1 ? (oh * height_scale + roi_start_h_ * (jpp.ih - 1)) : - 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1)); - float in_x = (jpp.pooled_w > 1 ? (ow * width_scale + roi_start_w_ * (jpp.iw - 1)) : - 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1)); + float in_y, in_x; + // because of nonalgebraic character of floating point operation, some proposals can cause violation of inequality: + // ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1), + // and as result excess of right limit for proposal value, + // if the border case (current_h == pooled_h - 1) will not be handled explicitly + if (jpp.pooled_h > 1) { + in_y = (oh == jpp.pooled_h - 1 ? roi_end_h_ * (jpp.ih - 1) : (oh * height_scale + roi_start_h_ * (jpp.ih - 1))); + } else { + in_y = 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1); + } + if (jpp.pooled_w > 1) { + in_x = (ow == jpp.pooled_w - 1 ? 
roi_end_w_ * (jpp.iw - 1) : (ow * width_scale + roi_start_w_ * (jpp.iw - 1))); + } else { + in_x = 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1); + } if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) { if (roi_pooling_kernel) { arg.bin_area = 0; arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; } else { - for (int c = 0; c < c_block; c++) { - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + } } } } else { @@ -635,21 +662,27 @@ void MKLDNNROIPoolingNode::execute() { arg.bin_area = 1; } else { - for (int c = 0; c < 1; c++) { - const float top_left = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - top_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; - const float top_right = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - top_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; - const float bottom_left = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - bottom_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; - const float bottom_right = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - bottom_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + const float top_left = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + top_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; + const float top_right = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + top_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; + const float bottom_left = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + bottom_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; + const float bottom_right = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + bottom_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; - const float top = top_left + (top_right - top_left) * (in_x - left_x_index); - const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index); + const float top = top_left + (top_right - top_left) * (in_x - left_x_index); + const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index); - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = - top + (bottom - top) * (in_y - top_y_index); + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = + top + (bottom - top) * (in_y - top_y_index); + } } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp index 136ccba9c64..410051c7be4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp @@ -41,7 +41,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, 
const mk IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!"; } - shape = inDims[DATA_INDEX].ToSizeVector(); + shape = inputShapes[DATA_INDEX].getStaticDims(); const auto &dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == supportedPrecisionSizes.end()) @@ -52,7 +52,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk } numOfDims = shape.size(); - if (shape != outDims[0].ToSizeVector()) { + if (shape != outputShapes[0].getStaticDims()) { IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions"; } @@ -62,7 +62,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name(); } - const auto axesTensorRank = inDims[AXES_INDEX].ndims(); + const auto axesTensorRank = inputShapes[AXES_INDEX].getRank(); if (axesTensorRank > 1) { IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank; } @@ -73,7 +73,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name(); } - const auto shiftTensorRank = inDims[SHIFT_INDEX].ndims(); + const auto shiftTensorRank = inputShapes[SHIFT_INDEX].getRank(); if (shiftTensorRank > 1) { IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank; } @@ -92,32 +92,31 @@ void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto dataMemoryFormat = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims()); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; - auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig { - InferenceEngine::DataConfig dataConfig; + auto createDataConfig = [](const Shape& dims, memory::data_type dataType) -> PortConfig { + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims)); + dataConfig.desc = MKLDNNPlugin::make_unique(dims.getStaticDims(), dataType, MKLDNNMemory::GetPlainFormatByRank(dims.getRank())); return dataConfig; }; - config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), dataType)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), memory::data_type::s32)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getDims(), memory::data_type::s32)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getShape(), dataType)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getShape(), memory::data_type::s32)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getShape(), memory::data_type::s32)); - config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), dataType)); + config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getShape(), dataType)); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, dataMemoryFormat}); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); } void 
MKLDNNRollNode::execute(mkldnn::stream strm) { - const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getDesc().getPrecision(); + const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().GetDesc().getPrecision(); const auto& dataTypeSize = dataPrecision.size(); switch (dataTypeSize) { case sizeof(PrecisionTrait::value_type): { @@ -156,7 +155,7 @@ void MKLDNNRollNode::rollImpl() { auto *output = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); std::vector shiftsVector(numOfDims, 0); - const size_t axesLength = axesEdge->getDims()[0]; + const size_t axesLength = axesEdge->getShape().getStaticDims()[0]; for (size_t dim = 0; dim < axesLength ; ++dim) { int32_t currentAxis = axes[dim] < 0 ? axes[dim] + numOfDims : axes[dim]; int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim]; @@ -171,7 +170,7 @@ void MKLDNNRollNode::rollImpl() { const size_t elementSize = sizeof(DataType); const size_t nIterations = totalElements / blockSize; - const auto strides = dataEdge->getDesc().getBlockingDesc().getStrides(); + const auto strides = dataEdge->getMemory().GetDescWithType().getStrides(); parallel_for(nIterations, [&](size_t iter) { size_t start = iter * blockSize; size_t leftBlockStartOffset = start; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp index 5b9692fc562..af7b36dd7f3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp @@ -52,9 +52,9 @@ void MKLDNNScatterUpdateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - if (getParentEdgeAt(DATA_ID)->getDims().ndims() < 1 || - getParentEdgeAt(INDICES_ID)->getDims().ndims() < 1 || - getParentEdgeAt(UPDATE_ID)->getDims().ndims() < 1) { + if (getParentEdgeAt(DATA_ID)->getShape().getRank() < 1 || + getParentEdgeAt(INDICES_ID)->getShape().getRank() < 1 || + getParentEdgeAt(UPDATE_ID)->getShape().getRank() < 1) { IE_THROW() << errorPrefix << " do not support scalar input"; } @@ -77,15 +77,15 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto srcDataDim = getParentEdgeAt(DATA_ID)->getDims(); - auto indicesDim = getParentEdgeAt(INDICES_ID)->getDims(); - auto updateDim = getParentEdgeAt(UPDATE_ID)->getDims(); - auto dstDataDim = getChildEdgeAt(0)->getDims(); + auto srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + auto updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); + auto dstDataDim = getChildEdgeAt(0)->getShape().getStaticDims(); - size_t srcRank = srcDataDim.ndims(); - size_t indicesRank = indicesDim.ndims(); - size_t updateRank = updateDim.ndims(); - size_t dstRank = dstDataDim.ndims(); + size_t srcRank = srcDataDim.size(); + size_t indicesRank = indicesDim.size(); + size_t updateRank = updateDim.size(); + size_t dstRank = dstDataDim.size(); // common check if (srcRank != dstRank) { @@ -179,7 +179,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { bool canBeInplace = getParentEdgeAt(DATA_ID)->getParent()->getChildEdges().size() == 1 && !getParentEdgeAt(DATA_ID)->getParent()->isConstant(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; if (axisRelaxed) { 
config.inConfs.resize(4); @@ -201,20 +201,22 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { } auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag idxFormat, memory::format_tag updateFormat, memory::format_tag outFormat) { - config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), dataType, inFormat); - config.inConfs[INDICES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(INDICES_ID)->getDims(), indicesType, idxFormat); - config.inConfs[UPDATE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(UPDATE_ID)->getDims(), dataType, updateFormat); + config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, inFormat); + config.inConfs[INDICES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(), indicesType, + idxFormat); + config.inConfs[UPDATE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(), dataType, + updateFormat); if (axisRelaxed) - config.inConfs[AXIS_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXIS_ID)->getDims(), + config.inConfs[AXIS_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXIS_ID)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec), memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, outFormat); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, outFormat}); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, outFormat); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(DATA_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(INDICES_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(UPDATE_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims()))); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(INDICES_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(UPDATE_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank())); } void MKLDNNScatterUpdateNode::createPrimitive() { @@ -272,8 +274,8 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { uint8_t *indicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); uint8_t *updatePtr = reinterpret_cast(updateMemPtr->GetPtr()); - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t srcRank = srcDataDim.size(); int axis = 0; if (axisRelaxed) { @@ -309,8 +311,8 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { }); if (scatterUpdateMode == ScatterUpdateMode::ScatterUpdate) { - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); size_t 
updateRank = updateDim.size(); SizeVector expectUpdateShape = {}; @@ -370,9 +372,9 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { // and indices tensor of shape [i_0, i_1, ..., i_k]. // Updates tensor shape should be [d_0, d_1, ... d_(axis - 1), i_0, i_1, ..., i_k, d_(axis + 1), ..., d_n]. void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); @@ -403,8 +405,8 @@ void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, i // k is indices.shape[-1] and should not be greater than rank of input, q is rank of indicies. // updates is a (q-1)-dimension tensor of replacement-slice-values void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); @@ -433,9 +435,9 @@ void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, // output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1, // output[i][j][indices[i][j][k]] = updates[i][j][k] if axis = 2. 
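A compact reference sketch of the ScatterElementsUpdate semantics stated in the comment above, for a rank-3 tensor. This is not the plugin implementation: the row-major indexing helper, the names, and the assumption of non-negative indices are mine.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Flat index for a plain row-major (ncsp-like) rank-3 tensor.
    static size_t flat(const std::vector<size_t>& d, size_t i, size_t j, size_t k) {
        return (i * d[1] + j) * d[2] + k;
    }

    void scatterElementsUpdateRef(std::vector<float>& data, const std::vector<size_t>& dataDims,
                                  const std::vector<int32_t>& indices, const std::vector<float>& updates,
                                  const std::vector<size_t>& updDims, int axis) {
        for (size_t i = 0; i < updDims[0]; ++i)
            for (size_t j = 0; j < updDims[1]; ++j)
                for (size_t k = 0; k < updDims[2]; ++k) {
                    const size_t u = flat(updDims, i, j, k);
                    size_t idx[3] = {i, j, k};
                    idx[axis] = static_cast<size_t>(indices[u]);  // replace the coordinate along 'axis'
                    data[flat(dataDims, idx[0], idx[1], idx[2])] = updates[u];
                }
    }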
void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t updateRank = updateDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp index c67a4394ed8..093ee7e8255 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp @@ -7,7 +7,7 @@ #include #include "ie_parallel.hpp" #include "mkldnn_select_node.h" -#include +#include #include #include #include "common/cpu_memcpy.h" @@ -129,10 +129,10 @@ void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() { if (inputPrecisionSize != 1 && inputPrecisionSize != 2 && inputPrecisionSize != 4 && inputPrecisionSize != 8) IE_THROW() << errorPrefix << " has unsupported precision: " << inputPrecision << " on 'Then' and 'Else' inputs"; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, conditionPrecision}, - {TensorDescCreatorTypes::ncsp, inputPrecision}, - {TensorDescCreatorTypes::ncsp, inputPrecision}}, - {{TensorDescCreatorTypes::ncsp, inputPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, conditionPrecision}, + {LayoutType::ncsp, inputPrecision}, + {LayoutType::ncsp, inputPrecision}}, + {{LayoutType::ncsp, inputPrecision}}, impl_desc_type::ref_any); } @@ -180,8 +180,8 @@ void MKLDNNSelectNode::execute_impl() { } void MKLDNNSelectNode::execute(mkldnn::stream strm) { - const size_t condPrecSize = getParentEdgeAt(CONDITION)->getDesc().getPrecision().size(); - const size_t inputsPrecSize = getParentEdgeAt(THEN)->getDesc().getPrecision().size(); + const size_t condPrecSize = getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().size(); + const size_t inputsPrecSize = getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().size(); switch (condPrecSize) { case 1: { @@ -192,7 +192,7 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); } break; } @@ -204,13 +204,13 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); } break; } default: { IE_THROW() << "Select layer doesn't support 'Condition' inputs' precision: " - + std::string(getParentEdgeAt(CONDITION)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().name()); } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp index 95b00af386b..f83ddfed0d0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp @@ -7,7 +7,7 @@ #include #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include "common/cpu_memcpy.h" #include "utils/general_utils.h" @@ -95,8 +95,8 @@ void MKLDNNShuffleChannelsNode::initSupportedPrimitiveDescriptors() { } // use ncsp as default for non-quantized networks and nspc for quantized - auto firstCreatorType = isInQuantizedGraph ? TensorDescCreatorTypes::nspc : TensorDescCreatorTypes::ncsp; - auto secondCreatorType = isInQuantizedGraph ? TensorDescCreatorTypes::ncsp : TensorDescCreatorTypes::nspc; + auto firstCreatorType = isInQuantizedGraph ? LayoutType::nspc : LayoutType::ncsp; + auto secondCreatorType = isInQuantizedGraph ? LayoutType::ncsp : LayoutType::nspc; addSupportedPrimDesc({{firstCreatorType, precision}}, {{firstCreatorType, precision}}, @@ -106,11 +106,11 @@ void MKLDNNShuffleChannelsNode::initSupportedPrimitiveDescriptors() { impl_type, supportDynamicBatch_); // canUseBlocked if (axis_ != 1) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}}, + {{LayoutType::nCsp8c, precision}}, impl_type, supportDynamicBatch_); - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}}, + {{LayoutType::nCsp16c, precision}}, impl_type, supportDynamicBatch_); } } @@ -127,7 +127,8 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_SHCH_ERROR << "has unidentified preferable primitive descriptor"; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); int batchRank = axis_; int spatialRank = dataRank_ - axis_ - 1; @@ -135,7 +136,7 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { // 2 for decomposed axis dim, 1 for composed spatial dim int reshapedRank = batchRank + 2 + static_cast(spatialRank != 0) + static_cast(isBlocked && (spatialRank == 0)); PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -158,9 +159,10 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { const int channelDim = 1; if (isBlocked) { - size_t blkSize = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back(); + const auto blkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + size_t blkSize = blkDesc.getBlockDims().back(); size_t CB = div_up(inShape_[1], blkSize); - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = blkDesc.getBlockDims(); if (axis_ > channelDim) { // axis on spatial for (int i = 0; i < batchRank; i++) { params.order[i] = i; @@ -179,7 +181,7 @@ void 
MKLDNNShuffleChannelsNode::createPrimitive() { params.order[2] = 2; params.src_block_dims[2] = spatialShapeSize; } - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { if (axis_ == channelDim) { // axis on channel params.order[0] = 0; params.src_block_dims[0] = inShape_[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp index 53dda785e69..9fe05e475fc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp @@ -7,6 +7,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -37,19 +38,20 @@ void MKLDNNSoftMaxNode::getSupportedDescriptors() { if (!getChildEdges().size()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - if (getParentEdgeAt(0)->getDims().ndims() == 3) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::abc); - createDescriptor({in_candidate}, {}); + if (getParentEdgeAt(0)->getShape().getRank() == 3) { + MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::abc); + createDescriptor({in_candidate.get()}, {}); } - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { - MKLDNNDims dims = getParentEdgeAt(0)->getDims(); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + const auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); if (MKLDNNMemoryDesc(dims, inputDataType, format).blocksExtended()) continue; - MKLDNNMemoryDesc in_candidate(dims, inputDataType, format); + MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(dims, inputDataType, format); - createDescriptor({in_candidate}, {}); + createDescriptor({in_candidate.get()}, {}); } } @@ -63,7 +65,7 @@ void MKLDNNSoftMaxNode::createPrimitive() { descs[0] = desc; std::shared_ptr selected_desc_ptr = descs[0]; - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; @@ -92,33 +94,34 @@ bool MKLDNNSoftMaxNode::created() const { return getType() == Softmax; } -void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; + void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto config = selected_pd->getConfig(); + if (isConfigDefined(config)) + return; - if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || - (!isUninitTensorDesc(config.inConfs[0].desc) && - !isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc)) - IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; + if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || + (config.inConfs[0].desc->isDefined() && + config.outConfs[0].desc->isDefined() && 
!config.inConfs[0].desc->isCompatible(*config.outConfs[0].desc))) + IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; - if (!isUninitTensorDesc(config.inConfs[0].desc)) { - config.outConfs[0].desc = config.inConfs[0].desc; - } else if (!isUninitTensorDesc(config.outConfs[0].desc)) { - config.inConfs[0].desc = config.outConfs[0].desc; - } else { - config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0); - } + if (config.inConfs[0].desc->isDefined()) { + config.outConfs[0].desc = config.inConfs[0].desc->clone(); + } else if (config.outConfs[0].desc->isDefined()) { + config.inConfs[0].desc = config.outConfs[0].desc->clone(); + } else { + config.inConfs[0].desc = getDefinedInputDesc(config, 0); + config.outConfs[0].desc = config.inConfs[0].desc->clone(); + } - initDescriptor(config); -} + initDescriptor(config); + } -void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); +void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); MKLDNNDescriptor desc(std::shared_ptr( new softmax_forward::desc(prop_kind::forward_scoring, in_candidate, axis))); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h index b422eb3f030..fd200cdb145 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h @@ -17,8 +17,8 @@ public: MKLDNNSoftMaxNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void initOptimalPrimitiveDescriptor() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp index 4702f97e0fb..1861799f97c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_space_to_batch_node.h" -#include +#include #include using namespace MKLDNNPlugin; @@ -67,32 +67,32 @@ void MKLDNNSpaceToBatchNode::initSupportedPrimitiveDescriptors() { if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nspc, precision}}, + addSupportedPrimDesc({{LayoutType::nspc, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + 
addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] % 8 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] % 16 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } } @@ -112,15 +112,15 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto layout = getParentEdgeAt(0)->getDesc().getLayout(); - const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; + const bool blocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(outDims); auto outShape5D = getShape5D(inDims); auto blockShape = getShape5D(blockShapeIn); - if (layout == NHWC || layout == NDHWC) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -129,9 +129,10 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { blockShape.erase(blockShape.begin() + 1); } - const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu; - const size_t blockCountInput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; - const size_t blockCountOutput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + const size_t blockSize = blocked ? outBlkDims.back() : 1lu; + const size_t blockCountInput = outBlkDims[1]; + const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -172,7 +173,7 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? 
bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1]; - if (layout == NHWC || layout == NDHWC) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -226,12 +227,13 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { } void MKLDNNSpaceToBatchNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { case 1: SpaceToBatchKernel::value_type>(); break; case 2: SpaceToBatchKernel::value_type>(); break; case 4: SpaceToBatchKernel::value_type>(); break; default: - IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'"; + IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) + + "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp index 69c3356a2f0..25003088139 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp @@ -6,7 +6,7 @@ #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include @@ -64,13 +64,13 @@ MKLDNNSpaceToDepthNode::MKLDNNSpaceToDepthNode(const std::shared_ptr 5) THROW_ERROR << "doesn't support dimensions with rank greater than 5"; - SizeVector dstDims = outDims[0].ToSizeVector(); + SizeVector dstDims = outputShapes[0].getStaticDims(); if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; @@ -98,8 +98,8 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto srcDims = getParentEdgeAt(0)->getDims(); - const size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = srcDims.size(); impl_desc_type impl_type; if (mayiuse(impl::cpu::x64::avx512_common)) { @@ -112,7 +112,7 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -121,26 +121,26 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2) { auto canUseBlocked = [=](const size_t block) { return srcDims[1] % block == 0 && (mode == Mode::DEPTH_FIRST ? 
block % blockStep == 0 : true); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(precision, getParentEdgeAt(0)->getDims().ToSizeVector()); - config.outConfs[0].desc = itr->second->createDesc(precision, getChildEdgeAt(0)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_type, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.inConfs[0].desc = itr->second->createUniqueDesc(precision, getParentEdgeAt(0)->getShape().getStaticDims()); + config.outConfs[0].desc = itr->second->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_type); } } @@ -154,18 +154,19 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor"; - SizeVector srcDims = getParentEdgeAt(0)->getBlob()->getTensorDesc().getDims(); - SizeVector dstDims = getChildEdgeAt(0)->getBlob()->getTensorDesc().getDims(); + SizeVector srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -190,8 +191,8 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -218,7 +219,7 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { } 
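Aside on the layout-candidate pattern repeated in the SpaceToBatch, SpaceToDepth and StridedSlice hunks: blocked layouts are only offered when the channel dimension divides the block size, channels-last needs rank above 2, and the plain layout is always kept. The sketch below reduces the checks to the channel-divisibility part (the per-node extra conditions such as the DEPTH_FIRST block step or the unit channel stride are omitted), and the local LayoutType enum is a stand-in for the plugin's type.

    #include <cstddef>
    #include <vector>

    enum class LayoutType { ncsp, nspc, nCsp8c, nCsp16c };   // stand-in for the plugin enum

    std::vector<LayoutType> candidateLayouts(size_t rank, size_t channels) {
        std::vector<LayoutType> types;
        if (rank > 2) {
            types.push_back(LayoutType::nspc);                // channels-last
            if (channels % 8 == 0)  types.push_back(LayoutType::nCsp8c);
            if (channels % 16 == 0) types.push_back(LayoutType::nCsp16c);
        }
        types.push_back(LayoutType::ncsp);                    // plain layout is always offered
        return types;
    }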
reshapeAndSetPermOrder(orderShiftForBlocks, orderShiftForDims, firstSpatialOrder, dstBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp index 201bebf4e63..a95bd0c4f75 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp @@ -4,12 +4,13 @@ #include "mkldnn_split_node.h" #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include #include #include #include "utils/general_utils.h" +#include #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " @@ -74,17 +75,17 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcShape = getParentEdgeAt(0)->getShape(); auto axis_size = 0; - auto dstFirstDims = getChildEdgeAt(0)->getDims(); - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; - if (dstFirstDims.ndims() != o_Dims.ndims()) { + auto dstFirstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto o_Dims = outputShapes[i].getStaticDims(); + if (dstFirstDims.size() != o_Dims.size()) { THROW_ERROR << "only supports output blobs with equal number of dimensions"; } axis_size += o_Dims[axis]; - for (size_t j = 0; j < dstFirstDims.ndims(); j++) { + for (size_t j = 0; j < dstFirstDims.size(); j++) { if (j == axis) continue; if (o_Dims[j] != dstFirstDims[j]) @@ -92,7 +93,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } dstFirstDims[axis] = axis_size; - if (dstFirstDims.size() != srcDims.size()) + if (std::accumulate(dstFirstDims.begin(), dstFirstDims.end(), 1, std::multiplies()) != srcShape.getElementsCount()) THROW_ERROR << "sizes of input blob and sum of output blobs are not equal."; InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0); @@ -105,18 +106,18 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } //Set plain and tailC formats - std::vector tdCreatorTypes{ TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::nspc }; + std::vector tdCreatorTypes{ LayoutType::ncsp, LayoutType::nspc }; //Support channel blocked format - if (srcDims.ndims() > 2) { - for (auto item : { std::make_pair(8lu, TensorDescCreatorTypes::nCsp8c), std::make_pair(16lu, TensorDescCreatorTypes::nCsp16c) }) { - SizeVector blkDims = srcDims.ToSizeVector(); + if (srcShape.getRank() > 2) { + for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) { + SizeVector blkDims = srcShape.getStaticDims(); if (blkDims[channelsPos] % item.first) continue; bool blocked = true; - for (size_t i = 0; i < outDims.size(); i++) { - if (outDims[i].ToSizeVector()[channelsPos] % item.first) { + for (size_t i = 0; i < outputShapes.size(); i++) { + if (outputShapes[i].getStaticDims()[channelsPos] % item.first) { blocked = false; break; } @@ -129,43 +130,37 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { std::vector pdIndexesToReuse; - auto& creatorsMap = TensorDescCreator::getCommonCreators(); - 
auto itrRange = TensorDescCreator::makeFilteredRange(creatorsMap, static_cast(srcDims.ndims()), tdCreatorTypes); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto itrRange = BlockedDescCreator::makeFilteredRange(creatorsMap, static_cast(srcShape.getRank()), tdCreatorTypes); for (auto itr = itrRange.first; itr != itrRange.second; ++itr) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = dynBatchSupport; config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = itr->second->createDesc(inpPrecision, srcDims.ToSizeVector()); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, srcShape.getStaticDims())); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc.setDims({1}); - config.inConfs[1].desc.setPrecision(axisPrecision); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{1}); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = TensorDesc(axisPrecision, SizeVector{outDims.size()}, TensorDesc::getLayoutByDims(SizeVector{outDims.size()})); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{outputShapes.size()}); config.inConfs[2].constant = true; } - config.outConfs.resize(outDims.size()); - - std::vector outFormats; - - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; + config.outConfs.resize(outputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = itr->second->createDesc(inpPrecision, o_Dims.ToSizeVector()); - outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, outputShapes[i].getStaticDims())); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); - if (itr->first == TensorDescCreatorTypes::ncsp) { + if (itr->first == LayoutType::ncsp) { // at least the plain layout can be optimized inplace. 
pdIndexesToReuse.emplace_back(supportedPrimitiveDescriptors.size() - 1); - } else if (itr->first == TensorDescCreatorTypes::nCsp8c || itr->first == TensorDescCreatorTypes::nCsp16c) { + } else if (itr->first == LayoutType::nCsp8c || itr->first == LayoutType::nCsp16c) { if (axis < 2) { pdIndexesToReuse.emplace_back(supportedPrimitiveDescriptors.size() - 1); } @@ -176,12 +171,11 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { for (auto refPdIndex : pdIndexesToReuse) { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - - const auto& order = refConfig.inConfs[0].desc.getBlockingDesc().getOrder(); - const auto& blkDims = refConfig.inConfs[0].desc.getBlockingDesc().getBlockDims(); + const auto inBlockingDesc = refConfig.inConfs[0].desc->as(); + const auto& order = inBlockingDesc->getOrder(); + const auto& blkDims = inBlockingDesc->getBlockDims(); auto numOfDim = blkDims.size(); - std::vector outFormats; SizeVector offsets(numOfDim, 0lu); SizeVector strides(numOfDim); strides.back() = 1lu; @@ -195,49 +189,43 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } - config.inConfs[0].desc = TensorDesc(inpPrecision, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(inpPrecision, srcShape.getStaticDims(), blkDims, order, offset, offsets, strides); - for (size_t i = 0; i < outDims.size(); i++) { - const auto& outBlkDims = refConfig.outConfs[i].desc.getBlockingDesc().getBlockDims(); - const auto& dims = refConfig.outConfs[i].desc.getDims(); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto outBlockingDesc = refConfig.outConfs[i].desc->as(); + const auto& outBlkDims = outBlockingDesc->getBlockDims(); + const auto& dims = outBlockingDesc->getShape().getStaticDims(); config.outConfs[i].inPlace = 0; - config.outConfs[i].desc = TensorDesc(outPrecision, dims, {outBlkDims, order, offset, offsets, strides}); - outFormats.emplace_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(outPrecision, dims, outBlkDims, order, offset, offsets, strides); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } // Special nspc -> ncsp case when splitting channels - if (axis == 1 && (dstFirstDims.ndims() == 4 || dstFirstDims.ndims() == 5)) { - InferenceEngine::LayerConfig config; + if (axis == 1 && (dstFirstDims.size() == 4 || dstFirstDims.size() == 5)) { + NodeConfig config; config.dynBatchSupport = dynBatchSupport; config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = creatorsMap.at(TensorDescCreatorTypes::nspc)->createDesc(inpPrecision, srcDims.ToSizeVector()); + config.inConfs[0].desc = creatorsMap.at(LayoutType::nspc)->createUniqueDesc(inpPrecision, srcShape.getStaticDims()); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc.setDims({1}); - config.inConfs[1].desc.setPrecision(axisPrecision); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{1}); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = TensorDesc(axisPrecision, SizeVector{outDims.size()}, TensorDesc::getLayoutByDims(SizeVector{outDims.size()})); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{outputShapes.size()}); config.inConfs[2].constant = 
true; } - config.outConfs.resize(outDims.size()); - - std::vector outFormats; - - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; + config.outConfs.resize(outputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = creatorsMap.at(TensorDescCreatorTypes::ncsp)->createDesc(inpPrecision, o_Dims.ToSizeVector()); - outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = creatorsMap.at(LayoutType::ncsp)->createUniqueDesc(inpPrecision, outputShapes[i].getStaticDims()); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); } } @@ -252,18 +240,16 @@ void MKLDNNSplitNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; - canUseOptimizedNspc2Ncsp = true; - if (axis != 1) - canUseOptimizedNspc2Ncsp = false; + auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->GetDesc(); - if (getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NHWC && - getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NDHWC) - canUseOptimizedNspc2Ncsp = false; - - for (size_t i = 0; i < getChildEdges().size(); i++) { - if (getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCHW && - getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCDHW) - canUseOptimizedNspc2Ncsp = false; + canUseOptimizedNspc2Ncsp = false; + if (axis == 1 && one_of(memDesc.getShape().getRank(), 4, 5) && memDesc.hasLayoutType(LayoutType::nspc)) { + canUseOptimizedNspc2Ncsp = true; + for (size_t i = 0; i < getChildEdges().size(); i++) { + auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->GetDesc(); + if (!childMemDesc.hasLayoutType(LayoutType::ncsp)) + canUseOptimizedNspc2Ncsp = false; + } } if (!isOptimized()) { @@ -288,7 +274,7 @@ void MKLDNNSplitNode::execute(mkldnn::stream strm) { } uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - size_t batch = this->getParentEdgeAt(0)->getDims()[0]; + size_t batch = this->getParentEdgeAt(0)->getShape().getStaticDims()[0]; if (batch != MB) optimizedParams.countStrides = optimizedParams.countStrides / batch * MB; @@ -320,50 +306,47 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (isInitConfig(config)) + if (isConfigDefined(config)) return; for (size_t i = 0; i < config.inConfs.size(); i++) { - if (config.inConfs[i].desc.getLayout() == InferenceEngine::Layout::ANY || - !isUninitTensorDesc(config.inConfs[i].desc)) + if (config.inConfs[i].desc->isDefined()) continue; int num = getParentEdgeAt(i)->getOutputNum(); if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) { if (num >= 0) { - if (isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && - getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].inPlace >= 0) + const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; + if (!parentConfig.desc->isDefined() && parentConfig.inPlace >= 0) getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor(); - if 
(!isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && - MKLDNNExtensionUtils::initTensorsAreEqual( - getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc, - config.inConfs[i].desc)) { - config.inConfs[i].desc = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc; + if (parentConfig.desc->isDefined() && parentConfig.desc->isCompatible(*config.inConfs[i].desc)) { + config.inConfs[i].desc = parentConfig.desc->clone(); continue; } } } - config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(), - config.inConfs[i].desc.getDims(), { - config.inConfs[i].desc.getBlockingDesc().getBlockDims(), - config.inConfs[i].desc.getBlockingDesc().getOrder() - }); + + // reset undefined offsets + config.inConfs[i].desc = MemoryDescUtils::resetOffset(config.inConfs[i].desc.get()); } - if (config.outConfs.size() != outDims.size()) + if (config.outConfs.size() != outputShapes.size()) THROW_ERROR << "has invalid config"; + + auto firstInBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); size_t offset = 0; - for (size_t i = 0; i < outDims.size(); i++) { - config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), - config.outConfs[i].desc.getDims(), { - config.outConfs[i].desc.getBlockingDesc().getBlockDims(), - config.outConfs[i].desc.getBlockingDesc().getOrder(), - config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset, - config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(), - config.inConfs[0].desc.getBlockingDesc().getStrides() - }); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto outBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[i].desc); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(outBlockingDesc.getPrecision(), + outBlockingDesc.getShape().getStaticDims(), + outBlockingDesc.getBlockDims(), + outBlockingDesc.getOrder(), + firstInBlockingDesc.getOffsetPadding() + offset, + firstInBlockingDesc.getOffsetPaddingToData(), + firstInBlockingDesc.getStrides()); + size_t axisSize = 1; - for (size_t j = axis; j < config.outConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - axisSize *= config.outConfs[i].desc.getBlockingDesc().getBlockDims()[j]; + for (size_t j = axis; j < outBlockingDesc.getBlockDims().size(); j++) { + axisSize *= outBlockingDesc.getBlockDims()[j]; } offset += axisSize; } @@ -375,10 +358,9 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { // This is needed mostly for the testing purposes, since for the planar layout Split works always in place, we need to enforce // the reference implementation when it is selected in a test to test that piece of code. 
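As an aside on the offset arithmetic in MKLDNNSplitNode::initOptimalPrimitiveDescriptor above: a small sketch, under the assumption that all output views share the parent's strides, of how each output's starting offset grows by the product of the block dims from the split axis inward. The helper is hypothetical and only mirrors the accumulation done in the hunk.

    #include <cstddef>
    #include <functional>
    #include <numeric>
    #include <vector>

    std::vector<size_t> splitOffsets(const std::vector<std::vector<size_t>>& outBlockDims, size_t axis) {
        std::vector<size_t> offsets;
        size_t offset = 0;
        for (const auto& dims : outBlockDims) {
            offsets.push_back(offset);                         // where this output starts in the parent buffer
            offset += std::accumulate(dims.begin() + axis, dims.end(),
                                      static_cast<size_t>(1), std::multiplies<size_t>());
        }
        return offsets;
    }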
if (!implPriorities.empty() && implPriorities[0] == impl_desc_type::ref) { - auto plain = PartialBlkDesc::makePlain(getParentEdgeAt(0)->getDims().ToSizeVector()); for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); ++i) { auto& pd = supportedPrimitiveDescriptors[i]; - if (PartialBlkDesc::extractFrom(pd.getConfig().inConfs[0].desc) == plain && + if (pd.getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp) && impl_desc_type::ref == pd.getImplementationType()) { selectPrimitiveDescriptorByIndex(static_cast(i)); return; @@ -399,9 +381,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { if (inNum < 0 || inNum >= parent_spd->getConfig().outConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual( - supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc, - parent_spd->getConfig().outConfs[inNum].desc)) { + if (supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc->isCompatible(*parent_spd->getConfig().outConfs[inNum].desc)) { canSelectPrimitive.push_back(i); } } @@ -425,7 +405,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { auto childEdge = getChildEdgeAt(i); auto childPtr = childEdge->getChild(); auto& vecChildSpd = childPtr->getSupportedPrimitiveDescriptors(); - const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[i].desc; + const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[childEdge->getInputNum()].desc; if (!vecChildSpd.empty()) { int inNum = childEdge->getOutputNum(); @@ -437,7 +417,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { if (inNum >= childSpd.getConfig().inConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, childSpd.getConfig().inConfs[inNum].desc)) { + if (outputDesc->isCompatible(*childSpd.getConfig().inConfs[inNum].desc)) { hasMatchDesc = true; break; } @@ -480,11 +460,11 @@ void MKLDNNSplitNode::prepareOptimizedParams() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU Split node with name '" << getName() << "' doesn't have primitive descriptors."; - const auto& inpTensorDesc = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc; - const auto outputPortsCount = outDims.size(); + const auto inpTensorDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + const auto outputPortsCount = outputShapes.size(); //find axis order position - const auto& order = inpTensorDesc.getBlockingDesc().getOrder(); + const auto& order = inpTensorDesc.getOrder(); unsigned axisOrderPos = std::numeric_limits::max(); for (size_t i = 0; i < order.size(); ++i) { if (order[i] == axis) { @@ -497,8 +477,8 @@ void MKLDNNSplitNode::prepareOptimizedParams() { } uint8_t srcDataSize = inpTensorDesc.getPrecision().size(); - const auto& srcDims = inpTensorDesc.getBlockingDesc().getBlockDims(); - const auto nDims = srcDims.size(); + const auto& srcDims = inpTensorDesc.getBlockDims(); + const auto getRank = srcDims.size(); optimizedParams.countStrides = 1; for (int i = 0; i < axisOrderPos; i++) @@ -511,8 +491,9 @@ void MKLDNNSplitNode::prepareOptimizedParams() { auto outputEdge = this->getChildEdgesAtPort(i).front(); optimizedParams.dataSize[i] = srcDataSize; - for (size_t j = axisOrderPos; j < nDims; j++) - optimizedParams.dataSize[i] *= outputEdge->getDesc().getBlockingDesc().getBlockDims()[j]; + auto desc = outputEdge->getMemory().GetDesc().as(); + for (size_t j = axisOrderPos; j < getRank; j++) + optimizedParams.dataSize[i] *= 
desc->getBlockDims()[j]; optimizedParams.srcDataStride += optimizedParams.dataSize[i]; } @@ -526,31 +507,32 @@ void MKLDNNSplitNode::prepareOptimizedParams() { void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { auto parentEdge = getParentEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t IC = parentEdge->getDims()[1]; - const size_t D = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t H = parentEdge->getDims()[ndims - 2]; - const size_t W = parentEdge->getDims()[ndims - 1]; + const int rank = parentEdge->getShape().getRank(); + const auto parentDims = parentEdge->getShape().getStaticDims(); + const size_t IC = parentDims[1]; + const size_t D = rank == 5 ? parentDims[rank - 3] : 1; + const size_t H = parentDims[rank - 2]; + const size_t W = parentDims[rank - 1]; - auto srcBlob = parentEdge->getBlob(); - auto srcData = srcBlob->cbuffer().as(); - const auto dataSize = srcBlob->getTensorDesc().getPrecision().size(); + auto& srcMem = parentEdge->getMemory(); + auto srcData = reinterpret_cast(srcMem.GetData()); + const auto dataSize = srcMem.GetDesc().getPrecision().size(); const size_t DHW = D*H*W; const size_t strideIB = DHW * IC * dataSize; const size_t strideIW = IC*dataSize; const size_t strideOC = DHW * dataSize; - for (size_t i = 0, sIdx = 0; i < outDims.size(); i++) { + for (size_t i = 0, sIdx = 0; i < outputShapes.size(); i++) { auto dstData = dstMemPtrs[i]; size_t innerSize = 1; - auto dims = outDims[i].ToSizeVector(); + auto dims = outputShapes[i].getStaticDims(); for (size_t j = axis; j < dims.size(); j++) { innerSize *= dims[j]; } - auto srcPtr = srcData + srcBlob->getTensorDesc().offset(sIdx) * dataSize; + auto srcPtr = srcData + srcMem.GetDesc().getElementOffset(sIdx) * dataSize; const size_t OC = dims[1]; const size_t strideOB = OC * strideOC; @@ -572,7 +554,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { void MKLDNNSplitNode::initializeDstMemPtrs() { dstMemPtrs.clear(); - for (size_t i = 0; i < outDims.size(); ++i) { + for (size_t i = 0; i < outputShapes.size(); ++i) { auto outputEdges = this->getChildEdgesAtPort(i); if (uint8_t* dstData = reinterpret_cast(outputEdges.front()->getMemoryPtr()->GetPtr())) { dstMemPtrs.push_back(dstData); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp index 1b70de9f0f8..4f98fc1099f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp @@ -10,7 +10,7 @@ #include "ie_parallel.hpp" #include "caseless.hpp" #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include "utils/general_utils.h" #include "mkldnn_input_node.h" @@ -54,7 +54,7 @@ MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr(op); - const size_t nDims = std::max(inDims[DATA_ID].ndims(), outDims[0].ndims()); + const size_t nDims = std::max(inputShapes[DATA_ID].getRank(), outputShapes[0].getRank()); auto createMask = [&](const std::vector &origMask, const int bit = 0, bool needReverse = false) { std::vector mask(origMask.begin(), origMask.end()); @@ -92,8 +92,8 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { params.parametersAreConstant = isConstantNode(getParentEdgesAtPort(BEGIN_ID)[0]->getParent()) && isConstantNode(getParentEdgesAtPort(END_ID)[0]->getParent()); - const SizeVector srcDims = inDims[DATA_ID].ToSizeVector(); - const 
SizeVector dstDims = outDims[0].ToSizeVector(); + const SizeVector srcDims = inputShapes[DATA_ID].getStaticDims(); + const SizeVector dstDims = outputShapes[0].getStaticDims(); const size_t nSrcDims = srcDims.size(); const size_t nDims = std::max(nSrcDims, dstDims.size()); @@ -102,21 +102,21 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { if (!getChildEdges().size()) THROW_ERROR << "has incorrect number of output edges"; - beginDims = inDims[BEGIN_ID].ToSizeVector(); + beginDims = inputShapes[BEGIN_ID].getStaticDims(); if (beginDims.size() != 1) THROW_ERROR << " should have begin vector with 1 dimension"; - endDims = inDims[END_ID].ToSizeVector(); + endDims = inputShapes[END_ID].getStaticDims(); if (endDims.size() != 1) THROW_ERROR << "should have end vector with 1 dimension"; if (beginDims[0] != endDims[0]) THROW_ERROR << "should have begin vector with size equal to end vector size"; - if (inDims.size() > STRIDE_ID) { + if (inputShapes.size() > STRIDE_ID) { if (!isConstantNode(getParentEdgesAtPort(STRIDE_ID)[0]->getParent())) params.parametersAreConstant = false; - strideDims = inDims[STRIDE_ID].ToSizeVector(); + strideDims = inputShapes[STRIDE_ID].getStaticDims(); if (strideDims.size() > 1) THROW_ERROR << "should have stride vector with 1 dimension"; if (beginDims[0] != strideDims[0]) @@ -206,11 +206,11 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { if (hasStrides) stridePrecision = getOriginalInputPrecisionAtPort(STRIDE_ID); - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - auto dstDims = getChildEdgeAt(0)->getDims(); - size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + size_t nDims = srcDims.size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(getParentEdges().size()); config.inConfs[DATA_ID].inPlace = -1; @@ -225,33 +225,35 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { } config.outConfs.resize(1); - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2 && params.equalDims) { auto canUseBlocked = [=](const size_t blockSize) { return srcDims[1] % blockSize == 0 && abs(stride[1]) == 1 && (begin[1] > srcDims[1] || begin[1] % blockSize == 0); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(dataPrecision, getParentEdgeAt(DATA_ID)->getDims().ToSizeVector()); - config.inConfs[BEGIN_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(BEGIN_ID)->getDims(), beginDataType, mkldnn::memory::format_tag::x); - config.inConfs[END_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(END_ID)->getDims(), endDataType, 
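The blocked nCsp8c/nCsp16c layouts above are offered only when the slice cannot cut through a channel block. A standalone restatement of the canUseBlocked predicate, with the surrounding variables turned into parameters (names are illustrative):

    #include <cstddef>
    #include <cstdlib>
    #include <vector>

    // A blocked layout is usable when the channel dim is a multiple of the block size,
    // the channel stride is 1, and the begin offset is either past the channel dim
    // or block-aligned.
    bool canUseBlocked(const std::vector<std::size_t>& srcDims,
                       const std::vector<int>& begin,
                       const std::vector<int>& stride,
                       std::size_t blockSize) {
        return srcDims[1] % blockSize == 0 &&
               std::abs(stride[1]) == 1 &&
               (begin[1] > static_cast<int>(srcDims[1]) || begin[1] % blockSize == 0);
    }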
mkldnn::memory::format_tag::x); + config.inConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getParentEdgeAt(DATA_ID)->getShape().getStaticDims()); + config.inConfs[BEGIN_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(BEGIN_ID)->getShape().getStaticDims(), beginDataType, + mkldnn::memory::format_tag::x); + config.inConfs[END_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(END_ID)->getShape().getStaticDims(), endDataType, + mkldnn::memory::format_tag::x); if (hasStrides) - config.inConfs[STRIDE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(STRIDE_ID)->getDims(), + config.inConfs[STRIDE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(STRIDE_ID)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(stridePrecision), mkldnn::memory::format_tag::x); - config.outConfs[0].desc = itr->second->createDesc(dataPrecision, getChildEdgeAt(DATA_ID)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.outConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getChildEdgeAt(DATA_ID)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); } } @@ -265,16 +267,16 @@ void MKLDNNStridedSliceNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor."; - auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getDesc().getBlockingDesc(); - auto dstBlockingDesc = getChildEdgeAt(0)->getDesc().getBlockingDesc(); + auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType(); + auto dstBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); auto srcOrder = srcBlockingDesc.getOrder(); params.srcDims = srcBlockingDesc.getBlockDims(); params.dstDims = dstBlockingDesc.getBlockDims(); - params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc.getPrecision().size(); + params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size(); if (params.parametersAreConstant) { size_t realNDims = params.dstDims.size(); - if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isPlainFormat()) + if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) orderParametersByLayouts(); SizeVector newSrcDims, newDstDims; @@ -287,9 +289,10 @@ void MKLDNNStridedSliceNode::createPrimitive() { } void MKLDNNStridedSliceNode::orderParametersByLayouts() { - const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isTailCFormat(); - const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isBlockedCFormat(); - auto srcOrder = getParentEdgeAt(DATA_ID)->getDesc().getBlockingDesc().getOrder(); + const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); + const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + auto srcOrder = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType().getOrder(); if (isBlockedLayout) { const size_t blk = params.srcDims.back(); @@ -553,9 +556,9 @@ void MKLDNNStridedSliceNode::indicesCalculation() { void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { if (!params.parametersAreConstant) { - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - auto dstDims = 
getChildEdgeAt(0)->getDims(); - const size_t nDims = std::max(srcDims.ndims(), dstDims.ndims()); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = std::max(srcDims.size(), dstDims.size()); const size_t ellipsisMaskCounter = std::accumulate(ellipsisMask.begin(), ellipsisMask.end(), 0); auto fillingInParameters = [&](std::vector ¶meter, const size_t type, const size_t size, const int value) { @@ -574,15 +577,15 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { if (strideDims.size()) fillingInParameters(stride, STRIDE_ID, strideDims[0], 1); - if (srcDims.ndims() > 3 && params.equalDims && ellipsisMaskCounter != 0) - addHiddenDims(srcDims.ndims()); + if (srcDims.size() > 3 && params.equalDims && ellipsisMaskCounter != 0) + addHiddenDims(srcDims.size()); - if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isPlainFormat()) + if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) orderParametersByLayouts(); SizeVector newSrcDims, newDstDims; dimsNormalization(newSrcDims, newDstDims); - dimsGluing(dstDims.ndims(), newSrcDims, newDstDims); + dimsGluing(dstDims.size(), newSrcDims, newDstDims); if (params.dstDims.size() == 1 || params.nDimsForWork != 1) indicesCalculation(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index d1d80e1b7cb..2e1a9f426ef 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "common/blocked_desc_creator.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -17,15 +18,16 @@ using namespace InferenceEngine::details; namespace MKLDNNPlugin { -static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptr& op) { - InferenceEngine::LayerConfig config; +static NodeConfig make_plain_config(const std::shared_ptr& op) { + NodeConfig config; for (size_t i = 0; i < op->get_input_size(); i++) { const auto& dims = op->get_input_shape(i); const auto prec = InferenceEngine::details::convertPrecision(op->get_input_element_type(i)); - InferenceEngine::DataConfig data_conf {}; - data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; + PortConfig data_conf {}; + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + data_conf.desc = descCreator->createUniqueDesc(prec, dims); config.inConfs.push_back(data_conf); } @@ -33,8 +35,9 @@ static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptrget_output_shape(i); const auto prec = InferenceEngine::details::convertPrecision(op->get_output_element_type(i)); - InferenceEngine::DataConfig data_conf {}; - data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; + PortConfig data_conf {}; + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + data_conf.desc = descCreator->createUniqueDesc(prec, dims); config.outConfs.push_back(data_conf); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h index 32e5eac70b2..3ba49ae9ad9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h @@ -95,7 +95,7 @@ private: int loopTripCountIdx = -1; int loopExecutionConditionIdx = -1; - InferenceEngine::LayerConfig config; + NodeConfig config; const std::shared_ptr ngraphOp; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp index 663f3a376f8..c92193c6e92 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp @@ -85,20 +85,18 @@ void MKLDNNTileNode::initSupportedPrimitiveDescriptors() { precision.size() != sizeof(PrecisionTrait::value_type)) { IE_THROW() << errorPrefix << " has unsupported input precision: " << precision; } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto& inDims = getParentEdgeAt(0)->getDims(); - memory::format_tag fmt = MKLDNNMemory::GetPlainFormat(inDims); + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(2); config.outConfs.resize(1); - config.inConfs[TILE_INPUT].desc = MKLDNNMemoryDesc(getParentEdgeAt(TILE_INPUT)->getDims(), inputDataType, fmt); - config.inConfs[TILE_REPEATS].desc = MKLDNNMemoryDesc(getParentEdgeAt(TILE_REPEATS)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), inputDataType, fmt); + config.inConfs[TILE_INPUT].desc = descCreator->createUniqueDesc(precision, getParentEdgeAt(TILE_INPUT)->getShape().getStaticDims()); + config.inConfs[TILE_REPEATS].desc = descCreator->createUniqueDesc(Precision::I32, getParentEdgeAt(TILE_REPEATS)->getShape().getStaticDims()); + config.outConfs[0].desc = descCreator->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); config.outConfs[0].inPlace = noTiling ? 
0 : -1; - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, fmt}); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } void MKLDNNTileNode::createPrimitive() { @@ -135,13 +133,13 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { m_inner_dim *= batchToProcess(); } - if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.GetDesc().isBlockedCFormat(8)) { + if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp8c)) { /* * We may enable tile processing directly to appropriate output format (nChw8c) */ m_inner_dim *= 8; m_outer_dim /= 8; - } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.GetDesc().isBlockedCFormat(16)) { + } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp16c)) { /* * We may enable tile processing directly to appropriate output format (nChw16c) */ @@ -149,7 +147,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { m_outer_dim /= 16; } - m_inner_dim *= srcMemory.GetDesc().GetElementSize(); + m_inner_dim *= srcMemory.GetDesc().getPrecision().size(); for (int i = 0; i < m_outer_dim; ++i) { for (int t = 0; t < tiles; ++t) { cpu_memcpy(dst_ptr, src_ptr, m_inner_dim); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp index 1c78c44b48d..f3fa2e69b5f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp @@ -84,14 +84,14 @@ void MKLDNNTopKNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector outDataConf; + std::vector outDataConf; outDataConf.reserve(getOriginalOutputsNumber()); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalOutputsNumber(); ++i) - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::I32); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}}, outDataConf, impl_desc_type::ref_any); } @@ -102,24 +102,24 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) { float* dst_data = nullptr; int* dst_idx = nullptr; - if (outDims.size() == 1) { + if (outputShapes.size() == 1) { if (getOriginalOutputPrecisionAtPort(0) == Precision::FP32) { dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); } else { dst_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); } - SizeVector dstDims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + SizeVector dstDims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); if (dstDims[axis] != static_cast(src_k)) { std::string errorMsg = "Output tensor dimension mismatch"; IE_THROW() << errorMsg; } - } else if (outDims.size() == 2) { + } else if (outputShapes.size() == 2) { dst_data = reinterpret_cast(getChildEdgesAtPort(TOPK_VALUE)[0]->getMemoryPtr()->GetPtr()); - SizeVector dst_data_dims = getChildEdgesAtPort(TOPK_VALUE)[0]->getDims().ToSizeVector(); + SizeVector dst_data_dims = getChildEdgesAtPort(TOPK_VALUE)[0]->getShape().getStaticDims(); dst_idx = 
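The Tile fast path above widens the memcpy chunk to a whole channel block when the source is nCsp8c or nCsp16c, then scales it by the element size. A small self-contained sketch of that bookkeeping with made-up sizes:

    #include <cstddef>
    #include <iostream>

    int main() {
        std::size_t m_inner_dim = 1;         // product of dims after the tiled axis
        std::size_t m_outer_dim = 64;        // product of dims up to the tiled axis
        const std::size_t blockSize = 8;     // nCsp8c
        const std::size_t prcSize = sizeof(float);

        if (m_inner_dim == 1 && m_outer_dim % blockSize == 0) {
            m_inner_dim *= blockSize;        // copy whole channel blocks at once
            m_outer_dim /= blockSize;
        }
        m_inner_dim *= prcSize;              // bytes per memcpy chunk
        std::cout << m_outer_dim << " chunks of " << m_inner_dim << " bytes\n";  // 8 chunks of 32 bytes
    }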
reinterpret_cast(getChildEdgesAtPort(TOPK_INDEX)[0]->getMemoryPtr()->GetPtr()); - SizeVector dst_idx_dims = getChildEdgesAtPort(TOPK_INDEX)[0]->getDims().ToSizeVector(); + SizeVector dst_idx_dims = getChildEdgesAtPort(TOPK_INDEX)[0]->getShape().getStaticDims(); if (dst_idx_dims[axis] != static_cast(src_k) || dst_data_dims[axis] != static_cast(src_k)) { std::string errorMsg = "Output tensors dimension mismatch"; @@ -133,7 +133,7 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) { if (src_dims[axis] < static_cast(src_k)) src_k = src_dims[axis]; - SizeVector in_dims = getParentEdgeAt(TOPK_DATA)->getDims().ToSizeVector(); + SizeVector in_dims = getParentEdgeAt(TOPK_DATA)->getShape().getStaticDims(); if (src_k == 1) { if (is_last_dim) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp index 49bc1bb695d..5ea5b902e3e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp @@ -10,7 +10,7 @@ #include #include "ie_parallel.hpp" #include "utils/bfloat16.hpp" - +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -66,7 +66,7 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1)); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(2); config.outConfs.resize(1); @@ -74,53 +74,66 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].constant = false; config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), inputOrderDataType, memory::format_tag::x); - if (getParentEdgeAt(0)->getDims().ndims() == 4) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nchw}); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), inputOrderDataType, + memory::format_tag::x); + if (getParentEdgeAt(0)->getShape().getRank() == 4) { + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nchw); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::nchw); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw8c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw8c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nChw8c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = 
MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw16c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw16c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nChw16c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nhwc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nhwc}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nhwc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::nhwc); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ncdhw}); + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::ncdhw); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::ncdhw); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw8c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw8c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nCdhw8c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw16c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw16c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nCdhw16c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ndhwc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ndhwc); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, 
memory::format_tag::ndhwc}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::ndhwc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::ndhwc); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } else { // general plain case - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } @@ -135,23 +148,22 @@ void MKLDNNTransposeNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && std::find(optimizedOrders.begin(), optimizedOrders.end(), order) != optimizedOrders.end()) { isOptimized = true; return; } PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order = order; + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + params.src_block_dims = srcDesc.getBlockDims(); + params.src_block_order = srcDesc.getOrder(); - auto srcDesc = getParentEdgeAt(0)->getDesc(); - params.src_block_dims = srcDesc.getBlockingDesc().getBlockDims(); - params.src_block_order = srcDesc.getBlockingDesc().getOrder(); - - auto dstDesc = getChildEdgeAt(0)->getDesc(); - params.dst_block_dims = dstDesc.getBlockingDesc().getBlockDims(); - params.dst_block_order = dstDesc.getBlockingDesc().getOrder(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + params.dst_block_dims = dstDesc.getBlockDims(); + params.dst_block_order = dstDesc.getOrder(); permuteKernel = std::unique_ptr(new PermuteKernel(params)); } @@ -263,7 +275,7 @@ void MKLDNNTransposeNode::execute(mkldnn::stream strm) { int MB = batchToProcess(); if (isOptimized) { - const size_t dataSize = getParentEdgeAt(0)->getDesc().getPrecision().size(); + const size_t dataSize = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); TransposeContext ctx = {this, srcMemPtr, dstMemPtr, MB}; OV_SWITCH(MKLDNNPlugin, TransposeOptimizedEmitter, ctx, dataSize, OV_CASE(1, PrecisionTrait::value_type), diff --git a/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp b/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp index 7007c6ad00a..3aa58888b58 100644 --- a/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp +++ b/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp @@ -5,6 +5,7 @@ #include "normalize_preprocess.h" #include "ie_parallel.hpp" #include "nodes/common/cpu_memcpy.h" +#include "utils/general_utils.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -12,7 +13,7 @@ using namespace InferenceEngine; NormalizePreprocess::NormalizePreprocess() : meanBuffer(nullptr) { } -void NormalizePreprocess::Load(const MKLDNNDims& inputDims, 
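PermuteParams above is now filled from the plugin's blocked memory descriptors (getBlockDims/getOrder) instead of the InferenceEngine blocking desc. As a point of reference, a hypothetical 1x16x8x8 FP32 tensor stored as nChw8c would be described roughly as follows; the exact values are an assumption based on the usual blocked-layout convention:

    #include <cstddef>
    #include <vector>

    struct BlockedLayoutExample {
        // dims {1, 16, 8, 8} re-expressed with an inner channel block of 8
        std::vector<std::size_t> blockDims{1, 2, 8, 8, 8};  // N, C/8, H, W, 8
        std::vector<std::size_t> order{0, 1, 2, 3, 1};      // trailing 1: the inner block splits dim C
    };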
InputInfo::Ptr inputInfo) { +void NormalizePreprocess::Load(const Shape& inputShape, InputInfo::Ptr inputInfo) { PreProcessInfo &pp = inputInfo->getPreProcess(); size_t inChannels = pp.getNumberOfChannels(); if (inChannels == 0) { @@ -20,7 +21,7 @@ void NormalizePreprocess::Load(const MKLDNNDims& inputDims, InputInfo::Ptr input return; } - if (inChannels != inputDims[1]) { + if (!dimsEqualStrong(inChannels, inputShape.getDims()[1])) { IE_THROW() << "channels mismatch between mean and input"; } @@ -76,10 +77,11 @@ void NormalizePreprocess::Load(const MKLDNNDims& inputDims, InputInfo::Ptr input } } -void NormalizePreprocess::NormalizeImage(const MKLDNNDims &inputDims, float *input, InferenceEngine::Layout layout) { +void NormalizePreprocess::NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout) { IE_ASSERT(input != nullptr); - if (inputDims.ndims() != 4) { + const auto inputDims = inputShape.getStaticDims(); + if (inputDims.size() != 4) { IE_THROW() << "Expecting input as 4 dimension blob with format NxCxHxW."; } @@ -88,7 +90,7 @@ void NormalizePreprocess::NormalizeImage(const MKLDNNDims &inputDims, float *inp } int MB = inputDims[0]; - int srcSize = inputDims.size() / MB; + int srcSize = inputShape.getElementsCount() / MB; if (meanBuffer && meanBuffer->size()) { const float * meanBufferValues = meanBuffer->readOnly(); diff --git a/inference-engine/src/mkldnn_plugin/normalize_preprocess.h b/inference-engine/src/mkldnn_plugin/normalize_preprocess.h index 1bc6d843195..72ba9fd27a8 100644 --- a/inference-engine/src/mkldnn_plugin/normalize_preprocess.h +++ b/inference-engine/src/mkldnn_plugin/normalize_preprocess.h @@ -6,7 +6,7 @@ #include "ie_input_info.hpp" -#include "mkldnn_dims.h" +#include "cpu_shape.h" #include "ie_parallel.hpp" #include #include @@ -18,14 +18,15 @@ public: NormalizePreprocess(); public: - void Load(const MKLDNNDims& inputDims, InferenceEngine::InputInfo::Ptr inputInfo); - void NormalizeImage(const MKLDNNDims &inputDims, float *input, InferenceEngine::Layout layout); + void Load(const Shape& inputShape, InferenceEngine::InputInfo::Ptr inputInfo); + void NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout); template::value>::type* = nullptr> - void NormalizeImage(const MKLDNNDims &inputDims, T *input, InferenceEngine::Layout layout) { + void NormalizeImage(const Shape &inputShape, T *input, InferenceEngine::Layout layout) { IE_ASSERT(input != nullptr); - if (inputDims.ndims() != 4) { + const auto inputDims = inputShape.getStaticDims(); + if (inputDims.size() != 4) { IE_THROW() << "Expecting input as 4 dimension blob with format NxCxHxW."; } @@ -34,7 +35,7 @@ public: } int MB = inputDims[0]; - int srcSize = inputDims.size() / MB; + int srcSize = inputShape.getElementsCount() / MB; if (meanBuffer && meanBuffer->size()) { const float * meanBufferValues = meanBuffer->readOnly(); diff --git a/inference-engine/src/mkldnn_plugin/perf_count.h b/inference-engine/src/mkldnn_plugin/perf_count.h index 3fce79b5e68..0f230c4c76f 100644 --- a/inference-engine/src/mkldnn_plugin/perf_count.h +++ b/inference-engine/src/mkldnn_plugin/perf_count.h @@ -46,4 +46,5 @@ public: } // namespace MKLDNNPlugin -#define PERF(_counter) PerfHelper __helper##__counter (_counter->PerfCounter()); +#define GET_PERF(_counter) std::unique_ptr(new PerfHelper(_counter->PerfCounter())) +#define PERF(_need, _counter) auto pc = _need ? 
GET_PERF(_counter) : nullptr; diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp index 17b13034f7f..1272183c68b 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp @@ -5,10 +5,13 @@ #include "blob_dump.h" #include "blob_factory.hpp" #include "mkldnn_memory.h" +#include "mkldnn_extension_utils.h" +#include #include "common/memory_desc_wrapper.hpp" #include +#include using namespace InferenceEngine; @@ -35,7 +38,7 @@ struct IEB_HEADER { unsigned long scaling_data_size; }; -static IEB_HEADER prepare_header(const TensorDesc& desc) { +static IEB_HEADER prepare_header(const MemoryDesc& desc) { IEB_HEADER header = {}; header.magic[0] = IEB_MAGIC[0]; @@ -49,19 +52,20 @@ static IEB_HEADER prepare_header(const TensorDesc& desc) { header.precision = desc.getPrecision(); - if (desc.getDims().size() > 7) + if (desc.getShape().getRank() > 7) IE_THROW() << "Dumper support max 7D blobs"; - header.ndims = desc.getDims().size(); + header.ndims = desc.getShape().getRank(); + const auto &dims = desc.getShape().getStaticDims(); for (int i = 0; i < header.ndims; i++) - header.dims[i] = desc.getDims()[i]; + header.dims[i] = dims[i]; header.scaling_axis = NO_SCALES; return header; } -static TensorDesc parse_header(IEB_HEADER &header) { +static MKLDNNMemoryDesc parse_header(IEB_HEADER &header) { if (header.magic[0] != IEB_MAGIC[0] || header.magic[1] != IEB_MAGIC[1] || header.magic[2] != IEB_MAGIC[2] || @@ -72,175 +76,126 @@ static TensorDesc parse_header(IEB_HEADER &header) { header.ver[1] != 1) IE_THROW() << "Dumper cannot parse file. Unsupported IEB format version."; - Precision prc = Precision(static_cast(header.precision)); + const auto prc = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision(static_cast(header.precision))); SizeVector dims(header.ndims); for (int i = 0; i < header.ndims; i++) dims[i] = header.dims[i]; - return TensorDesc {prc, dims, TensorDesc::getLayoutByDims(dims) }; + return MKLDNNMemoryDesc{dims, prc, MKLDNNMemory::GetPlainFormatByRank(dims.size()) }; } +void BlobDumper::prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const { + const auto &desc = memory->GetDesc(); + size_t data_size = desc.getShape().getElementsCount(); + const auto size = data_size * desc.getPrecision().size(); + data.resize(size); -bool is_plain(const Blob::Ptr &blob) { - bool res = true; - - auto orig_strides = blob->getTensorDesc().getBlockingDesc().getStrides(); - auto orig_order = blob->getTensorDesc().getBlockingDesc().getOrder(); - auto dims = blob->getTensorDesc().getDims(); - - for (int stride = 1, i = dims.size() - 1; i >= 0; --i) { - if (stride != orig_strides[i] || i != orig_order[i]) res = false; - stride *= dims[i]; + // check if it already plain + if (desc.hasLayoutType(LayoutType::ncsp)) { + cpu_memcpy(data.data(), reinterpret_cast(memory->GetPtr()), size); + return; } - return res; -} - -static Blob::Ptr prepare_plain_data(Blob::Ptr blob) { - // check if it already plain - if (is_plain(blob)) return blob; - - Blob::Ptr pln_blob = make_plain_blob(blob->getTensorDesc().getPrecision(), blob->getTensorDesc().getDims()); - pln_blob->allocate(); - // Copy to plain - MKLDNNMemoryDesc mdesc(blob->getTensorDesc()); - mkldnn::memory::desc desc = mdesc; - mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data); + const void *ptr = memory->GetData(); - size_t data_size = blob->size(); - - // TODO: make it with blob_copy utility - switch 
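The reworked PERF macro above takes an explicit enable flag and only heap-allocates the RAII helper when counting is requested, so measurement can be switched off at runtime with no scope guard at all. A minimal self-contained mock of that pattern (PerfHelperMock and the *_MOCK names are illustrative, not the plugin's API):

    #include <memory>

    struct PerfHelperMock {
        PerfHelperMock()  { /* start timer           */ }
        ~PerfHelperMock() { /* stop and store result */ }
    };

    #define GET_PERF_MOCK() std::unique_ptr<PerfHelperMock>(new PerfHelperMock())
    #define PERF_MOCK(_need) auto pc = (_need) ? GET_PERF_MOCK() : nullptr

    void execute(bool countersEnabled) {
        PERF_MOCK(countersEnabled);  // the helper, if created, lives until the end of this scope
        // ... node execution ...
    }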
(blob->getTensorDesc().getPrecision()) { + switch (desc.getPrecision()) { case Precision::FP32: case Precision::I32: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } - case Precision::I16: - case Precision::U16: case Precision::BF16: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); + for (size_t i = 0; i < data_size; i++) + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } case Precision::I8: case Precision::U8: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } default: IE_THROW() << "Dumper. Unsupported precision"; } - - return pln_blob; } void BlobDumper::dump(std::ostream &stream) const { - if (!_blob) - IE_THROW() << "Dumper cannot dump empty Blob"; + if (memory == nullptr) + IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - if (_blob->buffer().as() == nullptr) - IE_THROW() << "Dumper cannot dump. Blob is not allocated."; - - IEB_HEADER header = prepare_header(_blob->getTensorDesc()); - Blob::Ptr pln_blob = prepare_plain_data(_blob); + IEB_HEADER header = prepare_header(memory->GetDesc()); + std::vector data; + prepare_plain_data(this->memory, data); header.data_offset = sizeof(header); - header.data_size = pln_blob->byteSize(); + header.data_size = data.size(); header.scaling_data_offset = 0; header.scaling_data_size = 0; - if (_scales) { - header.scaling_axis = 1; - header.scaling_data_offset = header.data_offset + header.data_size; - header.scaling_data_size = _scales->byteSize(); - } - - stream.write(reinterpret_cast(&header), sizeof(header)); - stream.write(pln_blob->buffer().as(), pln_blob->byteSize()); - - if (_scales) { - stream.write(_scales->buffer().as(), _scales->byteSize()); - } + stream.write(reinterpret_cast(&header), sizeof(header)); + stream.write(reinterpret_cast(data.data()), data.size()); } void BlobDumper::dumpAsTxt(std::ostream &stream) const { - if (!_blob) - IE_THROW() << "Dumper cannot dump empty Blob"; + if (memory == nullptr) + IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - if (_blob->buffer().as() == nullptr) - IE_THROW() << "Dumper cannot dump. 
Blob is not allocated."; - - SizeVector dims = _blob->getTensorDesc().getDims(); + const auto dims = memory->GetDims(); + const auto &desc = memory->GetDesc(); + size_t data_size = desc.getShape().getElementsCount(); // Header like "U8 4D shape: 2 3 224 224 () - stream << _blob->getTensorDesc().getPrecision().name() << " " + stream << memory->GetDesc().getPrecision().name() << " " << dims.size() << "D " << "shape: "; for (size_t d : dims) stream << d << " "; - stream << "(" << _blob->size() << ")" << - " by address 0x" << std::hex << _blob->buffer().as() << std::dec <(memory->GetData()) << std::dec <getTensorDesc()); - mkldnn::memory::desc desc = mdesc; - mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data); + const void *ptr = memory->GetData(); - size_t data_size = _blob->size(); - switch (_blob->getTensorDesc().getPrecision()) { - case Precision::FP32: { - auto *blob_ptr = _blob->buffer().as(); + switch (desc.getPrecision()) { + case Precision::FP32 : { + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << blob_ptr[blob_wrp.off_l(i)] << std::endl; + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; break; } - case Precision::BF16: - { - auto *blob_ptr = _blob->buffer().as(); + case Precision::BF16: { + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) { - int i16n = blob_ptr[blob_wrp.off_l(i)]; + int i16n = blob_ptr[desc.getElementOffset(i)]; i16n = i16n << 16; - float fn = *(reinterpret_cast(&i16n)); + float fn = *(reinterpret_cast(&i16n)); stream << fn << std::endl; } break; } case Precision::I32: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << blob_ptr[blob_wrp.off_l(i)] << std::endl; - break; - } - case Precision::I16: { - auto *blob_ptr = _blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; - break; - } - case Precision::U16: { - auto *blob_ptr = _blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; break; } case Precision::I8: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << static_cast(blob_ptr[desc.getElementOffset(i)]) << std::endl; break; } case Precision::U8: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << static_cast(blob_ptr[desc.getElementOffset(i)]) << std::endl; break; } default: @@ -252,29 +207,12 @@ BlobDumper BlobDumper::read(std::istream &stream) { IEB_HEADER header; stream.read(reinterpret_cast(&header), sizeof(header)); - TensorDesc desc = parse_header(header); - Blob::Ptr blob = make_blob_with_precision(desc); - blob->allocate(); + const auto desc = parse_header(header); + BlobDumper res(desc); stream.seekg(header.data_offset, stream.beg); - stream.read(blob->buffer().as(), header.data_size); + stream.read(reinterpret_cast(res.getDataPtr()), header.data_size); - BlobDumper res(blob); - - // Parse scales fields. 
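dumpAsTxt above prints a one-line header before the per-element values. A runnable sketch of just that header formatting for a hypothetical U8 blob (the real code also appends the buffer address, omitted here):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        const std::string precision = "U8";
        const std::vector<std::size_t> dims{2, 3, 224, 224};
        std::size_t count = 1;
        for (std::size_t d : dims) count *= d;

        std::cout << precision << " " << dims.size() << "D " << "shape: ";
        for (std::size_t d : dims) std::cout << d << " ";
        std::cout << "(" << count << ")" << std::endl;  // U8 4D shape: 2 3 224 224 (301056)
    }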
- if (header.scaling_axis != NO_SCALES) { - if (header.scaling_axis != 1) - IE_THROW() << "Dumper support scaling only for channel dims."; - - size_t scl_size = header.scaling_data_size / sizeof(float); - auto scl = make_blob_with_precision({Precision::FP32, {scl_size}, C}); - scl->allocate(); - - stream.seekg(header.scaling_data_offset, stream.beg); - stream.read(scl->buffer().as(), header.scaling_data_size); - - res._scales = scl; - } return res; } @@ -309,73 +247,4 @@ void BlobDumper::dumpAsTxt(const std::string& dump_path) const { dump_file.close(); } -Blob::Ptr BlobDumper::get() { - return _blob; -} - -template -static void plain_copy(const Blob::Ptr &from, const Blob::Ptr &scls, Blob::Ptr &to) { - auto dims = from->getTensorDesc().getDims(); - - size_t data_size = from->size(); - size_t outer_size = dims[0]; - size_t c_size = dims.size() > 1 ? dims[1] : 1; - size_t inner_size = dims.size() == 4 ? dims[2]*dims[3] : - dims.size() == 3 ? dims[2] : 1; - - auto to_data = to->buffer().as(); - auto from_data = from->buffer().as(); - - if (scls) { - auto scls_data = scls->buffer().as(); - - for (size_t o=0; o < outer_size; o++) - for (size_t c=0; c < c_size; c++) - for (size_t i=0; i < inner_size; i++) - *to_data++ = static_cast(*from_data++) * scls_data[c]; - } else { - for (size_t i=0; i < data_size; i++) - *to_data++ = static_cast(*from_data++); - } -} - -Blob::Ptr BlobDumper::getRealValue() { - if (_blob->getTensorDesc().getPrecision() == Precision::FP32 && !_scales) - return _blob; - - auto res = make_plain_blob(Precision::FP32, _blob->getTensorDesc().getDims()); - res->allocate(); - - switch (_blob->getTensorDesc().getPrecision()) { - case Precision::U8: plain_copy(_blob, _scales, res); break; - case Precision::FP32: plain_copy(_blob, _scales, res); break; - case Precision::I8: plain_copy(_blob, _scales, res); break; - default: IE_THROW() << "Unsupported precesion for getRealValue method."; - } - - return res; -} - - -BlobDumper& BlobDumper::withScales(InferenceEngine::Blob::Ptr scales) { - if ( _blob->getTensorDesc().getDims().size() < 2 || - scales->getTensorDesc().getDims().size() != 1 || - scales->getTensorDesc().getDims()[0] != _blob->getTensorDesc().getDims()[1] || - scales->getTensorDesc().getPrecision() != Precision::FP32) - IE_THROW() << "Dumper cannot use passed scales. Blob has incompatible shape."; - - _scales = scales; - return *this; -} - -BlobDumper& BlobDumper::withoutScales() { - _scales.reset(); - return *this; -} - - -const InferenceEngine::Blob::Ptr& BlobDumper::getScales() const { - return _scales; -} - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h index c2cc793e421..5271f351d6b 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h @@ -4,7 +4,7 @@ #pragma once -#include "ie_blob.h" +#include "mkldnn_memory.h" #include @@ -19,15 +19,21 @@ namespace MKLDNNPlugin { * NB! Channel is a second dimension for all blob types. 
*/ class BlobDumper { - InferenceEngine::Blob::Ptr _blob; - InferenceEngine::Blob::Ptr _scales; + MKLDNNMemoryPtr memory; + + void prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const; public: BlobDumper() = default; + BlobDumper(const MKLDNNMemoryDesc &desc) { + mkldnn::engine eng(mkldnn::engine::kind::cpu, 0); + memory = std::make_shared(eng); + memory->Create(desc); + } BlobDumper(const BlobDumper&) = default; BlobDumper& operator = (BlobDumper&&) = default; - explicit BlobDumper(const InferenceEngine::Blob::Ptr blob):_blob(blob) {} + explicit BlobDumper(const MKLDNNMemoryPtr &_memory) : memory(_memory) {} static BlobDumper read(const std::string &file_path); static BlobDumper read(std::istream &stream); @@ -38,13 +44,9 @@ public: void dumpAsTxt(const std::string &file_path) const; void dumpAsTxt(std::ostream &stream) const; - BlobDumper& withScales(InferenceEngine::Blob::Ptr scales); - BlobDumper& withoutScales(); - - const InferenceEngine::Blob::Ptr& getScales() const; - - InferenceEngine::Blob::Ptr get(); - InferenceEngine::Blob::Ptr getRealValue(); + void *getDataPtr() const { + return memory->GetPtr(); + } }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp index 0754e346a6e..0cd3975c39a 100644 --- a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp +++ b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp @@ -90,5 +90,4 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine: } return precision; } - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/general_utils.h b/inference-engine/src/mkldnn_plugin/utils/general_utils.h index 952bf43dbf5..35640212a55 100644 --- a/inference-engine/src/mkldnn_plugin/utils/general_utils.h +++ b/inference-engine/src/mkldnn_plugin/utils/general_utils.h @@ -6,6 +6,7 @@ #include #include +#include "cpu_shape.h" namespace MKLDNNPlugin { @@ -40,6 +41,11 @@ constexpr inline bool implication(bool cause, bool cond) { return !cause || !!cond; } +template +std::unique_ptr make_unique(Args&&... 
args) { + return std::unique_ptr(new T(std::forward(args)...)); +} + inline std::string getExceptionDescWithoutStatus(const InferenceEngine::Exception& ex) { std::string desc = ex.what(); IE_SUPPRESS_DEPRECATED_START @@ -70,4 +76,62 @@ std::string vec2str(const std::vector &vec) { return std::string("()"); } +/** + * @brief Compares that two dims are equal and defined + * @param lhs + * first dim + * @param rhs + * second dim + * @return result of comparison + */ +inline bool dimsEqualStrong(size_t lhs, size_t rhs) { + return (lhs == rhs && lhs != Shape::UNDEFINED_DIM && rhs != Shape::UNDEFINED_DIM); +} + +/** + * @brief Compares that two dims are equal or undefined + * @param lhs + * first dim + * @param rhs + * second dim + * @return result of comparison + */ +inline bool dimsEqualWeak(size_t lhs, size_t rhs) { + return (lhs == Shape::UNDEFINED_DIM || rhs == Shape::UNDEFINED_DIM || lhs == rhs); +} + +/** + * @brief Compares that two shapes are equal or undefined + * @param lhs + * first shape + * @param rhs + * second shape + * @param skipAxis + * marks shape axis which shouldn't be validated + * @return order + */ +inline bool dimsEqualWeak(const std::vector& lhs, const std::vector& rhs, size_t skipAxis = Shape::UNDEFINED_DIM) { + if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); i++) { + if (i != skipAxis && !dimsEqualWeak(lhs[i], rhs[i])) + return false; + } + + return true; +} + +inline InferenceEngine::Precision getMaxPrecision(std::vector precisions) { + if (!precisions.empty()) { + std::sort(precisions.begin(), precisions.end(), + [](const InferenceEngine::Precision &lhs, const InferenceEngine::Precision &rhs) { + return lhs.size() > rhs.size(); + }); + return precisions[0]; + } + + return InferenceEngine::Precision::UNSPECIFIED; +} + } // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp index 1cfbae1ab5f..2e0b06c0e4d 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp @@ -9,6 +9,7 @@ #include "ie_common.h" #include "utils/blob_dump.h" #include "utils/debug_capabilities.h" +#include "cpu_memory_desc_utils.h" #include #include @@ -65,14 +66,11 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const { auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; std::cout << "Dump inputs: " << dump_file << std::endl; - TensorDesc desc = prEdge->getDesc(); + auto& desc = prEdge->getMemory().GetDesc(); if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(prEdge->getBlob()); - if (pr->ext_scales) - dumper.withScales(pr->ext_scales); - + BlobDumper dumper(prEdge->getMemoryPtr()); dump(dumper, dump_file); } @@ -101,14 +99,11 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const { auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; std::cout << "Dump outputs: " << dump_file << std::endl; - TensorDesc desc = childEdge->getDesc(); + auto& desc = childEdge->getMemory().GetDesc(); if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(childEdge->getBlob()); - if (node->ext_scales) - dumper.withScales(node->ext_scales); - + BlobDumper dumper(childEdge->getMemoryPtr()); dump(dumper, dump_file); } } @@ -126,7 +121,9 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const { if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(blb); + 
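The comparison helpers added above treat an undefined dimension as a wildcard in the weak check and as a mismatch in the strong one. A standalone illustration, assuming the Shape::UNDEFINED_DIM sentinel is the maximum size_t value:

    #include <cstddef>
    #include <limits>

    constexpr std::size_t UNDEFINED_DIM = std::numeric_limits<std::size_t>::max();

    bool dimsEqualStrong(std::size_t lhs, std::size_t rhs) {
        return lhs == rhs && lhs != UNDEFINED_DIM && rhs != UNDEFINED_DIM;
    }

    bool dimsEqualWeak(std::size_t lhs, std::size_t rhs) {
        return lhs == UNDEFINED_DIM || rhs == UNDEFINED_DIM || lhs == rhs;
    }

    int main() {
        // An unknown channel count weakly matches 3 but never strongly matches it.
        return (dimsEqualWeak(UNDEFINED_DIM, 3) && !dimsEqualStrong(UNDEFINED_DIM, 3)) ? 0 : 1;
    }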
MKLDNNMemoryPtr memory = std::make_shared(node->getEngine()); + memory->Create(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc), blb->buffer()); + BlobDumper dumper(memory); dump(dumper, dump_file); } } diff --git a/inference-engine/src/offline_transformations/include/disable_shapeof_constant_folding.hpp b/inference-engine/src/offline_transformations/include/disable_shapeof_constant_folding.hpp new file mode 100644 index 00000000000..678b41af0ef --- /dev/null +++ b/inference-engine/src/offline_transformations/include/disable_shapeof_constant_folding.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +namespace ngraph { +namespace pass { + +class DisableShapeOfConstantFolding; + +} // namespace pass +} // namespace ngraph + + +class ngraph::pass::DisableShapeOfConstantFolding: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + DisableShapeOfConstantFolding(); +}; diff --git a/inference-engine/src/offline_transformations/src/disable_shapeof_constant_folding.cpp b/inference-engine/src/offline_transformations/src/disable_shapeof_constant_folding.cpp new file mode 100644 index 00000000000..456ba721647 --- /dev/null +++ b/inference-engine/src/offline_transformations/src/disable_shapeof_constant_folding.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "disable_shapeof_constant_folding.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableShapeOfConstantFolding, "DisableShapeOfConstantFolding", 0); + +ngraph::pass::DisableShapeOfConstantFolding::DisableShapeOfConstantFolding() { + auto shape_of = pattern::wrap_type([=](const Output & output) { + const auto & shape = output.get_partial_shape(); + return shape.is_dynamic() || shape_size(shape.get_shape()) != 1; + }); + + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { + disable_constant_folding(m.get_match_root()); + return true; + }; + + auto m = std::make_shared(shape_of, "DisableShapeOfConstantFolding"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/offline_transformations/src/moc_transformations.cpp b/inference-engine/src/offline_transformations/src/moc_transformations.cpp index 0b7d66f3743..1a23f72e607 100644 --- a/inference-engine/src/offline_transformations/src/moc_transformations.cpp +++ b/inference-engine/src/offline_transformations/src/moc_transformations.cpp @@ -5,8 +5,10 @@ #include #include "moc_transformations.hpp" +#include "disable_shapeof_constant_folding.hpp" #include +#include #include #include #include @@ -18,6 +20,21 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); @@ -34,20 +51,56 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr(); + manager.register_pass( + element::TypeVector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); + auto transpose_sinking = manager.register_pass(); + transpose_sinking->add_matcher(); + // SplitSqueezeConcatFusion should work in same GraphRewrite as TransposesSinking, + // because it 
replaces pattern that may contain Transposes which must be optimized before + // the transformation and it also inserts Transpose that can be optimized by TransposeSinking + transpose_sinking->add_matcher(); + + auto eliminations = manager.register_pass(); + eliminations->add_matcher(); + eliminations->add_matcher(false /* do not use shape for elimination */); + eliminations->set_name("ngraph::pass::CommonEliminations"); + auto common_fusions = manager.register_pass(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->set_name("ngraph::pass::CommonFusions"); + manager.register_pass(); + manager.register_pass(); + + auto decomp = manager.register_pass(); + decomp->add_matcher(); + + manager.register_pass(); + + auto conv_fusions = manager.register_pass(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->set_name("ngraph::pass::ConvFusions"); + manager.run_passes(f); // Restore original shapes to the nGraph Function diff --git a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp index 271b200f31b..e944ffff57b 100644 --- a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp @@ -19,12 +19,12 @@ namespace mask_propagation { class Convolution; class GroupConvolution; +class GroupConvolutionReshape; class Elementwise; class PassThrough; class StopPropagation; class FakeQuantize; class Concat; -class Reshape; } // namespace mask_propagation } // namespace pass @@ -192,9 +192,9 @@ public: } }; -class ngraph::pass::mask_propagation::Reshape : public MatcherPass { +class ngraph::pass::mask_propagation::GroupConvolutionReshape : public MatcherPass { public: - Reshape() { + GroupConvolutionReshape() { auto input = pattern::any_input(pattern::has_static_shape()); auto shape = pattern::any_input(); // Working only for Reshapes on Group Convolution weights @@ -258,10 +258,12 @@ public: ngraph::replace_node(old_shape_const, new_const); setMask(m_output, output_mask); - return true; + // This transformation propagates only Reshape mask and doesn't do anything with GroupConvolution. + // So, not to disable GroupConvolution mask propagation we return false here. 
+ return false; }; - auto m = std::make_shared(reshape, "ReshapeMaskPropagation"); + auto m = std::make_shared(gconv, "ReshapeMaskPropagation"); register_matcher(m, callback); } }; @@ -419,13 +421,12 @@ public: auto fq_node = std::dynamic_pointer_cast(m_output.get_node_shared_ptr()); size_t idx = 0; if (fq_node->get_auto_broadcast() != ngraph::op::AutoBroadcastType::NONE) { - for (auto const_node : fq_params_nodes) { + for (auto node : fq_params_nodes) { + auto const_node = std::dynamic_pointer_cast(node); + if (!const_node) throw ngraph_error("Unexpected operation type."); auto new_shape = broadcast_shape_to_rank(const_node->get_shape(), m_input.get_partial_shape().rank().get_length()); - auto const_copy = const_node->clone_with_new_inputs(const_node->input_values()); - auto new_const = std::dynamic_pointer_cast(const_copy); - new_const->set_data_shape(new_shape); - new_const->validate_and_infer_types(); + auto new_const = std::make_shared(*const_node, new_shape); new_const->set_friendly_name(const_node->get_friendly_name()); ngraph::copy_runtime_info(const_node, new_const); ngraph::replace_node(const_node, new_const); @@ -605,11 +606,11 @@ public: ngraph::pass::PropagateMasks::PropagateMasks() { add_matcher(); + add_matcher(); add_matcher(); add_matcher(); add_matcher(); add_matcher(); add_matcher(); - add_matcher(); add_matcher(); } diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp index ffefeed06f0..3af55071aa9 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp @@ -349,7 +349,7 @@ template - const int operator()(type_to_type) { return cv_type_to_depth::depth; } + int operator()(type_to_type) { return cv_type_to_depth::depth; } }; } // namespace diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index cc00ec77114..5faf7bc37c4 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -716,9 +716,9 @@ V10Parser::V10Parser::GenericLayerParams XmlDeserializer::parseGenericParams( int64_t dim = 0; const pugi::char_t* dimVal = node.child_value(); std::stringstream ss(dimVal); - if (!(ss >> dim) || dim < 0) { + if (!(ss >> dim) || dim < -1) { IE_THROW() << "dimension (" << dimVal << ") in node " << node.name() - << " must be a non-negative integer: at offset " + << " must be greater or equal to -1: at offset " << node.offset_debug(); } port.dims.push_back(dim); @@ -855,7 +855,7 @@ std::shared_ptr XmlDeserializer::createNode( size_t index{0}; for (const auto & output_params : params.outputPorts) { - ngraphNode->set_output_type(index, output_params.precision, ngraph::Shape(output_params.dims)); + ngraphNode->set_output_type(index, output_params.precision, ngraph::PartialShape(output_params.dims)); ++index; } } diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp index 540f8454887..15ac63f531e 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp @@ -67,7 +67,7 @@ public: struct GenericLayerParams { struct LayerPortData { size_t portId; - SizeVector dims; + std::vector dims; ngraph::element::Type_t precision; std::unordered_set names; }; diff --git 
a/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp b/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp new file mode 100644 index 00000000000..3bed4a37e6a --- /dev/null +++ b/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp @@ -0,0 +1,114 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +template +class NmsStaticShapeIE : public BaseNmsOp { +public: + NGRAPH_RTTI_DECLARATION; + + using Attributes = typename BaseNmsOp::Attributes; + + /// \brief Constructs a NmsStaticShapeIE operation + /// + /// \param boxes Node producing the box coordinates + /// \param scores Node producing the box scores + /// \param attrs Attributes of the operation + NmsStaticShapeIE(const Output& boxes, + const Output& scores, + const Attributes& attrs) : BaseNmsOp(boxes, scores, attrs) { + this->constructor_validate_and_infer_types(); + } + void validate_and_infer_types() override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + return std::make_shared(new_args.at(0), new_args.at(1), this->m_attrs); + } +}; + +template +void NmsStaticShapeIE::validate_and_infer_types() { + const auto boxes_ps = this->get_input_partial_shape(0); + const auto scores_ps = this->get_input_partial_shape(1); + + auto first_dim_shape = Dimension::dynamic(); + + if (boxes_ps.rank().is_static() && scores_ps.rank().is_static()) { + const auto num_boxes_boxes = boxes_ps[1]; + if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static()) { + const auto num_boxes = num_boxes_boxes.get_length(); + auto num_classes = scores_ps[1].get_length(); + if (this->m_attrs.background_class >=0 && this->m_attrs.background_class <= num_classes) { + num_classes = num_classes - 1; + } + int64_t max_output_boxes_per_class = 0; + if (this->m_attrs.nms_top_k >= 0) + max_output_boxes_per_class = std::min(num_boxes, static_cast(this->m_attrs.nms_top_k)); + else + max_output_boxes_per_class = num_boxes; + + auto max_output_boxes_per_batch = max_output_boxes_per_class * num_classes; + if (this->m_keep_top_k >= 0) + max_output_boxes_per_batch = + std::min(max_output_boxes_per_batch, static_cast(this->m_attrs.keep_top_k)); + + first_dim_shape = max_output_boxes_per_batch * scores_ps[0].get_length(); + } + } + + // 'selected_outputs' have the following format: + // [number of selected boxes, [class_id, box_score, xmin, ymin, xmax, ymax]] + this->set_output_type(0, element::f32, {first_dim_shape, 6}); + // 'selected_indices' have the following format: + // [number of selected boxes, 1] + this->set_output_type(1, this->m_attrs.output_type, {first_dim_shape, 1}); + // 'selected_num' have the following format: + // [num_batches, ] + if (boxes_ps.rank().is_static() && boxes_ps.rank().get_length() > 0) { + this->set_output_type(2, this->m_attrs.output_type, {boxes_ps[0]}); + } else { + this->set_output_type(2, this->m_attrs.output_type, {Dimension::dynamic()}); + } +} + +template +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info() const { return get_type_info_static(); } + +template +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info_static() { + auto BaseNmsOpTypeInfoPtr = &BaseNmsOp::get_type_info_static(); + + // TODO: it should be static const std::string name = 
std::string("NmsStaticShapeIE_") + BaseNmsOpTypeInfoPtr->name; + // but currently it will not pass conversion ot Legacy Opset correctly + static const std::string name = BaseNmsOpTypeInfoPtr->name; + + static const ::ngraph::Node::type_info_t type_info_static{ + name.c_str(), BaseNmsOpTypeInfoPtr->version, BaseNmsOpTypeInfoPtr}; + return type_info_static; +} + +template +const ::ngraph::Node::type_info_t NmsStaticShapeIE::type_info = NmsStaticShapeIE::get_type_info_static(); + +#ifdef __clang__ +extern template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +extern template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +#endif // __clang__ + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/algebraic_simplification.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/algebraic_simplification.hpp index c363f513d9e..02f7860e203 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/algebraic_simplification.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/algebraic_simplification.hpp @@ -20,5 +20,5 @@ class TRANSFORMATIONS_API AlgebraicSimplification; class ngraph::pass::AlgebraicSimplification : public GraphRewrite { public: NGRAPH_RTTI_DECLARATION; - AlgebraicSimplification(); + AlgebraicSimplification() = default; }; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp new file mode 100644 index 00000000000..79e203485fa --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API LeakyReluFusion; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief LeakyReluFusion transformation replaces following graph: + * Multiply->Maximum to LeakyRelu + */ + +class ngraph::pass::LeakyReluFusion: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + LeakyReluFusion(); +}; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/nop_elimination.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/nop_elimination.hpp index a5d9f7cd19f..ca5028d5126 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/nop_elimination.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/nop_elimination.hpp @@ -15,13 +15,80 @@ namespace ngraph { namespace pass { +class TRANSFORMATIONS_API EliminatePad; +class TRANSFORMATIONS_API EliminateConvert; +class TRANSFORMATIONS_API EliminateConvertNonZero; +class TRANSFORMATIONS_API EliminateConcat; +class TRANSFORMATIONS_API EliminateSplit; +class TRANSFORMATIONS_API EliminateTranspose; class TRANSFORMATIONS_API NopElimination; } // namespace pass } // namespace ngraph +/** + * @ingroup ie_transformation_common_api + * @brief EliminatePad eliminates pad that does nothing + */ +class ngraph::pass::EliminatePad: public ngraph::pass::MatcherPass { 
+public: + NGRAPH_RTTI_DECLARATION; + EliminatePad(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateConvert eliminates convert that does nothing + */ +class ngraph::pass::EliminateConvert: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateConvert(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateConvertNonZero eliminates convert before NonZero + */ +class ngraph::pass::EliminateConvertNonZero: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateConvertNonZero(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateConcat eliminates concat that does nothing + */ +class ngraph::pass::EliminateConcat: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateConcat(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateSplit eliminates split that does nothing + */ +class ngraph::pass::EliminateSplit: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateSplit(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateTranspose eliminates transpose that does nothing + */ +class ngraph::pass::EliminateTranspose: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateTranspose(); +}; + + class ngraph::pass::NopElimination: public GraphRewrite { public: NGRAPH_RTTI_DECLARATION; - NopElimination(); + NopElimination(bool use_shape_for_elimination = true); }; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/pad_fusion.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/pad_fusion.hpp index 37c903de952..7c11f1ee02a 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/pad_fusion.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/pad_fusion.hpp @@ -12,7 +12,6 @@ namespace ngraph { namespace pass { class TRANSFORMATIONS_API PadFusion; -class TRANSFORMATIONS_API PadElimination; class TRANSFORMATIONS_API PadFusionAvgPool; class TRANSFORMATIONS_API PadFusionMaxPool; class TRANSFORMATIONS_API PadFusionConvolution; @@ -23,16 +22,6 @@ class TRANSFORMATIONS_API PadFusionGroupConvolutionBackpropData; } // namespace pass } // namespace ngraph -/** - * @ingroup ie_transformation_common_api - * @brief PadElimination eliminates pad that does nothing - */ -class ngraph::pass::PadElimination: public ngraph::pass::MatcherPass { -public: - NGRAPH_RTTI_DECLARATION; - PadElimination(); -}; - /** * @ingroup ie_transformation_common_api * @brief PadFusion transformation replaces following graph: @@ -124,6 +113,5 @@ public: add_matcher(); add_matcher(); add_matcher(); - add_matcher(); } }; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp index 752054baa49..2c7c7e5cd0d 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp @@ -21,6 +21,7 @@ class TRANSFORMATIONS_API SimplifyShapeOfSubGraph; class TRANSFORMATIONS_API SharedShapeOf; class TRANSFORMATIONS_API GroupedGatherElimination; class TRANSFORMATIONS_API GatherNopElimination; +class 
TRANSFORMATIONS_API SimplifyGatherShapeOf; } // namespace pass } // namespace ngraph @@ -69,3 +70,15 @@ public: NGRAPH_RTTI_DECLARATION; GatherNopElimination(); }; + +/** + * @ingroup ie_transformation_common_api + * @brief SimplifyGatherShapeOf optimizes `gather->shapeof` into `shapeof->gather` for 0D indices. + * Other cases into Concat of shapeof/gather(data) + shapeof(indices) transformation optimizes out + * useless Gather operations + */ +class ngraph::pass::SimplifyGatherShapeOf: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SimplifyGatherShapeOf(); +}; diff --git a/inference-engine/src/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp b/inference-engine/src/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp index 79ad6e3e882..f5405daa6d6 100644 --- a/inference-engine/src/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp +++ b/inference-engine/src/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp @@ -22,5 +22,5 @@ class ngraph::pass::DisableConvertConstantFoldingOnConstPath : public ngraph::pa public: NGRAPH_RTTI_DECLARATION; DisableConvertConstantFoldingOnConstPath( - const std::vector& inputPrecisions = {}); + const element::TypeVector & inputPrecisions = {}); }; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp new file mode 100644 index 00000000000..080a0868322 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertMatrixNmsToMatrixNmsIE; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertMatrixNmsToMatrixNmsIE: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatrixNmsToMatrixNmsIE(); +}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp new file mode 100644 index 00000000000..b639364b24e --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertMulticlassNmsToMulticlassNmsIE; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMulticlassNmsToMulticlassNmsIE(); +}; diff --git a/inference-engine/src/transformations/include/transformations/rt_info/disable_constant_folding.hpp b/inference-engine/src/transformations/include/transformations/rt_info/disable_constant_folding.hpp new file mode 100644 
index 00000000000..1e04ce22dcc --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/rt_info/disable_constant_folding.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace ngraph { + +/** + * @ingroup ie_runtime_attr_api + * @brief DisableConstantFolding disable ConstantFolding for given operation + */ +class TRANSFORMATIONS_API DisableConstantFolding { +public: + DisableConstantFolding() = default; +}; + +extern template class TRANSFORMATIONS_API VariantImpl; + +template<> +class TRANSFORMATIONS_API VariantWrapper : public VariantImpl { +public: + static constexpr VariantTypeInfo type_info{"DISABLED_CONSTANT_FOLDING", 0}; + + const VariantTypeInfo &get_type_info() const override { + return type_info; + } + + VariantWrapper(const value_type &value) : VariantImpl(value) {} + + bool is_copyable() const override { return false; } +}; + +TRANSFORMATIONS_API void disable_constant_folding(const std::shared_ptr& node); +} // namespace ngraph diff --git a/inference-engine/src/transformations/include/transformations/rt_info/strides_property.hpp b/inference-engine/src/transformations/include/transformations/rt_info/strides_property.hpp index a964c490fe8..2ec78ce6892 100644 --- a/inference-engine/src/transformations/include/transformations/rt_info/strides_property.hpp +++ b/inference-engine/src/transformations/include/transformations/rt_info/strides_property.hpp @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include diff --git a/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp new file mode 100644 index 00000000000..8f173eafcae --- /dev/null +++ b/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp @@ -0,0 +1,19 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "ngraph/ops.hpp" +#include "ngraph_ops/nms_static_shape_ie.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/algebraic_simplification.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/algebraic_simplification.cpp index 519c48aa05f..14ed78d4e97 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/algebraic_simplification.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/algebraic_simplification.cpp @@ -2,163 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include - #include "transformations/common_optimizations/algebraic_simplification.hpp" -#include "itt.hpp" - -#include -#include -#include -#include -#include - -using namespace std; -using namespace ngraph; NGRAPH_RTTI_DEFINITION(ngraph::pass::AlgebraicSimplification, "AlgebraicSimplification", 0); - -//`simplify_gather`, optimizes gather if Gather is gathering the -// whole input tensor -static bool simplify_gather(std::shared_ptr node) { - if (auto gather = as_type_ptr(node)) { - // check if we are gathering the whole input - auto data = gather->input_value(0); - auto indices = 
gather->input_value(1); - - // we need to know data and indices shape to infer if gather is Nop - if (data.get_partial_shape().is_dynamic() || indices.get_partial_shape().is_dynamic()) { - return false; - } - // if rank of data and gather output dont match, we will skip - if (data.get_shape().size() != node->get_shape().size()) { - return false; - } - - auto axis = gather->get_axis(); - if (axis == opset3::Gather::AXIS_NOT_SET_VALUE) { - NGRAPH_DEBUG << "axis value not set"; - return false; - } - - // case_1 : if the input tensor is of shape (4, 1, 4) - // and axis = 1, then the gather would be simply - // gathering the whole input tensor, so we can optimize this - // op has Nop - - if (data.get_shape()[axis] == 1 && data.get_shape() == node->get_shape()) { - return replace_output_update_name(gather->output(0), gather->input_value(0)); - } - - // case_2 : if the input tensor is of shape (4, 3, 4) - // we need to check the contents of indices, if indices - // is 1D tensor of value {0, 1, 2}, we can optimize this - // op has Nop - - // check if the indices is constant - auto constant_indices = - as_type_ptr(gather->input_value(1).get_node_shared_ptr()); - if (!constant_indices) { - return false; - } else { - // if ref_inidices == indices, we are capturing the - // entire input tensor - std::vector ref_indices(data.get_shape()[axis], 0); - std::iota(ref_indices.begin(), ref_indices.end(), 0); - if (ref_indices == constant_indices->cast_vector()) { - return replace_output_update_name(gather->output(0), gather->input_value(0)); - } - } - } - return false; -} - -// optimizes `gather->shapeof` into `shapeof->gather` for 0D indices -// other cases into Concat of shapeof/gather(data) + shapeof(indices) -static bool simplify_gather_shapeof(shared_ptr node) { - auto gather = as_type_ptr(node->input_value(0).get_node_shared_ptr()); - if (!gather) { - return false; - } - auto gather_in_rank = gather->get_input_partial_shape(0).rank(); - auto indices_rank = gather->get_input_partial_shape(1).rank(); - auto axis = gather->get_axis(); - if (gather_in_rank.is_dynamic() || indices_rank.is_dynamic() || - axis == opset3::Gather::AXIS_NOT_SET_VALUE) { - NGRAPH_DEBUG << gather << " cannot simplify gather->shapeof"; - return false; - } - - auto zero_axis = opset3::Constant::create(element::i64, Shape{}, {0}); - NodeVector new_ops; - auto new_shapeof = make_shared(gather->input_value(0), node->get_output_element_type(0)); - new_ops.push_back(new_shapeof); - std::shared_ptr replace_op; - if (indices_rank.get_length() == 0) { - std::vector vi(gather_in_rank.get_length()); - std::iota(vi.begin(), vi.end(), 0); - vi.erase(vi.begin() + axis); - auto new_indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); - replace_op = make_shared(new_shapeof, new_indices, zero_axis); - new_ops.push_back(replace_op); - } else { - NodeVector concat_inputs; - if (axis > 0) { - std::vector vi(axis); - std::iota(vi.begin(), vi.end(), 0); - auto indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); - auto gather = make_shared(new_shapeof, indices, zero_axis); - new_ops.push_back(gather); - concat_inputs.push_back(gather); - } - auto shapeof_indices = make_shared(gather->input_value(1), node->get_output_element_type(0)); - new_ops.push_back(shapeof_indices); - - concat_inputs.push_back(shapeof_indices); - - if (gather_in_rank.get_length() - 1 > axis) { - std::vector vi(gather_in_rank.get_length() - (axis + 1)); - std::iota(vi.begin(), vi.end(), axis + 1); - auto indices = 
opset3::Constant::create(element::i64, Shape{vi.size()}, vi); - auto gather = make_shared(new_shapeof, indices, zero_axis); - new_ops.push_back(gather); - concat_inputs.push_back(gather); - } - replace_op = make_shared(concat_inputs, 0); - new_ops.push_back(replace_op); - } - replace_op->set_friendly_name(node->get_friendly_name()); - copy_runtime_info(node, new_ops); - replace_node(node, replace_op); - return true; -} - -#define ECHO(NAME) #NAME -#define STR(NAME) ECHO(NAME) -#define SIMPLE_MATCHER_PASS_DEFINITION(NAME, OP, FUNC) \ -class NAME : public ngraph::pass::MatcherPass { \ -public: \ -NGRAPH_RTTI_DECLARATION; \ -NAME() { \ - MATCHER_SCOPE(NAME); \ - auto match_node = ngraph::pattern::wrap_type(); \ - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { \ - return FUNC(m.get_match_root()); \ - }; \ - auto m = std::make_shared(match_node, matcher_name); \ - register_matcher(m, callback); \ -} \ -}; \ -NGRAPH_RTTI_DEFINITION(NAME, STR(NAME), 0); - -SIMPLE_MATCHER_PASS_DEFINITION(EliminateGather, opset3::Gather, simplify_gather); -SIMPLE_MATCHER_PASS_DEFINITION(SimplifyShapeOf2Gather, opset2::ShapeOf, simplify_gather_shapeof); -SIMPLE_MATCHER_PASS_DEFINITION(SimplifyShapeOf3Gather, opset3::ShapeOf, simplify_gather_shapeof); - -ngraph::pass::AlgebraicSimplification::AlgebraicSimplification() { - add_matcher(); - add_matcher(); - add_matcher(); -} diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 415ecb11610..e0089d644da 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -6,7 +6,6 @@ #include "transformations/init_node_info.hpp" #include "itt.hpp" -#include "transformations/common_optimizations/algebraic_simplification.hpp" #include "transformations/common_optimizations/broadcast_elementwise_fusion.hpp" #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/common_optimizations/common_optimizations.hpp" @@ -21,6 +20,7 @@ #include "transformations/common_optimizations/swish_fusion.hpp" #include "transformations/common_optimizations/normalize_l2_fusion.hpp" #include "transformations/common_optimizations/pull_transpose_through_fq.hpp" +#include "transformations/common_optimizations/leaky_relu_fusion.hpp" #include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" #include "transformations/common_optimizations/remove_filtering_boxes_by_size.hpp" #include "transformations/common_optimizations/hsigmoid_fusion.hpp" @@ -108,7 +108,6 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr(); eliminations->add_matcher(); - eliminations->add_matcher(); // may introduce fake dynamism eliminations->add_matcher(); // may introduce fake dynamism eliminations->set_name("ngraph::pass::CommonEliminations"); @@ -133,6 +132,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptradd_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->set_name("ngraph::pass::CommonFusions"); manager.register_pass(); diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp 
b/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp new file mode 100644 index 00000000000..388d2f17104 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/leaky_relu_fusion.hpp" +#include "transformations/utils/utils.hpp" + +#include +#include + +#include +#include +#include +#include "itt.hpp" + + +NGRAPH_RTTI_DEFINITION(ngraph::pass::LeakyReluFusion, "LeakyReluFusion", 0); + +ngraph::pass::LeakyReluFusion::LeakyReluFusion() { + MATCHER_SCOPE(LeakyReluFusion); + auto data_pattern = ngraph::pattern::any_input(); + auto alpha_pattern = ngraph::pattern::any_input(pattern::has_static_shape()); + auto multiply_pattern = ngraph::pattern::wrap_type({data_pattern, alpha_pattern}, pattern::consumers_count(1)); + auto max_pattern = ngraph::pattern::wrap_type({data_pattern, multiply_pattern}); + + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto pattern_map = m.get_pattern_value_map(); + auto data = pattern_map.at(data_pattern); + const auto & original_alpha_pattern = pattern_map.at(alpha_pattern); + + if (shape_size(original_alpha_pattern.get_shape()) != 1) + return false; + + auto leaky_relu = register_new_node(data, original_alpha_pattern); + auto maximum = pattern_map.at(max_pattern); + leaky_relu->set_friendly_name(maximum.get_node()->get_friendly_name()); + + copy_runtime_info({ + pattern_map.at(multiply_pattern).get_node_shared_ptr(), + maximum.get_node_shared_ptr() + }, + leaky_relu); + replace_node(maximum.get_node_shared_ptr(), leaky_relu); + + return true; + }; + + auto m = std::make_shared(max_pattern, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/nop_elimination.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/nop_elimination.cpp index db1ea01fd3d..32f139a15ed 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/nop_elimination.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/nop_elimination.cpp @@ -5,11 +5,10 @@ #include "itt.hpp" #include #include -#include -#include -#include +#include #include +#include #include #include #include @@ -18,9 +17,60 @@ using namespace std; using namespace ngraph; -#define TI(x) x::type_info +//`simplify_gather`, optimizes gather if Gather is gathering the +// whole input tensor +static bool simplify_gather(std::shared_ptr node) { + if (auto gather = as_type_ptr(node)) { + // check if we are gathering the whole input + auto data = gather->input_value(0); + auto indices = gather->input_value(1); -NGRAPH_RTTI_DEFINITION(ngraph::pass::NopElimination, "NopElimination", 0); + // we need to know data and indices shape to infer if gather is Nop + if (data.get_partial_shape().is_dynamic() || indices.get_partial_shape().is_dynamic()) { + return false; + } + // if rank of data and gather output dont match, we will skip + if (data.get_shape().size() != node->get_shape().size()) { + return false; + } + + auto axis = gather->get_axis(); + if (axis == opset3::Gather::AXIS_NOT_SET_VALUE) { + NGRAPH_DEBUG << "axis value not set"; + return false; + } + + // case_1 : if the input tensor is of shape (4, 1, 4) + // and axis = 1, then the gather would be simply + // 
gathering the whole input tensor, so we can optimize this + // op has Nop + + if (data.get_shape()[axis] == 1 && data.get_shape() == node->get_shape()) { + return replace_output_update_name(gather->output(0), gather->input_value(0)); + } + + // case_2 : if the input tensor is of shape (4, 3, 4) + // we need to check the contents of indices, if indices + // is 1D tensor of value {0, 1, 2}, we can optimize this + // op has Nop + + // check if the indices is constant + auto constant_indices = + as_type_ptr(gather->input_value(1).get_node_shared_ptr()); + if (!constant_indices) { + return false; + } else { + // if ref_inidices == indices, we are capturing the + // entire input tensor + std::vector ref_indices(data.get_shape()[axis], 0); + std::iota(ref_indices.begin(), ref_indices.end(), 0); + if (ref_indices == constant_indices->cast_vector()) { + return replace_output_update_name(gather->output(0), gather->input_value(0)); + } + } + } + return false; +} static bool eliminate_nop(const std::shared_ptr& node) { // skip if shapes are dynamic @@ -35,34 +85,6 @@ static bool eliminate_nop(const std::shared_ptr& node) { return false; } -static bool eliminate_convert(const std::shared_ptr& node) { - bool is_out_type_agnostic = false; - static const std::set type_agnostic{TI(opset3::NonZero)}; - if (node->output(0).get_target_inputs().size() == 1) { - Input out = *node->output(0).get_target_inputs().begin(); - is_out_type_agnostic = type_agnostic.count(out.get_node()->get_type_info()) == 1; - } - auto convert = as_type_ptr(node); - auto input = convert->input_value(0); - if (convert->get_convert_element_type() == input.get_element_type() || is_out_type_agnostic) { - if (is_out_type_agnostic && is_type(input.get_node())) { - input = input.get_node()->input_value(0); - } - return replace_output_update_name(node->output(0), input); - } - return false; -} - -static bool eliminate_concat(const std::shared_ptr& node) { - auto node_input = node->input_value(0); - - // remove concat with single input - if (node->get_input_size() == 1) { - return replace_output_update_name(node->output(0), node_input); - } - return false; -} - static bool eliminate_reshape_v1(const std::shared_ptr& node) { auto input = node->input_value(0); // check if reshape is not identity op @@ -336,20 +358,168 @@ NAME() { \ }; \ NGRAPH_RTTI_DEFINITION(NAME, STR(NAME), 0); -SIMPLE_MATCHER_PASS_DEFINITION(EliminatePad, opset3::Pad, eliminate_nop); -SIMPLE_MATCHER_PASS_DEFINITION(EliminateConvert, opset3::Convert, eliminate_convert); SIMPLE_MATCHER_PASS_DEFINITION(EliminateReshape, opset3::Reshape, eliminate_reshape_v1); -SIMPLE_MATCHER_PASS_DEFINITION(EliminateConcat, opset3::Concat, eliminate_concat); SIMPLE_MATCHER_PASS_DEFINITION(EliminateSqueeze, opset3::Squeeze, eliminate_squeeze); SIMPLE_MATCHER_PASS_DEFINITION(EliminateUnsqueeze, opset3::Unsqueeze, eliminate_unsqueeze); SIMPLE_MATCHER_PASS_DEFINITION(EliminateBroadcast, op::v1::Broadcast, eliminate_nop); +SIMPLE_MATCHER_PASS_DEFINITION(EliminateGather, opset3::Gather, simplify_gather); -ngraph::pass::NopElimination::NopElimination() { + +NGRAPH_RTTI_DEFINITION(pass::EliminatePad, "EliminatePad", 0); + +pass::EliminatePad::EliminatePad() { + MATCHER_SCOPE(EliminatePad); + auto pad_node_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto pad = m.get_match_root(); + + auto pad_begin_const = ngraph::get_constant_from_source(pad->input_value(1)); + auto pad_end_const = ngraph::get_constant_from_source(pad->input_value(2)); + + if 
(!pad_begin_const || !pad_end_const) { + return false; + } + + const auto pad_begin_value = pad_begin_const->cast_vector(); + const auto pad_end_value = pad_end_const->cast_vector(); + + if (std::any_of(pad_begin_value.begin(), pad_begin_value.end(), [](int64_t value) { return value != 0; }) || + std::any_of(pad_end_value.begin(), pad_end_value.end(), [](int64_t value) { return value != 0; })) { + return false; + } + + return replace_output_update_name(pad->output(0), pad->input_value(0)); + }; + + auto m = std::make_shared(pad_node_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateConvert, "EliminateConvert", 0); + +pass::EliminateConvert::EliminateConvert() { + MATCHER_SCOPE(EliminateConvert); + auto convert_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [](pattern::Matcher& m) { + auto convert = std::dynamic_pointer_cast(m.get_match_root()); + if (!convert) { + return false; + } + if (convert->get_input_element_type(0) == convert->get_element_type()) { + return replace_output_update_name(convert->output(0), convert->input_value(0)); + } + return false; + }; + + auto m = std::make_shared(convert_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateConvertNonZero, "EliminateConvertNonZero", 0); + +pass::EliminateConvertNonZero::EliminateConvertNonZero() { + MATCHER_SCOPE(EliminateConvertNonZero); + auto convert_pattern = pattern::wrap_type(pattern::consumers_count(1)); + auto non_zero = pattern::wrap_type({convert_pattern}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto & pattern_map = m.get_pattern_map(); + auto convert = pattern_map.at(convert_pattern); + // remove convert + convert->output(0).replace(convert->input_value(0)); + // to make this elimination recursive we register NonZero as a node which will be used to repeat matching + register_new_node(m.get_match_root()); + return true; + }; + + auto m = std::make_shared(non_zero, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateConcat, "EliminateConcat", 0); + +pass::EliminateConcat::EliminateConcat() { + MATCHER_SCOPE(EliminateConcat); + auto convert_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [](pattern::Matcher& m) { + auto concat = m.get_match_root(); + if (concat->inputs().size() == 1) { + return replace_output_update_name(concat->output(0), concat->input_value(0)); + } + return false; + }; + + auto m = std::make_shared(convert_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateSplit, "EliminateSplit", 0); + +pass::EliminateSplit::EliminateSplit() { + MATCHER_SCOPE(EliminateSplit); + auto convert_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [](pattern::Matcher& m) { + auto split = std::dynamic_pointer_cast(m.get_match_root()); + if (!split || split->get_num_splits() != 1) { + return false; + } + return replace_output_update_name(split->output(0), split->input_value(0)); + }; + + auto m = std::make_shared(convert_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateTranspose, "EliminateTranspose", 0); + +pass::EliminateTranspose::EliminateTranspose() { + MATCHER_SCOPE(EliminateTranspose); + auto order = pattern::wrap_type(); + auto transpose_pattern = pattern::wrap_type({pattern::any_input(), order}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) {
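// Editor's sketch (illustration only, not part of this patch): this matcher removes a Transpose whose
// order is the identity permutation. Assuming opset7 and the usual ngraph headers, such a no-op
// Transpose could be built as follows and would be folded to its input by EliminateTranspose:
auto data = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{2, 3, 4});
auto identity_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {0, 1, 2});
auto nop_transpose = std::make_shared<ngraph::opset7::Transpose>(data, identity_order);
// After the pass runs, consumers of nop_transpose are reconnected directly to data.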
+ const auto & pattern_map = m.get_pattern_map(); + auto order_const = std::dynamic_pointer_cast(pattern_map.at(order)); + if (!order_const) { + return false; + } + + const auto & order_values = order_const->cast_vector(); + vector ref_values(order_values.size()); + std::iota(ref_values.begin(), ref_values.end(), 0); + if (order_values != ref_values) { + return false; + } + + auto transpose = m.get_match_root(); + return replace_output_update_name(transpose->output(0), transpose->input_value(0)); + }; + + auto m = std::make_shared(transpose_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(ngraph::pass::NopElimination, "NopElimination", 0); + +ngraph::pass::NopElimination::NopElimination(bool use_shape_for_elimination) { + // shape-agnostic transformations add_matcher(); add_matcher(); - add_matcher(); + add_matcher(); add_matcher(); - add_matcher(); - add_matcher(); - add_matcher(); + add_matcher(); + add_matcher(); + + // shape-dependent transformations + if (use_shape_for_elimination) { + add_matcher(); + add_matcher(); + add_matcher(); + add_matcher(); + add_matcher(); + } } \ No newline at end of file diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/pad_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/pad_fusion.cpp index b2655f8797c..30ffdf934c8 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/pad_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/pad_fusion.cpp @@ -386,34 +386,3 @@ pass::PadFusionGroupConvolutionBackpropData::PadFusionGroupConvolutionBackpropDa auto m = std::make_shared(conv_pattern, matcher_name); this->register_matcher(m, callback); } - -NGRAPH_RTTI_DEFINITION(pass::PadElimination, "PadElimination", 0); - -pass::PadElimination::PadElimination() { - MATCHER_SCOPE(PadElimination); - auto pad_node_pattern = pattern::wrap_type(); - - matcher_pass_callback callback = [=](pattern::Matcher& m) { - auto pad = m.get_match_root(); - - auto pad_begin_const = ngraph::get_constant_from_source(pad->input_value(1)); - auto pad_end_const = ngraph::get_constant_from_source(pad->input_value(2)); - - if (!pad_begin_const || !pad_end_const) { - return false; - } - - const auto pad_begin_value = pad_begin_const->cast_vector(); - const auto pad_end_value = pad_end_const->cast_vector(); - - if (std::any_of(pad_begin_value.begin(), pad_begin_value.end(), [](int64_t value) { return value != 0; }) || - std::any_of(pad_end_value.begin(), pad_end_value.end(), [](int64_t value) { return value != 0; })) { - return false; - } - - return replace_output_update_name(pad->output(0), pad->input_value(0)); - }; - - auto m = std::make_shared(pad_node_pattern, matcher_name); - this->register_matcher(m, callback); -} \ No newline at end of file diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp index ec4614241b9..244670d3678 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp @@ -7,6 +7,7 @@ #include "itt.hpp" #include +#include #include #include #include @@ -121,16 +122,85 @@ 
ngraph::pass::GatherNopElimination::GatherNopElimination() { this->register_matcher(m, callback); } +NGRAPH_RTTI_DEFINITION(ngraph::pass::SimplifyGatherShapeOf, "SimplifyGatherShapeOf", 0); + +ngraph::pass::SimplifyGatherShapeOf::SimplifyGatherShapeOf() { + MATCHER_SCOPE(SimplifyGatherShapeOf); + const auto gather_pattern = ngraph::pattern::wrap_type(); + const auto shape_of_pattern = ngraph::pattern::wrap_type({gather_pattern}); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { + auto node = m.get_match_root(); + auto gather = as_type_ptr(node->input_value(0).get_node_shared_ptr()); + if (!gather) { + return false; + } + auto gather_in_rank = gather->get_input_partial_shape(0).rank(); + auto indices_rank = gather->get_input_partial_shape(1).rank(); + auto axis = gather->get_axis(); + if (gather_in_rank.is_dynamic() || indices_rank.is_dynamic() || + axis == opset3::Gather::AXIS_NOT_SET_VALUE) { + return false; + } + + auto zero_axis = opset3::Constant::create(element::i64, Shape{}, {0}); + NodeVector new_ops; + auto new_shapeof = std::make_shared(gather->input_value(0), node->get_output_element_type(0)); + new_ops.push_back(new_shapeof); + std::shared_ptr replace_op; + if (indices_rank.get_length() == 0) { + std::vector vi(gather_in_rank.get_length()); + std::iota(vi.begin(), vi.end(), 0); + vi.erase(vi.begin() + axis); + auto new_indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); + replace_op = std::make_shared(new_shapeof, new_indices, zero_axis); + new_ops.push_back(replace_op); + } else { + NodeVector concat_inputs; + if (axis > 0) { + std::vector vi(axis); + std::iota(vi.begin(), vi.end(), 0); + auto indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); + auto new_gather = std::make_shared(new_shapeof, indices, zero_axis); + new_ops.push_back(new_gather); + concat_inputs.push_back(new_gather); + } + auto shapeof_indices = std::make_shared(gather->input_value(1), node->get_output_element_type(0)); + new_ops.push_back(shapeof_indices); + + concat_inputs.push_back(shapeof_indices); + + if (gather_in_rank.get_length() - 1 > axis) { + std::vector vi(gather_in_rank.get_length() - (axis + 1)); + std::iota(vi.begin(), vi.end(), axis + 1); + auto indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); + auto new_gather = std::make_shared(new_shapeof, indices, zero_axis); + new_ops.push_back(new_gather); + concat_inputs.push_back(new_gather); + } + replace_op = std::make_shared(concat_inputs, 0); + new_ops.push_back(replace_op); + } + replace_op->set_friendly_name(node->get_friendly_name()); + copy_runtime_info(node, new_ops); + replace_node(node, replace_op); + return true; + }; + + auto m = std::make_shared(shape_of_pattern, matcher_name); + this->register_matcher(m, callback); +} NGRAPH_RTTI_DEFINITION(ngraph::pass::SimplifyShapeOfSubGraph, "SimplifyShapeOfSubGraph", 0); bool ngraph::pass::SimplifyShapeOfSubGraph::run_on_function(std::shared_ptr f) { - RUN_ON_FUNCTION_SCOPE(GroupedGatherElimination); + RUN_ON_FUNCTION_SCOPE(SimplifyShapeOfSubGraph); ngraph::pass::Manager manager; manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.run_passes(f); return false; } diff --git a/inference-engine/src/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp b/inference-engine/src/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp index 
d5c30e73e4f..44d05860c4f 100644 --- a/inference-engine/src/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp +++ b/inference-engine/src/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp @@ -20,7 +20,7 @@ using namespace ngraph; NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableConvertConstantFoldingOnConstPath, "DisableConvertConstantFoldingOnConstPath", 0); ngraph::pass::DisableConvertConstantFoldingOnConstPath::DisableConvertConstantFoldingOnConstPath( - const std::vector& inputPrecisions) { + const element::TypeVector & inputPrecisions) { auto matcherData = ngraph::pattern::any_input(); auto matcherConvert = ngraph::pattern::wrap_type({ matcherData }, pattern::consumers_count(1)); diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp new file mode 100644 index 00000000000..34163fc4860 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include +#include + +#include +#include +#include + +#include +#include + +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatrixNmsToMatrixNmsIE, "ConvertMatrixNmsToMatrixNmsIE", 0); + +ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE() { + MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE); + auto nms = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + auto nms = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms) { + return false; + } + + const auto new_args = nms->input_values(); + // vector of new nGraph operations + NodeVector new_ops; + auto attrs = nms->get_attrs(); + attrs.output_type = element::i32; + auto nms_new = std::make_shared>( + new_args.at(0), + new_args.at(1), + attrs); + new_ops.emplace_back(nms_new); + + Output output_0 = nms_new->output(0); + Output output_1 = nms_new->output(1); + Output output_2 = nms_new->output(2); + + if (nms->output(1).get_element_type() != output_1.get_element_type()) { + output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); + output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1"); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + } + + if (nms->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2"); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_new->set_friendly_name(nms->get_friendly_name()); + ngraph::copy_runtime_info(nms, new_ops); + ngraph::replace_node(nms, {output_0, output_1, output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp new file mode 
100644 index 00000000000..1f236610e53 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include +#include + +#include +#include +#include + +#include +#include + +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE, "ConvertMulticlassNmsToMulticlassNmsIE", 0); + +ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNmsIE() { + MATCHER_SCOPE(ConvertMulticlassNmsToMulticlassNmsIE); + auto nms = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + auto nms = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms) { + return false; + } + + const auto new_args = nms->input_values(); + // vector of new nGraph operations + NodeVector new_ops; + auto attrs = nms->get_attrs(); + attrs.output_type = element::i32; + + auto nms_new = std::make_shared>( + new_args.at(0), + new_args.at(1), + attrs); + new_ops.emplace_back(nms_new); + + Output output_0 = nms_new->output(0); + Output output_1 = nms_new->output(1); + Output output_2 = nms_new->output(2); + + if (nms->output(1).get_element_type() != output_1.get_element_type()) { + output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); + output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1"); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + } + + if (nms->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2"); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_new->set_friendly_name(nms->get_friendly_name()); + ngraph::copy_runtime_info(nms, new_ops); + ngraph::replace_node(nms, {output_0, output_1, output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp index 40ae55c0f90..69e57b69547 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToLSTMSequence, "ConvertTensorIteratorToLSTMSequence", 0); diff --git a/inference-engine/src/transformations/src/transformations/rt_info/disable_constant_folding.cpp b/inference-engine/src/transformations/src/transformations/rt_info/disable_constant_folding.cpp new file mode 100644 index 00000000000..791102ed1f4 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/rt_info/disable_constant_folding.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/rt_info/disable_constant_folding.hpp" + +template class ngraph::VariantImpl; + +constexpr 
ngraph::VariantTypeInfo ngraph::VariantWrapper::type_info; + +void ngraph::disable_constant_folding(const std::shared_ptr& node) { + auto & rt_info = node->get_rt_info(); + rt_info[VariantWrapper::type_info.name] = make_variant({}); +} \ No newline at end of file diff --git a/inference-engine/src/transformations/src/transformations/serialize.cpp b/inference-engine/src/transformations/src/transformations/serialize.cpp index 93f9c24e4b8..0ce92c208ea 100644 --- a/inference-engine/src/transformations/src/transformations/serialize.cpp +++ b/inference-engine/src/transformations/src/transformations/serialize.cpp @@ -642,8 +642,6 @@ bool resolve_dynamic_shapes(const ngraph::Function& f) { [](const Dimension& d) -> Dimension { return d.get_max_length(); }); - NGRAPH_CHECK(PartialShape(out_shape).is_static(), - "Dynamic dimension cannot be resolved in ", op); return out_shape; }; @@ -685,6 +683,7 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, create_layer_ids(f); std::unordered_set unique_names; + // TODO remove resolve_dynamic_shapes function completely when support for -1 will be implemented in the MO bool has_dynamic_shapes = resolve_dynamic_shapes(f); const bool exec_graph = is_exec_graph(f); @@ -711,9 +710,6 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, if (node->get_input_size() > 0) { pugi::xml_node input = layer.append_child("input"); for (const auto & i : node->inputs()) { - NGRAPH_CHECK(i.get_partial_shape().is_static(), - "Unsupported dynamic input shape in ", node); - // WA for LSTMCellv0, peephole input shall not be serialized if (i.get_index() == 6 && dynamic_cast(node)) { port_id++; @@ -724,10 +720,14 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, port.append_attribute("id").set_value(port_id++); port.append_attribute("precision") .set_value(get_precision_name(i.get_element_type()).c_str()); - for (auto d : i.get_shape()) { + for (auto d : i.get_partial_shape()) { pugi::xml_node dim = port.append_child("dim"); - dim.append_child(pugi::xml_node_type::node_pcdata) - .set_value(std::to_string(d).c_str()); + if (d.is_dynamic()) { + dim.append_child(pugi::xml_node_type::node_pcdata).set_value("-1"); + } else { + dim.append_child(pugi::xml_node_type::node_pcdata) + .set_value(std::to_string(d.get_length()).c_str()); + } } } @@ -739,9 +739,6 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, if ((node->get_output_size() > 0) && !ngraph::op::is_output(node)) { pugi::xml_node output = layer.append_child("output"); for (const auto & o : node->outputs()) { - NGRAPH_CHECK(o.get_partial_shape().is_static(), - "Unsupported dynamic output shape in ", node); - pugi::xml_node port = output.append_child("port"); port.append_attribute("id").set_value(port_id++); port.append_attribute("precision") @@ -762,10 +759,14 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, port.append_attribute("names").set_value(names.c_str()); } - for (auto d : o.get_shape()) { + for (auto d : o.get_partial_shape()) { pugi::xml_node dim = port.append_child("dim"); - dim.append_child(pugi::xml_node_type::node_pcdata) - .set_value(std::to_string(d).c_str()); + if (d.is_dynamic()) { + dim.append_child(pugi::xml_node_type::node_pcdata).set_value("-1"); + } else { + dim.append_child(pugi::xml_node_type::node_pcdata) + .set_value(std::to_string(d.get_length()).c_str()); + } } } if (node_type_name == "TensorIterator" || node_type_name == "Loop") { @@ -851,7 +852,7 @@ bool pass::Serialize::run_on_function(std::shared_ptr f) { try { serializeFunc(xml_file, bin_file); - } catch (const ngraph::CheckFailure& e) { + } catch 
(const ngraph::CheckFailure&) { // optimization decision was made to create .bin file upfront and // write to it directly instead of buffering its content in memory, // hence we need to delete it here in case of failure diff --git a/inference-engine/src/vpu/common/CMakeLists.txt b/inference-engine/src/vpu/common/CMakeLists.txt index 71c727b631a..d8b55be4825 100644 --- a/inference-engine/src/vpu/common/CMakeLists.txt +++ b/inference-engine/src/vpu/common/CMakeLists.txt @@ -15,7 +15,7 @@ function(add_common_target TARGET_NAME STATIC_IE) UNITY ) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if(CMAKE_COMPILER_IS_GNUCXX) # TODO: enable some day and fix all warnings # target_compile_options(${TARGET_NAME} PRIVATE "-Wall") target_compile_options(${TARGET_NAME} PRIVATE "-Werror=unused-function") diff --git a/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp b/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp index 745613c977e..ada40a74d84 100644 --- a/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp +++ b/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp @@ -11,24 +11,33 @@ namespace vpu { -template class Map> -inline std::vector getKeys(const Map& map) { +template