Merge remote-tracking branch 'upstream/master' into itikhono/ts/fix_performance_issues
This commit is contained in:
commit
20579455b7
@ -10,23 +10,54 @@ See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNo
|
||||
|
||||
|
||||
@sphinxdirective
|
||||
|
||||
.. tab:: System Requirements
|
||||
|
||||
| Full requirement listing is available in:
|
||||
| `System Requirements Page <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/system-requirements.html>`_
|
||||
| Full requirement listing is available in:
|
||||
| `System Requirements Page <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/system-requirements.html>`_
|
||||
|
||||
.. tab:: Processor Notes
|
||||
|
||||
Processor graphics are not included in all processors.
|
||||
See `Product Specifications`_ for information about your processor.
|
||||
|
||||
|
||||
.. _Product Specifications: https://ark.intel.com/
|
||||
|
||||
.. tab:: Software
|
||||
|
||||
* `CMake 3.13 or higher, 64-bit <https://cmake.org/download/>`_
|
||||
* GCC 7.5.0 (for Ubuntu 18.04) or GCC 9.3.0 (for Ubuntu 20.04)
|
||||
* `Python 3.7 - 3.10, 64-bit <https://www.python.org/downloads/>`_
|
||||
* GCC:
|
||||
|
||||
.. tab:: Ubuntu 18.04
|
||||
|
||||
* GCC 7.5.0
|
||||
|
||||
.. tab:: Ubuntu 20.04
|
||||
|
||||
* GCC 9.3.0
|
||||
|
||||
.. tab:: RHEL 8
|
||||
|
||||
* GCC 8.4.1
|
||||
|
||||
.. tab:: CENTOS 7
|
||||
|
||||
* GCC 8.3.1
|
||||
Use the following instructions to install it:
|
||||
Install GCC 8.3.1 via devtoolset-8
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
sudo yum update -y && sudo yum install -y centos-release-scl epel-release
|
||||
sudo yum install -y devtoolset-8 git patchelf
|
||||
|
||||
Enable devtoolset-8 and check current gcc version
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
source /opt/rh/devtoolset-8/enable
|
||||
gcc -v
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
|
@ -125,6 +125,46 @@ elif [ -f /etc/redhat-release ] || grep -q "rhel" /etc/os-release ; then
|
||||
`# samples and tools` \
|
||||
zlib-devel \
|
||||
gflags-devel
|
||||
elif [ -f /etc/os-release ] && grep -q "SUSE" /etc/os-release ; then
|
||||
zypper refresh
|
||||
zypper install -y \
|
||||
file \
|
||||
`# build tools` \
|
||||
cmake \
|
||||
ccache \
|
||||
ninja \
|
||||
scons \
|
||||
gcc \
|
||||
gcc-c++ \
|
||||
make \
|
||||
`# to determine openvino version via git` \
|
||||
git \
|
||||
git-lfs \
|
||||
`# to build and check pip packages` \
|
||||
patchelf \
|
||||
fdupes \
|
||||
`# to build and check rpm packages` \
|
||||
rpm-build \
|
||||
rpmlint \
|
||||
`# check bash scripts for correctness` \
|
||||
ShellCheck \
|
||||
`# main openvino dependencies` \
|
||||
tbb-devel \
|
||||
pugixml-devel \
|
||||
`# GPU plugin dependency` \
|
||||
libva-devel \
|
||||
`# OpenCL for GPU` \
|
||||
ocl-icd-devel \
|
||||
opencl-cpp-headers \
|
||||
opencl-headers \
|
||||
`# python API` \
|
||||
python39-pip \
|
||||
python39-setuptools \
|
||||
python39-devel \
|
||||
`# samples and tools` \
|
||||
zlib-devel \
|
||||
gflags-devel-static \
|
||||
nlohmann_json-devel
|
||||
elif [ -f /etc/os-release ] && grep -q "raspbian" /etc/os-release; then
|
||||
# Raspbian
|
||||
apt update
|
||||
@ -176,8 +216,10 @@ if [ ! "$(printf '%s\n' "$required_cmake_ver" "$current_cmake_ver" | sort -V | h
|
||||
|
||||
if command -v apt-get &> /dev/null; then
|
||||
apt-get install -y --no-install-recommends wget
|
||||
else
|
||||
elif command -v yum &> /dev/null; then
|
||||
yum install -y wget
|
||||
elif command -v zypper &> /dev/null; then
|
||||
zypper in -y wget
|
||||
fi
|
||||
|
||||
cmake_install_bin="cmake-${installed_cmake_ver}-linux-${arch}.sh"
|
||||
|
@ -15,11 +15,6 @@ set(shellcheck_skip_list
|
||||
"${OpenVINO_SOURCE_DIR}/src/bindings/python/thirdparty/pybind11"
|
||||
"${TEMP}")
|
||||
|
||||
if(shellcheck_VERSION VERSION_GREATER_EQUAL 0.7.0)
|
||||
list(APPEND shellcheck_skip_list
|
||||
"${OpenVINO_SOURCE_DIR}/scripts/setupvars/setupvars.sh")
|
||||
endif()
|
||||
|
||||
ie_shellcheck_process(DIRECTORY "${OpenVINO_SOURCE_DIR}"
|
||||
SKIP ${shellcheck_skip_list})
|
||||
|
||||
|
@ -95,6 +95,7 @@ if [ "$os" == "auto" ] ; then
|
||||
case $os in
|
||||
centos7|centos8|rhel8|rhel9.1|\
|
||||
almalinux8.7|amzn2|\
|
||||
opensuse-leap15.3| \
|
||||
fedora34|fedora35|fedora36|fedora37|fedora38|\
|
||||
raspbian9|debian9|ubuntu18.04|\
|
||||
raspbian10|debian10|ubuntu20.04|ubuntu20.10|ubuntu21.04|\
|
||||
@ -216,6 +217,11 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
|
||||
pkgs_dev+=("https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm")
|
||||
extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm")
|
||||
fi
|
||||
elif [ "$os" == "opensuse-leap15.3" ] ; then
|
||||
pkgs_core=(libtbb2 libtbbmalloc2 libpugixml1)
|
||||
pkgs_gpu=()
|
||||
pkgs_python=(python39-base python39 python39-venv python39-pip)
|
||||
pkgs_dev=(cmake pkg-config gcc-c++ gcc gflags-devel-static zlib-devel nlohmann_json-devel make curl sudo)
|
||||
else
|
||||
echo "Internal script error: invalid OS (${os}) after check (package selection)" >&2
|
||||
exit 3
|
||||
@ -280,6 +286,14 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
|
||||
|
||||
yum install "$iopt" "${pkgs[@]}"
|
||||
|
||||
elif [ "$os" == "opensuse-leap15.3" ] ; then
|
||||
|
||||
[ -z "$interactive" ] && iopt="-y"
|
||||
[ -n "$dry" ] && iopt="--dry-run"
|
||||
[ -n "$keepcache" ] && zypper clean --all
|
||||
|
||||
zypper ref && zypper in --auto-agree-with-licenses --no-recommends "$iopt" "${pkgs[@]}"
|
||||
|
||||
else
|
||||
echo "Internal script error: invalid OS (${os}) after check (package installation)" >&2
|
||||
exit 3
|
||||
|
@ -3,7 +3,13 @@
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "$(realpath "${BASH_SOURCE[0]}")" )" >/dev/null 2>&1 && pwd )"
|
||||
# Print the absolute form of the path given in "$1": the physical
# (symlink-resolved, via `pwd -P`) location of its parent directory joined
# with its final path component.
# Arguments:
#   $1 - path to resolve; it is passed through `eval echo`, so shell
#        expansions inside it (for example a leading `~`) are applied first.
# Outputs:
#   the resolved path on stdout.
# NOTE: the working variable must not be named `path`. In zsh, `path` is the
# special array tied to PATH, so assigning to it replaces the command search
# path of the running shell and the following `dirname`/`basename` lookups fail.
abs_path () {
    script_path=$(eval echo "$1")
    directory=$(dirname "$script_path")
    # `cd ... || exit` runs inside the command substitution, so a failure only
    # ends that subshell, not the calling shell.
    echo "$(cd "$directory" || exit; pwd -P)/$(basename "$script_path")";
}
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "$(abs_path "${BASH_SOURCE[0]}")" )" >/dev/null 2>&1 && pwd )"
|
||||
INSTALLDIR="${SCRIPT_DIR}"
|
||||
export INTEL_OPENVINO_DIR="$INSTALLDIR"
|
||||
|
||||
@ -79,10 +85,12 @@ fi
|
||||
|
||||
# OpenCV environment
|
||||
if [ -f "$INSTALLDIR/opencv/setupvars.sh" ]; then
|
||||
# shellcheck source=/dev/null
|
||||
source "$INSTALLDIR/opencv/setupvars.sh"
|
||||
fi
|
||||
|
||||
if [ -f "$INSTALLDIR/extras/opencv/setupvars.sh" ]; then
|
||||
# shellcheck source=/dev/null
|
||||
source "$INSTALLDIR/extras/opencv/setupvars.sh"
|
||||
fi
|
||||
|
||||
@ -97,23 +105,12 @@ MAX_SUPPORTED_PYTHON_VERSION_MINOR="10"
|
||||
|
||||
check_python_version () {
|
||||
if [ -z "$python_version" ]; then
|
||||
python_version=$(python3 -c 'import sys; print(str(sys.version_info[0])+"."+str(sys.version_info[1]))')
|
||||
fi
|
||||
|
||||
# splitting Python version variable depending on the used shell
|
||||
if [ -n "$ZSH_VERSION" ]; then
|
||||
version_arr=(${(@s:.:)python_version})
|
||||
if [ "${#version_arr[@]}" -ge "2" ]; then
|
||||
# zsh starts indexing from 1
|
||||
python_version_major=${version_arr[1]}
|
||||
python_version_minor=${version_arr[2]}
|
||||
fi
|
||||
python_version_major=$( python3 -c 'import sys; print(str(sys.version_info[0]))' )
|
||||
python_version_minor=$( python3 -c 'import sys; print(str(sys.version_info[1]))' )
|
||||
python_version="$python_version_major.$python_version_minor"
|
||||
else
|
||||
version_arr=(${python_version//./ })
|
||||
if [ "${#version_arr[@]}" -ge "2" ]; then
|
||||
python_version_major=${version_arr[0]}
|
||||
python_version_minor=${version_arr[1]}
|
||||
fi
|
||||
python_version_major=$( python3 -c "import sys; print(str(\"${python_version}\".split('.')[0]))" )
|
||||
python_version_minor=$( python3 -c "import sys; print(str(\"${python_version}\".split('.')[1]))" )
|
||||
fi
|
||||
|
||||
if [ "$PYTHON_VERSION_MAJOR" != "$python_version_major" ] ||
|
||||
|
@ -8,17 +8,26 @@ import os
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import openvino.runtime.opset8 as ov
|
||||
from openvino.runtime import Model
|
||||
import openvino.runtime.opset10 as ops
|
||||
from openvino.runtime import Core, Model
|
||||
from openvino.runtime.passes import Manager, Serialize, ConstantFolding, Version
|
||||
from tests.test_graph.util import count_ops_of_type
|
||||
from openvino.runtime import Core
|
||||
|
||||
from tests.test_utils.test_utils import create_filename_for_test
|
||||
|
||||
def create_model():
    """Build the shared test model: floor(minimum(abs(A), multiply(B, C))).

    A, B and C are float32 parameters of shape [100, 100, 2]; the resulting
    Model is named "Model" and lists the parameters in the order A, B, C.
    """
    input_shape = [100, 100, 2]
    param_a, param_b, param_c = (
        ops.parameter(input_shape, dtype=np.float32, name=label) for label in ("A", "B", "C")
    )
    # Nodes are created in the same order as a nested call would evaluate them:
    # abs, multiply, minimum, floor.
    magnitude = ops.abs(param_a)
    product = ops.multiply(param_b, param_c)
    result = ops.floor(ops.minimum(magnitude, product))
    return Model(result, [param_a, param_b, param_c], "Model")
|
||||
|
||||
|
||||
def test_constant_folding():
|
||||
node_constant = ov.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32))
|
||||
node_ceil = ov.ceiling(node_constant)
|
||||
node_constant = ops.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32))
|
||||
node_ceil = ops.ceiling(node_constant)
|
||||
model = Model(node_ceil, [], "TestFunction")
|
||||
|
||||
assert count_ops_of_type(model, node_ceil) == 1
|
||||
@ -43,9 +52,9 @@ def test_serialize_seperate_paths_kwargs(request, tmp_path):
|
||||
core = Core()
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
shape = [2, 2]
|
||||
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
|
||||
parameter_a = ops.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ops.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ops.parameter(shape, dtype=np.float32, name="C")
|
||||
model = (parameter_a + parameter_b) * parameter_c
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
|
||||
@ -67,10 +76,10 @@ def test_serialize_seperate_paths_args(request, tmp_path):
|
||||
core = Core()
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
shape = [2, 2]
|
||||
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
|
||||
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
|
||||
parameter_a = ops.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ops.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ops.parameter(shape, dtype=np.float32, name="C")
|
||||
parameter_d = ops.parameter(shape, dtype=np.float32, name="D")
|
||||
model = ((parameter_a + parameter_b) * parameter_c) / parameter_d
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c, parameter_d], "Model")
|
||||
|
||||
@ -92,8 +101,8 @@ def test_serialize_pass_mixed_args_kwargs(request, tmp_path):
|
||||
core = Core()
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
shape = [3, 2]
|
||||
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_a = ops.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ops.parameter(shape, dtype=np.float32, name="B")
|
||||
model = parameter_a - parameter_b
|
||||
func = Model(model, [parameter_a, parameter_b], "Model")
|
||||
|
||||
@ -114,20 +123,15 @@ def test_serialize_pass_mixed_args_kwargs(request, tmp_path):
|
||||
def test_serialize_pass_mixed_args_kwargs_v2(request, tmp_path):
|
||||
core = Core()
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
shape = [100, 100, 2]
|
||||
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
|
||||
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
model = create_model()
|
||||
pass_manager = Manager()
|
||||
pass_manager.register_pass(Serialize(path_to_xml=xml_path, path_to_bin=bin_path))
|
||||
pass_manager.run_passes(func)
|
||||
pass_manager.run_passes(model)
|
||||
|
||||
res_model = core.read_model(model=xml_path, weights=bin_path)
|
||||
|
||||
assert func.get_parameters() == res_model.get_parameters()
|
||||
assert func.get_ordered_ops() == res_model.get_ordered_ops()
|
||||
assert model.get_parameters() == res_model.get_parameters()
|
||||
assert model.get_ordered_ops() == res_model.get_ordered_ops()
|
||||
|
||||
os.remove(xml_path)
|
||||
os.remove(bin_path)
|
||||
@ -146,8 +150,8 @@ def test_serialize_pass_wrong_num_of_args(request, tmp_path):
|
||||
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
|
||||
def test_serialize_results(request, tmp_path):
|
||||
core = Core()
|
||||
node_constant = ov.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32))
|
||||
node_ceil = ov.ceiling(node_constant)
|
||||
node_constant = ops.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32))
|
||||
node_ceil = ops.ceiling(node_constant)
|
||||
func = Model(node_ceil, [], "Model")
|
||||
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
@ -165,73 +169,19 @@ def test_serialize_results(request, tmp_path):
|
||||
os.remove(bin_path)
|
||||
|
||||
|
||||
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
|
||||
def test_serialize_pass_tuple(request, tmp_path):
|
||||
core = Core()
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
shape = [100, 100, 2]
|
||||
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
|
||||
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
|
||||
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
pass_manager = Manager()
|
||||
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
|
||||
pass_manager.run_passes(func)
|
||||
|
||||
res_model = core.read_model(model=xml_path, weights=bin_path)
|
||||
|
||||
assert func.get_parameters() == res_model.get_parameters()
|
||||
assert func.get_ordered_ops() == res_model.get_ordered_ops()
|
||||
|
||||
os.remove(xml_path)
|
||||
os.remove(bin_path)
|
||||
|
||||
|
||||
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
|
||||
def test_default_version(request, tmp_path):
|
||||
core = Core()
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
shape = [100, 100, 2]
|
||||
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
|
||||
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
|
||||
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
model = create_model()
|
||||
pass_manager = Manager()
|
||||
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
|
||||
pass_manager.run_passes(func)
|
||||
pass_manager.register_pass(Serialize(xml_path, bin_path))
|
||||
pass_manager.run_passes(model)
|
||||
|
||||
res_model = core.read_model(model=xml_path, weights=bin_path)
|
||||
|
||||
assert func.get_parameters() == res_model.get_parameters()
|
||||
assert func.get_ordered_ops() == res_model.get_ordered_ops()
|
||||
|
||||
os.remove(xml_path)
|
||||
os.remove(bin_path)
|
||||
|
||||
|
||||
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
|
||||
def test_default_version_IR_V11_tuple(request, tmp_path):
|
||||
core = Core()
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
shape = [100, 100, 2]
|
||||
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
|
||||
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
|
||||
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
pass_manager = Manager()
|
||||
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path), version="IR_V11")
|
||||
pass_manager.run_passes(func)
|
||||
|
||||
res_model = core.read_model(model=xml_path, weights=bin_path)
|
||||
|
||||
assert func.get_parameters() == res_model.get_parameters()
|
||||
assert func.get_ordered_ops() == res_model.get_ordered_ops()
|
||||
assert model.get_parameters() == res_model.get_parameters()
|
||||
assert model.get_ordered_ops() == res_model.get_ordered_ops()
|
||||
|
||||
os.remove(xml_path)
|
||||
os.remove(bin_path)
|
||||
@ -241,21 +191,15 @@ def test_default_version_IR_V11_tuple(request, tmp_path):
|
||||
def test_default_version_IR_V11_seperate_paths(request, tmp_path):
|
||||
core = Core()
|
||||
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
|
||||
shape = [100, 100, 2]
|
||||
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
|
||||
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
|
||||
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
|
||||
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
|
||||
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
|
||||
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
|
||||
model = create_model()
|
||||
pass_manager = Manager()
|
||||
pass_manager.register_pass(Serialize(path_to_xml=xml_path, path_to_bin=bin_path, version=Version.IR_V11))
|
||||
pass_manager.run_passes(func)
|
||||
pass_manager.run_passes(model)
|
||||
|
||||
res_model = core.read_model(model=xml_path, weights=bin_path)
|
||||
|
||||
assert func.get_parameters() == res_model.get_parameters()
|
||||
assert func.get_ordered_ops() == res_model.get_ordered_ops()
|
||||
assert model.get_parameters() == res_model.get_parameters()
|
||||
assert model.get_ordered_ops() == res_model.get_ordered_ops()
|
||||
|
||||
os.remove(xml_path)
|
||||
os.remove(bin_path)
|
||||
|
@ -32,14 +32,10 @@ def test_registration_and_pass_name():
|
||||
GraphRewrite().set_name("Anchor")
|
||||
BackwardGraphRewrite().set_name("BackAnchor")
|
||||
|
||||
# Preserve legacy behaviour when registered pass doesn't exist
|
||||
# and in this case we shouldn't throw an exception.
|
||||
manager.register_pass("NotExistingPass")
|
||||
|
||||
|
||||
def test_negative_pass_registration():
|
||||
manager = Manager()
|
||||
expect_exception(lambda: manager.register_pass(PatternReplacement))
|
||||
expect_exception(lambda: manager.register_pass("PatternReplacement", PatternReplacement()))
|
||||
expect_exception(lambda: manager.register_pass("Serialize", Serialize("out.xml", "out.bin")))
|
||||
expect_exception(lambda: manager.register_pass("Serialize", "out.xml", "out.bin", "out.wrong"))
|
||||
expect_exception(lambda: manager.register_pass(Serialize("out.xml", "out.bin", "out.wrong")))
|
||||
|
@ -6,7 +6,7 @@ import os
|
||||
import pytest
|
||||
import numpy as np
|
||||
from openvino.runtime import serialize
|
||||
from openvino.offline_transformations import (
|
||||
from openvino._offline_transformations import (
|
||||
apply_moc_transformations,
|
||||
apply_pot_transformations,
|
||||
apply_low_latency_transformation,
|
||||
|
@ -27,7 +27,7 @@ def einsum_op_exec(input_shapes: list, equation: str, data_type: np.dtype,
|
||||
ng_inputs = []
|
||||
np_inputs = []
|
||||
for i in range(num_inputs):
|
||||
input_i = np.random.random_integers(10, size=input_shapes[i]).astype(data_type)
|
||||
input_i = np.random.randint(1, 10 + 1, size=input_shapes[i]).astype(data_type)
|
||||
np_inputs.append(input_i)
|
||||
ng_inputs.append(ng.parameter(input_i.shape, dtype=data_type))
|
||||
|
||||
|
@ -33,7 +33,7 @@ def test_elu_operator_with_scalar():
|
||||
|
||||
|
||||
def test_fake_quantize():
|
||||
levels = np.float32(4)
|
||||
levels = np.int32(4)
|
||||
|
||||
data_shape = [1, 2, 3, 4]
|
||||
bound_shape = []
|
||||
@ -60,7 +60,7 @@ def test_fake_quantize():
|
||||
def test_depth_to_space():
|
||||
data_shape = [1, 4, 2, 3]
|
||||
mode = "blocks_first"
|
||||
block_size = np.float32(2)
|
||||
block_size = np.int32(2)
|
||||
|
||||
parameter_data = ng.parameter(data_shape, name="Data", dtype=np.float32)
|
||||
|
||||
|
@ -103,6 +103,7 @@ public:
|
||||
bool operator>(const Output& other) const;
|
||||
bool operator<=(const Output& other) const;
|
||||
bool operator>=(const Output& other) const;
|
||||
operator Output<const Node>() const;
|
||||
|
||||
private:
|
||||
std::shared_ptr<Node> m_node;
|
||||
|
@ -116,6 +116,23 @@ public:
|
||||
*/
|
||||
Tensor(const element::Type type, const Shape& shape, void* host_ptr, const Strides& strides = {});
|
||||
|
||||
/**
|
||||
* @brief Constructs Tensor using port from node. Allocates internal host storage using the given allocator (the default allocator if none is specified)
|
||||
* @param port port from node
|
||||
* @param allocator allocates memory for internal tensor storage
|
||||
*/
|
||||
Tensor(const ov::Output<const ov::Node>& port, const Allocator& allocator = {});
|
||||
|
||||
/**
|
||||
* @brief Constructs Tensor using port from node. Wraps allocated host memory.
|
||||
* @note Does not perform memory allocation internally
|
||||
* @param port port from node
|
||||
* @param host_ptr Pointer to pre-allocated host memory
|
||||
* @param strides Optional strides parameters in bytes. Strides are supposed to be computed automatically based
|
||||
* on shape and element size
|
||||
*/
|
||||
Tensor(const ov::Output<const ov::Node>& port, void* host_ptr, const Strides& strides = {});
|
||||
|
||||
/**
|
||||
* @brief Constructs region of interest (ROI) tensor from another tensor.
|
||||
* @note Does not perform memory allocation internally
|
||||
@ -143,10 +160,17 @@ public:
|
||||
*/
|
||||
Shape get_shape() const;
|
||||
|
||||
/**
|
||||
* @brief Copy tensor, destination tensor should have the same element type and shape
|
||||
*
|
||||
* @param dst destination tensor
|
||||
*/
|
||||
void copy_to(ov::Tensor& dst) const;
|
||||
|
||||
/**
|
||||
* @brief Reports whether the tensor is continuous or not
|
||||
*
|
||||
* @return true if blob is continuous
|
||||
* @return true if tensor is continuous
|
||||
*/
|
||||
bool is_continuous() const;
|
||||
|
||||
|
@ -42,12 +42,12 @@ void color_convert_nv12(const T* arg_y,
|
||||
size_t stride_y,
|
||||
size_t stride_uv,
|
||||
ov::op::util::ConvertColorNV12Base::ColorConversion color_format) {
|
||||
for (int batch = 0; batch < batch_size; batch++) {
|
||||
for (size_t batch = 0; batch < batch_size; batch++) {
|
||||
T* out = out_ptr + batch * image_w * image_h * 3;
|
||||
auto y_ptr = arg_y + batch * stride_y;
|
||||
auto uv_ptr = arg_uv + batch * stride_uv;
|
||||
for (int h = 0; h < image_h; h++) {
|
||||
for (int w = 0; w < image_w; w++) {
|
||||
for (size_t h = 0; h < image_h; h++) {
|
||||
for (size_t w = 0; w < image_w; w++) {
|
||||
auto y_index = h * image_w + w;
|
||||
auto y_val = static_cast<float>(y_ptr[y_index]);
|
||||
auto uv_index = (h / 2) * image_w + (w / 2) * 2;
|
||||
@ -80,13 +80,13 @@ void color_convert_i420(const T* arg_y,
|
||||
size_t stride_y,
|
||||
size_t stride_uv,
|
||||
ov::op::util::ConvertColorI420Base::ColorConversion color_format) {
|
||||
for (int batch = 0; batch < batch_size; batch++) {
|
||||
for (size_t batch = 0; batch < batch_size; batch++) {
|
||||
T* out = out_ptr + batch * image_w * image_h * 3;
|
||||
auto y_ptr = arg_y + batch * stride_y;
|
||||
auto u_ptr = arg_u + batch * stride_uv;
|
||||
auto v_ptr = arg_v + batch * stride_uv;
|
||||
for (int h = 0; h < image_h; h++) {
|
||||
for (int w = 0; w < image_w; w++) {
|
||||
for (size_t h = 0; h < image_h; h++) {
|
||||
for (size_t w = 0; w < image_w; w++) {
|
||||
auto y_index = h * image_w + w;
|
||||
auto y_val = static_cast<float>(y_ptr[y_index]);
|
||||
auto uv_index = (h / 2) * (image_w / 2) + (w / 2);
|
||||
|
@ -46,15 +46,15 @@ void extend_with_zeros(const Strides& strides,
|
||||
const auto offset_batch = batch * input_size * input_shape[1];
|
||||
for (size_t channel = 0; channel < input_shape[1]; ++channel) {
|
||||
const auto offset_channel = offset_batch + channel * input_size;
|
||||
for (int i_z = 0; i_z < input_3d[0]; ++i_z) {
|
||||
for (size_t i_z = 0; i_z < input_3d[0]; ++i_z) {
|
||||
const auto offset_i_z = i_z * input_3d[2] * input_3d[1];
|
||||
for (int i_y = 0; i_y < input_3d[1]; ++i_y) {
|
||||
for (size_t i_y = 0; i_y < input_3d[1]; ++i_y) {
|
||||
const auto offset_i_y = i_y * input_3d[2];
|
||||
for (int i_x = 0; i_x < input_3d[2]; ++i_x) {
|
||||
for (size_t i_x = 0; i_x < input_3d[2]; ++i_x) {
|
||||
input_zeros.push_back(in[offset_channel + i_x + offset_i_y + offset_i_z]);
|
||||
|
||||
if (i_x < input_3d[2] - 1) {
|
||||
for (int k = 0; k < strides_3d[2] - 1; k++) {
|
||||
for (size_t k = 0; k < strides_3d[2] - 1; k++) {
|
||||
input_zeros.push_back(0);
|
||||
}
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ private:
|
||||
size_t offset;
|
||||
size_t numResults;
|
||||
size_t outTotalSize;
|
||||
size_t numClasses;
|
||||
int numClasses;
|
||||
|
||||
void GetLocPredictions(const dataType* locData, std::vector<LabelBBox>& locations) {
|
||||
locations.resize(numImages);
|
||||
@ -445,7 +445,7 @@ public:
|
||||
offset = _attrs.normalized ? 0 : 1;
|
||||
numPriors = priorsShape[2] / priorSize;
|
||||
priorsBatchSize = priorsShape[0];
|
||||
numClasses = classPredShape[1] / numPriors;
|
||||
numClasses = classPredShape[1] / static_cast<int>(numPriors);
|
||||
numLocClasses = _attrs.share_location ? 1 : numClasses;
|
||||
numResults = outShape[2];
|
||||
outTotalSize = shape_size(outShape);
|
||||
|
@ -109,8 +109,8 @@ void roi_align(const T* feature_maps,
|
||||
T sample_x = x1 + static_cast<T>(x_bin_ind) * bin_width +
|
||||
sample_distance_x * (static_cast<T>(x_sample_ind) + static_cast<T>(0.5f));
|
||||
|
||||
if (sample_x < -1.0 || sample_x > feature_map_width || sample_y < -1.0 ||
|
||||
sample_y > feature_map_height) {
|
||||
if (sample_x < -1.0 || sample_x > static_cast<T>(feature_map_width) || sample_y < -1.0 ||
|
||||
sample_y > static_cast<T>(feature_map_height)) {
|
||||
// For this sample we save 4x point (0,0) with weight 0
|
||||
pooling_points.insert(pooling_points.end(), 4, {0, 0});
|
||||
pooling_weights.insert(pooling_weights.end(), 4, T{0});
|
||||
|
@ -147,6 +147,10 @@ bool Output<Node>::operator>=(const Output& other) const {
|
||||
return !(*this < other);
|
||||
}
|
||||
|
||||
// Converts this output handle into an Output<const Node> built from the same
// node pointer and the same output index.
Output<Node>::operator Output<const Node>() const {
    const Node* const node = get_node();
    const size_t index = get_index();
    return Output<const Node>(node, index);
}
|
||||
|
||||
Output<const Node>::Output(const Node* node, size_t index) : m_index(index) {
|
||||
OPENVINO_ASSERT(node, "Cannot create ov::Output<const ov::Node> from nullptr!");
|
||||
m_node = node->shared_from_this();
|
||||
|
@ -7,6 +7,9 @@
|
||||
#include "blob_factory.hpp" // IE private header
|
||||
#include "ie_ngraph_utils.hpp" // IE private header
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/core/shape.hpp"
|
||||
#include "openvino/core/strides.hpp"
|
||||
#include "openvino/runtime/remote_tensor.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "runtime/blob_allocator.hpp"
|
||||
#include "shape_util.hpp"
|
||||
@ -94,6 +97,17 @@ Tensor::Tensor(const Tensor& owner, const Coordinate& begin, const Coordinate& e
|
||||
}
|
||||
}
|
||||
|
||||
Tensor::Tensor(const ov::Output<const ov::Node>& port, const Allocator& allocator)
|
||||
: Tensor(port.get_element_type(),
|
||||
port.get_partial_shape().is_dynamic() ? ov::Shape{0} : port.get_shape(),
|
||||
allocator) {}
|
||||
|
||||
Tensor::Tensor(const ov::Output<const ov::Node>& port, void* host_ptr, const Strides& byte_strides)
|
||||
: Tensor(port.get_element_type(),
|
||||
port.get_partial_shape().is_dynamic() ? ov::Shape{0} : port.get_shape(),
|
||||
host_ptr,
|
||||
byte_strides) {}
|
||||
|
||||
element::Type Tensor::get_element_type() const {
|
||||
OV_TENSOR_STATEMENT(return ie::details::convertPrecision(_impl->getTensorDesc().getPrecision()));
|
||||
}
|
||||
@ -113,6 +127,128 @@ Shape Tensor::get_shape() const {
|
||||
OV_TENSOR_STATEMENT({ return _impl->getTensorDesc().getBlockingDesc().getBlockDims(); });
|
||||
}
|
||||
|
||||
// Copies the contents of this tensor into `dst`.
//
// Preconditions (checked with OPENVINO_ASSERT):
//  * `dst` is initialized;
//  * neither this tensor nor `dst` is an ov::RemoteTensor;
//  * both tensors have the same element type;
//  * both tensors have the same shape. A destination whose shape is exactly
//    ov::Shape{0} is first resized to the source shape.
//
// The copy is performed as a sequence of memcpy calls. `cur_pos`/`max_pos` form a
// multi-dimensional counter over the chunks to copy, and `src_strides`/`dst_strides`
// hold the byte stride of every counter dimension; the last stride is also the
// number of bytes moved by each memcpy.
void Tensor::copy_to(ov::Tensor& dst) const {
    OV_TENSOR_STATEMENT({
        OPENVINO_ASSERT(dst, "Destination tensor was not initialized.");
        OPENVINO_ASSERT(!is<ov::RemoteTensor>(), "Default copy to doesn't support copy from remote tensor.");
        OPENVINO_ASSERT(!dst.is<ov::RemoteTensor>(), "Default copy to doesn't support copy to remote tensor.");
        OPENVINO_ASSERT(dst.get_element_type() == get_element_type(),
                        "Tensor element types are not equal. (src: ",
                        get_element_type(),
                        " != dst: ",
                        dst.get_element_type(),
                        ")");
        if (dst.get_shape() == ov::Shape{0})
            dst.set_shape(get_shape());
        OPENVINO_ASSERT(dst.get_shape() == get_shape(),
                        "Tensor shapes are not equal. (src: ",
                        get_shape(),
                        " != dst: ",
                        dst.get_shape(),
                        ")");
        const auto& shape = get_shape();
        auto* src_data = static_cast<const uint8_t*>(data());
        auto* dst_data = static_cast<uint8_t*>(dst.data());
        // Defaults describe the single-chunk case: one memcpy of the whole buffer.
        ov::Strides src_strides{get_byte_size()};
        ov::Strides dst_strides{dst.get_byte_size()};
        ov::Shape cur_pos{0};
        ov::Shape max_pos{1};

        if (get_element_type().bitwidth() < 8 || (get_strides() == dst.get_strides() && is_continuous())) {
            // OpenVINO doesn't support strides for LP types
            // or both tensors have default strides
            // Strides and positions already initialized
        } else {
            // Source and destination strides differ, or the source is not continuous:
            // the data has to be copied chunk by chunk.
            const auto& type = get_element_type();
            // Dense row-major strides of `shape`, counted in elements.
            std::vector<size_t> elem_strides(shape.size());
            if (!shape.empty()) {
                elem_strides[shape.size() - 1] = 1;
            }
            auto size = shape.size();
            for (size_t i = 1; i < size; i++) {
                elem_strides[size - i - 1] = elem_strides[size - i] * shape[size - i];
            }

            // The same dense strides converted to bytes.
            ov::Strides default_strides(elem_strides.size());
            for (size_t i = 0; i < elem_strides.size(); ++i)
                default_strides[i] = elem_strides[i] * type.size();

            src_strides = get_strides();
            dst_strides = dst.get_strides();

            ov::Strides src_str, dst_str;

            // Calculate src and dst shapes
            // Walk the dimensions from innermost to outermost. Trailing dimensions
            // whose source and destination strides both equal the dense stride are
            // skipped; they are folded into one contiguous chunk.
            bool found_step = false;
            for (size_t i = 0; i < shape.size(); i++) {
                size_t inverted_idx = shape.size() - i - 1;
                if (!found_step) {
                    if (default_strides[inverted_idx] == src_strides[inverted_idx] &&
                        src_strides[inverted_idx] == dst_strides[inverted_idx]) {
                        continue;
                    } else {
                        found_step = true;
                        size_t strides_size = inverted_idx + 1;
                        // Set right size
                        src_str.resize(strides_size + 1);
                        dst_str.resize(strides_size + 1);
                        max_pos.resize(strides_size + 1);
                        cur_pos.resize(strides_size + 1);
                        // In case of default continuous strides we can copy several elements
                        // In other case only one element
                        size_t dim = 1;
                        size_t chunk_bytes = type.size();

                        if (strides_size < default_strides.size()) {
                            chunk_bytes = default_strides[strides_size];
                            dim = shape[strides_size];
                        }
                        src_str[strides_size] = chunk_bytes;
                        dst_str[strides_size] = chunk_bytes;
                        max_pos[strides_size] = dim;
                        cur_pos[strides_size] = 0;
                    }
                }
                src_str[inverted_idx] = src_strides[inverted_idx];
                dst_str[inverted_idx] = dst_strides[inverted_idx];
                max_pos[inverted_idx] = shape[inverted_idx];
                cur_pos[inverted_idx] = 0;
            }
            src_strides = src_str;
            dst_strides = dst_str;
        }

        // Byte offset of position `pos` for the given per-dimension byte strides.
        const auto update_index = [](const ov::Shape& pos, const ov::Strides& strides) {
            size_t offset = 0;

            for (size_t i = 0; i < pos.size(); i++) {
                offset += pos[i] * strides[i];
            }
            return offset;
        };

        bool finish = false;
        for (size_t dst_idx = 0, src_idx = 0; !finish;) {
            memcpy(dst_data + dst_idx, src_data + src_idx, src_strides[src_strides.size() - 1]);
            // update indexes
            // Increment the counter starting from the innermost dimension and carry
            // into outer ones; a wrap of dimension 0 means every chunk was copied.
            for (size_t i = 0; i < cur_pos.size(); i++) {
                size_t inverted_idx = cur_pos.size() - i - 1;
                cur_pos[inverted_idx]++;
                if (cur_pos[inverted_idx] != max_pos[inverted_idx]) {
                    break;
                }
                if (inverted_idx)
                    cur_pos[inverted_idx] = 0;
                else
                    finish = true;
            }
            src_idx = update_index(cur_pos, src_strides);
            dst_idx = update_index(cur_pos, dst_strides);
        }
    });
}
|
||||
|
||||
Strides Tensor::get_strides() const {
|
||||
OPENVINO_ASSERT(get_element_type().bitwidth() >= 8,
|
||||
"Could not get strides for types with bitwidths less then 8 bit. Tensor type: ",
|
||||
@ -174,24 +310,26 @@ Tensor::operator bool() const noexcept {
|
||||
}
|
||||
|
||||
bool Tensor::is_continuous() const {
|
||||
if (get_element_type().bitwidth() < 8)
|
||||
// OpenVINO doesn't support strides for lp types
|
||||
return true;
|
||||
const auto& shape = get_shape();
|
||||
const auto& type = get_element_type();
|
||||
std::vector<size_t> strides(shape.size());
|
||||
if (!shape.empty()) {
|
||||
strides[shape.size() - 1] = 1;
|
||||
}
|
||||
auto size = shape.size();
|
||||
for (size_t i = 1; i < size; i++) {
|
||||
strides[size - i - 1] = strides[size - i] * shape[size - i];
|
||||
}
|
||||
OV_TENSOR_STATEMENT({
|
||||
if (get_element_type().bitwidth() < 8)
|
||||
// OpenVINO doesn't support strides for lp types
|
||||
return true;
|
||||
const auto& shape = get_shape();
|
||||
const auto& type = get_element_type();
|
||||
std::vector<size_t> strides(shape.size());
|
||||
if (!shape.empty()) {
|
||||
strides[shape.size() - 1] = 1;
|
||||
}
|
||||
auto size = shape.size();
|
||||
for (size_t i = 1; i < size; i++) {
|
||||
strides[size - i - 1] = strides[size - i] * shape[size - i];
|
||||
}
|
||||
|
||||
ov::Strides byte_strides(strides.size());
|
||||
for (size_t i = 0; i < strides.size(); ++i)
|
||||
byte_strides[i] = strides[i] * type.size();
|
||||
return byte_strides == get_strides();
|
||||
ov::Strides byte_strides(strides.size());
|
||||
for (size_t i = 0; i < strides.size(); ++i)
|
||||
byte_strides[i] = strides[i] * type.size();
|
||||
return byte_strides == get_strides();
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace ov
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include <gmock/gmock-spec-builders.h>
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest-param-test.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstdint>
|
||||
@ -13,7 +14,11 @@
|
||||
|
||||
#include "ngraph/coordinate_transform.hpp"
|
||||
#include "openvino/core/except.hpp"
|
||||
#include "openvino/core/partial_shape.hpp"
|
||||
#include "openvino/core/type/element_type_traits.hpp"
|
||||
#include "openvino/op/parameter.hpp"
|
||||
#include "openvino/runtime/allocator.hpp"
|
||||
#include "openvino/runtime/remote_tensor.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
|
||||
using OVTensorTest = ::testing::Test;
|
||||
@ -40,6 +45,26 @@ TEST_F(OVTensorTest, canCreateTensor) {
|
||||
ASSERT_THROW(t.data<std::int32_t>(), ov::Exception);
|
||||
}
|
||||
|
||||
// Checks that a tensor constructed from a node output port takes its element type from the
// port, and its shape as well when the port shape is static. For a port with a fully dynamic
// shape the tensor is expected to report shape {0}, with or without a user-provided buffer.
TEST_F(OVTensorTest, createTensorFromPort) {
    using ov::op::v0::Parameter;

    const auto static_f64_param = std::make_shared<Parameter>(ov::element::f64, ov::Shape{1, 3, 2, 2});
    const auto static_f32_param = std::make_shared<Parameter>(ov::element::f32, ov::Shape{1, 3});
    const auto dynamic_f32_param = std::make_shared<Parameter>(ov::element::f32, ov::PartialShape::dynamic());

    // Host buffer handed to the tensors that wrap external memory.
    float host_buffer[] = {5.f, 6.f, 7.f};

    ov::Tensor owned_static{static_f64_param->output(0)};
    ov::Tensor wrapped_static{static_f32_param->output(0), host_buffer};
    ov::Tensor owned_dynamic{dynamic_f32_param->output(0)};
    ov::Tensor wrapped_dynamic{dynamic_f32_param->output(0), host_buffer};

    // Static ports: shape and element type are taken from the port.
    EXPECT_EQ(owned_static.get_shape(), static_f64_param->get_shape());
    EXPECT_EQ(owned_static.get_element_type(), static_f64_param->get_element_type());
    EXPECT_EQ(wrapped_static.get_shape(), static_f32_param->get_shape());
    EXPECT_EQ(wrapped_static.get_element_type(), static_f32_param->get_element_type());

    // Dynamic port: the tensor shape is {0}, the element type still comes from the port.
    EXPECT_EQ(owned_dynamic.get_shape(), ov::Shape{0});
    EXPECT_EQ(owned_dynamic.get_element_type(), dynamic_f32_param->get_element_type());
    EXPECT_EQ(wrapped_dynamic.get_shape(), ov::Shape{0});
    EXPECT_EQ(wrapped_dynamic.get_element_type(), dynamic_f32_param->get_element_type());
}
|
||||
|
||||
TEST_F(OVTensorTest, canAccessF16Tensor) {
|
||||
ov::Shape shape = {4, 3, 2};
|
||||
ov::Tensor t{ov::element::f16, shape};
|
||||
@ -281,3 +306,201 @@ TEST_F(OVTensorTest, readRangeRoiBlob) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Describes one Tensor::copy_to scenario: the layout of the source and of the destination.
// An empty strides field means the corresponding tensor is allocated directly from its shape;
// a non-empty one makes the test wrap a larger buffer using those strides.
struct TestParams {
|
||||
// Shape of the source tensor.
ov::Shape src_shape;
|
||||
// Strides of the source tensor; empty when the tensor is created from the shape alone.
ov::Strides src_strides;
|
||||
// Shape the destination tensor is created with.
ov::Shape dst_shape;
|
||||
// Strides of the destination tensor; empty when the tensor is created from the shape alone.
ov::Strides dst_strides;
|
||||
};
|
||||
|
||||
// Value-parameterized fixture: every instance receives an (element type, TestParams) tuple.
struct OVTensorTestCopy : ::testing::TestWithParam<std::tuple<ov::element::Type, TestParams>> {};
|
||||
|
||||
namespace {
|
||||
template <class T>
|
||||
std::vector<T> fill_data(const ov::Tensor& tensor) {
|
||||
std::vector<T> actual;
|
||||
const T* data = tensor.data<T>();
|
||||
auto strides = tensor.get_strides();
|
||||
for (auto&& c : ngraph::CoordinateTransformBasic{tensor.get_shape()}) {
|
||||
actual.emplace_back(
|
||||
*(data + (c[2] * strides[2] + c[1] * strides[1] + c[0] * strides[0]) / tensor.get_element_type().size()));
|
||||
}
|
||||
return actual;
|
||||
};
|
||||
template <class T>
|
||||
void compare_data(const ov::Tensor& src, const ov::Tensor& dst) {
|
||||
auto source_vec = fill_data<T>(src);
|
||||
auto dest_vec = fill_data<T>(dst);
|
||||
|
||||
ASSERT_EQ(source_vec.size(), dest_vec.size());
|
||||
|
||||
for (size_t i = 0; i < source_vec.size(); i++) {
|
||||
EXPECT_EQ(source_vec[i], dest_vec[i]);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
void init_tensor(const ov::Tensor& tensor, bool input) {
|
||||
const auto origPtr = tensor.data<T>();
|
||||
ASSERT_NE(nullptr, origPtr);
|
||||
for (size_t i = 0; i < tensor.get_size(); ++i) {
|
||||
origPtr[i] = static_cast<T>(input ? i : -1);
|
||||
}
|
||||
}
|
||||
|
||||
// Runtime dispatcher: selects the init_tensor<T> instantiation matching the tensor's element
// type. Only the element types listed below are handled; anything else hits
// OPENVINO_UNREACHABLE.
void init_tensor(const ov::Tensor& tensor, bool input) {
// Expands to one `case` that forwards to the typed overload for element type ET.
#define OV_TENSOR_TEST_INIT_CASE(ET)                                                       \
    case ov::element::ET:                                                                  \
        init_tensor<ov::element_type_traits<ov::element::ET>::value_type>(tensor, input); \
        break;

    switch (tensor.get_element_type()) {
        OV_TENSOR_TEST_INIT_CASE(bf16)
        OV_TENSOR_TEST_INIT_CASE(f16)
        OV_TENSOR_TEST_INIT_CASE(f32)
        OV_TENSOR_TEST_INIT_CASE(f64)
        OV_TENSOR_TEST_INIT_CASE(i8)
        OV_TENSOR_TEST_INIT_CASE(i16)
        OV_TENSOR_TEST_INIT_CASE(i32)
        OV_TENSOR_TEST_INIT_CASE(i64)
        OV_TENSOR_TEST_INIT_CASE(u8)
        OV_TENSOR_TEST_INIT_CASE(u16)
        OV_TENSOR_TEST_INIT_CASE(u32)
        OV_TENSOR_TEST_INIT_CASE(u64)
    default:
        OPENVINO_UNREACHABLE("Unsupported data type");
    }

#undef OV_TENSOR_TEST_INIT_CASE
}
|
||||
|
||||
// Asserts that two tensors agree on byte size, shape and element type, then compares their
// contents element by element through the compare_data<T> instantiation that matches the
// element type. Unsupported element types hit OPENVINO_UNREACHABLE.
void compare_tensors(const ov::Tensor& src, const ov::Tensor& dst) {
    ASSERT_EQ(src.get_byte_size(), dst.get_byte_size());
    ASSERT_EQ(src.get_shape(), dst.get_shape());
    ASSERT_EQ(src.get_element_type(), dst.get_element_type());

// Expands to one `case` that forwards to the typed comparison for element type ET.
#define OV_TENSOR_TEST_COMPARE_CASE(ET)                                                 \
    case ov::element::ET:                                                               \
        compare_data<ov::element_type_traits<ov::element::ET>::value_type>(src, dst);  \
        break;

    switch (src.get_element_type()) {
        OV_TENSOR_TEST_COMPARE_CASE(bf16)
        OV_TENSOR_TEST_COMPARE_CASE(f16)
        OV_TENSOR_TEST_COMPARE_CASE(f32)
        OV_TENSOR_TEST_COMPARE_CASE(f64)
        OV_TENSOR_TEST_COMPARE_CASE(i8)
        OV_TENSOR_TEST_COMPARE_CASE(i16)
        OV_TENSOR_TEST_COMPARE_CASE(i32)
        OV_TENSOR_TEST_COMPARE_CASE(i64)
        OV_TENSOR_TEST_COMPARE_CASE(u8)
        OV_TENSOR_TEST_COMPARE_CASE(u16)
        OV_TENSOR_TEST_COMPARE_CASE(u32)
        OV_TENSOR_TEST_COMPARE_CASE(u64)
    default:
        OPENVINO_UNREACHABLE("Unsupported data type");
    }

#undef OV_TENSOR_TEST_COMPARE_CASE
}
|
||||
} // namespace
|
||||
|
||||
// Copies a source tensor into a destination tensor with Tensor::copy_to and verifies that the
// destination ends up with the same logical content.
//
// For each side, a non-empty strides parameter makes the test allocate a larger 1D "full"
// tensor and wrap its memory with the requested shape and strides; otherwise the tensor is
// allocated directly from the shape. The source buffer is filled with an index pattern and the
// destination buffer with a sentinel, so elements that copy_to fails to write are detectable.
TEST_P(OVTensorTestCopy, copy_to) {
    ov::element::Type type;
    TestParams p;
    std::tie(type, p) = GetParam();

    // Source tensors
    ov::Tensor full_src_tensor;
    ov::Tensor src_tensor;
    if (!p.src_strides.empty()) {
        full_src_tensor = ov::Tensor(type, ov::Shape{p.src_shape[0] * p.src_strides[0]});
        src_tensor = ov::Tensor(type, p.src_shape, full_src_tensor.data(), p.src_strides);
    } else {
        src_tensor = full_src_tensor = ov::Tensor(type, p.src_shape);
    }
    init_tensor(full_src_tensor, true);

    // Destination tensors
    ov::Tensor full_dst_tensor;
    ov::Tensor dst_tensor;
    if (!p.dst_strides.empty()) {
        full_dst_tensor = ov::Tensor(type, ov::Shape{p.dst_shape[0] * p.dst_strides[0]});
        dst_tensor = ov::Tensor(type, p.dst_shape, full_dst_tensor.data(), p.dst_strides);
    } else {
        dst_tensor = full_dst_tensor = ov::Tensor(type, p.dst_shape);
    }
    // Fill the DESTINATION with the sentinel. The previous code re-initialized the source
    // here, which wiped the index pattern and left the destination untouched. A destination
    // with no elements (shape {0}) has nothing to fill, so it is skipped.
    if (full_dst_tensor.get_size() != 0) {
        init_tensor(full_dst_tensor, false);
    }

    src_tensor.copy_to(dst_tensor);
    compare_tensors(src_tensor, dst_tensor);
}
|
||||
|
||||
// clang-format off
|
||||
INSTANTIATE_TEST_SUITE_P(copy_tests,
|
||||
OVTensorTestCopy,
|
||||
::testing::Combine(::testing::Values(
|
||||
ov::element::bf16,
|
||||
ov::element::f16,
|
||||
ov::element::f32,
|
||||
ov::element::f64,
|
||||
ov::element::i8,
|
||||
ov::element::i16,
|
||||
ov::element::i32,
|
||||
ov::element::i64,
|
||||
ov::element::u8,
|
||||
ov::element::u16,
|
||||
ov::element::u32,
|
||||
ov::element::u64
|
||||
),
|
||||
::testing::Values(
|
||||
TestParams {
|
||||
ov::Shape{1, 3, 4, 8}, {},
|
||||
{0}, {}
|
||||
},
|
||||
TestParams {
|
||||
ov::Shape{3, 2, 2}, {},
|
||||
ov::Shape{3, 2, 2}, ov::Strides{128, 24, 8}
|
||||
},
|
||||
TestParams {
|
||||
ov::Shape{3, 2, 2}, ov::Strides{64, 16, 8},
|
||||
ov::Shape{3, 2, 2}, ov::Strides{}
|
||||
},
|
||||
TestParams {
|
||||
ov::Shape{3, 2, 2}, ov::Strides{64, 16, 8},
|
||||
ov::Shape{3, 2, 2}, ov::Strides{128, 24, 8}
|
||||
}
|
||||
)));
|
||||
// clang-format on
|
||||
|
@ -14,7 +14,7 @@
|
||||
|
||||
namespace ov {
|
||||
// Forward declaration
|
||||
void FRONTEND_API shutdown();
|
||||
FRONTEND_API void shutdown();
|
||||
namespace frontend {
|
||||
// -------------- FrontEndManager -----------------
|
||||
using FrontEndFactory = std::function<FrontEnd::Ptr()>;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "transforms/aten_cat_replacer.hpp"
|
||||
#include "transforms/aten_getitem_replacer.hpp"
|
||||
#include "transforms/aten_stack_list_construct_replacer.hpp"
|
||||
#include "transforms/einsum_list_construct.hpp"
|
||||
#include "transforms/listconstruct_replacer.hpp"
|
||||
#include "transforms/min_max_prim_list_construct_replacer.hpp"
|
||||
#include "transforms/prim_list_construct_pad.hpp"
|
||||
@ -97,6 +98,7 @@ void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
|
||||
manager.register_pass<ov::frontend::pytorch::pass::AtenGetItemReplacer>();
|
||||
manager.register_pass<ov::frontend::pytorch::pass::ListConstructReplacer>();
|
||||
manager.register_pass<ov::frontend::pytorch::pass::PrimListConstructPadReplacer>();
|
||||
manager.register_pass<ov::frontend::pytorch::pass::AtenEinsumListConstructReplacer>();
|
||||
manager.register_pass<ov::frontend::pytorch::pass::MinMaxPrimListConstructReplacer>();
|
||||
manager.register_pass<ov::frontend::pytorch::pass::DecomposeListTupleResults>();
|
||||
manager.register_pass<ov::pass::RemoveMultiSubGraphOpDanglingParams>();
|
||||
|
@ -142,6 +142,11 @@ ngraph::Shape NodeContext::const_input<ngraph::Shape>(size_t index) const {
|
||||
return get_constant_at_input(*this, index)->cast_vector<ngraph::Shape::value_type>();
|
||||
}
|
||||
|
||||
template <>
|
||||
int32_t NodeContext::const_input<int32_t>(size_t index) const {
|
||||
return get_constant_at_input(*this, index)->cast_vector<int32_t>()[0];
|
||||
}
|
||||
|
||||
template <>
|
||||
int64_t NodeContext::const_input<int64_t>(size_t index) const {
|
||||
return get_constant_at_input(*this, index)->cast_vector<int64_t>()[0];
|
||||
|
68
src/frontends/pytorch/src/op/roi_align.cpp
Normal file
68
src/frontends/pytorch/src/op/roi_align.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/op/roi_align.hpp"
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/convert_like.hpp"
|
||||
#include "openvino/op/gather.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
// Translates a 7-input roi_align node into v9::ROIAlign.
//
// Inputs as read below: 0 - feature map, 1 - boxes, 2 - spatial scale (float constant),
// 3/4 - output height/width (int constants), 5 - sampling ratio (int constant),
// 6 - "aligned" flag (bool constant).
// Along axis 1 of the boxes tensor, element 0 is used as the batch index and elements 1..4
// as the ROI passed to ROIAlign. Pooling mode is always AVG.
OutputVector translate_roi_align(NodeContext& context) {
    num_inputs_check(context, 7, 7);

    // Helper constants: gather axis, reshape target, and the index sets selecting boxes columns.
    auto axis_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
    auto flatten_shape = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
    auto batch_column = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
    auto roi_columns = context.mark_node(v0::Constant::create(element::i32, Shape{4}, {1, 2, 3, 4}));

    auto features = context.get_input(0);
    auto raw_boxes = context.get_input(1);

    // The feature map is converted to f32 and the boxes are converted to match it.
    auto features_f32 = context.mark_node(std::make_shared<v0::Convert>(features, element::f32));
    auto boxes = context.mark_node(std::make_shared<v1::ConvertLike>(raw_boxes, features_f32));

    const auto spatial_scale = context.const_input<float>(2);
    const int pooled_h = context.const_input<int32_t>(3);
    const int pooled_w = context.const_input<int32_t>(4);
    const int sampling_ratio = context.const_input<int32_t>(5);
    const auto aligned = context.const_input<bool>(6);

    auto rois = context.mark_node(std::make_shared<v8::Gather>(boxes, roi_columns, axis_1));

    // Batch indices: gather column 0, flatten to 1D and convert to i32.
    auto batch_column_values = context.mark_node(std::make_shared<v8::Gather>(boxes, batch_column, axis_1));
    auto batch_flat =
        context.mark_node(std::make_shared<v1::Reshape>(batch_column_values, flatten_shape, false));
    auto batch_indices = context.mark_node(std::make_shared<v0::Convert>(batch_flat, element::i32));

    v9::ROIAlign::AlignedMode aligned_mode = v9::ROIAlign::AlignedMode::ASYMMETRIC;
    if (aligned) {
        aligned_mode = v9::ROIAlign::AlignedMode::HALF_PIXEL_FOR_NN;
    }

    auto roi_align = context.mark_node(std::make_shared<v9::ROIAlign>(features_f32,
                                                                      rois,
                                                                      batch_indices,
                                                                      pooled_h,
                                                                      pooled_w,
                                                                      sampling_ratio,
                                                                      spatial_scale,
                                                                      v9::ROIAlign::PoolingMode::AVG,
                                                                      aligned_mode));

    return {roi_align};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace pytorch
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
@ -16,10 +16,12 @@ namespace op {
|
||||
using namespace ov::op;
|
||||
|
||||
namespace {
|
||||
OutputVector base_translate_upsample2d(const NodeContext& context, v4::Interpolate::InterpolateMode interpolate_mode) {
|
||||
num_inputs_check(context, 3, 4);
|
||||
OutputVector base_translate_upsample(const NodeContext& context,
|
||||
v4::Interpolate::InterpolateMode interpolate_mode,
|
||||
size_t dims) {
|
||||
num_inputs_check(context, 1, 4);
|
||||
auto data = context.get_input(0);
|
||||
std::vector<size_t> pad{0};
|
||||
std::vector<size_t> pad(dims, 0);
|
||||
auto size_mode = v4::Interpolate::ShapeCalcMode::SIZES;
|
||||
bool align_corners = false;
|
||||
int scale_id = 2;
|
||||
@ -29,11 +31,21 @@ OutputVector base_translate_upsample2d(const NodeContext& context, v4::Interpola
|
||||
align_corners = context.const_input<bool>(2);
|
||||
}
|
||||
}
|
||||
auto target_axes = std::make_shared<v0::Constant>(element::i32, Shape{2}, std::vector<int>({2, 3}));
|
||||
std::vector<int> spatial_axes;
|
||||
if (dims == 1) {
|
||||
spatial_axes = {2};
|
||||
} else if (dims == 2) {
|
||||
spatial_axes = {2, 3};
|
||||
} else if (dims == 3) {
|
||||
spatial_axes = {2, 3, 4};
|
||||
} else {
|
||||
FRONT_END_OP_CONVERSION_CHECK(false, "Unsupported number of dimensions in upsample");
|
||||
}
|
||||
auto target_axes = std::make_shared<v0::Constant>(element::i32, Shape{spatial_axes.size()}, spatial_axes);
|
||||
auto scales =
|
||||
context.mark_node(std::make_shared<v0::Constant>(element::f32, Shape{2}, std::vector<double>({1, 1})));
|
||||
context.mark_node(std::make_shared<v0::Constant>(element::f32, Shape{dims}, std::vector<double>(dims, 1)));
|
||||
auto output_sizes =
|
||||
context.mark_node(std::make_shared<v0::Constant>(element::i32, Shape{2}, std::vector<int>({1, 1})));
|
||||
context.mark_node(std::make_shared<v0::Constant>(element::i32, Shape{dims}, std::vector<int>(dims, 1)));
|
||||
if (context.input_is_none(1)) {
|
||||
FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(scale_id), "Scale or Output size should be provided");
|
||||
auto spatial_scales = context.get_input(scale_id);
|
||||
@ -48,6 +60,7 @@ OutputVector base_translate_upsample2d(const NodeContext& context, v4::Interpola
|
||||
attrs.coordinate_transformation_mode = v4::Interpolate::CoordinateTransformMode::ASYMMETRIC;
|
||||
attrs.nearest_mode = v4::Interpolate::NearestMode::FLOOR;
|
||||
if (attrs.mode != v4::Interpolate::InterpolateMode::NEAREST) {
|
||||
attrs.coordinate_transformation_mode = v4::Interpolate::CoordinateTransformMode::PYTORCH_HALF_PIXEL;
|
||||
if (align_corners) {
|
||||
attrs.coordinate_transformation_mode = v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS;
|
||||
}
|
||||
@ -56,16 +69,33 @@ OutputVector base_translate_upsample2d(const NodeContext& context, v4::Interpola
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Thin wrapper: forwards to base_translate_upsample with the LINEAR_ONNX interpolation mode
// and dims = 1.
OutputVector translate_upsample_linear1d(NodeContext& context) {
|
||||
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 1);
|
||||
};
|
||||
|
||||
OutputVector translate_upsample_bilinear2d(NodeContext& context) {
|
||||
return base_translate_upsample2d(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX);
|
||||
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 2);
|
||||
};
|
||||
|
||||
// Thin wrapper: forwards to base_translate_upsample with the LINEAR_ONNX interpolation mode
// and dims = 3.
OutputVector translate_upsample_trilinear3d(NodeContext& context) {
|
||||
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 3);
|
||||
};
|
||||
|
||||
// Thin wrapper: forwards to base_translate_upsample with the NEAREST interpolation mode
// and dims = 1.
OutputVector translate_upsample_nearest1d(NodeContext& context) {
|
||||
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 1);
|
||||
};
|
||||
|
||||
OutputVector translate_upsample_nearest2d(NodeContext& context) {
|
||||
return base_translate_upsample2d(context, v4::Interpolate::InterpolateMode::NEAREST);
|
||||
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 2);
|
||||
};
|
||||
|
||||
// Thin wrapper: forwards to base_translate_upsample with the NEAREST interpolation mode
// and dims = 3.
OutputVector translate_upsample_nearest3d(NodeContext& context) {
|
||||
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 3);
|
||||
};
|
||||
|
||||
// bicubic is only supported for 2d in pytorch
|
||||
OutputVector translate_upsample_bicubic2d(NodeContext& context) {
|
||||
return base_translate_upsample2d(context, v4::Interpolate::InterpolateMode::CUBIC);
|
||||
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::CUBIC, 2);
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
|
@ -89,6 +89,7 @@ OP_CONVERTER(translate_repeat);
|
||||
OP_CONVERTER(translate_repeat_interleave);
|
||||
OP_CONVERTER(translate_reshape);
|
||||
OP_CONVERTER(translate_reshape_as);
|
||||
OP_CONVERTER(translate_roi_align);
|
||||
OP_CONVERTER(translate_roll);
|
||||
OP_CONVERTER(translate_rsqrt);
|
||||
OP_CONVERTER(translate_rsub);
|
||||
@ -110,7 +111,11 @@ OP_CONVERTER(translate_triu);
|
||||
OP_CONVERTER(translate_unfold);
|
||||
OP_CONVERTER(translate_upsample_bicubic2d);
|
||||
OP_CONVERTER(translate_upsample_bilinear2d);
|
||||
OP_CONVERTER(translate_upsample_linear1d);
|
||||
OP_CONVERTER(translate_upsample_nearest1d);
|
||||
OP_CONVERTER(translate_upsample_nearest2d);
|
||||
OP_CONVERTER(translate_upsample_nearest3d);
|
||||
OP_CONVERTER(translate_upsample_trilinear3d);
|
||||
OP_CONVERTER(translate_var);
|
||||
OP_CONVERTER(translate_var_mean);
|
||||
OP_CONVERTER(translate_where);
|
||||
@ -303,7 +308,11 @@ const std::map<std::string, PytorchCreatorFunction> get_supported_ops() {
|
||||
{"aten::unsqueeze_", op::inplace_op<op::translate_1to1_match_2_inputs<opset10::Unsqueeze>>},
|
||||
{"aten::upsample_bicubic2d", op::translate_upsample_bicubic2d},
|
||||
{"aten::upsample_bilinear2d", op::translate_upsample_bilinear2d},
|
||||
{"aten::upsample_linear1d", op::translate_upsample_linear1d},
|
||||
{"aten::upsample_nearest1d", op::translate_upsample_nearest1d},
|
||||
{"aten::upsample_nearest2d", op::translate_upsample_nearest2d},
|
||||
{"aten::upsample_nearest3d", op::translate_upsample_nearest3d},
|
||||
{"aten::upsample_trilinear3d", op::translate_upsample_trilinear3d},
|
||||
{"aten::var", op::translate_var},
|
||||
{"aten::var_mean", op::translate_var_mean},
|
||||
{"aten::view", op::translate_reshape},
|
||||
@ -319,6 +328,7 @@ const std::map<std::string, PytorchCreatorFunction> get_supported_ops() {
|
||||
{"prim::NumToTensor", op::skip_node}, // In openvino we already store number as tensor with shape []
|
||||
{"prim::requires_grad", op::return_false_scalar},
|
||||
{"torchvision::nms", op::translate_nms},
|
||||
{"torchvision::roi_align", op::translate_roi_align},
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -0,0 +1,68 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "einsum_list_construct.hpp"
|
||||
|
||||
#include "openvino/core/rt_info.hpp"
|
||||
#include "openvino/op/einsum.hpp"
|
||||
#include "openvino/op/util/framework_node.hpp"
|
||||
#include "openvino/pass/pattern/matcher.hpp"
|
||||
#include "openvino/pass/pattern/op/wrap_type.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
using namespace ov::pass::pattern;
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace pass {
|
||||
|
||||
using namespace ov::pass;
|
||||
using namespace ov::op;
|
||||
|
||||
// Registers a matcher that replaces the pattern
//     aten::einsum(prim::Constant<string equation>, prim::ListConstruct(t0, t1, ...))
// with v7::Einsum({t0, t1, ...}, equation).
//
// The callback returns false (leaving the graph untouched) when the matched framework node is
// not aten::einsum, when the equation input is not a prim::Constant carrying a "string_value"
// attribute, or when the operand input is not a prim::ListConstruct.
AtenEinsumListConstructReplacer::AtenEinsumListConstructReplacer() {
    // The pattern matches any FrameworkNode; the concrete op type is checked in the callback.
    auto einsum_op = pattern::wrap_type<ov::op::util::FrameworkNode>();

    ov::matcher_pass_callback callback = [](pattern::Matcher& m) {
        auto einsum_op = cast_fw_node(m.get_match_root(), "aten::einsum");
        if (!einsum_op) {
            return false;
        }
        auto equation_input = einsum_op->input_value(0).get_node_shared_ptr();
        auto tensor_list = einsum_op->input_value(1).get_node_shared_ptr();

        // The equation must be a string constant.
        const auto equation_node = cast_fw_node(equation_input, "prim::Constant");
        if (!equation_node) {
            return false;
        }
        const auto& attrs = equation_node->get_attrs();
        if (attrs.find("string_value") == attrs.end()) {
            // A constant without a string payload cannot provide the equation. Previously an
            // Einsum with an empty equation was built in this case; reject the match instead.
            return false;
        }
        const std::string equation = attrs.at("string_value");

        // The operands must come from a ListConstruct; its inputs become the Einsum inputs.
        auto list_construct_node = cast_fw_node(tensor_list, "prim::ListConstruct");
        if (!list_construct_node) {
            return false;
        }
        const OutputVector einsum_inputs = list_construct_node->input_values();

        auto einsum = std::make_shared<v7::Einsum>(einsum_inputs, equation);
        copy_runtime_info({einsum_op, equation_input, tensor_list}, einsum);
        replace_node(einsum_op, einsum);
        return true;
    };

    auto m =
        std::make_shared<pattern::Matcher>(einsum_op, "ov::frontend::pytorch::pass::AtenEinsumListConstructReplacer");
    this->register_matcher(m, callback);
}
|
||||
|
||||
} // namespace pass
|
||||
} // namespace pytorch
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
@ -0,0 +1,24 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/pass/graph_rewrite.hpp"
|
||||
#include "openvino/pass/pass.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace pass {
|
||||
|
||||
// Matcher pass for the PyTorch frontend, registered under the RTTI name
// "ov::frontend::pytorch::pass::AtenEinsumListConstructReplacer".
// NOTE(review): judging by the name it rewrites aten::einsum nodes fed by a ListConstruct;
// the matching logic lives in the constructor definition, which is not part of this header.
class AtenEinsumListConstructReplacer : public ov::pass::MatcherPass {
|
||||
public:
|
||||
OPENVINO_RTTI("ov::frontend::pytorch::pass::AtenEinsumListConstructReplacer");
|
||||
// Sets up the pass; see the definition for the pattern and callback.
AtenEinsumListConstructReplacer();
|
||||
};
|
||||
|
||||
} // namespace pass
|
||||
} // namespace pytorch
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
@ -58,11 +58,12 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
|
||||
if (variants.size() != 1)
|
||||
return false;
|
||||
|
||||
// Validating first path, it must contain a model
|
||||
if (variants[0].is<std::string>()) {
|
||||
std::string suffix = ".pb";
|
||||
std::string model_path = variants[0].as<std::string>();
|
||||
if (ov::util::ends_with(model_path, suffix.c_str())) {
|
||||
if (ov::util::ends_with(model_path, ".pb") && GraphIteratorProto::is_supported(model_path)) {
|
||||
// handle binary protobuf format
|
||||
// for automatic deduction of the frontend to convert the model
|
||||
// we have more strict rule that is to have `.pb` extension in the path
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -70,12 +71,16 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
|
||||
else if (variants[0].is<std::wstring>()) {
|
||||
std::wstring suffix = L".pb";
|
||||
std::wstring model_path = variants[0].as<std::wstring>();
|
||||
if (ov::util::ends_with(model_path, suffix)) {
|
||||
if (ov::util::ends_with(model_path, suffix) && GraphIteratorProto::is_supported(model_path)) {
|
||||
// handle binary protobuf format with a path in Unicode
|
||||
// for automatic deduction of the frontend to convert the model
|
||||
// we have more strict rule that is to have `.pb` extension in the path
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
else if (variants[0].is<GraphIterator::Ptr>()) {
|
||||
// this is used for OpenVINO with TensorFlow Integration
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -83,33 +88,36 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
|
||||
|
||||
ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const {
|
||||
// TODO: Support other TensorFlow formats: SavedModel, .meta, checkpoint, pbtxt
|
||||
if (variants.size() == 1) {
|
||||
// a case when binary protobuf format is provided
|
||||
if (variants[0].is<std::string>()) {
|
||||
std::string suffix = ".pb";
|
||||
std::string model_path = variants[0].as<std::string>();
|
||||
if (ov::util::ends_with(model_path, suffix.c_str())) {
|
||||
return std::make_shared<InputModel>(
|
||||
std::make_shared<::ov::frontend::tensorflow::GraphIteratorProto>(model_path),
|
||||
m_telemetry);
|
||||
}
|
||||
}
|
||||
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
|
||||
else if (variants[0].is<std::wstring>()) {
|
||||
std::wstring suffix = L".pb";
|
||||
std::wstring model_path = variants[0].as<std::wstring>();
|
||||
if (ov::util::ends_with(model_path, suffix)) {
|
||||
return std::make_shared<InputModel>(
|
||||
std::make_shared<::ov::frontend::tensorflow::GraphIteratorProto>(model_path),
|
||||
m_telemetry);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
else if (variants[0].is<GraphIterator::Ptr>()) {
|
||||
auto graph_iterator = variants[0].as<GraphIterator::Ptr>();
|
||||
return std::make_shared<InputModel>(graph_iterator, m_telemetry);
|
||||
FRONT_END_GENERAL_CHECK(variants.size() == 1,
|
||||
"[TensorFlow Frontend] Internal error or inconsistent input model: the frontend supports "
|
||||
"only frozen binary protobuf format.");
|
||||
|
||||
if (variants[0].is<std::string>()) {
|
||||
auto model_path = variants[0].as<std::string>();
|
||||
if (GraphIteratorProto::is_supported(model_path)) {
|
||||
// handle binary protobuf format
|
||||
return std::make_shared<InputModel>(std::make_shared<GraphIteratorProto>(model_path), m_telemetry);
|
||||
}
|
||||
}
|
||||
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
|
||||
else if (variants[0].is<std::wstring>()) {
|
||||
std::wstring model_path = variants[0].as<std::wstring>();
|
||||
if (GraphIteratorProto::is_supported(model_path)) {
|
||||
// handle binary protobuf format with a path in Unicode
|
||||
return std::make_shared<InputModel>(std::make_shared<GraphIteratorProto>(model_path), m_telemetry);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
else if (variants[0].is<GraphIterator::Ptr>()) {
|
||||
// this is used for OpenVINO with TensorFlow Integration
|
||||
auto graph_iterator = variants[0].as<GraphIterator::Ptr>();
|
||||
return std::make_shared<InputModel>(graph_iterator, m_telemetry);
|
||||
}
|
||||
|
||||
FRONT_END_GENERAL_CHECK(false,
|
||||
"[TensorFlow Frontend] Internal error or inconsistent input model: the frontend supports "
|
||||
"only frozen binary protobuf format.");
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -88,29 +88,40 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Set iterator to the start position
|
||||
/// \brief Check if the input file is supported
|
||||
template <typename T>
|
||||
static bool is_supported(const std::basic_string<T>& path) {
|
||||
std::ifstream pb_stream(path, std::ios::in | std::ifstream::binary);
|
||||
auto graph_def = std::make_shared<::tensorflow::GraphDef>();
|
||||
return pb_stream && pb_stream.is_open() && graph_def->ParsePartialFromIstream(&pb_stream);
|
||||
}
|
||||
|
||||
/// \brief Set iterator to the start position (node_index becomes 0)
|
||||
void reset() override {
|
||||
node_index = 0;
|
||||
}
|
||||
|
||||
/// \brief Return a number of nodes in the graph
/// \return m_decoders.size(), i.e. one entry per decoder held by the iterator
|
||||
size_t size() const override {
|
||||
return m_decoders.size();
|
||||
}
|
||||
|
||||
/// \brief Move to the next node in the graph
|
||||
/// Increments node_index by one; no end-of-graph check is made here (see is_end()).
|
||||
void next() override {
|
||||
node_index++;
|
||||
}
|
||||
|
||||
/// \brief Check if the graph is fully traversed
/// \return true once node_index is no longer a valid index into m_decoders
|
||||
bool is_end() const override {
|
||||
return node_index >= m_decoders.size();
|
||||
}
|
||||
|
||||
/// \brief Return the decoder for the current node that iterator points to
|
||||
/// \return m_decoders[node_index]; the index is not range-checked here, so callers
///         presumably test is_end() before calling (TODO confirm against callers)
|
||||
std::shared_ptr<DecoderBase> get_decoder() const override {
|
||||
return m_decoders[node_index];
|
||||
}
|
||||
|
||||
/// \brief Get GraphIterator for library function by name
|
||||
std::shared_ptr<GraphIterator> get_body_graph_iterator(const std::string& func_name) const override {
|
||||
if (m_library_map.count(func_name)) {
|
||||
auto func_ind = m_library_map.at(func_name);
|
||||
@ -127,10 +138,12 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// \brief Get input names in the original order. Used for the library functions
/// \return A copy of m_input_names
|
||||
std::vector<std::string> get_input_names() const override {
|
||||
return m_input_names;
|
||||
}
|
||||
|
||||
/// \brief Get output names in the original order. Used for the library functions
/// \return A copy of m_output_names
|
||||
std::vector<std::string> get_output_names() const override {
|
||||
return m_output_names;
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ file (GLOB LIBRARY_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/dev/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/dev/preprocessing/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/dev/threading/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/threading/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/cpp_interfaces/interface/*.cpp
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
#include "openvino/runtime/icore.hpp"
|
||||
#include "openvino/runtime/remote_context.hpp"
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -188,7 +188,7 @@ public:
|
||||
* @brief Gets reference to tasks execution manager
|
||||
* @return Reference to ExecutorManager interface
|
||||
*/
|
||||
const std::shared_ptr<InferenceEngine::ExecutorManager>& get_executor_manager() const;
|
||||
const std::shared_ptr<ov::ExecutorManager>& get_executor_manager() const;
|
||||
|
||||
~IPlugin() = default;
|
||||
|
||||
@ -198,11 +198,11 @@ protected:
|
||||
private:
|
||||
friend ::InferenceEngine::IPluginWrapper;
|
||||
|
||||
std::string m_plugin_name; //!< A device name that plugins enables
|
||||
std::weak_ptr<ov::ICore> m_core; //!< A pointer to ICore interface
|
||||
std::shared_ptr<InferenceEngine::ExecutorManager> m_executor_manager; //!< A tasks execution manager
|
||||
ov::Version m_version; //!< Member contains plugin version
|
||||
bool m_is_new_api; //!< A flag which shows used API
|
||||
std::string m_plugin_name; //!< A device name that plugins enables
|
||||
std::weak_ptr<ov::ICore> m_core; //!< A pointer to ICore interface
|
||||
std::shared_ptr<ov::ExecutorManager> m_executor_manager; //!< A tasks execution manager
|
||||
ov::Version m_version; //!< Member contains plugin version
|
||||
bool m_is_new_api; //!< A flag which shows used API
|
||||
};
|
||||
|
||||
} // namespace ov
|
||||
|
@ -0,0 +1,77 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief OpenVINO Runtime Executor Manager
|
||||
* @file openvino/runtime/threading/executor_manager.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "threading/ie_istreams_executor.hpp"
|
||||
#include "threading/ie_itask_executor.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
 * @interface ExecutorManager
 * @brief Interface for tasks execution manager.
 * This is the global point for getting task executor objects by string id.
 * It is needed when multiple asynchronous requests must share unique executors to avoid oversubscription.
 * E.g. there are 2 task executors for the CPU device: one in FPGA, another in OneDNN. Running both of them in
 * parallel leads to suboptimal CPU usage. It is more efficient to run the corresponding tasks one by one via a
 * single executor.
 * @ingroup ov_dev_api_threading
 */
|
||||
class OPENVINO_RUNTIME_API ExecutorManager {
|
||||
public:
|
||||
/**
 * @brief Returns executor by unique identifier
 * @param id A unique identifier of the device (usually the string representation of TargetDevice)
 * @return A shared pointer to an existing or newly created ITaskExecutor
 */
|
||||
virtual InferenceEngine::ITaskExecutor::Ptr get_executor(const std::string& id) = 0;
|
||||
|
||||
/**
 * @brief Returns idle cpu streams executor
 *
 * @param config Streams executor config
 *
 * @return pointer to streams executor
 */
|
||||
virtual InferenceEngine::IStreamsExecutor::Ptr get_idle_cpu_streams_executor(
|
||||
const InferenceEngine::IStreamsExecutor::Config& config) = 0;
|
||||
|
||||
/**
 * @brief Allows to configure executor manager
 *
 * @param properties map with configuration
 */
|
||||
virtual void set_property(const ov::AnyMap& properties) = 0;
|
||||
/**
 * @brief Returns configuration
 *
 * @param name property name
 *
 * @return Property value
 */
|
||||
virtual ov::Any get_property(const std::string& name) const = 0;
|
||||
|
||||
/**
 * @cond
 */
// The three members below are hidden from the generated documentation by the
// surrounding @cond / @endcond pair.
|
||||
// Number of task executors currently registered in the manager.
virtual size_t get_executors_number() const = 0;
|
||||
|
||||
// Number of CPU streams executors currently cached in the manager.
virtual size_t get_idle_cpu_streams_executors_number() const = 0;
|
||||
|
||||
// Drops executors; the default (empty) id is presumably "drop everything",
// a non-empty id restricts the operation to that id -- confirm against the implementation.
virtual void clear(const std::string& id = {}) = 0;
|
||||
/**
 * @endcond
 */
|
||||
virtual ~ExecutorManager() = default;
|
||||
};
|
||||
|
||||
OPENVINO_API std::shared_ptr<ExecutorManager> executor_manager();
|
||||
|
||||
} // namespace ov
|
@ -18,8 +18,16 @@
|
||||
#include "threading/ie_istreams_executor.hpp"
|
||||
#include "threading/ie_itask_executor.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
||||
class ExecutorManager;
|
||||
|
||||
}
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
class IPluginWrapper;
|
||||
|
||||
/**
|
||||
* @interface ExecutorManager
|
||||
* @brief Interface for tasks execution manager.
|
||||
@ -76,8 +84,15 @@ public:
|
||||
*/
|
||||
virtual void setTbbFlag(bool flag) = 0;
|
||||
virtual bool getTbbFlag() = 0;
|
||||
|
||||
private:
|
||||
virtual std::shared_ptr<ov::ExecutorManager> get_ov_manager() const = 0;
|
||||
friend class IPluginWrapper;
|
||||
};
|
||||
|
||||
INFERENCE_ENGINE_API_CPP(ExecutorManager::Ptr) executorManager();
|
||||
|
||||
std::shared_ptr<InferenceEngine::ExecutorManager> create_old_manager(
|
||||
const std::shared_ptr<ov::ExecutorManager>& manager);
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -44,6 +44,8 @@ public:
|
||||
template <typename T>
|
||||
T* data() = delete;
|
||||
|
||||
void copy_to(ov::Tensor& dst) const = delete;
|
||||
|
||||
/**
|
||||
* @brief Returns a map of device-specific parameters required for low-level
|
||||
* operations with underlying object.
|
||||
|
@ -34,8 +34,10 @@
|
||||
#include "openvino/runtime/profiling_info.hpp"
|
||||
#include "openvino/runtime/remote_context.hpp"
|
||||
#include "openvino/runtime/tensor.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
#include "openvino/runtime/variable_state.hpp"
|
||||
#include "so_ptr.hpp"
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
namespace {
|
||||
@ -221,7 +223,7 @@ public:
|
||||
version.description = ver.description;
|
||||
SetVersion(version);
|
||||
_isNewAPI = plugin->is_new_api();
|
||||
_executorManager = plugin->get_executor_manager();
|
||||
_executorManager = InferenceEngine::create_old_manager(plugin->get_executor_manager());
|
||||
}
|
||||
std::string GetName() const noexcept override {
|
||||
return m_plugin->get_device_name();
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "openvino/pass/manager.hpp"
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
#include "openvino/runtime/remote_context.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
#include "openvino/util/common_util.hpp"
|
||||
#include "openvino/util/shared_object.hpp"
|
||||
#include "preprocessing/preprocessing.hpp"
|
||||
@ -57,7 +58,7 @@ void stripDeviceName(std::string& device, const std::string& substr) {
|
||||
|
||||
ov::CoreImpl::CoreImpl(bool _newAPI) : m_new_api(_newAPI) {
|
||||
add_mutex(""); // Register global mutex
|
||||
executorManagerPtr = InferenceEngine::executorManager();
|
||||
m_executor_manager = ov::executor_manager();
|
||||
for (const auto& it : ov::get_available_opsets()) {
|
||||
opsetNames.insert(it.first);
|
||||
}
|
||||
@ -632,7 +633,7 @@ void ov::CoreImpl::set_property(const std::string& device_name, const AnyMap& pr
|
||||
|
||||
ov::Any ov::CoreImpl::get_property_for_core(const std::string& name) const {
|
||||
if (name == ov::force_tbb_terminate.name()) {
|
||||
const auto flag = InferenceEngine::executorManager()->getTbbFlag();
|
||||
const auto flag = ov::executor_manager()->get_property(name).as<bool>();
|
||||
return decltype(ov::force_tbb_terminate)::value_type(flag);
|
||||
} else if (name == ov::cache_dir.name()) {
|
||||
return ov::Any(coreConfig.get_cache_dir());
|
||||
@ -993,7 +994,7 @@ void ov::CoreImpl::CoreConfig::set_and_update(ov::AnyMap& config) {
|
||||
it = config.find(ov::force_tbb_terminate.name());
|
||||
if (it != config.end()) {
|
||||
auto flag = it->second.as<std::string>() == CONFIG_VALUE(YES) ? true : false;
|
||||
InferenceEngine::executorManager()->setTbbFlag(flag);
|
||||
ov::executor_manager()->set_property({{it->first, flag}});
|
||||
config.erase(it);
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,7 @@
|
||||
#include "openvino/core/version.hpp"
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "openvino/runtime/icompiled_model.hpp"
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
|
||||
#ifdef OPENVINO_STATIC_LIBRARY
|
||||
# include "ie_plugins.hpp"
|
||||
@ -162,7 +162,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
InferenceEngine::ExecutorManager::Ptr executorManagerPtr;
|
||||
std::shared_ptr<ov::ExecutorManager> m_executor_manager;
|
||||
mutable std::unordered_set<std::string> opsetNames;
|
||||
// TODO: make extensions to be optional with conditional compilation
|
||||
mutable std::vector<InferenceEngine::IExtensionPtr> extensions;
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#include "openvino/runtime/iplugin.hpp"
|
||||
|
||||
ov::IPlugin::IPlugin() : m_executor_manager(InferenceEngine::executorManager()), m_is_new_api(true) {}
|
||||
ov::IPlugin::IPlugin() : m_executor_manager(ov::executor_manager()), m_is_new_api(true) {}
|
||||
|
||||
void ov::IPlugin::set_version(const ov::Version& version) {
|
||||
m_version = version;
|
||||
@ -42,7 +42,7 @@ bool ov::IPlugin::is_new_api() const {
|
||||
return m_is_new_api;
|
||||
}
|
||||
|
||||
const std::shared_ptr<InferenceEngine::ExecutorManager>& ov::IPlugin::get_executor_manager() const {
|
||||
const std::shared_ptr<ov::ExecutorManager>& ov::IPlugin::get_executor_manager() const {
|
||||
return m_executor_manager;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "any_copy.hpp"
|
||||
#include "dev/converter_utils.hpp"
|
||||
#include "ie_icore.hpp"
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -20,7 +21,7 @@ IPluginWrapper::IPluginWrapper(const std::shared_ptr<InferenceEngine::IInference
|
||||
m_plugin_name = m_old_plugin->GetName();
|
||||
m_is_new_api = m_old_plugin->IsNewAPI();
|
||||
m_core = m_old_plugin->GetCore();
|
||||
m_executor_manager = m_old_plugin->executorManager();
|
||||
m_executor_manager = m_old_plugin->executorManager()->get_ov_manager();
|
||||
}
|
||||
|
||||
const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& IPluginWrapper::update_exec_network(
|
||||
|
208
src/inference/src/dev/threading/executor_manager.cpp
Normal file
208
src/inference/src/dev/threading/executor_manager.cpp
Normal file
@ -0,0 +1,208 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
|
||||
#include "openvino/core/parallel.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
# include <tbb/task_scheduler_init.h>
|
||||
# else
|
||||
# include <oneapi/tbb/global_control.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <algorithm>
#include <atomic>
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_map>
#include <utility>
#include <vector>
|
||||
|
||||
namespace ov {
|
||||
namespace {
|
||||
class ExecutorManagerImpl : public ExecutorManager {
|
||||
public:
|
||||
~ExecutorManagerImpl();
|
||||
InferenceEngine::ITaskExecutor::Ptr get_executor(const std::string& id) override;
|
||||
InferenceEngine::IStreamsExecutor::Ptr get_idle_cpu_streams_executor(
|
||||
const InferenceEngine::IStreamsExecutor::Config& config) override;
|
||||
size_t get_executors_number() const override;
|
||||
size_t get_idle_cpu_streams_executors_number() const override;
|
||||
void clear(const std::string& id = {}) override;
|
||||
void set_property(const ov::AnyMap& properties) override;
|
||||
ov::Any get_property(const std::string& name) const override;
|
||||
|
||||
private:
|
||||
void reset_tbb();
|
||||
|
||||
std::unordered_map<std::string, InferenceEngine::ITaskExecutor::Ptr> executors;
|
||||
std::vector<std::pair<InferenceEngine::IStreamsExecutor::Config, InferenceEngine::IStreamsExecutor::Ptr>>
|
||||
cpuStreamsExecutors;
|
||||
mutable std::mutex streamExecutorMutex;
|
||||
mutable std::mutex taskExecutorMutex;
|
||||
bool tbbTerminateFlag = false;
|
||||
mutable std::mutex global_mutex;
|
||||
bool tbbThreadsCreated = false;
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
std::shared_ptr<tbb::task_scheduler_init> tbbTaskScheduler = nullptr;
|
||||
# else
|
||||
std::shared_ptr<oneapi::tbb::task_scheduler_handle> tbbTaskScheduler = nullptr;
|
||||
# endif
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Gives reset_tbb() the chance to finalize the TBB scheduler before the members are destroyed.
ExecutorManagerImpl::~ExecutorManagerImpl() {
|
||||
reset_tbb();
|
||||
}
|
||||
|
||||
// Applies the given properties under global_mutex.
// Only ov::force_tbb_terminate is acted upon; every other entry is skipped.
// With TBB threading, enabling the flag acquires a scheduler handle (once) and
// disabling it releases the handle.
void ExecutorManagerImpl::set_property(const ov::AnyMap& properties) {
    std::lock_guard<std::mutex> lock(global_mutex);
    for (const auto& property : properties) {
        if (!(property.first == ov::force_tbb_terminate.name())) {
            continue;
        }
        tbbTerminateFlag = property.second.as<bool>();
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
        if (!tbbTerminateFlag) {
            tbbTaskScheduler = nullptr;
            continue;
        }
        if (tbbTaskScheduler) {
            // A handle is already held; keep it.
            continue;
        }
#    if (TBB_INTERFACE_VERSION < 12000)
        tbbTaskScheduler = std::make_shared<tbb::task_scheduler_init>();
#    elif (TBB_INTERFACE_VERSION < 12060)
        tbbTaskScheduler =
            std::make_shared<oneapi::tbb::task_scheduler_handle>(oneapi::tbb::task_scheduler_handle::get());
#    else
        tbbTaskScheduler = std::make_shared<oneapi::tbb::task_scheduler_handle>(tbb::attach{});
#    endif
#endif
    }
}
|
||||
// Reads a property under global_mutex.
// ov::force_tbb_terminate yields the stored flag; any other name is rejected
// through OPENVINO_UNREACHABLE.
ov::Any ExecutorManagerImpl::get_property(const std::string& name) const {
    std::lock_guard<std::mutex> lock(global_mutex);
    if (!(name == ov::force_tbb_terminate.name())) {
        OPENVINO_UNREACHABLE("Property ", name, " is not supported.");
    }
    return tbbTerminateFlag;
}
|
||||
|
||||
void ExecutorManagerImpl::reset_tbb() {
|
||||
std::lock_guard<std::mutex> guard(global_mutex);
|
||||
if (tbbTerminateFlag) {
|
||||
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
|
||||
if (tbbTaskScheduler && tbbThreadsCreated) {
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
tbbTaskScheduler->terminate();
|
||||
# else
|
||||
tbb::finalize(*tbbTaskScheduler, std::nothrow);
|
||||
# endif
|
||||
}
|
||||
tbbThreadsCreated = false;
|
||||
tbbTaskScheduler = nullptr;
|
||||
#endif
|
||||
tbbTerminateFlag = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the task executor registered under `id`.
// On the first request for an id a CPUStreamsExecutor is created from a config
// built out of that id, stored in the registry and returned.
InferenceEngine::ITaskExecutor::Ptr ExecutorManagerImpl::get_executor(const std::string& id) {
    std::lock_guard<std::mutex> lock(taskExecutorMutex);

    const auto known = executors.find(id);
    if (known != executors.end()) {
        return known->second;
    }

    auto created =
        std::make_shared<InferenceEngine::CPUStreamsExecutor>(InferenceEngine::IStreamsExecutor::Config{id});
    tbbThreadsCreated = true;
    executors[id] = created;
    return created;
}
|
||||
|
||||
// Hands out a CPU streams executor for `config`, reusing a cached one when possible.
//
// A cached executor is reused only when the cache holds the sole reference to it
// (use_count() == 1) and its stored config equals `config` in name, stream count,
// threads per stream and thread-binding type/step/offset; for HYBRID_AWARE binding
// the preferred core type has to match too. Otherwise a new CPUStreamsExecutor is
// created, remembered together with `config`, and returned.
InferenceEngine::IStreamsExecutor::Ptr ExecutorManagerImpl::get_idle_cpu_streams_executor(
    const InferenceEngine::IStreamsExecutor::Config& config) {
    using StreamsExecutor = InferenceEngine::IStreamsExecutor;

    const auto same_settings = [&config](const StreamsExecutor::Config& cached) {
        const bool basics_match = cached._name == config._name && cached._streams == config._streams &&
                                  cached._threadsPerStream == config._threadsPerStream &&
                                  cached._threadBindingType == config._threadBindingType &&
                                  cached._threadBindingStep == config._threadBindingStep &&
                                  cached._threadBindingOffset == config._threadBindingOffset;
        if (!basics_match) {
            return false;
        }
        return cached._threadBindingType != StreamsExecutor::ThreadBindingType::HYBRID_AWARE ||
               cached._threadPreferredCoreType == config._threadPreferredCoreType;
    };

    std::lock_guard<std::mutex> lock(streamExecutorMutex);
    for (const auto& entry : cpuStreamsExecutors) {
        const bool idle = entry.second.use_count() == 1;
        if (idle && same_settings(entry.first)) {
            return entry.second;
        }
    }

    auto created = std::make_shared<InferenceEngine::CPUStreamsExecutor>(config);
    tbbThreadsCreated = true;
    cpuStreamsExecutors.emplace_back(config, created);
    return created;
}
|
||||
|
||||
size_t ExecutorManagerImpl::get_executors_number() const {
|
||||
std::lock_guard<std::mutex> guard(taskExecutorMutex);
|
||||
return executors.size();
|
||||
}
|
||||
|
||||
size_t ExecutorManagerImpl::get_idle_cpu_streams_executors_number() const {
|
||||
std::lock_guard<std::mutex> guard(streamExecutorMutex);
|
||||
return cpuStreamsExecutors.size();
|
||||
}
|
||||
|
||||
void ExecutorManagerImpl::clear(const std::string& id) {
|
||||
std::lock_guard<std::mutex> stream_guard(streamExecutorMutex);
|
||||
std::lock_guard<std::mutex> task_guard(taskExecutorMutex);
|
||||
if (id.empty()) {
|
||||
executors.clear();
|
||||
cpuStreamsExecutors.clear();
|
||||
} else {
|
||||
executors.erase(id);
|
||||
cpuStreamsExecutors.erase(std::remove_if(cpuStreamsExecutors.begin(),
|
||||
cpuStreamsExecutors.end(),
|
||||
[&](const std::pair<InferenceEngine::IStreamsExecutor::Config,
|
||||
InferenceEngine::IStreamsExecutor::Ptr>& it) {
|
||||
return it.first._name == id;
|
||||
}),
|
||||
cpuStreamsExecutors.end());
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class ExecutorManagerHolder {
|
||||
std::mutex _mutex;
|
||||
std::weak_ptr<ExecutorManager> _manager;
|
||||
|
||||
public:
|
||||
ExecutorManagerHolder(const ExecutorManagerHolder&) = delete;
|
||||
ExecutorManagerHolder& operator=(const ExecutorManagerHolder&) = delete;
|
||||
|
||||
ExecutorManagerHolder() = default;
|
||||
|
||||
std::shared_ptr<ov::ExecutorManager> get() {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
auto manager = _manager.lock();
|
||||
if (!manager) {
|
||||
_manager = manager = std::make_shared<ExecutorManagerImpl>();
|
||||
}
|
||||
return manager;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::shared_ptr<ExecutorManager> executor_manager() {
|
||||
static ExecutorManagerHolder executorManagerHolder;
|
||||
return executorManagerHolder.get();
|
||||
}
|
||||
|
||||
} // namespace ov
|
@ -5,6 +5,8 @@
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
|
||||
#include "ie_parallel.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
@ -23,7 +25,7 @@ namespace InferenceEngine {
|
||||
namespace {
|
||||
class ExecutorManagerImpl : public ExecutorManager {
|
||||
public:
|
||||
~ExecutorManagerImpl();
|
||||
ExecutorManagerImpl(const std::shared_ptr<ov::ExecutorManager>& manager);
|
||||
ITaskExecutor::Ptr getExecutor(const std::string& id) override;
|
||||
IStreamsExecutor::Ptr getIdleCPUStreamsExecutor(const IStreamsExecutor::Config& config) override;
|
||||
size_t getExecutorsNumber() const override;
|
||||
@ -33,134 +35,47 @@ public:
|
||||
bool getTbbFlag() override;
|
||||
|
||||
private:
|
||||
void resetTbb();
|
||||
std::unordered_map<std::string, ITaskExecutor::Ptr> executors;
|
||||
std::vector<std::pair<IStreamsExecutor::Config, IStreamsExecutor::Ptr>> cpuStreamsExecutors;
|
||||
mutable std::mutex streamExecutorMutex;
|
||||
mutable std::mutex taskExecutorMutex;
|
||||
bool tbbTerminateFlag = false;
|
||||
mutable std::mutex tbbMutex;
|
||||
bool tbbThreadsCreated = false;
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
std::shared_ptr<tbb::task_scheduler_init> tbbTaskScheduler = nullptr;
|
||||
# else
|
||||
std::shared_ptr<oneapi::tbb::task_scheduler_handle> tbbTaskScheduler = nullptr;
|
||||
# endif
|
||||
#endif
|
||||
std::shared_ptr<ov::ExecutorManager> m_manager;
|
||||
std::shared_ptr<ov::ExecutorManager> get_ov_manager() const override {
|
||||
return m_manager;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
ExecutorManagerImpl::~ExecutorManagerImpl() {
|
||||
resetTbb();
|
||||
}
|
||||
ExecutorManagerImpl::ExecutorManagerImpl(const std::shared_ptr<ov::ExecutorManager>& manager) : m_manager(manager) {}
|
||||
|
||||
void ExecutorManagerImpl::setTbbFlag(bool flag) {
|
||||
std::lock_guard<std::mutex> guard(tbbMutex);
|
||||
tbbTerminateFlag = flag;
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
if (tbbTerminateFlag) {
|
||||
if (!tbbTaskScheduler) {
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
tbbTaskScheduler = std::make_shared<tbb::task_scheduler_init>();
|
||||
# elif (TBB_INTERFACE_VERSION < 12060)
|
||||
tbbTaskScheduler =
|
||||
std::make_shared<oneapi::tbb::task_scheduler_handle>(oneapi::tbb::task_scheduler_handle::get());
|
||||
# else
|
||||
tbbTaskScheduler = std::make_shared<oneapi::tbb::task_scheduler_handle>(tbb::attach{});
|
||||
# endif
|
||||
}
|
||||
} else {
|
||||
tbbTaskScheduler = nullptr;
|
||||
}
|
||||
#endif
|
||||
m_manager->set_property({{ov::force_tbb_terminate.name(), flag}});
|
||||
}
|
||||
|
||||
bool ExecutorManagerImpl::getTbbFlag() {
|
||||
std::lock_guard<std::mutex> guard(tbbMutex);
|
||||
return tbbTerminateFlag;
|
||||
}
|
||||
|
||||
void ExecutorManagerImpl::resetTbb() {
|
||||
std::lock_guard<std::mutex> guard(tbbMutex);
|
||||
if (tbbTerminateFlag) {
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
if (tbbTaskScheduler && tbbThreadsCreated) {
|
||||
# if (TBB_INTERFACE_VERSION < 12000)
|
||||
tbbTaskScheduler->terminate();
|
||||
# else
|
||||
tbb::finalize(*tbbTaskScheduler, std::nothrow);
|
||||
# endif
|
||||
}
|
||||
tbbThreadsCreated = false;
|
||||
tbbTaskScheduler = nullptr;
|
||||
#endif
|
||||
tbbTerminateFlag = false;
|
||||
}
|
||||
return m_manager->get_property(ov::force_tbb_terminate.name()).as<bool>();
|
||||
}
|
||||
|
||||
ITaskExecutor::Ptr ExecutorManagerImpl::getExecutor(const std::string& id) {
|
||||
std::lock_guard<std::mutex> guard(taskExecutorMutex);
|
||||
auto foundEntry = executors.find(id);
|
||||
if (foundEntry == executors.end()) {
|
||||
auto newExec = std::make_shared<CPUStreamsExecutor>(IStreamsExecutor::Config{id});
|
||||
tbbThreadsCreated = true;
|
||||
executors[id] = newExec;
|
||||
return newExec;
|
||||
}
|
||||
return foundEntry->second;
|
||||
return m_manager->get_executor(id);
|
||||
}
|
||||
|
||||
IStreamsExecutor::Ptr ExecutorManagerImpl::getIdleCPUStreamsExecutor(const IStreamsExecutor::Config& config) {
|
||||
std::lock_guard<std::mutex> guard(streamExecutorMutex);
|
||||
for (const auto& it : cpuStreamsExecutors) {
|
||||
const auto& executor = it.second;
|
||||
if (executor.use_count() != 1)
|
||||
continue;
|
||||
|
||||
const auto& executorConfig = it.first;
|
||||
if (executorConfig._name == config._name && executorConfig._streams == config._streams &&
|
||||
executorConfig._threadsPerStream == config._threadsPerStream &&
|
||||
executorConfig._threadBindingType == config._threadBindingType &&
|
||||
executorConfig._threadBindingStep == config._threadBindingStep &&
|
||||
executorConfig._threadBindingOffset == config._threadBindingOffset)
|
||||
if (executorConfig._threadBindingType != IStreamsExecutor::ThreadBindingType::HYBRID_AWARE ||
|
||||
executorConfig._threadPreferredCoreType == config._threadPreferredCoreType)
|
||||
return executor;
|
||||
}
|
||||
auto newExec = std::make_shared<CPUStreamsExecutor>(config);
|
||||
tbbThreadsCreated = true;
|
||||
cpuStreamsExecutors.emplace_back(std::make_pair(config, newExec));
|
||||
return newExec;
|
||||
return m_manager->get_idle_cpu_streams_executor(config);
|
||||
}
|
||||
|
||||
size_t ExecutorManagerImpl::getExecutorsNumber() const {
|
||||
std::lock_guard<std::mutex> guard(taskExecutorMutex);
|
||||
return executors.size();
|
||||
return m_manager->get_executors_number();
|
||||
}
|
||||
|
||||
size_t ExecutorManagerImpl::getIdleCPUStreamsExecutorsNumber() const {
|
||||
std::lock_guard<std::mutex> guard(streamExecutorMutex);
|
||||
return cpuStreamsExecutors.size();
|
||||
return m_manager->get_idle_cpu_streams_executors_number();
|
||||
}
|
||||
|
||||
void ExecutorManagerImpl::clear(const std::string& id) {
|
||||
std::lock_guard<std::mutex> stream_guard(streamExecutorMutex);
|
||||
std::lock_guard<std::mutex> task_guard(taskExecutorMutex);
|
||||
if (id.empty()) {
|
||||
executors.clear();
|
||||
cpuStreamsExecutors.clear();
|
||||
} else {
|
||||
executors.erase(id);
|
||||
cpuStreamsExecutors.erase(
|
||||
std::remove_if(cpuStreamsExecutors.begin(),
|
||||
cpuStreamsExecutors.end(),
|
||||
[&](const std::pair<IStreamsExecutor::Config, IStreamsExecutor::Ptr>& it) {
|
||||
return it.first._name == id;
|
||||
}),
|
||||
cpuStreamsExecutors.end());
|
||||
}
|
||||
return m_manager->clear(id);
|
||||
}
|
||||
|
||||
std::shared_ptr<InferenceEngine::ExecutorManager> create_old_manager(
|
||||
const std::shared_ptr<ov::ExecutorManager>& manager) {
|
||||
return std::make_shared<ExecutorManagerImpl>(manager);
|
||||
}
|
||||
|
||||
namespace {
|
||||
@ -179,7 +94,7 @@ public:
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
auto manager = _manager.lock();
|
||||
if (!manager) {
|
||||
_manager = manager = std::make_shared<ExecutorManagerImpl>();
|
||||
_manager = manager = create_old_manager(ov::executor_manager());
|
||||
}
|
||||
return manager;
|
||||
}
|
||||
|
@ -109,6 +109,7 @@ program::program(engine& engine_ref,
|
||||
processing_order(),
|
||||
is_body_program(is_body_program),
|
||||
is_subgroup_local_block_io_supported(-1) {
|
||||
_config.apply_user_properties(_engine.get_device_info());
|
||||
init_primitives();
|
||||
set_options();
|
||||
query_local_block_io_supported();
|
||||
@ -141,6 +142,7 @@ program::program(engine& engine_ref,
|
||||
_task_executor(task_executor),
|
||||
processing_order(),
|
||||
is_subgroup_local_block_io_supported(-1) {
|
||||
_config.apply_user_properties(_engine.get_device_info());
|
||||
init_primitives();
|
||||
set_options();
|
||||
query_local_block_io_supported();
|
||||
@ -160,7 +162,9 @@ program::program(engine& engine)
|
||||
_stream(_engine.create_stream({})),
|
||||
_config(),
|
||||
processing_order(),
|
||||
is_subgroup_local_block_io_supported(-1) { }
|
||||
is_subgroup_local_block_io_supported(-1) {
|
||||
_config.apply_user_properties(_engine.get_device_info());
|
||||
}
|
||||
program::~program() {
|
||||
query_local_block_io_supported();
|
||||
}
|
||||
|
@ -4,27 +4,28 @@
|
||||
|
||||
#include "common.cl"
|
||||
|
||||
#define GET_FILTER_OS_IS_YX_ISV16_OSV16_INDEX(prefix, o, i, y, x, sub_group_size) \
|
||||
CAT(prefix, _OFFSET) + \
|
||||
((o) % (sub_group_size)) + \
|
||||
(sub_group_size)*( \
|
||||
(x)*(sub_group_size)*CAT(prefix, _X_PITCH) + \
|
||||
(y)*(sub_group_size)*CAT(prefix, _Y_PITCH) + \
|
||||
((i) % (sub_group_size)) + \
|
||||
((i) / (sub_group_size))*(sub_group_size)*CAT(prefix, _IFM_PITCH) + \
|
||||
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
|
||||
#define GET_FILTER_OS_IS_YX_ISV_OSV_INDEX(prefix, o, i, y, x, osv, isv) \
|
||||
get_os_is_zyx_isv_osv_index( \
|
||||
o, i, 0, y, x, \
|
||||
CAT(prefix, _SIZE_X), \
|
||||
CAT(prefix, _SIZE_Y), \
|
||||
1, \
|
||||
CAT(prefix, _IFM_NUM), \
|
||||
CAT(prefix, _OFM_NUM), \
|
||||
osv, \
|
||||
isv \
|
||||
)
|
||||
|
||||
#define GET_FILTER_OS_IS_ZYX_ISV16_OSV16_INDEX(prefix, o, i, z, y, x, sub_group_size) \
|
||||
CAT(prefix, _OFFSET) + \
|
||||
((o) % (sub_group_size)) + \
|
||||
(sub_group_size)*( \
|
||||
(x)*(sub_group_size)*CAT(prefix, _X_PITCH) + \
|
||||
(y)*(sub_group_size)*CAT(prefix, _Y_PITCH) + \
|
||||
(z)*(sub_group_size)*CAT(prefix, _Z_PITCH) + \
|
||||
((i) % (sub_group_size)) + \
|
||||
((i) / (sub_group_size))*(sub_group_size)*CAT(prefix, _IFM_PITCH) + \
|
||||
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
|
||||
#define GET_FILTER_OS_IS_ZYX_ISV_OSV_INDEX(prefix, o, i, z, y, x, osv, isv) \
|
||||
get_os_is_zyx_isv_osv_index( \
|
||||
o, i, z, y, x, \
|
||||
CAT(prefix, _SIZE_X), \
|
||||
CAT(prefix, _SIZE_Y), \
|
||||
CAT(prefix, _SIZE_Z), \
|
||||
CAT(prefix, _IFM_NUM), \
|
||||
CAT(prefix, _OFM_NUM), \
|
||||
osv, \
|
||||
isv \
|
||||
)
|
||||
|
||||
#define GET_FILTER_IS_OS_ZYX_ISV16_OSV16_INDEX(prefix, o, i, z, y, x, sub_group_size) \
|
||||
@ -85,6 +86,32 @@
|
||||
CAT(prefix, _OFFSET) \
|
||||
)
|
||||
|
||||
inline uint get_os_is_zyx_isv_osv_index(uint o, uint i, uint z, uint y, uint x,
|
||||
uint x_size, uint y_size, uint z_size, uint i_size, uint o_size, uint osv_size, uint isv_size)
|
||||
{
|
||||
const uint isv = i % isv_size;
|
||||
const uint osv = o % osv_size;
|
||||
const uint is = i / isv_size;
|
||||
const uint os = o / osv_size;
|
||||
|
||||
const uint x_pitch = osv_size * isv_size;
|
||||
const uint y_pitch = x_pitch * x_size;
|
||||
const uint z_pitch = y_pitch * y_size;
|
||||
const uint is_pitch = z_pitch * z_size;
|
||||
const uint os_pitch = is_pitch * ((i_size + isv_size - 1) / isv_size);
|
||||
|
||||
const uint output_offset =
|
||||
osv +
|
||||
isv * osv_size +
|
||||
x * x_pitch +
|
||||
y * y_pitch +
|
||||
z * z_pitch +
|
||||
is * is_pitch +
|
||||
os * os_pitch;
|
||||
|
||||
return output_offset;
|
||||
}
|
||||
|
||||
inline uint get_os_is_zyx_osv_isv_index(uint o, uint i, uint z, uint y, uint x,
|
||||
uint x_size, uint y_size, uint z_size, uint i_size, uint o_size, uint osv_size, uint isv_size)
|
||||
{
|
||||
@ -329,7 +356,7 @@ inline uint get_os_zyxi_osv16_index(uint o, uint i, uint z, uint y, uint x, uint
|
||||
|
||||
#define GET_FILTER_INDEX_5D_SAFE(prefix, g, o, i, z, y, x) GET_FILTER_GOIZYX_SAFE(prefix, g, o, i, z, y, x)
|
||||
|
||||
#define GET_FILTER_OS_IYX_OSV8_INDEX(prefix, o, i, y, x, sub_group_size) \
|
||||
#define GET_FILTER_OS_IYX_OSV_INDEX(prefix, o, i, y, x, sub_group_size) \
|
||||
CAT(prefix, _OFFSET) + \
|
||||
((o) % (sub_group_size)) + \
|
||||
(sub_group_size)*( \
|
||||
@ -339,7 +366,7 @@ inline uint get_os_zyxi_osv16_index(uint o, uint i, uint z, uint y, uint x, uint
|
||||
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
|
||||
)
|
||||
|
||||
#define GET_FILTER_OS_IYX_OSV8_ROTATE_180_INDEX(prefix, o, i, y, x, sub_group_size) \
|
||||
#define GET_FILTER_OS_IYX_OSV_ROTATE_180_INDEX(prefix, o, i, y, x, sub_group_size) \
|
||||
CAT(prefix, _OFFSET) + \
|
||||
((o) % (sub_group_size)) + \
|
||||
(sub_group_size)*( \
|
||||
@ -1495,16 +1522,6 @@ inline uint get_os_i_yxs_osv_yxsv4_index(uint o, uint i, uint y, uint x, uint i_
|
||||
CAT(prefix, _SIZE_Y), \
|
||||
4)
|
||||
|
||||
#define GET_FILTER_OS_IYX_OSV32__AI32_INDEX(prefix, o, i, y, x, sub_group_size) \
|
||||
CAT(prefix, _OFFSET) + \
|
||||
((o) % (sub_group_size)) + \
|
||||
(sub_group_size)*( \
|
||||
(x)*CAT(prefix, _X_PITCH) + \
|
||||
(y)*CAT(prefix, _Y_PITCH) + \
|
||||
(i)*CAT(prefix, _IFM_PITCH) + \
|
||||
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
|
||||
)
|
||||
|
||||
#define GET_FILTER_G_OS_IYX_OSV16(prefix, g, o, i, y, x, sub_group_size) \
|
||||
CAT(prefix, _OFFSET) + \
|
||||
(g * CAT(prefix, _GROUPS_PITCH)) + \
|
||||
|
@ -25,19 +25,20 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x
|
||||
return GET_FILTER_INDEX_5D(INPUT0, 0, o, i, z, y, x);
|
||||
#elif defined INPUT0_LAYOUT_OS_IYX_OSV16 || \
|
||||
defined INPUT0_LAYOUT_OS_I_OSV16 || \
|
||||
defined INPUT0_LAYOUT_OS_I_OSV8__AI8 || \
|
||||
defined INPUT0_LAYOUT_OS_I_OSV16__AI8
|
||||
return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 16);
|
||||
#elif defined INPUT0_LAYOUT_OS_I_OSV8__AI8
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 8);
|
||||
#elif defined INPUT0_LAYOUT_IYX_OSV32
|
||||
return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, 32);
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 32);
|
||||
#elif defined INPUT0_LAYOUT_OS_IYX_OSV32__AI32
|
||||
return GET_FILTER_OS_IYX_OSV32__AI32_INDEX(OUTPUT, o, i, y, x, 32);
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 32);
|
||||
#elif defined INPUT0_LAYOUT_O_IS_YX_ISV16
|
||||
return GET_FILTER_O_IS_YX_ISV16_INDEX(INPUT0, o, i, y, x, 16);
|
||||
#elif defined INPUT0_LAYOUT_IYX_OSV64
|
||||
return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, 64);
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 64);
|
||||
#elif defined INPUT0_LAYOUT_OS_IYX_OSV16_ROTATE_180
|
||||
return GET_FILTER_OS_IYX_OSV8_ROTATE_180_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
|
||||
return GET_FILTER_OS_IYX_OSV_ROTATE_180_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
|
||||
#elif defined INPUT0_LAYOUT_I_YXS_OS_YXSV2_OSV16
|
||||
return GET_FILTER_I_YXS_OS_YXSV2_OSV_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
|
||||
#elif defined INPUT0_LAYOUT_IY_XS_OS_XSV2_OSV16__AO32 || defined OUTPUT_LAYOUT_IY_XS_OS_XSV2_OSV8__AO32
|
||||
@ -61,11 +62,11 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x
|
||||
#elif defined INPUT0_LAYOUT_OS_IS_Y_X8_OSV8_ISV4_SWIZZLED_BY_4
|
||||
return GET_FILTER_OS_IS_Y_X8_OSV8_ISV4_SWIZZLED_BY_4(INPUT0, o, i, y, x);
|
||||
#elif defined INPUT0_LAYOUT_OS_IS_YX_ISV16_OSV16
|
||||
return GET_FILTER_OS_IS_YX_ISV16_OSV16_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
|
||||
return GET_FILTER_OS_IS_YX_ISV_OSV_INDEX(INPUT0, o, i, y, x, 16, 16);
|
||||
#elif defined INPUT0_LAYOUT_OIYX_O16
|
||||
return GET_FILTER_OIYX_O16(INPUT0, o, i, y, x);
|
||||
#elif defined INPUT0_LAYOUT_OS_IS_ZYX_ISV16_OSV16
|
||||
return GET_FILTER_OS_IS_ZYX_ISV16_OSV16_INDEX(INPUT0, o, i, z, y, x, SUB_GROUP_SIZE);
|
||||
return GET_FILTER_OS_IS_ZYX_ISV_OSV_INDEX(INPUT0, o, i, z, y, x, 16, 16);
|
||||
#elif defined INPUT0_LAYOUT_IS_OS_ZYX_ISV16_OSV16
|
||||
return GET_FILTER_IS_OS_ZYX_ISV16_OSV16_INDEX(INPUT0, o, i, z, y, x, SUB_GROUP_SIZE);
|
||||
#elif defined INPUT0_LAYOUT_IS_OS_YX_ISV16_OSV16
|
||||
@ -219,19 +220,20 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint
|
||||
return GET_FILTER_INDEX_5D(OUTPUT, 0, o, i, z, y, x);
|
||||
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV16 || \
|
||||
defined OUTPUT_LAYOUT_OS_I_OSV16 || \
|
||||
defined OUTPUT_LAYOUT_OS_I_OSV8__AI8 || \
|
||||
defined OUTPUT_LAYOUT_OS_I_OSV16__AI8
|
||||
return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 16);
|
||||
#elif defined OUTPUT_LAYOUT_OS_I_OSV8__AI8
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 8);
|
||||
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV32
|
||||
return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, 32);
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 32);
|
||||
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV32__AI32
|
||||
return GET_FILTER_OS_IYX_OSV32__AI32_INDEX(OUTPUT, o, i, y, x, 32);
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 32);
|
||||
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV64
|
||||
return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, 64);
|
||||
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 64);
|
||||
#elif defined OUTPUT_LAYOUT_O_IS_YX_ISV16
|
||||
return GET_FILTER_O_IS_YX_ISV16_INDEX(OUTPUT, o, i, y, x, 16);
|
||||
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV16_ROTATE_180
|
||||
return GET_FILTER_OS_IYX_OSV8_ROTATE_180_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
|
||||
return GET_FILTER_OS_IYX_OSV_ROTATE_180_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
|
||||
#elif defined OUTPUT_LAYOUT_I_YXS_OS_YXSV2_OSV16
|
||||
return GET_FILTER_I_YXS_OS_YXSV2_OSV_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
|
||||
#elif defined OUTPUT_LAYOUT_IY_XS_OS_XSV2_OSV16__AO32 || defined OUTPUT_LAYOUT_IY_XS_OS_XSV2_OSV8__AO32
|
||||
@ -313,11 +315,11 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint
|
||||
#elif defined OUTPUT_LAYOUT_OS_IS_ZYX_OSA4_ISA8_OSV8_ISV4_SWIZZLED_BY_4
|
||||
return GET_FILTER_OS_IS_ZYX_OSA4_ISA8_OSV8_ISV4_SWIZZLED_BY_4_INDEX(OUTPUT, o, i, z, y, x);
|
||||
#elif defined OUTPUT_LAYOUT_OS_IS_YX_ISV16_OSV16
|
||||
return GET_FILTER_OS_IS_YX_ISV16_OSV16_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
|
||||
return GET_FILTER_OS_IS_YX_ISV_OSV_INDEX(OUTPUT, o, i, y, x, 16, 16);
|
||||
#elif defined OUTPUT_LAYOUT_OS_YXI_OSV16
|
||||
return GET_FILTER_OS_YXI_OSV16(OUTPUT, o, i, y, x);
|
||||
#elif defined OUTPUT_LAYOUT_OS_IS_ZYX_ISV16_OSV16
|
||||
return GET_FILTER_OS_IS_ZYX_ISV16_OSV16_INDEX(OUTPUT, o, i, z, y, x, SUB_GROUP_SIZE);
|
||||
return GET_FILTER_OS_IS_ZYX_ISV_OSV_INDEX(OUTPUT, o, i, z, y, x, 16, 16);
|
||||
#elif defined OUTPUT_LAYOUT_IS_OS_ZYX_ISV16_OSV16
|
||||
return GET_FILTER_IS_OS_ZYX_ISV16_OSV16_INDEX(OUTPUT, o, i, z, y, x, SUB_GROUP_SIZE);
|
||||
#elif defined OUTPUT_LAYOUT_IS_OS_YX_ISV16_OSV16
|
||||
|
@ -211,6 +211,10 @@ clEnqueueMemFillINTEL_fn)(
|
||||
|
||||
#define CL_DEVICE_UUID_KHR 0x106A
|
||||
|
||||
#endif // cl_khr_device_uuid
|
||||
|
||||
#ifndef OV_GPU_USE_OPENCL_HPP
|
||||
|
||||
// for C++ wrappers
|
||||
using uuid_array = std::array<cl_uchar, CL_UUID_SIZE_KHR>;
|
||||
|
||||
@ -220,7 +224,7 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_UUID_KHR, uuid_array)
|
||||
} // namespace detail
|
||||
} // namespace cl
|
||||
|
||||
#endif // cl_khr_device_uuid
|
||||
#endif // OV_GPU_USE_OPENCL_HPP
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_device_attribute_query
|
||||
|
@ -1717,14 +1717,14 @@ TEST_P(conv_swap_xy_with_eltwise_diff_sizes, basic) {
|
||||
// in_shape; out_shape; eltw_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
|
||||
#define CASE_CONV_ELTW_FP16_SWAP_XY_1 { 1, 16, 1, 5 }, { 1, 32, 1, 7 }, { 1, 32, 1, 1 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::os_iyx_osv16, data_types::f16, format::bfyx
|
||||
#define CASE_CONV_ELTW_FP16_SWAP_XY_2 { 1, 16, 1, 5 }, { 1, 32, 1, 7 }, { 1, 32, 1, 7 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::os_iyx_osv16, data_types::f16, format::bfyx
|
||||
#define CASE_CONV_ELTW_FP32_SWAP_XY_1 { 3, 16, 1, 5 }, { 3, 32, 1, 7 }, { 1, 32, 1, 1 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
|
||||
#define CASE_CONV_ELTW_FP32_SWAP_XY_2 { 3, 16, 1, 5 }, { 3, 32, 1, 7 }, { 3, 32, 1, 7 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
|
||||
#define CASE_CONV_ELTW_FP16_SWAP_XY_3 { 3, 16, 1, 5 }, { 3, 32, 1, 7 }, { 1, 32, 1, 1 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::os_iyx_osv16, data_types::f16, format::bfyx
|
||||
#define CASE_CONV_ELTW_FP16_SWAP_XY_4 { 3, 16, 1, 5 }, { 3, 32, 1, 7 }, { 3, 32, 1, 7 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::os_iyx_osv16, data_types::f16, format::bfyx
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_swap_xy_with_eltwise_diff_sizes, ::testing::ValuesIn(std::vector<conv_eltw_test_params>{
|
||||
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_1, 3, 3, 4 },
|
||||
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_2, 3, 3, 4 },
|
||||
conv_eltw_test_params{ CASE_CONV_ELTW_FP32_SWAP_XY_1, 3, 3, 4 },
|
||||
conv_eltw_test_params{ CASE_CONV_ELTW_FP32_SWAP_XY_2, 3, 3, 4 },
|
||||
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_1, 3, 2, 4 },
|
||||
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_2, 3, 2, 4 },
|
||||
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_3, 3, 2, 4 },
|
||||
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_4, 3, 2, 4 },
|
||||
}));
|
||||
|
||||
class conv_scale_activation_eltwise_fp32_quantize_i8 : public ConvEltwTest {};
|
||||
|
@ -74,20 +74,15 @@ public:
|
||||
ASSERT_EQ(outputs_ref.size(), outputs_fused.size());
|
||||
ASSERT_EQ(outputs_ref.size(), size_t(1));
|
||||
|
||||
auto output_not_fused_prim = outputs_ref.begin()->second.get_memory();
|
||||
auto output_fused_prim = outputs_fused.begin()->second.get_memory();
|
||||
if (output_not_fused_prim->get_layout().data_type == data_types::f32) {
|
||||
cldnn::mem_lock<float> ref(output_not_fused_prim, get_test_stream());
|
||||
cldnn::mem_lock<float> output_ptr(output_fused_prim, get_test_stream());
|
||||
for (size_t i = 0; i < output_fused_prim->get_layout().count(); i++) {
|
||||
ASSERT_NEAR(ref[i], output_ptr[i], tolerance) << "i = " << i;
|
||||
}
|
||||
} else {
|
||||
cldnn::mem_lock<int16_t> ref(output_not_fused_prim, get_test_stream());
|
||||
cldnn::mem_lock<int16_t> output_ptr(output_fused_prim, get_test_stream());
|
||||
for (size_t i = 0; i < output_fused_prim->get_layout().count(); i++) {
|
||||
ASSERT_NEAR(half_to_float(ref[i]), half_to_float(output_ptr[i]), tolerance) << "i = " << i;
|
||||
}
|
||||
auto val_ref=get_output_values_to_float(not_fused, outputs_ref.begin()->first);
|
||||
auto val_opt=get_output_values_to_float(fused, outputs_fused.begin()->first);
|
||||
ASSERT_EQ(val_ref.size(), val_opt.size());
|
||||
for (size_t i = 0; i < val_ref.size(); i++) {
|
||||
ASSERT_NEAR(val_ref[i], val_opt[i], tolerance)
|
||||
<< "tolerance = " << tolerance
|
||||
<< "\ni = " << i
|
||||
<< "\nref[i] = " << val_ref[i]
|
||||
<< "\nopt[i] = " << val_opt[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -74,11 +74,6 @@ public:
|
||||
}
|
||||
|
||||
layout get_per_channel_layout(gemm_test_params& p) {
|
||||
// WA: per channel binary post-operation is not supported for onednn gemm. Use single value for such case.
|
||||
if (engine.get_device_info().supports_immad){
|
||||
std::cout << "per_channel layout for onednn gemm not supported." << std::endl;
|
||||
return layout{p.default_type, p.default_format, tensor{1, 1, 1, 1}};
|
||||
}
|
||||
return layout{ p.default_type, p.default_format, tensor{ 1, p.in_shapes.at(0).feature[0], 1, 1 } };
|
||||
}
|
||||
|
||||
|
@ -589,6 +589,26 @@ std::vector<float> get_output_values_to_float(network& net, const primitive_id&
|
||||
ret.push_back(mem[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline std::vector<float> get_output_values_to_float(network& net, const primitive_id& output_id, size_t max_cnt = std::numeric_limits<size_t>::max()) {
|
||||
switch(net.get_output_layout(output_id).data_type){
|
||||
case data_types::f16:
|
||||
return get_output_values_to_float<FLOAT16>(net, output_id, max_cnt);
|
||||
case data_types::f32:
|
||||
return get_output_values_to_float<float>(net, output_id, max_cnt);
|
||||
case data_types::i8:
|
||||
return get_output_values_to_float<int8_t>(net, output_id, max_cnt);
|
||||
case data_types::u8:
|
||||
return get_output_values_to_float<uint8_t>(net, output_id, max_cnt);
|
||||
case data_types::i32:
|
||||
return get_output_values_to_float<int32_t>(net, output_id, max_cnt);
|
||||
case data_types::i64:
|
||||
return get_output_values_to_float<int64_t>(net, output_id, max_cnt);
|
||||
default:
|
||||
IE_THROW() << "Unknown output data_type";
|
||||
}
|
||||
}
|
||||
|
||||
double default_tolerance(data_types dt);
|
||||
// inline void print_bin_blob(cldnn::memory& mem, std::string name)
|
||||
// {
|
||||
|
@ -28,7 +28,6 @@ add_library(openvino::interpreter_backend ALIAS interpreter_backend)
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
ie_add_compiler_flags(-Wno-missing-declarations)
|
||||
ie_add_compiler_flags(-Wno-sign-compare)
|
||||
endif()
|
||||
|
||||
ie_faster_build(interpreter_backend UNITY)
|
||||
|
@ -35,7 +35,7 @@ Plugin::Plugin() {
|
||||
_backend = ngraph::runtime::Backend::create();
|
||||
|
||||
// create default stream executor with a given name
|
||||
_waitExecutor = get_executor_manager()->getIdleCPUStreamsExecutor({wait_executor_name});
|
||||
_waitExecutor = get_executor_manager()->get_idle_cpu_streams_executor({wait_executor_name});
|
||||
}
|
||||
// ! [plugin:ctor]
|
||||
|
||||
@ -96,7 +96,7 @@ std::shared_ptr<ov::ICompiledModel> TemplatePlugin::Plugin::compile_model(const
|
||||
auto compiled_model =
|
||||
std::make_shared<CompiledModel>(model->clone(),
|
||||
shared_from_this(),
|
||||
get_executor_manager()->getIdleCPUStreamsExecutor(streamsExecutorConfig),
|
||||
get_executor_manager()->get_idle_cpu_streams_executor(streamsExecutorConfig),
|
||||
fullConfig);
|
||||
return compiled_model;
|
||||
}
|
||||
@ -136,7 +136,7 @@ std::shared_ptr<ov::ICompiledModel> TemplatePlugin::Plugin::import_model(std::is
|
||||
auto compiled_model =
|
||||
std::make_shared<CompiledModel>(ov_model,
|
||||
shared_from_this(),
|
||||
get_executor_manager()->getIdleCPUStreamsExecutor(streamsExecutorConfig),
|
||||
get_executor_manager()->get_idle_cpu_streams_executor(streamsExecutorConfig),
|
||||
fullConfig);
|
||||
return compiled_model;
|
||||
}
|
||||
|
@ -8,11 +8,23 @@ import numpy as np
|
||||
import openvino.runtime as ov
|
||||
import pytest
|
||||
import torch
|
||||
import unittest
|
||||
from openvino.runtime import PartialShape, Dimension, Model, Type
|
||||
|
||||
from common.mo_convert_test_class import CommonMOConvertTest
|
||||
|
||||
|
||||
class MyTorchOp(torch.autograd.Function):
|
||||
@staticmethod
|
||||
def symbolic(g, in_positions):
|
||||
return g.op("MyTorchOp", in_positions)
|
||||
|
||||
@staticmethod
|
||||
def forward(self, in_positions):
|
||||
out_pos = in_positions.reshape(-1)
|
||||
return out_pos + 0.5
|
||||
|
||||
|
||||
def make_pt_model_one_input():
|
||||
from torch import nn
|
||||
class NeuralNetwork(nn.Module):
|
||||
@ -735,3 +747,30 @@ class TestMoConvertPyTorch(CommonMOConvertTest):
|
||||
if mo_params is not None:
|
||||
test_params.update(mo_params)
|
||||
self._test_by_ref_graph(temp_dir, test_params, graph_ref, compare_tensor_names=False)
|
||||
|
||||
|
||||
def create_pt_model_with_custom_op():
|
||||
#
|
||||
# Create PyTorch model with custom operation
|
||||
#
|
||||
import torch.nn as nn
|
||||
|
||||
class MyModel(nn.Module):
|
||||
def __init__(self):
|
||||
super(MyModel, self).__init__()
|
||||
self.my_op = MyTorchOp()
|
||||
|
||||
def forward(self, x):
|
||||
return self.my_op.apply(x)
|
||||
|
||||
return MyModel()
|
||||
|
||||
|
||||
class ConvertONNXFallthroughTest(unittest.TestCase):
|
||||
def test_onnx_fallthrough(self):
|
||||
from openvino.tools.mo import convert_model
|
||||
pytorch_model = create_pt_model_with_custom_op()
|
||||
|
||||
# Check that ONNX conversion passed, so ONNX frontend raises error message of unsupported op.
|
||||
with self.assertRaisesRegex(RuntimeError, ".*OpenVINO does not support the following ONNX operations: MyTorchOp.*"):
|
||||
convert_model(pytorch_model, input_shape=[1, 2, 3], use_legacy_frontend=True)
|
||||
|
103
tests/layer_tests/pytorch_tests/test_einsum.py
Normal file
103
tests/layer_tests/pytorch_tests/test_einsum.py
Normal file
@ -0,0 +1,103 @@
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import pytest
|
||||
|
||||
from pytorch_layer_test_class import PytorchLayerTest
|
||||
|
||||
|
||||
class TestEinsumBatchMatMul(PytorchLayerTest):
|
||||
def _prepare_input(self):
|
||||
import numpy as np
|
||||
|
||||
return (np.random.randn(5, 2, 3).astype(np.float32), np.random.randn(5, 3, 4).astype(np.float32),)
|
||||
|
||||
def create_model(self):
|
||||
import torch
|
||||
|
||||
class EinsumModelBatchMatmul(torch.nn.Module):
|
||||
def forward(self, x, y):
|
||||
eqn = "bij, bjk -> bik"
|
||||
return torch.einsum(eqn, x, y)
|
||||
|
||||
ref_net = None
|
||||
|
||||
return EinsumModelBatchMatmul(), ref_net, "aten::einsum"
|
||||
|
||||
@pytest.mark.nightly
|
||||
@pytest.mark.precommit
|
||||
def test_einsum_batch_matmul(self, ie_device, precision, ir_version):
|
||||
self._test(*self.create_model(), ie_device, precision, ir_version)
|
||||
|
||||
|
||||
class TestEinsumBatchDiagonal(PytorchLayerTest):
|
||||
def _prepare_input(self):
|
||||
import numpy as np
|
||||
|
||||
return (np.random.randn(3, 5, 5).astype(np.float32),)
|
||||
|
||||
def create_model(self):
|
||||
import torch
|
||||
|
||||
class EinsumModelBatchDiagonal(torch.nn.Module):
|
||||
def forward(self, x):
|
||||
eqn = "kii -> ki"
|
||||
return torch.einsum(eqn, x)
|
||||
|
||||
ref_net = None
|
||||
|
||||
return EinsumModelBatchDiagonal(), ref_net, "aten::einsum"
|
||||
|
||||
@pytest.mark.nightly
|
||||
@pytest.mark.precommit
|
||||
@pytest.mark.xfail(reason='OpenVINO CPU plugin does not support einsum diagonal')
|
||||
def test_einsum_batch_diagonal(self, ie_device, precision, ir_version):
|
||||
self._test(*self.create_model(), ie_device, precision, ir_version, dynamic_shapes=False)
|
||||
|
||||
|
||||
class TestEinsumInnerProd(PytorchLayerTest):
|
||||
def _prepare_input(self):
|
||||
import numpy as np
|
||||
|
||||
return (np.random.randn(5).astype(np.float32), np.random.randn(5).astype(np.float32))
|
||||
|
||||
def create_model(self):
|
||||
import torch
|
||||
|
||||
class EinsumModelInnerProd(torch.nn.Module):
|
||||
def forward(self, x, y):
|
||||
eqn = "i,i"
|
||||
return torch.einsum(eqn, x, y)
|
||||
|
||||
ref_net = None
|
||||
|
||||
return EinsumModelInnerProd(), ref_net, "aten::einsum"
|
||||
|
||||
@pytest.mark.nightly
|
||||
@pytest.mark.precommit
|
||||
def test_einsum_inner_prod(self, ie_device, precision, ir_version):
|
||||
self._test(*self.create_model(), ie_device, precision, ir_version)
|
||||
|
||||
|
||||
class TestEinsumTranspose(PytorchLayerTest):
|
||||
def _prepare_input(self):
|
||||
import numpy as np
|
||||
|
||||
return (np.random.randn(3, 5).astype(np.float32),)
|
||||
|
||||
def create_model(self):
|
||||
import torch
|
||||
|
||||
class EinsumModelTranspose(torch.nn.Module):
|
||||
def forward(self, x):
|
||||
eqn = "ij->ji"
|
||||
return torch.einsum(eqn, x)
|
||||
|
||||
ref_net = None
|
||||
|
||||
return EinsumModelTranspose(), ref_net, "aten::einsum"
|
||||
|
||||
@pytest.mark.nightly
|
||||
@pytest.mark.precommit
|
||||
def test_einsum_transpose(self, ie_device, precision, ir_version):
|
||||
self._test(*self.create_model(), ie_device, precision, ir_version)
|
58
tests/layer_tests/pytorch_tests/test_roi_align.py
Normal file
58
tests/layer_tests/pytorch_tests/test_roi_align.py
Normal file
@ -0,0 +1,58 @@
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from pytorch_layer_test_class import PytorchLayerTest
|
||||
from torchvision.ops import roi_align
|
||||
|
||||
|
||||
class TestROIAlign(PytorchLayerTest):
|
||||
def _prepare_input(self):
|
||||
return (self.input_tensor, self.boxes)
|
||||
|
||||
def create_model(self, output_size, spatial_scale, sampling_ratio, aligned):
|
||||
|
||||
class torchvision_roi_align(torch.nn.Module):
|
||||
def __init__(self, output_size, spatial_scale, sampling_ratio, aligned):
|
||||
super().__init__()
|
||||
self.output_size = output_size
|
||||
self.spatial_scale = spatial_scale
|
||||
self.sampling_ratio = sampling_ratio
|
||||
self.aligned = aligned
|
||||
|
||||
def forward(self, input_tensor, rois):
|
||||
return roi_align(
|
||||
input_tensor,
|
||||
rois.to(dtype=input_tensor.dtype),
|
||||
self.output_size,
|
||||
self.spatial_scale,
|
||||
self.sampling_ratio,
|
||||
self.aligned,
|
||||
)
|
||||
|
||||
ref_net = None
|
||||
|
||||
return (torchvision_roi_align(output_size, spatial_scale, sampling_ratio, aligned),
|
||||
ref_net, "torchvision::roi_align")
|
||||
|
||||
@pytest.mark.parametrize('input_tensor', (np.random.randn(4, 5, 6, 7).astype(np.float32),))
|
||||
@pytest.mark.parametrize('boxes', (np.array([[1, 2, 2, 3, 3]]).astype(np.float32),
|
||||
np.array([[0, 1, 2, 5, 4],
|
||||
[2, 1, 2, 5, 4],
|
||||
[3, 1, 2, 5, 4]]).astype(np.float32)))
|
||||
@pytest.mark.parametrize('output_size', ((4, 5), (3, 2), 3))
|
||||
@pytest.mark.parametrize('spatial_scale', (0.5, 1.0))
|
||||
@pytest.mark.parametrize('sampling_ratio', (0, 1))
|
||||
@pytest.mark.parametrize('aligned', (True, False))
|
||||
@pytest.mark.nightly
|
||||
@pytest.mark.precommit
|
||||
def test_roi_align(self, ie_device, precision, ir_version, input_tensor, boxes, output_size,
|
||||
spatial_scale, sampling_ratio, aligned):
|
||||
self.input_tensor = input_tensor
|
||||
self.boxes = boxes
|
||||
self._test(*self.create_model(output_size, spatial_scale, sampling_ratio, aligned),
|
||||
ie_device, precision, ir_version, trace_model=True)
|
@ -6,10 +6,50 @@ import pytest
|
||||
from pytorch_layer_test_class import PytorchLayerTest
|
||||
|
||||
|
||||
class TestUpsample1D(PytorchLayerTest):
|
||||
def _prepare_input(self):
|
||||
import numpy as np
|
||||
return (np.random.randn(1, 3, 224).astype(np.float32),)
|
||||
|
||||
def create_model(self, size, scale, mode):
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
class aten_upsample(torch.nn.Module):
|
||||
def __init__(self, size, scale, mode):
|
||||
super().__init__()
|
||||
self.size = size
|
||||
self.scale = scale
|
||||
self.mode = mode
|
||||
|
||||
def forward(self, x):
|
||||
return F.interpolate(x, self.size, scale_factor=self.scale, mode=self.mode)
|
||||
|
||||
ref_net = None
|
||||
|
||||
return aten_upsample(size, scale, mode), ref_net, F"aten::upsample_{mode}1d"
|
||||
|
||||
@pytest.mark.parametrize("mode,size,scale", [
|
||||
('nearest', 300, None),
|
||||
('nearest', 200, None),
|
||||
('nearest', None, 2.5),
|
||||
('nearest', None, 0.75),
|
||||
('linear', 300, None),
|
||||
('linear', 200, None),
|
||||
('linear', None, 2.5,),
|
||||
('linear', None, 0.75),
|
||||
])
|
||||
@pytest.mark.nightly
|
||||
@pytest.mark.precommit
|
||||
def test_upsample1d(self, mode, size, scale, ie_device, precision, ir_version):
|
||||
self._test(*self.create_model(size, scale, mode), ie_device,
|
||||
precision, ir_version, trace_model=True)
|
||||
|
||||
|
||||
class TestUpsample2D(PytorchLayerTest):
|
||||
def _prepare_input(self):
|
||||
import numpy as np
|
||||
return (np.zeros((1, 3, 224, 224)).astype(np.float32),)
|
||||
return (np.random.randn(1, 3, 200, 200).astype(np.float32),)
|
||||
|
||||
def create_model(self, size, scale, mode):
|
||||
import torch
|
||||
@ -31,25 +71,70 @@ class TestUpsample2D(PytorchLayerTest):
|
||||
|
||||
@pytest.mark.parametrize("mode,size,scale", [
|
||||
('nearest', 300, None),
|
||||
('nearest', 200, None),
|
||||
('nearest', (128, 480), None),
|
||||
('nearest', None, 2.5,),
|
||||
('nearest', 150, None),
|
||||
('nearest', (300, 400), None),
|
||||
('nearest', None, 2.5),
|
||||
('nearest', None, 0.75),
|
||||
('nearest', None, (1.2, 0.8)),
|
||||
('nearest', None, (1.5, 2)),
|
||||
('bilinear', 300, None),
|
||||
('bilinear', 200, None),
|
||||
('bilinear', (128, 480), None),
|
||||
('bilinear', 150, None),
|
||||
('bilinear', (400, 480), None),
|
||||
('bilinear', None, 2.5,),
|
||||
('bilinear', None, 0.75),
|
||||
('bilinear', None, (1.2, 0.8)),
|
||||
('bilinear', None, (1.2, 1.3)),
|
||||
('bicubic', 300, None),
|
||||
('bicubic', 200, None),
|
||||
('bicubic', (128, 480), None),
|
||||
('bicubic', 150, None),
|
||||
('bicubic', (400, 480), None),
|
||||
('bicubic', None, 2.5,),
|
||||
('bicubic', None, 0.75),
|
||||
('bicubic', None, (1.2, 0.8))]
|
||||
)
|
||||
('bicubic', None, (1.2, 1.3))
|
||||
])
|
||||
@pytest.mark.nightly
|
||||
@pytest.mark.precommit
|
||||
def test_upsample(self, mode, size, scale, ie_device, precision, ir_version):
|
||||
self._test(*self.create_model(size, scale, mode), ie_device, precision, ir_version, trace_model=True)
|
||||
def test_upsample2d(self, mode, size, scale, ie_device, precision, ir_version):
|
||||
self._test(*self.create_model(size, scale, mode), ie_device,
|
||||
precision, ir_version, trace_model=True, **{"custom_eps": 1e-3})
|
||||
|
||||
|
||||
class TestUpsample3D(PytorchLayerTest):
|
||||
def _prepare_input(self):
|
||||
import numpy as np
|
||||
return (np.random.randn(1, 3, 100, 100, 100).astype(np.float32),)
|
||||
|
||||
def create_model(self, size, scale, mode):
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
class aten_upsample(torch.nn.Module):
|
||||
def __init__(self, size, scale, mode):
|
||||
super().__init__()
|
||||
self.size = size
|
||||
self.scale = scale
|
||||
self.mode = mode
|
||||
|
||||
def forward(self, x):
|
||||
return F.interpolate(x, self.size, scale_factor=self.scale, mode=self.mode)
|
||||
|
||||
ref_net = None
|
||||
|
||||
return aten_upsample(size, scale, mode), ref_net, F"aten::upsample_{mode}3d"
|
||||
|
||||
@pytest.mark.parametrize("mode,size,scale", [
|
||||
('nearest', 200, None),
|
||||
('nearest', 150, None),
|
||||
('nearest', (150, 200, 250), None),
|
||||
('nearest', None, 2.5),
|
||||
('nearest', None, 0.75),
|
||||
('nearest', None, (1.5, 2, 2.5)),
|
||||
('trilinear', 200, None),
|
||||
('trilinear', 150, None),
|
||||
('trilinear', (200, 240, 210), None),
|
||||
('trilinear', None, 2.5,),
|
||||
('trilinear', None, 0.75),
|
||||
('trilinear', None, (1.2, 1.1, 1.5)),
|
||||
])
|
||||
@pytest.mark.nightly
|
||||
@pytest.mark.precommit
|
||||
def test_upsample3d(self, mode, size, scale, ie_device, precision, ir_version):
|
||||
self._test(*self.create_model(size, scale, mode), ie_device,
|
||||
precision, ir_version, trace_model=True, **{"custom_eps": 1e-3})
|
||||
|
@ -131,6 +131,7 @@ def convert_pytorch_to_onnx(model, input_shape, opset_version, example_inputs, o
|
||||
torch.onnx.export(model,
|
||||
inputs,
|
||||
model_onnx,
|
||||
operator_export_type=torch.onnx.OperatorExportTypes.ONNX_FALLTHROUGH,
|
||||
**additional_params)
|
||||
return model_onnx
|
||||
|
||||
|
@ -309,3 +309,41 @@ class TestMoFreezePlaceholderTFFE(unittest.TestCase):
|
||||
def test_conversion_model_oneshot_iterator_default(self):
|
||||
self.basic("model_oneshot_iterator.pbtxt", None, None, None, None,
|
||||
None, None, True, True, False, False)
|
||||
|
||||
@generate(
|
||||
*[
|
||||
(
|
||||
"in2{f32}->[0.0 0.0 0.0 0.0]",
|
||||
{"in1": np.array([[1.0, 2.0], [3.0, 4.0]])},
|
||||
np.array([[1.0, 2.0], [3.0, 4.0]]),
|
||||
np.float32,
|
||||
),
|
||||
(
|
||||
"in2->[1.0 15.0 15.5 1.0]",
|
||||
{"in1": np.array([[2.0, 4.0], [12.0, 8.0]])},
|
||||
np.array([[3.0, 19.0], [27.5, 9.0]]),
|
||||
np.float32,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_conversion_model_with_non_standard_extension(self, input_freezing_value, inputs, expected,
|
||||
dtype):
|
||||
self.basic("model_fp32.frozen", input_freezing_value, inputs, dtype, expected, only_conversion=False,
|
||||
input_model_is_text=False, use_new_frontend=True,
|
||||
use_legacy_frontend=False)
|
||||
|
||||
def test_conversion_fake_model(self):
|
||||
with self.assertRaisesRegex(Exception,
|
||||
"Internal error or inconsistent input model: the frontend supports "
|
||||
"only frozen binary protobuf format."):
|
||||
self.basic("fake.pb", None, None, None, None,
|
||||
only_conversion=True, input_model_is_text=False, use_new_frontend=True,
|
||||
use_legacy_frontend=False)
|
||||
|
||||
def test_conversion_dir_model(self):
|
||||
with self.assertRaisesRegex(Exception,
|
||||
"Internal error or inconsistent input model: the frontend supports "
|
||||
"only frozen binary protobuf format."):
|
||||
self.basic(".", None, None, None, None,
|
||||
only_conversion=True, input_model_is_text=False, use_new_frontend=True,
|
||||
use_legacy_frontend=False)
|
||||
|
2
tools/mo/unit_tests/moc_tf_fe/test_models/fake.pb
Normal file
2
tools/mo/unit_tests/moc_tf_fe/test_models/fake.pb
Normal file
@ -0,0 +1,2 @@
|
||||
dcfsdcdsdcs
|
||||
cscscsc
|
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8a33c91148b5e72ca03608c7d2ee18229ee4b610344dadd6896efeb6ac7b93e0
|
||||
size 141
|
Loading…
Reference in New Issue
Block a user