Merge remote-tracking branch 'upstream/master' into itikhono/ts/fix_performance_issues

This commit is contained in:
Tikhonov Ivan 2023-02-23 10:32:14 +00:00
commit 20579455b7
59 changed files with 1642 additions and 418 deletions

View File

@ -10,23 +10,54 @@ See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNo
@sphinxdirective
.. tab:: System Requirements
| Full requirement listing is available in:
| `System Requirements Page <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/system-requirements.html>`_
| Full requirement listing is available in:
| `System Requirements Page <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/system-requirements.html>`_
.. tab:: Processor Notes
Processor graphics are not included in all processors.
See `Product Specifications`_ for information about your processor.
.. _Product Specifications: https://ark.intel.com/
.. tab:: Software
* `CMake 3.13 or higher, 64-bit <https://cmake.org/download/>`_
* GCC 7.5.0 (for Ubuntu 18.04) or GCC 9.3.0 (for Ubuntu 20.04)
* `Python 3.7 - 3.10, 64-bit <https://www.python.org/downloads/>`_
* GCC:
.. tab:: Ubuntu 18.04
* GCC 7.5.0
.. tab:: Ubuntu 20.04
* GCC 9.3.0
.. tab:: RHEL 8
* GCC 8.4.1
.. tab:: CENTOS 7
* GCC 8.3.1
Use the following instructions to install it:
Install GCC 8.3.1 via devtoolset-8
.. code-block:: sh
sudo yum update -y && sudo yum install -y centos-release-scl epel-release
sudo yum install -y devtoolset-8 git patchelf
Enable devtoolset-8 and check current gcc version
.. code-block:: sh
source /opt/rh/devtoolset-8/enable
gcc -v
@endsphinxdirective

View File

@ -125,6 +125,46 @@ elif [ -f /etc/redhat-release ] || grep -q "rhel" /etc/os-release ; then
`# samples and tools` \
zlib-devel \
gflags-devel
elif [ -f /etc/os-release ] && grep -q "SUSE" /etc/os-release ; then
zypper refresh
zypper install -y \
file \
`# build tools` \
cmake \
ccache \
ninja \
scons \
gcc \
gcc-c++ \
make \
`# to determine openvino version via git` \
git \
git-lfs \
`# to build and check pip packages` \
patchelf \
fdupes \
`# to build and check rpm packages` \
rpm-build \
rpmlint \
`# check bash scripts for correctness` \
ShellCheck \
`# main openvino dependencies` \
tbb-devel \
pugixml-devel \
`# GPU plugin dependency` \
libva-devel \
`# OpenCL for GPU` \
ocl-icd-devel \
opencl-cpp-headers \
opencl-headers \
`# python API` \
python39-pip \
python39-setuptools \
python39-devel \
`# samples and tools` \
zlib-devel \
gflags-devel-static \
nlohmann_json-devel
elif [ -f /etc/os-release ] && grep -q "raspbian" /etc/os-release; then
# Raspbian
apt update
@ -176,8 +216,10 @@ if [ ! "$(printf '%s\n' "$required_cmake_ver" "$current_cmake_ver" | sort -V | h
if command -v apt-get &> /dev/null; then
apt-get install -y --no-install-recommends wget
else
elif command -v yum &> /dev/null; then
yum install -y wget
elif command -v zypper &> /dev/null; then
zypper in -y wget
fi
cmake_install_bin="cmake-${installed_cmake_ver}-linux-${arch}.sh"

View File

@ -15,11 +15,6 @@ set(shellcheck_skip_list
"${OpenVINO_SOURCE_DIR}/src/bindings/python/thirdparty/pybind11"
"${TEMP}")
if(shellcheck_VERSION VERSION_GREATER_EQUAL 0.7.0)
list(APPEND shellcheck_skip_list
"${OpenVINO_SOURCE_DIR}/scripts/setupvars/setupvars.sh")
endif()
ie_shellcheck_process(DIRECTORY "${OpenVINO_SOURCE_DIR}"
SKIP ${shellcheck_skip_list})

View File

@ -95,6 +95,7 @@ if [ "$os" == "auto" ] ; then
case $os in
centos7|centos8|rhel8|rhel9.1|\
almalinux8.7|amzn2|\
opensuse-leap15.3| \
fedora34|fedora35|fedora36|fedora37|fedora38|\
raspbian9|debian9|ubuntu18.04|\
raspbian10|debian10|ubuntu20.04|ubuntu20.10|ubuntu21.04|\
@ -216,6 +217,11 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
pkgs_dev+=("https://download-ib01.fedoraproject.org/pub/epel/9/Everything/$arch/Packages/g/gflags-devel-2.2.2-9.el9.$arch.rpm")
extra_repos+=("https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm")
fi
elif [ "$os" == "opensuse-leap15.3" ] ; then
pkgs_core=(libtbb2 libtbbmalloc2 libpugixml1)
pkgs_gpu=()
pkgs_python=(python39-base python39 python39-venv python39-pip)
pkgs_dev=(cmake pkg-config gcc-c++ gcc gflags-devel-static zlib-devel nlohmann_json-devel make curl sudo)
else
echo "Internal script error: invalid OS (${os}) after check (package selection)" >&2
exit 3
@ -280,6 +286,14 @@ elif [ "$os" == "centos7" ] || [ "$os" == "centos8" ] ||
yum install "$iopt" "${pkgs[@]}"
elif [ "$os" == "opensuse-leap15.3" ] ; then
[ -z "$interactive" ] && iopt="-y"
[ -n "$dry" ] && iopt="--dry-run"
[ -n "$keepcache" ] && zypper clean --all
zypper ref && zypper in --auto-agree-with-licenses --no-recommends "$iopt" "${pkgs[@]}"
else
echo "Internal script error: invalid OS (${os}) after check (package installation)" >&2
exit 3

View File

@ -3,7 +3,13 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
SCRIPT_DIR="$( cd "$( dirname "$(realpath "${BASH_SOURCE[0]}")" )" >/dev/null 2>&1 && pwd )"
# Print the absolute path of the file passed as $1: its directory is resolved
# with `cd` + `pwd -P`, and the last path component is appended unchanged.
#
# NOTE: the helper variable must not be named `path`. In zsh, `path` is the
# array tied to $PATH, so assigning to it replaces the command search path and
# the `dirname`/`basename` calls below can no longer be found.
abs_path () {
    # eval re-evaluates the argument so that shell expansions in it are applied
    script_path=$(eval echo "$1")
    directory=$(dirname "$script_path")
    echo "$(cd "$directory" || exit; pwd -P)/$(basename "$script_path")";
}
SCRIPT_DIR="$( cd "$( dirname "$(abs_path "${BASH_SOURCE[0]}")" )" >/dev/null 2>&1 && pwd )"
INSTALLDIR="${SCRIPT_DIR}"
export INTEL_OPENVINO_DIR="$INSTALLDIR"
@ -79,10 +85,12 @@ fi
# OpenCV environment
if [ -f "$INSTALLDIR/opencv/setupvars.sh" ]; then
# shellcheck source=/dev/null
source "$INSTALLDIR/opencv/setupvars.sh"
fi
if [ -f "$INSTALLDIR/extras/opencv/setupvars.sh" ]; then
# shellcheck source=/dev/null
source "$INSTALLDIR/extras/opencv/setupvars.sh"
fi
@ -97,23 +105,12 @@ MAX_SUPPORTED_PYTHON_VERSION_MINOR="10"
check_python_version () {
if [ -z "$python_version" ]; then
python_version=$(python3 -c 'import sys; print(str(sys.version_info[0])+"."+str(sys.version_info[1]))')
fi
# splitting Python version variable depending on the used shell
if [ -n "$ZSH_VERSION" ]; then
version_arr=(${(@s:.:)python_version})
if [ "${#version_arr[@]}" -ge "2" ]; then
# zsh starts indexing from 1
python_version_major=${version_arr[1]}
python_version_minor=${version_arr[2]}
fi
python_version_major=$( python3 -c 'import sys; print(str(sys.version_info[0]))' )
python_version_minor=$( python3 -c 'import sys; print(str(sys.version_info[1]))' )
python_version="$python_version_major.$python_version_minor"
else
version_arr=(${python_version//./ })
if [ "${#version_arr[@]}" -ge "2" ]; then
python_version_major=${version_arr[0]}
python_version_minor=${version_arr[1]}
fi
python_version_major=$( python3 -c "import sys; print(str(\"${python_version}\".split('.')[0]))" )
python_version_minor=$( python3 -c "import sys; print(str(\"${python_version}\".split('.')[1]))" )
fi
if [ "$PYTHON_VERSION_MAJOR" != "$python_version_major" ] ||

View File

@ -8,17 +8,26 @@ import os
import numpy as np
import pytest
import openvino.runtime.opset8 as ov
from openvino.runtime import Model
import openvino.runtime.opset10 as ops
from openvino.runtime import Core, Model
from openvino.runtime.passes import Manager, Serialize, ConstantFolding, Version
from tests.test_graph.util import count_ops_of_type
from openvino.runtime import Core
from tests.test_utils.test_utils import create_filename_for_test
def create_model():
    """Build the three-input model shared by the serialization tests.

    The graph computes ``floor(minimum(abs(A), multiply(B, C)))`` over three
    float32 parameters named ``A``, ``B`` and ``C``, each of shape
    ``[100, 100, 2]``, and is wrapped in a ``Model`` called ``"Model"``.
    """
    input_shape = [100, 100, 2]
    param_a, param_b, param_c = (
        ops.parameter(input_shape, dtype=np.float32, name=label) for label in ("A", "B", "C")
    )
    # Nodes are created in the same order as a nested call would evaluate them.
    magnitude = ops.abs(param_a)
    product = ops.multiply(param_b, param_c)
    result = ops.floor(ops.minimum(magnitude, product))
    return Model(result, [param_a, param_b, param_c], "Model")
def test_constant_folding():
node_constant = ov.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32))
node_ceil = ov.ceiling(node_constant)
node_constant = ops.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32))
node_ceil = ops.ceiling(node_constant)
model = Model(node_ceil, [], "TestFunction")
assert count_ops_of_type(model, node_ceil) == 1
@ -43,9 +52,9 @@ def test_serialize_seperate_paths_kwargs(request, tmp_path):
core = Core()
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
shape = [2, 2]
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
parameter_a = ops.parameter(shape, dtype=np.float32, name="A")
parameter_b = ops.parameter(shape, dtype=np.float32, name="B")
parameter_c = ops.parameter(shape, dtype=np.float32, name="C")
model = (parameter_a + parameter_b) * parameter_c
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
@ -67,10 +76,10 @@ def test_serialize_seperate_paths_args(request, tmp_path):
core = Core()
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
shape = [2, 2]
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
parameter_a = ops.parameter(shape, dtype=np.float32, name="A")
parameter_b = ops.parameter(shape, dtype=np.float32, name="B")
parameter_c = ops.parameter(shape, dtype=np.float32, name="C")
parameter_d = ops.parameter(shape, dtype=np.float32, name="D")
model = ((parameter_a + parameter_b) * parameter_c) / parameter_d
func = Model(model, [parameter_a, parameter_b, parameter_c, parameter_d], "Model")
@ -92,8 +101,8 @@ def test_serialize_pass_mixed_args_kwargs(request, tmp_path):
core = Core()
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
shape = [3, 2]
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
parameter_a = ops.parameter(shape, dtype=np.float32, name="A")
parameter_b = ops.parameter(shape, dtype=np.float32, name="B")
model = parameter_a - parameter_b
func = Model(model, [parameter_a, parameter_b], "Model")
@ -114,20 +123,15 @@ def test_serialize_pass_mixed_args_kwargs(request, tmp_path):
def test_serialize_pass_mixed_args_kwargs_v2(request, tmp_path):
core = Core()
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
shape = [100, 100, 2]
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
model = create_model()
pass_manager = Manager()
pass_manager.register_pass(Serialize(path_to_xml=xml_path, path_to_bin=bin_path))
pass_manager.run_passes(func)
pass_manager.run_passes(model)
res_model = core.read_model(model=xml_path, weights=bin_path)
assert func.get_parameters() == res_model.get_parameters()
assert func.get_ordered_ops() == res_model.get_ordered_ops()
assert model.get_parameters() == res_model.get_parameters()
assert model.get_ordered_ops() == res_model.get_ordered_ops()
os.remove(xml_path)
os.remove(bin_path)
@ -146,8 +150,8 @@ def test_serialize_pass_wrong_num_of_args(request, tmp_path):
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
def test_serialize_results(request, tmp_path):
core = Core()
node_constant = ov.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32))
node_ceil = ov.ceiling(node_constant)
node_constant = ops.constant(np.array([[0.0, 0.1, -0.1], [-2.5, 2.5, 3.0]], dtype=np.float32))
node_ceil = ops.ceiling(node_constant)
func = Model(node_ceil, [], "Model")
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
@ -165,73 +169,19 @@ def test_serialize_results(request, tmp_path):
os.remove(bin_path)
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
def test_serialize_pass_tuple(request, tmp_path):
core = Core()
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
shape = [100, 100, 2]
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
pass_manager = Manager()
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
pass_manager.run_passes(func)
res_model = core.read_model(model=xml_path, weights=bin_path)
assert func.get_parameters() == res_model.get_parameters()
assert func.get_ordered_ops() == res_model.get_ordered_ops()
os.remove(xml_path)
os.remove(bin_path)
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
def test_default_version(request, tmp_path):
core = Core()
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
shape = [100, 100, 2]
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
model = create_model()
pass_manager = Manager()
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path))
pass_manager.run_passes(func)
pass_manager.register_pass(Serialize(xml_path, bin_path))
pass_manager.run_passes(model)
res_model = core.read_model(model=xml_path, weights=bin_path)
assert func.get_parameters() == res_model.get_parameters()
assert func.get_ordered_ops() == res_model.get_ordered_ops()
os.remove(xml_path)
os.remove(bin_path)
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
def test_default_version_IR_V11_tuple(request, tmp_path):
core = Core()
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
shape = [100, 100, 2]
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
pass_manager = Manager()
pass_manager.register_pass("Serialize", output_files=(xml_path, bin_path), version="IR_V11")
pass_manager.run_passes(func)
res_model = core.read_model(model=xml_path, weights=bin_path)
assert func.get_parameters() == res_model.get_parameters()
assert func.get_ordered_ops() == res_model.get_ordered_ops()
assert model.get_parameters() == res_model.get_parameters()
assert model.get_ordered_ops() == res_model.get_ordered_ops()
os.remove(xml_path)
os.remove(bin_path)
@ -241,21 +191,15 @@ def test_default_version_IR_V11_tuple(request, tmp_path):
def test_default_version_IR_V11_seperate_paths(request, tmp_path):
core = Core()
xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path)
shape = [100, 100, 2]
parameter_a = ov.parameter(shape, dtype=np.float32, name="A")
parameter_b = ov.parameter(shape, dtype=np.float32, name="B")
parameter_c = ov.parameter(shape, dtype=np.float32, name="C")
parameter_d = ov.parameter(shape, dtype=np.float32, name="D")
model = ov.floor(ov.minimum(ov.abs(parameter_a), ov.multiply(parameter_b, parameter_c)))
func = Model(model, [parameter_a, parameter_b, parameter_c], "Model")
model = create_model()
pass_manager = Manager()
pass_manager.register_pass(Serialize(path_to_xml=xml_path, path_to_bin=bin_path, version=Version.IR_V11))
pass_manager.run_passes(func)
pass_manager.run_passes(model)
res_model = core.read_model(model=xml_path, weights=bin_path)
assert func.get_parameters() == res_model.get_parameters()
assert func.get_ordered_ops() == res_model.get_ordered_ops()
assert model.get_parameters() == res_model.get_parameters()
assert model.get_ordered_ops() == res_model.get_ordered_ops()
os.remove(xml_path)
os.remove(bin_path)

View File

@ -32,14 +32,10 @@ def test_registration_and_pass_name():
GraphRewrite().set_name("Anchor")
BackwardGraphRewrite().set_name("BackAnchor")
# Preserve legacy behaviour when registered pass doesn't exist
# and in this case we shouldn't throw an exception.
manager.register_pass("NotExistingPass")
def test_negative_pass_registration():
manager = Manager()
expect_exception(lambda: manager.register_pass(PatternReplacement))
expect_exception(lambda: manager.register_pass("PatternReplacement", PatternReplacement()))
expect_exception(lambda: manager.register_pass("Serialize", Serialize("out.xml", "out.bin")))
expect_exception(lambda: manager.register_pass("Serialize", "out.xml", "out.bin", "out.wrong"))
expect_exception(lambda: manager.register_pass(Serialize("out.xml", "out.bin", "out.wrong")))

View File

@ -6,7 +6,7 @@ import os
import pytest
import numpy as np
from openvino.runtime import serialize
from openvino.offline_transformations import (
from openvino._offline_transformations import (
apply_moc_transformations,
apply_pot_transformations,
apply_low_latency_transformation,

View File

@ -27,7 +27,7 @@ def einsum_op_exec(input_shapes: list, equation: str, data_type: np.dtype,
ng_inputs = []
np_inputs = []
for i in range(num_inputs):
input_i = np.random.random_integers(10, size=input_shapes[i]).astype(data_type)
input_i = np.random.randint(1, 10 + 1, size=input_shapes[i]).astype(data_type)
np_inputs.append(input_i)
ng_inputs.append(ng.parameter(input_i.shape, dtype=data_type))

View File

@ -33,7 +33,7 @@ def test_elu_operator_with_scalar():
def test_fake_quantize():
levels = np.float32(4)
levels = np.int32(4)
data_shape = [1, 2, 3, 4]
bound_shape = []
@ -60,7 +60,7 @@ def test_fake_quantize():
def test_depth_to_space():
data_shape = [1, 4, 2, 3]
mode = "blocks_first"
block_size = np.float32(2)
block_size = np.int32(2)
parameter_data = ng.parameter(data_shape, name="Data", dtype=np.float32)

View File

@ -103,6 +103,7 @@ public:
bool operator>(const Output& other) const;
bool operator<=(const Output& other) const;
bool operator>=(const Output& other) const;
operator Output<const Node>() const;
private:
std::shared_ptr<Node> m_node;

View File

@ -116,6 +116,23 @@ public:
*/
Tensor(const element::Type type, const Shape& shape, void* host_ptr, const Strides& strides = {});
/**
* @brief Constructs Tensor using port from node. Allocates internal host storage using the given allocator (the default one if omitted)
* @param port port from node
* @param allocator allocates memory for internal tensor storage
*/
Tensor(const ov::Output<const ov::Node>& port, const Allocator& allocator = {});
/**
* @brief Constructs Tensor using port from node. Wraps allocated host memory.
* @note Does not perform memory allocation internally
* @param port port from node
* @param host_ptr Pointer to pre-allocated host memory
* @param strides Optional strides parameters in bytes. Strides are supposed to be computed automatically based
* on shape and element size
*/
Tensor(const ov::Output<const ov::Node>& port, void* host_ptr, const Strides& strides = {});
/**
* @brief Constructs region of interest (ROI) tensor from another tensor.
* @note Does not perform memory allocation internally
@ -143,10 +160,17 @@ public:
*/
Shape get_shape() const;
/**
* @brief Copy tensor, destination tensor should have the same element type and shape
*
* @param dst destination tensor
*/
void copy_to(ov::Tensor& dst) const;
/**
* @brief Reports whether the tensor is continuous or not
*
* @return true if blob is continuous
* @return true if tensor is continuous
*/
bool is_continuous() const;

View File

@ -42,12 +42,12 @@ void color_convert_nv12(const T* arg_y,
size_t stride_y,
size_t stride_uv,
ov::op::util::ConvertColorNV12Base::ColorConversion color_format) {
for (int batch = 0; batch < batch_size; batch++) {
for (size_t batch = 0; batch < batch_size; batch++) {
T* out = out_ptr + batch * image_w * image_h * 3;
auto y_ptr = arg_y + batch * stride_y;
auto uv_ptr = arg_uv + batch * stride_uv;
for (int h = 0; h < image_h; h++) {
for (int w = 0; w < image_w; w++) {
for (size_t h = 0; h < image_h; h++) {
for (size_t w = 0; w < image_w; w++) {
auto y_index = h * image_w + w;
auto y_val = static_cast<float>(y_ptr[y_index]);
auto uv_index = (h / 2) * image_w + (w / 2) * 2;
@ -80,13 +80,13 @@ void color_convert_i420(const T* arg_y,
size_t stride_y,
size_t stride_uv,
ov::op::util::ConvertColorI420Base::ColorConversion color_format) {
for (int batch = 0; batch < batch_size; batch++) {
for (size_t batch = 0; batch < batch_size; batch++) {
T* out = out_ptr + batch * image_w * image_h * 3;
auto y_ptr = arg_y + batch * stride_y;
auto u_ptr = arg_u + batch * stride_uv;
auto v_ptr = arg_v + batch * stride_uv;
for (int h = 0; h < image_h; h++) {
for (int w = 0; w < image_w; w++) {
for (size_t h = 0; h < image_h; h++) {
for (size_t w = 0; w < image_w; w++) {
auto y_index = h * image_w + w;
auto y_val = static_cast<float>(y_ptr[y_index]);
auto uv_index = (h / 2) * (image_w / 2) + (w / 2);

View File

@ -46,15 +46,15 @@ void extend_with_zeros(const Strides& strides,
const auto offset_batch = batch * input_size * input_shape[1];
for (size_t channel = 0; channel < input_shape[1]; ++channel) {
const auto offset_channel = offset_batch + channel * input_size;
for (int i_z = 0; i_z < input_3d[0]; ++i_z) {
for (size_t i_z = 0; i_z < input_3d[0]; ++i_z) {
const auto offset_i_z = i_z * input_3d[2] * input_3d[1];
for (int i_y = 0; i_y < input_3d[1]; ++i_y) {
for (size_t i_y = 0; i_y < input_3d[1]; ++i_y) {
const auto offset_i_y = i_y * input_3d[2];
for (int i_x = 0; i_x < input_3d[2]; ++i_x) {
for (size_t i_x = 0; i_x < input_3d[2]; ++i_x) {
input_zeros.push_back(in[offset_channel + i_x + offset_i_y + offset_i_z]);
if (i_x < input_3d[2] - 1) {
for (int k = 0; k < strides_3d[2] - 1; k++) {
for (size_t k = 0; k < strides_3d[2] - 1; k++) {
input_zeros.push_back(0);
}
}

View File

@ -38,7 +38,7 @@ private:
size_t offset;
size_t numResults;
size_t outTotalSize;
size_t numClasses;
int numClasses;
void GetLocPredictions(const dataType* locData, std::vector<LabelBBox>& locations) {
locations.resize(numImages);
@ -445,7 +445,7 @@ public:
offset = _attrs.normalized ? 0 : 1;
numPriors = priorsShape[2] / priorSize;
priorsBatchSize = priorsShape[0];
numClasses = classPredShape[1] / numPriors;
numClasses = classPredShape[1] / static_cast<int>(numPriors);
numLocClasses = _attrs.share_location ? 1 : numClasses;
numResults = outShape[2];
outTotalSize = shape_size(outShape);

View File

@ -109,8 +109,8 @@ void roi_align(const T* feature_maps,
T sample_x = x1 + static_cast<T>(x_bin_ind) * bin_width +
sample_distance_x * (static_cast<T>(x_sample_ind) + static_cast<T>(0.5f));
if (sample_x < -1.0 || sample_x > feature_map_width || sample_y < -1.0 ||
sample_y > feature_map_height) {
if (sample_x < -1.0 || sample_x > static_cast<T>(feature_map_width) || sample_y < -1.0 ||
sample_y > static_cast<T>(feature_map_height)) {
// For this sample we save 4x point (0,0) with weight 0
pooling_points.insert(pooling_points.end(), 4, {0, 0});
pooling_weights.insert(pooling_weights.end(), 4, T{0});

View File

@ -147,6 +147,10 @@ bool Output<Node>::operator>=(const Output& other) const {
return !(*this < other);
}
// Conversion to the const-node flavour of Output: the result is constructed
// from this output's node pointer and output index.
Output<Node>::operator Output<const Node>() const {
    const Node* const node = get_node();
    const size_t index = get_index();
    return Output<const Node>(node, index);
}
Output<const Node>::Output(const Node* node, size_t index) : m_index(index) {
OPENVINO_ASSERT(node, "Cannot create ov::Output<const ov::Node> from nullptr!");
m_node = node->shared_from_this();

View File

@ -7,6 +7,9 @@
#include "blob_factory.hpp" // IE private header
#include "ie_ngraph_utils.hpp" // IE private header
#include "openvino/core/except.hpp"
#include "openvino/core/shape.hpp"
#include "openvino/core/strides.hpp"
#include "openvino/runtime/remote_tensor.hpp"
#include "openvino/runtime/tensor.hpp"
#include "runtime/blob_allocator.hpp"
#include "shape_util.hpp"
@ -94,6 +97,17 @@ Tensor::Tensor(const Tensor& owner, const Coordinate& begin, const Coordinate& e
}
}
// Creates a tensor for a node output port by delegating to the
// (element type, shape, allocator) constructor. The element type is taken
// from the port; the shape is the port's shape, or ov::Shape{0} when the
// port's partial shape is dynamic.
Tensor::Tensor(const ov::Output<const ov::Node>& port, const Allocator& allocator)
: Tensor(port.get_element_type(),
port.get_partial_shape().is_dynamic() ? ov::Shape{0} : port.get_shape(),
allocator) {}
// Creates a tensor for a node output port on top of caller-provided memory by
// delegating to the (element type, shape, host_ptr, strides) constructor.
// The element type is taken from the port; the shape is the port's shape, or
// ov::Shape{0} when the port's partial shape is dynamic. host_ptr and
// byte_strides are forwarded unchanged.
Tensor::Tensor(const ov::Output<const ov::Node>& port, void* host_ptr, const Strides& byte_strides)
: Tensor(port.get_element_type(),
port.get_partial_shape().is_dynamic() ? ov::Shape{0} : port.get_shape(),
host_ptr,
byte_strides) {}
element::Type Tensor::get_element_type() const {
OV_TENSOR_STATEMENT(return ie::details::convertPrecision(_impl->getTensorDesc().getPrecision()));
}
@ -113,6 +127,128 @@ Shape Tensor::get_shape() const {
OV_TENSOR_STATEMENT({ return _impl->getTensorDesc().getBlockingDesc().getBlockDims(); });
}
// Copies the contents of this tensor into dst.
//
// Preconditions (each checked with OPENVINO_ASSERT):
//  - dst is initialized;
//  - neither this tensor nor dst is an ov::RemoteTensor;
//  - both tensors have the same element type;
//  - both tensors have the same shape. A dst whose shape is exactly {0} is
//    first resized to the source shape via set_shape().
//
// The copy is done with memcpy. If the element type is narrower than 8 bits,
// or both tensors have identical strides and this tensor is continuous, the
// whole buffer is copied in one call. Otherwise the innermost dimensions that
// are dense in both tensors are folded into one contiguous chunk and the
// remaining outer positions are iterated one chunk at a time.
void Tensor::copy_to(ov::Tensor& dst) const {
OV_TENSOR_STATEMENT({
OPENVINO_ASSERT(dst, "Destination tensor was not initialized.");
OPENVINO_ASSERT(!is<ov::RemoteTensor>(), "Default copy to doesn't support copy from remote tensor.");
OPENVINO_ASSERT(!dst.is<ov::RemoteTensor>(), "Default copy to doesn't support copy to remote tensor.");
OPENVINO_ASSERT(dst.get_element_type() == get_element_type(),
"Tensor element types are not equal. (src: ",
get_element_type(),
" != dst: ",
dst.get_element_type(),
")");
// A destination of shape {0} is treated as "not yet shaped" and adopts the source shape.
if (dst.get_shape() == ov::Shape{0})
dst.set_shape(get_shape());
OPENVINO_ASSERT(dst.get_shape() == get_shape(),
"Tensor shapes are not equal. (src: ",
get_shape(),
" != dst: ",
dst.get_shape(),
")");
const auto& shape = get_shape();
auto* src_data = static_cast<const uint8_t*>(data());
auto* dst_data = static_cast<uint8_t*>(dst.data());
// Default copy plan: a single position whose chunk is the whole buffer,
// i.e. exactly one memcpy of get_byte_size() bytes.
ov::Strides src_strides{get_byte_size()};
ov::Strides dst_strides{dst.get_byte_size()};
ov::Shape cur_pos{0};
ov::Shape max_pos{1};

if (get_element_type().bitwidth() < 8 || (get_strides() == dst.get_strides() && is_continuous())) {
// OpenVINO doesn't support strides for LP types
// or both tensors have default strides
// Strides and positions already initialized
} else {
// At least one tensor has non-default strides:
// first compute the default (dense) strides for this shape, in elements ...
const auto& type = get_element_type();
std::vector<size_t> strides(shape.size());
if (!shape.empty()) {
strides[shape.size() - 1] = 1;
}
auto size = shape.size();
for (size_t i = 1; i < size; i++) {
strides[size - i - 1] = strides[size - i] * shape[size - i];
}

// ... and convert them to bytes.
ov::Strides default_strides(strides.size());
for (size_t i = 0; i < strides.size(); ++i)
default_strides[i] = strides[i] * type.size();

src_strides = get_strides();
dst_strides = dst.get_strides();

ov::Strides src_str, dst_str;

// Calculate src and dst shapes
// Dimensions are visited from innermost to outermost. Leading (innermost)
// dimensions where both tensors use the default stride are skipped: they
// are covered by the contiguous chunk set up at the first mismatch.
bool found_step = false;
for (size_t i = 0; i < shape.size(); i++) {
size_t inverted_idx = shape.size() - i - 1;
if (!found_step) {
if (default_strides[inverted_idx] == src_strides[inverted_idx] &&
src_strides[inverted_idx] == dst_strides[inverted_idx]) {
continue;
} else {
found_step = true;
// Number of outer dimensions (0..inverted_idx) that must be iterated explicitly.
size_t strides_size = inverted_idx + 1;
// Set right size
// One extra trailing slot describes the contiguous chunk.
src_str.resize(strides_size + 1);
dst_str.resize(strides_size + 1);
max_pos.resize(strides_size + 1);
cur_pos.resize(strides_size + 1);
// In case of default continuous strides we can copy several elements
// In other case only one element
size_t dim = 1;
size_t strides = type.size();

if (strides_size < default_strides.size()) {
strides = default_strides[strides_size];
dim = get_shape()[strides_size];
}
// The trailing slot's stride doubles as the memcpy size used in the copy loop below.
src_str[strides_size] = strides;
dst_str[strides_size] = strides;
max_pos[strides_size] = dim;
cur_pos[strides_size] = 0;
}
}
// From the first mismatch outwards, each dimension keeps its real per-tensor stride.
src_str[inverted_idx] = src_strides[inverted_idx];
dst_str[inverted_idx] = dst_strides[inverted_idx];
max_pos[inverted_idx] = shape[inverted_idx];
cur_pos[inverted_idx] = 0;
}
src_strides = src_str;
dst_strides = dst_str;
}

// Byte offset of a position: sum of pos[i] * strides[i]. The `shape` argument is not used.
const auto update_index = [](const ov::Shape& pos, const ov::Shape& shape, const ov::Strides& strides) {
size_t offset = 0;
for (size_t i = 0; i < pos.size(); i++) {
offset += pos[i] * strides[i];
}
return offset;
};

// Copy loop: copy one chunk (its size is the last entry of src_strides), then
// advance cur_pos like an odometer starting from the last slot.
bool finish = false;
for (size_t dst_idx = 0, src_idx = 0; !finish;) {
memcpy(dst_data + dst_idx, src_data + src_idx, src_strides[src_strides.size() - 1]);
// update indexes
for (size_t i = 0; i < cur_pos.size(); i++) {
size_t inverted_idx = cur_pos.size() - i - 1;
cur_pos[inverted_idx]++;
if (cur_pos[inverted_idx] != max_pos[inverted_idx]) {
break;
}
// This slot wrapped around: reset it and carry into the next outer slot,
// or stop once the outermost slot (index 0) has wrapped.
if (inverted_idx)
cur_pos[inverted_idx] = 0;
else
finish = true;
}
src_idx = update_index(cur_pos, max_pos, src_strides);
dst_idx = update_index(cur_pos, max_pos, dst_strides);
}
});
}
Strides Tensor::get_strides() const {
OPENVINO_ASSERT(get_element_type().bitwidth() >= 8,
"Could not get strides for types with bitwidths less then 8 bit. Tensor type: ",
@ -174,24 +310,26 @@ Tensor::operator bool() const noexcept {
}
bool Tensor::is_continuous() const {
if (get_element_type().bitwidth() < 8)
// OpenVINO doesn't support strides for lp types
return true;
const auto& shape = get_shape();
const auto& type = get_element_type();
std::vector<size_t> strides(shape.size());
if (!shape.empty()) {
strides[shape.size() - 1] = 1;
}
auto size = shape.size();
for (size_t i = 1; i < size; i++) {
strides[size - i - 1] = strides[size - i] * shape[size - i];
}
OV_TENSOR_STATEMENT({
if (get_element_type().bitwidth() < 8)
// OpenVINO doesn't support strides for lp types
return true;
const auto& shape = get_shape();
const auto& type = get_element_type();
std::vector<size_t> strides(shape.size());
if (!shape.empty()) {
strides[shape.size() - 1] = 1;
}
auto size = shape.size();
for (size_t i = 1; i < size; i++) {
strides[size - i - 1] = strides[size - i] * shape[size - i];
}
ov::Strides byte_strides(strides.size());
for (size_t i = 0; i < strides.size(); ++i)
byte_strides[i] = strides[i] * type.size();
return byte_strides == get_strides();
ov::Strides byte_strides(strides.size());
for (size_t i = 0; i < strides.size(); ++i)
byte_strides[i] = strides[i] * type.size();
return byte_strides == get_strides();
});
}
} // namespace ov

View File

@ -4,6 +4,7 @@
#include <gmock/gmock-spec-builders.h>
#include <gmock/gmock.h>
#include <gtest/gtest-param-test.h>
#include <gtest/gtest.h>
#include <cstdint>
@ -13,7 +14,11 @@
#include "ngraph/coordinate_transform.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/partial_shape.hpp"
#include "openvino/core/type/element_type_traits.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/allocator.hpp"
#include "openvino/runtime/remote_tensor.hpp"
#include "openvino/runtime/tensor.hpp"
using OVTensorTest = ::testing::Test;
@ -40,6 +45,26 @@ TEST_F(OVTensorTest, canCreateTensor) {
ASSERT_THROW(t.data<std::int32_t>(), ov::Exception);
}
// Checks that a Tensor constructed from a node output port reports the port's
// element type, and reports the port's shape for static ports or Shape{0} for
// a dynamic port, both with and without a caller-provided host pointer.
TEST_F(OVTensorTest, createTensorFromPort) {
    auto static_f64_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f64, ov::Shape{1, 3, 2, 2});
    auto static_f32_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 3});
    auto dynamic_f32_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
    float host_buffer[] = {5.f, 6.f, 7.f};

    // Static port, internally allocated storage.
    ov::Tensor allocated_static{static_f64_param->output(0)};
    EXPECT_EQ(allocated_static.get_shape(), static_f64_param->get_shape());
    EXPECT_EQ(allocated_static.get_element_type(), static_f64_param->get_element_type());

    // Static port, wrapping the host buffer.
    ov::Tensor wrapped_static{static_f32_param->output(0), host_buffer};
    EXPECT_EQ(wrapped_static.get_shape(), static_f32_param->get_shape());
    EXPECT_EQ(wrapped_static.get_element_type(), static_f32_param->get_element_type());

    // Dynamic port, internally allocated storage.
    ov::Tensor allocated_dynamic{dynamic_f32_param->output(0)};
    EXPECT_EQ(allocated_dynamic.get_shape(), ov::Shape{0});
    EXPECT_EQ(allocated_dynamic.get_element_type(), dynamic_f32_param->get_element_type());

    // Dynamic port, wrapping the host buffer.
    ov::Tensor wrapped_dynamic{dynamic_f32_param->output(0), host_buffer};
    EXPECT_EQ(wrapped_dynamic.get_shape(), ov::Shape{0});
    EXPECT_EQ(wrapped_dynamic.get_element_type(), dynamic_f32_param->get_element_type());
}
TEST_F(OVTensorTest, canAccessF16Tensor) {
ov::Shape shape = {4, 3, 2};
ov::Tensor t{ov::element::f16, shape};
@ -281,3 +306,201 @@ TEST_F(OVTensorTest, readRangeRoiBlob) {
}
}
}
// Shape/stride configuration of one copy_to test case.
// The copy_to test treats an empty Strides value as "create the tensor without explicit strides";
// a non-empty value is forwarded to the ov::Tensor constructor that takes a host pointer and strides.
struct TestParams {
// Shape of the source tensor.
ov::Shape src_shape;
// Strides for the source tensor; may be empty.
ov::Strides src_strides;
// Initial shape of the destination tensor.
ov::Shape dst_shape;
// Strides for the destination tensor; may be empty.
ov::Strides dst_strides;
};
// Value-parameterized fixture for the copy_to tests: each instance receives an element type and a TestParams.
struct OVTensorTestCopy : ::testing::TestWithParam<std::tuple<ov::element::Type, TestParams>> {};
namespace {
/// \brief Gathers the elements of a tensor into a flat vector, honoring the tensor strides.
///
/// Coordinates are visited in the order produced by ngraph::CoordinateTransformBasic over the
/// tensor shape. The offset of every element is accumulated over all dimensions of the
/// coordinate, so tensors of any rank are handled (the previous version read exactly three
/// coordinates, which skipped the innermost dimension of 4-D tensors and indexed out of bounds
/// for ranks below three).
///
/// \tparam T C++ type used to read the tensor data.
/// \param tensor Tensor to read from.
/// \return Elements of the tensor in coordinate iteration order.
template <class T>
std::vector<T> fill_data(const ov::Tensor& tensor) {
    std::vector<T> actual;
    const T* data = tensor.data<T>();
    const auto strides = tensor.get_strides();
    // The accumulated stride offset is divided by the element size to index a T*.
    const auto element_size = tensor.get_element_type().size();
    for (auto&& c : ngraph::CoordinateTransformBasic{tensor.get_shape()}) {
        size_t offset = 0;
        for (size_t dim = 0; dim < c.size(); ++dim) {
            offset += c[dim] * strides[dim];
        }
        actual.emplace_back(*(data + offset / element_size));
    }
    return actual;
}
/// \brief Compares two tensors element by element after gathering both with fill_data<T>.
///
/// The element counts must match (fatal assertion); individual element mismatches are reported
/// as non-fatal failures so that all differing positions are listed.
template <class T>
void compare_data(const ov::Tensor& src, const ov::Tensor& dst) {
    const auto source_vec = fill_data<T>(src);
    const auto dest_vec = fill_data<T>(dst);
    ASSERT_EQ(source_vec.size(), dest_vec.size());
    const size_t count = source_vec.size();
    size_t i = 0;
    while (i != count) {
        EXPECT_EQ(source_vec[i], dest_vec[i]);
        ++i;
    }
}
/// \brief Fills every element of a tensor with a deterministic pattern.
///
/// \tparam T C++ type used to write the tensor data.
/// \param tensor Tensor whose get_size() elements are overwritten through data<T>().
/// \param input  When true, element i receives the value i converted to T; when false, every
///               element receives -1 converted to T.
template <class T>
void init_tensor(const ov::Tensor& tensor, bool input) {
    const auto origPtr = tensor.data<T>();
    ASSERT_NE(nullptr, origPtr);
    for (size_t i = 0; i < tensor.get_size(); ++i) {
        // Convert each branch to T separately. In `input ? i : -1` the -1 is first promoted to
        // size_t (the common type of the two operands), so floating-point element types would
        // receive a huge positive number instead of -1.
        origPtr[i] = input ? static_cast<T>(i) : static_cast<T>(-1);
    }
}
/// \brief Runtime dispatcher for init_tensor<T>: picks T from the element type of the tensor.
///
/// \param tensor Tensor to fill.
/// \param input  Forwarded unchanged to init_tensor<T>.
/// Element types without a case below end in OPENVINO_UNREACHABLE.
void init_tensor(const ov::Tensor& tensor, bool input) {
// One switch case per element type; the C++ value type comes from element_type_traits.
#define OV_TENSOR_TEST_INIT_CASE(ELEM)                                                         \
    case ov::element::ELEM:                                                                    \
        init_tensor<ov::element_type_traits<ov::element::ELEM>::value_type>(tensor, input);    \
        break;

    switch (tensor.get_element_type()) {
        OV_TENSOR_TEST_INIT_CASE(bf16)
        OV_TENSOR_TEST_INIT_CASE(f16)
        OV_TENSOR_TEST_INIT_CASE(f32)
        OV_TENSOR_TEST_INIT_CASE(f64)
        OV_TENSOR_TEST_INIT_CASE(i8)
        OV_TENSOR_TEST_INIT_CASE(i16)
        OV_TENSOR_TEST_INIT_CASE(i32)
        OV_TENSOR_TEST_INIT_CASE(i64)
        OV_TENSOR_TEST_INIT_CASE(u8)
        OV_TENSOR_TEST_INIT_CASE(u16)
        OV_TENSOR_TEST_INIT_CASE(u32)
        OV_TENSOR_TEST_INIT_CASE(u64)
    default:
        OPENVINO_UNREACHABLE("Unsupported data type");
    }

#undef OV_TENSOR_TEST_INIT_CASE
}
/// \brief Asserts that two tensors agree in byte size, shape, element type and element values.
///
/// The three metadata checks are fatal; the element comparison is delegated to compare_data<T>
/// with T chosen from the element type of `src`. Element types without a case below end in
/// OPENVINO_UNREACHABLE.
void compare_tensors(const ov::Tensor& src, const ov::Tensor& dst) {
    ASSERT_EQ(src.get_byte_size(), dst.get_byte_size());
    ASSERT_EQ(src.get_shape(), dst.get_shape());
    ASSERT_EQ(src.get_element_type(), dst.get_element_type());

// One switch case per element type; the C++ value type comes from element_type_traits.
#define OV_TENSOR_TEST_COMPARE_CASE(ELEM)                                                   \
    case ov::element::ELEM:                                                                 \
        compare_data<ov::element_type_traits<ov::element::ELEM>::value_type>(src, dst);     \
        break;

    switch (src.get_element_type()) {
        OV_TENSOR_TEST_COMPARE_CASE(bf16)
        OV_TENSOR_TEST_COMPARE_CASE(f16)
        OV_TENSOR_TEST_COMPARE_CASE(f32)
        OV_TENSOR_TEST_COMPARE_CASE(f64)
        OV_TENSOR_TEST_COMPARE_CASE(i8)
        OV_TENSOR_TEST_COMPARE_CASE(i16)
        OV_TENSOR_TEST_COMPARE_CASE(i32)
        OV_TENSOR_TEST_COMPARE_CASE(i64)
        OV_TENSOR_TEST_COMPARE_CASE(u8)
        OV_TENSOR_TEST_COMPARE_CASE(u16)
        OV_TENSOR_TEST_COMPARE_CASE(u32)
        OV_TENSOR_TEST_COMPARE_CASE(u64)
    default:
        OPENVINO_UNREACHABLE("Unsupported data type");
    }

#undef OV_TENSOR_TEST_COMPARE_CASE
}
} // namespace
// Copies a source tensor into a destination tensor with ov::Tensor::copy_to and verifies that
// both tensors hold the same elements afterwards. Source and destination may each be either a
// plain tensor or a strided view over a larger backing tensor, depending on the test parameters.
TEST_P(OVTensorTestCopy, copy_to) {
    ov::element::Type type;
    TestParams p;
    std::tie(type, p) = GetParam();

    // Source tensors: with explicit strides, src_tensor is a view over the larger full_src_tensor
    // buffer; otherwise both names refer to the same plain tensor.
    ov::Tensor full_src_tensor;
    ov::Tensor src_tensor;
    if (!p.src_strides.empty()) {
        full_src_tensor = ov::Tensor(type, ov::Shape{p.src_shape[0] * p.src_strides[0]});
        src_tensor = ov::Tensor(type, p.src_shape, full_src_tensor.data(), p.src_strides);
    } else {
        src_tensor = full_src_tensor = ov::Tensor(type, p.src_shape);
    }
    // Element i of the backing buffer gets the value i, so misplaced elements are detectable.
    init_tensor(full_src_tensor, true);

    // Destination tensors, built the same way as the source ones.
    ov::Tensor full_dst_tensor;
    ov::Tensor dst_tensor;
    if (!p.dst_strides.empty()) {
        full_dst_tensor = ov::Tensor(type, ov::Shape{p.dst_shape[0] * p.dst_strides[0]});
        dst_tensor = ov::Tensor(type, p.dst_shape, full_dst_tensor.data(), p.dst_strides);
    } else {
        dst_tensor = full_dst_tensor = ov::Tensor(type, p.dst_shape);
    }
    // Pre-fill the destination (not the source) with the sentinel pattern so the comparison only
    // passes when copy_to really wrote the data. An empty destination has nothing to fill, and
    // skipping it avoids asserting on the data pointer of a zero-element tensor.
    if (full_dst_tensor.get_size() > 0) {
        init_tensor(full_dst_tensor, false);
    }

    src_tensor.copy_to(dst_tensor);
    compare_tensors(src_tensor, dst_tensor);
}
// clang-format off
// Instantiates OVTensorTestCopy for every combination of the element types and the
// shape/stride configurations listed below.
INSTANTIATE_TEST_SUITE_P(copy_tests,
OVTensorTestCopy,
::testing::Combine(::testing::Values(
ov::element::bf16,
ov::element::f16,
ov::element::f32,
ov::element::f64,
ov::element::i8,
ov::element::i16,
ov::element::i32,
ov::element::i64,
ov::element::u8,
ov::element::u16,
ov::element::u32,
ov::element::u64
),
::testing::Values(
// 4-D source without explicit strides; destination starts with shape {0} and no strides.
TestParams {
ov::Shape{1, 3, 4, 8}, {},
{0}, {}
},
// Source without explicit strides, destination with explicit strides.
TestParams {
ov::Shape{3, 2, 2}, {},
ov::Shape{3, 2, 2}, ov::Strides{128, 24, 8}
},
// Source with explicit strides, destination without.
TestParams {
ov::Shape{3, 2, 2}, ov::Strides{64, 16, 8},
ov::Shape{3, 2, 2}, ov::Strides{}
},
// Both tensors with explicit, different strides.
TestParams {
ov::Shape{3, 2, 2}, ov::Strides{64, 16, 8},
ov::Shape{3, 2, 2}, ov::Strides{128, 24, 8}
}
)));
// clang-format on

View File

@ -14,7 +14,7 @@
namespace ov {
// Forward declaration
void FRONTEND_API shutdown();
FRONTEND_API void shutdown();
namespace frontend {
// -------------- FrontEndManager -----------------
using FrontEndFactory = std::function<FrontEnd::Ptr()>;

View File

@ -19,6 +19,7 @@
#include "transforms/aten_cat_replacer.hpp"
#include "transforms/aten_getitem_replacer.hpp"
#include "transforms/aten_stack_list_construct_replacer.hpp"
#include "transforms/einsum_list_construct.hpp"
#include "transforms/listconstruct_replacer.hpp"
#include "transforms/min_max_prim_list_construct_replacer.hpp"
#include "transforms/prim_list_construct_pad.hpp"
@ -97,6 +98,7 @@ void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
manager.register_pass<ov::frontend::pytorch::pass::AtenGetItemReplacer>();
manager.register_pass<ov::frontend::pytorch::pass::ListConstructReplacer>();
manager.register_pass<ov::frontend::pytorch::pass::PrimListConstructPadReplacer>();
manager.register_pass<ov::frontend::pytorch::pass::AtenEinsumListConstructReplacer>();
manager.register_pass<ov::frontend::pytorch::pass::MinMaxPrimListConstructReplacer>();
manager.register_pass<ov::frontend::pytorch::pass::DecomposeListTupleResults>();
manager.register_pass<ov::pass::RemoveMultiSubGraphOpDanglingParams>();

View File

@ -142,6 +142,11 @@ ngraph::Shape NodeContext::const_input<ngraph::Shape>(size_t index) const {
return get_constant_at_input(*this, index)->cast_vector<ngraph::Shape::value_type>();
}
/// Specialization for int32_t: reads the constant connected to input `index`, casts its
/// contents to int32_t and returns the first element.
template <>
int32_t NodeContext::const_input<int32_t>(size_t index) const {
    const auto constant = get_constant_at_input(*this, index);
    const auto values = constant->cast_vector<int32_t>();
    return values[0];
}
template <>
int64_t NodeContext::const_input<int64_t>(size_t index) const {
return get_constant_at_input(*this, index)->cast_vector<int64_t>()[0];

View File

@ -0,0 +1,68 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/op/roi_align.hpp"
#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/convert_like.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/reshape.hpp"
#include "utils.hpp"
namespace ov {
namespace frontend {
namespace pytorch {
namespace op {
using namespace ov::op;
/// \brief Converter for torchvision::roi_align: builds an OpenVINO v9::ROIAlign subgraph.
///
/// Exactly 7 inputs are required. Input 0 is the feature map and input 1 the boxes; inputs 2-6
/// must be constants holding spatial scale, pooled height, pooled width, sampling ratio and the
/// `aligned` flag. Column 0 of the boxes is used as the batch index and columns 1-4 as the ROI
/// coordinates.
///
/// \param context Node context of the op being converted.
/// \return Single-element vector holding the ROIAlign output.
OutputVector translate_roi_align(NodeContext& context) {
    num_inputs_check(context, 7, 7);

    // Helper constants: gather axis, flattening target shape, and the box columns to pick.
    auto axis_one = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
    auto flat_shape = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
    auto batch_column = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
    auto coord_columns = context.mark_node(v0::Constant::create(element::i32, Shape{4}, {1, 2, 3, 4}));

    // The feature map is converted to f32 and the boxes are converted to the same type.
    auto feature_map = context.get_input(0);
    auto raw_boxes = context.get_input(1);
    auto feature_map_f32 = context.mark_node(std::make_shared<v0::Convert>(feature_map, element::f32));
    auto typed_boxes = context.mark_node(std::make_shared<v1::ConvertLike>(raw_boxes, feature_map_f32));

    // Scalar attributes come from constant inputs.
    auto scale = context.const_input<float>(2);
    int pooled_h = context.const_input<int32_t>(3);
    int pooled_w = context.const_input<int32_t>(4);
    int samples = context.const_input<int32_t>(5);
    auto is_aligned = context.const_input<bool>(6);

    // Split the boxes into coordinates (columns 1..4) and a flat i32 vector of batch indices (column 0).
    auto roi_coords = context.mark_node(std::make_shared<v8::Gather>(typed_boxes, coord_columns, axis_one));
    auto batch_column_values = context.mark_node(std::make_shared<v8::Gather>(typed_boxes, batch_column, axis_one));
    auto batch_flat = context.mark_node(std::make_shared<v1::Reshape>(batch_column_values, flat_shape, false));
    auto batch_ids = context.mark_node(std::make_shared<v0::Convert>(batch_flat, element::i32));

    v9::ROIAlign::AlignedMode mode = v9::ROIAlign::AlignedMode::ASYMMETRIC;
    if (is_aligned) {
        mode = v9::ROIAlign::AlignedMode::HALF_PIXEL_FOR_NN;
    }

    auto pooled = context.mark_node(std::make_shared<v9::ROIAlign>(feature_map_f32,
                                                                   roi_coords,
                                                                   batch_ids,
                                                                   pooled_h,
                                                                   pooled_w,
                                                                   samples,
                                                                   scale,
                                                                   v9::ROIAlign::PoolingMode::AVG,
                                                                   mode));
    return {pooled};
}
} // namespace op
} // namespace pytorch
} // namespace frontend
} // namespace ov

View File

@ -16,10 +16,12 @@ namespace op {
using namespace ov::op;
namespace {
OutputVector base_translate_upsample2d(const NodeContext& context, v4::Interpolate::InterpolateMode interpolate_mode) {
num_inputs_check(context, 3, 4);
OutputVector base_translate_upsample(const NodeContext& context,
v4::Interpolate::InterpolateMode interpolate_mode,
size_t dims) {
num_inputs_check(context, 1, 4);
auto data = context.get_input(0);
std::vector<size_t> pad{0};
std::vector<size_t> pad(dims, 0);
auto size_mode = v4::Interpolate::ShapeCalcMode::SIZES;
bool align_corners = false;
int scale_id = 2;
@ -29,11 +31,21 @@ OutputVector base_translate_upsample2d(const NodeContext& context, v4::Interpola
align_corners = context.const_input<bool>(2);
}
}
auto target_axes = std::make_shared<v0::Constant>(element::i32, Shape{2}, std::vector<int>({2, 3}));
std::vector<int> spatial_axes;
if (dims == 1) {
spatial_axes = {2};
} else if (dims == 2) {
spatial_axes = {2, 3};
} else if (dims == 3) {
spatial_axes = {2, 3, 4};
} else {
FRONT_END_OP_CONVERSION_CHECK(false, "Unsupported number of dimensions in upsample");
}
auto target_axes = std::make_shared<v0::Constant>(element::i32, Shape{spatial_axes.size()}, spatial_axes);
auto scales =
context.mark_node(std::make_shared<v0::Constant>(element::f32, Shape{2}, std::vector<double>({1, 1})));
context.mark_node(std::make_shared<v0::Constant>(element::f32, Shape{dims}, std::vector<double>(dims, 1)));
auto output_sizes =
context.mark_node(std::make_shared<v0::Constant>(element::i32, Shape{2}, std::vector<int>({1, 1})));
context.mark_node(std::make_shared<v0::Constant>(element::i32, Shape{dims}, std::vector<int>(dims, 1)));
if (context.input_is_none(1)) {
FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(scale_id), "Scale or Output size should be provided");
auto spatial_scales = context.get_input(scale_id);
@ -48,6 +60,7 @@ OutputVector base_translate_upsample2d(const NodeContext& context, v4::Interpola
attrs.coordinate_transformation_mode = v4::Interpolate::CoordinateTransformMode::ASYMMETRIC;
attrs.nearest_mode = v4::Interpolate::NearestMode::FLOOR;
if (attrs.mode != v4::Interpolate::InterpolateMode::NEAREST) {
attrs.coordinate_transformation_mode = v4::Interpolate::CoordinateTransformMode::PYTORCH_HALF_PIXEL;
if (align_corners) {
attrs.coordinate_transformation_mode = v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS;
}
@ -56,16 +69,33 @@ OutputVector base_translate_upsample2d(const NodeContext& context, v4::Interpola
};
} // namespace
// Converter for 1-D linear upsampling: delegates to base_translate_upsample with the
// LINEAR_ONNX interpolation mode and one spatial dimension.
OutputVector translate_upsample_linear1d(NodeContext& context) {
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 1);
};
OutputVector translate_upsample_bilinear2d(NodeContext& context) {
return base_translate_upsample2d(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX);
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 2);
};
// Converter for 3-D (trilinear) upsampling: delegates to base_translate_upsample with the
// LINEAR_ONNX interpolation mode and three spatial dimensions.
OutputVector translate_upsample_trilinear3d(NodeContext& context) {
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 3);
};
// Converter for 1-D nearest-neighbour upsampling: delegates to base_translate_upsample with the
// NEAREST interpolation mode and one spatial dimension.
OutputVector translate_upsample_nearest1d(NodeContext& context) {
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 1);
};
OutputVector translate_upsample_nearest2d(NodeContext& context) {
return base_translate_upsample2d(context, v4::Interpolate::InterpolateMode::NEAREST);
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 2);
};
// Converter for 3-D nearest-neighbour upsampling: delegates to base_translate_upsample with the
// NEAREST interpolation mode and three spatial dimensions.
OutputVector translate_upsample_nearest3d(NodeContext& context) {
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 3);
};
// bicubic is only supported for 2d in pytorch
OutputVector translate_upsample_bicubic2d(NodeContext& context) {
return base_translate_upsample2d(context, v4::Interpolate::InterpolateMode::CUBIC);
return base_translate_upsample(context, v4::Interpolate::InterpolateMode::CUBIC, 2);
};
} // namespace op

View File

@ -89,6 +89,7 @@ OP_CONVERTER(translate_repeat);
OP_CONVERTER(translate_repeat_interleave);
OP_CONVERTER(translate_reshape);
OP_CONVERTER(translate_reshape_as);
OP_CONVERTER(translate_roi_align);
OP_CONVERTER(translate_roll);
OP_CONVERTER(translate_rsqrt);
OP_CONVERTER(translate_rsub);
@ -110,7 +111,11 @@ OP_CONVERTER(translate_triu);
OP_CONVERTER(translate_unfold);
OP_CONVERTER(translate_upsample_bicubic2d);
OP_CONVERTER(translate_upsample_bilinear2d);
OP_CONVERTER(translate_upsample_linear1d);
OP_CONVERTER(translate_upsample_nearest1d);
OP_CONVERTER(translate_upsample_nearest2d);
OP_CONVERTER(translate_upsample_nearest3d);
OP_CONVERTER(translate_upsample_trilinear3d);
OP_CONVERTER(translate_var);
OP_CONVERTER(translate_var_mean);
OP_CONVERTER(translate_where);
@ -303,7 +308,11 @@ const std::map<std::string, PytorchCreatorFunction> get_supported_ops() {
{"aten::unsqueeze_", op::inplace_op<op::translate_1to1_match_2_inputs<opset10::Unsqueeze>>},
{"aten::upsample_bicubic2d", op::translate_upsample_bicubic2d},
{"aten::upsample_bilinear2d", op::translate_upsample_bilinear2d},
{"aten::upsample_linear1d", op::translate_upsample_linear1d},
{"aten::upsample_nearest1d", op::translate_upsample_nearest1d},
{"aten::upsample_nearest2d", op::translate_upsample_nearest2d},
{"aten::upsample_nearest3d", op::translate_upsample_nearest3d},
{"aten::upsample_trilinear3d", op::translate_upsample_trilinear3d},
{"aten::var", op::translate_var},
{"aten::var_mean", op::translate_var_mean},
{"aten::view", op::translate_reshape},
@ -319,6 +328,7 @@ const std::map<std::string, PytorchCreatorFunction> get_supported_ops() {
{"prim::NumToTensor", op::skip_node}, // In openvino we already store number as tensor with shape []
{"prim::requires_grad", op::return_false_scalar},
{"torchvision::nms", op::translate_nms},
{"torchvision::roi_align", op::translate_roi_align},
};
};

View File

@ -0,0 +1,68 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "einsum_list_construct.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/op/einsum.hpp"
#include "openvino/op/util/framework_node.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "utils.hpp"
using namespace ov::pass::pattern;
namespace ov {
namespace frontend {
namespace pytorch {
namespace pass {
using namespace ov::pass;
using namespace ov::op;
/// \brief Registers a matcher that replaces `aten::einsum(equation, prim::ListConstruct(...))`
/// with a single v7::Einsum operation.
///
/// The pattern matches any FrameworkNode; the callback then narrows it down and returns false
/// (leaving the graph untouched) unless all of the following hold:
///  - the matched node is an `aten::einsum` framework node with at least two inputs;
///  - input 0 is a `prim::Constant` framework node carrying a `string_value` attribute;
///  - input 1 is a `prim::ListConstruct` framework node.
AtenEinsumListConstructReplacer::AtenEinsumListConstructReplacer() {
    auto einsum_op = pattern::wrap_type<ov::op::util::FrameworkNode>();
    ov::matcher_pass_callback callback = [](pattern::Matcher& m) {
        auto einsum_op = cast_fw_node(m.get_match_root(), "aten::einsum");
        // Both the equation (input 0) and the tensor list (input 1) are read below.
        if (!einsum_op || einsum_op->get_input_size() < 2) {
            return false;
        }
        auto equation_input = einsum_op->input_value(0).get_node_shared_ptr();
        auto tensor_list = einsum_op->input_value(1).get_node_shared_ptr();

        // The equation must be a string constant.
        const auto equation_node = cast_fw_node(equation_input, "prim::Constant");
        if (!equation_node) {
            return false;
        }
        const auto& attrs = equation_node->get_attrs();
        if (attrs.find("string_value") == attrs.end()) {
            // A constant without a string payload cannot provide an equation; do not build an
            // Einsum with an empty equation, leave the node for other handling instead.
            return false;
        }
        const std::string equation = attrs.at("string_value");

        // The operands must come from a ListConstruct; its inputs become the Einsum inputs.
        auto list_construct_node = cast_fw_node(tensor_list, "prim::ListConstruct");
        if (!list_construct_node) {
            return false;
        }
        const OutputVector node_vector = list_construct_node->input_values();

        auto einsum = std::make_shared<v7::Einsum>(node_vector, equation);
        copy_runtime_info({einsum_op, equation_input, tensor_list}, einsum);
        replace_node(einsum_op, einsum);
        return true;
    };

    auto m =
        std::make_shared<pattern::Matcher>(einsum_op, "ov::frontend::pytorch::pass::AtenEinsumListConstructReplacer");
    this->register_matcher(m, callback);
}
} // namespace pass
} // namespace pytorch
} // namespace frontend
} // namespace ov

View File

@ -0,0 +1,24 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/pass.hpp"
namespace ov {
namespace frontend {
namespace pytorch {
namespace pass {
// Matcher pass of the PyTorch frontend dedicated to aten::einsum nodes whose operands are
// supplied through a prim::ListConstruct; the matching logic lives in the constructor.
class AtenEinsumListConstructReplacer : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("ov::frontend::pytorch::pass::AtenEinsumListConstructReplacer");
// Builds the pattern and registers the matcher callback.
AtenEinsumListConstructReplacer();
};
} // namespace pass
} // namespace pytorch
} // namespace frontend
} // namespace ov

View File

@ -58,11 +58,12 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
if (variants.size() != 1)
return false;
// Validating first path, it must contain a model
if (variants[0].is<std::string>()) {
std::string suffix = ".pb";
std::string model_path = variants[0].as<std::string>();
if (ov::util::ends_with(model_path, suffix.c_str())) {
if (ov::util::ends_with(model_path, ".pb") && GraphIteratorProto::is_supported(model_path)) {
// handle binary protobuf format
// for automatic deduction of the frontend to convert the model
// we have more strict rule that is to have `.pb` extension in the path
return true;
}
}
@ -70,12 +71,16 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
else if (variants[0].is<std::wstring>()) {
std::wstring suffix = L".pb";
std::wstring model_path = variants[0].as<std::wstring>();
if (ov::util::ends_with(model_path, suffix)) {
if (ov::util::ends_with(model_path, suffix) && GraphIteratorProto::is_supported(model_path)) {
// handle binary protobuf format with a path in Unicode
// for automatic deduction of the frontend to convert the model
// we have more strict rule that is to have `.pb` extension in the path
return true;
}
}
#endif
else if (variants[0].is<GraphIterator::Ptr>()) {
// this is used for OpenVINO with TensorFlow Integration
return true;
}
return false;
@ -83,33 +88,36 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector<ov::Any>& variants) const {
// TODO: Support other TensorFlow formats: SavedModel, .meta, checkpoint, pbtxt
if (variants.size() == 1) {
// a case when binary protobuf format is provided
if (variants[0].is<std::string>()) {
std::string suffix = ".pb";
std::string model_path = variants[0].as<std::string>();
if (ov::util::ends_with(model_path, suffix.c_str())) {
return std::make_shared<InputModel>(
std::make_shared<::ov::frontend::tensorflow::GraphIteratorProto>(model_path),
m_telemetry);
}
}
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
else if (variants[0].is<std::wstring>()) {
std::wstring suffix = L".pb";
std::wstring model_path = variants[0].as<std::wstring>();
if (ov::util::ends_with(model_path, suffix)) {
return std::make_shared<InputModel>(
std::make_shared<::ov::frontend::tensorflow::GraphIteratorProto>(model_path),
m_telemetry);
}
}
#endif
else if (variants[0].is<GraphIterator::Ptr>()) {
auto graph_iterator = variants[0].as<GraphIterator::Ptr>();
return std::make_shared<InputModel>(graph_iterator, m_telemetry);
FRONT_END_GENERAL_CHECK(variants.size() == 1,
"[TensorFlow Frontend] Internal error or inconsistent input model: the frontend supports "
"only frozen binary protobuf format.");
if (variants[0].is<std::string>()) {
auto model_path = variants[0].as<std::string>();
if (GraphIteratorProto::is_supported(model_path)) {
// handle binary protobuf format
return std::make_shared<InputModel>(std::make_shared<GraphIteratorProto>(model_path), m_telemetry);
}
}
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
else if (variants[0].is<std::wstring>()) {
std::wstring model_path = variants[0].as<std::wstring>();
if (GraphIteratorProto::is_supported(model_path)) {
// handle binary protobuf format with a path in Unicode
return std::make_shared<InputModel>(std::make_shared<GraphIteratorProto>(model_path), m_telemetry);
}
}
#endif
else if (variants[0].is<GraphIterator::Ptr>()) {
// this is used for OpenVINO with TensorFlow Integration
auto graph_iterator = variants[0].as<GraphIterator::Ptr>();
return std::make_shared<InputModel>(graph_iterator, m_telemetry);
}
FRONT_END_GENERAL_CHECK(false,
"[TensorFlow Frontend] Internal error or inconsistent input model: the frontend supports "
"only frozen binary protobuf format.");
return nullptr;
}

View File

@ -88,29 +88,40 @@ public:
}
}
/// Set iterator to the start position
/// \brief Check if the input file is supported
template <typename T>
static bool is_supported(const std::basic_string<T>& path) {
    // Open the candidate file in binary mode; a stream that could not be opened holds no model.
    std::ifstream pb_stream(path, std::ios::in | std::ifstream::binary);
    if (!pb_stream || !pb_stream.is_open()) {
        return false;
    }
    // The file is supported when its content can be parsed as a GraphDef message.
    auto graph_def = std::make_shared<::tensorflow::GraphDef>();
    return graph_def->ParsePartialFromIstream(&pb_stream);
}
/// \brief Set iterator to the start position
void reset() override {
node_index = 0;
}
/// \brief Return a number of nodes in the graph
size_t size() const override {
return m_decoders.size();
}
/// Moves to the next node in the graph
/// \brief Move to the next node in the graph
void next() override {
node_index++;
}
/// \brief Check if the graph is fully traversed
bool is_end() const override {
return node_index >= m_decoders.size();
}
/// Return NodeContext for the current node that iterator points to
/// \brief Return NodeContext for the current node that iterator points to
std::shared_ptr<DecoderBase> get_decoder() const override {
return m_decoders[node_index];
}
/// \brief Get GraphIterator for library function by name
std::shared_ptr<GraphIterator> get_body_graph_iterator(const std::string& func_name) const override {
if (m_library_map.count(func_name)) {
auto func_ind = m_library_map.at(func_name);
@ -127,10 +138,12 @@ public:
return nullptr;
}
/// \brief Get input names in the original order. Used for the library functions
std::vector<std::string> get_input_names() const override {
return m_input_names;
}
/// \brief Get output names in the original order. Used for the library functions
std::vector<std::string> get_output_names() const override {
return m_output_names;
}

View File

@ -14,6 +14,7 @@ file (GLOB LIBRARY_SRC
${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/dev/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/dev/preprocessing/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/dev/threading/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/threading/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/cpp_interfaces/interface/*.cpp

View File

@ -19,7 +19,7 @@
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/icore.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "threading/ie_executor_manager.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
namespace InferenceEngine {
@ -188,7 +188,7 @@ public:
* @brief Gets reference to tasks execution manager
* @return Reference to ExecutorManager interface
*/
const std::shared_ptr<InferenceEngine::ExecutorManager>& get_executor_manager() const;
const std::shared_ptr<ov::ExecutorManager>& get_executor_manager() const;
~IPlugin() = default;
@ -198,11 +198,11 @@ protected:
private:
friend ::InferenceEngine::IPluginWrapper;
std::string m_plugin_name; //!< A device name that plugins enables
std::weak_ptr<ov::ICore> m_core; //!< A pointer to ICore interface
std::shared_ptr<InferenceEngine::ExecutorManager> m_executor_manager; //!< A tasks execution manager
ov::Version m_version; //!< Member contains plugin version
bool m_is_new_api; //!< A flag which shows used API
std::string m_plugin_name; //!< A device name that plugins enables
std::weak_ptr<ov::ICore> m_core; //!< A pointer to ICore interface
std::shared_ptr<ov::ExecutorManager> m_executor_manager; //!< A tasks execution manager
ov::Version m_version; //!< Member contains plugin version
bool m_is_new_api; //!< A flag which shows used API
};
} // namespace ov

View File

@ -0,0 +1,77 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief OpenVINO Runtime Executor Manager
* @file openvino/runtime/threading/executor_manager.hpp
*/
#pragma once
#include "openvino/runtime/common.hpp"
#include "threading/ie_istreams_executor.hpp"
#include "threading/ie_itask_executor.hpp"
namespace ov {
/**
* @interface ExecutorManager
* @brief Interface for the task execution manager.
* This is a global point for getting task executor objects by string id.
* It is needed when multiple asynchronous requests must share unique executors to avoid oversubscription.
* E.g. there are 2 task executors for the CPU device: one in FPGA, another in OneDNN. Running both of them in
* parallel leads to suboptimal CPU usage. It is more efficient to run the corresponding tasks one by one via a
* single executor.
* @ingroup ov_dev_api_threading
*/
class OPENVINO_RUNTIME_API ExecutorManager {
public:
/**
* @brief Returns an executor by its unique identifier
* @param id A unique identifier of the device (usually the string representation of TargetDevice)
* @return A pointer to an existing or newly created ITaskExecutor
*/
virtual InferenceEngine::ITaskExecutor::Ptr get_executor(const std::string& id) = 0;
/**
* @brief Returns an idle CPU streams executor
*
* @param config Streams executor config
*
* @return Pointer to a streams executor
*/
virtual InferenceEngine::IStreamsExecutor::Ptr get_idle_cpu_streams_executor(
const InferenceEngine::IStreamsExecutor::Config& config) = 0;
/**
* @brief Configures the executor manager
*
* @param properties Map with configuration properties
*/
virtual void set_property(const ov::AnyMap& properties) = 0;
/**
* @brief Returns the value of a configuration property
*
* @param name Property name
*
* @return Property value
*/
virtual ov::Any get_property(const std::string& name) const = 0;
/**
* @cond
*/
virtual size_t get_executors_number() const = 0;
virtual size_t get_idle_cpu_streams_executors_number() const = 0;
virtual void clear(const std::string& id = {}) = 0;
/**
* @endcond
*/
virtual ~ExecutorManager() = default;
};
OPENVINO_API std::shared_ptr<ExecutorManager> executor_manager();
} // namespace ov

View File

@ -18,8 +18,16 @@
#include "threading/ie_istreams_executor.hpp"
#include "threading/ie_itask_executor.hpp"
namespace ov {
class ExecutorManager;
}
namespace InferenceEngine {
class IPluginWrapper;
/**
* @interface ExecutorManager
* @brief Interface for tasks execution manager.
@ -76,8 +84,15 @@ public:
*/
virtual void setTbbFlag(bool flag) = 0;
virtual bool getTbbFlag() = 0;
private:
virtual std::shared_ptr<ov::ExecutorManager> get_ov_manager() const = 0;
friend class IPluginWrapper;
};
INFERENCE_ENGINE_API_CPP(ExecutorManager::Ptr) executorManager();
std::shared_ptr<InferenceEngine::ExecutorManager> create_old_manager(
const std::shared_ptr<ov::ExecutorManager>& manager);
} // namespace InferenceEngine

View File

@ -44,6 +44,8 @@ public:
template <typename T>
T* data() = delete;
void copy_to(ov::Tensor& dst) const = delete;
/**
* @brief Returns a map of device-specific parameters required for low-level
* operations with underlying object.

View File

@ -34,8 +34,10 @@
#include "openvino/runtime/profiling_info.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/tensor.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#include "openvino/runtime/variable_state.hpp"
#include "so_ptr.hpp"
#include "threading/ie_executor_manager.hpp"
#include "transformations/utils/utils.hpp"
namespace {
@ -221,7 +223,7 @@ public:
version.description = ver.description;
SetVersion(version);
_isNewAPI = plugin->is_new_api();
_executorManager = plugin->get_executor_manager();
_executorManager = InferenceEngine::create_old_manager(plugin->get_executor_manager());
}
std::string GetName() const noexcept override {
return m_plugin->get_device_name();

View File

@ -28,6 +28,7 @@
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/remote_context.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#include "openvino/util/common_util.hpp"
#include "openvino/util/shared_object.hpp"
#include "preprocessing/preprocessing.hpp"
@ -57,7 +58,7 @@ void stripDeviceName(std::string& device, const std::string& substr) {
ov::CoreImpl::CoreImpl(bool _newAPI) : m_new_api(_newAPI) {
add_mutex(""); // Register global mutex
executorManagerPtr = InferenceEngine::executorManager();
m_executor_manager = ov::executor_manager();
for (const auto& it : ov::get_available_opsets()) {
opsetNames.insert(it.first);
}
@ -632,7 +633,7 @@ void ov::CoreImpl::set_property(const std::string& device_name, const AnyMap& pr
ov::Any ov::CoreImpl::get_property_for_core(const std::string& name) const {
if (name == ov::force_tbb_terminate.name()) {
const auto flag = InferenceEngine::executorManager()->getTbbFlag();
const auto flag = ov::executor_manager()->get_property(name).as<bool>();
return decltype(ov::force_tbb_terminate)::value_type(flag);
} else if (name == ov::cache_dir.name()) {
return ov::Any(coreConfig.get_cache_dir());
@ -993,7 +994,7 @@ void ov::CoreImpl::CoreConfig::set_and_update(ov::AnyMap& config) {
it = config.find(ov::force_tbb_terminate.name());
if (it != config.end()) {
auto flag = it->second.as<std::string>() == CONFIG_VALUE(YES) ? true : false;
InferenceEngine::executorManager()->setTbbFlag(flag);
ov::executor_manager()->set_property({{it->first, flag}});
config.erase(it);
}

View File

@ -21,7 +21,7 @@
#include "openvino/core/version.hpp"
#include "openvino/runtime/common.hpp"
#include "openvino/runtime/icompiled_model.hpp"
#include "threading/ie_executor_manager.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#ifdef OPENVINO_STATIC_LIBRARY
# include "ie_plugins.hpp"
@ -162,7 +162,7 @@ private:
}
};
InferenceEngine::ExecutorManager::Ptr executorManagerPtr;
std::shared_ptr<ov::ExecutorManager> m_executor_manager;
mutable std::unordered_set<std::string> opsetNames;
// TODO: make extensions to be optional with conditional compilation
mutable std::vector<InferenceEngine::IExtensionPtr> extensions;

View File

@ -4,7 +4,7 @@
#include "openvino/runtime/iplugin.hpp"
ov::IPlugin::IPlugin() : m_executor_manager(InferenceEngine::executorManager()), m_is_new_api(true) {}
ov::IPlugin::IPlugin() : m_executor_manager(ov::executor_manager()), m_is_new_api(true) {}
void ov::IPlugin::set_version(const ov::Version& version) {
m_version = version;
@ -42,7 +42,7 @@ bool ov::IPlugin::is_new_api() const {
return m_is_new_api;
}
const std::shared_ptr<InferenceEngine::ExecutorManager>& ov::IPlugin::get_executor_manager() const {
const std::shared_ptr<ov::ExecutorManager>& ov::IPlugin::get_executor_manager() const {
return m_executor_manager;
}

View File

@ -9,6 +9,7 @@
#include "any_copy.hpp"
#include "dev/converter_utils.hpp"
#include "ie_icore.hpp"
#include "threading/ie_executor_manager.hpp"
namespace InferenceEngine {
@ -20,7 +21,7 @@ IPluginWrapper::IPluginWrapper(const std::shared_ptr<InferenceEngine::IInference
m_plugin_name = m_old_plugin->GetName();
m_is_new_api = m_old_plugin->IsNewAPI();
m_core = m_old_plugin->GetCore();
m_executor_manager = m_old_plugin->executorManager();
m_executor_manager = m_old_plugin->executorManager()->get_ov_manager();
}
const std::shared_ptr<InferenceEngine::IExecutableNetworkInternal>& IPluginWrapper::update_exec_network(

View File

@ -0,0 +1,208 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/runtime/threading/executor_manager.hpp"
#include "openvino/core/parallel.hpp"
#include "openvino/runtime/properties.hpp"
#include "threading/ie_cpu_streams_executor.hpp"
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
# if (TBB_INTERFACE_VERSION < 12000)
# include <tbb/task_scheduler_init.h>
# else
# include <oneapi/tbb/global_control.h>
# endif
#endif
#include <memory>
#include <mutex>
#include <string>
#include <utility>
namespace ov {
namespace {
// ExecutorManager implementation that caches executors and owns the TBB scheduler handle.
//
// Task executors are cached by id, CPU streams executors are cached together with the
// config they were created from, and the ov::force_tbb_terminate property decides whether
// a TBB scheduler handle is kept so that TBB can be finalized in reset_tbb().
class ExecutorManagerImpl : public ExecutorManager {
public:
~ExecutorManagerImpl();
InferenceEngine::ITaskExecutor::Ptr get_executor(const std::string& id) override;
InferenceEngine::IStreamsExecutor::Ptr get_idle_cpu_streams_executor(
const InferenceEngine::IStreamsExecutor::Config& config) override;
size_t get_executors_number() const override;
size_t get_idle_cpu_streams_executors_number() const override;
void clear(const std::string& id = {}) override;
void set_property(const ov::AnyMap& properties) override;
ov::Any get_property(const std::string& name) const override;
private:
// Finalizes TBB if termination was requested; called from the destructor.
void reset_tbb();
// id -> task executor cache; accessed under taskExecutorMutex.
std::unordered_map<std::string, InferenceEngine::ITaskExecutor::Ptr> executors;
// (config, executor) pairs handed out by get_idle_cpu_streams_executor(); accessed under streamExecutorMutex.
std::vector<std::pair<InferenceEngine::IStreamsExecutor::Config, InferenceEngine::IStreamsExecutor::Ptr>>
cpuStreamsExecutors;
mutable std::mutex streamExecutorMutex;
mutable std::mutex taskExecutorMutex;
// Current value of ov::force_tbb_terminate; accessed under global_mutex.
bool tbbTerminateFlag = false;
mutable std::mutex global_mutex;
// Set to true whenever this manager creates a CPUStreamsExecutor.
// NOTE(review): written while holding taskExecutorMutex / streamExecutorMutex but read in
// reset_tbb() under global_mutex - confirm that no concurrent access is possible.
bool tbbThreadsCreated = false;
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
// Scheduler handle created in set_property() while termination is requested; the concrete
// type depends on the TBB interface version.
# if (TBB_INTERFACE_VERSION < 12000)
std::shared_ptr<tbb::task_scheduler_init> tbbTaskScheduler = nullptr;
# else
std::shared_ptr<oneapi::tbb::task_scheduler_handle> tbbTaskScheduler = nullptr;
# endif
#endif
};
} // namespace
// Gives reset_tbb() a chance to finalize TBB before the manager is destroyed;
// reset_tbb() is a no-op unless force_tbb_terminate was enabled.
ExecutorManagerImpl::~ExecutorManagerImpl() {
reset_tbb();
}
// Applies the given properties to the manager.
//
// Only ov::force_tbb_terminate is recognised; every other entry is skipped without error.
// Enabling the flag acquires a TBB scheduler handle (once), disabling it drops the handle.
void ExecutorManagerImpl::set_property(const ov::AnyMap& properties) {
    std::lock_guard<std::mutex> lock(global_mutex);
    for (const auto& property : properties) {
        if (property.first != ov::force_tbb_terminate.name()) {
            continue;
        }
        tbbTerminateFlag = property.second.as<bool>();
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
        if (!tbbTerminateFlag) {
            // Termination is no longer requested: release the handle.
            tbbTaskScheduler = nullptr;
        } else if (!tbbTaskScheduler) {
            // First request: the way a handle is obtained depends on the TBB interface version.
#    if (TBB_INTERFACE_VERSION < 12000)
            tbbTaskScheduler = std::make_shared<tbb::task_scheduler_init>();
#    elif (TBB_INTERFACE_VERSION < 12060)
            tbbTaskScheduler =
                std::make_shared<oneapi::tbb::task_scheduler_handle>(oneapi::tbb::task_scheduler_handle::get());
#    else
            tbbTaskScheduler = std::make_shared<oneapi::tbb::task_scheduler_handle>(tbb::attach{});
#    endif
        }
#endif
    }
}
// Returns the value of a manager property.
//
// ov::force_tbb_terminate is the only supported name; any other name is reported through
// OPENVINO_UNREACHABLE.
ov::Any ExecutorManagerImpl::get_property(const std::string& name) const {
    std::lock_guard<std::mutex> lock(global_mutex);
    if (name != ov::force_tbb_terminate.name()) {
        OPENVINO_UNREACHABLE("Property ", name, " is not supported.");
    }
    return tbbTerminateFlag;
}
// Finalizes TBB when termination was requested and clears all TBB-related state.
//
// Does nothing unless the force_tbb_terminate flag is set. The scheduler is only finalized
// when a handle exists and this manager has created at least one executor.
void ExecutorManagerImpl::reset_tbb() {
    std::lock_guard<std::mutex> lock(global_mutex);
    if (!tbbTerminateFlag) {
        return;
    }
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
    if (tbbThreadsCreated && tbbTaskScheduler) {
#    if (TBB_INTERFACE_VERSION < 12000)
        tbbTaskScheduler->terminate();
#    else
        tbb::finalize(*tbbTaskScheduler, std::nothrow);
#    endif
    }
    tbbTaskScheduler = nullptr;
    tbbThreadsCreated = false;
#endif
    tbbTerminateFlag = false;
}
// Returns the task executor registered under `id`, creating and caching a new
// CPUStreamsExecutor (configured with just that id) on first request.
InferenceEngine::ITaskExecutor::Ptr ExecutorManagerImpl::get_executor(const std::string& id) {
    std::lock_guard<std::mutex> lock(taskExecutorMutex);
    const auto cached = executors.find(id);
    if (cached != executors.end()) {
        return cached->second;
    }
    auto created =
        std::make_shared<InferenceEngine::CPUStreamsExecutor>(InferenceEngine::IStreamsExecutor::Config{id});
    // Remember that an executor exists so reset_tbb() knows there may be TBB threads to finalize.
    tbbThreadsCreated = true;
    executors[id] = created;
    return created;
}
// Returns a cached CPU streams executor that nobody else holds and whose config matches
// `config`; otherwise creates a new executor, caches it and returns it.
//
// An executor counts as idle when the cache owns the only reference (use_count() == 1).
// The preferred core type is compared only for the HYBRID_AWARE binding type.
InferenceEngine::IStreamsExecutor::Ptr ExecutorManagerImpl::get_idle_cpu_streams_executor(
    const InferenceEngine::IStreamsExecutor::Config& config) {
    using StreamsExecutor = InferenceEngine::IStreamsExecutor;
    std::lock_guard<std::mutex> lock(streamExecutorMutex);

    const auto is_compatible = [&config](const StreamsExecutor::Config& cached) -> bool {
        const bool same_settings = cached._name == config._name && cached._streams == config._streams &&
                                   cached._threadsPerStream == config._threadsPerStream &&
                                   cached._threadBindingType == config._threadBindingType &&
                                   cached._threadBindingStep == config._threadBindingStep &&
                                   cached._threadBindingOffset == config._threadBindingOffset;
        if (!same_settings) {
            return false;
        }
        return cached._threadBindingType != StreamsExecutor::ThreadBindingType::HYBRID_AWARE ||
               cached._threadPreferredCoreType == config._threadPreferredCoreType;
    };

    for (const auto& entry : cpuStreamsExecutors) {
        if (entry.second.use_count() == 1 && is_compatible(entry.first)) {
            return entry.second;
        }
    }

    auto created = std::make_shared<InferenceEngine::CPUStreamsExecutor>(config);
    // Remember that an executor exists so reset_tbb() knows there may be TBB threads to finalize.
    tbbThreadsCreated = true;
    cpuStreamsExecutors.emplace_back(config, created);
    return created;
}
// Returns how many task executors are currently cached (entries created by get_executor()).
size_t ExecutorManagerImpl::get_executors_number() const {
std::lock_guard<std::mutex> guard(taskExecutorMutex);
return executors.size();
}
// Returns the number of cached CPU streams executors.
// Note: this is the size of the whole cache; entries are counted whether or not they are
// currently held by a caller.
size_t ExecutorManagerImpl::get_idle_cpu_streams_executors_number() const {
std::lock_guard<std::mutex> guard(streamExecutorMutex);
return cpuStreamsExecutors.size();
}
void ExecutorManagerImpl::clear(const std::string& id) {
std::lock_guard<std::mutex> stream_guard(streamExecutorMutex);
std::lock_guard<std::mutex> task_guard(taskExecutorMutex);
if (id.empty()) {
executors.clear();
cpuStreamsExecutors.clear();
} else {
executors.erase(id);
cpuStreamsExecutors.erase(std::remove_if(cpuStreamsExecutors.begin(),
cpuStreamsExecutors.end(),
[&](const std::pair<InferenceEngine::IStreamsExecutor::Config,
InferenceEngine::IStreamsExecutor::Ptr>& it) {
return it.first._name == id;
}),
cpuStreamsExecutors.end());
}
}
namespace {
// Keeps a weak reference to the executor manager and re-creates the manager whenever no
// owner is left. Access is serialized with an internal mutex; the holder is non-copyable.
class ExecutorManagerHolder {
    std::mutex _mutex;
    std::weak_ptr<ExecutorManager> _manager;

public:
    ExecutorManagerHolder() = default;
    ExecutorManagerHolder(const ExecutorManagerHolder&) = delete;
    ExecutorManagerHolder& operator=(const ExecutorManagerHolder&) = delete;

    // Returns the live manager, or creates a fresh ExecutorManagerImpl if the previous one expired.
    std::shared_ptr<ov::ExecutorManager> get() {
        std::lock_guard<std::mutex> guard(_mutex);
        std::shared_ptr<ov::ExecutorManager> alive = _manager.lock();
        if (alive) {
            return alive;
        }
        alive = std::make_shared<ExecutorManagerImpl>();
        _manager = alive;
        return alive;
    }
};
} // namespace
// Returns the shared executor manager.
// The holder is a function-local static, so every call goes through the same holder, which
// creates the manager on demand and only keeps a weak reference to it.
std::shared_ptr<ExecutorManager> executor_manager() {
static ExecutorManagerHolder executorManagerHolder;
return executorManagerHolder.get();
}
} // namespace ov

View File

@ -5,6 +5,8 @@
#include "threading/ie_executor_manager.hpp"
#include "ie_parallel.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/threading/executor_manager.hpp"
#include "threading/ie_cpu_streams_executor.hpp"
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
# if (TBB_INTERFACE_VERSION < 12000)
@ -23,7 +25,7 @@ namespace InferenceEngine {
namespace {
class ExecutorManagerImpl : public ExecutorManager {
public:
~ExecutorManagerImpl();
ExecutorManagerImpl(const std::shared_ptr<ov::ExecutorManager>& manager);
ITaskExecutor::Ptr getExecutor(const std::string& id) override;
IStreamsExecutor::Ptr getIdleCPUStreamsExecutor(const IStreamsExecutor::Config& config) override;
size_t getExecutorsNumber() const override;
@ -33,134 +35,47 @@ public:
bool getTbbFlag() override;
private:
void resetTbb();
std::unordered_map<std::string, ITaskExecutor::Ptr> executors;
std::vector<std::pair<IStreamsExecutor::Config, IStreamsExecutor::Ptr>> cpuStreamsExecutors;
mutable std::mutex streamExecutorMutex;
mutable std::mutex taskExecutorMutex;
bool tbbTerminateFlag = false;
mutable std::mutex tbbMutex;
bool tbbThreadsCreated = false;
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
# if (TBB_INTERFACE_VERSION < 12000)
std::shared_ptr<tbb::task_scheduler_init> tbbTaskScheduler = nullptr;
# else
std::shared_ptr<oneapi::tbb::task_scheduler_handle> tbbTaskScheduler = nullptr;
# endif
#endif
std::shared_ptr<ov::ExecutorManager> m_manager;
std::shared_ptr<ov::ExecutorManager> get_ov_manager() const override {
return m_manager;
}
};
} // namespace
ExecutorManagerImpl::~ExecutorManagerImpl() {
resetTbb();
}
ExecutorManagerImpl::ExecutorManagerImpl(const std::shared_ptr<ov::ExecutorManager>& manager) : m_manager(manager) {}
void ExecutorManagerImpl::setTbbFlag(bool flag) {
std::lock_guard<std::mutex> guard(tbbMutex);
tbbTerminateFlag = flag;
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
if (tbbTerminateFlag) {
if (!tbbTaskScheduler) {
# if (TBB_INTERFACE_VERSION < 12000)
tbbTaskScheduler = std::make_shared<tbb::task_scheduler_init>();
# elif (TBB_INTERFACE_VERSION < 12060)
tbbTaskScheduler =
std::make_shared<oneapi::tbb::task_scheduler_handle>(oneapi::tbb::task_scheduler_handle::get());
# else
tbbTaskScheduler = std::make_shared<oneapi::tbb::task_scheduler_handle>(tbb::attach{});
# endif
}
} else {
tbbTaskScheduler = nullptr;
}
#endif
m_manager->set_property({{ov::force_tbb_terminate.name(), flag}});
}
bool ExecutorManagerImpl::getTbbFlag() {
std::lock_guard<std::mutex> guard(tbbMutex);
return tbbTerminateFlag;
}
void ExecutorManagerImpl::resetTbb() {
std::lock_guard<std::mutex> guard(tbbMutex);
if (tbbTerminateFlag) {
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
if (tbbTaskScheduler && tbbThreadsCreated) {
# if (TBB_INTERFACE_VERSION < 12000)
tbbTaskScheduler->terminate();
# else
tbb::finalize(*tbbTaskScheduler, std::nothrow);
# endif
}
tbbThreadsCreated = false;
tbbTaskScheduler = nullptr;
#endif
tbbTerminateFlag = false;
}
return m_manager->get_property(ov::force_tbb_terminate.name()).as<bool>();
}
ITaskExecutor::Ptr ExecutorManagerImpl::getExecutor(const std::string& id) {
std::lock_guard<std::mutex> guard(taskExecutorMutex);
auto foundEntry = executors.find(id);
if (foundEntry == executors.end()) {
auto newExec = std::make_shared<CPUStreamsExecutor>(IStreamsExecutor::Config{id});
tbbThreadsCreated = true;
executors[id] = newExec;
return newExec;
}
return foundEntry->second;
return m_manager->get_executor(id);
}
IStreamsExecutor::Ptr ExecutorManagerImpl::getIdleCPUStreamsExecutor(const IStreamsExecutor::Config& config) {
std::lock_guard<std::mutex> guard(streamExecutorMutex);
for (const auto& it : cpuStreamsExecutors) {
const auto& executor = it.second;
if (executor.use_count() != 1)
continue;
const auto& executorConfig = it.first;
if (executorConfig._name == config._name && executorConfig._streams == config._streams &&
executorConfig._threadsPerStream == config._threadsPerStream &&
executorConfig._threadBindingType == config._threadBindingType &&
executorConfig._threadBindingStep == config._threadBindingStep &&
executorConfig._threadBindingOffset == config._threadBindingOffset)
if (executorConfig._threadBindingType != IStreamsExecutor::ThreadBindingType::HYBRID_AWARE ||
executorConfig._threadPreferredCoreType == config._threadPreferredCoreType)
return executor;
}
auto newExec = std::make_shared<CPUStreamsExecutor>(config);
tbbThreadsCreated = true;
cpuStreamsExecutors.emplace_back(std::make_pair(config, newExec));
return newExec;
return m_manager->get_idle_cpu_streams_executor(config);
}
size_t ExecutorManagerImpl::getExecutorsNumber() const {
std::lock_guard<std::mutex> guard(taskExecutorMutex);
return executors.size();
return m_manager->get_executors_number();
}
size_t ExecutorManagerImpl::getIdleCPUStreamsExecutorsNumber() const {
std::lock_guard<std::mutex> guard(streamExecutorMutex);
return cpuStreamsExecutors.size();
return m_manager->get_idle_cpu_streams_executors_number();
}
void ExecutorManagerImpl::clear(const std::string& id) {
std::lock_guard<std::mutex> stream_guard(streamExecutorMutex);
std::lock_guard<std::mutex> task_guard(taskExecutorMutex);
if (id.empty()) {
executors.clear();
cpuStreamsExecutors.clear();
} else {
executors.erase(id);
cpuStreamsExecutors.erase(
std::remove_if(cpuStreamsExecutors.begin(),
cpuStreamsExecutors.end(),
[&](const std::pair<IStreamsExecutor::Config, IStreamsExecutor::Ptr>& it) {
return it.first._name == id;
}),
cpuStreamsExecutors.end());
}
return m_manager->clear(id);
}
// Wraps an ov::ExecutorManager into an InferenceEngine::ExecutorManager by constructing
// the ExecutorManagerImpl adapter around `manager`.
std::shared_ptr<InferenceEngine::ExecutorManager> create_old_manager(
const std::shared_ptr<ov::ExecutorManager>& manager) {
return std::make_shared<ExecutorManagerImpl>(manager);
}
namespace {
@ -179,7 +94,7 @@ public:
std::lock_guard<std::mutex> lock(_mutex);
auto manager = _manager.lock();
if (!manager) {
_manager = manager = std::make_shared<ExecutorManagerImpl>();
_manager = manager = create_old_manager(ov::executor_manager());
}
return manager;
}

View File

@ -109,6 +109,7 @@ program::program(engine& engine_ref,
processing_order(),
is_body_program(is_body_program),
is_subgroup_local_block_io_supported(-1) {
_config.apply_user_properties(_engine.get_device_info());
init_primitives();
set_options();
query_local_block_io_supported();
@ -141,6 +142,7 @@ program::program(engine& engine_ref,
_task_executor(task_executor),
processing_order(),
is_subgroup_local_block_io_supported(-1) {
_config.apply_user_properties(_engine.get_device_info());
init_primitives();
set_options();
query_local_block_io_supported();
@ -160,7 +162,9 @@ program::program(engine& engine)
_stream(_engine.create_stream({})),
_config(),
processing_order(),
is_subgroup_local_block_io_supported(-1) { }
is_subgroup_local_block_io_supported(-1) {
_config.apply_user_properties(_engine.get_device_info());
}
program::~program() {
query_local_block_io_supported();
}

View File

@ -4,27 +4,28 @@
#include "common.cl"
#define GET_FILTER_OS_IS_YX_ISV16_OSV16_INDEX(prefix, o, i, y, x, sub_group_size) \
CAT(prefix, _OFFSET) + \
((o) % (sub_group_size)) + \
(sub_group_size)*( \
(x)*(sub_group_size)*CAT(prefix, _X_PITCH) + \
(y)*(sub_group_size)*CAT(prefix, _Y_PITCH) + \
((i) % (sub_group_size)) + \
((i) / (sub_group_size))*(sub_group_size)*CAT(prefix, _IFM_PITCH) + \
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
#define GET_FILTER_OS_IS_YX_ISV_OSV_INDEX(prefix, o, i, y, x, osv, isv) \
get_os_is_zyx_isv_osv_index( \
o, i, 0, y, x, \
CAT(prefix, _SIZE_X), \
CAT(prefix, _SIZE_Y), \
1, \
CAT(prefix, _IFM_NUM), \
CAT(prefix, _OFM_NUM), \
osv, \
isv \
)
#define GET_FILTER_OS_IS_ZYX_ISV16_OSV16_INDEX(prefix, o, i, z, y, x, sub_group_size) \
CAT(prefix, _OFFSET) + \
((o) % (sub_group_size)) + \
(sub_group_size)*( \
(x)*(sub_group_size)*CAT(prefix, _X_PITCH) + \
(y)*(sub_group_size)*CAT(prefix, _Y_PITCH) + \
(z)*(sub_group_size)*CAT(prefix, _Z_PITCH) + \
((i) % (sub_group_size)) + \
((i) / (sub_group_size))*(sub_group_size)*CAT(prefix, _IFM_PITCH) + \
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
#define GET_FILTER_OS_IS_ZYX_ISV_OSV_INDEX(prefix, o, i, z, y, x, osv, isv) \
get_os_is_zyx_isv_osv_index( \
o, i, z, y, x, \
CAT(prefix, _SIZE_X), \
CAT(prefix, _SIZE_Y), \
CAT(prefix, _SIZE_Z), \
CAT(prefix, _IFM_NUM), \
CAT(prefix, _OFM_NUM), \
osv, \
isv \
)
#define GET_FILTER_IS_OS_ZYX_ISV16_OSV16_INDEX(prefix, o, i, z, y, x, sub_group_size) \
@ -85,6 +86,32 @@
CAT(prefix, _OFFSET) \
)
// Linear offset of weight element (o, i, z, y, x) in a blocked os_is_zyx_isv_osv layout.
//
// Innermost to outermost: position inside the osv slice, position inside the isv slice,
// x, y, z, input-feature slice, output-feature slice. The number of input-feature slices
// is i_size rounded up to a multiple of isv_size. o_size is accepted but not used.
inline uint get_os_is_zyx_isv_osv_index(uint o, uint i, uint z, uint y, uint x,
    uint x_size, uint y_size, uint z_size, uint i_size, uint o_size, uint osv_size, uint isv_size)
{
    // Strides, each built from the previous one.
    const uint block_stride = osv_size * isv_size;
    const uint row_stride = block_stride * x_size;
    const uint plane_stride = row_stride * y_size;
    const uint i_slice_stride = plane_stride * z_size;
    const uint i_slice_count = (i_size + isv_size - 1) / isv_size;
    const uint o_slice_stride = i_slice_stride * i_slice_count;

    // Offset of the element inside its (isv x osv) block.
    const uint in_block = (o % osv_size) + (i % isv_size) * osv_size;

    return in_block +
           x * block_stride +
           y * row_stride +
           z * plane_stride +
           (i / isv_size) * i_slice_stride +
           (o / osv_size) * o_slice_stride;
}
inline uint get_os_is_zyx_osv_isv_index(uint o, uint i, uint z, uint y, uint x,
uint x_size, uint y_size, uint z_size, uint i_size, uint o_size, uint osv_size, uint isv_size)
{
@ -329,7 +356,7 @@ inline uint get_os_zyxi_osv16_index(uint o, uint i, uint z, uint y, uint x, uint
#define GET_FILTER_INDEX_5D_SAFE(prefix, g, o, i, z, y, x) GET_FILTER_GOIZYX_SAFE(prefix, g, o, i, z, y, x)
#define GET_FILTER_OS_IYX_OSV8_INDEX(prefix, o, i, y, x, sub_group_size) \
#define GET_FILTER_OS_IYX_OSV_INDEX(prefix, o, i, y, x, sub_group_size) \
CAT(prefix, _OFFSET) + \
((o) % (sub_group_size)) + \
(sub_group_size)*( \
@ -339,7 +366,7 @@ inline uint get_os_zyxi_osv16_index(uint o, uint i, uint z, uint y, uint x, uint
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
)
#define GET_FILTER_OS_IYX_OSV8_ROTATE_180_INDEX(prefix, o, i, y, x, sub_group_size) \
#define GET_FILTER_OS_IYX_OSV_ROTATE_180_INDEX(prefix, o, i, y, x, sub_group_size) \
CAT(prefix, _OFFSET) + \
((o) % (sub_group_size)) + \
(sub_group_size)*( \
@ -1495,16 +1522,6 @@ inline uint get_os_i_yxs_osv_yxsv4_index(uint o, uint i, uint y, uint x, uint i_
CAT(prefix, _SIZE_Y), \
4)
#define GET_FILTER_OS_IYX_OSV32__AI32_INDEX(prefix, o, i, y, x, sub_group_size) \
CAT(prefix, _OFFSET) + \
((o) % (sub_group_size)) + \
(sub_group_size)*( \
(x)*CAT(prefix, _X_PITCH) + \
(y)*CAT(prefix, _Y_PITCH) + \
(i)*CAT(prefix, _IFM_PITCH) + \
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
)
#define GET_FILTER_G_OS_IYX_OSV16(prefix, g, o, i, y, x, sub_group_size) \
CAT(prefix, _OFFSET) + \
(g * CAT(prefix, _GROUPS_PITCH)) + \

View File

@ -25,19 +25,20 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x
return GET_FILTER_INDEX_5D(INPUT0, 0, o, i, z, y, x);
#elif defined INPUT0_LAYOUT_OS_IYX_OSV16 || \
defined INPUT0_LAYOUT_OS_I_OSV16 || \
defined INPUT0_LAYOUT_OS_I_OSV8__AI8 || \
defined INPUT0_LAYOUT_OS_I_OSV16__AI8
return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 16);
#elif defined INPUT0_LAYOUT_OS_I_OSV8__AI8
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 8);
#elif defined INPUT0_LAYOUT_IYX_OSV32
return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, 32);
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 32);
#elif defined INPUT0_LAYOUT_OS_IYX_OSV32__AI32
return GET_FILTER_OS_IYX_OSV32__AI32_INDEX(OUTPUT, o, i, y, x, 32);
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 32);
#elif defined INPUT0_LAYOUT_O_IS_YX_ISV16
return GET_FILTER_O_IS_YX_ISV16_INDEX(INPUT0, o, i, y, x, 16);
#elif defined INPUT0_LAYOUT_IYX_OSV64
return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, 64);
return GET_FILTER_OS_IYX_OSV_INDEX(INPUT0, o, i, y, x, 64);
#elif defined INPUT0_LAYOUT_OS_IYX_OSV16_ROTATE_180
return GET_FILTER_OS_IYX_OSV8_ROTATE_180_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
return GET_FILTER_OS_IYX_OSV_ROTATE_180_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
#elif defined INPUT0_LAYOUT_I_YXS_OS_YXSV2_OSV16
return GET_FILTER_I_YXS_OS_YXSV2_OSV_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
#elif defined INPUT0_LAYOUT_IY_XS_OS_XSV2_OSV16__AO32 || defined OUTPUT_LAYOUT_IY_XS_OS_XSV2_OSV8__AO32
@ -61,11 +62,11 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x
#elif defined INPUT0_LAYOUT_OS_IS_Y_X8_OSV8_ISV4_SWIZZLED_BY_4
return GET_FILTER_OS_IS_Y_X8_OSV8_ISV4_SWIZZLED_BY_4(INPUT0, o, i, y, x);
#elif defined INPUT0_LAYOUT_OS_IS_YX_ISV16_OSV16
return GET_FILTER_OS_IS_YX_ISV16_OSV16_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
return GET_FILTER_OS_IS_YX_ISV_OSV_INDEX(INPUT0, o, i, y, x, 16, 16);
#elif defined INPUT0_LAYOUT_OIYX_O16
return GET_FILTER_OIYX_O16(INPUT0, o, i, y, x);
#elif defined INPUT0_LAYOUT_OS_IS_ZYX_ISV16_OSV16
return GET_FILTER_OS_IS_ZYX_ISV16_OSV16_INDEX(INPUT0, o, i, z, y, x, SUB_GROUP_SIZE);
return GET_FILTER_OS_IS_ZYX_ISV_OSV_INDEX(INPUT0, o, i, z, y, x, 16, 16);
#elif defined INPUT0_LAYOUT_IS_OS_ZYX_ISV16_OSV16
return GET_FILTER_IS_OS_ZYX_ISV16_OSV16_INDEX(INPUT0, o, i, z, y, x, SUB_GROUP_SIZE);
#elif defined INPUT0_LAYOUT_IS_OS_YX_ISV16_OSV16
@ -219,19 +220,20 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint
return GET_FILTER_INDEX_5D(OUTPUT, 0, o, i, z, y, x);
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV16 || \
defined OUTPUT_LAYOUT_OS_I_OSV16 || \
defined OUTPUT_LAYOUT_OS_I_OSV8__AI8 || \
defined OUTPUT_LAYOUT_OS_I_OSV16__AI8
return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 16);
#elif defined OUTPUT_LAYOUT_OS_I_OSV8__AI8
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 8);
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV32
return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, 32);
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 32);
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV32__AI32
return GET_FILTER_OS_IYX_OSV32__AI32_INDEX(OUTPUT, o, i, y, x, 32);
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 32);
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV64
return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, 64);
return GET_FILTER_OS_IYX_OSV_INDEX(OUTPUT, o, i, y, x, 64);
#elif defined OUTPUT_LAYOUT_O_IS_YX_ISV16
return GET_FILTER_O_IS_YX_ISV16_INDEX(OUTPUT, o, i, y, x, 16);
#elif defined OUTPUT_LAYOUT_OS_IYX_OSV16_ROTATE_180
return GET_FILTER_OS_IYX_OSV8_ROTATE_180_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
return GET_FILTER_OS_IYX_OSV_ROTATE_180_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
#elif defined OUTPUT_LAYOUT_I_YXS_OS_YXSV2_OSV16
return GET_FILTER_I_YXS_OS_YXSV2_OSV_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
#elif defined OUTPUT_LAYOUT_IY_XS_OS_XSV2_OSV16__AO32 || defined OUTPUT_LAYOUT_IY_XS_OS_XSV2_OSV8__AO32
@ -313,11 +315,11 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint
#elif defined OUTPUT_LAYOUT_OS_IS_ZYX_OSA4_ISA8_OSV8_ISV4_SWIZZLED_BY_4
return GET_FILTER_OS_IS_ZYX_OSA4_ISA8_OSV8_ISV4_SWIZZLED_BY_4_INDEX(OUTPUT, o, i, z, y, x);
#elif defined OUTPUT_LAYOUT_OS_IS_YX_ISV16_OSV16
return GET_FILTER_OS_IS_YX_ISV16_OSV16_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
return GET_FILTER_OS_IS_YX_ISV_OSV_INDEX(OUTPUT, o, i, y, x, 16, 16);
#elif defined OUTPUT_LAYOUT_OS_YXI_OSV16
return GET_FILTER_OS_YXI_OSV16(OUTPUT, o, i, y, x);
#elif defined OUTPUT_LAYOUT_OS_IS_ZYX_ISV16_OSV16
return GET_FILTER_OS_IS_ZYX_ISV16_OSV16_INDEX(OUTPUT, o, i, z, y, x, SUB_GROUP_SIZE);
return GET_FILTER_OS_IS_ZYX_ISV_OSV_INDEX(OUTPUT, o, i, z, y, x, 16, 16);
#elif defined OUTPUT_LAYOUT_IS_OS_ZYX_ISV16_OSV16
return GET_FILTER_IS_OS_ZYX_ISV16_OSV16_INDEX(OUTPUT, o, i, z, y, x, SUB_GROUP_SIZE);
#elif defined OUTPUT_LAYOUT_IS_OS_YX_ISV16_OSV16

View File

@ -211,6 +211,10 @@ clEnqueueMemFillINTEL_fn)(
#define CL_DEVICE_UUID_KHR 0x106A
#endif // cl_khr_device_uuid
#ifndef OV_GPU_USE_OPENCL_HPP
// for C++ wrappers
using uuid_array = std::array<cl_uchar, CL_UUID_SIZE_KHR>;
@ -220,7 +224,7 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_UUID_KHR, uuid_array)
} // namespace detail
} // namespace cl
#endif // cl_khr_device_uuid
#endif // OV_GPU_USE_OPENCL_HPP
/***************************************************************
* cl_intel_device_attribute_query

View File

@ -1717,14 +1717,14 @@ TEST_P(conv_swap_xy_with_eltwise_diff_sizes, basic) {
// in_shape; out_shape; eltw_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
#define CASE_CONV_ELTW_FP16_SWAP_XY_1 { 1, 16, 1, 5 }, { 1, 32, 1, 7 }, { 1, 32, 1, 1 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::os_iyx_osv16, data_types::f16, format::bfyx
#define CASE_CONV_ELTW_FP16_SWAP_XY_2 { 1, 16, 1, 5 }, { 1, 32, 1, 7 }, { 1, 32, 1, 7 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::os_iyx_osv16, data_types::f16, format::bfyx
#define CASE_CONV_ELTW_FP32_SWAP_XY_1 { 3, 16, 1, 5 }, { 3, 32, 1, 7 }, { 1, 32, 1, 1 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_SWAP_XY_2 { 3, 16, 1, 5 }, { 3, 32, 1, 7 }, { 3, 32, 1, 7 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP16_SWAP_XY_3 { 3, 16, 1, 5 }, { 3, 32, 1, 7 }, { 1, 32, 1, 1 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::os_iyx_osv16, data_types::f16, format::bfyx
#define CASE_CONV_ELTW_FP16_SWAP_XY_4 { 3, 16, 1, 5 }, { 3, 32, 1, 7 }, { 3, 32, 1, 7 }, { 1, 1, 1, 3 }, { 1, 1 }, { 2, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::os_iyx_osv16, data_types::f16, format::bfyx
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_swap_xy_with_eltwise_diff_sizes, ::testing::ValuesIn(std::vector<conv_eltw_test_params>{
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_1, 3, 3, 4 },
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_2, 3, 3, 4 },
conv_eltw_test_params{ CASE_CONV_ELTW_FP32_SWAP_XY_1, 3, 3, 4 },
conv_eltw_test_params{ CASE_CONV_ELTW_FP32_SWAP_XY_2, 3, 3, 4 },
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_1, 3, 2, 4 },
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_2, 3, 2, 4 },
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_3, 3, 2, 4 },
conv_eltw_test_params{ CASE_CONV_ELTW_FP16_SWAP_XY_4, 3, 2, 4 },
}));
class conv_scale_activation_eltwise_fp32_quantize_i8 : public ConvEltwTest {};

View File

@ -74,20 +74,15 @@ public:
ASSERT_EQ(outputs_ref.size(), outputs_fused.size());
ASSERT_EQ(outputs_ref.size(), size_t(1));
auto output_not_fused_prim = outputs_ref.begin()->second.get_memory();
auto output_fused_prim = outputs_fused.begin()->second.get_memory();
if (output_not_fused_prim->get_layout().data_type == data_types::f32) {
cldnn::mem_lock<float> ref(output_not_fused_prim, get_test_stream());
cldnn::mem_lock<float> output_ptr(output_fused_prim, get_test_stream());
for (size_t i = 0; i < output_fused_prim->get_layout().count(); i++) {
ASSERT_NEAR(ref[i], output_ptr[i], tolerance) << "i = " << i;
}
} else {
cldnn::mem_lock<int16_t> ref(output_not_fused_prim, get_test_stream());
cldnn::mem_lock<int16_t> output_ptr(output_fused_prim, get_test_stream());
for (size_t i = 0; i < output_fused_prim->get_layout().count(); i++) {
ASSERT_NEAR(half_to_float(ref[i]), half_to_float(output_ptr[i]), tolerance) << "i = " << i;
}
auto val_ref=get_output_values_to_float(not_fused, outputs_ref.begin()->first);
auto val_opt=get_output_values_to_float(fused, outputs_fused.begin()->first);
ASSERT_EQ(val_ref.size(), val_opt.size());
for (size_t i = 0; i < val_ref.size(); i++) {
ASSERT_NEAR(val_ref[i], val_opt[i], tolerance)
<< "tolerance = " << tolerance
<< "\ni = " << i
<< "\nref[i] = " << val_ref[i]
<< "\nopt[i] = " << val_opt[i];
}
}

View File

@ -74,11 +74,6 @@ public:
}
layout get_per_channel_layout(gemm_test_params& p) {
// WA: per channel binary post-operation is not supported for onednn gemm. Use single value for such case.
if (engine.get_device_info().supports_immad){
std::cout << "per_channel layout for onednn gemm not supported." << std::endl;
return layout{p.default_type, p.default_format, tensor{1, 1, 1, 1}};
}
return layout{ p.default_type, p.default_format, tensor{ 1, p.in_shapes.at(0).feature[0], 1, 1 } };
}

View File

@ -589,6 +589,26 @@ std::vector<float> get_output_values_to_float(network& net, const primitive_id&
ret.push_back(mem[i]);
return ret;
}
// Reads the output `output_id` of `net` as floats, choosing the element type of the typed
// get_output_values_to_float<T> overload from the output layout's data type.
// Handles f16, f32, i8, u8, i32 and i64; any other data type raises via IE_THROW.
inline std::vector<float> get_output_values_to_float(network& net, const primitive_id& output_id, size_t max_cnt = std::numeric_limits<size_t>::max()) {
    const auto output_type = net.get_output_layout(output_id).data_type;
    if (output_type == data_types::f16) {
        return get_output_values_to_float<FLOAT16>(net, output_id, max_cnt);
    }
    if (output_type == data_types::f32) {
        return get_output_values_to_float<float>(net, output_id, max_cnt);
    }
    if (output_type == data_types::i8) {
        return get_output_values_to_float<int8_t>(net, output_id, max_cnt);
    }
    if (output_type == data_types::u8) {
        return get_output_values_to_float<uint8_t>(net, output_id, max_cnt);
    }
    if (output_type == data_types::i32) {
        return get_output_values_to_float<int32_t>(net, output_id, max_cnt);
    }
    if (output_type == data_types::i64) {
        return get_output_values_to_float<int64_t>(net, output_id, max_cnt);
    }
    IE_THROW() << "Unknown output data_type";
}
double default_tolerance(data_types dt);
// inline void print_bin_blob(cldnn::memory& mem, std::string name)
// {

View File

@ -28,7 +28,6 @@ add_library(openvino::interpreter_backend ALIAS interpreter_backend)
if(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-missing-declarations)
ie_add_compiler_flags(-Wno-sign-compare)
endif()
ie_faster_build(interpreter_backend UNITY)

View File

@ -35,7 +35,7 @@ Plugin::Plugin() {
_backend = ngraph::runtime::Backend::create();
// create default stream executor with a given name
_waitExecutor = get_executor_manager()->getIdleCPUStreamsExecutor({wait_executor_name});
_waitExecutor = get_executor_manager()->get_idle_cpu_streams_executor({wait_executor_name});
}
// ! [plugin:ctor]
@ -96,7 +96,7 @@ std::shared_ptr<ov::ICompiledModel> TemplatePlugin::Plugin::compile_model(const
auto compiled_model =
std::make_shared<CompiledModel>(model->clone(),
shared_from_this(),
get_executor_manager()->getIdleCPUStreamsExecutor(streamsExecutorConfig),
get_executor_manager()->get_idle_cpu_streams_executor(streamsExecutorConfig),
fullConfig);
return compiled_model;
}
@ -136,7 +136,7 @@ std::shared_ptr<ov::ICompiledModel> TemplatePlugin::Plugin::import_model(std::is
auto compiled_model =
std::make_shared<CompiledModel>(ov_model,
shared_from_this(),
get_executor_manager()->getIdleCPUStreamsExecutor(streamsExecutorConfig),
get_executor_manager()->get_idle_cpu_streams_executor(streamsExecutorConfig),
fullConfig);
return compiled_model;
}

View File

@ -8,11 +8,23 @@ import numpy as np
import openvino.runtime as ov
import pytest
import torch
import unittest
from openvino.runtime import PartialShape, Dimension, Model, Type
from common.mo_convert_test_class import CommonMOConvertTest
class MyTorchOp(torch.autograd.Function):
    """Custom autograd function that is exported as a single ``MyTorchOp`` graph node."""

    @staticmethod
    def symbolic(g, in_positions):
        """Emit one ``MyTorchOp`` node for *in_positions* on graph *g*."""
        return g.op("MyTorchOp", in_positions)

    @staticmethod
    def forward(self, in_positions):
        """Flatten *in_positions* to 1-D and add 0.5 to every element.

        The first argument is not used by the computation.
        """
        flattened = in_positions.reshape(-1)
        return flattened + 0.5
def make_pt_model_one_input():
from torch import nn
class NeuralNetwork(nn.Module):
@ -735,3 +747,30 @@ class TestMoConvertPyTorch(CommonMOConvertTest):
if mo_params is not None:
test_params.update(mo_params)
self._test_by_ref_graph(temp_dir, test_params, graph_ref, compare_tensor_names=False)
def create_pt_model_with_custom_op():
    """Create a PyTorch model whose forward pass consists of the custom ``MyTorchOp``."""
    from torch import nn

    class MyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.my_op = MyTorchOp()

        def forward(self, x):
            return self.my_op.apply(x)

    return MyModel()
class ConvertONNXFallthroughTest(unittest.TestCase):
    """Conversion test for a PyTorch model containing a custom operation."""

    def test_onnx_fallthrough(self):
        """Expect ``convert_model`` to fail with the unsupported-ONNX-op error."""
        from openvino.tools.mo import convert_model

        expected_error = ".*OpenVINO does not support the following ONNX operations: MyTorchOp.*"
        model_with_custom_op = create_pt_model_with_custom_op()

        # The error has to come from the ONNX frontend complaining about the
        # unsupported op, which shows that the ONNX conversion step passed.
        with self.assertRaisesRegex(RuntimeError, expected_error):
            convert_model(
                model_with_custom_op,
                input_shape=[1, 2, 3],
                use_legacy_frontend=True,
            )

View File

@ -0,0 +1,103 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import pytest
from pytorch_layer_test_class import PytorchLayerTest
class TestEinsumBatchMatMul(PytorchLayerTest):
    """Layer test for ``torch.einsum`` with the equation ``"bij, bjk -> bik"``."""

    def _prepare_input(self):
        """Return two random float32 arrays of shapes (5, 2, 3) and (5, 3, 4)."""
        import numpy as np

        lhs = np.random.randn(5, 2, 3).astype(np.float32)
        rhs = np.random.randn(5, 3, 4).astype(np.float32)
        return (lhs, rhs)

    def create_model(self):
        """Return the einsum module, a ``None`` reference net and ``"aten::einsum"``."""
        import torch

        class EinsumModelBatchMatmul(torch.nn.Module):
            def forward(self, x, y):
                return torch.einsum("bij, bjk -> bik", x, y)

        return EinsumModelBatchMatmul(), None, "aten::einsum"

    @pytest.mark.nightly
    @pytest.mark.precommit
    def test_einsum_batch_matmul(self, ie_device, precision, ir_version):
        """Run the shared ``_test`` routine on the batch-matmul einsum model."""
        model, ref_net, kind = self.create_model()
        self._test(model, ref_net, kind, ie_device, precision, ir_version)
class TestEinsumBatchDiagonal(PytorchLayerTest):
    """Layer test for ``torch.einsum`` with the equation ``"kii -> ki"``."""

    def _prepare_input(self):
        """Return one random float32 array of shape (3, 5, 5)."""
        import numpy as np

        batch = np.random.randn(3, 5, 5).astype(np.float32)
        return (batch,)

    def create_model(self):
        """Return the einsum module, a ``None`` reference net and ``"aten::einsum"``."""
        import torch

        class EinsumModelBatchDiagonal(torch.nn.Module):
            def forward(self, x):
                return torch.einsum("kii -> ki", x)

        return EinsumModelBatchDiagonal(), None, "aten::einsum"

    @pytest.mark.nightly
    @pytest.mark.precommit
    @pytest.mark.xfail(reason='OpenVINO CPU plugin does not support einsum diagonal')
    def test_einsum_batch_diagonal(self, ie_device, precision, ir_version):
        """Run ``_test`` with ``dynamic_shapes=False``; marked as an expected failure."""
        model, ref_net, kind = self.create_model()
        self._test(model, ref_net, kind, ie_device, precision, ir_version,
                   dynamic_shapes=False)
class TestEinsumInnerProd(PytorchLayerTest):
    """Layer test for ``torch.einsum`` with the equation ``"i,i"``."""

    def _prepare_input(self):
        """Return two random float32 vectors of length 5."""
        import numpy as np

        first = np.random.randn(5).astype(np.float32)
        second = np.random.randn(5).astype(np.float32)
        return (first, second)

    def create_model(self):
        """Return the einsum module, a ``None`` reference net and ``"aten::einsum"``."""
        import torch

        class EinsumModelInnerProd(torch.nn.Module):
            def forward(self, x, y):
                return torch.einsum("i,i", x, y)

        return EinsumModelInnerProd(), None, "aten::einsum"

    @pytest.mark.nightly
    @pytest.mark.precommit
    def test_einsum_inner_prod(self, ie_device, precision, ir_version):
        """Run the shared ``_test`` routine on the inner-product einsum model."""
        model, ref_net, kind = self.create_model()
        self._test(model, ref_net, kind, ie_device, precision, ir_version)
class TestEinsumTranspose(PytorchLayerTest):
    """Layer test for ``torch.einsum`` with the equation ``"ij->ji"``."""

    def _prepare_input(self):
        """Return one random float32 array of shape (3, 5)."""
        import numpy as np

        matrix = np.random.randn(3, 5).astype(np.float32)
        return (matrix,)

    def create_model(self):
        """Return the einsum module, a ``None`` reference net and ``"aten::einsum"``."""
        import torch

        class EinsumModelTranspose(torch.nn.Module):
            def forward(self, x):
                return torch.einsum("ij->ji", x)

        return EinsumModelTranspose(), None, "aten::einsum"

    @pytest.mark.nightly
    @pytest.mark.precommit
    def test_einsum_transpose(self, ie_device, precision, ir_version):
        """Run the shared ``_test`` routine on the transpose einsum model."""
        model, ref_net, kind = self.create_model()
        self._test(model, ref_net, kind, ie_device, precision, ir_version)

View File

@ -0,0 +1,58 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import numpy as np
import pytest
import torch
from pytorch_layer_test_class import PytorchLayerTest
from torchvision.ops import roi_align
class TestROIAlign(PytorchLayerTest):
    """Layer test for ``torchvision.ops.roi_align``."""

    def _prepare_input(self):
        """Return the input tensor and boxes stored on the instance by the test."""
        return (self.input_tensor, self.boxes)

    def create_model(self, output_size, spatial_scale, sampling_ratio, aligned):
        """Build a module that forwards to ``roi_align`` with the given settings.

        Returns:
            The module, a ``None`` reference net and ``"torchvision::roi_align"``.
        """

        class torchvision_roi_align(torch.nn.Module):
            def __init__(self, output_size, spatial_scale, sampling_ratio, aligned):
                super().__init__()
                self.output_size = output_size
                self.spatial_scale = spatial_scale
                self.sampling_ratio = sampling_ratio
                self.aligned = aligned

            def forward(self, input_tensor, rois):
                # The boxes are converted to the dtype of the input tensor
                # before being handed to roi_align.
                typed_rois = rois.to(dtype=input_tensor.dtype)
                return roi_align(
                    input_tensor,
                    typed_rois,
                    self.output_size,
                    self.spatial_scale,
                    self.sampling_ratio,
                    self.aligned,
                )

        model = torchvision_roi_align(output_size, spatial_scale, sampling_ratio, aligned)
        return (model, None, "torchvision::roi_align")

    @pytest.mark.parametrize('input_tensor', (np.random.randn(4, 5, 6, 7).astype(np.float32),))
    @pytest.mark.parametrize('boxes', (
        np.array([[1, 2, 2, 3, 3]]).astype(np.float32),
        np.array([[0, 1, 2, 5, 4],
                  [2, 1, 2, 5, 4],
                  [3, 1, 2, 5, 4]]).astype(np.float32),
    ))
    @pytest.mark.parametrize('output_size', ((4, 5), (3, 2), 3))
    @pytest.mark.parametrize('spatial_scale', (0.5, 1.0))
    @pytest.mark.parametrize('sampling_ratio', (0, 1))
    @pytest.mark.parametrize('aligned', (True, False))
    @pytest.mark.nightly
    @pytest.mark.precommit
    def test_roi_align(self, ie_device, precision, ir_version, input_tensor, boxes, output_size,
                       spatial_scale, sampling_ratio, aligned):
        """Store the parametrized inputs on the instance, then run ``_test``."""
        # _prepare_input reads these two attributes.
        self.input_tensor = input_tensor
        self.boxes = boxes
        model, ref_net, kind = self.create_model(output_size, spatial_scale,
                                                 sampling_ratio, aligned)
        self._test(model, ref_net, kind, ie_device, precision, ir_version,
                   trace_model=True)

View File

@ -6,10 +6,50 @@ import pytest
from pytorch_layer_test_class import PytorchLayerTest
class TestUpsample1D(PytorchLayerTest):
    """Layer test for ``F.interpolate`` on a 3-D (1, 3, 224) input."""

    def _prepare_input(self):
        """Return one random float32 array of shape (1, 3, 224)."""
        import numpy as np

        signal = np.random.randn(1, 3, 224).astype(np.float32)
        return (signal,)

    def create_model(self, size, scale, mode):
        """Build a module calling ``F.interpolate`` with the given settings.

        Returns:
            The module, a ``None`` reference net and the string
            ``aten::upsample_<mode>1d``.
        """
        import torch
        import torch.nn.functional as F

        class aten_upsample(torch.nn.Module):
            def __init__(self, size, scale, mode):
                super().__init__()
                self.size = size
                self.scale = scale
                self.mode = mode

            def forward(self, x):
                return F.interpolate(x, size=self.size, scale_factor=self.scale,
                                     mode=self.mode)

        model = aten_upsample(size, scale, mode)
        return model, None, F"aten::upsample_{mode}1d"

    @pytest.mark.parametrize("mode,size,scale", [
        ('nearest', 300, None),
        ('nearest', 200, None),
        ('nearest', None, 2.5),
        ('nearest', None, 0.75),
        ('linear', 300, None),
        ('linear', 200, None),
        ('linear', None, 2.5),
        ('linear', None, 0.75),
    ])
    @pytest.mark.nightly
    @pytest.mark.precommit
    def test_upsample1d(self, mode, size, scale, ie_device, precision, ir_version):
        """Run ``_test`` with ``trace_model=True`` for each mode/size/scale case."""
        model, ref_net, kind = self.create_model(size, scale, mode)
        self._test(model, ref_net, kind, ie_device, precision, ir_version,
                   trace_model=True)
class TestUpsample2D(PytorchLayerTest):
def _prepare_input(self):
import numpy as np
return (np.zeros((1, 3, 224, 224)).astype(np.float32),)
return (np.random.randn(1, 3, 200, 200).astype(np.float32),)
def create_model(self, size, scale, mode):
import torch
@ -31,25 +71,70 @@ class TestUpsample2D(PytorchLayerTest):
@pytest.mark.parametrize("mode,size,scale", [
('nearest', 300, None),
('nearest', 200, None),
('nearest', (128, 480), None),
('nearest', None, 2.5,),
('nearest', 150, None),
('nearest', (300, 400), None),
('nearest', None, 2.5),
('nearest', None, 0.75),
('nearest', None, (1.2, 0.8)),
('nearest', None, (1.5, 2)),
('bilinear', 300, None),
('bilinear', 200, None),
('bilinear', (128, 480), None),
('bilinear', 150, None),
('bilinear', (400, 480), None),
('bilinear', None, 2.5,),
('bilinear', None, 0.75),
('bilinear', None, (1.2, 0.8)),
('bilinear', None, (1.2, 1.3)),
('bicubic', 300, None),
('bicubic', 200, None),
('bicubic', (128, 480), None),
('bicubic', 150, None),
('bicubic', (400, 480), None),
('bicubic', None, 2.5,),
('bicubic', None, 0.75),
('bicubic', None, (1.2, 0.8))]
)
('bicubic', None, (1.2, 1.3))
])
@pytest.mark.nightly
@pytest.mark.precommit
def test_upsample(self, mode, size, scale, ie_device, precision, ir_version):
self._test(*self.create_model(size, scale, mode), ie_device, precision, ir_version, trace_model=True)
def test_upsample2d(self, mode, size, scale, ie_device, precision, ir_version):
self._test(*self.create_model(size, scale, mode), ie_device,
precision, ir_version, trace_model=True, **{"custom_eps": 1e-3})
class TestUpsample3D(PytorchLayerTest):
    """Layer test for ``F.interpolate`` on a 5-D (1, 3, 100, 100, 100) input."""

    def _prepare_input(self):
        """Return one random float32 array of shape (1, 3, 100, 100, 100)."""
        import numpy as np

        volume = np.random.randn(1, 3, 100, 100, 100).astype(np.float32)
        return (volume,)

    def create_model(self, size, scale, mode):
        """Build a module calling ``F.interpolate`` with the given settings.

        Returns:
            The module, a ``None`` reference net and the string
            ``aten::upsample_<mode>3d``.
        """
        import torch
        import torch.nn.functional as F

        class aten_upsample(torch.nn.Module):
            def __init__(self, size, scale, mode):
                super().__init__()
                self.size = size
                self.scale = scale
                self.mode = mode

            def forward(self, x):
                return F.interpolate(x, size=self.size, scale_factor=self.scale,
                                     mode=self.mode)

        model = aten_upsample(size, scale, mode)
        return model, None, F"aten::upsample_{mode}3d"

    @pytest.mark.parametrize("mode,size,scale", [
        ('nearest', 200, None),
        ('nearest', 150, None),
        ('nearest', (150, 200, 250), None),
        ('nearest', None, 2.5),
        ('nearest', None, 0.75),
        ('nearest', None, (1.5, 2, 2.5)),
        ('trilinear', 200, None),
        ('trilinear', 150, None),
        ('trilinear', (200, 240, 210), None),
        ('trilinear', None, 2.5),
        ('trilinear', None, 0.75),
        ('trilinear', None, (1.2, 1.1, 1.5)),
    ])
    @pytest.mark.nightly
    @pytest.mark.precommit
    def test_upsample3d(self, mode, size, scale, ie_device, precision, ir_version):
        """Run ``_test`` with ``trace_model=True`` and ``custom_eps=1e-3``."""
        model, ref_net, kind = self.create_model(size, scale, mode)
        self._test(model, ref_net, kind, ie_device, precision, ir_version,
                   trace_model=True, custom_eps=1e-3)

View File

@ -131,6 +131,7 @@ def convert_pytorch_to_onnx(model, input_shape, opset_version, example_inputs, o
torch.onnx.export(model,
inputs,
model_onnx,
operator_export_type=torch.onnx.OperatorExportTypes.ONNX_FALLTHROUGH,
**additional_params)
return model_onnx

View File

@ -309,3 +309,41 @@ class TestMoFreezePlaceholderTFFE(unittest.TestCase):
def test_conversion_model_oneshot_iterator_default(self):
    """Run ``self.basic`` on ``model_oneshot_iterator.pbtxt``.

    All arguments are passed positionally; their meaning is defined by
    ``basic``, which is outside this excerpt.
    """
    self.basic(
        "model_oneshot_iterator.pbtxt",
        None, None, None, None, None, None,
        True, True, False, False,
    )
@generate(
    (
        "in2{f32}->[0.0 0.0 0.0 0.0]",
        {"in1": np.array([[1.0, 2.0], [3.0, 4.0]])},
        np.array([[1.0, 2.0], [3.0, 4.0]]),
        np.float32,
    ),
    (
        "in2->[1.0 15.0 15.5 1.0]",
        {"in1": np.array([[2.0, 4.0], [12.0, 8.0]])},
        np.array([[3.0, 19.0], [27.5, 9.0]]),
        np.float32,
    ),
)
def test_conversion_model_with_non_standard_extension(self, input_freezing_value, inputs, expected,
                                                      dtype):
    """Run ``self.basic`` on ``model_fp32.frozen`` for each generated case.

    Each case supplies the input-freezing string, the input dictionary, the
    expected array and the dtype.
    """
    self.basic(
        "model_fp32.frozen",
        input_freezing_value,
        inputs,
        dtype,
        expected,
        only_conversion=False,
        input_model_is_text=False,
        use_new_frontend=True,
        use_legacy_frontend=False,
    )
def test_conversion_fake_model(self):
    """Expect ``self.basic`` on ``fake.pb`` to raise the frozen-protobuf error."""
    expected_message = ("Internal error or inconsistent input model: the frontend supports "
                        "only frozen binary protobuf format.")
    with self.assertRaisesRegex(Exception, expected_message):
        self.basic(
            "fake.pb", None, None, None, None,
            only_conversion=True,
            input_model_is_text=False,
            use_new_frontend=True,
            use_legacy_frontend=False,
        )
def test_conversion_dir_model(self):
    """Expect ``self.basic`` on the path ``"."`` to raise the frozen-protobuf error."""
    expected_message = ("Internal error or inconsistent input model: the frontend supports "
                        "only frozen binary protobuf format.")
    with self.assertRaisesRegex(Exception, expected_message):
        self.basic(
            ".", None, None, None, None,
            only_conversion=True,
            input_model_is_text=False,
            use_new_frontend=True,
            use_legacy_frontend=False,
        )

View File

@ -0,0 +1,2 @@
dcfsdcdsdcs
cscscsc

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8a33c91148b5e72ca03608c7d2ee18229ee4b610344dadd6896efeb6ac7b93e0
size 141